Whamcloud - gitweb
- update from parent
authoryury <yury>
Fri, 14 Apr 2006 07:37:24 +0000 (07:37 +0000)
committeryury <yury>
Fri, 14 Apr 2006 07:37:24 +0000 (07:37 +0000)
156 files changed:
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.7.patch
ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.9.patch
lustre/ChangeLog
lustre/autoconf/lustre-core.m4
lustre/autoconf/lustre-version.ac
lustre/include/liblustre.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_debug.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_export.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_ha.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_import.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_net.h
lustre/include/linux/lvfs.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/include/lustre/lustre_user.h
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config
lustre/kernel_patches/patches/export-filemap_populate.patch [deleted file]
lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch
lustre/kernel_patches/patches/export_symbols-2.6-suse.patch
lustre/kernel_patches/patches/export_symbols-2.6.12.patch
lustre/kernel_patches/patches/ext3-htree-path-ops.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-mballoc2-2.4.24.patch [deleted file]
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch
lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch
lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch
lustre/kernel_patches/patches/ext3-nlinks-2.6.9.patch
lustre/kernel_patches/patches/nfs-cifs-intent-2.6-suse.patch
lustre/kernel_patches/patches/nfs-cifs-intent-2.6.12.patch
lustre/kernel_patches/patches/tcp-rto_proc-2.6.9.patch [new file with mode: 0644]
lustre/kernel_patches/patches/tcp-zero-copy-2.6.9-rhel4.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch
lustre/kernel_patches/patches/vfs_intent-2.6.12.patch
lustre/kernel_patches/series/2.6-rhel4.series
lustre/kernel_patches/series/2.6-suse.series
lustre/kernel_patches/targets/2.6-suse.target.in
lustre/ldiskfs/quotafmt_test.c
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_llog.c [deleted file]
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_request.c
lustre/liblustre/llite_lib.c
lustre/liblustre/namei.c
lustre/liblustre/rw.c
lustre/liblustre/super.c
lustre/liblustre/tests/sanity.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/llite_mmap.c
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/rw26.c
lustre/llite/special.c
lustre/lov/lov_merge.c
lustre/lov/lov_obd.c
lustre/lvfs/fsfilt.c
lustre/lvfs/fsfilt_ext3.c
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_join.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/mds_unlink_open.c
lustre/mds/mds_xattr.c
lustre/mdt/mdt_handler.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_llog.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/llog_obd.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_config.c
lustre/obdclass/obd_mount.c
lustre/obdclass/obdo.c
lustre/obdclass/sysctl.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_log.c
lustre/obdfilter/filter_lvb.c
lustre/osc/lproc_osc.c
lustre/osc/osc_create.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/autoMakefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/import.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/ptlrpcd.c
lustre/ptlrpc/recov_thread.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/quota/quota_check.c
lustre/quota/quota_context.c
lustre/quota/quota_master.c
lustre/tests/acceptance-small.sh
lustre/tests/cfg/insanity-local.sh
lustre/tests/cfg/insanity-ltest.sh
lustre/tests/cfg/local.sh
lustre/tests/cfg/mdev.sh
lustre/tests/conf-sanity.sh
lustre/tests/insanity.sh
lustre/tests/lov.sh
lustre/tests/mountconf.sh
lustre/tests/oos.sh
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-single.sh
lustre/tests/runregression-mds.sh [deleted file]
lustre/tests/runtests
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/test-framework.sh
lustre/utils/.cvsignore
lustre/utils/Lustre/lustredb.py
lustre/utils/lfs.c
lustre/utils/mkfs_lustre.c
lustre/utils/mount_lustre.c
lustre/utils/rmmod_all.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index bb9928a..1d8a4af 100644 (file)
@@ -1679,7 +1679,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1688,8 +1688,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1708,9 +1708,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2083,12 +2083,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2605,7 +2605,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2639,7 +2639,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2678,7 +2678,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2704,7 +2704,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2712,7 +2712,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2727,7 +2727,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2743,7 +2743,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
index a2b9caf..0c2f445 100644 (file)
@@ -1674,7 +1674,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1683,8 +1683,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1703,9 +1703,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2078,12 +2078,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2599,7 +2599,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2633,7 +2633,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2672,7 +2672,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2698,7 +2698,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2706,7 +2706,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2721,7 +2721,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2737,7 +2737,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
index d12c678..5ff3d3b 100644 (file)
@@ -1693,7 +1693,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1702,8 +1702,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1722,9 +1722,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2097,12 +2097,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2618,7 +2618,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2652,7 +2652,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2691,7 +2691,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
index bb9fc1b..0d360fa 100644 (file)
@@ -26,7 +26,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
        int err;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -86,7 +86,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
        int err;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -97,7 +97,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
@@ -140,24 +140,3 @@ Index: linux-2.6.7/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 62bf156..37cca81 100644 (file)
@@ -20,16 +20,16 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
  }
  
  static int ext3_add_nondir(handle_t *handle,
-@@ -1706,7 +1712,7 @@
+@@ -1706,7 +1712,7 @@ static int ext3_add_nondir(handle_t
        struct ext3_dir_entry_2 * de;
        int err, retries = 0;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
  retry:
-@@ -1729,7 +1735,7 @@
+@@ -1729,7 +1735,7 @@ static int ext3_mkdir(struct inode
        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
        dir_block = ext3_bread (handle, inode, 0, 1, &err);
        if (!dir_block) {
@@ -38,7 +38,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
                ext3_mark_inode_dirty(handle, inode);
                iput (inode);
                goto out_stop;
-@@ -1761,7 +1767,7 @@
+@@ -1761,7 +1767,7 @@ static int ext3_mkdir(struct inode
                iput (inode);
                goto out_stop;
        }
@@ -47,7 +47,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
        d_instantiate(dentry, inode);
-@@ -2026,10 +2032,10 @@
+@@ -2026,10 +2032,10 @@ static int ext3_rmdir (struct inode
        retval = ext3_delete_entry(handle, dir, de, bh);
        if (retval)
                goto end_rmdir;
@@ -62,7 +62,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        inode->i_version++;
        inode->i_nlink = 0;
        /* There's no need to set i_disksize: the fact that i_nlink is
-@@ -2039,7 +2045,7 @@
+@@ -2039,7 +2045,7 @@ static int ext3_rmdir (struct inode
        ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
        ext3_mark_inode_dirty(handle, inode);
@@ -71,7 +71,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
  
-@@ -2090,7 +2096,7 @@
+@@ -2090,7 +2096,7 @@ static int ext3_unlink(struct inode
        dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
@@ -80,27 +80,27 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        if (!inode->i_nlink)
                ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime;
-@@ -2165,7 +2171,7 @@
+@@ -2165,7 +2171,7 @@ static int ext3_link (struct dentry
        struct inode *inode = old_dentry->d_inode;
        int err, retries = 0;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
  
  retry:
-@@ -2252,8 +2258,8 @@
+@@ -2252,8 +2258,8 @@ static int ext3_rename (struct inode
                if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
                        goto end_rename;
                retval = -EMLINK;
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
-@@ -2310,7 +2316,7 @@
+@@ -2310,7 +2316,7 @@ static int ext3_rename (struct inode
        }
  
        if (new_inode) {
@@ -109,7 +109,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
                new_inode->i_ctime = CURRENT_TIME_SEC;
        }
        old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
-@@ -2321,11 +2327,13 @@
+@@ -2321,11 +2327,13 @@ static int ext3_rename (struct inode
                PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
                ext3_journal_dirty_metadata(handle, dir_bh);
@@ -140,24 +140,3 @@ Index: linux-2.6.7/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 05fbc7e..6fad0a6 100644 (file)
-03-16-2006  Cluster File Systems, Inc. <info@clusterfs.com>
-       * version 1.4.6.1
-       * Support for newer kernels: 2.6.9-34.EL (RHEL 4), 2.6.5-7.252 (SLES 9)
+tbd         Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.4.7
+       * bug fixes
+
+Severity   : major
+Frequency  : rare
+Bugzilla   : 5719, 9635, 9792, 9684, 
+Description: OST (or MDS) trips assertions in (re)connection under heavy load
+Details    : If a server is under heavy load and cannot reply to new
+            connection requests before the client resends the (re)connect,
+            the connection handling code can behave badly if two service
+            threads are concurrently handing separate (re)connections from
+            the same client.  Add better locking to the connection handling
+            code, and ensure that only a single connection will be processed
+            for a given client UUID, even if the lock is dropped.
+
+Severity   : enhancement
+Bugzilla   : 3627
+Description: add TCP zero-copy support to kernel
+Details    : Add support to the kernel TCP stack to allow zero-copy bulk
+            sends if the hardware supports scatter-gather and checksumming.
+            This allows socklnd to do client-write and server-read more
+            efficiently and reduce CPU utilization from skbuf copying.
+
+Severity   : minor
+Frequency  : only if NFS exporting from client
+Bugzilla   : 10258
+Description: NULL pointer deref in ll_iocontrol() if chattr mknod file
+Details    : If setting attributes on a file created under NFS that had
+            never been opened it would be possible to oops the client
+            if the file had no objects.
+
+Severity   : minor
+Frequency  : always for liblustre
+Bugzilla   : 10290
+Description: liblustre client does MDS+OSTs setattr RPC for each write
+Details    : When doing a write from a liblustre client, the client
+            incorrectly issued an RPC to the MDS and each OST the file was
+            striped over in order to update the timestamps.  When writing
+            with small chunks and many clients this could overwhelm the MDS
+            with RPCs.  In all cases it would slow down the write because
+            these RPCs are unnecessary.
+
+Severity   : enhancement
+Bugzilla   : 9340
+Description: allow number of MDS service threads to be changed at module load
+Details    : It is now possible to change the number of MDS service threads
+            running.  Adding "options mds mds_num_threads=N" will set the
+            number of threads for the next time Lustre is restarted (assuming
+            the "mds" module is also reloaded at that time).  The default
+            number of threads will stay the same, 32 for most systems.
+
+Severity   : major
+Frequency  : rare
+Bugzilla   : 10300
+Description: OST crash if filesystem is unformatted or corrupt
+Details    : If an OST is started on a device that has never been formatted
+            or if the filesystem is corrupt and cannot even mount then the
+            error handling cleanup routines would dereference a NULL pointer.
+
+Severity   : medium
+Frequency  : rare
+Bugzilla   : 10047
+Description: NULL pointer deref in llap_from_page.
+Details    : get_cache_page_nowait can return a page with NULL (or otherwise
+            incorrect) mapping if the page was truncated/reclaimed while it was
+            searched for. Check for this condition and skip such pages when
+            doing readahead. Introduce extra check to llap_from_page() to
+            verify page->mapping->host is non-NULL (so page is not anonymous).
+
+Severity   : minor
+Frequency  : Sometimes when using sys_sendfile
+Bugzilla   : 7020
+Description: "page not covered by a lock" warnings from ll_readpage
+Details    : sendfile called ll_readpage without right page locks present.
+            Now we introduced ll_file_sendfile that does necessary locking
+            around call to generic_file_sendfile() much like we do in
+            ll_file_read().
+
+Severity   : medium
+Frequency  : with certain MDS communication failures at client mount time
+Bugzilla   : 10268
+Description: NULL pointer deref after failed client mount
+Details    : a client connection request may delayed by the network layer
+            and not be sent until after the PTLRPC layer has timed out the
+            request.  If the client fails the mount immediately it will try
+            to clean up before the network times out the request.  Add a
+            reference from the request import to the obd device and delay
+            the cleanup until the network drops the request.
+
+Severity   : medium
+Frequency  : occasionally during client (re)connect
+Bugzilla   : 9387
+Description: assertion failure during client (re)connect
+Details    : processing a client connection request may be delayed by the
+            client or server longer than the client connect timeout.  This
+            causes the client to resend the connection request.  If the
+            original connection request is replied in this interval, the
+            client may trip an assertion failure in ptlrpc_connect_interpret()
+            which thought it would be the only running connect process.
+
+Severity   : medium
+Frequency  : only with obd_echo servers and clients that are rebooted
+Bugzilla   : 10140
+Description: kernel BUG accessing uninitialized data structure
+Details    : When running an obd_echo server it did not start the ping_evictor
+            thread, and when a client was evicted an uninitialized data
+            structure was accessed.  Start the ping_evictor in the RPC
+            service startup instead of the OBD startup.
+
+Severity   : enhancement
+Bugzilla   : 10393 (patchless)
+Description: Remove dependency on various unexported kernel interfaces.
+Details    : No longer need reparent_to_init, exit_mm, exit_files,
+            sock_getsockopt, filemap_populate, FMODE_EXEC, put_filp.
 
+Severity   : minor
+Frequency  : rare (only users of deprecated and unsupported LDAP config)
+Bugzilla   : 9337
+Description: write_conf for zeroconf mount queried LDAP incorrectly for client
+Details    : LDAP apparently contains 'lustreName' attributes instead of
+            'name'.  A simple remapping of the name is sufficient.
+
+------------------------------------------------------------------------------
 
 02-14-2006  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.4.6
        * WIRE PROTOCOL CHANGE.  This version of Lustre networking WILL NOT
-        INTEROPERATE with older versions automatically.  Please read the 
+        INTEROPERATE with older versions automatically.  Please read the
         user documentation before upgrading any part of a live system.
        * WARNING: Lustre networking configuration changes are required with
         this release.  See https://bugzilla.clusterfs.com/show_bug.cgi?id=10052
         for details.
        * bug fixes
-       * Support for newer kernels: 2.6.9-22.0.2.EL (RHEL 4),
-         2.6.5-7.244 (SLES 9) - same as 1.4.5.2.
+       * Support for newer kernels:
+       2.6.9-22.0.2.EL (RHEL 4),
+       2.6.5-7.244 (SLES 9) - same as 1.4.5.2.
+       2.6.12.6 vanilla (kernel.org)
 
 
 Severity   : enhancement
@@ -27,6 +149,17 @@ Details    : LNET is new networking infrastructure for Lustre, it includes
             created for this new infrastructure.
 
 Severity   : enhancement
+Description: Introduced Access control lists
+Details    : clients can set ACLs on files and directories in order to have
+            more fine-grained permissions than the standard Unix UGO+RWX.
+            The MDS must be started with the "-o acl" mount option.
+
+Severity   : enhancement
+Description: Introduced filesystem quotas
+Details    : Administrators may now establish per-user quotas on the
+            filesystem.
+
+Severity   : enhancement
 Bugzilla   : 7982
 Description: Configuration change for the XT3
             The PTLLND is now used to run Lustre over Portals on the XT3
@@ -605,7 +738,6 @@ Details    : Add sub-command 'df' on 'lfs' to report the disk space usage of
             MDS/OSDs. Usage: lfs df [-i][-h]. Command Options: '-i' to report
             usage of objects; '-h' to report in human readable format.
 
-
 ------------------------------------------------------------------------------
 
 08-26-2005  Cluster File Systems, Inc. <info@clusterfs.com>
index 4e98936..0fbbd58 100644 (file)
@@ -443,6 +443,49 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
+AC_DEFUN([LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL],
+[AC_MSG_CHECKING([if struct file_operations has an unlocked_ioctl field])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+        struct file_operations fops;
+        &fops.unlocked_ioctl;
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_UNLOCKED_IOCTL, 1, [struct file_operations has an unlock ed_ioctl field])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+AC_DEFUN([LC_FILEMAP_POPULATE],
+[AC_MSG_CHECKING([for exported filemap_populate])
+LB_LINUX_TRY_COMPILE([
+        #include <asm/page.h>
+        #include <linux/mm.h>
+],[
+       filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0);
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+AC_DEFUN([LC_D_ADD_UNIQUE],
+[AC_MSG_CHECKING([for d_add_unique])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/dcache.h>
+],[
+       d_add_unique(NULL, NULL);
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_D_ADD_UNIQUE, 1, [Kernel has d_add_unique])
+],[
+        AC_MSG_RESULT([no])
+])
+])
 
 #
 # LC_PROG_LINUX
@@ -469,6 +512,9 @@ LC_FUNC_DEV_SET_RDONLY
 LC_FUNC_FILEMAP_FDATAWRITE
 LC_STRUCT_STATFS
 LC_FUNC_PAGE_MAPPED
+LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL
+LC_FILEMAP_POPULATE
+LC_D_ADD_UNIQUE
 ])
 
 #
index bc74354..85b2de6 100644 (file)
@@ -1,7 +1,7 @@
 m4_define([LUSTRE_MAJOR],[1])
 m4_define([LUSTRE_MINOR],[4])
 m4_define([LUSTRE_PATCH],[6])
-m4_define([LUSTRE_FIX],[0])
+m4_define([LUSTRE_FIX],[90])
 
 dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after
 dnl # next release candidate/beta would spill this warning already.
index 88be1b7..9b8d4aa 100644 (file)
@@ -100,6 +100,12 @@ typedef unsigned short umode_t;
 #define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
 #define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
 
+#ifndef page_private
+#define page_private(page) ((page)->private)
+#define set_page_private(page, v) ((page)->private = (v))
+#endif
+
+
 static inline void inter_module_put(void *a)
 {
         return;
index 2eb9780..5d804c8 100644 (file)
 
 #include <libcfs/linux/portals_compat25.h>
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
+#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
+#define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex)
+#define d_child d_u.d_child
+#define d_rcu d_u.d_rcu
+#else
+#define UNLOCK_INODE_MUTEX(inode) do {up(&(inode)->i_sem); } while(0)
+#define LOCK_INODE_MUTEX(inode) do {down(&(inode)->i_sem); } while(0)
+#define TRYLOCK_INODE_MUTEX(inode) (!down_trylock(&(inode)->i_sem))
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
 #define NGROUPS_SMALL           NGROUPS
 #define NGROUPS_PER_BLOCK       ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
@@ -54,6 +66,15 @@ void groups_free(struct group_info *ginfo);
 
 #endif
 
+#ifndef page_private
+#define page_private(page) ((page)->private)
+#define set_page_private(page, v) ((page)->private = (v))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
+#define gfp_t int
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 
 #define lock_dentry(___dentry)          spin_lock(&(___dentry)->d_lock)
@@ -103,17 +124,6 @@ void groups_free(struct group_info *ginfo);
 
 #include <linux/writeback.h>
 
-static inline void lustre_daemonize_helper(void)
-{
-        LASSERT(current->signal != NULL);
-        current->signal->session = 1;
-        if (current->group_leader)
-                current->group_leader->signal->pgrp = 1;
-        else
-                CERROR("we aren't group leader\n");
-        current->signal->tty = NULL;
-}
-
 static inline int cleanup_group_info(void)
 {
         struct group_info *ginfo;
@@ -132,12 +142,12 @@ static inline int cleanup_group_info(void)
         do {       \
                 page_cache_get(page); \
                 SetPagePrivate(page); \
-                page->private = (unsigned long)llap; \
+                set_page_private(page, (unsigned long)llap); \
         } while (0)
 #define __clear_page_ll_data(page) \
         do {       \
                 ClearPagePrivate(page); \
-                page->private = 0; \
+                set_page_private(page, 0); \
                 page_cache_release(page); \
         } while(0)
 
@@ -248,15 +258,7 @@ static inline void ll_redirty_page(struct page *page)
 
 static inline void __d_drop(struct dentry *dentry)
 {
-        list_del(&dentry->d_hash);
-        INIT_LIST_HEAD(&dentry->d_hash);
-}
-
-static inline void lustre_daemonize_helper(void)
-{
-        current->session = 1;
-        current->pgrp = 1;
-        current->tty = NULL;
+        list_del_init(&dentry->d_hash);
 }
 
 static inline int cleanup_group_info(void)
@@ -282,8 +284,8 @@ static inline void cond_resched(void)
 #define PDE(ii)         ((ii)->u.generic_ip)
 #endif
 
-#define __set_page_ll_data(page, llap) page->private = (unsigned long)llap
-#define __clear_page_ll_data(page) page->private = 0
+#define __set_page_ll_data(page, llap) set_page_private(page, (unsigned long)llap)
+#define __clear_page_ll_data(page) set_page_private(page, 0)
 #define PageWriteback(page) 0
 #define set_page_writeback(page) do {} while (0)
 #define end_page_writeback(page) do {} while (0)
@@ -314,13 +316,20 @@ static inline int page_mapped(struct page *page)
 }
 #endif /* !HAVE_PAGE_MAPPED */
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16))
+static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
+{
+        update_atime(dentry->d_inode);
+}
+#endif
+
 static inline void file_accessed(struct file *file)
 {
 #ifdef O_NOATIME
         if (file->f_flags & O_NOATIME)
                 return;
 #endif
-        update_atime(file->f_dentry->d_inode);
+        touch_atime(file->f_vfsmnt, file->f_dentry);
 }
 
 #endif /* end of 2.4 compat macros */
index a554739..7d76d8d 100644 (file)
@@ -46,11 +46,11 @@ do { if (offset > ASSERT_MAX_SIZE_MB << 20) {                           \
 #define LL_CDEBUG_PAGE(mask, page, fmt, arg...)                               \
         CDEBUG(mask, "page %p map %p index %lu flags %lx count %u priv %0lx: "\
                fmt, page, page->mapping, page->index, (long)page->flags,      \
-               page_count(page), page->private, ## arg)
+               page_count(page), page_private(page), ## arg)
 #else
 #define LL_CDEBUG_PAGE(mask, page, fmt, arg...)                               \
         CDEBUG(mask, "page %p index %lu priv %0lx: "\
-               fmt, page, page->index, page->private, ## arg)
+               fmt, page, page->index, page_private(page), ## arg)
 #endif
 
 /* lib/debug.c */
index 9d632eb..85bb433 100644 (file)
@@ -266,7 +266,7 @@ struct ldlm_resource {
         struct list_head       lr_converting;
         struct list_head       lr_waiting;
         ldlm_mode_t            lr_most_restr;
-        ldlm_type_t            lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,LLOG} */
+        ldlm_type_t            lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */
         struct ldlm_resource  *lr_root;
         struct ldlm_res_id     lr_name;
         atomic_t               lr_refcount;
index a58641a..820426b 100644 (file)
@@ -72,6 +72,7 @@ struct obd_export {
         int                       exp_flags;
         unsigned int              exp_failed:1,
                                   exp_disconnected:1,
+                                  exp_connecting:1,
                                   exp_replay_needed:1,
                                   exp_libclient:1; /* liblustre client? */
         union {
@@ -81,7 +82,6 @@ struct obd_export {
         } u;
 };
 
-#define exp_mgs_data    u.eu_mgs_data
 #define exp_mds_data    u.eu_mds_data
 #define exp_lov_data    u.eu_lov_data
 #define exp_filter_data u.eu_filter_data
index 41109f1..bc831e5 100644 (file)
@@ -45,7 +45,8 @@ struct fsfilt_operations {
         struct list_head fs_list;
         struct module *fs_owner;
         char   *fs_type;
-        char   *(* fs_label)(struct super_block *sb);
+        char   *(* fs_getlabel)(struct super_block *sb);
+        int     (* fs_setlabel)(struct super_block *sb, char *label);
         char   *(* fs_uuid)(struct super_block *sb);
         void   *(* fs_start)(struct inode *inode, int op, void *desc_private,
                              int logs);
@@ -109,14 +110,23 @@ extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
 extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
 extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
 
-static inline char *fsfilt_label(struct obd_device *obd, struct super_block *sb)
+static inline char *fsfilt_get_label(struct obd_device *obd,
+                                     struct super_block *sb)
 {
-        if (obd->obd_fsops->fs_label == NULL)
+        if (obd->obd_fsops->fs_getlabel == NULL)
                 return NULL;
-        if (obd->obd_fsops->fs_label(sb)[0] == '\0')
+        if (obd->obd_fsops->fs_getlabel(sb)[0] == '\0')
                 return NULL;
 
-        return obd->obd_fsops->fs_label(sb);
+        return obd->obd_fsops->fs_getlabel(sb);
+}
+
+static inline int fsfilt_set_label(struct obd_device *obd,
+                                   struct super_block *sb, char *label)
+{
+        if (obd->obd_fsops->fs_setlabel == NULL)
+                return -ENOSYS;
+        return (obd->obd_fsops->fs_setlabel(sb, label));
 }
 
 static inline __u8 *fsfilt_uuid(struct obd_device *obd, struct super_block *sb)
@@ -144,6 +154,8 @@ static inline __u8 *fsfilt_uuid(struct obd_device *obd, struct super_block *sb)
 do {                                                                    \
         if (time_before(jiffies, start + 15 * HZ))                      \
                 break;                                                  \
+        else if (time_before(jiffies, start + 30 * HZ))                 \
+                CDEBUG(D_VFSTRACE,"slow %s %lus\n", msg,(jiffies-start)/HZ);\
         else if (time_before(jiffies, start + timeout / 2 * HZ))        \
                 CWARN("slow %s %lus\n", msg, (jiffies - start) / HZ);   \
         else                                                            \
index 5083b94..8377728 100644 (file)
@@ -22,6 +22,6 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active);
 void ptlrpc_activate_import(struct obd_import *imp);
 void ptlrpc_deactivate_import(struct obd_import *imp);
 void ptlrpc_invalidate_import(struct obd_import *imp);
-void ptlrpc_fail_import(struct obd_import *imp, int generation);
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
 
 #endif
index 7fb4d8a..5b7774a 100644 (file)
@@ -223,7 +223,6 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
 #define MSG_CONNECT_ASYNC       0x40
 
 /* Connect flags */
-#define OBD_CONNECT_RDONLY       0x1ULL
 #define OBD_CONNECT_RDONLY       0x1ULL /* client allowed read-only access */
 #define OBD_CONNECT_INDEX        0x2ULL /* connect to specific LOV idx */
 #define OBD_CONNECT_GRANT        0x8ULL /* OSC acquires grant at connect */
@@ -388,7 +387,6 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
         struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 };
 
-
 #define OBD_MD_FLID        (0x00000001ULL) /* object ID */
 #define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
 #define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
@@ -847,9 +845,7 @@ extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
 #define FMODE_READ               00000001
 #define FMODE_WRITE              00000002
 #endif
-#ifndef FMODE_EXEC
-#define FMODE_EXEC               00000004
-#endif
+#define MDS_FMODE_EXEC           00000004
 #define MDS_OPEN_CREAT           00000100
 #define MDS_OPEN_EXCL            00000200
 #define MDS_OPEN_TRUNC           00001000
index e460ae3..6b87e84 100644 (file)
@@ -73,7 +73,6 @@ struct obd_import {
         __u32                     imp_conn_cnt;
         __u64                     imp_max_transno;
         __u64                     imp_peer_committed_transno;
-        struct obd_uuid           imp_target_uuid; /* XXX -> lustre_name */
         struct lustre_handle      imp_remote_handle;
         unsigned long             imp_next_ping;   /* jiffies */
 
@@ -85,14 +84,22 @@ struct obd_import {
         spinlock_t                imp_lock;
 
         /* flags */
-        unsigned int              imp_invalid:1, imp_replayable:1,
-                                  imp_dlm_fake:1, imp_server_timeout:1,
-                                  imp_initial_recov:1, imp_initial_recov_bk:1,
-                                  imp_force_verify:1, imp_pingable:1,
-                                  imp_resend_replay:1, imp_deactive:1;
+        unsigned int             
+                imp_invalid:1,          /* evicted */
+                imp_replayable:1,       /* try to recover the import */
+                imp_dlm_fake:1,         /* don't run recovery (timeout instead) */
+                imp_server_timeout:1,   /* use 1/2 timeout on MDS' OSCs */
+                imp_initial_recov:1,    /* retry the initial connection */  
+                imp_initial_recov_bk:1, /* turn off init_recov after trying all failover nids */
+                imp_force_verify:1,     /* force an immidiate ping */
+                imp_pingable:1,         /* pingable */
+                imp_resend_replay:1,    /* resend for replay */
+                imp_deactive:1;         /* administratively disabled */
         __u32                     imp_connect_op;
         struct obd_connect_data   imp_connect_data;
         __u64                     imp_connect_flags_orig;
+
+        struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */
 };
 
 typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
index 389f1f8..578a7c0 100644 (file)
@@ -171,7 +171,7 @@ static inline int ll_ocd_update(struct obd_device *host,
                 spin_unlock(&lco->lco_lock);
                 result = 0;
         } else {
-                CERROR("unexpected notification of %s %s!\n",
+                CERROR("unexpected notification from %s %s!\n",
                        watched->obd_type->typ_name,
                        watched->obd_name);
                 result = -EINVAL;
index 9160999..67d1914 100644 (file)
@@ -84,7 +84,7 @@
  */
 
 #define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64)
-#define LDLM_NBUFS       64
+#define LDLM_NBUFS      (64 * smp_num_cpus)
 #define LDLM_BUFSIZE    (8 * 1024)
 #define LDLM_MAXREQSIZE (5 * 1024)
 #define LDLM_MAXREPSIZE (1024)
@@ -93,6 +93,7 @@
 #define MDT_MAX_THREADS 32UL
 #define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
                                   num_physpages >> (25 - PAGE_SHIFT)), 2UL)
+
 #define MDS_NBUFS       (64 * smp_num_cpus)
 #define MDS_BUFSIZE     (8 * 1024)
 /* Assume file name length = FNAME_MAX = 256 (true for ext3).
 #define MDS_MAXREPSIZE  max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56)
 
 /* FIXME fix all constants here */
-#define MGS_MAX_THREADS 32UL
-#define MGS_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \
-                                  MGS_MAX_THREADS), 2UL)
+#define MGS_MAX_THREADS 8UL
+#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \
+                            num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT)))
+                                  
 #define MGS_NBUFS       (64 * smp_num_cpus)
 #define MGS_BUFSIZE     (8 * 1024)
 #define MGS_MAXREQSIZE  (5 * 1024)
@@ -407,7 +409,7 @@ CDEB_TYPE(level, "@@@ " fmt                                                    \
        REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
        req->rq_transno,                                                        \
        req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
-       req->rq_import ? (char *)req->rq_import->imp_target_uuid.uuid : "<?>",  \
+       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "<?>",  \
        req->rq_import ?                                                        \
           (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "<?>",  \
        (req->rq_import && req->rq_import->imp_client) ?                        \
@@ -731,7 +733,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
                         char *name, int id);
 int ptlrpc_unregister_service(struct ptlrpc_service *service);
 int liblustre_check_services (void *arg);
-void ptlrpc_daemonize(void);
+void ptlrpc_daemonize(char *name);
 int ptlrpc_service_health_check(struct ptlrpc_service *);
 
 
@@ -798,6 +800,13 @@ int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
 /* ptlrpc/pinger.c */
 int ptlrpc_pinger_add_import(struct obd_import *imp);
 int ptlrpc_pinger_del_import(struct obd_import *imp);
+#ifdef __KERNEL__
+void ping_evictor_start(void);
+void ping_evictor_stop(void);
+#else
+#define ping_evictor_start()    do {} while (0)
+#define ping_evictor_stop()     do {} while (0)
+#endif
 
 /* ptlrpc/ptlrpcd.c */
 void ptlrpcd_wake(struct ptlrpc_request *req);
index 4d0250c..c90a78f 100644 (file)
@@ -118,9 +118,9 @@ static inline struct dentry *ll_lookup_one_len(const char *fid_name,
 {
         struct dentry *dchild;
 
-        down(&dparent->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dparent->d_inode);
         dchild = lookup_one_len(fid_name, dparent, fid_namelen);
-        up(&dparent->d_inode->i_sem);
+        UNLOCK_INODE_MUTEX(dparent->d_inode);
 
         if (IS_ERR(dchild) || dchild->d_inode == NULL)
                 return dchild;
index bdcc9c8..52d64e2 100644 (file)
@@ -102,7 +102,7 @@ struct lov_stripe_md {
         struct {
                 /* Public members. */
                 __u64 lw_object_id;        /* lov object id */
-                __u64 lw_object_gr;        /* lov object id */
+                __u64 lw_object_gr;        /* lov object group */
                 __u64 lw_maxbytes;         /* maximum possible file size */
                 unsigned long lw_xfersize; /* optimal transfer size */
 
@@ -159,7 +159,7 @@ struct brw_page {
 enum async_flags {
         ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
                               page is added to an rpc */
-        ASYNC_URGENT = 0x2,
+        ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
         ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
                                      to give the caller a chance to update
                                      or cancel the size of the io */
@@ -305,8 +305,10 @@ struct filter_obd {
 #define OSC_MAX_DIRTY_MB_MAX   2048     /* totally arbitrary */
 
 struct mdc_rpc_lock;
+struct obd_import;
 struct client_obd {
-        struct obd_import       *cl_import;
+        struct obd_uuid          cl_target_uuid;
+        struct obd_import       *cl_import; /* ptlrpc connection state */
         struct semaphore         cl_sem;
         int                      cl_conn_count;
         /* max_mds_easize is purely a performance thing so we don't have to
@@ -366,8 +368,8 @@ struct client_obd {
 
         /* used by quotacheck */
         int                      cl_qchk_stat; /* quotacheck stat of the peer */
-        struct ptlrpc_request_pool *cl_rq_pool; /* emergency pool of requests */
 };
+#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
 
 #define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
 
@@ -377,9 +379,8 @@ struct mgs_obd {
         struct super_block              *mgs_sb;
         struct dentry                   *mgs_configs_dir;
         struct dentry                   *mgs_fid_de;
-        spinlock_t                       mgs_fs_db_lock; /* add/remove db's */
         struct list_head                 mgs_fs_db_list;
-        struct semaphore                 mgs_log_sem;    /* unused */
+        struct semaphore                 mgs_sem;
 };
 
 struct mds_obd {
@@ -701,17 +702,19 @@ struct obd_device {
 #define OBD_LLOG_FL_SENDNOW     0x0001
 
 
+enum obd_cleanup_stage {
 /* Special case hack for MDS LOVs */
-#define OBD_CLEANUP_EARLY       0
+        OBD_CLEANUP_EARLY,
 /* Precleanup stage 1, we must make sure all exports (other than the
    self-export) get destroyed. */
-#define OBD_CLEANUP_EXPORTS     1
+        OBD_CLEANUP_EXPORTS,
 /* Precleanup stage 2,  do other type-specific cleanup requiring the
    self-export. */
-#define OBD_CLEANUP_SELF_EXP    2
+        OBD_CLEANUP_SELF_EXP,
 /* FIXME we should eliminate the "precleanup" function and make them stages
    of the "cleanup" function. */
-#define OBD_CLEANUP_OBD         3
+        OBD_CLEANUP_OBD,
+};
 
 struct obd_ops {
         struct module *o_owner;
@@ -724,7 +727,8 @@ struct obd_ops {
         int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
         int (*o_detach)(struct obd_device *dev);
         int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
-        int (*o_precleanup)(struct obd_device *dev, int cleanup_stage);
+        int (*o_precleanup)(struct obd_device *dev,
+                            enum obd_cleanup_stage cleanup_stage);
         int (*o_cleanup)(struct obd_device *dev);
         int (*o_process_config)(struct obd_device *dev, obd_count len,
                                 void *data);
index af9a055..c2d5095 100644 (file)
@@ -84,27 +84,19 @@ void oig_complete_one(struct obd_io_group *oig,
                       struct oig_callback_context *occ, int rc);
 void oig_release(struct obd_io_group *oig);
 int oig_wait(struct obd_io_group *oig);
-/* ping evictor */
-#ifdef __KERNEL__
-void ping_evictor_start(void);
-void ping_evictor_stop(void);
-#else
-#define ping_evictor_start()    do {} while (0)
-#define ping_evictor_stop()     do {} while (0)
-#endif
-
 
 char *obd_export_nid2str(struct obd_export *exp);
 
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
 int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
 
-/* config.c */
+/* obd_config.c */
 int class_process_config(struct lustre_cfg *lcfg);
 int class_attach(struct lustre_cfg *lcfg);
 int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
 int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg);
 int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg);
+struct obd_device *class_incref(struct obd_device *obd);
 void class_decref(struct obd_device *obd);
 
 #define CFG_F_START     0x01   /* Set when we start updating from a log */
@@ -167,13 +159,13 @@ do {                                                                           \
         __class_export_put(exp);                                               \
 } while (0)
 void __class_export_put(struct obd_export *);
-struct obd_export *class_new_export(struct obd_device *obddev);
+struct obd_export *class_new_export(struct obd_device *obddev,
+                                    struct obd_uuid *cluuid);
 void class_unlink_export(struct obd_export *exp);
-void class_update_export_timer(struct obd_export *exp, time_t extra_delay);
 
 struct obd_import *class_import_get(struct obd_import *);
 void class_import_put(struct obd_import *);
-struct obd_import *class_new_import(void);
+struct obd_import *class_new_import(struct obd_device *obd);
 void class_destroy_import(struct obd_import *exp);
 
 struct obd_type *class_search_type(const char *name);
@@ -333,7 +325,8 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
         RETURN(rc);
 }
 
-static inline int obd_precleanup(struct obd_device *obd, int cleanup_stage)
+static inline int obd_precleanup(struct obd_device *obd,
+                                 enum obd_cleanup_stage cleanup_stage)
 {
         int rc;
         ENTRY;
@@ -592,7 +585,7 @@ static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
         RETURN(rc);
 }
 
-static inline int obd_connect(struct lustre_handle *conn, struct obd_device *obd,
+static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd,
                               struct obd_uuid *cluuid,
                               struct obd_connect_data *d)
 {
@@ -1255,7 +1248,6 @@ static inline void obdo_free(struct obdo *oa)
  * <shaver> // XXX do not look into _superhack with remaining eye
  * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
 /* sysctl.c */
 extern void obd_sysctl_init (void);
index 54a7bdb..55ab3a4 100644 (file)
@@ -160,6 +160,7 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_PTLRPC_BULK_GET_NET     0x503
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
 #define OBD_FAIL_PTLRPC_DROP_RPC         0x505
+#define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
@@ -169,6 +170,9 @@ extern wait_queue_head_t obd_race_waitq;
 
 #define OBD_FAIL_TGT_REPLY_NET           0x700
 #define OBD_FAIL_TGT_CONN_RACE           0x701
+#define OBD_FAIL_TGT_FORCE_RECONNECT     0x702
+#define OBD_FAIL_TGT_DELAY_CONNECT       0x703
+#define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 
index 2f1c6df..8df4aea 100644 (file)
@@ -45,6 +45,8 @@
 #define EXT3_IOC_SETVERSION_OLD         _IOW('v', 2, long)
 #endif
 
+struct obd_statfs;
+
 #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
 #define LL_IOC_SETFLAGS                 _IOW ('f', 152, long)
 #define LL_IOC_CLRFLAGS                 _IOW ('f', 153, long)
index 5295a33..9b1c043 100644 (file)
@@ -26,9 +26,9 @@ CONFIG_KMOD=y
 #
 # Processor type and features
 #
-CONFIG_MK8=y
+# CONFIG_MK8
 # CONFIG_IA32E is not set
-# CONFIG_GENERIC_CPU is not set
+CONFIG_GENERIC_CPU=y
 CONFIG_X86_L1_CACHE_BYTES=64
 CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_TSC=y
index 527d397..ea03f03 100644 (file)
@@ -26,9 +26,9 @@ CONFIG_KMOD=y
 #
 # Processor type and features
 #
-CONFIG_MK8=y
+# CONFIG_MK8 is not set
 # CONFIG_IA32E is not set
-# CONFIG_GENERIC_CPU is not set
+CONFIG_GENERIC_CPU=y
 CONFIG_X86_L1_CACHE_BYTES=64
 CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_TSC=y
index 8629266..e7eb927 100644 (file)
@@ -2365,7 +2365,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_INFO is not set
 # CONFIG_FRAME_POINTER is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
index 173c099..46499d4 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Wed Mar 15 17:33:05 2006
+# Thu Oct 27 17:05:00 2005
 #
 
 #
@@ -84,7 +84,6 @@ CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=64
 # CONFIG_HOTPLUG_CPU is not set
-CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 # CONFIG_IA32_SUPPORT is not set
@@ -99,7 +98,6 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
-CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -309,8 +307,6 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SAS_CLASS=m
-# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -325,7 +321,6 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -337,7 +332,6 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -414,14 +408,10 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=m
-CONFIG_FUSION_FC=m
-CONFIG_FUSION_SAS=m
+CONFIG_FUSION=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
-CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -840,11 +830,9 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
-CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
-CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1082,12 +1070,6 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
-CONFIG_ISDN_DRV_AVMB1_B1PCI=m
-CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
-CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
-CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
-CONFIG_ISDN_DRV_AVMB1_T1PCI=m
-CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1172,8 +1154,6 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_N_HDLC=m
 CONFIG_STALDRV=y
 CONFIG_SGI_SNSC=y
-CONFIG_SGI_TIOCX=y
-CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
@@ -1195,7 +1175,6 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1263,7 +1242,6 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
-CONFIG_HANGCHECK_TIMER=m
 # CONFIG_MMTIMER is not set
 
 #
@@ -1720,38 +1698,6 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
-# InfiniBand support
-#
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-CONFIG_INFINIBAND_SDP=m
-# CONFIG_INFINIBAND_SDP_DEBUG is not set
-CONFIG_INFINIBAND_SRP=m
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-CONFIG_EDAC=m
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
-CONFIG_EDAC_MM_EDAC=m
-CONFIG_EDAC_AMD76X=m
-CONFIG_EDAC_E7XXX=m
-CONFIG_EDAC_E752X=m
-CONFIG_EDAC_I82875P=m
-CONFIG_EDAC_I82860=m
-CONFIG_EDAC_R82600=m
-CONFIG_EDAC_POLL=y
-
-#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1837,20 +1783,15 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
-CONFIG_NFS_ACL_SUPPORT=m
-CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
index 87e1d04..92aa946 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Wed Mar 15 17:35:26 2006
+# Thu Oct 27 17:04:10 2005
 #
 
 #
@@ -84,7 +84,6 @@ CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=64
 # CONFIG_HOTPLUG_CPU is not set
-CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 # CONFIG_IA32_SUPPORT is not set
@@ -99,7 +98,6 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
-CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -309,8 +307,6 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SAS_CLASS=m
-# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -325,7 +321,6 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -337,7 +332,6 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -414,14 +408,10 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=m
-CONFIG_FUSION_FC=m
-CONFIG_FUSION_SAS=m
+CONFIG_FUSION=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
-CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -840,11 +830,9 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
-CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
-CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1082,12 +1070,6 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
-CONFIG_ISDN_DRV_AVMB1_B1PCI=m
-CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
-CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
-CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
-CONFIG_ISDN_DRV_AVMB1_T1PCI=m
-CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1172,8 +1154,6 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_N_HDLC=m
 CONFIG_STALDRV=y
 CONFIG_SGI_SNSC=y
-CONFIG_SGI_TIOCX=y
-CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
@@ -1195,7 +1175,6 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1263,7 +1242,6 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
-CONFIG_HANGCHECK_TIMER=m
 # CONFIG_MMTIMER is not set
 
 #
@@ -1720,38 +1698,6 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
-# InfiniBand support
-#
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-CONFIG_INFINIBAND_SDP=m
-# CONFIG_INFINIBAND_SDP_DEBUG is not set
-CONFIG_INFINIBAND_SRP=m
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-CONFIG_EDAC=m
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
-CONFIG_EDAC_MM_EDAC=m
-CONFIG_EDAC_AMD76X=m
-CONFIG_EDAC_E7XXX=m
-CONFIG_EDAC_E752X=m
-CONFIG_EDAC_I82875P=m
-CONFIG_EDAC_I82860=m
-CONFIG_EDAC_R82600=m
-CONFIG_EDAC_POLL=y
-
-#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1837,20 +1783,15 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
-CONFIG_NFS_ACL_SUPPORT=m
-CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
index a210500..aa67bfe 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Wed Mar 15 17:39:44 2006
+# Thu Oct 27 17:06:20 2005
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -1884,34 +1884,12 @@ CONFIG_USB_SPEEDTOUCH=m
 #
 # InfiniBand support
 #
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-CONFIG_INFINIBAND_SDP=m
-# CONFIG_INFINIBAND_SDP_DEBUG is not set
-CONFIG_INFINIBAND_SRP=m
+# CONFIG_INFINIBAND is not set
 
 #
 # EDAC - error detection and reporting (RAS)
 #
-CONFIG_EDAC=m
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
-CONFIG_EDAC_MM_EDAC=m
-CONFIG_EDAC_AMD76X=m
-CONFIG_EDAC_E7XXX=m
-CONFIG_EDAC_E752X=m
-CONFIG_EDAC_I82875P=m
-CONFIG_EDAC_I82860=m
-CONFIG_EDAC_R82600=m
-CONFIG_EDAC_POLL=y
+# CONFIG_EDAC is not set
 
 #
 # Firmware Drivers
@@ -2013,19 +1991,15 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
-CONFIG_NFS_ACL_SUPPORT=m
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
index ebe65e8..8a1b02f 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Wed Mar 15 17:38:17 2006
+# Thu Oct 27 17:05:31 2005
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -438,8 +438,6 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SAS_CLASS=m
-# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -454,7 +452,6 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -466,7 +463,6 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -543,14 +539,10 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=m
-CONFIG_FUSION_FC=m
-CONFIG_FUSION_SAS=m
+CONFIG_FUSION=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
-CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -569,7 +561,7 @@ CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
 # Device Drivers
 #
 CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_OHCI1394=m
+CONFIG_IEEE1394_OHCI1394=y
 
 #
 # Protocol Drivers
@@ -973,11 +965,9 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
-CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
-CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1223,12 +1213,6 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
-CONFIG_ISDN_DRV_AVMB1_B1PCI=m
-CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
-CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
-CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
-CONFIG_ISDN_DRV_AVMB1_T1PCI=m
-CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1334,7 +1318,6 @@ CONFIG_SERIAL_8250_RSA=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_CRASH=m
@@ -1882,42 +1865,9 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
-# InfiniBand support
-#
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-CONFIG_INFINIBAND_SDP=m
-# CONFIG_INFINIBAND_SDP_DEBUG is not set
-CONFIG_INFINIBAND_SRP=m
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-CONFIG_EDAC=m
-
-#
-# Reporting subsystems
-#
-# CONFIG_EDAC_DEBUG is not set
-CONFIG_EDAC_MM_EDAC=m
-CONFIG_EDAC_AMD76X=m
-CONFIG_EDAC_E7XXX=m
-CONFIG_EDAC_E752X=m
-CONFIG_EDAC_I82875P=m
-CONFIG_EDAC_I82860=m
-CONFIG_EDAC_R82600=m
-CONFIG_EDAC_POLL=y
-
-#
 # Firmware Drivers
 #
 CONFIG_EDD=m
-CONFIG_DELL_RBU=m
 
 #
 # File systems
@@ -2013,20 +1963,15 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
-CONFIG_NFS_ACL_SUPPORT=m
-CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
diff --git a/lustre/kernel_patches/patches/export-filemap_populate.patch b/lustre/kernel_patches/patches/export-filemap_populate.patch
deleted file mode 100644 (file)
index 8f78a79..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-Index: linux-2.6.7/mm/filemap.c
-===================================================================
---- linux-2.6.7.orig/mm/filemap.c      2004-11-15 12:02:35.000000000 +0800
-+++ linux-2.6.7/mm/filemap.c   2004-11-15 12:04:38.000000000 +0800
-@@ -1409,6 +1409,7 @@
-       return 0;
- }
-+EXPORT_SYMBOL_GPL(filemap_populate);
- static struct vm_operations_struct generic_file_vm_ops = {
-       .nopage         = filemap_nopage,
-Index: linux-2.6.7/include/linux/mm.h
-===================================================================
---- linux-2.6.7.orig/include/linux/mm.h        2004-11-15 12:02:43.000000000 +0800
-+++ linux-2.6.7/include/linux/mm.h     2004-11-15 12:04:23.000000000 +0800
-@@ -661,6 +661,8 @@
- /* generic vm_area_ops exported for stackable file systems */
- struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-+int filemap_populate(struct vm_area_struct *, unsigned long, unsigned long,
-+                   pgprot_t, unsigned long, int);
- /* mm/page-writeback.c */
- int write_one_page(struct page *page, int wait);
index a2b07f8..0561e65 100644 (file)
@@ -42,18 +42,6 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/ext2_fs_sb.h
  /*
   * second extended-fs super-block data in memory
   */
-Index: linux-2.6.9-5.0.3.EL/net/core/sock.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/net/core/sock.c  2005-02-26 13:24:35.490810168 +0200
-+++ linux-2.6.9-5.0.3.EL/net/core/sock.c       2005-02-26 13:53:13.801587224 +0200
-@@ -602,6 +602,7 @@
-               return -EFAULT;
-       return 0;
- }
-+EXPORT_SYMBOL(sock_getsockopt);
- static kmem_cache_t *sk_cachep;
 Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
 ===================================================================
 --- linux-2.6.9-5.0.3.EL.orig/fs/namespace.c   2005-02-26 13:47:31.282658016 +0200
@@ -79,23 +67,6 @@ Index: linux-2.6.9-5.0.3.EL/kernel/exit.c
  void __set_special_pids(pid_t session, pid_t pgrp)
  {
        struct task_struct *curr = current;
-@@ -428,6 +430,8 @@
-       __exit_files(tsk);
- }
-+EXPORT_SYMBOL(exit_files);
-+
- static inline void __put_fs_struct(struct fs_struct *fs)
- {
-       /* No need to hold fs->lock if we are killing it */
-@@ -516,6 +516,7 @@
- {
-       __exit_mm(tsk);
- }
-+EXPORT_SYMBOL(exit_mm);
- static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
- {
 Index: linux-2.6.9-5.0.3.EL/fs/dcache.c
 ===================================================================
 --- linux-2.6.9-5.0.3.EL.orig/fs/dcache.c      2005-02-26 13:49:04.365507272 +0200
@@ -108,50 +79,3 @@ Index: linux-2.6.9-5.0.3.EL/fs/dcache.c
  
  void d_genocide(struct dentry *root)
  {
-Index: linux-2.6.9-5.0.3.EL/mm/filemap.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/mm/filemap.c     2005-02-26 13:24:35.502808344 +0200
-+++ linux-2.6.9-5.0.3.EL/mm/filemap.c  2005-02-26 13:53:59.787596288 +0200
-@@ -1473,7 +1473,7 @@
-       return NULL;
- }
--static int filemap_populate(struct vm_area_struct *vma,
-+int filemap_populate(struct vm_area_struct *vma,
-                       unsigned long addr,
-                       unsigned long len,
-                       pgprot_t prot,
-@@ -1520,6 +1520,7 @@
-       return 0;
- }
-+EXPORT_SYMBOL_GPL(filemap_populate);
- struct vm_operations_struct generic_file_vm_ops = {
-       .nopage         = filemap_nopage,
-Index: linux-2.6.9-5.0.3.EL/fs/file_table.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/file_table.c  2005-02-26 13:24:35.512806824 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/file_table.c       2005-02-26 13:53:13.811585704 +0200
-@@ -196,6 +196,7 @@
-               file_free(file);
-       }
- }
-+EXPORT_SYMBOL(put_filp);
- void file_move(struct file *file, struct list_head *list)
- {
-Index: linux-2.6.9-5.0.3.EL/include/linux/mm.h
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/include/linux/mm.h       2005-02-26 13:49:05.823285656 +0200
-+++ linux-2.6.9-5.0.3.EL/include/linux/mm.h    2005-02-26 13:53:54.181448552 +0200
-@@ -721,6 +721,9 @@
- /* generic vm_area_ops exported for stackable file systems */
- struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-+int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-+                        unsigned long len, pgprot_t prot, unsigned long pgoff,
-+                        int nonblock);
- /* mm/page-writeback.c */
- int write_one_page(struct page *page, int wait);
index fbaf63d..8360ce4 100644 (file)
@@ -55,12 +55,3 @@ Index: linux-2.6.5-12.1/kernel/exit.c
  void __set_special_pids(pid_t session, pid_t pgrp)
  {
        struct task_struct *curr = current;
-@@ -429,6 +431,8 @@
-       __exit_files(tsk);
- }
-+EXPORT_SYMBOL(exit_files);
-+
- static inline void __put_fs_struct(struct fs_struct *fs)
- {
-       /* No need to hold fs->lock if we are killing it */
index c08e30f..e21fcf4 100644 (file)
@@ -25,18 +25,6 @@ Index: linux-2.6.12-rc6/include/linux/fs.h
  #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
  
  extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
-Index: linux-2.6.12-rc6/net/core/sock.c
-===================================================================
---- linux-2.6.12-rc6.orig/net/core/sock.c      2005-06-06 17:22:29.000000000 +0200
-+++ linux-2.6.12-rc6/net/core/sock.c   2005-06-14 15:53:58.349304101 +0200
-@@ -613,6 +613,7 @@
-               return -EFAULT;
-       return 0;
- }
-+EXPORT_SYMBOL(sock_getsockopt);
- /**
-  *    sk_alloc - All socket objects are allocated here
 Index: linux-2.6.12-rc6/fs/namespace.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/namespace.c       2005-06-14 15:53:17.868835847 +0200
@@ -62,23 +50,6 @@ Index: linux-2.6.12.5/kernel/exit.c
  void __set_special_pids(pid_t session, pid_t pgrp)
  {
        struct task_struct *curr = current;
-@@ -432,6 +434,8 @@
-       __exit_files(tsk);
- }
-+EXPORT_SYMBOL(exit_files);
-+
- static inline void __put_fs_struct(struct fs_struct *fs)
- {
-       /* No need to hold fs->lock if we are killing it */
-@@ -515,6 +515,7 @@
-       task_unlock(tsk);
-       mmput(mm);
- }
-+EXPORT_SYMBOL(exit_mm);
- static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
- {
 Index: linux-2.6.12-rc6/fs/dcache.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/dcache.c  2005-06-14 15:53:19.812195198 +0200
@@ -91,15 +62,3 @@ Index: linux-2.6.12-rc6/fs/dcache.c
  
  void d_genocide(struct dentry *root)
  {
-Index: linux-2.6.12-rc6/fs/file_table.c
-===================================================================
---- linux-2.6.12-rc6.orig/fs/file_table.c      2005-06-06 17:22:29.000000000 +0200
-+++ linux-2.6.12-rc6/fs/file_table.c   2005-06-14 15:53:58.396179101 +0200
-@@ -197,6 +197,7 @@
-               file_free(file);
-       }
- }
-+EXPORT_SYMBOL(put_filp);
- void file_move(struct file *file, struct list_head *list)
- {
diff --git a/lustre/kernel_patches/patches/ext3-htree-path-ops.patch b/lustre/kernel_patches/patches/ext3-htree-path-ops.patch
new file mode 100644 (file)
index 0000000..9a2edbd
--- /dev/null
@@ -0,0 +1,894 @@
+Index: iam-src/fs/ext3/namei.c
+===================================================================
+--- iam-src.orig/fs/ext3/namei.c       2006-02-12 16:43:57.000000000 +0300
++++ iam-src/fs/ext3/namei.c    2006-02-12 23:22:12.000000000 +0300
+@@ -83,22 +83,21 @@ static struct buffer_head *ext3_append(h
+ #define dxtrace(command)
+ #endif
+-struct fake_dirent
+-{
++struct fake_dirent {
+       __le32 inode;
+       __le16 rec_len;
+       u8 name_len;
+       u8 file_type;
+ };
+-struct dx_countlimit
+-{
++struct dx_countlimit {
+       __le16 limit;
+       __le16 count;
+ };
+-struct dx_entry
+-{
++struct dx_entry; /* incomplete type */
++
++struct dx_entry_compat {
+       __le32 hash;
+       __le32 block;
+ };
+@@ -109,8 +108,7 @@ struct dx_entry
+  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
+  */
+-struct dx_root
+-{
++struct dx_root {
+       struct fake_dirent dot;
+       char dot_name[4];
+       struct fake_dirent dotdot;
+@@ -124,13 +122,13 @@ struct dx_root
+               u8 unused_flags;
+       }
+       info;
+-      struct dx_entry entries[0];
++      struct {} entries[0];
+ };
+ struct dx_node
+ {
+       struct fake_dirent fake;
+-      struct dx_entry entries[0];
++      struct {} entries[0];
+ };
+@@ -147,38 +145,76 @@ struct dx_map_entry
+       u32 offs;
+ };
++struct dx_path;
++struct dx_param {
++      size_t       dpo_key_size;
++      size_t       dpo_ptr_size;
++      size_t       dpo_node_gap;
++      size_t       dpo_root_gap;
++
++      u32 (*dpo_root_ptr)(struct dx_path *path);
++      int (*dpo_node_check)(struct dx_path *path,
++                            struct dx_frame *frame, void *cookie);
++      int (*dpo_node_init)(struct dx_path *path,
++                           struct buffer_head *bh, int root);
++};
++
+ /*
+  * Structure to keep track of a path drilled through htree.
+  */
+ struct dx_path {
+-      struct inode    *dp_object;
+-      struct dx_frame  dp_frames[DX_MAX_TREE_HEIGHT];
+-      struct dx_frame *dp_frame;
++      struct inode         *dp_object;
++      struct dx_param      *dp_param;
++      int                   dp_indirect;
++      struct dx_frame       dp_frames[DX_MAX_TREE_HEIGHT];
++      struct dx_frame      *dp_frame;
++      void                 *dp_key_target;
++      void                 *dp_key;
+ };
++static u32 htree_root_ptr(struct dx_path *p);
++static int htree_node_check(struct dx_path *path,
++                          struct dx_frame *frame, void *cookie);
++static int htree_node_init(struct dx_path *path,
++                         struct buffer_head *bh, int root);
++
++static struct dx_param htree_compat_param = {
++      .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++      .dpo_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++      .dpo_node_gap = offsetof(struct dx_node, entries),
++      .dpo_root_gap = offsetof(struct dx_root, entries),
++
++      .dpo_root_ptr   = htree_root_ptr,
++      .dpo_node_check = htree_node_check,
++      .dpo_node_init  = htree_node_init
++};
++
++
+ #ifdef CONFIG_EXT3_INDEX
+-static inline unsigned dx_get_block (struct dx_entry *entry);
+-static void dx_set_block (struct dx_entry *entry, unsigned value);
+-static inline unsigned dx_get_hash (struct dx_entry *entry);
+-static void dx_set_hash (struct dx_entry *entry, unsigned value);
+-static unsigned dx_get_count (struct dx_entry *entries);
+-static unsigned dx_get_limit (struct dx_entry *entries);
+-static void dx_set_count (struct dx_entry *entries, unsigned value);
+-static void dx_set_limit (struct dx_entry *entries, unsigned value);
+-static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
+-static unsigned dx_node_limit (struct inode *dir);
+-static struct dx_frame *dx_probe(struct dentry *dentry,
+-                               struct inode *dir,
+-                               struct dx_hash_info *hinfo,
+-                               struct dx_path *path,
+-                               int *err);
++static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry);
++static void dx_set_block(struct dx_path *p,
++                       struct dx_entry *entry, unsigned value);
++static inline void *dx_get_key(struct dx_path *p,
++                             struct dx_entry *entry, void *key);
++static void dx_set_key(struct dx_path *p, struct dx_entry *entry, void *key);
++static unsigned dx_get_count(struct dx_entry *entries);
++static unsigned dx_get_limit(struct dx_entry *entries);
++static void dx_set_count(struct dx_entry *entries, unsigned value);
++static void dx_set_limit(struct dx_entry *entries, unsigned value);
++static unsigned dx_root_limit(struct dx_path *p);
++static unsigned dx_node_limit(struct dx_path *p);
++static int dx_probe(struct dentry *dentry,
++                  struct inode *dir,
++                  struct dx_hash_info *hinfo,
++                  struct dx_path *path);
+ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+               struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct dx_path *path,
++                           struct dx_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+                                struct dx_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+@@ -186,29 +222,65 @@ static struct buffer_head * ext3_dx_find
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode);
++static inline void dx_path_init(struct dx_path *path, struct inode *inode);
++static inline void dx_path_fini(struct dx_path *path);
++
++
+ /*
+  * Future: use high four bits of block for coalesce-on-delete flags
+  * Mask them off for now.
+  */
+-static inline unsigned dx_get_block (struct dx_entry *entry)
++static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off)
++{
++      return (void *)((char *)entry + off);
++}
++
++static inline size_t dx_entry_size(struct dx_path *p)
+ {
+-      return le32_to_cpu(entry->block) & 0x00ffffff;
++      return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size;
+ }
+-static inline void dx_set_block (struct dx_entry *entry, unsigned value)
++static inline struct dx_entry *dx_entry_shift(struct dx_path *p,
++                                            struct dx_entry *entry, int shift)
+ {
+-      entry->block = cpu_to_le32(value);
++      void *e = entry;
++      return e + shift * dx_entry_size(p);
+ }
+-static inline unsigned dx_get_hash (struct dx_entry *entry)
++static inline ptrdiff_t dx_entry_diff(struct dx_path *p,
++                                    struct dx_entry *e1, struct dx_entry *e2)
+ {
+-      return le32_to_cpu(entry->hash);
++      ptrdiff_t diff;
++
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff);
++      return diff / dx_entry_size(p);
++}
++
++static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
++{
++      return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
++              & 0x00ffffff;
+ }
+-static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
++static inline void dx_set_block(struct dx_path *p,
++                              struct dx_entry *entry, unsigned value)
+ {
+-      entry->hash = cpu_to_le32(value);
++      *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value);
++}
++
++static inline void *dx_get_key(struct dx_path *p,
++                             struct dx_entry *entry, void *key)
++{
++      memcpy(key, entry, p->dp_param->dpo_key_size);
++      return key;
++}
++
++static inline void dx_set_key(struct dx_path *p,
++                            struct dx_entry *entry, void *key)
++{
++      memcpy(entry, key, p->dp_param->dpo_key_size);
+ }
+ static inline unsigned dx_get_count (struct dx_entry *entries)
+@@ -231,17 +303,123 @@ static inline void dx_set_limit (struct 
+       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+-static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
++static inline unsigned dx_root_limit(struct dx_path *p)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
+-              EXT3_DIR_REC_LEN(2) - infosize;
+-      return 0? 20: entry_space / sizeof(struct dx_entry);
++      struct dx_param *param = p->dp_param;
++      unsigned entry_space   = p->dp_object->i_sb->s_blocksize -
++              param->dpo_root_gap;
++      return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++}
++
++static inline unsigned dx_node_limit(struct dx_path *p)
++{
++      struct dx_param *param = p->dp_param;
++      unsigned entry_space   = p->dp_object->i_sb->s_blocksize -
++              param->dpo_node_gap;
++      return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++}
++
++static inline int dx_index_is_compat(struct dx_path *path)
++{
++      return path->dp_param == &htree_compat_param;
++}
++
++static struct dx_entry *dx_get_entries(struct dx_path *path, void *data,
++                                     int root)
++{
++      return data +
++              (root ?
++               path->dp_param->dpo_root_gap : path->dp_param->dpo_node_gap);
++}
++
++static struct dx_entry *dx_node_get_entries(struct dx_path *path,
++                                          struct dx_frame *frame)
++{
++      return dx_get_entries(path,
++                            frame->bh->b_data, frame == path->dp_frames);
++}
++
++static u32 htree_root_ptr(struct dx_path *path)
++{
++      return 0;
++}
++
++struct htree_cookie {
++      struct dx_hash_info *hinfo;
++      struct dentry       *dentry;
++};
++
++static int htree_node_check(struct dx_path *path, struct dx_frame *frame,
++                          void *cookie)
++{
++      void *data;
++      struct dx_entry *entries;
++      struct super_block *sb;
++
++      data = frame->bh->b_data;
++      entries = dx_node_get_entries(path, frame);
++      sb = path->dp_object->i_sb;
++      if (frame == path->dp_frames) {
++              /* root node */
++              struct dx_root *root;
++              struct htree_cookie *hc = cookie;
++
++              root = data;
++              if (root->info.hash_version != DX_HASH_TEA &&
++                  root->info.hash_version != DX_HASH_HALF_MD4 &&
++                  root->info.hash_version != DX_HASH_R5 &&
++                  root->info.hash_version != DX_HASH_LEGACY) {
++                      ext3_warning(sb, __FUNCTION__,
++                                   "Unrecognised inode hash code %d",
++                                   root->info.hash_version);
++                      return ERR_BAD_DX_DIR;
++              }
++
++              if (root->info.unused_flags & 1) {
++                      ext3_warning(sb, __FUNCTION__,
++                                   "Unimplemented inode hash flags: %#06x",
++                                   root->info.unused_flags);
++                      return ERR_BAD_DX_DIR;
++              }
++
++              path->dp_indirect = root->info.indirect_levels;
++              if (path->dp_indirect > DX_MAX_TREE_HEIGHT - 1) {
++                      ext3_warning(sb, __FUNCTION__,
++                                   "Unimplemented inode hash depth: %#06x",
++                                   root->info.indirect_levels);
++                      return ERR_BAD_DX_DIR;
++              }
++
++              assert((char *)entries == (((char *)&root->info) +
++                                         root->info.info_length));
++              assert(dx_get_limit(entries) == dx_root_limit(path));
++
++              hc->hinfo->hash_version = root->info.hash_version;
++              hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
++              if (hc->dentry)
++                      ext3fs_dirhash(hc->dentry->d_name.name,
++                                     hc->dentry->d_name.len, hc->hinfo);
++              path->dp_key_target = &hc->hinfo->hash;
++      } else {
++              /* non-root index */
++              assert(entries == data + path->dp_param->dpo_node_gap);
++              assert(dx_get_limit(entries) == dx_node_limit(path));
++      }
++      frame->entries = frame->at = entries;
++      return 0;
+ }
+-static inline unsigned dx_node_limit (struct inode *dir)
++static int htree_node_init(struct dx_path *path,
++                         struct buffer_head *bh, int root)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+-      return 0? 22: entry_space / sizeof(struct dx_entry);
++      struct dx_node *node;
++
++      assert(!root);
++
++      node = (void *)bh->b_data;
++      node->fake.rec_len = cpu_to_le16(path->dp_object->i_sb->s_blocksize);
++      node->fake.inode = 0;
++      return 0;
+ }
+ /*
+@@ -327,123 +505,101 @@ struct stats dx_show_entries(struct dx_h
+ }
+ #endif /* DX_DEBUG */
+-/*
+- * Probe for a directory leaf block to search.
+- *
+- * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
+- * error in the directory index, and the caller should fall back to
+- * searching the directory normally.  The callers of dx_probe **MUST**
+- * check for this error code, and make sure it never gets reflected
+- * back to userspace.
+- */
+-static struct dx_frame *
+-dx_probe(struct dentry *dentry, struct inode *dir,
+-       struct dx_hash_info *hinfo, struct dx_path *path, int *err)
+-{
+-      unsigned count, indirect;
+-      struct dx_entry *at, *entries, *p, *q, *m;
+-      struct dx_root *root;
+-      struct buffer_head *bh;
+-      struct dx_frame *frame = path->dp_frames;
+-      u32 hash;
++static int dx_lookup(struct dx_path *path, void *cookie)
++{
++      u32 ptr;
++      int err;
++      int i;
+-      frame->bh = NULL;
+-      if (dentry)
+-              dir = dentry->d_parent->d_inode;
+-      if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+-              goto fail;
+-      root = (struct dx_root *) bh->b_data;
+-      if (root->info.hash_version != DX_HASH_TEA &&
+-          root->info.hash_version != DX_HASH_HALF_MD4 &&
+-          root->info.hash_version != DX_HASH_R5 &&
+-          root->info.hash_version != DX_HASH_LEGACY) {
+-              ext3_warning(dir->i_sb, __FUNCTION__,
+-                           "Unrecognised inode hash code %d", root->info.hash_version);
+-              brelse(bh);
+-              *err = ERR_BAD_DX_DIR;
+-              goto fail;
+-      }
+-      hinfo->hash_version = root->info.hash_version;
+-      hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+-      if (dentry)
+-              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
+-      hash = hinfo->hash;
+-
+-      if (root->info.unused_flags & 1) {
+-              ext3_warning(dir->i_sb, __FUNCTION__,
+-                           "Unimplemented inode hash flags: %#06x",
+-                           root->info.unused_flags);
+-              brelse(bh);
+-              *err = ERR_BAD_DX_DIR;
+-              goto fail;
+-      }
++      struct dx_param *param;
++      struct dx_frame *frame;
+-      if ((indirect = root->info.indirect_levels) > DX_MAX_TREE_HEIGHT - 1) {
+-              ext3_warning(dir->i_sb, __FUNCTION__,
+-                           "Unimplemented inode hash depth: %#06x",
+-                           root->info.indirect_levels);
+-              brelse(bh);
+-              *err = ERR_BAD_DX_DIR;
+-              goto fail;
+-      }
++      param = path->dp_param;
+-      entries = (struct dx_entry *) (((char *)&root->info) +
+-                                     root->info.info_length);
+-      assert(dx_get_limit(entries) == dx_root_limit(dir,
+-                                                    root->info.info_length));
+-      dxtrace (printk("Look up %x", hash));
+-      while (1)
+-      {
++      for (frame = path->dp_frames, i = 0,
++           ptr = param->dpo_root_ptr(path); i <= path->dp_indirect;
++           ptr = dx_get_block(path, frame->at), ++frame, ++i) {
++              struct dx_entry *entries;
++              struct dx_entry *p;
++              struct dx_entry *q;
++              struct dx_entry *m;
++              unsigned count;
++
++              frame->bh = ext3_bread(NULL, path->dp_object, ptr, 0, &err);
++              if (frame->bh == NULL) {
++                      err = -EIO;
++                      break;
++              }
++              err = param->dpo_node_check(path, frame, cookie);
++              if (err != 0)
++                      break;
++
++              entries = frame->entries;
+               count = dx_get_count(entries);
+-              assert (count && count <= dx_get_limit(entries));
+-              p = entries + 1;
+-              q = entries + count - 1;
+-              while (p <= q)
+-              {
+-                      m = p + (q - p)/2;
++              assert(count && count <= dx_get_limit(entries));
++              p = dx_entry_shift(path, entries, 1);
++              q = dx_entry_shift(path, entries, count - 1);
++              while (p <= q) {
++                      m = dx_entry_shift(path,
++                                         p, dx_entry_diff(path, q, p) / 2);
+                       dxtrace(printk("."));
+-                      if (dx_get_hash(m) > hash)
+-                              q = m - 1;
++                      if (memcmp(dx_get_key(path, m, path->dp_key),
++                                 path->dp_key_target,
++                                 param->dpo_key_size) > 0)
++                              q = dx_entry_shift(path, m, -1);
+                       else
+-                              p = m + 1;
++                              p = dx_entry_shift(path, m, +1);
+               }
+-              if (0) // linear search cross check
+-              {
++              frame->at = dx_entry_shift(path, p, -1);
++              if (1) { // linear search cross check
+                       unsigned n = count - 1;
++                      struct dx_entry *at;
++
+                       at = entries;
+-                      while (n--)
+-                      {
++                      while (n--) {
+                               dxtrace(printk(","));
+-                              if (dx_get_hash(++at) > hash)
+-                              {
+-                                      at--;
++                              at = dx_entry_shift(path, at, +1);
++                              if (memcmp(dx_get_key(path, at, path->dp_key),
++                                         path->dp_key_target,
++                                         param->dpo_key_size) > 0) {
++                                      at = dx_entry_shift(path, at, -1);
+                                       break;
+                               }
+                       }
+-                      assert (at == p - 1);
++                      assert(at == frame->at);
+               }
+-
+-              at = p - 1;
+-              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
+-              frame->bh = bh;
+-              frame->entries = entries;
+-              frame->at = at;
+-              if (!indirect--)
+-                      return path->dp_frame = frame;
+-              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+-                      goto fail2;
+-              at = entries = ((struct dx_node *) bh->b_data)->entries;
+-              assert (dx_get_limit(entries) == dx_node_limit (dir));
+-              frame++;
+-      }
+-fail2:
+-      while (frame >= path->dp_frames) {
+-              brelse(frame->bh);
+-              frame--;
+       }
+-fail:
+-      return NULL;
++      if (err != 0)
++              dx_path_fini(path);
++      path->dp_frame = --frame;
++      return err;
++}
++
++/*
++ * Probe for a directory leaf block to search.
++ *
++ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
++ * error in the directory index, and the caller should fall back to
++ * searching the directory normally.  The callers of dx_probe **MUST**
++ * check for this error code, and make sure it never gets reflected
++ * back to userspace.
++ */
++static int dx_probe(struct dentry *dentry, struct inode *dir,
++                  struct dx_hash_info *hinfo, struct dx_path *path)
++{
++      int err;
++      __u32 hash_storage;
++      struct htree_cookie hc = {
++              .dentry = dentry,
++              .hinfo  = hinfo
++      };
++
++      assert(dx_index_is_compat(path));
++      path->dp_key = &hash_storage;
++      err = dx_lookup(path, &hc);
++      assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL);
++      return err;
+ }
+ static inline void dx_path_init(struct dx_path *path, struct inode *inode)
+@@ -458,8 +614,10 @@ static inline void dx_path_fini(struct d
+       int i;
+       for (i = 0; i < ARRAY_SIZE(path->dp_frames); i--) {
+-              if (path->dp_frames[i].bh != NULL)
++              if (path->dp_frames[i].bh != NULL) {
+                       brelse(path->dp_frames[i].bh);
++                      path->dp_frames[i].bh = NULL;
++              }
+       }
+ }
+@@ -488,6 +646,8 @@ static int ext3_htree_next_block(struct 
+       int err, num_frames = 0;
+       __u32 bhash;
++      assert(dx_index_is_compat(path));
++
+       p = path->dp_frame;
+       /*
+        * Find the next leaf page by incrementing the frame pointer.
+@@ -497,7 +657,9 @@ static int ext3_htree_next_block(struct 
+        * nodes need to be read.
+        */
+       while (1) {
+-              if (++(p->at) < p->entries + dx_get_count(p->entries))
++              p->at = dx_entry_shift(path, p->at, +1);
++              if (p->at < dx_entry_shift(path, p->entries,
++                                         dx_get_count(p->entries)))
+                       break;
+               if (p == path->dp_frames)
+                       return 0;
+@@ -512,7 +674,7 @@ static int ext3_htree_next_block(struct 
+        * desired contiuation hash.  If it doesn't, return since
+        * there's no point to read in the successive index pages.
+        */
+-      bhash = dx_get_hash(p->at);
++      dx_get_key(path, p->at, &bhash);
+       if (start_hash)
+               *start_hash = bhash;
+       if ((hash & 1) == 0) {
+@@ -524,12 +686,13 @@ static int ext3_htree_next_block(struct 
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 0, &err)))
++              if (!(bh = ext3_bread(NULL, dir,
++                                    dx_get_block(path, p->at), 0, &err)))
+                       return err; /* Failure */
+               ++p;
+               brelse (p->bh);
+               p->bh = bh;
+-              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++              p->at = p->entries = dx_node_get_entries(path, p);
+       }
+       return 1;
+ }
+@@ -609,6 +772,7 @@ int ext3_htree_fill_tree(struct file *di
+                      start_minor_hash));
+       dir = dir_file->f_dentry->d_inode;
+       dx_path_init(&path, dir);
++      path.dp_param = &htree_compat_param;
+       if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+               hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+               hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+@@ -619,7 +783,8 @@ int ext3_htree_fill_tree(struct file *di
+       }
+       hinfo.hash = start_hash;
+       hinfo.minor_hash = 0;
+-      if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err))
++      err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path);
++      if (err != 0)
+               return err;
+       /* Add '.' and '..' from the htree header */
+@@ -634,7 +799,7 @@ int ext3_htree_fill_tree(struct file *di
+       }
+       while (1) {
+-              block = dx_get_block(path.dp_frame->at);
++              block = dx_get_block(&path, path.dp_frame->at);
+               ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+                                            start_hash, start_minor_hash);
+               if (ret < 0) {
+@@ -722,17 +887,19 @@ static void dx_sort_map (struct dx_map_e
+       } while(more);
+ }
+-static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct dx_path *path,
++                          struct dx_frame *frame, u32 hash, u32 block)
+ {
+       struct dx_entry *entries = frame->entries;
+-      struct dx_entry *old = frame->at, *new = old + 1;
++      struct dx_entry *old = frame->at, *new = dx_entry_shift(path, old, +1);
+       int count = dx_get_count(entries);
+       assert(count < dx_get_limit(entries));
+-      assert(old < entries + count);
+-      memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
+-      dx_set_hash(new, hash);
+-      dx_set_block(new, block);
++      assert(old < dx_entry_shift(path, entries, count));
++      memmove(dx_entry_shift(path, new, 1), new,
++              (char *)dx_entry_shift(path, entries, count) - (char *)new);
++      dx_set_key(path, new, &hash);
++      dx_set_block(path, new, block);
+       dx_set_count(entries, count + 1);
+ }
+ #endif
+@@ -934,7 +1101,9 @@ static struct buffer_head * ext3_dx_find
+       struct dx_hash_info     hinfo;
+       u32 hash;
+       struct dx_path path;
+-      struct dx_entry dummy_dot;
++      struct dx_entry_compat dummy_dot = {
++              .block = 0
++      };
+       struct ext3_dir_entry_2 *de, *top;
+       struct buffer_head *bh;
+       unsigned long block;
+@@ -944,19 +1113,21 @@ static struct buffer_head * ext3_dx_find
+       struct inode *dir = dentry->d_parent->d_inode;
+       dx_path_init(&path, dir);
++      path.dp_param = &htree_compat_param;
++      
+       sb = dir->i_sb;
+       /* NFS may look up ".." - look at dx_root directory block */
+       if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+-              if (!(dx_probe(dentry, NULL, &hinfo, &path, err)))
++              *err = dx_probe(dentry, NULL, &hinfo, &path);
++              if (*err != 0)
+                       return NULL;
+       } else {
+-              path.dp_frame->bh = NULL;                       /* for dx_path_fini() */
+-              path.dp_frame->at = &dummy_dot;         /* hack for zero entry*/
+-              dx_set_block(path.dp_frame->at, 0);     /* dx_root block is 0 */
++              path.dp_frame->bh = NULL;               /* for dx_path_fini() */
++              path.dp_frame->at = (void *)&dummy_dot; /* hack for zero entry*/
+       }
+       hash = hinfo.hash;
+       do {
+-              block = dx_get_block(path.dp_frame->at);
++              block = dx_get_block(&path, path.dp_frame->at);
+               if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -1115,10 +1286,11 @@ static struct ext3_dir_entry_2* dx_pack_
+ /* Allocate new node, and split leaf node @bh into it, inserting new pointer
+  * into parent node identified by @frame */
+-static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct dx_path *path,
+                       struct buffer_head **bh,struct dx_frame *frame,
+                       struct dx_hash_info *hinfo, int *error)
+ {
++      struct inode *dir = path->dp_object;
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count, continued;
+       struct buffer_head *bh2;
+@@ -1180,7 +1352,7 @@ static struct ext3_dir_entry_2 *do_split
+               swap(*bh, bh2);
+               de = de2;
+       }
+-      dx_insert_block (frame, hash2 + continued, newblock);
++      dx_insert_block(path, frame, hash2 + continued, newblock);
+       err = ext3_journal_dirty_metadata (handle, bh2);
+       if (err)
+               goto journal_error;
+@@ -1315,6 +1487,7 @@ static int make_indexed_dir(handle_t *ha
+       struct fake_dirent *fde;
+       dx_path_init(&path, dir);
++      path.dp_param = &htree_compat_param;
+       blocksize =  dir->i_sb->s_blocksize;
+       dxtrace(printk("Creating index\n"));
+       retval = ext3_journal_get_write_access(handle, bh);
+@@ -1350,10 +1523,10 @@ static int make_indexed_dir(handle_t *ha
+       root->info.info_length = sizeof(root->info);
+       root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+       root->info.hash_version = DX_HASH_R5;
+-      entries = root->entries;
+-      dx_set_block (entries, 1);
++      entries = (void *)root->entries;
++      dx_set_block (&path, entries, 1);
+       dx_set_count (entries, 1);
+-      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
++      dx_set_limit (entries, dx_root_limit(&path));
+       /* Initialize as for dx_probe */
+       hinfo.hash_version = root->info.hash_version;
+@@ -1363,7 +1536,7 @@ static int make_indexed_dir(handle_t *ha
+       path.dp_frame->at = entries;
+       path.dp_frame->bh = bh;
+       bh = bh2;
+-      de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval);
++      de = do_split(handle, &path, &bh, path.dp_frame, &hinfo, &retval);
+       dx_path_fini(&path);
+       if (!de)
+               return retval;
+@@ -1446,8 +1619,8 @@ static int ext3_dx_add_entry(handle_t *h
+                            struct inode *inode)
+ {
+       struct dx_path path;
++      struct dx_param *param;
+       struct dx_frame *frame, *safe;
+-      struct dx_node *node2;
+       struct dx_entry *entries;   /* old block contents */
+       struct dx_entry *entries2;  /* new block contents */
+       struct dx_hash_info hinfo;
+@@ -1463,7 +1636,10 @@ static int ext3_dx_add_entry(handle_t *h
+       size_t isize;
+       dx_path_init(&path, dir);
+-      if (!dx_probe(dentry, NULL, &hinfo, &path, &err))
++      param = path.dp_param = &htree_compat_param;
++
++      err = dx_probe(dentry, NULL, &hinfo, &path);
++      if (err != 0)
+               return err;
+       frame = path.dp_frame;
+       entries = frame->entries;
+@@ -1471,7 +1647,8 @@ static int ext3_dx_add_entry(handle_t *h
+       /* XXX nikita: global serialization! */
+       isize = dir->i_size;
+-      if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++      if (!(bh = ext3_bread(handle, dir,
++                            dx_get_block(&path, frame->at), 0, &err)))
+               goto cleanup;
+       BUFFER_TRACE(bh, "get_write_access");
+@@ -1519,12 +1696,9 @@ static int ext3_dx_add_entry(handle_t *h
+        * transaction... */
+       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+-              if (!bh_new[i])
++              if (!bh_new[i] ||
++                  param->dpo_node_init(&path, bh_new[i], 0) != 0)
+                       goto cleanup;
+-              node2 = (struct dx_node *)(bh_new[i]->b_data);
+-              entries2 = node2->entries;
+-              node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+-              node2->fake.inode = 0;
+               BUFFER_TRACE(frame->bh, "get_write_access");
+               err = ext3_journal_get_write_access(handle, frame->bh);
+               if (err)
+@@ -1545,11 +1719,10 @@ static int ext3_dx_add_entry(handle_t *h
+               entries = frame->entries;
+               count = dx_get_count(entries);
+-              idx = frame->at - entries;
++              idx = dx_entry_diff(&path, frame->at, entries);
+               bh2 = bh_new[i];
+-              node2 = (struct dx_node *)(bh2->b_data);
+-              entries2 = node2->entries;
++              entries2 = dx_get_entries(&path, bh2->b_data, 0);
+               if (frame == path.dp_frames) {
+                       /* splitting root node. Tricky point:
+@@ -1571,19 +1744,19 @@ static int ext3_dx_add_entry(handle_t *h
+                       indirects = root->info.indirect_levels;
+                       dxtrace(printk("Creating new root %d\n", indirects));
+                       memcpy((char *) entries2, (char *) entries,
+-                             count * sizeof(struct dx_entry));
+-                      dx_set_limit(entries2, dx_node_limit(dir));
++                             count * dx_entry_size(&path));
++                      dx_set_limit(entries2, dx_node_limit(&path));
+                       /* Set up root */
+                       dx_set_count(entries, 1);
+-                      dx_set_block(entries + 0, newblock[i]);
++                      dx_set_block(&path, entries, newblock[i]);
+                       root->info.indirect_levels = indirects + 1;
+                       /* Shift frames in the path */
+                       memmove(frames + 2, frames + 1,
+                               (sizeof path.dp_frames) - 2 * sizeof frames[0]);
+                       /* Add new access path frame */
+-                      frames[1].at = entries2 + idx;
++                      frames[1].at = dx_entry_shift(&path, entries2, idx);
+                       frames[1].entries = entries = entries2;
+                       frames[1].bh = bh2;
+                       ++ frame;
+@@ -1594,23 +1767,30 @@ static int ext3_dx_add_entry(handle_t *h
+               } else {
+                       /* splitting non-root index node. */
+                       unsigned count1 = count/2, count2 = count - count1;
+-                      unsigned hash2 = dx_get_hash(entries + count1);
++                      unsigned hash2;
++
++                      dx_get_key(&path,
++                                 dx_entry_shift(&path, entries, count1),
++                                 &hash2);
++
+                       dxtrace(printk("Split index %i/%i\n", count1, count2));
+-                      memcpy ((char *) entries2, (char *) (entries + count1),
+-                              count2 * sizeof(struct dx_entry));
++                      memcpy ((char *) entries2,
++                              (char *) dx_entry_shift(&path, entries, count1),
++                              count2 * dx_entry_size(&path));
+                       dx_set_count (entries, count1);
+                       dx_set_count (entries2, count2);
+-                      dx_set_limit (entries2, dx_node_limit(dir));
++                      dx_set_limit (entries2, dx_node_limit(&path));
+                       /* Which index block gets the new entry? */
+                       if (idx >= count1) {
+-                              frame->at = entries2 + idx - count1;
++                              frame->at = dx_entry_shift(&path, entries2,
++                                                         idx - count1);
+                               frame->entries = entries = entries2;
+                               swap(frame->bh, bh2);
+                               bh_new[i] = bh2;
+                       }
+-                      dx_insert_block (frame - 1, hash2, newblock[i]);
++                      dx_insert_block(&path, frame - 1, hash2, newblock[i]);
+                       dxtrace(dx_show_index ("node", frame->entries));
+                       dxtrace(dx_show_index ("node",
+                              ((struct dx_node *) bh2->b_data)->entries));
+@@ -1619,7 +1799,7 @@ static int ext3_dx_add_entry(handle_t *h
+                               goto journal_error;
+               }
+       }
+-      de = do_split(handle, dir, &bh, --frame, &hinfo, &err);
++      de = do_split(handle, &path, &bh, --frame, &hinfo, &err);
+       if (!de)
+               goto cleanup;
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.4.24.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.4.24.patch
deleted file mode 100644 (file)
index 172432a..0000000
+++ /dev/null
@@ -1,1766 +0,0 @@
-Index: linux-2.4.20-rh-20.9/fs/ext3/mballoc.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/mballoc.c        2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.4.20-rh-20.9/fs/ext3/mballoc.c     2004-10-20 22:28:51.000000000 +0400
-@@ -0,0 +1,1459 @@
-+/*
-+ * Copyright (c) 2004, Cluster File Systems, Inc, info@clusterfs.com
-+ * Written by Alex Tomas <alex@clusterfs.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 as
-+ * published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public Licens
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
-+ */
-+
-+
-+/*
-+ * mballoc.c contains the multiblocks allocation routines
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/time.h>
-+#include <linux/fs.h>
-+#include <linux/locks.h>
-+#include <linux/jbd.h>
-+#include <linux/slab.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/quotaops.h>
-+#include <linux/module.h>
-+
-+/*
-+ * TODO:
-+ *   - do not scan from the beginning, try to remember first free block
-+ *   - mb_mark_used_* may allocate chunk right after splitting buddy
-+ *   - special flag to advice allocator to look for requested + N blocks
-+ *     this may improve interaction between extents and mballoc
-+ */
-+
-+/*
-+ * with AGRESSIVE_CHECK allocator runs consistency checks over
-+ * structures. this checks slow things down a lot
-+ */
-+#define AGGRESSIVE_CHECK__
-+
-+/*
-+ */
-+#define MB_DEBUG__
-+#ifdef MB_DEBUG
-+#define mb_debug(fmt,a...)    printk(fmt, ##a)
-+#else
-+#define mb_debug(fmt,a...)
-+#endif
-+
-+/*
-+ * where to save buddies structures beetween umount/mount (clean case only)
-+ */
-+#define EXT3_BUDDY_FILE               ".buddy"
-+
-+/*
-+ * max. number of chunks to be tracked in ext3_free_extent struct
-+ */
-+#define MB_ARR_SIZE   32
-+
-+struct ext3_allocation_context {
-+      struct super_block *ac_sb;
-+
-+      /* search goals */
-+      int ac_g_group;
-+      int ac_g_start;
-+      int ac_g_len;
-+      int ac_g_flags;
-+      
-+      /* the best found extent */
-+      int ac_b_group;
-+      int ac_b_start;
-+      int ac_b_len;
-+      
-+      /* number of iterations done. we have to track to limit searching */
-+      int ac_repeats;
-+      int ac_groups_scanned;
-+      int ac_status;
-+};
-+
-+#define AC_STATUS_CONTINUE    1
-+#define AC_STATUS_FOUND               2
-+
-+
-+struct ext3_buddy {
-+      void *bd_bitmap;
-+      void *bd_buddy;
-+      int bd_blkbits;
-+      struct buffer_head *bd_bh;
-+      struct buffer_head *bd_bh2;
-+      struct ext3_buddy_group_blocks *bd_bd;
-+      struct super_block *bd_sb;
-+};
-+
-+struct ext3_free_extent {
-+      int fe_start;
-+      int fe_len;
-+      unsigned char fe_orders[MB_ARR_SIZE];
-+      unsigned char fe_nums;
-+      unsigned char fe_back;
-+};
-+
-+#define in_range(b, first, len)       ((b) >= (first) && (b) <= (first) + (len) - 1)
-+
-+
-+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
-+void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
-+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
-+int ext3_mb_reserve_blocks(struct super_block *, int);
-+void ext3_mb_release_blocks(struct super_block *, int);
-+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
-+void ext3_mb_free_committed_blocks(struct super_block *);
-+int load_block_bitmap (struct super_block *, unsigned int);
-+
-+#define mb_correct_addr_and_bit(bit,addr)     \
-+{                                             \
-+      if ((unsigned long) addr & 1) {         \
-+              bit += 8;                       \
-+              addr--;                         \
-+      }                                       \
-+      if ((unsigned long) addr & 2) {         \
-+              bit += 16;                      \
-+              addr--;                         \
-+              addr--;                         \
-+      }                                       \
-+}
-+
-+static inline int mb_test_bit(int bit, void *addr)
-+{
-+      mb_correct_addr_and_bit(bit,addr);
-+      return test_bit(bit, addr);
-+}
-+
-+static inline void mb_set_bit(int bit, void *addr)
-+{
-+      mb_correct_addr_and_bit(bit,addr);
-+      set_bit(bit, addr);
-+}
-+
-+static inline void mb_clear_bit(int bit, void *addr)
-+{
-+      mb_correct_addr_and_bit(bit,addr);
-+      clear_bit(bit, addr);
-+}
-+
-+struct buffer_head * 
-+read_block_bitmap_bh(struct super_block *sb, unsigned int block_group)
-+{
-+      struct buffer_head *bh;
-+      int bitmap_nr;
-+
-+      bitmap_nr = load_block_bitmap(sb, block_group);
-+      if (bitmap_nr < 0)
-+              return NULL;
-+      
-+      bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
-+      return bh;
-+}
-+
-+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
-+{
-+      int i = 1;
-+      void *bb;
-+
-+      J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
-+      J_ASSERT(max != NULL);
-+
-+      if (order > e3b->bd_blkbits + 1)
-+              return NULL;
-+
-+      /* at order 0 we see each particular block */
-+      *max = 1 << (e3b->bd_blkbits + 3);
-+      if (order == 0)
-+              return e3b->bd_bitmap;
-+
-+      bb = e3b->bd_buddy;
-+      *max = *max >> 1;
-+      while (i < order) {
-+              bb += 1 << (e3b->bd_blkbits - i);
-+              i++;
-+              *max = *max >> 1;
-+      }
-+      return bb;
-+}
-+
-+static int ext3_mb_load_desc(struct super_block *sb, int group,
-+                              struct ext3_buddy *e3b)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+
-+      J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
-+      J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
-+
-+      /* load bitmap */
-+      e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
-+      if (e3b->bd_bh == NULL) {
-+              ext3_error(sb, "ext3_mb_load_desc",
-+                              "can't get block for buddy bitmap\n");
-+              goto out;
-+      }
-+      if (!buffer_uptodate(e3b->bd_bh)) {
-+              ll_rw_block(READ, 1, &e3b->bd_bh);
-+              wait_on_buffer(e3b->bd_bh);
-+      }
-+      J_ASSERT(buffer_uptodate(e3b->bd_bh));
-+
-+      /* load buddy */
-+      e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
-+      if (e3b->bd_bh2 == NULL) {
-+              ext3_error(sb, "ext3_mb_load_desc",
-+                              "can't get block for buddy bitmap\n");
-+              goto out;
-+      }
-+      if (!buffer_uptodate(e3b->bd_bh2)) {
-+              ll_rw_block(READ, 1, &e3b->bd_bh2);
-+              wait_on_buffer(e3b->bd_bh2);
-+      }
-+      J_ASSERT(buffer_uptodate(e3b->bd_bh2));
-+
-+      e3b->bd_bitmap = e3b->bd_bh->b_data;
-+      e3b->bd_buddy = e3b->bd_bh2->b_data;
-+      e3b->bd_blkbits = sb->s_blocksize_bits;
-+      e3b->bd_bd = sbi->s_buddy_blocks[group];
-+      e3b->bd_sb = sb;
-+
-+      return 0;
-+out:
-+      brelse(e3b->bd_bh);
-+      brelse(e3b->bd_bh2);
-+      e3b->bd_bh = NULL;
-+      e3b->bd_bh2 = NULL;
-+      return -EIO;
-+}
-+
-+static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
-+{
-+      mark_buffer_dirty(e3b->bd_bh);
-+      mark_buffer_dirty(e3b->bd_bh2);
-+}
-+
-+static void ext3_mb_release_desc(struct ext3_buddy *e3b)
-+{
-+      brelse(e3b->bd_bh);
-+      brelse(e3b->bd_bh2);
-+}
-+
-+#ifdef AGGRESSIVE_CHECK
-+static void mb_check_buddy(struct ext3_buddy *e3b)
-+{
-+      int order = e3b->bd_blkbits + 1;
-+      int max, max2, i, j, k, count;
-+      void *buddy, *buddy2;
-+
-+      if (!test_opt(e3b->bd_sb, MBALLOC))
-+              return;
-+
-+      while (order > 1) {
-+              buddy = mb_find_buddy(e3b, order, &max);
-+              J_ASSERT(buddy);
-+              buddy2 = mb_find_buddy(e3b, order - 1, &max2);
-+              J_ASSERT(buddy2);
-+              J_ASSERT(buddy != buddy2);
-+              J_ASSERT(max * 2 == max2);
-+
-+              count = 0;
-+              for (i = 0; i < max; i++) {
-+
-+                      if (!mb_test_bit(i, buddy)) {
-+                              /* only single bit in buddy2 may be 1 */
-+                              if (mb_test_bit(i << 1, buddy2))
-+                                      J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
-+                              else if (mb_test_bit((i << 1) + 1, buddy2))
-+                                      J_ASSERT(!mb_test_bit(i << 1, buddy2));
-+                              continue;
-+                      }
-+
-+                      /* both bits in buddy2 must be 0 */
-+                      J_ASSERT(!mb_test_bit(i << 1, buddy2));
-+                      J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
-+
-+                      for (j = 0; j < (1 << order); j++) {
-+                              k = (i * (1 << order)) + j;
-+                              J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
-+                      }
-+                      count++;
-+              }
-+              J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
-+              order--;
-+      }
-+
-+      buddy = mb_find_buddy(e3b, 0, &max);
-+      for (i = 0; i < max; i++) {
-+              if (mb_test_bit(i, buddy))
-+                      continue;
-+              /* check used bits only */
-+              for (j = 0; j < e3b->bd_blkbits + 1; j++) {
-+                      buddy2 = mb_find_buddy(e3b, j, &max2);
-+                      k = i >> j;
-+                      J_ASSERT(k < max2);
-+                      J_ASSERT(!mb_test_bit(k, buddy2));
-+              }
-+      }
-+}
-+#else
-+#define mb_check_buddy(e3b)
-+#endif
-+
-+static inline void
-+ext3_lock_group(struct super_block *sb, int group)
-+{
-+      spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
-+}
-+
-+static inline void
-+ext3_unlock_group(struct super_block *sb, int group)
-+{
-+      spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
-+}
-+
-+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
-+{
-+      int order = 1;
-+      void *bb;
-+
-+      J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
-+      J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
-+
-+      bb = e3b->bd_buddy;
-+      while (order <= e3b->bd_blkbits + 1) {
-+              block = block >> 1;
-+              if (mb_test_bit(block, bb)) {
-+                      /* this block is part of buddy of order 'order' */
-+                      return order;
-+              }
-+              bb += 1 << (e3b->bd_blkbits - order);
-+              order++;
-+      }
-+      return 0;
-+}
-+
-+static inline void mb_clear_bits(void *bm, int cur, int len)
-+{
-+      __u32 *addr;
-+
-+      len = cur + len;
-+      while (cur < len) {
-+              if ((cur & 31) == 0 && (len - cur) >= 32) {
-+                      /* fast path: clear whole word at once */
-+                      addr = bm + (cur >> 3);
-+                      *addr = 0;
-+                      cur += 32;
-+                      continue;
-+              }
-+              mb_clear_bit(cur, bm);
-+              cur++;
-+      }
-+}
-+
-+static inline void mb_set_bits(void *bm, int cur, int len)
-+{
-+      __u32 *addr;
-+
-+      len = cur + len;
-+      while (cur < len) {
-+              if ((cur & 31) == 0 && (len - cur) >= 32) {
-+                      /* fast path: clear whole word at once */
-+                      addr = bm + (cur >> 3);
-+                      *addr = 0xffffffff;
-+                      cur += 32;
-+                      continue;
-+              }
-+              mb_set_bit(cur, bm);
-+              cur++;
-+      }
-+}
-+
-+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
-+{
-+      int block, max, order;
-+      void *buddy, *buddy2;
-+
-+      mb_check_buddy(e3b);
-+      while (count-- > 0) {
-+              block = first++;
-+              order = 0;
-+
-+              J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
-+              mb_set_bit(block, e3b->bd_bitmap);
-+              e3b->bd_bd->bb_counters[order]++;
-+
-+              /* start of the buddy */
-+              buddy = mb_find_buddy(e3b, order, &max);
-+
-+              do {
-+                      block &= ~1UL;
-+                      if (!mb_test_bit(block, buddy) ||
-+                                      !mb_test_bit(block + 1, buddy))
-+                              break;
-+
-+                      /* both the buddies are free, try to coalesce them */
-+                      buddy2 = mb_find_buddy(e3b, order + 1, &max);
-+
-+                      if (!buddy2)
-+                              break;
-+
-+                      if (order > 0) {
-+                              /* for special purposes, we don't clear
-+                               * free bits in bitmap */
-+                              mb_clear_bit(block, buddy);
-+                              mb_clear_bit(block + 1, buddy);
-+                      }
-+                      e3b->bd_bd->bb_counters[order]--;
-+                      e3b->bd_bd->bb_counters[order]--;
-+
-+                      block = block >> 1;
-+                      order++;
-+                      e3b->bd_bd->bb_counters[order]++;
-+
-+                      mb_set_bit(block, buddy2);
-+                      buddy = buddy2;
-+              } while (1);
-+      }
-+      mb_check_buddy(e3b);
-+
-+      return 0;
-+}
-+
-+/*
-+ * returns 1 if out extent is enough to fill needed space
-+ */
-+int mb_make_backward_extent(struct ext3_free_extent *in,
-+                              struct ext3_free_extent *out, int needed)
-+{
-+      int i;
-+
-+      J_ASSERT(in);
-+      J_ASSERT(out);
-+      J_ASSERT(in->fe_nums < MB_ARR_SIZE);
-+
-+      out->fe_len = 0;
-+      out->fe_start = in->fe_start + in->fe_len;
-+      out->fe_nums = 0;
-+
-+      /* for single-chunk extent we need not back order
-+       * also, if an extent doesn't fill needed space
-+       * then it makes no sense to try back order becase
-+       * if we select this extent then it'll be use as is */
-+      if (in->fe_nums < 2 || in->fe_len < needed)
-+              return 0;
-+
-+      i = in->fe_nums - 1;
-+      while (i >= 0 && out->fe_len < needed) {
-+              out->fe_len += (1 << in->fe_orders[i]);
-+              out->fe_start -= (1 << in->fe_orders[i]);
-+              i--;
-+      }
-+      /* FIXME: in some situation fe_orders may be too small to hold
-+       * all the buddies */
-+      J_ASSERT(out->fe_len >= needed);
-+      
-+      for (i++; i < in->fe_nums; i++)
-+              out->fe_orders[out->fe_nums++] = in->fe_orders[i];
-+      J_ASSERT(out->fe_nums < MB_ARR_SIZE);
-+      out->fe_back = 1;
-+
-+      return 1;
-+}
-+
-+int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
-+                      int needed, struct ext3_free_extent *ex)
-+{
-+      int space = needed;
-+      int next, max, ord;
-+      void *buddy;
-+
-+      J_ASSERT(ex != NULL);
-+
-+      ex->fe_nums = 0;
-+      ex->fe_len = 0;
-+      
-+      buddy = mb_find_buddy(e3b, order, &max);
-+      J_ASSERT(buddy);
-+      J_ASSERT(block < max);
-+      if (!mb_test_bit(block, buddy))
-+              goto nofree;
-+
-+      if (order == 0) {
-+              /* find actual order */
-+              order = mb_find_order_for_block(e3b, block);
-+              block = block >> order;
-+      }
-+
-+      ex->fe_orders[ex->fe_nums++] = order;
-+      ex->fe_len = 1 << order;
-+      ex->fe_start = block << order;
-+      ex->fe_back = 0;
-+
-+      while ((space = space - (1 << order)) > 0) {
-+
-+              buddy = mb_find_buddy(e3b, order, &max);
-+              J_ASSERT(buddy);
-+
-+              if (block + 1 >= max)
-+                      break;
-+
-+              next = (block + 1) * (1 << order);
-+              if (!mb_test_bit(next, e3b->bd_bitmap))
-+                      break;
-+
-+              ord = mb_find_order_for_block(e3b, next);
-+
-+              if ((1 << ord) >= needed) {
-+                      /* we dont want to coalesce with self-enough buddies */
-+                      break;
-+              }
-+              order = ord;
-+              block = next >> order;
-+              ex->fe_len += 1 << order;
-+
-+              if (ex->fe_nums < MB_ARR_SIZE)
-+                      ex->fe_orders[ex->fe_nums++] = order;
-+      }
-+
-+nofree:
-+      J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
-+      return ex->fe_len;
-+}
-+
-+static int mb_mark_used_backward(struct ext3_buddy *e3b,
-+                                      struct ext3_free_extent *ex, int len)
-+{
-+      int start = ex->fe_start, len0 = len;
-+      int ord, mlen, max, cur;
-+      void *buddy;
-+
-+      start = ex->fe_start + ex->fe_len - 1;
-+      while (len) {
-+              ord = mb_find_order_for_block(e3b, start);
-+              if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
-+                              len >= (1 << ord)) {
-+                      /* the whole chunk may be allocated at once! */
-+                      mlen = 1 << ord;
-+                      buddy = mb_find_buddy(e3b, ord, &max);
-+                      J_ASSERT((start >> ord) < max);
-+                      mb_clear_bit(start >> ord, buddy);
-+                      e3b->bd_bd->bb_counters[ord]--;
-+                      start -= mlen;
-+                      len -= mlen;
-+                      J_ASSERT(len >= 0);
-+                      J_ASSERT(start >= 0);
-+                      continue;
-+              }
-+
-+              /* we have to split large buddy */
-+              J_ASSERT(ord > 0);
-+              buddy = mb_find_buddy(e3b, ord, &max);
-+              mb_clear_bit(start >> ord, buddy);
-+              e3b->bd_bd->bb_counters[ord]--;
-+
-+              ord--;
-+              cur = (start >> ord) & ~1U;
-+              buddy = mb_find_buddy(e3b, ord, &max);
-+              mb_set_bit(cur, buddy);
-+              mb_set_bit(cur + 1, buddy);
-+              e3b->bd_bd->bb_counters[ord]++;
-+              e3b->bd_bd->bb_counters[ord]++;
-+      }
-+
-+      /* now drop all the bits in bitmap */
-+      mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
-+
-+      mb_check_buddy(e3b);
-+
-+      return 0;
-+}
-+
-+static int mb_mark_used_forward(struct ext3_buddy *e3b,
-+                              struct ext3_free_extent *ex, int len)
-+{
-+      int start = ex->fe_start, len0 = len;
-+      int ord, mlen, max, cur;
-+      void *buddy;
-+
-+      while (len) {
-+              ord = mb_find_order_for_block(e3b, start);
-+
-+              if (((start >> ord) << ord) == start && len >= (1 << ord)) {
-+                      /* the whole chunk may be allocated at once! */
-+                      mlen = 1 << ord;
-+                      buddy = mb_find_buddy(e3b, ord, &max);
-+                      J_ASSERT((start >> ord) < max);
-+                      mb_clear_bit(start >> ord, buddy);
-+                      e3b->bd_bd->bb_counters[ord]--;
-+                      start += mlen;
-+                      len -= mlen;
-+                      J_ASSERT(len >= 0);
-+                      continue;
-+              }
-+
-+              /* we have to split large buddy */
-+              J_ASSERT(ord > 0);
-+              buddy = mb_find_buddy(e3b, ord, &max);
-+              mb_clear_bit(start >> ord, buddy);
-+              e3b->bd_bd->bb_counters[ord]--;
-+
-+              ord--;
-+              cur = (start >> ord) & ~1U;
-+              buddy = mb_find_buddy(e3b, ord, &max);
-+              mb_set_bit(cur, buddy);
-+              mb_set_bit(cur + 1, buddy);
-+              e3b->bd_bd->bb_counters[ord]++;
-+              e3b->bd_bd->bb_counters[ord]++;
-+      }
-+
-+      /* now drop all the bits in bitmap */
-+      mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
-+
-+      mb_check_buddy(e3b);
-+
-+      return 0;
-+}
-+
-+int inline mb_mark_used(struct ext3_buddy *e3b,
-+                      struct ext3_free_extent *ex, int len)
-+{
-+      int err;
-+
-+      J_ASSERT(ex);
-+      if (ex->fe_back == 0)
-+              err = mb_mark_used_forward(e3b, ex, len);
-+      else
-+              err = mb_mark_used_backward(e3b, ex, len);
-+      return err;
-+}
-+
-+int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
-+                              struct ext3_buddy *e3b, int group)
-+{
-+      struct super_block *sb = ac->ac_sb;
-+      int err, gorder, max, i;
-+      struct ext3_free_extent curex;
-+
-+      /* let's know order of allocation */
-+      gorder = 0;
-+      while (ac->ac_g_len > (1 << gorder))
-+              gorder++;
-+
-+      if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
-+              /* someone asks for space at this specified block
-+               * probably he wants to merge it into existing extent */
-+              if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
-+                      /* good. at least one block is free */
-+                      max = mb_find_extent(e3b, 0, ac->ac_g_start,
-+                                              ac->ac_g_len, &curex);
-+                      max = min(curex.fe_len, ac->ac_g_len);
-+                      mb_mark_used(e3b, &curex, max);
-+                      
-+                      ac->ac_b_group = group;
-+                      ac->ac_b_start = curex.fe_start;
-+                      ac->ac_b_len = max;
-+                      ac->ac_status = AC_STATUS_FOUND;
-+                      err = 0;
-+                      goto out;
-+              }
-+              /* don't try to find goal anymore */
-+              ac->ac_g_flags &= ~1;
-+      }
-+
-+      i = 0;
-+      while (1) {
-+              i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
-+              if (i >= sb->s_blocksize * 8)
-+                      break;
-+
-+              max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
-+              if (max >= ac->ac_g_len) {
-+                      max = min(curex.fe_len, ac->ac_g_len);
-+                      mb_mark_used(e3b, &curex, max);
-+                      
-+                      ac->ac_b_group = group;
-+                      ac->ac_b_start = curex.fe_start;
-+                      ac->ac_b_len = max;
-+                      ac->ac_status = AC_STATUS_FOUND;
-+                      break;
-+              }
-+              i += max;
-+      }
-+
-+      return 0;
-+
-+out:
-+      return err;
-+}
-+
-+int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
-+{
-+      struct ext3_group_desc *gdp;
-+      int free_blocks;
-+
-+      gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
-+      if (!gdp)
-+              return 0;
-+      free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
-+      if (free_blocks == 0)
-+              return 0;
-+
-+      /* someone wants this block very much */
-+      if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
-+              return 1;
-+
-+      /* FIXME: I'd like to take fragmentation into account here */
-+      if (cr == 0) {
-+              if (free_blocks >= ac->ac_g_len >> 1)
-+                      return 1;
-+      } else if (cr == 1) {
-+              if (free_blocks >= ac->ac_g_len >> 2)
-+                      return 1;
-+      } else if (cr == 2) {
-+              return 1;
-+      } else {
-+              BUG();
-+      }
-+      return 0;
-+}
-+
-+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
-+                      unsigned long goal, int *len, int flags, int *errp)
-+{
-+      struct buffer_head *bitmap_bh = NULL;
-+      struct ext3_allocation_context ac;
-+      int i, group, block, cr, err = 0;
-+      struct ext3_group_desc *gdp;
-+      struct ext3_super_block *es;
-+      struct buffer_head *gdp_bh;
-+      struct ext3_sb_info *sbi;
-+      struct super_block *sb;
-+      struct ext3_buddy e3b;
-+
-+      J_ASSERT(len != NULL);
-+      J_ASSERT(*len > 0);
-+
-+      sb = inode->i_sb;
-+      if (!sb) {
-+              printk("ext3_mb_new_nblocks: nonexistent device");
-+              return 0;
-+      }
-+
-+      if (!test_opt(sb, MBALLOC)) {
-+              static int ext3_mballoc_warning = 0;
-+              if (ext3_mballoc_warning == 0) {
-+                      printk(KERN_ERR "EXT3-fs: multiblock request with "
-+                              "mballoc disabled!\n");
-+                      ext3_mballoc_warning++;
-+              }
-+              *len = 1;
-+              err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp);
-+              return err;
-+      }
-+
-+      ext3_mb_poll_new_transaction(sb, handle);
-+
-+      sbi = EXT3_SB(sb);
-+      es = EXT3_SB(sb)->s_es;
-+
-+      if (!(flags & 2)) {
-+              /* someone asks for non-reserved blocks */
-+              BUG_ON(*len > 1);
-+              err = ext3_mb_reserve_blocks(sb, 1);
-+              if (err) {
-+                      *errp = err;
-+                      return 0;
-+              }
-+      }
-+
-+      /*
-+       * Check quota for allocation of this blocks.
-+       */
-+      while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
-+              *len -= 1;
-+      if (*len == 0) {
-+              *errp = -EDQUOT;
-+              block = 0;
-+              goto out;
-+      }
-+
-+      /* start searching from the goal */
-+      if (goal < le32_to_cpu(es->s_first_data_block) ||
-+          goal >= le32_to_cpu(es->s_blocks_count))
-+              goal = le32_to_cpu(es->s_first_data_block);
-+      group = (goal - le32_to_cpu(es->s_first_data_block)) /
-+                      EXT3_BLOCKS_PER_GROUP(sb);
-+      block = ((goal - le32_to_cpu(es->s_first_data_block)) %
-+                      EXT3_BLOCKS_PER_GROUP(sb));
-+
-+      /* set up allocation goals */
-+      ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
-+      ac.ac_status = 0;
-+      ac.ac_groups_scanned = 0;
-+      ac.ac_sb = inode->i_sb;
-+      ac.ac_g_group = group;
-+      ac.ac_g_start = block;
-+      ac.ac_g_len = *len;
-+      ac.ac_g_flags = flags;
-+
-+      /* loop over the groups */
-+      for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
-+              for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
-+                      if (group == EXT3_SB(sb)->s_groups_count)
-+                              group = 0;
-+
-+                      /* check is group good for our criteries */
-+                      if (!mb_good_group(&ac, group, cr))
-+                              continue;
-+
-+                      err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
-+                      if (err)
-+                              goto out_err;
-+
-+                      ext3_lock_group(sb, group);
-+                      if (!mb_good_group(&ac, group, cr)) {
-+                              /* someone did allocation from this group */
-+                              ext3_unlock_group(sb, group);
-+                              ext3_mb_release_desc(&e3b);
-+                              continue;
-+                      }
-+
-+                      err = ext3_mb_new_in_group(&ac, &e3b, group);
-+                      ext3_unlock_group(sb, group);
-+                      if (ac.ac_status == AC_STATUS_FOUND)
-+                              ext3_mb_dirty_buddy(&e3b);
-+                      ext3_mb_release_desc(&e3b);
-+                      if (err)
-+                              goto out_err;
-+                      if (ac.ac_status == AC_STATUS_FOUND)
-+                              break;
-+              }
-+      }
-+
-+      if (ac.ac_status != AC_STATUS_FOUND) {
-+              /* unfortunately, we can't satisfy this request */
-+              J_ASSERT(ac.ac_b_len == 0);
-+              DQUOT_FREE_BLOCK(inode, *len);
-+              *errp = -ENOSPC;
-+              block = 0;
-+              goto out;
-+      }
-+
-+      /* good news - free block(s) have been found. now it's time
-+       * to mark block(s) in good old journaled bitmap */
-+      block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
-+                      + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
-+
-+      /* we made a desicion, now mark found blocks in good old
-+       * bitmap to be journaled */
-+
-+      ext3_debug("using block group %d(%d)\n",
-+                      ac.ac_b_group.group, gdp->bg_free_blocks_count);
-+
-+      bitmap_bh = read_block_bitmap_bh(sb, ac.ac_b_group);
-+      if (!bitmap_bh) {
-+              *errp = -EIO;
-+              goto out_err;
-+      }
-+
-+      err = ext3_journal_get_write_access(handle, bitmap_bh);
-+      if (err) {
-+              *errp = err;
-+              goto out_err;
-+      }
-+
-+      gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
-+      if (!gdp) {
-+              *errp = -EIO;
-+              goto out_err;
-+      }
-+      
-+      err = ext3_journal_get_write_access(handle, gdp_bh);
-+      if (err)
-+              goto out_err;
-+
-+      block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
-+                              + le32_to_cpu(es->s_first_data_block);
-+
-+      if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
-+          block == le32_to_cpu(gdp->bg_inode_bitmap) ||
-+          in_range(block, le32_to_cpu(gdp->bg_inode_table),
-+                    EXT3_SB(sb)->s_itb_per_group))
-+              ext3_error(sb, "ext3_new_block",
-+                          "Allocating block in system zone - "
-+                          "block = %u", block);
-+#if 0
-+      for (i = 0; i < ac.ac_b_len; i++)
-+              J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
-+#endif
-+      mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
-+
-+      ext3_lock_group(sb, ac.ac_b_group);
-+      gdp->bg_free_blocks_count =
-+                      cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 
-+                                      ac.ac_b_len);
-+      ext3_unlock_group(sb, ac.ac_b_group);
-+      spin_lock(&sbi->s_md_lock);
-+      es->s_free_blocks_count =
-+              cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - ac.ac_b_len);
-+      spin_unlock(&sbi->s_md_lock);
-+
-+      err = ext3_journal_dirty_metadata(handle, bitmap_bh);
-+      if (err)
-+              goto out_err;
-+      err = ext3_journal_dirty_metadata(handle, gdp_bh);
-+      if (err)
-+              goto out_err;
-+
-+      sb->s_dirt = 1;
-+      *errp = 0;
-+
-+      /* drop non-allocated, but dquote'd blocks */
-+      J_ASSERT(*len >= ac.ac_b_len);
-+      DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
-+
-+      *len = ac.ac_b_len;
-+      J_ASSERT(block != 0);
-+      goto out;
-+
-+out_err:
-+      /* if we've already allocated something, roll it back */
-+      if (ac.ac_status == AC_STATUS_FOUND) {
-+              /* FIXME: free blocks here */
-+      }
-+
-+      DQUOT_FREE_BLOCK(inode, *len);
-+      *errp = err;
-+      block = 0;
-+out:
-+      if (!(flags & 2)) {
-+              /* block wasn't reserved before and we reserved it
-+               * at the beginning of allocation. it doesn't matter
-+               * whether we allocated anything or we failed: time
-+               * to release reservation. NOTE: because I expect
-+               * any multiblock request from delayed allocation
-+               * path only, here is single block always */
-+              ext3_mb_release_blocks(sb, 1);
-+      }
-+      return block;
-+}
-+
-+int ext3_mb_generate_buddy(struct super_block *sb, int group)
-+{
-+      struct buffer_head *bh;
-+      int i, err, count = 0;
-+      struct ext3_buddy e3b;
-+      
-+      err = ext3_mb_load_desc(sb, group, &e3b);
-+      if (err)
-+              goto out;
-+      memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
-+      memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
-+
-+      bh = read_block_bitmap_bh(sb, group);
-+      if (bh == NULL) {
-+              err = -EIO; 
-+              goto out2;
-+      }
-+
-+      /* loop over the blocks, nad create buddies for free ones */
-+      for (i = 0; i < sb->s_blocksize * 8; i++) {
-+              if (!mb_test_bit(i, (void *) bh->b_data)) {
-+                      mb_free_blocks(&e3b, i, 1);
-+                      count++;
-+              }
-+      }
-+      mb_check_buddy(&e3b);
-+      ext3_mb_dirty_buddy(&e3b);
-+
-+out2:
-+      ext3_mb_release_desc(&e3b);
-+out:
-+      return err;
-+}
-+
-+EXPORT_SYMBOL(ext3_mb_new_blocks);
-+
-+#define MB_CREDITS    \
-+      (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS)
-+
-+int ext3_mb_init_backend(struct super_block *sb)
-+{
-+      struct inode *root = sb->s_root->d_inode;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct dentry *db;
-+      tid_t target;
-+      int err, i;
-+
-+      sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks *) *
-+                                      sbi->s_groups_count, GFP_KERNEL);
-+      if (sbi->s_buddy_blocks == NULL) {
-+              printk("EXT3-fs: can't allocate mem for buddy maps\n");
-+              return -ENOMEM;
-+      }
-+      memset(sbi->s_buddy_blocks, 0,
-+              sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count);
-+      sbi->s_buddy = NULL;
-+
-+      down(&root->i_sem);
-+      db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
-+                              strlen(EXT3_BUDDY_FILE));
-+      if (IS_ERR(db)) {
-+              err = PTR_ERR(db);
-+              printk("EXT3-fs: can't lookup buddy file: %d\n", err);
-+              goto out;
-+      }
-+
-+      if (db->d_inode != NULL) {
-+              sbi->s_buddy = igrab(db->d_inode);
-+              goto map;
-+      }
-+
-+      err = ext3_create(root, db, S_IFREG, NULL);
-+      if (err) {
-+              printk("error while creation buddy file: %d\n", err);
-+      } else {
-+              sbi->s_buddy = igrab(db->d_inode);
-+      }
-+
-+map:
-+      for (i = 0; i < sbi->s_groups_count; i++) {
-+              struct buffer_head *bh = NULL;
-+              handle_t *handle;
-+
-+              sbi->s_buddy_blocks[i] =
-+                      kmalloc(sizeof(struct ext3_buddy_group_blocks),
-+                                      GFP_KERNEL);
-+              if (sbi->s_buddy_blocks[i] == NULL) {
-+                      printk("EXT3-fs: can't allocate mem for buddy\n");
-+                      err = -ENOMEM;
-+                      goto out2;
-+              }
-+
-+              handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
-+              if (IS_ERR(handle)) {
-+                      err = PTR_ERR(handle);
-+                      goto out2;
-+              }
-+              
-+              /* allocate block for bitmap */
-+              bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
-+              if (bh == NULL) {
-+                      printk("can't get block for buddy bitmap: %d\n", err);
-+                      goto out2;
-+              }
-+              sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
-+              brelse(bh);
-+
-+              /* allocate block for buddy */
-+              bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
-+              if (bh == NULL) {
-+                      printk("can't get block for buddy: %d\n", err);
-+                      goto out2;
-+              }
-+              sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
-+              brelse(bh);
-+              ext3_journal_stop(handle, sbi->s_buddy);
-+              spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
-+              sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
-+              sbi->s_buddy_blocks[i]->bb_tid = 0;
-+      }
-+
-+      if ((target = log_start_commit(sbi->s_journal, NULL)))
-+              log_wait_commit(sbi->s_journal, target);
-+
-+out2:
-+      dput(db);
-+out:
-+      up(&root->i_sem);
-+      return err;
-+}
-+
-+int ext3_mb_release(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int i;
-+      
-+      if (!test_opt(sb, MBALLOC))
-+              return 0;
-+
-+      /* release freed, non-committed blocks */
-+      spin_lock(&sbi->s_md_lock);
-+      list_splice_init(&sbi->s_closed_transaction,
-+                      &sbi->s_committed_transaction);
-+      list_splice_init(&sbi->s_active_transaction,
-+                      &sbi->s_committed_transaction);
-+      spin_unlock(&sbi->s_md_lock);
-+      ext3_mb_free_committed_blocks(sb);
-+
-+      if (sbi->s_buddy_blocks) {
-+              for (i = 0; i < sbi->s_groups_count; i++)
-+                      if (sbi->s_buddy_blocks[i])
-+                              kfree(sbi->s_buddy_blocks[i]);
-+              kfree(sbi->s_buddy_blocks);
-+      }
-+      if (sbi->s_buddy)
-+              iput(sbi->s_buddy);
-+      if (sbi->s_blocks_reserved)
-+              printk("ext3-fs: %ld blocks being reserved at umount!\n",
-+                              sbi->s_blocks_reserved);
-+      return 0;
-+}
-+
-+int ext3_mb_init(struct super_block *sb)
-+{
-+      struct ext3_super_block *es;
-+      int i;
-+
-+      if (!test_opt(sb, MBALLOC))
-+              return 0;
-+
-+      /* init file for buddy data */
-+      clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
-+      ext3_mb_init_backend(sb);
-+
-+      es = EXT3_SB(sb)->s_es;
-+      for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
-+              ext3_mb_generate_buddy(sb, i);
-+      spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
-+      spin_lock_init(&EXT3_SB(sb)->s_md_lock);
-+      INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
-+      INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
-+      INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
-+      set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
-+      printk("EXT3-fs: mballoc enabled\n");
-+      return 0;
-+}
-+
-+void ext3_mb_free_committed_blocks(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int err, i, count = 0, count2 = 0;
-+      struct ext3_free_metadata *md;
-+      struct ext3_buddy e3b;
-+
-+      if (list_empty(&sbi->s_committed_transaction))
-+              return;
-+
-+      /* there is committed blocks to be freed yet */
-+      do {
-+              /* get next array of blocks */
-+              md = NULL;
-+              spin_lock(&sbi->s_md_lock);
-+              if (!list_empty(&sbi->s_committed_transaction)) {
-+                      md = list_entry(sbi->s_committed_transaction.next,
-+                                      struct ext3_free_metadata, list);
-+                      list_del(&md->list);
-+              }
-+              spin_unlock(&sbi->s_md_lock);
-+
-+              if (md == NULL)
-+                      break;
-+
-+              mb_debug("gonna free %u blocks in group %u (0x%p):",
-+                              md->num, md->group, md);
-+
-+              err = ext3_mb_load_desc(sb, md->group, &e3b);
-+              BUG_ON(err != 0);
-+
-+              /* there are blocks to put in buddy to make them really free */
-+              count += md->num;
-+              count2++;
-+              ext3_lock_group(sb, md->group);
-+              for (i = 0; i < md->num; i++) {
-+                      mb_debug(" %u", md->blocks[i]);
-+                      mb_free_blocks(&e3b, md->blocks[i], 1);
-+              }
-+              mb_debug("\n");
-+              ext3_unlock_group(sb, md->group);
-+
-+              kfree(md);
-+              ext3_mb_dirty_buddy(&e3b);
-+              ext3_mb_release_desc(&e3b);
-+
-+      } while (md);
-+      mb_debug("freed %u blocks in %u structures\n", count, count2);
-+}
-+
-+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+
-+      if (sbi->s_last_transaction == handle->h_transaction->t_tid)
-+              return;
-+
-+      /* new transaction! time to close last one and free blocks for
-+       * committed transaction. we know that only transaction can be
-+       * active, so previos transaction can be being logged and we
-+       * know that transaction before previous is known to be alreade
-+       * logged. this means that now we may free blocks freed in all
-+       * transactions before previous one. hope I'm clear enough ... */
-+
-+      spin_lock(&sbi->s_md_lock);
-+      if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
-+              mb_debug("new transaction %lu, old %lu\n",
-+                              (unsigned long) handle->h_transaction->t_tid,
-+                              (unsigned long) sbi->s_last_transaction);
-+              list_splice_init(&sbi->s_closed_transaction,
-+                                      &sbi->s_committed_transaction);
-+              list_splice_init(&sbi->s_active_transaction,
-+                                      &sbi->s_closed_transaction);
-+              sbi->s_last_transaction = handle->h_transaction->t_tid;
-+      }
-+      spin_unlock(&sbi->s_md_lock);
-+
-+      ext3_mb_free_committed_blocks(sb);
-+}
-+
-+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
-+                              int group, int block, int count)
-+{
-+      struct ext3_buddy_group_blocks *db = e3b->bd_bd;
-+      struct super_block *sb = e3b->bd_sb;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct ext3_free_metadata *md;
-+      int i;
-+
-+      ext3_lock_group(sb, group);
-+      for (i = 0; i < count; i++) {
-+              md = db->bb_md_cur;
-+              if (md && db->bb_tid != handle->h_transaction->t_tid) {
-+                      db->bb_md_cur = NULL;
-+                      md = NULL;
-+              }
-+
-+              if (md == NULL) {
-+                      ext3_unlock_group(sb, group);
-+                      md = kmalloc(sizeof(*md), GFP_KERNEL);
-+                      if (md == NULL)
-+                              return -ENOMEM;
-+                      md->num = 0;
-+                      md->group = group;
-+
-+                      ext3_lock_group(sb, group);
-+                      if (db->bb_md_cur == NULL) {
-+                              spin_lock(&sbi->s_md_lock);
-+                              list_add(&md->list, &sbi->s_active_transaction);
-+                              spin_unlock(&sbi->s_md_lock);
-+                              db->bb_md_cur = md;
-+                              db->bb_tid = handle->h_transaction->t_tid;
-+                              mb_debug("new md 0x%p for group %u\n",
-+                                                      md, md->group);
-+                      } else {
-+                              kfree(md);
-+                              md = db->bb_md_cur;
-+                      }
-+              }
-+
-+              BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
-+              md->blocks[md->num] = block + i;
-+              md->num++;
-+              if (md->num == EXT3_BB_MAX_BLOCKS) {
-+                      /* no more space, put full container on a sb's list */
-+                      db->bb_md_cur = NULL;
-+              }
-+      }
-+      ext3_unlock_group(sb, group);
-+      return 0;
-+}
-+
-+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
-+                      unsigned long block, unsigned long count, int metadata)
-+{
-+      struct buffer_head *bitmap_bh = NULL;
-+      struct ext3_group_desc *gdp;
-+      struct ext3_super_block *es;
-+      unsigned long bit, overflow;
-+      struct buffer_head *gd_bh;
-+      unsigned long block_group;
-+      struct ext3_sb_info *sbi;
-+      struct super_block *sb;
-+      struct ext3_buddy e3b;
-+      int err = 0, ret;
-+
-+      sb = inode->i_sb;
-+      if (!sb) {
-+              printk ("ext3_free_blocks: nonexistent device");
-+              return;
-+      }
-+
-+      ext3_mb_poll_new_transaction(sb, handle);
-+
-+      sbi = EXT3_SB(sb);
-+      es = EXT3_SB(sb)->s_es;
-+      if (block < le32_to_cpu(es->s_first_data_block) ||
-+          block + count < block ||
-+          block + count > le32_to_cpu(es->s_blocks_count)) {
-+              ext3_error (sb, "ext3_free_blocks",
-+                          "Freeing blocks not in datazone - "
-+                          "block = %lu, count = %lu", block, count);
-+              goto error_return;
-+      }
-+
-+      ext3_debug("freeing block %lu\n", block);
-+
-+do_more:
-+      overflow = 0;
-+      block_group = (block - le32_to_cpu(es->s_first_data_block)) /
-+                    EXT3_BLOCKS_PER_GROUP(sb);
-+      bit = (block - le32_to_cpu(es->s_first_data_block)) %
-+                    EXT3_BLOCKS_PER_GROUP(sb);
-+      /*
-+       * Check to see if we are freeing blocks across a group
-+       * boundary.
-+       */
-+      if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
-+              overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
-+              count -= overflow;
-+      }
-+      bitmap_bh = read_block_bitmap_bh(sb, block_group);
-+      if (!bitmap_bh)
-+              goto error_return;
-+      gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
-+      if (!gdp)
-+              goto error_return;
-+
-+      if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
-+          in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
-+          in_range (block, le32_to_cpu(gdp->bg_inode_table),
-+                    EXT3_SB(sb)->s_itb_per_group) ||
-+          in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
-+                    EXT3_SB(sb)->s_itb_per_group))
-+              ext3_error (sb, "ext3_free_blocks",
-+                          "Freeing blocks in system zones - "
-+                          "Block = %lu, count = %lu",
-+                          block, count);
-+
-+      BUFFER_TRACE(bitmap_bh, "getting write access");
-+      err = ext3_journal_get_write_access(handle, bitmap_bh);
-+      if (err)
-+              goto error_return;
-+
-+      /*
-+       * We are about to modify some metadata.  Call the journal APIs
-+       * to unshare ->b_data if a currently-committing transaction is
-+       * using it
-+       */
-+      BUFFER_TRACE(gd_bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, gd_bh);
-+      if (err)
-+              goto error_return;
-+
-+      err = ext3_mb_load_desc(sb, block_group, &e3b);
-+      if (err)
-+              goto error_return;
-+
-+      if (metadata) {
-+              /* blocks being freed are metadata. these blocks shouldn't
-+               * be used until this transaction is committed */
-+              ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
-+      } else { 
-+              ext3_lock_group(sb, block_group);
-+              mb_free_blocks(&e3b, bit, count);
-+              gdp->bg_free_blocks_count =
-+                      cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
-+              ext3_unlock_group(sb, block_group);
-+              spin_lock(&sbi->s_md_lock);
-+              es->s_free_blocks_count =
-+                      cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) + count);
-+              spin_unlock(&sbi->s_md_lock);
-+      }
-+      
-+      ext3_mb_dirty_buddy(&e3b);
-+      ext3_mb_release_desc(&e3b);
-+
-+      /* FIXME: undo logic will be implemented later and another way */
-+      mb_clear_bits(bitmap_bh->b_data, bit, count);
-+      DQUOT_FREE_BLOCK(inode, count);
-+
-+      /* We dirtied the bitmap block */
-+      BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-+      err = ext3_journal_dirty_metadata(handle, bitmap_bh);
-+
-+      /* And the group descriptor block */
-+      BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-+      ret = ext3_journal_dirty_metadata(handle, gd_bh);
-+      if (!err) err = ret;
-+
-+      if (overflow && !err) {
-+              block += count;
-+              count = overflow;
-+              goto do_more;
-+      }
-+      sb->s_dirt = 1;
-+error_return:
-+      ext3_std_error(sb, err);
-+      return;
-+}
-+
-+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct ext3_super_block *es;
-+      int free, ret = -ENOSPC;
-+
-+      BUG_ON(blocks < 0);
-+      es = EXT3_SB(sb)->s_es;
-+      spin_lock(&sbi->s_reserve_lock);
-+      free = le32_to_cpu(es->s_free_blocks_count);
-+      if (blocks <= free - sbi->s_blocks_reserved) {
-+              sbi->s_blocks_reserved += blocks;
-+              ret = 0;
-+      }
-+      spin_unlock(&sbi->s_reserve_lock);
-+      return ret;
-+}
-+
-+void ext3_mb_release_blocks(struct super_block *sb, int blocks)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+
-+      BUG_ON(blocks < 0);
-+      spin_lock(&sbi->s_reserve_lock);
-+      sbi->s_blocks_reserved -= blocks;
-+      if (sbi->s_blocks_reserved < 0)
-+              printk("EXT3-fs: reserve leak %ld\n", sbi->s_blocks_reserved);
-+      if (sbi->s_blocks_reserved < 0)
-+              sbi->s_blocks_reserved = 0;
-+      spin_unlock(&sbi->s_reserve_lock);
-+}
-+
-+int ext3_new_block(handle_t *handle, struct inode *inode,
-+                      unsigned long goal, u32 *pc, u32 *pb, int *errp)
-+{
-+      int ret, len;
-+
-+      if (!test_opt(inode->i_sb, MBALLOC)) {
-+              ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp);
-+              goto out;
-+      }
-+      len = 1;
-+      ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
-+out:
-+      return ret;
-+}
-+
-+
-+void ext3_free_blocks(handle_t *handle, struct inode * inode,
-+                      unsigned long block, unsigned long count, int metadata)
-+{
-+      if (!test_opt(inode->i_sb, MBALLOC))
-+              ext3_free_blocks_old(handle, inode, block, count);
-+      else
-+              ext3_mb_free_blocks(handle, inode, block, count, metadata);
-+      return;
-+}
-+
-Index: linux-2.4.20-rh-20.9/fs/ext3/super.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/super.c  2004-10-15 20:43:32.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/super.c       2004-10-15 20:57:33.000000000 +0400
-@@ -622,6 +622,7 @@
-       kdev_t j_dev = sbi->s_journal->j_dev;
-       int i;
-+      ext3_mb_release(sb);
-       J_ASSERT(sbi->s_delete_inodes == 0);
-       ext3_ext_release(sb);
-       ext3_xattr_put_super(sb);
-@@ -877,6 +878,8 @@
-                       else if (want_numeric(value, "journal", inum))
-                               return 0;
-               }
-+              else if (!strcmp (this_char, "mballoc"))
-+                      set_opt (*mount_options, MBALLOC);
-               else if (!strcmp (this_char, "noload"))
-                       set_opt (*mount_options, NOLOAD);
-               else if (!strcmp (this_char, "data")) {
-@@ -1506,6 +1509,7 @@
-       }
-       ext3_ext_init(sb);
-+      ext3_mb_init(sb);
-               
-       return sb;
-Index: linux-2.4.20-rh-20.9/fs/ext3/Makefile
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/Makefile 2004-10-15 20:43:32.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/Makefile      2004-10-15 22:00:29.000000000 +0400
-@@ -13,8 +13,8 @@
- obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
--              xattr_trusted.o extents.o
--export-objs += extents.o
-+              xattr_trusted.o extents.o mballoc.o
-+export-objs += extents.o mballoc.o
- obj-m    := $(O_TARGET)
-Index: linux-2.4.20-rh-20.9/fs/ext3/balloc.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/balloc.c 2004-10-15 20:43:28.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/balloc.c      2004-10-15 20:57:33.000000000 +0400
-@@ -203,8 +203,7 @@
-  * differentiating between a group for which we have never performed a bitmap
-  * IO request, and a group for which the last bitmap read request failed.
-  */
--static inline int load_block_bitmap (struct super_block * sb,
--                                   unsigned int block_group)
-+int load_block_bitmap (struct super_block * sb, unsigned int block_group)
- {
-       int slot;
-       
-@@ -253,8 +252,8 @@
- }
- /* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks (handle_t *handle, struct inode * inode,
--                      unsigned long block, unsigned long count)
-+void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
-+                              unsigned long block, unsigned long count)
- {
-       struct buffer_head *bitmap_bh;
-       struct buffer_head *gd_bh;
-@@ -531,9 +530,9 @@
-  * bitmap, and then for any free bit if that fails.
-  * This function also updates quota and i_blocks field.
-  */
--int ext3_new_block (handle_t *handle, struct inode * inode,
--              unsigned long goal, u32 * prealloc_count,
--              u32 * prealloc_block, int * errp)
-+int ext3_new_block_old (handle_t *handle, struct inode * inode,
-+                      unsigned long goal, u32 * prealloc_count,
-+                      u32 * prealloc_block, int * errp)
- {
-       struct buffer_head * bh, *bhtmp;
-       struct buffer_head * bh2;
-Index: linux-2.4.20-rh-20.9/fs/ext3/namei.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/namei.c  2004-10-15 20:43:30.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/namei.c       2004-10-15 20:57:33.000000000 +0400
-@@ -1877,7 +1877,7 @@
-  * If the create succeeds, we fill in the inode information
-  * with d_instantiate(). 
-  */
--static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
-+int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
- {
-       handle_t *handle; 
-       struct inode * inode;
-Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/inode.c  2004-10-15 20:43:32.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/inode.c       2004-10-15 20:57:33.000000000 +0400
-@@ -255,7 +255,7 @@
-               inode->u.ext3_i.i_prealloc_count = 0;
-               inode->u.ext3_i.i_prealloc_block = 0;
-               /* Writer: end */
--              ext3_free_blocks (inode, block, total);
-+              ext3_free_blocks (inode, block, total, 1);
-       }
-       unlock_kernel();
- #endif
-@@ -619,7 +619,7 @@
-               ext3_journal_forget(handle, branch[i].bh);
-       }
-       for (i = 0; i < keys; i++)
--              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
-       return err;
- }
-@@ -723,7 +723,7 @@
-       if (err == -EAGAIN)
-               for (i = 0; i < num; i++)
-                       ext3_free_blocks(handle, inode, 
--                                       le32_to_cpu(where[i].key), 1);
-+                                       le32_to_cpu(where[i].key), 1, 1);
-       return err;
- }
-@@ -1751,7 +1751,7 @@
-               }
-       }
--      ext3_free_blocks(handle, inode, block_to_free, count);
-+      ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
- /**
-@@ -1923,7 +1923,7 @@
-                               ext3_journal_test_restart(handle, inode);
-                       }
--                      ext3_free_blocks(handle, inode, nr, 1);
-+                      ext3_free_blocks(handle, inode, nr, 1, 1);
-                       if (parent_bh) {
-                               /*
-Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/extents.c        2004-10-15 20:43:32.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/extents.c     2004-10-15 20:57:33.000000000 +0400
-@@ -741,7 +741,7 @@
-               for (i = 0; i < depth; i++) {
-                       if (!ablocks[i])
-                               continue;
--                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
-+                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
-               }
-       }
-       kfree(ablocks);
-@@ -1389,7 +1389,7 @@
-                       path->p_idx->ei_leaf);
-       bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->ei_leaf);
-       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
--      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
-+      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
-       return err;
- }
-@@ -1847,10 +1847,12 @@
-       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
-       handle_t *handle = ext3_journal_start(tree->inode, needed);
-       struct buffer_head *bh;
--      int i;
-+      int i, metadata = 0;
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-+      if (S_ISDIR(tree->inode->i_mode))
-+              metadata = 1;
-       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
-               /* tail removal */
-               unsigned long num, start;
-@@ -1862,7 +1864,7 @@
-                       bh = sb_get_hash_table(tree->inode->i_sb, start + i);
-                       ext3_forget(handle, 0, tree->inode, bh, start + i);
-               }
--              ext3_free_blocks(handle, tree->inode, start, num);
-+              ext3_free_blocks(handle, tree->inode, start, num, metadata);
-       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
-               printk("strange request: removal %lu-%lu from %u:%u\n",
-                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.4.20-rh-20.9/fs/ext3/xattr.c
-===================================================================
---- linux-2.4.20-rh-20.9.orig/fs/ext3/xattr.c  2004-10-15 20:43:31.000000000 +0400
-+++ linux-2.4.20-rh-20.9/fs/ext3/xattr.c       2004-10-15 20:57:33.000000000 +0400
-@@ -174,7 +174,7 @@
- ext3_xattr_free_block(handle_t *handle, struct inode * inode,
-                     unsigned long block)
- {
--      ext3_free_blocks(handle, inode, block, 1);
-+      ext3_free_blocks(handle, inode, block, 1, 1);
-       inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
- }
-@@ -182,7 +182,7 @@
- # define ext3_xattr_quota_free(inode) \
-       DQUOT_FREE_BLOCK(inode, 1)
- # define ext3_xattr_free_block(handle, inode, block) \
--      ext3_free_blocks(handle, inode, block, 1)
-+      ext3_free_blocks(handle, inode, block, 1, 1)
- #endif
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
-Index: linux-2.4.20-rh-20.9/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs.h  2004-10-15 20:43:32.000000000 +0400
-+++ linux-2.4.20-rh-20.9/include/linux/ext3_fs.h       2004-10-15 20:57:33.000000000 +0400
-@@ -334,6 +334,7 @@
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC            0x400000/* buddy allocation support */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -664,7 +665,7 @@
- extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
-                                           __u32 *, __u32 *, int *);
- extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
--                            unsigned long);
-+                            unsigned long, int);
- extern unsigned long ext3_count_free_blocks (struct super_block *);
- extern void ext3_check_blocks_bitmap (struct super_block *);
- extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-@@ -727,6 +728,13 @@
- extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
-                      unsigned long);
-+/* mballoc.c */
-+extern int ext3_mb_init(struct super_block *sb);
-+extern int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
-+                            unsigned long goal,int *len, int flags,int *errp);
-+extern int ext3_mb_release(struct super_block *sb);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+
- /* namei.c */
- extern int ext3_orphan_add(handle_t *, struct inode *);
- extern int ext3_orphan_del(handle_t *, struct inode *);
-Index: linux-2.4.20-rh-20.9/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs_sb.h       2004-10-15 20:43:29.000000000 +0400
-+++ linux-2.4.20-rh-20.9/include/linux/ext3_fs_sb.h    2004-10-20 22:08:40.000000000 +0400
-@@ -19,6 +19,7 @@
- #ifdef __KERNEL__
- #include <linux/timer.h>
- #include <linux/wait.h>
-+#include <linux/list.h>
- #endif
- /*
-@@ -31,6 +32,25 @@
- #define EXT3_DELETE_THREAD
-+#define EXT3_BB_MAX_BLOCKS    30
-+struct ext3_free_metadata {
-+      unsigned short group;
-+      unsigned short num;
-+      unsigned short blocks[EXT3_BB_MAX_BLOCKS];
-+      struct list_head list;
-+};
-+
-+#define EXT3_BB_MAX_ORDER     14
-+
-+struct ext3_buddy_group_blocks {
-+      unsigned long   bb_bitmap;
-+      unsigned long   bb_buddy;
-+      spinlock_t      bb_lock;
-+      unsigned        bb_counters[EXT3_BB_MAX_ORDER];
-+      struct ext3_free_metadata *bb_md_cur;
-+      unsigned long bb_tid;
-+};
-+
- /*
-  * third extended-fs super-block data in memory
-  */
-@@ -86,6 +106,17 @@
-       wait_queue_head_t s_delete_thread_queue;
-       wait_queue_head_t s_delete_waiter_queue;
- #endif
-+
-+      /* for buddy allocator */
-+      struct ext3_buddy_group_blocks **s_buddy_blocks;
-+      struct inode *s_buddy;
-+      long s_blocks_reserved;
-+      spinlock_t s_reserve_lock;
-+      struct list_head s_active_transaction;
-+      struct list_head s_closed_transaction;
-+      struct list_head s_committed_transaction;
-+      spinlock_t s_md_lock;
-+      unsigned int s_last_transaction;
- };
- #endif        /* _LINUX_EXT3_FS_SB */
index bb9928a..1d8a4af 100644 (file)
@@ -1679,7 +1679,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1688,8 +1688,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1708,9 +1708,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2083,12 +2083,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2605,7 +2605,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2639,7 +2639,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2678,7 +2678,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2704,7 +2704,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2712,7 +2712,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2727,7 +2727,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2743,7 +2743,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
index a2b9caf..0c2f445 100644 (file)
@@ -1674,7 +1674,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1683,8 +1683,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1703,9 +1703,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2078,12 +2078,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2599,7 +2599,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2633,7 +2633,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2672,7 +2672,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2698,7 +2698,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2706,7 +2706,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2721,7 +2721,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2737,7 +2737,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
index d12c678..5ff3d3b 100644 (file)
@@ -1693,7 +1693,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1702,8 +1702,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
@@ -1722,9 +1722,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
@@ -2097,12 +2097,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
 +              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
 +                      goto err_out;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
 +                      goto err_out;
 +              }
 +              memset(sbi->s_group_info[i], 0, len);
@@ -2618,7 +2618,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2652,7 +2652,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2691,7 +2691,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
index 40bbaa5..3273075 100644 (file)
@@ -26,7 +26,7 @@ Index: linux/fs/ext3/namei.c
        int err;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -98,7 +98,7 @@ Index: linux/fs/ext3/namei.c
                return -EPERM;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX) {
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
 -      }
  
@@ -111,7 +111,7 @@ Index: linux/fs/ext3/namei.c
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
@@ -154,24 +154,3 @@ Index: linux/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -580,14 +580,15 @@
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 4543943..4c3ebb8 100644 (file)
@@ -26,7 +26,7 @@ Index: 69chaos/fs/ext3/namei.c
        int err;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -98,7 +98,7 @@ Index: 69chaos/fs/ext3/namei.c
                return -EPERM;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX) {
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
 -      }
  
@@ -111,7 +111,7 @@ Index: 69chaos/fs/ext3/namei.c
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
@@ -154,24 +154,3 @@ Index: 69chaos/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -582,14 +582,15 @@
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 245d83e..621d1b3 100644 (file)
@@ -24,7 +24,7 @@
        int err;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -96,7 +96,7 @@
                return -EPERM;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX) {
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
 -      }
  
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -581,14 +581,15 @@
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index bb9fc1b..0d360fa 100644 (file)
@@ -26,7 +26,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
        int err;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -86,7 +86,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
        int err;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
  
        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
@@ -97,7 +97,7 @@ Index: linux-2.6.7/fs/ext3/namei.c
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
@@ -140,24 +140,3 @@ Index: linux-2.6.7/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 62bf156..37cca81 100644 (file)
@@ -20,16 +20,16 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
  }
  
  static int ext3_add_nondir(handle_t *handle,
-@@ -1706,7 +1712,7 @@
+@@ -1706,7 +1712,7 @@ static int ext3_add_nondir(handle_t
        struct ext3_dir_entry_2 * de;
        int err, retries = 0;
  
 -      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
++      if (EXT3_DIR_LINK_MAX(dir))
                return -EMLINK;
  
  retry:
-@@ -1729,7 +1735,7 @@
+@@ -1729,7 +1735,7 @@ static int ext3_mkdir(struct inode
        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
        dir_block = ext3_bread (handle, inode, 0, 1, &err);
        if (!dir_block) {
@@ -38,7 +38,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
                ext3_mark_inode_dirty(handle, inode);
                iput (inode);
                goto out_stop;
-@@ -1761,7 +1767,7 @@
+@@ -1761,7 +1767,7 @@ static int ext3_mkdir(struct inode
                iput (inode);
                goto out_stop;
        }
@@ -47,7 +47,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
        d_instantiate(dentry, inode);
-@@ -2026,10 +2032,10 @@
+@@ -2026,10 +2032,10 @@ static int ext3_rmdir (struct inode
        retval = ext3_delete_entry(handle, dir, de, bh);
        if (retval)
                goto end_rmdir;
@@ -62,7 +62,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        inode->i_version++;
        inode->i_nlink = 0;
        /* There's no need to set i_disksize: the fact that i_nlink is
-@@ -2039,7 +2045,7 @@
+@@ -2039,7 +2045,7 @@ static int ext3_rmdir (struct inode
        ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
        ext3_mark_inode_dirty(handle, inode);
@@ -71,7 +71,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
  
-@@ -2090,7 +2096,7 @@
+@@ -2090,7 +2096,7 @@ static int ext3_unlink(struct inode
        dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
        ext3_update_dx_flag(dir);
        ext3_mark_inode_dirty(handle, dir);
@@ -80,27 +80,27 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
        if (!inode->i_nlink)
                ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime;
-@@ -2165,7 +2171,7 @@
+@@ -2165,7 +2171,7 @@ static int ext3_link (struct dentry
        struct inode *inode = old_dentry->d_inode;
        int err, retries = 0;
  
 -      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(inode))
++      if (EXT3_DIR_LINK_MAX(inode))
                return -EMLINK;
  
  retry:
-@@ -2252,8 +2258,8 @@
+@@ -2252,8 +2258,8 @@ static int ext3_rename (struct inode
                if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
                        goto end_rename;
                retval = -EMLINK;
 -              if (!new_inode && new_dir!=old_dir &&
 -                              new_dir->i_nlink >= EXT3_LINK_MAX)
 +              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
++                  EXT3_DIR_LINK_MAX(new_dir))
                        goto end_rename;
        }
        if (!new_bh) {
-@@ -2310,7 +2316,7 @@
+@@ -2310,7 +2316,7 @@ static int ext3_rename (struct inode
        }
  
        if (new_inode) {
@@ -109,7 +109,7 @@ diff -Nur orig/fs/ext3/namei.c patch/fs/ext3/namei.c
                new_inode->i_ctime = CURRENT_TIME_SEC;
        }
        old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
-@@ -2321,11 +2327,13 @@
+@@ -2321,11 +2327,13 @@ static int ext3_rename (struct inode
                PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
                ext3_journal_dirty_metadata(handle, dir_bh);
@@ -140,24 +140,3 @@ Index: linux-2.6.7/include/linux/ext3_fs.h
  
  /*
   * Macro-instructions used to manage several block sizes
-@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
index 77d5b30..0adb06c 100644 (file)
@@ -2,6 +2,15 @@ Index: linux-2.6.5-7.108/fs/nfs/dir.c
 ===================================================================
 --- linux-2.6.5-7.108.orig/fs/nfs/dir.c        2004-09-15 19:26:43.012732408 +0300
 +++ linux-2.6.5-7.108/fs/nfs/dir.c     2004-09-15 20:03:32.882781096 +0300
+@@ -709,7 +709,7 @@
+               return 0;
+       if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+               return 0;
+-      return (nd->intent.open.flags & O_EXCL) != 0;
++      return (nd->intent.it_flags & O_EXCL) != 0;
+ }
+ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 @@ -782,7 +782,7 @@
        if (nd->flags & LOOKUP_DIRECTORY)
                return 0;
@@ -47,6 +56,15 @@ Index: linux-2.6.5-7.108/fs/nfs/dir.c
        if (openflags & O_CREAT) {
                /* If this is a negative dentry, just drop it */
                if (!inode)
+@@ -1026,7 +1026,7 @@
+       attr.ia_valid = ATTR_MODE;
+       if (nd && (nd->flags & LOOKUP_CREATE))
+-              open_flags = nd->intent.open.flags;
++              open_flags = nd->intent.it_flags;
+       /*
+        * The 0 argument passed into the create function should one day
 Index: linux-2.6.5-7.108/fs/nfs/nfs4proc.c
 ===================================================================
 --- linux-2.6.5-7.108.orig/fs/nfs/nfs4proc.c   2004-04-04 06:37:39.000000000 +0300
index 41e5ecb..ff06d68 100644 (file)
@@ -2,6 +2,15 @@ Index: linux-2.6.12-rc6/fs/nfs/dir.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/nfs/dir.c 2005-06-14 14:22:14.585699648 +0200
 +++ linux-2.6.12-rc6/fs/nfs/dir.c      2005-06-14 14:26:39.884524523 +0200
+@@ -727,7 +727,7 @@
+               return 0;
+       if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
+               return 0;
+-      return (nd->intent.open.flags & O_EXCL) != 0;
++      return (nd->intent.it_flags & O_EXCL) != 0;
+ }
+ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 @@ -783,7 +783,7 @@
        if (nd->flags & LOOKUP_DIRECTORY)
                return 0;
@@ -47,6 +56,15 @@ Index: linux-2.6.12-rc6/fs/nfs/dir.c
        /* We cannot do exclusive creation on a positive dentry */
        if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
                goto no_open;
+@@ -1028,7 +1028,7 @@
+       attr.ia_valid = ATTR_MODE;
+       if (nd && (nd->flags & LOOKUP_CREATE))
+-              open_flags = nd->intent.open.flags;
++              open_flags = nd->intent.it_flags;
+       lock_kernel();
+       nfs_begin_data_update(dir);
 Index: linux-2.6.12-rc6/fs/nfs/nfs4proc.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/nfs/nfs4proc.c    2005-06-06 17:22:29.000000000 +0200
diff --git a/lustre/kernel_patches/patches/tcp-rto_proc-2.6.9.patch b/lustre/kernel_patches/patches/tcp-rto_proc-2.6.9.patch
new file mode 100644 (file)
index 0000000..f3c6023
--- /dev/null
@@ -0,0 +1,130 @@
+Index: linux+rhel4+chaos/include/linux/sysctl.h
+===================================================================
+--- linux+rhel4+chaos.orig/include/linux/sysctl.h
++++ linux+rhel4+chaos/include/linux/sysctl.h
+@@ -348,6 +348,8 @@ enum
+       NET_TCP_TSO_WIN_DIVISOR=107,
+       NET_TCP_BIC_BETA=108,
+       NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
++      NET_TCP_RTO_MAX=110,
++      NET_TCP_RTO_INIT=111,
+ };
+ enum {
+Index: linux+rhel4+chaos/net/ipv4/sysctl_net_ipv4.c
+===================================================================
+--- linux+rhel4+chaos.orig/net/ipv4/sysctl_net_ipv4.c
++++ linux+rhel4+chaos/net/ipv4/sysctl_net_ipv4.c
+@@ -49,6 +49,10 @@ extern int inet_peer_maxttl;
+ extern int inet_peer_gc_mintime;
+ extern int inet_peer_gc_maxtime;
++/* From tcp_timer.c */
++extern unsigned sysctl_tcp_rto_max;
++extern unsigned sysctl_tcp_rto_init;
++
+ #ifdef CONFIG_SYSCTL
+ static int tcp_retr1_max = 255; 
+ static int ip_local_port_range_min[] = { 1, 1 };
+@@ -699,6 +703,22 @@ ctl_table ipv4_table[] = {
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
++      {
++              .ctl_name       = NET_TCP_RTO_MAX,
++              .procname       = "tcp_rto_max",
++              .data           = &sysctl_tcp_rto_max,
++              .maxlen         = sizeof(unsigned),
++              .mode           = 0644, 
++              .proc_handler   = &proc_dointvec
++      },
++      {
++              .ctl_name       = NET_TCP_RTO_INIT,
++              .procname       = "tcp_rto_init",
++              .data           = &sysctl_tcp_rto_init,
++              .maxlen         = sizeof(unsigned), 
++              .mode           = 0644,
++              .proc_handler   = &proc_dointvec
++      },
+       { .ctl_name = 0 }
+ };
+Index: linux+rhel4+chaos/net/ipv4/tcp_timer.c
+===================================================================
+--- linux+rhel4+chaos.orig/net/ipv4/tcp_timer.c
++++ linux+rhel4+chaos/net/ipv4/tcp_timer.c
+@@ -32,6 +32,9 @@ int sysctl_tcp_retries1 = TCP_RETR1;
+ int sysctl_tcp_retries2 = TCP_RETR2;
+ int sysctl_tcp_orphan_retries;
++unsigned sysctl_tcp_rto_max        = TCP_RTO_MAX;
++unsigned sysctl_tcp_rto_init       = TCP_TIMEOUT_INIT;
++
+ static void tcp_write_timer(unsigned long);
+ static void tcp_delack_timer(unsigned long);
+ static void tcp_keepalive_timer (unsigned long data);
+@@ -104,7 +107,7 @@ static int tcp_out_of_resources(struct s
+       /* If peer does not open window for long time, or did not transmit 
+        * anything for long time, penalize it. */
+-      if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
++      if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*sysctl_tcp_rto_max || !do_reset)
+               orphans <<= 1;
+       /* If some dubious ICMP arrived, penalize even more. */
+@@ -186,7 +189,7 @@ static int tcp_write_timeout(struct sock
+               retry_until = sysctl_tcp_retries2;
+               if (sock_flag(sk, SOCK_DEAD)) {
+-                      int alive = (tp->rto < TCP_RTO_MAX);
++                      int alive = (tp->rto < sysctl_tcp_rto_max);
+  
+                       retry_until = tcp_orphan_retries(sk, alive);
+@@ -292,7 +295,7 @@ static void tcp_probe_timer(struct sock 
+       max_probes = sysctl_tcp_retries2;
+       if (sock_flag(sk, SOCK_DEAD)) {
+-              int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
++              int alive = ((tp->rto<<tp->backoff) < sysctl_tcp_rto_max);
+  
+               max_probes = tcp_orphan_retries(sk, alive);
+@@ -336,7 +339,7 @@ static void tcp_retransmit_timer(struct 
+                              inet->num, tp->snd_una, tp->snd_nxt);
+               }
+ #endif
+-              if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
++              if (tcp_time_stamp - tp->rcv_tstamp > sysctl_tcp_rto_max) {
+                       tcp_write_err(sk);
+                       goto out;
+               }
+@@ -405,7 +408,7 @@ static void tcp_retransmit_timer(struct 
+       tp->retransmits++;
+ out_reset_timer:
+-      tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
++      tp->rto = min(tp->rto << 1, sysctl_tcp_rto_max);
+       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+       if (tp->retransmits > sysctl_tcp_retries1)
+               __sk_dst_reset(sk);
+@@ -502,7 +505,7 @@ static void tcp_synack_timer(struct sock
+       if (tp->defer_accept)
+               max_retries = tp->defer_accept;
+-      budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
++      budget = 2*(TCP_SYNQ_HSIZE/(sysctl_tcp_rto_init/TCP_SYNQ_INTERVAL));
+       i = lopt->clock_hand;
+       do {
+@@ -516,8 +519,8 @@ static void tcp_synack_timer(struct sock
+                                       if (req->retrans++ == 0)
+                                               lopt->qlen_young--;
+-                                      timeo = min((TCP_TIMEOUT_INIT << req->retrans),
+-                                                  TCP_RTO_MAX);
++                                      timeo = min((sysctl_tcp_rto_init << req->retrans),
++                                                  sysctl_tcp_rto_max);
+                                       req->expires = now + timeo;
+                                       reqp = &req->dl_next;
+                                       continue;
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.9-rhel4.patch
new file mode 100644 (file)
index 0000000..2b6a0da
--- /dev/null
@@ -0,0 +1,434 @@
+--- linux.orig/include/linux/skbuff.h  2004-11-10 17:02:53.000000000 +0000
++++ linux/include/linux/skbuff.h       2005-02-02 12:09:43.000000000 +0000
+@@ -134,6 +134,30 @@
+       __u16 size;
+ };
++/* Support for callback when skb data has been released */
++typedef struct zccd                           /* Zero Copy Callback Descriptor */
++{                                             /* (embed as first member of custom struct) */
++      atomic_t        zccd_count;             /* reference count */
++      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
++{
++      atomic_set (&d->zccd_count, 1);
++      d->zccd_destructor = callback;
++}
++
++static inline void zccd_get (zccd_t *d)               /* take a reference */
++{
++      atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d)               /* release a reference */
++{
++      if (atomic_dec_and_test (&d->zccd_count))
++              (d->zccd_destructor)(d);
++}
++
+ /* This data is invariant across clones and lives at
+  * the end of the header data, ie. at skb->end.
+  */
+@@ -143,6 +167,12 @@
+       unsigned short  tso_size;
+       unsigned short  tso_segs;
+       struct sk_buff  *frag_list;
++      zccd_t          *zccd;                  /* zero copy descriptor */
++      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
++      /* NB we expect zero-copy data to be at least 1 packet, so
++       * having 2 zccds means we don't unneccessarily split the packet
++       * where consecutive zero-copy sends abutt.
++       */
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+ };
+--- linux.orig/include/net/tcp.h       2004-11-10 17:02:53.000000000 +0000
++++ linux/include/net/tcp.h    2005-02-02 10:12:14.000000000 +0000
+@@ -785,6 +785,8 @@
+ extern int                    tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
+                                           struct msghdr *msg, size_t size);
+ extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t                        tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                                                int flags, zccd_t *zccd);
+ extern int                    tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+@@ -881,6 +883,9 @@
+                                           struct msghdr *msg,
+                                           size_t len, int nonblock, 
+                                           int flags, int *addr_len);
++extern int                    tcp_recvpackets(struct sock *sk,
++                                              struct sk_buff_head *packets,
++                                              int len, int nonblock);
+ extern int                    tcp_listen_start(struct sock *sk);
+--- linux.orig/net/core/skbuff.c       2004-11-10 17:02:53.000000000 +0000
++++ linux/net/core/skbuff.c    2005-02-02 10:12:14.000000000 +0000
+@@ -155,6 +155,8 @@
+       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->tso_segs = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
++      skb_shinfo(skb)->zccd2 = NULL;
+ out:
+       return skb;
+ nodata:
+@@ -189,6 +191,10 @@
+ {
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
++              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -476,6 +482,14 @@
+       n->data_len  = skb->data_len;
+       n->len       = skb->len;
++      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+@@ -518,6 +532,8 @@
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
++      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
++      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
+       if (skb_shared(skb))
+               BUG();
+@@ -539,6 +555,11 @@
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
++      if (zccd != NULL)                       /* user zero copy descriptor? */
++              zccd_get (zccd);                /* extra ref (pages are shared) */
++      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
++              zccd_get (zccd2);               /* extra ref (pages are shared) */
++
+       skb_release_data(skb);
+       off = (data + nhead) - skb->head;
+@@ -552,6 +573,8 @@
+       skb->nh.raw  += off;
+       skb->cloned   = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
++      skb_shinfo(skb)->zccd = zccd;
++      skb_shinfo(skb)->zccd2 = zccd2;
+       return 0;
+ nodata:
+--- linux.orig/net/core/dev.c  2004-10-18 22:54:08.000000000 +0100
++++ linux/net/core/dev.c       2005-02-02 10:12:14.000000000 +0000
+@@ -1196,6 +1196,8 @@
+       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+       ninfo->nr_frags = 0;
+       ninfo->frag_list = NULL;
++      ninfo->zccd = NULL;             /* copied data => no user zero copy descriptor */
++      ninfo->zccd2 = NULL;
+       /* Offset between the two in bytes */
+       offset = data - skb->head;
+--- linux-2.6.9-org/net/ipv4/tcp.c     2005-05-20 10:09:34.000000000 +0100
++++ linux-2.6.9/net/ipv4/tcp.c 2005-05-20 10:22:14.000000000 +0100
+@@ -628,8 +628,9 @@
+       }
+ }
++/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
+ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
+-                       size_t psize, int flags)
++size_t psize, int flags, zccd_t *zccd)
+ {
+       struct tcp_opt *tp = tcp_sk(sk);
+       int mss_now;
+@@ -676,6 +677,17 @@
+                       copy = size;
+               i = skb_shinfo(skb)->nr_frags;
++
++              if (zccd != NULL &&             /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != NULL && /* skb part of a zcc I/O */
++                  skb_shinfo(skb)->zccd2 != NULL &&
++                  skb_shinfo(skb)->zccd != zccd && /* not the same one */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      tcp_mark_push (tp, skb);
++                      goto new_segment;
++              }
++
+               can_coalesce = skb_can_coalesce(skb, i, page, offset);
+               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+                       tcp_mark_push(tp, skb);
+@@ -692,6 +704,20 @@
+                       skb_fill_page_desc(skb, i, page, offset, copy);
+               }
++              if (zccd != NULL &&     /* this is a zcc I/O */
++                  skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++                  skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      zccd_get (zccd);        /* bump ref count */
++
++                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++                              skb_shinfo(skb)->zccd = zccd;
++                      else
++                              skb_shinfo(skb)->zccd2 = zccd;
++              }
++
+               skb->len += copy;
+               skb->data_len += copy;
+               skb->truesize += copy;
+@@ -760,7 +786,31 @@
+       lock_sock(sk);
+       TCP_CHECK_TIMER(sk);
+-      res = do_tcp_sendpages(sk, &page, offset, size, flags);
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return res;
++}
++
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset,
++                        size_t size, int flags, zccd_t *zccd)
++{
++      ssize_t res;
++      struct sock *sk = sock->sk;
++
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++
++      if (!(sk->sk_route_caps & NETIF_F_SG) ||     /* caller shouldn't waste */
++          !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))/* time on double mapping */
++              BUG ();
++
++#undef TCP_ZC_CSUM_FLAGS
++
++      lock_sock(sk);
++      TCP_CHECK_TIMER(sk);
++
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
++
+       TCP_CHECK_TIMER(sk);
+       release_sock(sk);
+       return res;
+@@ -1528,6 +1578,194 @@
+       goto out;
+ }
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++                   int len, int nonblock)
++{
++      struct tcp_opt *tp = tcp_sk(sk);
++      int copied;
++      long timeo;
++
++      BUG_TRAP (len > 0);
++      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
++
++      lock_sock(sk);
++
++      TCP_CHECK_TIMER(sk);
++
++      copied = -ENOTCONN;
++      if (sk->sk_state == TCP_LISTEN)
++              goto out;
++
++      copied = 0;
++      timeo = sock_rcvtimeo(sk, nonblock);
++
++      do {
++              struct sk_buff * skb;
++              u32 offset;
++              unsigned long used;
++              int exhausted;
++              int eaten;
++
++              /* Are we at urgent data? Stop if we have read anything. */
++              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++                      break;
++
++              /* We need to check signals first, to get correct SIGURG
++               * handling. FIXME: Need to check this doesnt impact 1003.1g
++               * and move it down to the bottom of the loop
++               */
++              if (signal_pending(current)) {
++                      if (copied)
++                              break;
++                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++                      break;
++              }
++
++              /* Next get a buffer. */
++
++              skb = skb_peek(&sk->sk_receive_queue);
++
++              if (skb == NULL) {              /* nothing ready */
++                      if (copied) {
++                              if (sk->sk_err ||
++                                  sk->sk_state == TCP_CLOSE ||
++                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
++                                  !timeo ||
++                                  (0))
++                                      break;
++                      } else {
++                              if (sock_flag(sk, SOCK_DONE))
++                                      break;
++
++                              if (sk->sk_err) {
++                                      copied = sock_error(sk);
++                                      break;
++                              }
++
++                              if (sk->sk_shutdown & RCV_SHUTDOWN)
++                                      break;
++
++                              if (sk->sk_state == TCP_CLOSE) {
++                                      if (!(sock_flag(sk, SOCK_DONE))) {
++                                              /* This occurs when user tries to read
++                                               * from never connected socket.
++                                               */
++                                              copied = -ENOTCONN;
++                                              break;
++                                      }
++                                      break;
++                              }
++
++                              if (!timeo) {
++                                      copied = -EAGAIN;
++                                      break;
++                              }
++                      }
++
++                      cleanup_rbuf(sk, copied);
++                      sk_wait_data(sk, &timeo);
++                      continue;
++              }
++
++              BUG_TRAP (atomic_read (&skb->users) == 1);
++
++              exhausted = eaten = 0;
++
++              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++              if (skb->h.th->syn)
++                      offset--;
++
++              used = skb->len - offset;
++
++              if (tp->urg_data) {
++                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
++                      if (urg_offset < used) {
++                              if (!urg_offset) { /* at urgent date */
++                                      if (!(sock_flag(sk, SOCK_URGINLINE))) {
++                                              tp->copied_seq++; /* discard the single byte of urgent data */
++                                              offset++;
++                                              used--;
++                                      }
++                              } else {                /* truncate read */
++                                      used = urg_offset;
++                              }
++                      }
++              }
++
++              BUG_TRAP (used >= 0);
++              if (len < used)
++                      used = len;
++
++              if (used == 0) {
++                      exhausted = 1;
++              } else {
++                      if (skb_is_nonlinear (skb)) {
++                              int   rc = skb_linearize (skb, GFP_KERNEL);
++
++                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
++
++                              if (rc) {
++                                      if (!copied)
++                                              copied = rc;
++                                      break;
++                              }
++                      }
++
++                      if ((offset + used) == skb->len) { /* consuming the whole packet */
++                              __skb_unlink (skb, &sk->sk_receive_queue);
++                              dst_release (skb->dst);
++                              skb_orphan (skb);
++                              __skb_pull (skb, offset);
++                              __skb_queue_tail (packets, skb);
++                              exhausted = eaten = 1;
++                      } else {        /* consuming only part of the packet */
++                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
++
++                              if (skb2 == NULL) {
++                                      if (!copied)
++                                              copied = -ENOMEM;
++                                      break;
++                              }
++
++                              dst_release (skb2->dst);
++                              __skb_pull (skb2, offset);
++                              __skb_trim (skb2, used);
++                              __skb_queue_tail (packets, skb2);
++                      }
++
++                      tp->copied_seq += used;
++                      copied += used;
++                      len -= used;
++              }
++
++              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++                      tp->urg_data = 0;
++                      tcp_fast_path_check(sk, tp);
++              }
++
++              if (!exhausted)
++                      continue;
++
++              if (skb->h.th->fin) {
++                      tp->copied_seq++;
++                      if (!eaten)
++                              sk_eat_skb (sk, skb);
++                      break;
++              }
++
++              if (!eaten)
++                      sk_eat_skb (sk, skb);
++
++      } while (len > 0);
++
++ out:
++      /* Clean up data we have read: This will do ACK frames. */
++      cleanup_rbuf(sk, copied);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return copied;
++}
++
+ /*
+  *    State processing on a close. This implements the state shift for
+  *    sending our FIN frame. Note that we only send a FIN for some
+@@ -2326,6 +2572,8 @@
+ EXPORT_SYMBOL(tcp_recvmsg);
+ EXPORT_SYMBOL(tcp_sendmsg);
+ EXPORT_SYMBOL(tcp_sendpage);
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_setsockopt);
+ EXPORT_SYMBOL(tcp_shutdown);
+ EXPORT_SYMBOL(tcp_statistics);
index 695423b..1d87227 100644 (file)
@@ -569,28 +569,6 @@ Index: linux-2.6.5-12.1/fs/stat.c
                fput(f);
        }
  
-Index: linux-2.6.5-12.1/fs/nfs/dir.c
-===================================================================
---- linux-2.6.5-12.1.orig/fs/nfs/dir.c 2004-05-10 12:21:53.000000000 -0400
-+++ linux-2.6.5-12.1/fs/nfs/dir.c      2004-06-03 18:31:28.000000000 -0400
-@@ -709,7 +709,7 @@
-               return 0;
-       if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
-               return 0;
--      return (nd->intent.open.flags & O_EXCL) != 0;
-+      return (nd->intent.it_flags & O_EXCL) != 0;
- }
- static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
-@@ -1026,7 +1026,7 @@
-       attr.ia_valid = ATTR_MODE;
-       if (nd && (nd->flags & LOOKUP_CREATE))
--              open_flags = nd->intent.open.flags;
-+              open_flags = nd->intent.it_flags;
-       /*
-        * The 0 argument passed into the create function should one day
 Index: linux-2.6.5-12.1/fs/inode.c
 ===================================================================
 --- linux-2.6.5-12.1.orig/fs/inode.c   2004-05-10 12:21:56.000000000 -0400
index 80db906..6edb8bd 100644 (file)
@@ -580,28 +580,6 @@ Index: linux-2.6.12.5/fs/stat.c
                fput(f);
        }
        return error;
-Index: linux-2.6.12.5/fs/nfs/dir.c
-===================================================================
---- linux-2.6.12.5.orig/fs/nfs/dir.c   2005-08-17 17:51:28.000000000 +0200
-+++ linux-2.6.12.5/fs/nfs/dir.c        2005-08-17 17:51:44.000000000 +0200
-@@ -727,7 +727,7 @@
-               return 0;
-       if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
-               return 0;
--      return (nd->intent.open.flags & O_EXCL) != 0;
-+      return (nd->intent.it_flags & O_EXCL) != 0;
- }
- static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
-@@ -1028,7 +1028,7 @@
-       attr.ia_valid = ATTR_MODE;
-       if (nd && (nd->flags & LOOKUP_CREATE))
--              open_flags = nd->intent.open.flags;
-+              open_flags = nd->intent.it_flags;
-       lock_kernel();
-       nfs_begin_data_update(dir);
 Index: linux-2.6.12.5/fs/inode.c
 ===================================================================
 --- linux-2.6.12.5.orig/fs/inode.c     2005-08-17 17:51:28.000000000 +0200
index 5b03878..0b2e845 100644 (file)
@@ -20,3 +20,4 @@ linux-2.6-binutils-2.16.patch
 compile-fixes-2.6.9-rhel4-22.patch
 vm-tunables-rhel4.patch 
 2.6-rhel4-kgdb-ga.patch
+tcp-zero-copy-2.6.9-rhel4.patch
index 790361c..7a39b32 100644 (file)
@@ -13,4 +13,3 @@ header-guards-2.6-suse.patch
 md_path_lookup-2.6-suse.patch
 ext3-super-ntohl.patch
 export-show_task-2.6-vanilla.patch
-export-filemap_populate.patch
index 5e34152..a0a2633 100644 (file)
@@ -1,5 +1,5 @@
 lnxmaj="2.6.5"
-lnxrel="7.252"
+lnxrel="7.244"
 
 KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
 # they include our patches
index 0ceec62..ae6e939 100644 (file)
@@ -45,13 +45,13 @@ static int quotfmt_initialize(struct lustre_quota_info *lqi,
                 int namelen = strlen(name);
 
                 /* remove the stale test quotafile */
-                down(&parent_inode->i_sem);
+                LOCK_INODE_MUTEX(parent_inode);
                 de = lookup_one_len(name, tgt->obd_lvfs_ctxt.pwd, namelen);
                 if (!IS_ERR(de) && de->d_inode)
                         vfs_unlink(parent_inode, de);
                 if (!IS_ERR(de))
                         dput(de);
-                up(&parent_inode->i_sem);
+                UNLOCK_INODE_MUTEX(parent_inode);
 
                 /* create quota file */
                 fp = filp_open(name, O_CREAT | O_EXCL, 0644);
@@ -99,7 +99,7 @@ static int quotfmt_finalize(struct lustre_quota_info *lqi,
                 filp_close(lqi->qi_files[i], 0);
 
                 /* unlink quota file */
-                down(&parent_inode->i_sem);
+                LOCK_INODE_MUTEX(parent_inode);
 
                 de = lookup_one_len(name, tgt->obd_lvfs_ctxt.pwd, namelen);
                 if (IS_ERR(de) || de->d_inode == NULL) {
@@ -116,7 +116,7 @@ static int quotfmt_finalize(struct lustre_quota_info *lqi,
               dput:
                 if (!IS_ERR(de))
                         dput(de);
-                up(&parent_inode->i_sem);
+                UNLOCK_INODE_MUTEX(parent_inode);
         }
 
         pop_ctxt(saved, &tgt->obd_lvfs_ctxt, NULL);
index e30bb66..2e247ff 100644 (file)
@@ -49,10 +49,6 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
 /* ldlm_flock.c */
 int ldlm_process_flock_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                             ldlm_error_t *err);
-/* ldlm_llog.c */
-int ldlm_process_llog_lock(struct ldlm_lock *lock, int *flags, int first_enq,
-                           ldlm_error_t *err);
-
 
 /* ldlm_inodebits.c */
 int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags,
index 2108c45..6057380 100644 (file)
@@ -262,14 +262,14 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
         spin_lock_init(&cli->cl_write_page_hist.oh_lock);
         spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
         spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
-        if (num_physpages >> (20 - PAGE_SHIFT) <= 128) { /* <= 128 MB */
-                cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 4;
-                cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 4;
-        } else if (num_physpages >> (20 - PAGE_SHIFT) <= 256) { /* <= 256 MB */
-                cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 2;
-                cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 2;
+        cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
+        if (num_physpages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
+                cli->cl_max_rpcs_in_flight = 2;
+        } else if (num_physpages >> (20 - PAGE_SHIFT) <= 256 /* MB */) {
+                cli->cl_max_rpcs_in_flight = 3;
+        } else if (num_physpages >> (20 - PAGE_SHIFT) <= 512 /* MB */) {
+                cli->cl_max_rpcs_in_flight = 4;
         } else {
-                cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
                 cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
         }
 
@@ -282,17 +282,15 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
         ptlrpc_init_client(rq_portal, rp_portal, name,
                            &obddev->obd_ldlm_client);
 
-        imp = class_new_import();
+        imp = class_new_import(obddev);
         if (imp == NULL)
                 GOTO(err_ldlm, rc = -ENOENT);
         imp->imp_client = &obddev->obd_ldlm_client;
-        imp->imp_obd = obddev;
         imp->imp_connect_op = connect_op;
-        imp->imp_generation = 0;
         imp->imp_initial_recov = 1;
         imp->imp_initial_recov_bk = 0;
         INIT_LIST_HEAD(&imp->imp_pinger_chain);
-        memcpy(imp->imp_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
+        memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
                LUSTRE_CFG_BUFLEN(lcfg, 1));
         class_import_put(imp);
 
@@ -312,7 +310,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
                 if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
                         CDEBUG(D_HA, "marking %s %s->%s as inactive\n",
                                name, obddev->obd_name,
-                               imp->imp_target_uuid.uuid);
+                               cli->cl_target_uuid.uuid);
                         imp->imp_invalid = 1;
                 }
         }
@@ -332,13 +330,6 @@ err:
 
 int client_obd_cleanup(struct obd_device *obddev)
 {
-        struct client_obd *cli = &obddev->u.cli;
-
-        if (!cli->cl_import)
-                RETURN(-EINVAL);
-        class_destroy_import(cli->cl_import);
-        cli->cl_import = NULL;
-
         ldlm_put_ref(obddev->obd_force);
 
         RETURN(0);
@@ -457,11 +448,15 @@ int client_disconnect_export(struct obd_export *exp)
         }
 
         /* Yeah, obd_no_recov also (mainly) means "forced shutdown". */
-        if (obd->obd_no_recov)
-                ptlrpc_invalidate_import(imp);
-        else
+        if (!obd->obd_no_recov)
                 rc = ptlrpc_disconnect_import(imp);
 
+        ptlrpc_invalidate_import(imp);
+        imp->imp_deactive = 1;
+        ptlrpc_free_rq_pool(imp->imp_rq_pool);
+        class_destroy_import(imp);
+        cli->cl_import = NULL;
+
         EXIT;
  out_no_disconnect:
         err = class_disconnect(exp);
@@ -479,12 +474,13 @@ int client_disconnect_export(struct obd_export *exp)
 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
                             struct obd_uuid *cluuid)
 {
-        if (exp->exp_connection) {
+        if (exp->exp_connection && exp->exp_imp_reverse) {
                 struct lustre_handle *hdl;
                 hdl = &exp->exp_imp_reverse->imp_remote_handle;
                 /* Might be a re-connect after a partition. */
                 if (!memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
-                        CWARN("%s reconnecting\n", cluuid->uuid);
+                        CWARN("%s: %s reconnecting\n", exp->exp_obd->obd_name,
+                              cluuid->uuid);
                         conn->cookie = exp->exp_handle.h_cookie;
                         /* target_handle_connect() treats EALREADY and
                          * -EALREADY differently.  EALREADY means we are
@@ -631,6 +627,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         list_for_each(p, &target->obd_exports) {
                 export = list_entry(p, struct obd_export, exp_obd_chain);
                 if (obd_uuid_equals(&cluuid, &export->exp_client_uuid)) {
+                        if (export->exp_connecting) { /* bug 9635, et. al. */
+                                CWARN("%s: exp %p already connecting\n",
+                                      export->exp_obd->obd_name, export);
+                                export = NULL;
+                                rc = -EALREADY;
+                                break;
+                        }
+                        export->exp_connecting = 1;
                         spin_unlock(&target->obd_dev_lock);
                         LASSERT(export->exp_obd == target);
 
@@ -642,17 +646,23 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         /* If we found an export, we already unlocked. */
         if (!export) {
                 spin_unlock(&target->obd_dev_lock);
+                OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 2 * obd_timeout);
         } else if (req->rq_reqmsg->conn_cnt == 1) {
                 CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; "
                        "cookies not random?\n", target->obd_name,
                        libcfs_nid2str(req->rq_peer.nid), cluuid.uuid);
                 GOTO(out, rc = -EALREADY);
+        } else {
+                OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
         }
 
-        /* We indicate the reconnection in a flag, not an error code. */
+        /* We want to handle EALREADY but *not* -EALREADY from
+         * target_handle_reconnect(), return reconnection state in a flag */
         if (rc == EALREADY) {
                 lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
                 rc = 0;
+        } else if (rc) {
+                GOTO(out, rc);
         }
 
         /* Tell the client if we're in recovery. */
@@ -683,9 +693,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 rc = obd_reconnect(export, target, &cluuid, data);
         }
 
-        /* we want to handle EALREADY but *not* -EALREADY from
-         * target_handle_reconnect() */
-        if (rc && rc != EALREADY)
+        if (rc)
                 GOTO(out, rc);
 
         /* Return only the parts of obd_connect_data that we understand, so the
@@ -753,15 +761,16 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
 
         if (export->exp_imp_reverse != NULL)
                 class_destroy_import(export->exp_imp_reverse);
-        revimp = export->exp_imp_reverse = class_new_import();
+        revimp = export->exp_imp_reverse = class_new_import(target);
         revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection);
         revimp->imp_client = &export->exp_obd->obd_ldlm_client;
         revimp->imp_remote_handle = conn;
-        revimp->imp_obd = target;
         revimp->imp_dlm_fake = 1;
         revimp->imp_state = LUSTRE_IMP_FULL;
         class_import_put(revimp);
 out:
+        if (export)
+                export->exp_connecting = 0;
         if (rc)
                 req->rq_status = rc;
         RETURN(rc);
diff --git a/lustre/ldlm/ldlm_llog.c b/lustre/ldlm/ldlm_llog.c
deleted file mode 100644 (file)
index f573ebe..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
- *   Author: LinSongTao<lin.songtao@clusterfs.com>
- *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
- *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
- */
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#ifdef __KERNEL__
-#include <linux/lustre_dlm.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
-#include <libcfs/list.h>
-#else
-#include <liblustre.h>
-#include <linux/obd_class.h>
-#endif
-
-#include "ldlm_internal.h"
-
-#define l_llog_waitq   l_lru
-
-static struct list_head ldlm_llog_waitq = LIST_HEAD_INIT(ldlm_llog_waitq);
-
-int ldlm_llog_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                            void *data, int flag);
-
-
-static inline void
-ldlm_llog_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
-{
-        ENTRY;
-
-        LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%x)",
-                   mode, flags);
-
-        LASSERT(list_empty(&lock->l_flock_waitq));
-
-        list_del_init(&lock->l_res_link);
-        if (flags == LDLM_FL_WAIT_NOREPROC) {
-                /* client side - set a flag to prevent sending a CANCEL */
-                lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
-                ldlm_lock_decref_internal(lock, mode);
-        }
-
-        ldlm_lock_destroy(lock);
-        EXIT;
-}
-
-int
-ldlm_process_llog_lock(struct ldlm_lock *req, int *flags, int first_enq,
-                       ldlm_error_t *err)
-{
-        struct ldlm_resource *res = req->l_resource;
-        struct ldlm_namespace *ns = res->lr_namespace;
-        struct list_head *tmp;
-        struct list_head *ownlocks = NULL;
-        struct ldlm_lock *lock = NULL;
-        struct ldlm_lock *new = req;
-        struct ldlm_lock *new2 = NULL;
-        ldlm_mode_t mode = req->l_req_mode;
-        int local = ns->ns_client;
-        int added = (mode == LCK_NL);
-        ENTRY;
-
-        CDEBUG(D_DLMTRACE, "flags %#x \n", *flags);
-
-        *err = ELDLM_OK;
-
-        if (local) {
-                /* No blocking ASTs are sent to the clients for
-                 * Posix file & record locks */
-                req->l_blocking_ast = NULL;
-        } else {
-                /* Called on the server for lock cancels. */
-                req->l_blocking_ast = ldlm_llog_blocking_ast;
-        }
-
-
-        lockmode_verify(mode);
-
-        /* This loop determines if there are existing locks
-         * that conflict with the new lock request. */
-        list_for_each(tmp, &res->lr_granted) {
-                lock = list_entry(tmp, struct ldlm_lock, l_res_link);
-
-                if (lockmode_compat(lock->l_granted_mode, mode))
-                        continue;
-
-                if (!first_enq)
-                        RETURN(LDLM_ITER_CONTINUE);
-
-                LASSERT(list_empty(&req->l_llog_waitq));
-                list_add_tail(&req->l_llog_waitq, &ldlm_llog_waitq);
-
-                ldlm_resource_add_lock(res, &res->lr_waiting, req);
-                        //*flags |= LDLM_FL_BLOCK_GRANTED;
-                RETURN(LDLM_ITER_STOP);
-        }
-
-        list_del_init(&req->l_llog_waitq);
-
-        req->l_granted_mode = req->l_req_mode;
-
-        /* Add req to the granted queue. */
-        list_del_init(&req->l_res_link);
-
-        /* insert new lock*/
-        ldlm_resource_add_lock(res, &req->lr_granted, req);
-
-        if (*flags != LDLM_FL_WAIT_NOREPROC) {
-                if (first_enq) {
-                        if (mode == LCK_NL) {
-                                struct list_head rpc_list
-                                                    = LIST_HEAD_INIT(rpc_list);
-                                int rc;
-restart:
-                                res->lr_tmp = &rpc_list;
-                                ldlm_reprocess_queue(res, &res->lr_waiting);
-                                res->lr_tmp = NULL;
-
-                                l_unlock(&ns->ns_lock);
-                                rc = ldlm_run_ast_work(res->lr_namespace,
-                                                       &rpc_list);
-                                l_lock(&ns->ns_lock);
-                                if (rc == -ERESTART)
-                                        GOTO(restart, -ERESTART);
-                       }
-                } else {
-                        LASSERT(req->l_completion_ast);
-                        ldlm_add_ast_work_item(req, NULL, NULL, 0);
-                }
-        }
-
-        /* In case we're reprocessing the requested lock we can't destroy
-         * it until after calling ldlm_ast_work_item() above so that lawi()
-         * can bump the reference count on req. Otherwise req could be freed
-         * before the completion AST can be sent.  */
-        if (added)
-                ldlm_flock_destroy(req, mode, *flags);
-
-        ldlm_resource_dump(D_OTHER, res);
-        RETURN(LDLM_ITER_CONTINUE);
-}
-
-static void
-ldlm_llog_interrupted_wait(void *data)
-{
-        struct ldlm_lock *lock;
-        struct lustre_handle lockh;
-        ENTRY;
-
-        lock = (struct ldlm_lock *)data;
-
-        /* take lock off the deadlock detection waitq. */
-        list_del_init(&lock->l_llog_waitq);
-
-        /* client side - set flag to prevent lock from being put on lru list */
-        lock->l_flags |= LDLM_FL_CBPENDING;
-
-        ldlm_lock_decref_internal(lock, lock->l_req_mode);
-        ldlm_lock2handle(lock, &lockh);
-        ldlm_cli_cancel(&lockh);
-        EXIT;
-}
-
-int
-ldlm_llog_completion_ast(struct ldlm_lock *lock, int flags, void *data)
-{
-        struct ldlm_namespace *ns;
- //       struct file_lock *getlk = lock->l_ast_data;
- //       struct ldlm_flock_wait_data fwd;
-        unsigned long irqflags;
-        struct obd_device *obd;
-        struct obd_import *imp = NULL;
-        ldlm_error_t err;
-        int rc = 0;
-        struct l_wait_info lwi;
-        ENTRY;
-
-        CDEBUG(D_DLMTRACE, "flags: 0x%x data: %p getlk: %p\n",
-               flags, data, getlk);
-
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV)))
-                goto  granted;
-
-        LDLM_DEBUG(lock, "client-side enqueue can not return a granted lock, "
-                   "sleeping");
-
-        obd = class_exp2obd(lock->l_conn_export);
-
-        /* if this is a local lock, then there is no import */
-        if (obd != NULL)
-                imp = obd->u.cli.cl_import;
-
-        if (imp != NULL) {
-                spin_lock_irqsave(&imp->imp_lock, irqflags);
-                fwd.fwd_generation = imp->imp_generation;
-                spin_unlock_irqrestore(&imp->imp_lock, irqflags);
-        }
-
-        lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd);
-
-        /* Go to sleep until the lock is granted. */
-        rc = l_wait_event(lock->l_waitq,
-                          ((lock->l_req_mode == lock->l_granted_mode) ||
-                           lock->l_destroyed), &lwi);
-
-        LDLM_DEBUG(lock, "client-side enqueue waking up: rc = %d", rc);
-        RETURN(rc);
-granted:
-
-        LDLM_DEBUG(lock, "client-side enqueue granted");
-        ns = lock->l_resource->lr_namespace;
-        l_lock(&ns->ns_lock);
-
-        /* take lock off the deadlock detection waitq. */
-        list_del_init(&lock->l_flock_waitq);
-
-        /* ldlm_lock_enqueue() has already placed lock on the granted list. */
-        list_del_init(&lock->l_res_link);
-
-        /* We need to reprocess the lock to do merges or splits
-         * with existing locks owned by this process. */
-        ldlm_process_llog_lock(lock, NULL, 1, &err);
-        if (flags == 0)
-               wake_up(&lock->l_waitq);
-
-        l_unlock(&ns->ns_lock);
-        RETURN(0);
-}
-EXPORT_SYMBOL(ldlm_llog_completion_ast);
-
-int ldlm_llog_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                           void *data, int flag)
-{
-        struct ldlm_namespace *ns;
-        ENTRY;
-
-        LASSERT(lock);
-        LASSERT(flag == LDLM_CB_CANCELING);
-
-        ns = lock->l_resource->lr_namespace;
-
-        /* take lock off the deadlock detection waitq. */
-        l_lock(&ns->ns_lock);
-        list_del_init(&lock->l_flock_waitq);
-        l_unlock(&ns->ns_lock);
-        RETURN(0);
-}
index 93a7aee..02d2a29 100644 (file)
@@ -91,7 +91,6 @@ static ldlm_processing_policy ldlm_processing_policy_table[] = {
         [LDLM_EXTENT] ldlm_process_extent_lock,
 #ifdef __KERNEL__
         [LDLM_FLOCK] ldlm_process_flock_lock,
-        //[LDLM_LLOG]  ldlm_process_llog_lock,
 #endif
         [LDLM_IBITS] ldlm_process_inodebits_lock,
 };
@@ -748,6 +747,10 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                                                           LDLM_FL_WAIT_NOREPROC,
                                                                  NULL);
                                 if (err) {
+                                        if (flags & LDLM_FL_TEST_LOCK)
+                                                LDLM_LOCK_PUT(lock);
+                                        else
+                                                ldlm_lock_decref_internal(lock, mode);
                                         rc = 0;
                                         goto out2;
                                 }
index c90424a..ee89422 100644 (file)
@@ -40,7 +40,7 @@ static void interrupted_completion_wait(void *data)
 
 struct lock_wait_data {
         struct ldlm_lock *lwd_lock;
-        int               lwd_generation;
+        __u32             lwd_conn_cnt;
 };
 
 int ldlm_expired_completion_wait(void *data)
@@ -69,11 +69,10 @@ int ldlm_expired_completion_wait(void *data)
 
         obd = lock->l_conn_export->exp_obd;
         imp = obd->u.cli.cl_import;
-        ptlrpc_fail_import(imp, lwd->lwd_generation);
+        ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
         LDLM_ERROR(lock, "lock timed out (enqueued %lus ago), entering "
                    "recovery for %s@%s", lock->l_enqueued_time.tv_sec,
-                   imp->imp_target_uuid.uuid,
-                   imp->imp_connection->c_remote_uuid.uuid);
+                   obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
 }
@@ -127,7 +126,7 @@ noreproc:
 
         if (imp != NULL) {
                 spin_lock_irqsave(&imp->imp_lock, irqflags);
-                lwd.lwd_generation = imp->imp_generation;
+                lwd.lwd_conn_cnt = imp->imp_conn_cnt;
                 spin_unlock_irqrestore(&imp->imp_lock, irqflags);
         }
 
@@ -254,7 +253,6 @@ static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
         ldlm_lock_addref_internal(lock, mode);
         ldlm_lock2handle(lock, lockh);
         lock->l_flags |= LDLM_FL_LOCAL;
-        lock->l_flags |= *flags & LDLM_INHERIT_FLAGS;
         lock->l_lvb_swabber = lvb_swabber;
         if (policy != NULL)
                 lock->l_policy_data = *policy;
index 9321581..5a3ad6c 100644 (file)
@@ -127,6 +127,7 @@ int liblustre_process_log(struct config_llog_instance *cfg,
         if (ocd == NULL)
                 GOTO(out_cleanup, rc = -ENOMEM);
 
+        ocd->ocd_connect_flags = OBD_CONNECT_VERSION;
         ocd->ocd_version = LUSTRE_VERSION_CODE;
 
         /* Disable initial recovery on this import */
index 9677c11..8285107 100644 (file)
@@ -348,6 +348,9 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                 struct intnl_stat *st;
                 ENTRY;
 
+                if (it_disposition(it, DISP_OPEN_CREATE))
+                        ptlrpc_req_finished(request);
+
                 rc = mdc_req2lustre_md(request, offset, sbi->ll_osc_exp, &md);
                 if (rc)
                         RETURN(rc);
index 1f60bd5..244271b 100644 (file)
@@ -797,19 +797,8 @@ int llu_iop_read(struct inode *ino,
 int llu_iop_write(struct inode *ino,
                   struct ioctx *ioctx)
 {
-        struct iattr iattr;
-        int rc;
-
-        memset(&iattr, 0, sizeof(iattr));
-        iattr.ia_mtime = iattr.ia_atime = CURRENT_TIME;
-        iattr.ia_valid = ATTR_MTIME | ATTR_ATIME | ATTR_RAW;
-
-        liblustre_wait_event(0);
-        rc = llu_setattr_raw(ino, &iattr);
-        if (rc) {
-                CERROR("failed to set mtime/atime during write: %d", rc);
-                /* XXX should continue or return error? */
-        }
+        struct intnl_stat *st = llu_i2stat(ino);
+        st->st_mtime = st->st_ctime = CURRENT_TIME;
 
         return llu_file_rwx(ino, ioctx, 0);
 }
index bda38b9..129fff6 100644 (file)
@@ -147,10 +147,10 @@ void llu_update_inode(struct inode *inode, struct mdt_body *body,
                 }
         }
 
-        if (body->valid & OBD_MD_FLATIME &&
+        if (body->valid & OBD_MD_FLMTIME &&
             body->mtime > LTIME_S(st->st_mtime))
                 LTIME_S(st->st_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLMTIME &&
+        if (body->valid & OBD_MD_FLATIME &&
             body->atime > LTIME_S(st->st_atime))
                 LTIME_S(st->st_atime) = body->atime;
         if (body->valid & OBD_MD_FLCTIME &&
@@ -810,11 +810,11 @@ static int llu_iop_setattr(struct pnode *pno,
         }
         if (mask & SETATTR_MTIME) {
                 iattr.ia_mtime = stbuf->st_mtime;
-                iattr.ia_valid |= ATTR_MTIME;
+                iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
         }
         if (mask & SETATTR_ATIME) {
                 iattr.ia_atime = stbuf->st_atime;
-                iattr.ia_valid |= ATTR_ATIME;
+                iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
         }
         if (mask & SETATTR_UID) {
                 iattr.ia_uid = stbuf->st_uid;
index 566a4c9..53e4735 100644 (file)
@@ -468,7 +468,7 @@ int t18(char *name)
         char buf[128];
         int fd, i;
         struct stat statbuf[3];
-        ENTRY("write should change mtime/atime");
+        ENTRY("write should change mtime/ctime");
         snprintf(file, MAX_PATH_LENGTH, "%s/test_t18_file", lustre_path);
 
         for (i = 0; i < 3; i++) {
@@ -486,13 +486,13 @@ int t18(char *name)
                         printf("Error stat\n");
                         return(1);
                 }
-                printf("atime %lu, mtime %lu\n",
-                        statbuf[i].st_atime, statbuf[i].st_mtime);
+                printf("ctime %lu, mtime %lu\n",
+                        statbuf[i].st_ctime, statbuf[i].st_mtime);
                 sleep(2);
         }
 
         for (i = 1; i < 3; i++) {
-                if ((statbuf[i].st_atime <= statbuf[i-1].st_atime) ||
+                if ((statbuf[i].st_ctime <= statbuf[i-1].st_ctime) ||
                     (statbuf[i].st_mtime <= statbuf[i-1].st_mtime)) {
                         printf("time error\n");
                         return(-1);
@@ -1035,6 +1035,7 @@ int t51(char *name)
         printf("\n");
         LEAVE();
 }
+
 /*
  * check atime update during read
  */
@@ -1078,15 +1079,90 @@ int t52(char *name)
         LEAVE();
 }
 
+#define NEW_TIME        10000
+int t53(char *name)
+{
+        char file[MAX_PATH_LENGTH] = "";
+        struct utimbuf times;   /* struct. buffer for utime() */
+        struct stat stat_buf;   /* struct buffer to hold file info. */
+        time_t mtime, atime;
+        ENTRY("mtime/atime should be updated by utime() call");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t53_file", lustre_path);
+
+        t_echo_create(file, "check mtime/atime update by utime() call");
+        /* Initialize the modification and access time in the times arg */
+        times.actime = NEW_TIME+10;
+        times.modtime = NEW_TIME;
+        /* file modification/access time */
+        utime(file, &times);
+        if (stat(file, &stat_buf) < 0) {
+                printf("stat(2) of %s failed, error:%d %s\n",
+                        file, errno, strerror(errno)); 
+        }
+        mtime = stat_buf.st_mtime;
+        atime = stat_buf.st_atime;
+        if ((mtime == NEW_TIME) && (atime == NEW_TIME + 10)) {
+                t_unlink(file);
+                LEAVE();
+        }
+
+        printf("mod time %ld, expected %ld\n", mtime, (long)NEW_TIME);
+        printf("acc time %ld, expected %ld\n", atime, (long)NEW_TIME + 10);
+        t_unlink(file);
+        return (-1);
+}
+
+int t54(char *name)
+{
+        char file[MAX_PATH_LENGTH] = "";
+        struct flock lock;
+        int fd, err;
+
+        ENTRY("fcntl should return 0 when succeed in getting flock");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t54_file", lustre_path);
+
+        t_echo_create(file, "fcntl should return 0 when succeed");
+
+        fd = open(file, O_RDWR);
+        if (fd < 0) {
+                printf("\nerror open file: %s\n", strerror(errno));
+                return(-1);
+        }
+        lock.l_type   = F_WRLCK;
+        lock.l_start  = 0;
+        lock.l_whence = 0;
+        lock.l_len    = 1;
+        if ((err = t_fcntl(fd, F_SETLKW, &lock)) != 0) {
+                fprintf(stderr, "fcntl returned: %d (%s)\n", 
+                        err, strerror(err));
+                close(fd);
+                t_unlink(file);
+                return (-1);
+        }
+
+        lock.l_type   = F_UNLCK;
+        t_fcntl(fd, F_SETLKW, &lock);
+        close(fd);
+        t_unlink(file);
+        LEAVE();
+}
+
 extern void __liblustre_setup_(void);
 extern void __liblustre_cleanup_(void);
 
 
 void usage(char *cmd)
 {
-        printf("\n");
-        printf("Usage: \t%s --target mdsnid:/mdsname/profile\n", cmd);
-        printf("       \t%s --dumpfile dumpfile\n", cmd);
+        printf("\n"
+               "usage: %s [--only {test}] --target mdsnid:/mdsname/profile\n",
+               cmd);
+        printf("       %s --dumpfile dumpfile\n", cmd);
         exit(-1);
 }
 
@@ -1121,6 +1197,8 @@ struct testlist {
         { t50, "50" },
         { t50b, "50b" },
         { t51, "51" },
+        { t53, "53" },
+        { t54, "54" },
         { NULL, NULL }
 };
 
@@ -1189,12 +1267,21 @@ int main(int argc, char * const argv[])
                         run = 0;
                         len = strlen(test->name);
                         for (i = 0; i < numonly; i++) {
-                                if (len < strlen(only[i]))
+                                int olen = strlen(only[i]);
+
+                                if (len < olen)
                                         continue;
-                                if (strncmp(only[i], test->name,
-                                            strlen(only[i])) == 0) {
-                                        run = 1;
-                                        break;
+
+                                if (strncmp(only[i], test->name, olen) == 0) {
+                                        switch(test->name[olen]) {
+                                        case '0': case '1': case '2': case '3':
+                                        case '4': case '5': case '6': case '7':
+                                        case '8': case '9':
+                                                break;
+                                        default:
+                                                run = 1;
+                                                break;
+                                        }
                                 }
                         }
                 }
index 5a7b513..0ebf7e2 100644 (file)
@@ -162,10 +162,9 @@ out:
         /* Too bad, we had an error */
 
 Ebadsize:
-        CERROR("ext2_check_page"
-                "size of directory #%lu is not a multiple of chunk size\n",
-                dir->i_ino
-        );
+        CERROR("%s: directory %lu/%u size %llu is not a multiple of %u\n",
+               ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
+               dir->i_generation, dir->i_size, chunk_size);
         goto fail;
 Eshort:
         error = "rec_len is smaller than minimal";
@@ -182,10 +181,11 @@ Espan:
         //Einumber:
         // error = "inode out of bounds";
 bad_entry:
-        CERROR("ext2_check_page: bad entry in directory #%lu: %s - "
+        CERROR("%s: bad entry in directory %lu/%u: %s - "
                 "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d",
-                dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs,
-                (unsigned long) le32_to_cpu(p->inode),
+                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
+                dir->i_generation, error, (page->index<<PAGE_CACHE_SHIFT), offs,
+                (unsigned long)le32_to_cpu(p->inode),
                 rec_len, p->name_len);
         goto fail;
 Eend:
@@ -237,16 +237,17 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
 
         page = read_cache_page(mapping, n,
                                (filler_t*)mapping->a_ops->readpage, NULL);
-        if (!IS_ERR(page)) {
-                wait_on_page(page);
-                (void)kmap(page);
-                if (!PageUptodate(page))
-                        goto fail;
-                if (!PageChecked(page))
-                        ext2_check_page(page);
-                if (PageError(page))
-                        goto fail;
-        }
+        if (IS_ERR(page))
+                GOTO(out_unlock, page);
+
+        wait_on_page(page);
+        (void)kmap(page);
+        if (!PageUptodate(page))
+                goto fail;
+        if (!PageChecked(page))
+                ext2_check_page(page);
+        if (PageError(page))
+                goto fail;
 
 out_unlock:
         ldlm_lock_decref(&lockh, LCK_CR);
@@ -288,7 +289,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
 };
 
 
-int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
+int ll_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
         struct inode *inode = filp->f_dentry->d_inode;
         loff_t pos = filp->f_pos;
@@ -330,6 +331,7 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
                 kaddr = page_address(page);
                 if (need_revalidate) {
+                        /* page already checked from ll_get_dir_page() */
                         offset = ext2_validate_entry(kaddr, offset, chunk_mask);
                         need_revalidate = 0;
                 }
@@ -359,7 +361,8 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 done:
         filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
         filp->f_version = inode->i_version;
-        update_atime(inode);
+        touch_atime(filp->f_vfsmnt, filp->f_dentry);
+
         RETURN(rc);
 }
 
@@ -541,7 +544,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         RETURN(PTR_ERR(filename));
 
                 rc = ll_get_max_mdsize(sbi, &lmmsize);
-                if (rc) 
+                if (rc)
                         RETURN(rc);
 
                 rc = mdc_getattr_name(sbi->ll_mdc_exp, ll_inode2fid(inode),
@@ -586,39 +589,39 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         int lmj_size, i, aindex = 0, rc;
  
                         rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
-                        if (rc < 0) 
+                        if (rc < 0)
                                 GOTO(out_req, rc = -ENOMEM);
                         rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
-                        if (rc) 
-                                GOTO(out_free_memmd, rc); 
-                        
+                        if (rc)
+                                GOTO(out_free_memmd, rc);
+
                         lmj_size = sizeof(struct lov_user_md_join) +
                                    lsm->lsm_stripe_count *
                                    sizeof(struct lov_user_ost_data_join);
                         OBD_ALLOC(lmj, lmj_size);
-                        if (!lmj) 
+                        if (!lmj)
                                 GOTO(out_free_memmd, rc = -ENOMEM);
-                        
+
                         memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
-                        for(i = 0; i < lsm->lsm_stripe_count; i++) {
+                        for (i = 0; i < lsm->lsm_stripe_count; i++) {
                                 struct lov_array_info *lai = lsm->lsm_array;
                                 if ((lai->lai_ext_array[aindex].le_loi_idx +
                                      lai->lai_ext_array[aindex].le_stripe_count)<=i){
                                         aindex ++;
                                 }
-                                CDEBUG(D_INFO, "aindex %d i %d l_extent_start"LPU64""
-                                               "len %d \n", aindex, i, 
-                                               lai->lai_ext_array[aindex].le_start,
-                                               (int)lai->lai_ext_array[aindex].le_len);
+                                CDEBUG(D_INFO, "aindex %d i %d l_extent_start"
+                                       LPU64"len %d \n", aindex, i,
+                                       lai->lai_ext_array[aindex].le_start,
+                                       (int)lai->lai_ext_array[aindex].le_len);
                                 lmj->lmm_objects[i].l_extent_start =
                                         lai->lai_ext_array[aindex].le_start;
+
                                 if ((int)lai->lai_ext_array[aindex].le_len == -1) {
                                         lmj->lmm_objects[i].l_extent_end = -1;
                                 } else {
-                                        lmj->lmm_objects[i].l_extent_end =  
-                                        lai->lai_ext_array[aindex].le_start + 
-                                        lai->lai_ext_array[aindex].le_len;
+                                        lmj->lmm_objects[i].l_extent_end =
+                                          lai->lai_ext_array[aindex].le_start +
+                                          lai->lai_ext_array[aindex].le_len;
                                 }
                                 lmj->lmm_objects[i].l_object_id =
                                         lsm->lsm_oinfo[i].loi_id;
@@ -815,9 +818,8 @@ out_free_memmd:
                         /* XXX: dqb_valid is borrowed as a flag to mark that
                          *      only mds quota is wanted */
                         if (qctl->qc_dqblk.dqb_valid)
-                                qctl->obd_uuid = 
-                                       sbi->ll_mdc_exp->exp_obd->u.cli.
-                                       cl_import->imp_target_uuid;
+                                qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
+                                                        u.cli.cl_target_uuid;
                         break;
                 case Q_GETINFO:
                         break;
index 3129f22..acf7c91 100644 (file)
@@ -605,14 +605,14 @@ int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
 
                 l_lock(&lock->l_resource->lr_namespace->ns_lock);
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
                 kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
                 kms = ldlm_extent_shift_kms(NULL, kms);
                 if (lsm->lsm_oinfo[stripe].loi_kms != kms)
                         LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
                                    lsm->lsm_oinfo[stripe].loi_kms, kms);
                 lsm->lsm_oinfo[stripe].loi_kms = kms;
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
         }
 
@@ -972,7 +972,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
         if (rc != 0)
                 RETURN(rc);
 
-        /* this is ok, g_f_w will overwrite this under i_sem if it races
+        /* this is ok, g_f_w will overwrite this under i_mutex if it races
          * with a local truncate, it just makes our maxbyte checking easier */
         if (file->f_flags & O_APPEND)
                 *ppos = inode->i_size;
@@ -989,7 +989,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
         CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
                inode->i_ino, count, *ppos);
 
-        /* generic_file_write handles O_APPEND after getting i_sem */
+        /* generic_file_write handles O_APPEND after getting i_mutex */
         retval = generic_file_write(file, buf, count, ppos);
 
 out:
@@ -999,6 +999,98 @@ out:
         RETURN(retval);
 }
 
+/*
+ * Send file content (through pagecache) somewhere with helper
+ */
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
+                                read_actor_t actor, void *target)
+{
+        struct inode *inode = in_file->f_dentry->d_inode;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct ll_lock_tree tree;
+        struct ll_lock_tree_node *node;
+        struct ost_lvb lvb;
+        struct ll_ra_read bead;
+        int rc;
+        ssize_t retval;
+        __u64 kms;
+        ENTRY;
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
+               inode->i_ino, inode->i_generation, inode, count, *ppos);
+
+        /* "If nbyte is 0, read() will return 0 and have no other results."
+         *                      -- Single Unix Spec */
+        if (count == 0)
+                RETURN(0);
+
+        lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
+                            count);
+
+        node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
+        tree.lt_fd = LUSTRE_FPRIVATE(in_file);
+        rc = ll_tree_lock(&tree, node, NULL, count,
+                          in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
+        if (rc != 0)
+                RETURN(rc);
+
+        ll_inode_size_lock(inode, 1);
+        /*
+         * Consistency guarantees: following possibilities exist for the
+         * relation between region being read and real file size at this
+         * moment:
+         *
+         *  (A): the region is completely inside of the file;
+         *
+         *  (B-x): x bytes of region are inside of the file, the rest is
+         *  outside;
+         *
+         *  (C): the region is completely outside of the file.
+         *
+         * This classification is stable under DLM lock acquired by
+         * ll_tree_lock() above, because to change class, other client has to
+         * take DLM lock conflicting with our lock. Also, any updates to
+         * ->i_size by other threads on this client are serialized by
+         * ll_inode_size_lock(). This guarantees that short reads are handled
+         * correctly in the face of concurrent writes and truncates.
+         */
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
+        kms = lvb.lvb_size;
+        if (*ppos + count - 1 > kms) {
+                /* A glimpse is necessary to determine whether we return a
+                 * short read (B) or some zeroes at the end of the buffer (C) */
+                ll_inode_size_unlock(inode, 1);
+                retval = ll_glimpse_size(inode, 0);
+                if (retval)
+                        goto out;
+        } else {
+                /* region is within kms and, hence, within real file size (A) */
+                inode->i_size = kms;
+                ll_inode_size_unlock(inode, 1);
+        }
+
+        CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
+               inode->i_ino, count, *ppos, inode->i_size);
+
+        /* turn off the kernel's read-ahead */
+        in_file->f_ra.ra_pages = 0;
+
+        bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
+        bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+        ll_ra_read_in(in_file, &bead);
+        /* BUG: 5972 */
+        file_accessed(in_file);
+        retval = generic_file_sendfile(in_file, ppos, count, actor, target);
+        ll_ra_read_ex(in_file, &bead);
+
+ out:
+        ll_tree_unlock(&tree);
+        RETURN(retval);
+}
+#endif
+
 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
                                unsigned long arg)
 {
@@ -1086,8 +1178,8 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
         if (!f)
                 GOTO(out, -ENOMEM);
 
-        f->f_dentry = file->f_dentry;
-        f->f_vfsmnt = file->f_vfsmnt;
+        f->f_dentry = dget(file->f_dentry);
+        f->f_vfsmnt = mntget(file->f_vfsmnt);
 
         rc = ll_intent_file_open(f, lum, lum_size, &oit);
         if (rc)
@@ -1115,7 +1207,7 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
 
  out:
         if (f)
-                put_filp(f);
+                fput(f);
         ll_file_data_put(fd);
         up(&lli->lli_open_sem);
         if (req != NULL)
@@ -1306,8 +1398,8 @@ static int join_file(struct inode *head_inode, struct file *head_filp,
         if (f == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        f->f_dentry = head_filp->f_dentry;
-        f->f_vfsmnt = head_filp->f_vfsmnt;
+        f->f_dentry = dget(head_filp->f_dentry);
+        f->f_vfsmnt = mntget(head_filp->f_vfsmnt);
 
         ll_prepare_mdc_op_data(op_data, head_inode, tail_parent,
                                tail_dentry->d_name.name,
@@ -1337,7 +1429,7 @@ out:
         if (op_data)
                 OBD_FREE_PTR(op_data);
         if (f)
-                put_filp(f);
+                fput(f);
         ll_file_data_put(fd);
         ptlrpc_req_finished(req);
         RETURN(rc);
@@ -1435,6 +1527,7 @@ cleanup:
         }
         RETURN(rc);
 }
+
 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                   unsigned long arg)
 {
@@ -1463,10 +1556,18 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                 if (get_user(flags, (int *) arg))
                         RETURN(-EFAULT);
 
-                if (cmd == LL_IOC_SETFLAGS)
+                if (cmd == LL_IOC_SETFLAGS) {
+                        if ((flags & LL_FILE_IGNORE_LOCK) &&
+                            !(file->f_flags & O_DIRECT)) {
+                                CERROR("%s: unable to disable locking on "
+                                       "non-O_DIRECT file\n", current->comm);
+                                RETURN(-EINVAL);
+                        }
+
                         fd->fd_flags |= flags;
-                else
+                } else {
                         fd->fd_flags &= ~flags;
+                }
                 RETURN(0);
         case LL_IOC_LOV_SETSTRIPE:
                 RETURN(ll_lov_setstripe(inode, file, arg));
@@ -1910,7 +2011,7 @@ struct file_operations ll_file_operations = {
         .mmap           = ll_file_mmap,
         .llseek         = ll_file_seek,
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        .sendfile       = generic_file_sendfile,
+        .sendfile       = ll_file_sendfile,
 #endif
         .fsync          = ll_fsync,
         /* .lock           = ll_file_flock */
@@ -1925,7 +2026,7 @@ struct file_operations ll_file_operations_flock = {
         .mmap           = ll_file_mmap,
         .llseek         = ll_file_seek,
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        .sendfile       = generic_file_sendfile,
+        .sendfile       = ll_file_sendfile,
 #endif
         .fsync          = ll_fsync,
         .lock           = ll_file_flock
index a9c27d6..132755f 100644 (file)
@@ -131,6 +131,7 @@ enum ra_stat {
         RA_STAT_ZERO_WINDOW,
         RA_STAT_EOF,
         RA_STAT_MAX_IN_FLIGHT,
+        RA_STAT_WRONG_GRAB_PAGE,
         _NR_RA_STAT,
 };
 
index c08dffc..2193454 100644 (file)
@@ -179,7 +179,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         if (err)
                 GOTO(out_mdc, err);
 
-        /* async connect is surely finished by now */
+        /* MDC connect is surely finished by now */
         *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
         *md_data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
 
@@ -214,8 +214,8 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
          * on all clients. */
         /* s_dev is also used in lt_compare() to compare two fs, but that is
          * only a node-local comparison. */
-        sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid,
-                         strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid));
+        sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
+                                 strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
 #endif
 
         obd = class_name2obd(osc);
@@ -674,7 +674,8 @@ out_free:
         if (err) {
                 struct obd_device *obd;
                 int next = 0;
-                /* like client_put_super below */
+                /* like ll_put_super below */
+                lustre_end_log(sb, NULL, &cfg);
                 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) 
                        != NULL) {
                         class_manual_cleanup(obd);
@@ -682,6 +683,7 @@ out_free:
                 class_del_profile(profilenm);
                 ll_free_sbi(sb);
                 lsi->lsi_llsbi = NULL;
+                lustre_common_put_super(sb);
         }
         RETURN(err);
 } /* ll_fill_super */
@@ -983,15 +985,15 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                 if (attr->ia_size == 0)
                         ast_flags = LDLM_AST_DISCARD_DATA;
 
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 UP_WRITE_I_ALLOC_SEM(inode);
                 rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
                                     ast_flags);
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 DOWN_WRITE_I_ALLOC_SEM(inode);
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
 #else
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
                 DOWN_WRITE_I_ALLOC_SEM(inode);
 #endif
                 if (rc != 0)
@@ -1400,10 +1402,8 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
                 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
                                  &attr, NULL, 0, NULL, 0, &req);
-                if (rc) {
+                if (rc || lsm == NULL) {
                         ptlrpc_req_finished(req);
-                        if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
                         obdo_free(oa);
                         RETURN(rc);
                 }
@@ -1588,7 +1588,6 @@ int ll_obd_statfs(struct inode *inode, void *arg)
         struct ll_sb_info *sbi = NULL;
         struct obd_device *client_obd = NULL, *lov_obd = NULL;
         struct lov_obd *lov = NULL;
-        struct obd_import *client_imp = NULL;
         struct obd_statfs stat_buf = {0};
         char *buf = NULL;
         struct obd_ioctl_data *data = NULL;
@@ -1614,7 +1613,6 @@ int ll_obd_statfs(struct inode *inode, void *arg)
                 if (index > 0)
                         GOTO(out_statfs, rc = -ENODEV);
                 client_obd = class_exp2obd(sbi->ll_mdc_exp);
-                client_imp = class_exp2cliimp(sbi->ll_mdc_exp);
         } else if (type == LL_STATFS_LOV) {
                 lov_obd = class_exp2obd(sbi->ll_osc_exp);
                 lov = &lov_obd->u.lov;
@@ -1623,12 +1621,11 @@ int ll_obd_statfs(struct inode *inode, void *arg)
                         GOTO(out_statfs, rc = -ENODEV);
 
                 client_obd = class_exp2obd(lov->tgts[index].ltd_exp);
-                client_imp = class_exp2cliimp(lov->tgts[index].ltd_exp);
                 if (!lov->tgts[index].active)
                         GOTO(out_uuid, rc = -ENODATA);
         }
 
-        if (!client_obd || !client_imp)
+        if (!client_obd)
                 GOTO(out_statfs, rc = -EINVAL);
 
         rc = obd_statfs(client_obd, &stat_buf, jiffies - 1);
@@ -1639,7 +1636,7 @@ int ll_obd_statfs(struct inode *inode, void *arg)
                 GOTO(out_statfs, rc = -EFAULT);
 
 out_uuid:
-        if (copy_to_user(data->ioc_pbuf2, &client_imp->imp_target_uuid,
+        if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
                          data->ioc_plen2))
                 rc = -EFAULT;
 
@@ -1648,9 +1645,3 @@ out_statfs:
                 obd_ioctl_freedata(buf, len);
         return rc;
 }
-
-EXPORT_SYMBOL(ll_fill_super);
-EXPORT_SYMBOL(ll_put_super);
-EXPORT_SYMBOL(ll_remount_fs);
-EXPORT_SYMBOL(ll_umount_begin);
-
index 3b0e6c5..6f0100d 100644 (file)
@@ -494,6 +494,9 @@ static void ll_vm_close(struct vm_area_struct *vma)
 }
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#ifndef HAVE_FILEMAP_POPULATE
+static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+#endif
 static int ll_populate(struct vm_area_struct *area, unsigned long address,
                        unsigned long len, pgprot_t prot, unsigned long pgoff,
                        int nonblock)
@@ -600,6 +603,11 @@ int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
 
         rc = generic_file_mmap(file, vma);
         if (rc == 0) {
+#if !defined(HAVE_FILEMAP_POPULATE) && \
+    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+                if (!filemap_populate)
+                        filemap_populate = vma->vm_ops->populate;
+#endif
                 vma->vm_ops = &ll_file_vm_ops;
                 vma->vm_ops->open(vma);
                 /* update the inode's size and mtime */
index c445c91..8519ec3 100644 (file)
@@ -692,6 +692,7 @@ static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
                 [RA_STAT_ZERO_WINDOW] = "zero size window",
                 [RA_STAT_EOF] = "read-ahead to EOF",
                 [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
+                [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
         };
 
         do_gettimeofday(&now);
index 94fd695..34f80b3 100644 (file)
@@ -253,6 +253,12 @@ static void ll_d_add(struct dentry *de, struct inode *inode)
         __d_rehash(de, 0);
 }
 
+/* 2.6.15 and prior versions have buggy d_instantiate_unique that leaks an inode
+ * if suitable alias is found. But we are not going to fix it by just freeing
+ * such inode, because if some vendor's kernel contains this bugfix already,
+ * we will break everything then. We will use our own reimplementation
+ * instead. */
+#if !defined(HAVE_D_ADD_UNIQUE) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16))
 /* Search "inode"'s alias list for a dentry that has the same name and parent as
  * de.  If found, return it.  If not found, return de. */
 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
@@ -299,6 +305,21 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
 
         return de;
 }
+#else
+struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
+{
+        struct dentry *dentry;
+
+        dentry = d_add_unique(de, inode);
+        if (dentry) {
+                lock_dentry(dentry);
+                dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
+                unlock_dentry(dentry);
+        }
+
+        return dentry?dentry:de;
+}
+#endif
 
 static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                             struct lookup_intent *it, void *data)
index 7de5c53..f35154a 100644 (file)
@@ -103,7 +103,7 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
 
 /* this isn't where truncate starts.   roughly:
  * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
- * DLM lock on [size, EOF], i_sem, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
+ * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
  * avoid races.
  *
  * must be called under ->lli_size_sem */
@@ -391,11 +391,11 @@ static struct obd_async_page_ops ll_async_page_ops = {
 
 struct ll_async_page *llap_cast_private(struct page *page)
 {
-        struct ll_async_page *llap = (struct ll_async_page *)page->private;
+        struct ll_async_page *llap = (struct ll_async_page *)page_private(page);
 
         LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
                  "page %p private %lu gave magic %d which != %d\n",
-                 page, page->private, llap->llap_magic, LLAP_MAGIC);
+                 page, page_private(page), llap->llap_magic, LLAP_MAGIC);
 
         return llap;
 }
@@ -519,10 +519,22 @@ static struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
         struct ll_async_page *llap;
         struct obd_export *exp;
         struct inode *inode = page->mapping->host;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_sb_info *sbi;
         int rc;
         ENTRY;
 
+        if (!inode) {
+                static int triggered;
+
+                if (!triggered) {
+                        LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
+                                       "page received\n");
+                        libcfs_debug_dumpstack(NULL);
+                        triggered = 1;
+                }
+                RETURN(ERR_PTR(-EINVAL));
+        }
+        sbi = ll_i2sbi(inode);
         LASSERT(ll_async_page_slab);
         LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
 
@@ -847,7 +859,7 @@ void ll_removepage(struct page *page)
 
         /* sync pages or failed read pages can leave pages in the page
          * cache that don't have our data associated with them anymore */
-        if (page->private == 0) {
+        if (page_private(page) == 0) {
                 EXIT;
                 return;
         }
@@ -1120,6 +1132,13 @@ static int ll_readahead(struct ll_readahead_state *ras,
                         continue;
                 }
 
+                /* Check if page was truncated or reclaimed */
+                if (page->mapping != mapping) {
+                        ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
+                        CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
+                        goto next_page;
+                }
+
                 /* we do this first so that we can see the page in the /proc
                  * accounting */
                 llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
@@ -1367,17 +1386,19 @@ int ll_readpage(struct file *filp, struct page *page)
                 GOTO(out_oig, rc = 0);
         }
 
-        rc = ll_page_matches(page, fd->fd_flags);
-        if (rc < 0) {
-                LL_CDEBUG_PAGE(D_ERROR, page, "lock match failed: rc %d\n", rc);
-                GOTO(out, rc);
-        }
+        if (likely((fd->fd_flags & LL_FILE_IGNORE_LOCK) == 0)) {
+                rc = ll_page_matches(page, fd->fd_flags);
+                if (rc < 0) {
+                        LL_CDEBUG_PAGE(D_ERROR, page, "lock match failed: rc %d\n", rc);
+                        GOTO(out, rc);
+                }
 
-        if (rc == 0) {
-                CWARN("ino %lu page %lu (%llu) not covered by "
-                      "a lock (mmap?).  check debug logs.\n",
-                      inode->i_ino, page->index,
-                      (long long)page->index << PAGE_CACHE_SHIFT);
+                if (rc == 0) {
+                        CWARN("ino %lu page %lu (%llu) not covered by "
+                              "a lock (mmap?).  check debug logs.\n",
+                              inode->i_ino, page->index,
+                              (long long)page->index << PAGE_CACHE_SHIFT);
+                }
         }
 
         rc = ll_issue_page_read(exp, llap, oig, 0);
index 2c333e5..72250af 100644 (file)
@@ -68,7 +68,7 @@ static int ll_invalidatepage(struct page *page, unsigned long offset)
         return 1;
 }
 
-static int ll_releasepage(struct page *page, int gfp_mask)
+static int ll_releasepage(struct page *page, gfp_t gfp_mask)
 {
         if (PagePrivate(page))
                 ll_removepage(page);
index 82697ee..2bc20b3 100644 (file)
@@ -181,6 +181,31 @@ static ssize_t ll_special_write(struct file *filp, const char *buf,
         RETURN(rc);
 }
 
+#ifdef HAVE_UNLOCKED_IOCTL
+static long ll_special_unlocked_ioctl(struct file *filp, unsigned int cmd,
+                                     unsigned long arg)
+{
+        struct file_operations **pfop;
+        int rc = -ENOTTY;
+
+        lock_kernel();
+        pfop = get_save_fops(filp, INODE_OPS);
+        unlock_kernel();
+        if (pfop && *pfop && (*pfop)->unlocked_ioctl) {
+                struct file_operations *sfops = filp->f_op;
+
+                rc = (*pfop)->unlocked_ioctl(filp, cmd, arg);
+
+                /* sometimes, file_operations will be changed in ioctl */
+                lock_kernel();
+                save_fops(filp, filp->f_dentry->d_inode, sfops);
+                unlock_kernel();
+        }
+
+        RETURN(rc);
+}
+#endif
+
 static int ll_special_ioctl(struct inode *inode, struct file *filp,
                             unsigned int cmd, unsigned long arg)
 {
@@ -349,6 +374,9 @@ struct file_operations ll_special_blk_inode_fops = {
         .read           = ll_special_read,
         .write          = ll_special_write,
         .ioctl          = ll_special_ioctl,
+#ifdef HAVE_UNLOCKED_IOCTL
+        .unlocked_ioctl = ll_special_unlocked_ioctl,
+#endif
         .open           = ll_special_open,
         .release        = ll_special_release,
         .mmap           = ll_special_mmap,
index d874fed..01c07e3 100644 (file)
@@ -111,11 +111,11 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
                 for (loi = lsm->lsm_oinfo; stripe < lsm->lsm_stripe_count;
                      stripe++, loi++) {
                         kms = lov_size_to_stripe(lsm, size, stripe);
-                        loi->loi_kms = loi->loi_lvb.lvb_size = kms;
                         CDEBUG(D_INODE,
                                "stripe %d KMS %sing "LPU64"->"LPU64"\n",
                                stripe, kms > loi->loi_kms ? "increas":"shrink",
                                loi->loi_kms, kms);
+                        loi->loi_kms = loi->loi_lvb.lvb_size = kms;
                 }
                 RETURN(0);
         }
index 03b94ef..8cf28a1 100644 (file)
@@ -380,7 +380,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                                watched->obd_name);
                         RETURN(-EINVAL);
                 }
-                uuid = &watched->u.cli.cl_import->imp_target_uuid;
+                uuid = &watched->u.cli.cl_target_uuid;
 
                 /* Set OSC as active before notifying the observer, so the
                  * observer can use the OSC normally.
@@ -392,7 +392,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                 if (rc) {
                         CERROR("%sactivation of %s failed: %d\n",
                                (ev == OBD_NOTIFY_ACTIVE) ? "" : "de",
-                               uuid->uuid, rc);
+                               obd_uuid2str(uuid), rc);
                         RETURN(rc);
                 }
         }
@@ -700,7 +700,7 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         RETURN(0);
 }
 
-static int lov_precleanup(struct obd_device *obd, int stage)
+static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
@@ -717,10 +717,15 @@ static int lov_precleanup(struct obd_device *obd, int stage)
                 }
                 break;
         }
+        case OBD_CLEANUP_EXPORTS:
+                break;
         case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
         }
         RETURN(rc);
 }
index 2cca5dd..d3ca4b8 100644 (file)
@@ -87,7 +87,7 @@ struct fsfilt_operations *fsfilt_get_ops(const char *type)
 
                 if (rc) {
                         CERROR("Can't find %s interface\n", name);
-                        RETURN(ERR_PTR(rc));
+                        RETURN(ERR_PTR(rc < 0 ? rc : -rc));
                         /* unlock fsfilt_types list */
                 }
         }
index 31478fa..ab9ba93 100644 (file)
@@ -73,11 +73,44 @@ struct fsfilt_cb_data {
 #define EXT3_XATTR_INDEX_TRUSTED        4
 #endif
 
-static char *fsfilt_ext3_label(struct super_block *sb)
+static char *fsfilt_ext3_get_label(struct super_block *sb)
 {
         return EXT3_SB(sb)->s_es->s_volume_name;
 }
 
+static int fsfilt_ext3_set_label(struct super_block *sb, char *label)
+{
+        /* see e.g. fsfilt_ext3_write_record() */
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXT3_SB(sb)->s_journal;
+        lock_24kernel();
+        handle = journal_start(journal, 1);
+        unlock_24kernel();
+        if (IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return(PTR_ERR(handle));
+        }
+
+        err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+        if (err)
+                goto out;
+
+        memcpy(EXT3_SB(sb)->s_es->s_volume_name, label,
+               sizeof(EXT3_SB(sb)->s_es->s_volume_name));
+
+        err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+
+out:
+        lock_24kernel();
+        journal_stop(handle);
+        unlock_24kernel();
+
+        return(err);
+}
+
 static char *fsfilt_ext3_uuid(struct super_block *sb)
 {
         return EXT3_SB(sb)->s_es->s_uuid;
@@ -489,7 +522,7 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
 {
         int rc;
 
-        LASSERT_SEM_LOCKED(&inode->i_sem);
+        LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
 
         if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */)
                 CWARN("setting EA on %lu/%u again... interesting\n",
@@ -507,13 +540,13 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
         return rc;
 }
 
-/* Must be called with i_sem held */
+/* Must be called with i_mutex held */
 static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size,
                               const char *name)
 {
         int rc;
 
-        LASSERT_SEM_LOCKED(&inode->i_sem);
+        LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
         lock_24kernel();
 
         rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED,
@@ -697,7 +730,7 @@ static int fsfilt_ext3_sync(struct super_block *sb)
 #undef EXT3_MULTIBLOCK_ALLOCATOR
 #endif
 #ifndef EXT3_EXTENTS_FL
-#define EXT3_EXTENTS_FL                        0x00080000 /* Inode uses extents */
+#define EXT3_EXTENTS_FL                 0x00080000 /* Inode uses extents */
 #endif
 
 #ifdef EXT3_MULTIBLOCK_ALLOCATOR
@@ -1928,7 +1961,8 @@ static int fsfilt_ext3_dquot(struct lustre_dquot *dquot, int cmd)
 static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_type                = "ext3",
         .fs_owner               = THIS_MODULE,
-        .fs_label               = fsfilt_ext3_label,
+        .fs_getlabel            = fsfilt_ext3_get_label,
+        .fs_setlabel            = fsfilt_ext3_set_label,
         .fs_uuid                = fsfilt_ext3_uuid,
         .fs_start               = fsfilt_ext3_start,
         .fs_brw_start           = fsfilt_ext3_brw_start,
index d00f555..d216d2b 100644 (file)
@@ -119,7 +119,7 @@ void mdc_create_pack(struct ptlrpc_request *req, int offset,
 static __u32 mds_pack_open_flags(__u32 flags)
 {
         return
-                (flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC |
+                (flags & (FMODE_READ | FMODE_WRITE |
                           MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
                           MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE)) |
                 ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) |
@@ -129,6 +129,9 @@ static __u32 mds_pack_open_flags(__u32 flags)
                 ((flags & O_SYNC) ? MDS_OPEN_SYNC : 0) |
                 ((flags & O_DIRECTORY) ? MDS_OPEN_DIRECTORY : 0) |
                 ((flags & O_JOIN_FILE) ? MDS_OPEN_JOIN_FILE : 0) |
+#ifdef FMODE_EXEC
+                ((flags & FMODE_EXEC) ? MDS_FMODE_EXEC : 0) |
+#endif
                 0;
 }
 
index b23349c..6c8812a 100644 (file)
@@ -809,7 +809,7 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen,
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov = *(int *)val;
-                CDEBUG(D_HA, "%s: set imp_no_init_recov = %d\n",
+                CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
                        exp->exp_obd->obd_name, imp->imp_initial_recov);
                 RETURN(0);
         }
@@ -1144,18 +1144,22 @@ int mdc_init_ea_size(struct obd_export *mdc_exp, struct obd_export *lov_exp)
         RETURN(0);
 }
 
-static int mdc_precleanup(struct obd_device *obd, int stage)
+static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
 
-        if (stage < OBD_CLEANUP_SELF_EXP)
-                RETURN(0);
-
-        rc = obd_llog_finish(obd, 0);
-        if (rc != 0)
-                CERROR("failed to cleanup llogging subsystems\n");
-
+        switch (stage) {
+        case OBD_CLEANUP_EARLY: 
+        case OBD_CLEANUP_EXPORTS:
+                break;
+        case OBD_CLEANUP_SELF_EXP:
+                rc = obd_llog_finish(obd, 0);
+                if (rc != 0)
+                        CERROR("failed to cleanup llogging subsystems\n");
+        case OBD_CLEANUP_OBD:
+                break;
+        }
         RETURN(rc);
 }
 
index 713b98a..6b2eb81 100644 (file)
 
 #include "mds_internal.h"
 
+int mds_num_threads;
+CFS_MODULE_PARM(mds_num_threads, "i", int, 0444,
+                "number of MDS service threads to start");
+
 static int mds_intent_policy(struct ldlm_namespace *ns,
                              struct ldlm_lock **lockp, void *req_cookie,
                              ldlm_mode_t mode, int flags, void *data);
@@ -371,12 +375,13 @@ out:
         RETURN(rc);
 }
 
-static int mds_init_export(struct obd_export *exp)
+int mds_init_export(struct obd_export *exp)
 {
         struct mds_export_data *med = &exp->exp_mds_data;
 
         INIT_LIST_HEAD(&med->med_open_head);
         spin_lock_init(&med->med_open_lock);
+        exp->exp_connecting = 1;
         RETURN(0);
 }
 
@@ -392,7 +397,7 @@ static int mds_destroy_export(struct obd_export *export)
         target_destroy_export(export);
 
         if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid))
-                GOTO(out, 0);
+                RETURN(0);
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         /* Close any open files (which may also cause orphan unlinking). */
@@ -425,7 +430,6 @@ static int mds_destroy_export(struct obd_export *export)
         }
         spin_unlock(&med->med_open_lock);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-out:
         mds_client_free(export);
 
         RETURN(rc);
@@ -486,6 +490,10 @@ static int mds_getstatus(struct ptlrpc_request *req)
         RETURN(0);
 }
 
+/* get the LOV EA from @inode and store it into @md.  It can be at most
+ * @size bytes, and @size is updated with the actual EA size.
+ * The EA size is also returned on success, and -ve errno on failure. 
+ * If there is no EA then 0 is returned. */
 int mds_get_md(struct obd_device *obd, struct inode *inode, void *md,
                int *size, int lock)
 {
@@ -493,7 +501,7 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md,
         int lmm_size;
 
         if (lock)
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
         rc = fsfilt_get_md(obd, inode, md, *size, "lov");
 
         if (rc < 0) {
@@ -513,14 +521,14 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md,
                 *size = 0;
         }
         if (lock)
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
 
         RETURN (rc);
 }
 
 
-/* Call with lock=1 if you want mds_pack_md to take the i_sem.
- * Call with lock=0 if the caller has already taken the i_sem. */
+/* Call with lock=1 if you want mds_pack_md to take the i_mutex.
+ * Call with lock=0 if the caller has already taken the i_mutex. */
 int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset,
                 struct mds_body *body, struct inode *inode, int lock)
 {
@@ -708,10 +716,10 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
 
         if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) ||
             (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) {
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
                 rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0,
                                    "lov");
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
                 if (rc < 0) {
@@ -1895,7 +1903,6 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
 
                 strncpy(mds->mds_profile, lustre_cfg_string(lcfg, 3),
                         LUSTRE_CFG_BUFLEN(lcfg, 3));
-
         }
 
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
@@ -1931,7 +1938,8 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                 str = "no UUID";
         }
 
-        label = fsfilt_label(obd, obd->u.obt.obt_sb);
+        label = fsfilt_get_label(obd, obd->u.obt.obt_sb);
+
         if (obd->obd_recovering) {
                 LCONSOLE_WARN("MDT %s now serving %s (%s%s%s), but will be in "
                               "recovery until %d %s reconnect, or if no clients"
@@ -1942,8 +1950,8 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                               obd->obd_name, lustre_cfg_string(lcfg, 1),
                               label ?: "", label ? "/" : "", str,
                               obd->obd_recoverable_clients,
-                              (obd->obd_recoverable_clients == 1)
-                              "client" : "clients",
+                              (obd->obd_recoverable_clients == 1) ?
+                              "client" : "clients",
                               (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60,
                               (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60,
                               obd->obd_name);
@@ -1955,7 +1963,6 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         }
 
         ldlm_timeout = 2;
-        ping_evictor_start();
 
         RETURN(0);
 
@@ -2111,12 +2118,14 @@ static int mds_lov_early_clean(struct obd_device *obd)
         return (obd_precleanup(osc, OBD_CLEANUP_EARLY));
 }
 
-static int mds_precleanup(struct obd_device *obd, int stage)
+static int mds_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
 
         switch (stage) {
+        case OBD_CLEANUP_EARLY:
+                break;
         case OBD_CLEANUP_EXPORTS:
                 target_cleanup_recovery(obd);
                 mds_lov_early_clean(obd);
@@ -2127,6 +2136,9 @@ static int mds_precleanup(struct obd_device *obd, int stage)
                 llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
                 llog_cleanup(llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT));
                 rc = obd_llog_finish(obd, 0);
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
         }
         RETURN(rc);
 }
@@ -2139,8 +2151,6 @@ static int mds_cleanup(struct obd_device *obd)
         int must_relock = 0;
         ENTRY;
 
-        ping_evictor_stop();
-
         if (obd->u.obt.obt_sb == NULL)
                 RETURN(0);
         save_dev = lvfs_sbdev(obd->u.obt.obt_sb);
@@ -2444,12 +2454,17 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
         sema_init(&mds->mds_health_sem, 1);
 
+        if (mds_num_threads < 2)
+                mds_num_threads = MDT_NUM_THREADS;
+        if (mds_num_threads > MDT_MAX_THREADS)
+                mds_num_threads = MDT_MAX_THREADS;
+
         mds->mds_service =
                 ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
                                 MDS_MAXREPSIZE, MDS_REQUEST_PORTAL,
                                 MDC_REPLY_PORTAL, MDS_SERVICE_WATCHDOG_TIMEOUT,
                                 mds_handle, LUSTRE_MDS_NAME,
-                                obd->obd_proc_entry, NULL, MDT_NUM_THREADS);
+                                obd->obd_proc_entry, NULL, mds_num_threads);
 
         if (!mds->mds_service) {
                 CERROR("failed to start service\n");
@@ -2465,7 +2480,7 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                                 MDS_MAXREPSIZE, MDS_SETATTR_PORTAL,
                                 MDC_REPLY_PORTAL, MDS_SERVICE_WATCHDOG_TIMEOUT,
                                 mds_handle, "mds_setattr",
-                                obd->obd_proc_entry, NULL, MDT_NUM_THREADS);
+                                obd->obd_proc_entry, NULL, mds_num_threads);
         if (!mds->mds_setattr_service) {
                 CERROR("failed to start getattr service\n");
                 GOTO(err_thread, rc = -ENOMEM);
@@ -2481,7 +2496,7 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                                 MDS_MAXREPSIZE, MDS_READPAGE_PORTAL,
                                 MDC_REPLY_PORTAL, MDS_SERVICE_WATCHDOG_TIMEOUT,
                                 mds_handle, "mds_readpage",
-                                obd->obd_proc_entry, NULL, MDT_NUM_THREADS);
+                                obd->obd_proc_entry, NULL, mds_num_threads);
         if (!mds->mds_readpage_service) {
                 CERROR("failed to start readpage service\n");
                 GOTO(err_thread2, rc = -ENOMEM);
@@ -2493,6 +2508,8 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         if (rc)
                 GOTO(err_thread3, rc);
 
+        ping_evictor_start();
+
         RETURN(0);
 
 err_thread3:
@@ -2514,6 +2531,8 @@ static int mdt_cleanup(struct obd_device *obd)
         struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
+        ping_evictor_stop();
+
         down(&mds->mds_health_sem);
         ptlrpc_unregister_service(mds->mds_readpage_service);
         ptlrpc_unregister_service(mds->mds_setattr_service);
index bc3a0f2..1f4d463 100644 (file)
@@ -261,13 +261,13 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 }
                 mount_count = le64_to_cpu(lsd->lsd_mount_count);
         }
+
         if (lsd->lsd_feature_incompat & ~cpu_to_le32(MDT_INCOMPAT_SUPP)) {
                 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
                        obd->obd_name, le32_to_cpu(lsd->lsd_feature_incompat) &
                        ~MDT_INCOMPAT_SUPP);
                 GOTO(err_msd, rc = -EINVAL);
         }
-
         if (lsd->lsd_feature_rocompat & ~cpu_to_le32(MDT_ROCOMPAT_SUPP)) {
                 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
                        obd->obd_name, le32_to_cpu(lsd->lsd_feature_rocompat) &
@@ -275,7 +275,6 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 /* Do something like remount filesystem read-only */
                 GOTO(err_msd, rc = -EINVAL);
         }
-
         if (!(lsd->lsd_feature_incompat & cpu_to_le32(OBD_INCOMPAT_COMMON_LR))){
                 CDEBUG(D_WARNING, "using old last_rcvd format\n");
                 lsd->lsd_mount_count = lsd->lsd_last_transno;
@@ -285,10 +284,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 lsd->lsd_feature_incompat |= cpu_to_le32(LR_INCOMPAT_COMMON_LR);
                 */
         }
-
+        lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
+        
         mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno);
 
-        lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
         CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
                obd->obd_name, mds->mds_last_transno);
         CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
@@ -355,23 +354,19 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                        last_transno, le64_to_cpu(lsd->lsd_last_transno),
                        le64_to_cpu(mcd->mcd_last_xid));
 
-                exp = class_new_export(obd);
-                if (exp == NULL)
-                        GOTO(err_client, rc = -ENOMEM);
+                exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid);
+                if (IS_ERR(exp))
+                        GOTO(err_client, rc = PTR_ERR(exp));
 
-                memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
-                       sizeof exp->exp_client_uuid.uuid);
                 med = &exp->exp_mds_data;
                 med->med_mcd = mcd;
                 rc = mds_client_add(obd, mds, med, cl_idx);
                 LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
 
-                /* create helper if export init gets more complex */
-                INIT_LIST_HEAD(&med->med_open_head);
-                spin_lock_init(&med->med_open_lock);
 
                 mcd = NULL;
                 exp->exp_replay_needed = 1;
+                exp->exp_connecting = 0;
                 obd->obd_recoverable_clients++;
                 obd->obd_max_recoverable_clients++;
                 class_export_put(exp);
@@ -674,7 +669,7 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
         oa->o_generation = filp->f_dentry->d_inode->i_generation;
         namelen = mds_fid2str(fidname, oa->o_id, oa->o_generation);
 
-        down(&parent_inode->i_sem);
+        LOCK_INODE_MUTEX(parent_inode);
         new_child = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
 
         if (IS_ERR(new_child)) {
@@ -709,7 +704,7 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
 out_dput:
         dput(new_child);
 out_close:
-        up(&parent_inode->i_sem);
+        UNLOCK_INODE_MUTEX(parent_inode);
         err = filp_close(filp, 0);
         if (err) {
                 CERROR("closing tmpfile %u: rc %d\n", tmpname, rc);
@@ -741,7 +736,7 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
 
         namelen = mds_fid2str(fidname, oa->o_id, oa->o_generation);
 
-        down(&parent_inode->i_sem);
+        LOCK_INODE_MUTEX(parent_inode);
         de = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
         if (IS_ERR(de)) {
                 rc = IS_ERR(de);
@@ -775,7 +770,7 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
 out_dput:
         if (de != NULL)
                 l_dput(de);
-        up(&parent_inode->i_sem);
+        UNLOCK_INODE_MUTEX(parent_inode);
 
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &ucred);
         RETURN(rc);
index 4c27432..3c53564 100644 (file)
@@ -229,6 +229,7 @@ extern struct lvfs_callback_ops mds_lvfs_ops;
 extern int mds_iocontrol(unsigned int cmd, struct obd_export *exp,
                          int len, void *karg, void *uarg);
 int mds_postrecov(struct obd_device *obd);
+int mds_init_export(struct obd_export *exp);
 #ifdef __KERNEL__
 int mds_get_md(struct obd_device *, struct inode *, void *md, int *size,
                int lock);
index 180094f..ec06360 100644 (file)
@@ -387,7 +387,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req,
                 GOTO(cleanup, rc);
         }
 
-        down(&head_inode->i_sem);
+        LOCK_INODE_MUTEX(head_inode);
         cleanup_phase = 1;
         rc = mds_get_md(obd, head_inode, head_lmm, &size, 0);
         if (rc < 0)
@@ -493,7 +493,7 @@ cleanup:
 
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         case 1:
-                up(&head_inode->i_sem);
+                UNLOCK_INODE_MUTEX(head_inode);
         case 0:
                 if (tail_lmm != NULL)
                         OBD_FREE(tail_lmm, lmm_size);
index 94d3076..57e7e09 100644 (file)
@@ -221,17 +221,15 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
         /* Don't change the mds_lov_desc until the objids size matches the
            count (paranoia) */
         mds->mds_lov_desc = *ld;
-        
-        CDEBUG(D_HA, "updated lov_desc, tgt_count: %d\n",
+        CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n",
                mds->mds_lov_desc.ld_tgt_count);
 
-        stripes = min(mds->mds_lov_desc.ld_tgt_count,
-                      (__u32)LOV_MAX_STRIPE_COUNT);
-
+        stripes = min((__u32)LOV_MAX_STRIPE_COUNT, 
+                      max(mds->mds_lov_desc.ld_tgt_count,
+                          mds->mds_lov_objids_in_file));
         mds->mds_max_mdsize = lov_mds_md_size(stripes);
         mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
-      
-        CDEBUG(D_HA|D_WARNING, "updated max_mdsize/max_cookiesize: %d/%d\n",
+        CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize: %d/%d\n",
                mds->mds_max_mdsize, mds->mds_max_cookiesize);
 
 out:
@@ -257,7 +255,7 @@ static int mds_lov_update_mds(struct obd_device *obd,
         if (rc)
                 RETURN(rc);
 
-        CDEBUG(D_ERROR, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
+        CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
                idx, obd->obd_recovering, obd->obd_async_recov, old_count, 
                mds->mds_lov_desc.ld_tgt_count);
 
@@ -294,7 +292,7 @@ static int mds_lov_update_mds(struct obd_device *obd,
         /* If we added a target we have to reconnect the llogs */
         /* Only do this at first add (idx), or the first time after recovery */
         if (idx != MDSLOV_NO_INDEX || 1/*FIXME*/) {
-                CDEBUG(D_CONFIG|D_WARNING, "reset llogs idx=%d\n", idx);
+                CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx);
                 /* These two must be atomic */
                 down(&mds->mds_orphan_recovery_sem);
                 obd_llog_finish(obd, old_count);
@@ -330,7 +328,8 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
         OBD_ALLOC(data, sizeof(*data));
         if (data == NULL)
                 RETURN(-ENOMEM);
-        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
+        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX |
+                                  OBD_CONNECT_REQPORTAL;
         data->ocd_version = LUSTRE_VERSION_CODE;
         /* NB: lov_connect() needs to fill in .ocd_index for each OST */
         rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid, data);
@@ -636,7 +635,7 @@ static int __mds_lov_synchronize(void *data)
         ENTRY;
 
         if (watched) 
-                uuid = &watched->u.cli.cl_import->imp_target_uuid;
+                uuid = &watched->u.cli.cl_target_uuid;
 
         OBD_FREE(mlsi, sizeof(*mlsi));
 
@@ -668,7 +667,7 @@ static int __mds_lov_synchronize(void *data)
                 GOTO(out, rc);
         }
 
-        CWARN("MDS %s: %s now active, resetting orphans\n",
+        LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n",
               obd->obd_name, (char *)uuid->uuid);
 
         if (obd->obd_stopping)
@@ -688,17 +687,11 @@ out:
 
 int mds_lov_synchronize(void *data)
 {
-        unsigned long flags;
-        ENTRY;
-
-        lock_kernel();
-        ptlrpc_daemonize();
+        struct mds_lov_sync_info *mlsi = data;
+        char name[20];
 
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        unlock_kernel();
+        sprintf(name, "ll_mlov_sync_%02u", mlsi->mlsi_index);
+        ptlrpc_daemonize(name);
 
         RETURN(__mds_lov_synchronize(data));
 }
@@ -733,7 +726,7 @@ int mds_lov_start_synchronize(struct obd_device *obd,
            still disconnected. Taking an obd reference insures that we don't
            disconnect the LOV.  This of course means a cleanup won't
            finish for as long as the sync is blocking. */
-        atomic_inc(&obd->obd_refcount);
+        class_incref(obd);
 
         if (nonblock) {
                 /* Synchronize in the background */
@@ -781,8 +774,8 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched,
 
         if (obd->obd_recovering) {
                 CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
-                      obd->obd_name,
-                      watched->u.cli.cl_import->imp_target_uuid.uuid);
+                      obd->obd_name, 
+                      obd_uuid2str(&watched->u.cli.cl_target_uuid));
                 /* We still have to fix the lov descriptor for ost's added 
                    after the mdt in the config log.  They didn't make it into
                    mds_lov_connect. */
index 41f4987..27f5a9c 100644 (file)
@@ -271,7 +271,7 @@ static struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                 if (error)
                         GOTO(cleanup_mfd, error);
                 body->io_epoch = MDS_FILTERDATA(dentry->d_inode)->io_epoch;
-        } else if (flags & FMODE_EXEC) {
+        } else if (flags & MDS_FMODE_EXEC) {
                 error = mds_deny_write_access(mds, dentry->d_inode);
                 if (error)
                         GOTO(cleanup_mfd, error);
@@ -303,7 +303,7 @@ cleanup_dentry:
         return ERR_PTR(error);
 }
 
-/* Must be called with i_sem held */
+/* Must be called with i_mutex held */
 static int mds_create_objects(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *rec,
                               struct mds_obd *mds, struct obd_device *obd,
@@ -361,11 +361,8 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
 
                 rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
                 lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
-                if (!lmm_buf) {
-                        if (!rc) rc = -ENOMEM;
-                } else {
-                        memcpy(lmm_buf, lmm, lmm_size);
-                }
+                LASSERT(lmm_buf);
+                memcpy(lmm_buf, lmm, lmm_size);
                 if (rc)
                         CERROR("open replay failed to set md:%d\n", rc);
                 RETURN(rc);
@@ -480,11 +477,8 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
 
         rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
         lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
-        if (!lmm_buf) {
-                if (!rc) rc = -ENOMEM;
-        } else {
-                memcpy(lmm_buf, lmm, lmm_size);
-        }
+        LASSERT(lmm_buf);
+        memcpy(lmm_buf, lmm, lmm_size);
         obd_free_diskmd(mds->mds_osc_exp, &lmm);
  out_oa:
         oti_free_cookies(&oti);
@@ -661,7 +655,7 @@ static int accmode(struct inode *inode, int flags)
                 res = MAY_READ;
         if (flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
                 res |= MAY_WRITE;
-        if (flags & FMODE_EXEC)
+        if (flags & MDS_FMODE_EXEC)
                 res = MAY_EXEC;
         return res;
 }
@@ -680,38 +674,38 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
         ENTRY;
 
         /* atomically create objects if necessary */
-        down(&dchild->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dchild->d_inode);
 
         if (S_ISREG(dchild->d_inode->i_mode) &&
             !(body->valid & OBD_MD_FLEASIZE)) {
                 rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
                                  dchild->d_inode, 0);
                 if (rc) {
-                        up(&dchild->d_inode->i_sem);
+                        UNLOCK_INODE_MUTEX(dchild->d_inode);
                         RETURN(rc);
                 }
         }
         if (rec != NULL) {
                 if ((body->valid & OBD_MD_FLEASIZE) &&
                     (rec->ur_flags & MDS_OPEN_HAS_EA)) {
-                        up(&dchild->d_inode->i_sem);
+                        UNLOCK_INODE_MUTEX(dchild->d_inode);
                         RETURN(-EEXIST);
                 }
-                if (rec->ur_flags & MDS_OPEN_JOIN_FILE) {
-                        up(&dchild->d_inode->i_sem);
-                        rc = mds_join_file(rec, req, dchild, lockh);
+                if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { 
+                        UNLOCK_INODE_MUTEX(dchild->d_inode);
+                        rc = mds_join_file(rec, req, dchild, lockh); 
                         if (rc)
                                 RETURN(rc);
-                        down(&dchild->d_inode->i_sem);
-                }
-                if (!(body->valid & OBD_MD_FLEASIZE) &&
+                        LOCK_INODE_MUTEX(dchild->d_inode);
+                } 
+                if (!(body->valid & OBD_MD_FLEASIZE) && 
                     !(body->valid & OBD_MD_FLMODEASIZE)) {
                         /* no EA: create objects */
                         rc = mds_create_objects(req, 2, rec, mds, obd,
                                                 dchild, handle, &ids);
                         if (rc) {
                                 CERROR("mds_create_objects: rc = %d\n", rc);
-                                up(&dchild->d_inode->i_sem);
+                                UNLOCK_INODE_MUTEX(dchild->d_inode);
                                 RETURN(rc);
                         }
                 }
@@ -722,7 +716,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
                 body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                 OBD_MD_FLATIME | OBD_MD_FLMTIME);
         }
-        up(&dchild->d_inode->i_sem);
+        UNLOCK_INODE_MUTEX(dchild->d_inode);
 
         if (!(rec->ur_flags & MDS_OPEN_JOIN_FILE))
                 lustre_shrink_reply(req, 2, body->eadatasize, 0);
@@ -1146,7 +1140,7 @@ found_child:
 }
 
 /* Close a "file descriptor" and possibly unlink an orphan from the
- * PENDING directory.  Caller must hold child->i_sem, this drops it.
+ * PENDING directory.  Caller must hold child->i_mutex, this drops it.
  *
  * If we are being called from mds_disconnect() because the client has
  * disappeared, then req == NULL and we do not update last_rcvd because
@@ -1189,7 +1183,7 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd,
         if (mfd->mfd_mode & FMODE_WRITE) {
                 rc = mds_put_write_access(mds, inode, request_body,
                                           last_orphan && unlink_orphan);
-        } else if (mfd->mfd_mode & FMODE_EXEC) {
+        } else if (mfd->mfd_mode & MDS_FMODE_EXEC) {
                 mds_allow_write_access(inode);
         }
 
@@ -1209,8 +1203,8 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd,
                 /* Sadly, there is no easy way to save pending_child from
                  * mds_reint_unlink() into mfd, so we need to re-lookup,
                  * but normally it will still be in the dcache. */
-                down(&pending_dir->i_sem);
-                cleanup_phase = 1; /* up(&pending_dir->i_sem) when finished */
+                LOCK_INODE_MUTEX(pending_dir);
+                cleanup_phase = 1; /* UNLOCK_INODE_MUTEX(pending_dir) when finished */
                 pending_child = lookup_one_len(fidname, mds->mds_pending_dir,
                                                fidlen);
                 if (IS_ERR(pending_child))
@@ -1330,7 +1324,7 @@ out:
         case 2:
                 dput(pending_child);
         case 1:
-                up(&pending_dir->i_sem);
+                UNLOCK_INODE_MUTEX(pending_dir);
         }
         RETURN(rc);
 }
index 70b0d16..76a0c46 100644 (file)
@@ -113,6 +113,11 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
         int log_pri = D_HA;
         ENTRY;
 
+        if (IS_ERR(handle)) {
+                LASSERT(rc != 0);
+                RETURN(rc);
+        }
+
         /* if the export has already been failed, we have no last_rcvd slot */
         if (req->rq_export->exp_failed) {
                 CWARN("commit transaction for disconnected client %s: rc %d\n",
@@ -124,9 +129,6 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
                 RETURN(rc);
         }
 
-        if (IS_ERR(handle))
-                RETURN(rc);
-
         if (handle == NULL) {
                 /* if we're starting our own xaction, use our own inode */
                 inode = mds->mds_rcvd_filp->f_dentry->d_inode;
@@ -511,7 +513,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
 
         if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) &&
             rec->ur_eadata != NULL) {
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
                 need_lock = 0;
         }
 
@@ -652,7 +654,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         case 1:
                 if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) &&
                     rec->ur_eadata != NULL)
-                        up(&inode->i_sem);
+                        UNLOCK_INODE_MUTEX(inode);
                 l_dput(de);
                 if (locked) {
                         if (rc) {
@@ -809,7 +811,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 int rdev = rec->ur_rdev;
                 handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL);
                 if (IS_ERR(handle))
-                        GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
+                        GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
                 EXIT;
                 break;
@@ -869,10 +871,10 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                         int lmm_size = sizeof(lmm);
                         rc = mds_get_md(obd, dir, &lmm, &lmm_size, 1);
                         if (rc > 0) {
-                                down(&inode->i_sem);
+                                LOCK_INODE_MUTEX(inode);
                                 rc = fsfilt_set_md(obd, inode, handle,
                                                    &lmm, lmm_size, "lov");
-                                up(&inode->i_sem);
+                                UNLOCK_INODE_MUTEX(inode);
                         }
                         if (rc)
                                 CERROR("error on copy stripe info: rc = %d\n",
@@ -1307,7 +1309,7 @@ retry_locks:
         if (rc > 0)
                 goto retry_locks;
         if (rc < 0) {
-                cleanup_phase = 3;
+                cleanup_phase = 2;
                 GOTO(cleanup, rc);
         }
 
@@ -1341,8 +1343,8 @@ void mds_reconstruct_generic(struct ptlrpc_request *req)
  * part thereof, because we don't have the inode to check for link
  * count/open status until after it is locked.
  *
- * For lock ordering, caller must get child->i_sem first, then pending->i_sem
- * before starting journal transaction.
+ * For lock ordering, caller must get child->i_mutex first, then
+ * pending->i_mutex before starting journal transaction.
  *
  * returns 1 on success
  * returns 0 if we lost a race and didn't make a new link
@@ -1362,9 +1364,9 @@ static int mds_orphan_add_link(struct mds_update_record *rec,
         LASSERT(inode != NULL);
         LASSERT(!mds_inode_is_orphan(inode));
 #ifndef HAVE_I_ALLOC_SEM
-        LASSERT(down_trylock(&inode->i_sem) != 0);
+        LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
 #endif
-        LASSERT(down_trylock(&pending_dir->i_sem) != 0);
+        LASSERT(TRYLOCK_INODE_MUTEX(pending_dir) == 0);
 
         fidlen = mds_fid2str(fidname, inode->i_ino, inode->i_generation);
 
@@ -1540,8 +1542,8 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
             child_inode->i_nlink == 1) {
                 if (mds_orphan_open_count(child_inode) > 0) {
                         /* need to lock pending_dir before transaction */
-                        down(&mds->mds_pending_dir->d_inode->i_sem);
-                        cleanup_phase = 5; /* up(&pending_dir->i_sem) */
+                        LOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode);
+                        cleanup_phase = 5; /* UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); */
                 } else if (S_ISREG(child_inode->i_mode)) {
                         mds_pack_inode2fid(&body->fid1, child_inode);
                         mds_pack_inode2body(body, child_inode);
@@ -1636,7 +1638,7 @@ cleanup:
                                    "unlinked", 0, NULL);
         switch(cleanup_phase) {
         case 5: /* pending_dir semaphore */
-                up(&mds->mds_pending_dir->d_inode->i_sem);
+                UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode);
         case 4: /* child inode semaphore */
                 MDS_UP_READ_ORPHAN_SEM(child_inode);
         case 3: /* child ino-reuse lock */
@@ -1769,10 +1771,8 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
                 GOTO(cleanup, rc = -EROFS);
 
         handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL);
-        if (IS_ERR(handle)) {
-                rc = PTR_ERR(handle);
-                GOTO(cleanup, rc);
-        }
+        if (IS_ERR(handle))
+                GOTO(cleanup, rc = PTR_ERR(handle));
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
         if (rc && rc != -EPERM && rc != -EACCES)
@@ -2103,8 +2103,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
             new_inode->i_nlink == 1) {
                 if (mds_orphan_open_count(new_inode) > 0) {
                         /* need to lock pending_dir before transaction */
-                        down(&mds->mds_pending_dir->d_inode->i_sem);
-                        cleanup_phase = 4; /* up(&pending_dir->i_sem) */
+                        LOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode);
+                        cleanup_phase = 4; /* UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); */
                 } else if (S_ISREG(new_inode->i_mode)) {
                         mds_pack_inode2fid(&body->fid1, new_inode);
                         mds_pack_inode2body(body, new_inode);
@@ -2167,7 +2167,7 @@ cleanup:
 
         switch (cleanup_phase) {
         case 4:
-                up(&mds->mds_pending_dir->d_inode->i_sem);
+                UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode);
         case 3:
                 MDS_UP_READ_ORPHAN_SEM(new_inode);
         case 2:
index 925967a..8d30235 100644 (file)
@@ -221,10 +221,10 @@ int mds_cleanup_pending(struct obd_device *obd)
                     ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0)
                         continue;
 
-                down(&pending_dir->i_sem);
+                LOCK_INODE_MUTEX(pending_dir);
                 dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen);
                 if (IS_ERR(dchild)) {
-                        up(&pending_dir->i_sem);
+                        UNLOCK_INODE_MUTEX(pending_dir);
                         GOTO(err_out, rc = PTR_ERR(dchild));
                 }
                 if (!dchild->d_inode) {
@@ -263,7 +263,7 @@ int mds_cleanup_pending(struct obd_device *obd)
                 }
 next:
                 l_dput(dchild);
-                up(&pending_dir->i_sem);
+                UNLOCK_INODE_MUTEX(pending_dir);
         }
         rc = 0;
 err_out:
index 4d50f3d..836f675 100644 (file)
@@ -272,20 +272,20 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body)
                                 xattr = lustre_msg_buf(req->rq_reqmsg, 2,
                                                        xattrlen);
 
-                        down(&inode->i_sem);
+                        LOCK_INODE_MUTEX(inode);
                         lock_24kernel();
                         rc = inode->i_op->setxattr(de, xattr_name, xattr,
                                                    xattrlen, body->flags);
                         unlock_24kernel();
-                        up(&inode->i_sem);
+                        UNLOCK_INODE_MUTEX(inode);
                 }
         } else if (body->valid & OBD_MD_FLXATTRRM) {
                 if (inode->i_op && inode->i_op->removexattr) {
-                        down(&inode->i_sem);
+                        LOCK_INODE_MUTEX(inode);
                         lock_24kernel();
                         rc = inode->i_op->removexattr(de, xattr_name);
                         unlock_24kernel();
-                        up(&inode->i_sem);
+                        UNLOCK_INODE_MUTEX(inode);
                 }
         } else {
                 CERROR("valid bits: "LPX64"\n", body->valid);
index 1f87715..4e237c4 100644 (file)
@@ -222,6 +222,22 @@ static int mdt_getattr(struct mdt_thread_info *info,
         RETURN(result);
 }
 
+static struct lu_device_operations mdt_lu_ops;
+
+static int lu_device_is_mdt(struct lu_device *d)
+{
+        /*
+         * XXX for now. Tags in lu_device_type->ldt_something are needed.
+         */
+        return ergo(d->ld_ops != NULL, d->ld_ops == &mdt_lu_ops);
+}
+
+static struct mdt_device *mdt_dev(struct lu_device *d)
+{
+        LASSERT(lu_device_is_mdt(d));
+        return container_of(d, struct mdt_device, mdt_md_dev.md_lu_dev);
+}
+
 static int mdt_connect(struct mdt_thread_info *info,
                        struct ptlrpc_request *req, int offset)
 {
@@ -229,10 +245,9 @@ static int mdt_connect(struct mdt_thread_info *info,
 
         result = target_handle_connect(req, mdt_handle);
         if (result == 0) {
+                struct mdt_device *mdt = info->mti_mdt;
                 struct obd_connect_data *data;
-                struct mdt_device *mdt;
 
-                mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev);
                 data = lustre_msg_buf(req->rq_repmsg, 0, sizeof *data);
                 result = seq_mgr_alloc(&info->mti_ctxt,
                                        mdt->mdt_seq_mgr, &data->ocd_seq);
@@ -426,16 +441,6 @@ void fid_unlock(struct ldlm_namespace *ns, const struct lu_fid *f,
         EXIT;
 }
 
-static struct lu_device_operations mdt_lu_ops;
-
-static int lu_device_is_mdt(struct lu_device *d)
-{
-        /*
-         * XXX for now. Tags in lu_device_type->ldt_something are needed.
-         */
-        return ergo(d->ld_ops != NULL, d->ld_ops == &mdt_lu_ops);
-}
-
 static struct mdt_object *mdt_obj(struct lu_object *o)
 {
         LASSERT(lu_device_is_mdt(o->lo_dev));
@@ -922,12 +927,6 @@ static int mdt_handle0(struct ptlrpc_request *req, struct mdt_thread_info *info)
         RETURN(result);
 }
 
-static struct mdt_device *mdt_dev(struct lu_device *d)
-{
-        LASSERT(lu_device_is_mdt(d));
-        return container_of(d, struct mdt_device, mdt_md_dev.md_lu_dev);
-}
-
 static int mdt_handle(struct ptlrpc_request *req)
 {
         int result;
index 9c6b1d2..2992d84 100644 (file)
@@ -28,7 +28,7 @@
 # define EXPORT_SYMTAB
 #endif
 #define DEBUG_SUBSYSTEM S_MGC
-#define D_MGC D_CONFIG/*|D_WARNING*/
+#define D_MGC D_CONFIG /*|D_WARNING*/
 
 #ifdef __KERNEL__
 # include <linux/module.h>
@@ -63,6 +63,10 @@ int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id)
                 CERROR("fsname too long: %s\n", logname);
                 return -EINVAL;
         }
+        if (len <= 0) {
+                CERROR("missing fsname: %s\n", logname);
+                return -EINVAL;
+        }
         memcpy(&resname, logname, len);
 
         memset(res_id, 0, sizeof(*res_id));
@@ -78,19 +82,20 @@ EXPORT_SYMBOL(mgc_logname2resid);
 static struct list_head config_llog_list = LIST_HEAD_INIT(config_llog_list);
 static spinlock_t       config_list_lock = SPIN_LOCK_UNLOCKED;
 
+/* Take a reference to a config log */
 static int config_log_get(struct config_llog_data *cld)
 {
         ENTRY;
         CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
                atomic_read(&cld->cld_refcount));
-        atomic_inc(&cld->cld_refcount);
-        if (cld->cld_stopping) {
-                atomic_dec(&cld->cld_refcount);
+        if (cld->cld_stopping)
                 RETURN(1);
-        }
+        atomic_inc(&cld->cld_refcount);
         RETURN(0);
 }
 
+/* Drop a reference to a config log.  When no longer referenced, 
+   we can free the config log data */
 static void config_log_put(struct config_llog_data *cld)
 {
         ENTRY;
@@ -107,7 +112,8 @@ static void config_log_put(struct config_llog_data *cld)
         EXIT;
 }
 
-static struct config_llog_data *config_log_find(char *logname,
+/* Find a config log by name */
+static struct config_llog_data *config_log_find(char *logname, 
                                                struct config_llog_instance *cfg)
 {
         struct list_head *tmp;
@@ -240,14 +246,14 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct client_obd *cli = &obd->u.cli;
         struct dentry *dentry;
+        char *label;
         int err = 0;
         ENTRY;
 
         LASSERT(lsi);
         LASSERT(lsi->lsi_srv_mnt == mnt);
 
-        /* The mgc fs exclusion sem. Only one fs can be setup at a time.
-           Maybe just overload the cl_sem? */
+        /* The mgc fs exclusion sem. Only one fs can be setup at a time. */
         down(&cli->cl_mgc_sem);
 
         obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
@@ -259,8 +265,8 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         }
 
         cli->cl_mgc_vfsmnt = mnt;
-        // FIXME which is the right SB? - filter_common_setup also
-        CDEBUG(D_MGC, "SB's: fill=%p mnt=%p root=%p\n", sb, mnt->mnt_sb,
+        // FIXME which is the right SB? - filter_common_setup also 
+        CDEBUG(D_MGC, "SB's: fill=%p mnt=%p == root=%p\n", sb, mnt->mnt_sb,
                mnt->mnt_root->d_inode->i_sb);
         fsfilt_setup(obd, mnt->mnt_sb);
 
@@ -281,6 +287,14 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         }
         cli->cl_mgc_configs_dir = dentry;
 
+        /* We take an obd ref to insure that we can't get to mgc_cleanup
+           without calling mgc_fs_cleanup first. */
+        class_incref(obd);
+
+        label = fsfilt_get_label(obd, mnt->mnt_sb);
+        if (label)
+                CDEBUG(D_MGC, "MGC using disk labelled=%s\n", label);
+
         /* We keep the cl_mgc_sem until mgc_fs_cleanup */
         RETURN(0);
 
@@ -306,6 +320,7 @@ static int mgc_fs_cleanup(struct obd_device *obd)
                 l_dput(cli->cl_mgc_configs_dir);
                 cli->cl_mgc_configs_dir = NULL;
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+                class_decref(obd);
         }
 
         cli->cl_mgc_vfsmnt = NULL;
@@ -316,24 +331,40 @@ static int mgc_fs_cleanup(struct obd_device *obd)
         RETURN(rc);
 }
 
+static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+        int rc = 0;
+        ENTRY;
+
+        switch (stage) {
+        case OBD_CLEANUP_EARLY: 
+        case OBD_CLEANUP_EXPORTS:
+                break;
+        case OBD_CLEANUP_SELF_EXP:
+                rc = obd_llog_finish(obd, 0);
+                if (rc != 0)
+                        CERROR("failed to cleanup llogging subsystems\n");
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
+        }
+        RETURN(rc);
+}
+
 static int mgc_cleanup(struct obd_device *obd)
 {
         struct client_obd *cli = &obd->u.cli;
         int rc;
+        ENTRY;
 
-        /* FIXME calls to mgc_fs_setup must take an obd ref to insure there's
-           no fs by the time we get here. */
         LASSERT(cli->cl_mgc_vfsmnt == NULL);
-
-        rc = obd_llog_finish(obd, 0);
-        if (rc != 0)
-                CERROR("failed to cleanup llogging subsystems\n");
+        
+        config_log_end_all();
 
         ptlrpcd_decref();
 
-        config_log_end_all();
-
-        return client_obd_cleanup(obd);
+        rc = client_obd_cleanup(obd);
+        RETURN(rc);
 }
 
 static struct obd_device *the_mgc;
@@ -376,7 +407,7 @@ static int mgc_async_requeue(void *data)
         wait_queue_head_t   waitq;
         struct l_wait_info  lwi;
         struct config_llog_data *cld = (struct config_llog_data *)data;
-        unsigned long flags;
+        char name[24];
         int rc = 0;
         ENTRY;
 
@@ -385,15 +416,9 @@ static int mgc_async_requeue(void *data)
         if (cld->cld_stopping)
                 GOTO(out, rc = 0);
 
-        lock_kernel();
-        ptlrpc_daemonize();
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "reQ %s",
-                    cld->cld_logname);
-        unlock_kernel();
+        snprintf(name, sizeof(name), "ll_log_%s", cld->cld_logname);
+        name[sizeof(name)-1] = '\0';
+        ptlrpc_daemonize(name);
 
         CDEBUG(D_MGC, "requeue "LPX64" %s:%s\n",
                cld->cld_resid.name[0], cld->cld_logname,
@@ -660,17 +685,17 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
         }
         /* Turn off initial_recov after we try all backup servers once */
         if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
+                int value;
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
-                imp->imp_initial_recov_bk = *(int *)val;
-                CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n",
-                       exp->exp_obd->obd_name, imp->imp_initial_recov_bk);
-                if (imp->imp_invalid) {
+                value = *(int *)val;
+                imp->imp_initial_recov_bk = value > 0;
+                if (imp->imp_invalid || value > 1) {
                         /* Resurrect if we previously died */
-                        CDEBUG(D_MGC, "Reactivate %s %d:%d:%d\n",
-                               imp->imp_obd->obd_name,
-                               imp->imp_deactive, imp->imp_invalid,
-                               imp->imp_state);
+                        CDEBUG(D_MGC, "Reactivate %s %d:%d:%d:%s\n", 
+                               imp->imp_obd->obd_name, value,
+                               imp->imp_deactive, imp->imp_invalid, 
+                               ptlrpc_import_state_name(imp->imp_state));
                         /* can't put this in obdclass, module loop with ptlrpc*/
                         /* This seems to be necessary when restarting a
                            combo mgs/mdt while the mgc is alive */
@@ -730,14 +755,15 @@ static int mgc_import_event(struct obd_device *obd,
         switch (event) {
         case IMP_EVENT_INVALIDATE: {
                 struct ldlm_namespace *ns = obd->obd_namespace;
-
                 ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
-
                 break;
         }
-        case IMP_EVENT_DISCON:
-        case IMP_EVENT_INACTIVE:
-        case IMP_EVENT_ACTIVE:
+        case IMP_EVENT_DISCON: 
+                /* MGC imports should not wait for recovery */
+                ptlrpc_invalidate_import(imp);
+                break;
+        case IMP_EVENT_INACTIVE: 
+        case IMP_EVENT_ACTIVE: 
         case IMP_EVENT_OCD:
                 break;
         default:
@@ -886,7 +912,7 @@ static int mgc_process_log(struct obd_device *mgc,
         struct client_obd *cli = &mgc->u.cli;
         struct lvfs_run_ctxt saved;
         struct lustre_sb_info *lsi;
-        int rc, rcl, flags = 0, must_pop = 0;
+        int rc = 0, rcl, flags = 0, must_pop = 0;
         ENTRY;
 
         if (!cld || !cld->cld_cfg.cfg_sb) {
@@ -1056,6 +1082,7 @@ out:
 struct obd_ops mgc_obd_ops = {
         .o_owner        = THIS_MODULE,
         .o_setup        = mgc_setup,
+        .o_precleanup   = mgc_precleanup,
         .o_cleanup      = mgc_cleanup,
         .o_add_conn     = client_import_add_conn,
         .o_del_conn     = client_import_del_conn,
index 9f89bb4..e5dfea7 100644 (file)
@@ -173,7 +173,7 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
         /* Internal mgs setup */
         mgs_init_fsdb_list(obd);
-        sema_init(&mgs->mgs_log_sem, 1);
+        sema_init(&mgs->mgs_sem, 1);
 
         /* Start the service threads */
         mgs->mgs_service =
@@ -188,7 +188,7 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 GOTO(err_fs, rc = -ENOMEM);
         }
 
-        rc = ptlrpc_start_threads(obd, mgs->mgs_service, "lustre_mgs");
+        rc = ptlrpc_start_threads(obd, mgs->mgs_service, "ll_mgs");
         if (rc)
                 GOTO(err_thread, rc);
 
@@ -196,7 +196,6 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         lprocfs_init_vars(mgs, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 
-        ldlm_timeout = 6;
         ping_evictor_start();
 
         LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
@@ -219,20 +218,36 @@ err_put:
         return rc;
 }
 
-static int mgs_precleanup(struct obd_device *obd, int stage)
+static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
 
         switch (stage) {
+        case OBD_CLEANUP_EARLY:
+        case OBD_CLEANUP_EXPORTS:
+                break;
         case OBD_CLEANUP_SELF_EXP:
-                mgs_cleanup_fsdb_list(obd);
                 llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
                 rc = obd_llog_finish(obd, 0);
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
         }
         RETURN(rc);
 }
 
+static int mgs_ldlm_nsfree(void *data)
+{
+        struct ldlm_namespace *ns = (struct ldlm_namespace *)data;
+        int rc;
+        ENTRY;
+
+        ptlrpc_daemonize("ll_mgs_nsfree");
+        rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */);
+        RETURN(rc);
+}
+
 static int mgs_cleanup(struct obd_device *obd)
 {
         struct mgs_obd *mgs = &obd->u.mgs;
@@ -245,26 +260,28 @@ static int mgs_cleanup(struct obd_device *obd)
                 RETURN(0);
 
         save_dev = lvfs_sbdev(mgs->mgs_sb);
+        
+        ptlrpc_unregister_service(mgs->mgs_service);
 
         lprocfs_obd_cleanup(obd);
 
-        ptlrpc_unregister_service(mgs->mgs_service);
+        mgs_cleanup_fsdb_list(obd);
 
         mgs_fs_cleanup(obd);
 
         server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
         mgs->mgs_sb = NULL;
 
-        ldlm_namespace_free(obd->obd_namespace, obd->obd_force);
-
-        LASSERT(!obd->obd_recovering);
+        /* Free the namespace in it's own thread, so that if the 
+           ldlm_cancel_handler put the last mgs obd ref, we won't 
+           deadlock here. */
+        kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, CLONE_VM | CLONE_FS);
 
         lvfs_clear_rdonly(save_dev);
 
         fsfilt_put_ops(obd->obd_fsops);
 
         LCONSOLE_INFO("%s has stopped.\n", obd->obd_name);
-
         RETURN(0);
 }
 
index 8181e0b..94d8607 100644 (file)
@@ -278,7 +278,6 @@ static void mgs_free_fsdb(struct fs_db *fsdb)
 int mgs_init_fsdb_list(struct obd_device *obd)
 {
         struct mgs_obd *mgs = &obd->u.mgs;
-        spin_lock_init(&mgs->mgs_fs_db_lock);
         INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
         return 0;
 }
@@ -288,12 +287,12 @@ int mgs_cleanup_fsdb_list(struct obd_device *obd)
         struct mgs_obd *mgs = &obd->u.mgs;
         struct fs_db *fsdb;
         struct list_head *tmp, *tmp2;
-        spin_lock(&mgs->mgs_fs_db_lock);
+        down(&mgs->mgs_sem);
         list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
                 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
                 mgs_free_fsdb(fsdb);
         }
-        spin_unlock(&mgs->mgs_fs_db_lock);
+        up(&mgs->mgs_sem);
         return 0;
 }
 
@@ -322,17 +321,17 @@ static int mgs_find_or_make_fsdb(struct obd_device *obd, char *name,
         char *cliname;
         int rc = 0;
 
-        spin_lock(&mgs->mgs_fs_db_lock);
+        down(&mgs->mgs_sem);
         fsdb = mgs_find_fsdb(obd, name);
         if (fsdb) {
-                spin_unlock(&mgs->mgs_fs_db_lock);
+                up(&mgs->mgs_sem);
                 *dbh = fsdb;
                 return 0;
         }
 
         CDEBUG(D_MGS, "Creating new db\n");
         fsdb = mgs_new_fsdb(obd, name);
-        spin_unlock(&mgs->mgs_fs_db_lock);
+        up(&mgs->mgs_sem);
         if (!fsdb) 
                 return -ENOMEM;
 
@@ -2202,11 +2201,11 @@ int mgs_erase_logs(struct obd_device *obd, char *fsname)
         }
                                                                                 
         /* Delete the fs db */
-        spin_lock(&mgs->mgs_fs_db_lock);
+        down(&mgs->mgs_sem);
         fsdb = mgs_find_fsdb(obd, fsname);
         if (fsdb) 
                 mgs_free_fsdb(fsdb);
-        spin_unlock(&mgs->mgs_fs_db_lock);
+        up(&mgs->mgs_sem);
 
         list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
                 list_del(&dirent->lld_list);
index 4f9819a..943f436 100644 (file)
@@ -95,7 +95,6 @@ unsigned int obd_timeout = 100; /* seconds */
 unsigned int ldlm_timeout = 20; /* seconds */
 unsigned int obd_health_check_timeout = 120; /* seconds */
 char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall  */
-unsigned int obd_sync_filter; /* = 0, don't sync by default */
 
 DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq);
 
@@ -383,9 +382,7 @@ EXPORT_SYMBOL(obd_timeout);
 EXPORT_SYMBOL(ldlm_timeout);
 EXPORT_SYMBOL(obd_health_check_timeout);
 EXPORT_SYMBOL(obd_lustre_upcall);
-EXPORT_SYMBOL(obd_sync_filter);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
-EXPORT_SYMBOL(ptlrpc_abort_inflight_superhack);
 
 struct proc_dir_entry *proc_lustre_root;
 EXPORT_SYMBOL(proc_lustre_root);
@@ -419,7 +416,8 @@ EXPORT_SYMBOL(class_handle_hash);
 EXPORT_SYMBOL(class_handle_unhash);
 EXPORT_SYMBOL(class_handle2object);
 
-/* config.c */
+/* obd_config.c */
+EXPORT_SYMBOL(class_incref);
 EXPORT_SYMBOL(class_decref);
 EXPORT_SYMBOL(class_get_profile);
 EXPORT_SYMBOL(class_del_profile);
@@ -480,7 +478,7 @@ static int obd_proc_read_health(char *page, char **start, off_t off,
                 if (obd->obd_stopping)
                         continue;
 
-                atomic_inc(&obd->obd_refcount);
+                class_incref(obd);
                 spin_unlock(&obd_dev_lock);
 
                 if (obd_health_check(obd)) {
@@ -756,11 +754,9 @@ int init_obdclass(void)
 /* liblustre doesn't call cleanup_obdclass, apparently.  we carry on in this
  * ifdef to the end of the file to cover module and versioning goo.*/
 #ifdef __KERNEL__
-
 static void cleanup_obdclass(void)
 {
         int i;
-        int leaked;
         int lustre_unregister_fs(void);
         ENTRY;
 
@@ -786,11 +782,6 @@ static void cleanup_obdclass(void)
 
         class_handle_cleanup();
         class_exit_uuidlist();
-
-        leaked = atomic_read(&obd_memory);
-        CDEBUG(leaked ? D_ERROR : D_INFO,
-               "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
-
         EXIT;
 }
 
index 89363a7..35ed2b7 100644 (file)
@@ -46,7 +46,6 @@ EXPORT_SYMBOL(obdo_cachep);
 kmem_cache_t *import_cachep = NULL;
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
 
 /*
  * support functions: we could use inter-module communication, but this
@@ -214,8 +213,8 @@ struct obd_device *class_newdev(struct obd_type *type, char *name)
                         obd->obd_minor = i;
                         obd->obd_type = type;
                         obd->obd_name = name;
-                        CDEBUG(D_IOCTL, "Adding new device %s\n",
-                               obd->obd_name);
+                        CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
+                               obd->obd_name, obd);
                         result = obd;
                 }
         }
@@ -311,7 +310,7 @@ void class_obd_list(void)
                         status = "AT";
                 else
                         status = "--";
-                LCONSOLE(D_WARNING, "%3d %s %s %s %s %d\n",
+                LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
                          i, status, obd->obd_type->typ_name,
                          obd->obd_name, obd->obd_uuid.uuid,
                          atomic_read(&obd->obd_refcount));
@@ -336,9 +335,8 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
                         continue;
                 if ((strncmp(obd->obd_type->typ_name, typ_name,
                              strlen(typ_name)) == 0)) {
-                        struct client_obd *cli = &obd->u.cli;
-                        struct obd_import *imp = cli->cl_import;
-                        if (obd_uuid_equals(tgt_uuid, &imp->imp_target_uuid) &&
+                        if (obd_uuid_equals(tgt_uuid,
+                                            &obd->u.cli.cl_target_uuid) &&
                             ((grp_uuid)? obd_uuid_equals(grp_uuid,
                                                          &obd->obd_uuid) : 1)) {
                                 spin_unlock(&obd_dev_lock);
@@ -524,15 +522,14 @@ EXPORT_SYMBOL(__class_export_put);
 /* Creates a new export, adds it to the hash table, and returns a
  * pointer to it. The refcount is 2: one for the hash reference, and
  * one for the pointer returned by this function. */
-struct obd_export *class_new_export(struct obd_device *obd)
+struct obd_export *class_new_export(struct obd_device *obd,
+                                    struct obd_uuid *cluuid)
 {
-        struct obd_export *export;
+        struct obd_export *export, *tmp;
 
         OBD_ALLOC(export, sizeof(*export));
-        if (!export) {
-                CERROR("no memory! (minor %d)\n", obd->obd_minor);
-                return NULL;
-        }
+        if (!export)
+                return ERR_PTR(-ENOMEM);
 
         export->exp_conn_cnt = 0;
         atomic_set(&export->exp_refcount, 2);
@@ -546,16 +543,30 @@ struct obd_export *class_new_export(struct obd_device *obd)
         export->exp_last_request_time = CURRENT_SECONDS;
         spin_lock_init(&export->exp_lock);
 
+        export->exp_client_uuid = *cluuid;
+        obd_init_export(export);
+
         spin_lock(&obd->obd_dev_lock);
+        if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
+                list_for_each_entry(tmp, &obd->obd_exports, exp_obd_chain) {
+                        if (obd_uuid_equals(cluuid, &tmp->exp_client_uuid)) {
+                                spin_unlock(&obd->obd_dev_lock);
+                                CWARN("%s: denying duplicate export for %s\n",
+                                      obd->obd_name, cluuid->uuid);
+                                class_handle_unhash(&export->exp_handle);
+                                OBD_FREE_PTR(export);
+                                return ERR_PTR(-EALREADY);
+                        }
+                }
+        }
         LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
-        atomic_inc(&obd->obd_refcount);
+        class_incref(obd);
         list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
         list_add_tail(&export->exp_obd_chain_timed,
                       &export->exp_obd->obd_exports_timed);
         export->exp_obd->obd_num_exports++;
         spin_unlock(&obd->obd_dev_lock);
 
-        obd_init_export(export);
         return export;
 }
 EXPORT_SYMBOL(class_new_export);
@@ -620,12 +631,13 @@ void class_import_put(struct obd_import *import)
         }
 
         LASSERT(list_empty(&import->imp_handle.h_link));
+        class_decref(import->imp_obd);
         OBD_FREE(import, sizeof(*import));
         EXIT;
 }
 EXPORT_SYMBOL(class_import_put);
 
-struct obd_import *class_new_import(void)
+struct obd_import *class_new_import(struct obd_device *obd)
 {
         struct obd_import *imp;
 
@@ -637,10 +649,8 @@ struct obd_import *class_new_import(void)
         INIT_LIST_HEAD(&imp->imp_sending_list);
         INIT_LIST_HEAD(&imp->imp_delayed_list);
         spin_lock_init(&imp->imp_lock);
-        imp->imp_conn_cnt = 0;
-        imp->imp_max_transno = 0;
-        imp->imp_peer_committed_transno = 0;
         imp->imp_state = LUSTRE_IMP_NEW;
+        imp->imp_obd = class_incref(obd);
         init_waitqueue_head(&imp->imp_recovery_waitq);
 
         atomic_set(&imp->imp_refcount, 2);
@@ -661,13 +671,7 @@ void class_destroy_import(struct obd_import *import)
 
         class_handle_unhash(&import->imp_handle);
 
-        /* Abort any inflight DLM requests and NULL out their (about to be
-         * freed) import. */
-        /* Invalidate all requests on import, would be better to call
-           ptlrpc_set_import_active(imp, 0); */
         import->imp_generation++;
-        ptlrpc_abort_inflight_superhack(import);
-
         class_import_put(import);
 }
 EXPORT_SYMBOL(class_destroy_import);
@@ -685,13 +689,11 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
         LASSERT(cluuid != NULL);
         ENTRY;
 
-        export = class_new_export(obd);
-        if (export == NULL)
-                RETURN(-ENOMEM);
+        export = class_new_export(obd, cluuid);
+        if (IS_ERR(export))
+                RETURN(PTR_ERR(export));
 
         conn->cookie = export->exp_handle.h_cookie;
-        memcpy(&export->exp_client_uuid, cluuid,
-               sizeof(export->exp_client_uuid));
         class_export_put(export);
 
         CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
@@ -743,7 +745,7 @@ static void class_disconnect_export_list(struct list_head *list, int flags)
 
         /* It's possible that an export may disconnect itself, but
          * nothing else will be added to this list. */
-        while(!list_empty(list)) {
+        while (!list_empty(list)) {
                 exp = list_entry(list->next, struct obd_export, exp_obd_chain);
                 class_export_get(exp);
                 exp->exp_flags = flags;
@@ -1013,238 +1015,6 @@ char *obd_export_nid2str(struct obd_export *exp)
 }
 EXPORT_SYMBOL(obd_export_nid2str);
 
-/* Ping evictor thread */
-#ifdef __KERNEL__
-#define PET_READY     1
-#define PET_TERMINATE 2
-
-static int               pet_refcount = 0;
-static int               pet_state;
-static wait_queue_head_t pet_waitq;
-static struct obd_export *pet_exp = NULL;
-static spinlock_t        pet_lock = SPIN_LOCK_UNLOCKED;
-
-static int ping_evictor_wake(struct obd_export *exp)
-{
-        spin_lock(&pet_lock);
-        if (pet_exp) {
-                /* eventually the new obd will call here again. */
-                spin_unlock(&pet_lock);
-                return 1;
-        }
-
-        /* We have to make sure the obd isn't destroyed between now and when
-         * the ping evictor runs.  We'll take a reference here, and drop it
-         * when we finish in the evictor.  We don't really care about this
-         * export in particular; we just need one to keep the obd alive. */
-        pet_exp = class_export_get(exp);
-        spin_unlock(&pet_lock);
-
-        wake_up(&pet_waitq);
-        return 0;
-}
-
-static int ping_evictor_main(void *arg)
-{
-        struct obd_device *obd;
-        struct obd_export *exp;
-        struct l_wait_info lwi = { 0 };
-        time_t expire_time;
-        unsigned long flags;
-        ENTRY;
-
-        lock_kernel();
-
-        /* ptlrpc_daemonize() */
-        exit_mm(current);
-        lustre_daemonize_helper();
-        set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-        exit_files(current);
-        reparent_to_init();
-        THREAD_NAME(current->comm, sizeof(current->comm), "ping_evictor");
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        unlock_kernel();
-
-        CDEBUG(D_HA, "Starting Ping Evictor\n");
-        pet_exp = NULL;
-        pet_state = PET_READY;
-        while (1) {
-                l_wait_event(pet_waitq, pet_exp ||
-                             (pet_state == PET_TERMINATE), &lwi);
-                if (pet_state == PET_TERMINATE)
-                        break;
-
-                /* we only get here if pet_exp != NULL, and the end of this
-                 * loop is the only place which sets it NULL again, so lock
-                 * is not strictly necessary. */
-                spin_lock(&pet_lock);
-                obd = pet_exp->exp_obd;
-                spin_unlock(&pet_lock);
-
-                expire_time = CURRENT_SECONDS - (3 * obd_timeout / 2);
-
-                CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
-                       obd->obd_name, expire_time);
-
-                /* Exports can't be deleted out of the list while we hold
-                 * the obd lock (class_unlink_export), which means we can't
-                 * lose the last ref on the export.  If they've already been
-                 * removed from the list, we won't find them here. */
-                spin_lock(&obd->obd_dev_lock);
-                while (!list_empty(&obd->obd_exports_timed)) {
-                        exp = list_entry(obd->obd_exports_timed.next,
-                                         struct obd_export,exp_obd_chain_timed);
-
-                        if (expire_time > exp->exp_last_request_time) {
-                                class_export_get(exp);
-                                spin_unlock(&obd->obd_dev_lock);
-                                LCONSOLE_WARN("%s: haven't heard from %s (%s) "
-                                              "in %ld seconds. "
-                                              "Last request was at %ld. "
-                                              "I think it's dead, and I am "
-                                              "evicting it.\n", obd->obd_name,
-                                              obd_uuid2str(&exp->exp_client_uuid),
-                                              obd_export_nid2str(exp),
-                                              (long)(CURRENT_SECONDS -
-                                                     exp->exp_last_request_time),
-                                              exp->exp_last_request_time);
-
-                                class_fail_export(exp);
-                                class_export_put(exp);
-
-                                spin_lock(&obd->obd_dev_lock);
-                        } else {
-                                /* List is sorted, so everyone below is ok */
-                                break;
-                        }
-                }
-                spin_unlock(&obd->obd_dev_lock);
-
-                class_export_put(pet_exp);
-
-                spin_lock(&pet_lock);
-                pet_exp = NULL;
-                spin_unlock(&pet_lock);
-        }
-        CDEBUG(D_HA, "Exiting Ping Evictor\n");
-
-        RETURN(0);
-}
-
-void ping_evictor_start(void)
-{
-        int rc;
-
-        if (++pet_refcount > 1)
-                return;
-
-        init_waitqueue_head(&pet_waitq);
-
-        rc = kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FS);
-        if (rc < 0) {
-                pet_refcount--;
-                CERROR("Cannot start ping evictor thread: %d\n", rc);
-        }
-}
-EXPORT_SYMBOL(ping_evictor_start);
-
-void ping_evictor_stop(void)
-{
-        if (--pet_refcount > 0)
-                return;
-
-        pet_state = PET_TERMINATE;
-        wake_up(&pet_waitq);
-}
-EXPORT_SYMBOL(ping_evictor_stop);
-#else /* !__KERNEL__ */
-#define ping_evictor_wake(exp)     1
-#endif
-
-/* This function makes sure dead exports are evicted in a timely manner.
-   This function is only called when some export receives a message (i.e.,
-   the network is up.) */
-void class_update_export_timer(struct obd_export *exp, time_t extra_delay)
-{
-        struct obd_export *oldest_exp;
-        time_t oldest_time;
-
-        ENTRY;
-
-        LASSERT(exp);
-
-        /* Compensate for slow machines, etc, by faking our request time
-           into the future.  Although this can break the strict time-ordering
-           of the list, we can be really lazy here - we don't have to evict
-           at the exact right moment.  Eventually, all silent exports
-           will make it to the top of the list. */
-        exp->exp_last_request_time = max(exp->exp_last_request_time,
-                                         (time_t)CURRENT_SECONDS + extra_delay);
-
-        CDEBUG(D_INFO, "updating export %s at %ld\n",
-               exp->exp_client_uuid.uuid,
-               exp->exp_last_request_time);
-
-        /* exports may get disconnected from the chain even though the
-           export has references, so we must keep the spin lock while
-           manipulating the lists */
-        spin_lock(&exp->exp_obd->obd_dev_lock);
-
-        if (list_empty(&exp->exp_obd_chain_timed)) {
-                /* this one is not timed */
-                spin_unlock(&exp->exp_obd->obd_dev_lock);
-                EXIT;
-                return;
-        }
-
-        list_move_tail(&exp->exp_obd_chain_timed,
-                       &exp->exp_obd->obd_exports_timed);
-
-        oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
-                                struct obd_export, exp_obd_chain_timed);
-        oldest_time = oldest_exp->exp_last_request_time;
-        spin_unlock(&exp->exp_obd->obd_dev_lock);
-
-        if (exp->exp_obd->obd_recovering) {
-                /* be nice to everyone during recovery */
-                EXIT;
-                return;
-        }
-
-        /* Note - racing to start/reset the obd_eviction timer is safe */
-        if (exp->exp_obd->obd_eviction_timer == 0) {
-                /* Check if the oldest entry is expired. */
-                if (CURRENT_SECONDS > (oldest_time +
-                                       (3 * obd_timeout / 2) + extra_delay)) {
-                        /* We need a second timer, in case the net was down and
-                         * it just came back. Since the pinger may skip every
-                         * other PING_INTERVAL (see note in ptlrpc_pinger_main),
-                         * we better wait for 3. */
-                        exp->exp_obd->obd_eviction_timer = CURRENT_SECONDS +
-                                3 * PING_INTERVAL;
-                        CDEBUG(D_HA, "%s: Think about evicting %s from %ld\n",
-                               exp->exp_obd->obd_name, obd_export_nid2str(exp),
-                               oldest_time);
-                }
-        } else {
-                if (CURRENT_SECONDS > (exp->exp_obd->obd_eviction_timer +
-                                       extra_delay)) {
-                        /* The evictor won't evict anyone who we've heard from
-                         * recently, so we don't have to check before we start
-                         * it. */
-                        if (!ping_evictor_wake(exp))
-                                exp->exp_obd->obd_eviction_timer = 0;
-                }
-        }
-
-        EXIT;
-}
-EXPORT_SYMBOL(class_update_export_timer);
-
 #define EVICT_BATCH 32
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 {
index 0f04901..f9beaa9 100644 (file)
@@ -672,9 +672,9 @@ static int llog_lvfs_destroy(struct llog_handle *handle)
                 rc = llog_lvfs_close(handle);
 
                 if (rc == 0) {
-                        down(&inode->i_sem);
+                        LOCK_INODE_MUTEX(inode);
                         rc = vfs_unlink(inode, fdentry);
-                        up(&inode->i_sem);
+                        UNLOCK_INODE_MUTEX(inode);
                 }
 
                 dput(fdentry);
@@ -729,7 +729,7 @@ int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
                 GOTO(out, rc = -ENOENT);
         }
 
-        CERROR("cat list: disk size=%d, read=%d\n", 
+        CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n", 
                (int)file->f_dentry->d_inode->i_size, size);
 
         rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
index 6d5cbb4..4435eba 100644 (file)
@@ -55,6 +55,8 @@ int llog_cleanup(struct llog_ctxt *ctxt)
                 rc = CTXTP(ctxt, cleanup)(ctxt);
 
         ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
+        if (ctxt->loc_exp)
+                class_export_put(ctxt->loc_exp);
         OBD_FREE(ctxt, sizeof(*ctxt));
 
         RETURN(rc);
@@ -88,7 +90,7 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
 
         obd->obd_llog_ctxt[index] = ctxt;
         ctxt->loc_obd = obd;
-        ctxt->loc_exp = disk_obd->obd_self_export;
+        ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
         ctxt->loc_idx = index;
         ctxt->loc_logops = op;
         sema_init(&ctxt->loc_sem, 1);
index 025e188..b4df232 100644 (file)
@@ -339,7 +339,7 @@ int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
         imp_state_name = ptlrpc_import_state_name(imp->imp_state);
         *eof = 1;
         return snprintf(page, count, "%s\t%s%s\n",
-                        imp->imp_target_uuid.uuid, imp_state_name,
+                        obd2cli_tgt(obd), imp_state_name,
                         imp->imp_deactive ? "\tDEACTIVATED" : "");
 }
 
index b5ed43c..630079a 100644 (file)
@@ -121,7 +121,6 @@ int class_attach(struct lustre_cfg *lcfg)
 
         INIT_LIST_HEAD(&obd->obd_exports);
         INIT_LIST_HEAD(&obd->obd_exports_timed);
-        obd->obd_num_exports = 0;
         spin_lock_init(&obd->obd_dev_lock);
         spin_lock_init(&obd->obd_osfs_lock);
         obd->obd_osfs_age = jiffies - 1000 * HZ;
@@ -157,8 +156,8 @@ int class_attach(struct lustre_cfg *lcfg)
 
         obd->obd_attached = 1;
         type->typ_refcnt++;
-        CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
-               obd->obd_minor, typename);
+        CDEBUG(D_IOCTL, "OBD: dev %d attached type %s with refcount %d\n",
+               obd->obd_minor, typename, atomic_read(&obd->obd_refcount));
         RETURN(0);
  out:
         switch (cleanup_phase) {
@@ -205,13 +204,9 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         obd->obd_starting = 1;
         spin_unlock(&obd->obd_dev_lock);
 
-        exp = class_new_export(obd);
-        if (!exp){
-                CERROR("Fail to build export.\n");
-                RETURN(-ENOMEM);
-        }
-        memcpy(&exp->exp_client_uuid, &obd->obd_uuid,
-               sizeof(exp->exp_client_uuid));
+        exp = class_new_export(obd, &obd->obd_uuid);
+        if (IS_ERR(exp))
+                RETURN(PTR_ERR(exp));
         obd->obd_self_export = exp;
         list_del_init(&exp->exp_obd_chain_timed);
         class_export_put(exp);
@@ -224,7 +219,7 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         obd->obd_set_up = 1;
         spin_lock(&obd->obd_dev_lock);
         /* cleanup drops this */
-        atomic_inc(&obd->obd_refcount);
+        class_incref(obd);
         spin_unlock(&obd->obd_dev_lock);
 
         CDEBUG(D_IOCTL, "finished setup of obd %s (uuid %s)\n",
@@ -400,6 +395,15 @@ out:
         RETURN(err);
 }
 
+struct obd_device *class_incref(struct obd_device *obd)
+{
+        atomic_inc(&obd->obd_refcount);
+        CDEBUG(D_INFO, "incref %s (%p) now %d\n", obd->obd_name, obd,
+               atomic_read(&obd->obd_refcount));
+
+        return obd;
+}
+
 void class_decref(struct obd_device *obd)
 {
         int err;
@@ -410,7 +414,7 @@ void class_decref(struct obd_device *obd)
         refs = atomic_read(&obd->obd_refcount);
         spin_unlock(&obd->obd_dev_lock);
 
-        CDEBUG(D_INFO, "Decref %s now %d\n", obd->obd_name, refs);
+        CDEBUG(D_INFO, "Decref %s (%p) now %d\n", obd->obd_name, obd, refs);
 
         if ((refs == 1) && obd->obd_stopping) {
                 /* All exports (other than the self-export) have been
@@ -720,7 +724,7 @@ extern int lustre_check_exclusion(struct super_block *sb, char *svname);
 static int class_config_llog_handler(struct llog_handle * handle,
                                      struct llog_rec_hdr *rec, void *data)
 {
-        struct config_llog_instance *cfg = data;
+        struct config_llog_instance *clli = data;
         int cfg_len = rec->lrh_len;
         char *cfg_buf = (char*) (rec + 1);
         int rc = 0;
@@ -748,59 +752,58 @@ static int class_config_llog_handler(struct llog_handle * handle,
                 if (lcfg->lcfg_command == LCFG_MARKER) {
                         struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
                         CDEBUG(D_CONFIG, "Marker, cfg_flg=%#x\n",
-                               cfg->cfg_flags);
+                               clli->cfg_flags);
                         if (marker->cm_flags & CM_START) {
                                 /* all previous flags off */
-                                cfg->cfg_flags = CFG_F_MARKER;
-                                if (marker->cm_flags & CM_SKIP) {
-                                        cfg->cfg_flags |= CFG_F_SKIP;
+                                clli->cfg_flags = CFG_F_MARKER;
+                                if (marker->cm_flags & CM_SKIP) { 
+                                        clli->cfg_flags |= CFG_F_SKIP;
                                         CDEBUG(D_CONFIG, "SKIP #%d\n",
                                                marker->cm_step);
-                                } else if (lustre_check_exclusion(cfg->cfg_sb,
+                                } else if (lustre_check_exclusion(clli->cfg_sb, 
                                                           marker->cm_svname)) {
-                                        cfg->cfg_flags |= CFG_F_EXCLUDE;
+                                        clli->cfg_flags |= CFG_F_EXCLUDE;
                                         CDEBUG(D_CONFIG, "EXCLUDE %d\n",
                                                marker->cm_step);
                                 }
                         } else if (marker->cm_flags & CM_END) {
-                                cfg->cfg_flags = 0;
+                                clli->cfg_flags = 0;
                         }
                 }
                 /* A config command without a start marker before it is
                    illegal (1.4.6. compat must set it artificially) */
-                if (!(cfg->cfg_flags & CFG_F_MARKER) &&
+                if (!(clli->cfg_flags & CFG_F_MARKER) && 
                     (lcfg->lcfg_command != LCFG_MARKER)) {
-                        CWARN("Config not inside markers, ignoring! (%#x)\n",
-                              cfg->cfg_flags);
-                        cfg->cfg_flags |= CFG_F_SKIP;
+                        CWARN("Config not inside markers, ignoring! (%#x)\n", 
+                              clli->cfg_flags);
+                        clli->cfg_flags |= CFG_F_SKIP;
                 }
-
-                if (cfg->cfg_flags & CFG_F_SKIP) {
+                if (clli->cfg_flags & CFG_F_SKIP) {
                         // FIXME warning
                         CDEBUG(D_CONFIG|D_WARNING, "skipping %#x\n",
-                               cfg->cfg_flags);
+                               clli->cfg_flags);
                         rc = 0;
                         /* No processing! */
                         break;
                 }
 
-                if ((cfg->cfg_flags & CFG_F_EXCLUDE) &&
+                if ((clli->cfg_flags & CFG_F_EXCLUDE) && 
                     (lcfg->lcfg_command == LCFG_LOV_ADD_OBD))
                         /* Add inactive instead */
                         lcfg->lcfg_command = LCFG_LOV_ADD_INA;
 
                 lustre_cfg_bufs_init(&bufs, lcfg);
 
-                if (cfg && cfg->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0){
+                if (clli && clli->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0){
                         inst = 1;
                         inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) +
-                                strlen(cfg->cfg_instance) + 1;
+                                strlen(clli->cfg_instance) + 1;
                         OBD_ALLOC(inst_name, inst_len);
                         if (inst_name == NULL)
                                 GOTO(out, rc = -ENOMEM);
                         sprintf(inst_name, "%s-%s",
                                 lustre_cfg_string(lcfg, 0),
-                                cfg->cfg_instance);
+                                clli->cfg_instance);
                         lustre_cfg_bufs_set_string(&bufs, 0, inst_name);
                         CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n",
                                lcfg->lcfg_command, inst_name);
@@ -808,10 +811,10 @@ static int class_config_llog_handler(struct llog_handle * handle,
 
                 /* we override the llog's uuid for clients, to insure they
                 are unique */
-                if (cfg && cfg->cfg_instance &&
+                if (clli && clli->cfg_instance && 
                     lcfg->lcfg_command == LCFG_ATTACH) {
                         lustre_cfg_bufs_set_string(&bufs, 2,
-                                                   cfg->cfg_uuid.uuid);
+                                                   clli->cfg_uuid.uuid);
                 }
 
                 lcfg_new = lustre_cfg_new(lcfg->lcfg_command, &bufs);
index 2a3f5c4..877f4f3 100644 (file)
@@ -25,9 +25,9 @@
 
 
 #define DEBUG_SUBSYSTEM S_MGMT
-#define D_MOUNT D_SUPER|D_CONFIG/*|D_WARNING*/
+#define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
 #define PRINT_CMD LCONSOLE
-#define PRINT_MASK D_WARNING
+#define PRINT_MASK D_SUPER
 
 #include <linux/obd.h>
 #include <linux/lvfs.h>
@@ -121,13 +121,14 @@ static struct lustre_mount_info *server_find_mount(const char *name)
 {
         struct list_head *tmp;
         struct lustre_mount_info *lmi;
+        ENTRY;
 
         list_for_each(tmp, &server_mount_info_list) {
                 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
-                if (strcmp(name, lmi->lmi_name) == 0)
-                        return(lmi);
+                if (strcmp(name, lmi->lmi_name) == 0) 
+                        RETURN(lmi);
         }
-        return(NULL);
+        RETURN(NULL);
 }
 
 /* we must register an obd for a mount before we call the setup routine.
@@ -202,25 +203,33 @@ static int server_deregister_mount(const char *name)
 
 /* Deregister anyone referencing the mnt. Everyone should have
    put_mount in *_cleanup, but this is a catch-all in case of err... */
+/* FIXME this should be removed from lustre_free_lsi, which may be called
+   from server_put_mount _before_ it gets to server_deregister_mount. 
+   Leave it here for now for the error message it shows... */
 static void server_deregister_mount_all(struct vfsmount *mnt)
 {
         struct list_head *tmp, *n;
         struct lustre_mount_info *lmi;
+        ENTRY;
 
-        if (!mnt)
+        if (!mnt) {
+                EXIT;
                 return;
+        }
 
-        down(&lustre_mount_info_lock);
+        //down(&lustre_mount_info_lock);
         list_for_each_safe(tmp, n, &server_mount_info_list) {
                 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
                 if (lmi->lmi_mnt == mnt) {
-                        CERROR("Deregister failsafe %s\n", lmi->lmi_name);
-                        OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
-                        list_del(&lmi->lmi_list_chain);
-                        OBD_FREE(lmi, sizeof(*lmi));
+                        CERROR("Mount %p still referenced by %s\n", mnt,
+                               lmi->lmi_name);
+                        //OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
+                        //list_del(&lmi->lmi_list_chain);
+                        //OBD_FREE(lmi, sizeof(*lmi));
                 }
         }
-        up(&lustre_mount_info_lock);
+        //up(&lustre_mount_info_lock);
+        EXIT;
 }
 
 /* obd's look up a registered mount using their name. This is just
@@ -233,20 +242,17 @@ struct lustre_mount_info *server_get_mount(const char *name)
         ENTRY;
 
         down(&lustre_mount_info_lock);
-
         lmi = server_find_mount(name);
+        up(&lustre_mount_info_lock);
         if (!lmi) {
-                up(&lustre_mount_info_lock);
                 CERROR("Can't find mount for %s\n", name);
                 RETURN(NULL);
         }
         lsi = s2lsi(lmi->lmi_sb);
         mntget(lmi->lmi_mnt);
         atomic_inc(&lsi->lsi_mounts);
-
-        up(&lustre_mount_info_lock);
-
-        CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
+        
+        CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n", 
                lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
                atomic_read(&lmi->lmi_mnt->mnt_count));
 
@@ -275,8 +281,8 @@ int server_put_mount(const char *name, struct vfsmount *mnt)
 
         down(&lustre_mount_info_lock);
         lmi = server_find_mount(name);
+        up(&lustre_mount_info_lock);
         if (!lmi) {
-                up(&lustre_mount_info_lock);
                 CERROR("Can't find mount for %s\n", name);
                 RETURN(-ENOENT);
         }
@@ -297,7 +303,6 @@ int server_put_mount(const char *name, struct vfsmount *mnt)
                         CERROR("%s: mount busy, vfscount=%d!\n", name,
                                atomic_read(&lmi->lmi_mnt->mnt_count));
         }
-        up(&lustre_mount_info_lock);
 
         /* this obd should never need the mount again */
         server_deregister_mount(name);
@@ -608,7 +613,7 @@ static int lustre_start_mgc(struct super_block *sb)
         lnet_nid_t nid;
         char niduuid[10];
         char *ptr;
-        int recov_bk;
+        int recov_bk = 0;
         int rc = 0, i = 0, j;
         ENTRY;
 
@@ -622,13 +627,20 @@ static int lustre_start_mgc(struct super_block *sb)
                    or not?  If there's truly one MGS per site, the MGS uuids
                    _should_ all be the same. Maybe check here?
                 */
+                
+                /* If we are restarting the MGS, don't try to keep the MGC's
+                   old connection, or registration will fail. */
+                if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
+                        CDEBUG(D_MOUNT|D_ERROR, "New MGS with live MGC\n");
+                        recov_bk = 1;
+                }
 
-                /* Try all connections, but only once (again).
+                /* Try all connections, but only once (again). 
                    We don't want to block another target from starting
                    (using its local copy of the log), but we do want to connect
                    if at all possible. */
-                CDEBUG(D_MOUNT, "Set MGS reconnect\n");
-                recov_bk = 1;
+                recov_bk++;
+                CDEBUG(D_MOUNT, "Set MGS reconnect %d\n", recov_bk);
                 rc = obd_set_info(obd->obd_self_export,
                                   strlen(KEY_INIT_RECOV_BACKUP),
                                   KEY_INIT_RECOV_BACKUP,
@@ -966,17 +978,23 @@ int server_register_target(struct super_block *sb)
         /* If this flag is set, it means the MGS wants us to change our
            on-disk data. (So far this means just the index.) */
         if (mti->mti_flags & LDD_F_REWRITE_LDD) {
-                CDEBUG(D_MOUNT, "Must change on-disk index from %#x to %#x for "
-                       " %s\n",
-                       ldd->ldd_svindex, mti->mti_stripe_index,
+                char *label;
+                int err;
+                CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
+                       "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index, 
                        mti->mti_svname);
                 ldd->ldd_svindex = mti->mti_stripe_index;
                 strncpy(ldd->ldd_svname, mti->mti_svname,
                         sizeof(ldd->ldd_svname));
                 /* or ldd_make_sv_name(ldd); */
                 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
-
-                /* FIXME write last_rcvd?, disk label? */
+                err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
+                                       mti->mti_svname);
+                if (err)
+                        CERROR("Label set error %d\n", err);
+                label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
+                if (label) 
+                        CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
         }
 
 out:
@@ -1277,15 +1295,20 @@ static void server_put_super(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device     *obd;
         struct vfsmount       *mnt = lsi->lsi_srv_mnt;
+        char *tmpname;
+        int tmpname_sz;
         int lddflags = lsi->lsi_ldd->ldd_flags;
         int lsiflags = lsi->lsi_flags;
         int rc;
         ENTRY;
 
         LASSERT(lsiflags & LSI_SERVER);
-
-        CDEBUG(D_MOUNT, "server put_super %s\n", lsi->lsi_ldd->ldd_svname);
-
+        
+        tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
+        OBD_ALLOC(tmpname, tmpname_sz);
+        memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
+        CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
+                                                                                       
         /* Stop the target */
         if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
 
@@ -1335,7 +1358,8 @@ static void server_put_super(struct super_block *sb)
            is right. */
         server_stop_servers(lddflags, lsiflags);
 
-        CDEBUG(D_MOUNT|D_WARNING, "server umount done\n");
+        CDEBUG(D_MOUNT|D_WARNING, "server umount %s done\n", tmpname);
+        OBD_FREE(tmpname, tmpname_sz);
         EXIT;
 }
 
@@ -1790,8 +1814,7 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                         /* Connect and start */
                         /* (should always be ll_fill_super) */
                         rc = (*client_fill_super)(sb);
-                        if (rc)
-                                lustre_common_put_super(sb);
+                        /* c_f_s will call lustre_common_put_super on failure */
                 }
         } else {
                 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
@@ -1808,7 +1831,8 @@ out:
                 CERROR("Unable to mount %s\n",
                        s2lsi(sb) ? lmd->lmd_dev : "");
         } else {
-                CDEBUG(D_MOUNT, "Successfully mounted %s\n", lmd->lmd_dev);
+                CDEBUG(D_MOUNT|D_WARNING, "Successfully mounted %s\n", 
+                       lmd->lmd_dev);
         }
         RETURN(rc);
 }
index 327bf6d..97812a9 100644 (file)
@@ -247,8 +247,11 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
                 LTIME_S(dst->i_ctime) = src->o_ctime;
         if (valid & OBD_MD_FLSIZE)
                 dst->i_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
+        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
                 dst->i_blocks = src->o_blocks;
+                if (dst->i_blocks < src->o_blocks) /* overflow */
+                        dst->i_blocks = -1;
+        }
         if (valid & OBD_MD_FLBLKSZ)
                 dst->i_blksize = src->o_blksize;
         if (valid & OBD_MD_FLTYPE)
index 714aa0e..73db087 100644 (file)
@@ -107,8 +107,6 @@ static ctl_table obd_table[] = {
                 &proc_dostring, &sysctl_string },
         {OBD_MEMUSED, "memused", (int *)&obd_memory.counter,
                 sizeof(int), 0644, NULL, &proc_dointvec},
-        {OBD_SYNCFILTER, "filter_sync_on_commit", &obd_sync_filter, sizeof(int),
-                0644, NULL, &proc_dointvec},
         {OBD_LDLM_TIMEOUT, "ldlm_timeout", &ldlm_timeout, sizeof(int), 0644,
                 NULL, &proc_set_timeout},
         { 0 }
index 06796e6..aba93fe 100644 (file)
@@ -289,9 +289,11 @@ echo_get_object (struct ec_object **ecop, struct obd_device *obd,
         spin_lock (&ec->ec_lock);
         eco = echo_find_object_locked (obd, oa->o_id);
         if (eco != NULL) {
-                if (eco->eco_deleted)           /* being deleted */
-                        return (-EAGAIN);       /* (see comment in cleanup) */
-
+                if (eco->eco_deleted) {            /* being deleted */
+                        spin_unlock(&ec->ec_lock); /* (see comment in cleanup) */
+                        return (-EAGAIN);
+                }
+                
                 eco->eco_refcount++;
                 spin_unlock (&ec->ec_lock);
                 *ecop = eco;
@@ -794,7 +796,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
                 if (page == NULL)
                         GOTO(out, rc = -ENOMEM);
 
-                page->private = 0;
+                set_page_private(page, 0);
                 list_add_tail(&PAGE_LIST(page), &pages);
 
                 OBD_ALLOC(eap, sizeof(*eap));
@@ -804,7 +806,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
                 eap->eap_magic = EAP_MAGIC;
                 eap->eap_page = page;
                 eap->eap_eas = &eas;
-                page->private = (unsigned long)eap;
+                set_page_private(page, (unsigned long)eap);
                 list_add_tail(&eap->eap_item, &eas.eas_avail);
         }
 
@@ -887,8 +889,8 @@ out:
                                                PAGE_LIST_ENTRY);
 
                 list_del(&PAGE_LIST(page));
-                if (page->private != 0) {
-                        eap = (struct echo_async_page *)page->private;
+                if (page_private(page) != 0) {
+                        eap = (struct echo_async_page *)page_private(page);
                         if (eap->eap_cookie != NULL)
                                 obd_teardown_async_page(exp, lsm, NULL,
                                                         eap->eap_cookie);
@@ -1354,6 +1356,7 @@ static int echo_client_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                 return -ENOMEM;
         }
 
+        ocd->ocd_connect_flags = OBD_CONNECT_VERSION;
         ocd->ocd_version = LUSTRE_VERSION_CODE;
 
         rc = obd_connect(&conn, tgt, &echo_uuid, ocd);
index ab7e3f0..95f69e7 100644 (file)
  */
 
 /*
- * Invariant: Get O/R i_sem for lookup, if needed, before any journal ops
+ * Invariant: Get O/R i_mutex for lookup, if needed, before any journal ops
  *            (which need to get journal_lock, may block if journal full).
  *
  * Invariant: Call filter_start_transno() before any journal ops to avoid the
  *            same deadlock problem.  We can (and want) to get rid of the
- *            transno sem in favour of the dir/inode i_sem to avoid single
+ *            transno sem in favour of the dir/inode i_mutex to avoid single
  *            threaded operation on the OST.
  */
 
@@ -301,6 +301,14 @@ free:
         return 0;
 }
 
+static int filter_init_export(struct obd_export *exp)
+{
+        spin_lock_init(&exp->exp_filter_data.fed_lock);
+        exp->exp_connecting = 1;
+
+        return 0;
+}
+
 static int filter_free_server_data(struct filter_obd *filter)
 {
         OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
@@ -487,25 +495,22 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                 /* These exports are cleaned up by filter_disconnect(), so they
                  * need to be set up like real exports as filter_connect() does.
                  */
-                exp = class_new_export(obd);
+                exp = class_new_export(obd, (struct obd_uuid *)fcd->fcd_uuid);
                 CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
                        " srv lr: "LPU64"\n", fcd->fcd_uuid, cl_idx,
                        last_rcvd, le64_to_cpu(fsd->lsd_last_transno));
-                if (exp == NULL)
-                        GOTO(err_client, rc = -ENOMEM);
+                if (IS_ERR(exp))
+                        GOTO(err_client, rc = PTR_ERR(exp));
 
-                memcpy(&exp->exp_client_uuid.uuid, fcd->fcd_uuid,
-                       sizeof exp->exp_client_uuid.uuid);
                 fed = &exp->exp_filter_data;
                 fed->fed_fcd = fcd;
                 rc = filter_client_add(obd, filter, fed, cl_idx);
                 LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
 
-                /* create helper if export init gets more complex */
-                spin_lock_init(&fed->fed_lock);
 
                 fcd = NULL;
                 exp->exp_replay_needed = 1;
+                exp->exp_connecting = 0;
                 obd->obd_recoverable_clients++;
                 obd->obd_max_recoverable_clients++;
                 class_export_put(exp);
@@ -647,10 +652,10 @@ static int filter_prep_groups(struct obd_device *obd)
                         GOTO(cleanup_O0, rc = -EEXIST);
                 }
 
-                down(&O_dentry->d_inode->i_sem);
+                LOCK_INODE_MUTEX(O_dentry->d_inode);
                 rc = vfs_rename(O_dentry->d_inode, dentry,
                                 O_dentry->d_inode, O0_dentry);
-                up(&O_dentry->d_inode->i_sem);
+                UNLOCK_INODE_MUTEX(O_dentry->d_inode);
 
                 if (rc) {
                         CERROR("error renaming O/R to O/0: rc %d\n", rc);
@@ -912,7 +917,7 @@ __u64 filter_last_id(struct filter_obd *filter, struct obdo *oa)
 
 static int filter_lock_dentry(struct obd_device *obd, struct dentry *dparent)
 {
-        down(&dparent->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dparent->d_inode);
         return 0;
 }
 
@@ -947,7 +952,7 @@ struct dentry *filter_parent_lock(struct obd_device *obd, obd_gr group,
 /* We never dget the object parent, so DON'T dput it either */
 static void filter_parent_unlock(struct dentry *dparent)
 {
-        up(&dparent->d_inode->i_sem);
+        UNLOCK_INODE_MUTEX(dparent->d_inode);
 }
 
 /* How to get files, dentries, inodes from object id's.
@@ -1044,9 +1049,10 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry)
         ENTRY;
 
         /* don't need dir->i_zombie for 2.4, it is for rename/unlink of dir
-         * itself we already hold dir->i_sem for child create/unlink ops */
-        LASSERT(down_trylock(&dir->i_sem) != 0);
-        LASSERT(down_trylock(&dentry->d_inode->i_sem) != 0);
+         * itself we already hold dir->i_mutex for child create/unlink ops */
+        LASSERT(TRYLOCK_INODE_MUTEX(dir) == 0);
+        LASSERT(TRYLOCK_INODE_MUTEX(dentry->d_inode) == 0);
+
 
         /* may_delete() */
         if (!dentry->d_inode || dentry->d_parent->d_inode != dir)
@@ -1064,7 +1070,7 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry)
             IS_APPEND(dentry->d_inode) || IS_IMMUTABLE(dentry->d_inode))
                 GOTO(out, rc = -EPERM);
 
-        /* NOTE: This might need to go outside i_sem, though it isn't clear if
+        /* NOTE: This might need to go outside i_mutex, though it isn't clear if
          *       that was done because of journal_start (which is already done
          *       here) or some other ordering issue. */
         DQUOT_INIT(dir);
@@ -1077,8 +1083,8 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry)
 
         rc = dir->i_op->unlink(dir, dentry);
 out:
-        /* need to drop i_sem before we lose inode reference */
-        up(&dentry->d_inode->i_sem);
+        /* need to drop i_mutex before we lose inode reference */
+        UNLOCK_INODE_MUTEX(dentry->d_inode);
         if (rc == 0)
                 d_delete(dentry);
 
@@ -1086,7 +1092,7 @@ out:
 }
 
 /* Caller must hold LCK_PW on parent and push us into kernel context.
- * Caller must hold child i_sem, we drop it always.
+ * Caller must hold child i_mutex, we drop it always.
  * Caller is also required to ensure that dchild->d_inode exists. */
 static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
                                    struct dentry *dparent,
@@ -1434,14 +1440,12 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
 
         /* failover is the default */
         obd->obd_replayable = 1;
-        obd_sync_filter = 1;
 
         if (lcfg->lcfg_bufcount > 3 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
                 str = lustre_cfg_string(lcfg, 3);
                 if (strchr(str, 'n')) {
                         CWARN("%s: recovery disabled\n", obd->obd_name);
                         obd->obd_replayable = 0;
-                        obd_sync_filter = 0;
                 }
         }
 
@@ -1509,7 +1513,8 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
         } else {
                 str = "no UUID";
         }
-        label = fsfilt_label(obd, obd->u.obt.obt_sb);
+        
+        label = fsfilt_get_label(obd, obd->u.obt.obt_sb);
 
         if (obd->obd_recovering) {
                 LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in "
@@ -1589,8 +1594,6 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                 lproc_filter_attach_seqstat(obd);
         }
 
-        ping_evictor_start();
-
         return rc;
 }
 
@@ -1646,17 +1649,23 @@ static int filter_llog_finish(struct obd_device *obd, int count)
         RETURN(rc);
 }
 
-static int filter_precleanup(struct obd_device *obd, int stage)
+static int filter_precleanup(struct obd_device *obd,
+                             enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
 
         switch(stage) {
+        case OBD_CLEANUP_EARLY:
+                break;
         case OBD_CLEANUP_EXPORTS:
                 target_cleanup_recovery(obd);
                 break;
         case OBD_CLEANUP_SELF_EXP:
                 rc = filter_llog_finish(obd, 0);
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
         }
         RETURN(rc);
 }
@@ -1681,8 +1690,6 @@ static int filter_cleanup(struct obd_device *obd)
                 }
         }
 
-        ping_evictor_stop();
-
         lquota_cleanup(quota_interface, obd);
 
         ldlm_namespace_free(obd->obd_namespace, obd->obd_force);
@@ -1768,7 +1775,7 @@ static int filter_connect_internal(struct obd_export *exp,
                 if (!(lsd->lsd_feature_compat &
                       cpu_to_le32(OBD_COMPAT_OST))) {
                         /* this will only happen on the first connect */
-                        lsd->lsd_ost_index = le32_to_cpu(data->ocd_index);
+                        lsd->lsd_ost_index = cpu_to_le32(data->ocd_index);
                         lsd->lsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST);
                         filter_update_server_data(exp->exp_obd,
                                                   filter->fo_rcvd_filp, lsd, 1);
@@ -1981,6 +1988,9 @@ static int filter_destroy_export(struct obd_export *exp)
 
         target_destroy_export(exp);
 
+        if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid))
+                RETURN(0);
+
         if (exp->exp_obd->obd_replayable)
                 filter_client_free(exp);
         else
@@ -2149,7 +2159,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         }
 
         if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) {
-                down(&inode->i_sem);
+                LOCK_INODE_MUTEX(inode);
                 locked = 1;
         }
 
@@ -2210,7 +2220,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         }
 
         if (locked) {
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 locked = 0;
         }
 
@@ -2225,7 +2235,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         EXIT;
 out_unlock:
         if (locked)
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
 
         /* trigger quota release */
         if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
@@ -2746,11 +2756,11 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
          *                      restart transaction
          * (see BUG 4180) -bzzz
          */
-        down(&dchild->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dchild->d_inode);
         handle = fsfilt_start_log(obd, dchild->d_inode, FSFILT_OP_SETATTR,
                                   NULL, 1);
         if (IS_ERR(handle)) {
-                up(&dchild->d_inode->i_sem);
+                UNLOCK_INODE_MUTEX(dchild->d_inode);
                 GOTO(cleanup, rc = PTR_ERR(handle));
         }
 
@@ -2758,7 +2768,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         iattr.ia_size = 0;
         rc = fsfilt_setattr(obd, dchild, handle, &iattr, 1);
         rc2 = fsfilt_commit(obd, dchild->d_inode, handle, 0);
-        up(&dchild->d_inode->i_sem);
+        UNLOCK_INODE_MUTEX(dchild->d_inode);
         if (rc)
                 GOTO(cleanup, rc);
         if (rc2)
@@ -2773,10 +2783,10 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
                 GOTO(cleanup, rc = PTR_ERR(dparent));
         cleanup_phase = 3; /* filter_parent_unlock */
 
-        down(&dchild->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dchild->d_inode);
         handle = fsfilt_start_log(obd, dparent->d_inode,FSFILT_OP_UNLINK,oti,1);
         if (IS_ERR(handle)) {
-                up(&dchild->d_inode->i_sem);
+                UNLOCK_INODE_MUTEX(dchild->d_inode);
                 GOTO(cleanup, rc = PTR_ERR(handle));
         }
         cleanup_phase = 4; /* fsfilt_commit */
@@ -2784,7 +2794,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         /* Quota release need uid/gid of inode */
         obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID|OBD_MD_FLGID);
 
-        /* this drops dchild->d_inode->i_sem unconditionally */
+        /* this drops dchild->d_inode->i_mutex unconditionally */
         rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
 
         EXIT;
@@ -2874,7 +2884,8 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
 
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
 
-        down(&dentry->d_inode->i_sem);
+        LOCK_INODE_MUTEX(dentry->d_inode);
+
         rc = filemap_fdatawrite(dentry->d_inode->i_mapping);
         if (rc == 0) {
                 /* just any file to grab fsync method - "file" arg unused */
@@ -2887,7 +2898,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
                 if (!rc)
                         rc = rc2;
         }
-        up(&dentry->d_inode->i_sem);
+        UNLOCK_INODE_MUTEX(dentry->d_inode);
 
         oa->o_valid = OBD_MD_FLID;
         obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
@@ -3073,6 +3084,8 @@ static struct obd_ops filter_obd_ops = {
         .o_connect        = filter_connect,
         .o_reconnect      = filter_reconnect,
         .o_disconnect     = filter_disconnect,
+        .o_init_export    = filter_init_export,
+        .o_destroy_export = filter_destroy_export,
         .o_statfs         = filter_statfs,
         .o_getattr        = filter_getattr,
         .o_unpackmd       = filter_unpackmd,
@@ -3084,7 +3097,6 @@ static struct obd_ops filter_obd_ops = {
         .o_sync           = filter_sync,
         .o_preprw         = filter_preprw,
         .o_commitrw       = filter_commitrw,
-        .o_destroy_export = filter_destroy_export,
         .o_llog_init      = filter_llog_init,
         .o_llog_finish    = filter_llog_finish,
         .o_iocontrol      = filter_iocontrol,
@@ -3101,6 +3113,8 @@ static struct obd_ops filter_sanobd_ops = {
         .o_connect        = filter_connect,
         .o_reconnect      = filter_reconnect,
         .o_disconnect     = filter_disconnect,
+        .o_init_export    = filter_init_export,
+        .o_destroy_export = filter_destroy_export,
         .o_statfs         = filter_statfs,
         .o_getattr        = filter_getattr,
         .o_unpackmd       = filter_unpackmd,
@@ -3113,7 +3127,6 @@ static struct obd_ops filter_sanobd_ops = {
         .o_preprw         = filter_preprw,
         .o_commitrw       = filter_commitrw,
         .o_san_preprw     = filter_san_preprw,
-        .o_destroy_export = filter_destroy_export,
         .o_llog_init      = filter_llog_init,
         .o_llog_finish    = filter_llog_finish,
         .o_iocontrol      = filter_iocontrol,
index ed768f7..c8c1b6b 100644 (file)
@@ -296,19 +296,20 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
                 spin_unlock(&obd->obd_osfs_lock);
         }
 
-        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
         iobuf = filter_iobuf_get(&obd->u.filter, oti);
+        if (IS_ERR(iobuf))
+                RETURN(PTR_ERR(iobuf));
 
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         dentry = filter_oa2dentry(obd, oa);
         if (IS_ERR(dentry)) {
                 rc = PTR_ERR(dentry);
                 dentry = NULL;
                 GOTO(cleanup, rc);
         }
-        
+
         inode = dentry->d_inode;
-        
+
         if (oa)
                 obdo_to_inode(inode, oa, OBD_MD_FLATIME);
 
@@ -520,8 +521,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
 
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
         iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti);
-        if (iobuf == NULL)
-                GOTO(cleanup, rc = -ENOMEM);
+        if (IS_ERR(iobuf))
+                GOTO(cleanup, rc = PTR_ERR(iobuf));
         cleanup_phase = 1;
 
         dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr,
index 672e281..be6e550 100644 (file)
@@ -381,8 +381,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
                 GOTO(cleanup, rc);
 
         iobuf = filter_iobuf_get(&obd->u.filter, oti);
-        if (iobuf == NULL)
-                GOTO(cleanup, rc = -ENOMEM);
+        if (IS_ERR(iobuf))
+                GOTO(cleanup, rc = PTR_ERR(iobuf));
         cleanup_phase = 1;
 
         fso.fso_dentry = res->dentry;
@@ -467,7 +467,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
                 CERROR("Failure to commit OST transaction (%d)?\n", err);
                 rc = err;
         }
-        if (obd_sync_filter && !err)
+        if (obd->obd_replayable && !err)
                 LASSERTF(oti->oti_transno <= obd->obd_last_committed,
                          "oti_transno "LPU64" last_committed "LPU64"\n",
                          oti->oti_transno, obd->obd_last_committed);
index 6a48bc6..b5e2664 100644 (file)
@@ -419,7 +419,7 @@ static int filter_clear_page_cache(struct inode *inode,
         return 0;
 }
 
-/* Must be called with i_sem taken for writes; this will drop it */
+/* Must be called with i_mutex taken for writes; this will drop it */
 int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                      struct obd_export *exp, struct iattr *attr,
                      struct obd_trans_info *oti, void **wait_handle)
@@ -479,7 +479,7 @@ remap:
                                             oti->oti_handle, attr, 0);
                 }
 
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
 
                 rc2 = filter_finish_transno(exp, oti, 0);
                 if (rc2 != 0) {
@@ -550,6 +550,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 GOTO(cleanup, rc);
 
         iobuf = filter_iobuf_get(&obd->u.filter, oti);
+        if (IS_ERR(iobuf))
+                GOTO(cleanup, rc = PTR_ERR(iobuf));
         cleanup_phase = 1;
 
         fso.fso_dentry = res->dentry;
@@ -580,9 +582,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 this_size = lnb->offset + lnb->len;
                 if (this_size > iattr.ia_size)
                         iattr.ia_size = this_size;
-                
+
                 /* if one page is a write-back page from client cache, or it's
-                 * written by root, then mark the whole io request as ignore 
+                 * written by root, then mark the whole io request as ignore
                  * quota request */
                 if (lnb->flags & (OBD_BRW_FROM_GRANT | OBD_BRW_NOQUOTA))
                         iobuf->dr_ignore_quota = 1;
@@ -591,12 +593,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         cleanup_phase = 2;
 
-        down(&inode->i_sem);
-        fsfilt_check_slow(now, obd_timeout, "i_sem");
+        LOCK_INODE_MUTEX(inode);
+        fsfilt_check_slow(now, obd_timeout, "i_mutex");
         oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res,
                                            oti);
         if (IS_ERR(oti->oti_handle)) {
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 rc = PTR_ERR(oti->oti_handle);
                 CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
                        "error starting transaction: rc = %d\n", rc);
@@ -635,7 +637,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 rc = filter_update_fidea(exp, inode, oti->oti_handle, oa);
         }
 
-        /* filter_direct_io drops i_sem */
+        /* filter_direct_io drops i_mutex */
         rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, iobuf, exp, &iattr,
                               oti, &wait_handle);
         if (rc == 0)
@@ -652,7 +654,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         if (err)
                 rc = err;
 
-        if (obd_sync_filter && !err)
+        if (obd->obd_replayable && !err)
                 LASSERTF(oti->oti_transno <= obd->obd_last_committed,
                          "oti_transno "LPU64" last_committed "LPU64"\n",
                          oti->oti_transno, obd->obd_last_committed);
index 0dbd077..a2ce350 100644 (file)
@@ -51,14 +51,14 @@ int filter_log_sz_change(struct llog_handle *cathandle,
         struct ost_filterdata *ofd;
         ENTRY;
 
-        down(&inode->i_sem);
+        LOCK_INODE_MUTEX(inode);
         ofd = inode->i_filterdata;
 
         if (ofd && ofd->ofd_epoch >= io_epoch) {
                 if (ofd->ofd_epoch > io_epoch)
                         CERROR("client sent old epoch %d for obj ino %ld\n",
                                io_epoch, inode->i_ino);
-                up(&inode->i_sem);
+                UNLOCK_INODE_MUTEX(inode);
                 RETURN(0);
         }
 
@@ -73,7 +73,7 @@ int filter_log_sz_change(struct llog_handle *cathandle,
                 ofd->ofd_epoch = io_epoch;
         }
         /* the decision to write a record is now made, unlock */
-        up(&inode->i_sem);
+        UNLOCK_INODE_MUTEX(inode);
 
         OBD_ALLOC(lsc, sizeof(*lsc));
         if (lsc == NULL)
index 066f8dc..acdd457 100644 (file)
@@ -49,7 +49,7 @@ static int filter_lvbo_init(struct ldlm_resource *res)
         ENTRY;
 
         LASSERT(res);
-        LASSERT(down_trylock(&res->lr_lvb_sem) != 0);
+        LASSERT_SEM_LOCKED(&res->lr_lvb_sem);
 
         /* we only want lvb's for object resources */
         /* check for internal locks: these have name[1] != 0 */
index 1c9afe6..05e2567 100644 (file)
@@ -86,6 +86,7 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer,
 {
         struct obd_device *dev = data;
         struct client_obd *cli = &dev->u.cli;
+        struct ptlrpc_request_pool *pool = cli->cl_import->imp_rq_pool;
         int val, rc;
 
         rc = lprocfs_write_helper(buffer, count, &val);
@@ -95,9 +96,8 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer,
         if (val < 1 || val > OSC_MAX_RIF_MAX)
                 return -ERANGE;
 
-        if (cli->cl_rq_pool && val > cli->cl_max_rpcs_in_flight)
-                cli->cl_rq_pool->prp_populate(cli->cl_rq_pool,
-                                              val - cli->cl_max_rpcs_in_flight);
+        if (pool && val > cli->cl_max_rpcs_in_flight)
+                pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
 
         spin_lock(&cli->cl_loi_list_lock);
         cli->cl_max_rpcs_in_flight = val;
index a08ae20..b98cac1 100644 (file)
@@ -91,7 +91,7 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc)
                 spin_unlock(&oscc->oscc_lock);
                 DEBUG_REQ(D_ERROR, req,
                           "unknown rc %d from async create: failing oscc", rc);
-                ptlrpc_fail_import(req->rq_import, req->rq_import_generation);
+                ptlrpc_fail_import(req->rq_import, req->rq_reqmsg->conn_cnt);
         } else {
                 if (rc == 0) {
                         oscc->oscc_flags &= ~OSCC_FLAG_LOW;
@@ -357,8 +357,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
 
         if (rc == 0)
                 CDEBUG(D_HA, "%s: returning objid "LPU64"\n",
-                       oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid,
-                       lsm->lsm_object_id);
+                       obd2cli_tgt(oscc->oscc_obd), lsm->lsm_object_id);
         else if (*ea == NULL)
                 obd_free_memmd(exp, &lsm);
         RETURN(rc);
index 06e376d..9340119 100644 (file)
@@ -779,7 +779,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         struct ptlrpc_request_pool *pool;
 
         opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
-        pool = ((cmd & OBD_BRW_WRITE) != 0) ? cli->cl_rq_pool : NULL;
+        pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL;
 
         for (niocount = i = 1; i < page_count; i++)
                 if (!can_merge_pages(&pga[i - 1], &pga[i]))
@@ -831,9 +831,9 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                          "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64
                          " prev_pg %p [pri %lu ind %lu] off "LPU64"\n",
                          i, page_count,
-                         pg->pg, pg->pg->private, pg->pg->index, pg->off,
-                         pg_prev->pg, pg_prev->pg->private, pg_prev->pg->index,
-                                 pg_prev->off);
+                         pg->pg, page_private(pg->pg), pg->pg->index, pg->off,
+                         pg_prev->pg, page_private(pg_prev->pg),
+                         pg_prev->pg->index, pg_prev->off);
                 LASSERT((pga[0].flag & OBD_BRW_SRVLOCK) ==
                         (pg->flag & OBD_BRW_SRVLOCK));
 
@@ -1375,12 +1375,11 @@ static void osc_occ_interrupted(struct oig_callback_context *occ)
                 GOTO(unlock, 0);
         }
 
-        /* we don't get interruption callbacks until osc_trigger_sync_io()
+        /* we don't get interruption callbacks until osc_trigger_group_io()
          * has been called and put the sync oaps in the pending/urgent lists.*/
         if (!list_empty(&oap->oap_pending_item)) {
                 list_del_init(&oap->oap_pending_item);
-                if (oap->oap_async_flags & ASYNC_URGENT)
-                        list_del_init(&oap->oap_urgent_item);
+                list_del_init(&oap->oap_urgent_item);
 
                 loi = oap->oap_loi;
                 lop = (oap->oap_cmd & OBD_BRW_WRITE) ?
@@ -2259,7 +2258,8 @@ static void osc_group_to_pending(struct client_obd *cli, struct lov_oinfo *loi,
                 oap = list_entry(pos, struct osc_async_page, oap_pending_item);
                 list_del(&oap->oap_pending_item);
                 list_add_tail(&oap->oap_pending_item, &lop->lop_pending);
-                list_add(&oap->oap_urgent_item, &lop->lop_urgent);
+                if (oap->oap_async_flags & ASYNC_URGENT)
+                        list_add(&oap->oap_urgent_item, &lop->lop_urgent);
                 lop_update_pending(cli, lop, cmd, 1);
         }
         loi_list_maint(cli, loi);
@@ -2476,7 +2476,6 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa,
                             struct lov_stripe_md *lsm, obd_count page_count,
                             struct brw_page *pga)
 {
-        struct client_obd *cli = &exp->exp_obd->u.cli;
         struct ptlrpc_request *request = NULL;
         struct ost_body *body;
         struct niobuf_remote *nioptr;
@@ -2491,7 +2490,7 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa,
 
         request = ptlrpc_prep_req_pool(class_exp2cliimp(exp),
                                        LUSTRE_OST_VERSION, OST_SAN_WRITE,
-                                       3, size, NULL, cli->cl_rq_pool);
+                                       3, size, NULL, imp->imp_rq_pool);
         if (!request)
                 RETURN(-ENOMEM);
 
@@ -3092,11 +3091,10 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
         }
 
         if (KEY_IS(KEY_INIT_RECOV)) {
-                struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov = *(int *)val;
-                CDEBUG(D_HA, "%s: set imp_no_init_recov = %d\n",
+                CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
                        exp->exp_obd->obd_name,
                        imp->imp_initial_recov);
                 RETURN(0);
@@ -3132,7 +3130,7 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
         }
 
         imp->imp_server_timeout = 1;
-        CDEBUG(D_HA, "pinging OST %s\n", imp->imp_target_uuid.uuid);
+        CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(exp->exp_obd));
         imp->imp_pingable = 1;
 
         RETURN(rc);
@@ -3303,6 +3301,7 @@ static int osc_import_event(struct obd_device *obd,
 int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 {
         int rc;
+        ENTRY;
 
         rc = ptlrpcd_addref();
         if (rc)
@@ -3327,15 +3326,16 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                    previous ones. Ideally we want to have 2x max_rpcs_in_flight
                    reserved, but I afraid that might be too much wasted RAM
                    in fact, so 2 is just my guess and still should work. */
-                cli->cl_rq_pool = ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
-                                                      OST_MAXREQSIZE,
-                                                      ptlrpc_add_rqs_to_pool);
+                cli->cl_import->imp_rq_pool =
+                        ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
+                                            OST_MAXREQSIZE,
+                                            ptlrpc_add_rqs_to_pool);
         }
 
         RETURN(rc);
 }
 
-static int osc_precleanup(struct obd_device *obd, int stage)
+static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
         int rc = 0;
         ENTRY;
@@ -3349,10 +3349,15 @@ static int osc_precleanup(struct obd_device *obd, int stage)
                 ptlrpc_deactivate_import(imp);
                 break;
         }
+        case OBD_CLEANUP_EXPORTS:
+                break;
         case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
+                break;
+        case OBD_CLEANUP_OBD:
+                break;
         }
         RETURN(rc);
 }
@@ -3360,7 +3365,6 @@ static int osc_precleanup(struct obd_device *obd, int stage)
 int osc_cleanup(struct obd_device *obd)
 {
         struct osc_creator *oscc = &obd->u.cli.cl_oscc;
-        struct client_obd *cli = &obd->u.cli;
         int rc;
 
         ptlrpc_lprocfs_unregister_obd(obd);
@@ -3376,8 +3380,6 @@ int osc_cleanup(struct obd_device *obd)
 
         rc = client_obd_cleanup(obd);
 
-        ptlrpc_free_rq_pool(cli->cl_rq_pool);
-
         ptlrpcd_decref();
         RETURN(rc);
 }
@@ -3429,7 +3431,9 @@ struct obd_ops osc_obd_ops = {
 #if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 struct obd_ops sanosc_obd_ops = {
         .o_owner                = THIS_MODULE,
-        .o_cleanup              = client_obd_cleanup,
+        .o_setup                = client_sanobd_setup,
+        .o_precleanup           = osc_precleanup,
+        .o_cleanup              = osc_cleanup,
         .o_add_conn             = client_import_add_conn,
         .o_del_conn             = client_import_del_conn,
         .o_connect              = client_connect_import,
@@ -3443,7 +3447,6 @@ struct obd_ops sanosc_obd_ops = {
         .o_getattr              = osc_getattr,
         .o_getattr_async        = osc_getattr_async,
         .o_setattr              = osc_setattr,
-        .o_setup                = client_sanobd_setup,
         .o_brw                  = sanosc_brw,
         .o_punch                = osc_punch,
         .o_sync                 = osc_sync,
index a7ae978..fada922 100644 (file)
@@ -1680,6 +1680,8 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         if (rc)
                 GOTO(out_io, rc = -EINVAL);
 
+        ping_evictor_start();
+
         RETURN(0);
 
 out_io:
@@ -1702,6 +1704,8 @@ static int ost_cleanup(struct obd_device *obd)
         int err = 0;
         ENTRY;
 
+        ping_evictor_stop();
+
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (obd->obd_recovering) {
                 target_cancel_recovery_timer(obd);
index ab76f7a..ee2c7d9 100644 (file)
@@ -24,7 +24,7 @@ if LIBLUSTRE
 
 noinst_LIBRARIES = libptlrpc.a
 libptlrpc_a_SOURCES = $(COMMON_SOURCES)
-libptlrpc_a_CPPFLAGS = $(LLCPPFLGS)
+libptlrpc_a_CPPFLAGS = $(LLCPPFLAGS)
 libptlrpc_a_CFLAGS = $(LLCFLAGS)
 
 endif
@@ -33,5 +33,5 @@ if MODULES
 modulefs_DATA = ptlrpc$(KMODEXT)
 endif # MODULES
 
-MOSTLYCLEANFILES := @MOSTLYCLEANFILES@  ldlm_*.c l_lock.c
 DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@  ldlm_*.c l_lock.c
index 488ad13..24a312b 100644 (file)
@@ -459,6 +459,7 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
         list_add_tail(&req->rq_set_chain, &set->set_requests);
         req->rq_set = set;
         set->set_remaining++;
+
         atomic_inc(&req->rq_import->imp_inflight);
 }
 
@@ -982,7 +983,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
                 RETURN(1);
         }
 
-        ptlrpc_fail_import(imp, req->rq_import_generation);
+        ptlrpc_fail_import(imp, req->rq_reqmsg->conn_cnt);
 
         RETURN(0);
 }
index 0aef6e2..ab9e4f3 100644 (file)
@@ -61,12 +61,14 @@ void request_out_callback(lnet_event_t *ev)
                 spin_lock_irqsave(&req->rq_lock, flags);
                 req->rq_net_err = 1;
                 spin_unlock_irqrestore(&req->rq_lock, flags);
-                
+
                 ptlrpc_wake_client_req(req);
         }
 
-        /* this balances the atomic_inc in ptl_send_rpc() */
+        /* these balance the references in ptl_send_rpc() */
+        atomic_dec(&req->rq_import->imp_inflight);
         ptlrpc_req_finished(req);
+
         EXIT;
 }
 
index 091eba1..d046e78 100644 (file)
@@ -53,7 +53,7 @@ struct ptlrpc_connect_async_args {
 do {                                                                           \
         if (imp->imp_state != LUSTRE_IMP_CLOSED) {                             \
                CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n",    \
-                      imp, imp->imp_target_uuid.uuid,                          \
+                      imp, obd2cli_tgt(imp->imp_obd),                          \
                       ptlrpc_import_state_name(imp->imp_state),                \
                       ptlrpc_import_state_name(state));                        \
                imp->imp_state = state;                                         \
@@ -112,45 +112,52 @@ static void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uu
 
 /* Returns true if import was FULL, false if import was already not
  * connected.
+ * @imp - import to be disconnected
+ * @conn_cnt - connection count (epoch) of the request that timed out
+ *             and caused the disconnection.  In some cases, multiple
+ *             inflight requests can fail to a single target (e.g. OST
+ *             bulk requests) and if one has already caused a reconnection
+ *             (increasing the import->conn_cnt) the older failure should
+ *             not also cause a reconnection.  If zero it forces a reconnect.
  */
-int ptlrpc_set_import_discon(struct obd_import *imp)
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
 {
         unsigned long flags;
         int rc = 0;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
 
-        if (imp->imp_state == LUSTRE_IMP_FULL) {
+        if (imp->imp_state == LUSTRE_IMP_FULL &&
+            (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
                 char *target_start;
                 int   target_len;
 
-                deuuidify(imp->imp_target_uuid.uuid, NULL,
+                deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
 
-                LCONSOLE_ERROR("Connection to service %.*s via nid %s was "
+                LCONSOLE_ERROR("%s: Connection to service %.*s via nid %s was "
                                "lost; in progress operations using this "
-                               "service will %s.\n",
+                               "service will %s.\n", imp->imp_obd->obd_name,
                                target_len, target_start,
                                libcfs_nid2str(imp->imp_connection->c_peer.nid),
                                imp->imp_replayable ?
-                               "wait for recovery to complete" : "fail");
+                                      "wait for recovery to complete" : "fail");
 
                 if (obd_dump_on_timeout)
                         libcfs_debug_dumplog();
 
-                CDEBUG(D_HA, "%s: connection lost to %s@%s\n",
-                      imp->imp_obd->obd_name,
-                      imp->imp_target_uuid.uuid,
-                      imp->imp_connection->c_remote_uuid.uuid);
                 IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
                 rc = 1;
         } else {
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
-                CDEBUG(D_HA, "%p %s: import already not connected: %s\n",
-                       imp,imp->imp_client->cli_name,
-                       ptlrpc_import_state_name(imp->imp_state));
+                CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
+                       imp->imp_client->cli_name, imp,
+                       (imp->imp_state == LUSTRE_IMP_FULL &&
+                        imp->imp_conn_cnt > conn_cnt) ?
+                       "reconnected" : "not connected", imp->imp_conn_cnt,
+                       conn_cnt, ptlrpc_import_state_name(imp->imp_state));
         }
 
         return rc;
@@ -166,7 +173,7 @@ void ptlrpc_deactivate_import(struct obd_import *imp)
         ENTRY;
 
         spin_lock_irqsave(&imp->imp_lock, flags);
-        CDEBUG(D_HA, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
+        CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd));
         imp->imp_invalid = 1;
         imp->imp_generation++;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -200,7 +207,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 
         if (rc)
                 CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
-                       imp->imp_target_uuid.uuid, rc,
+                       obd2cli_tgt(imp->imp_obd), rc,
                        atomic_read(&imp->imp_inflight));
 
         obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
@@ -219,26 +226,26 @@ void ptlrpc_activate_import(struct obd_import *imp)
         obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
 }
 
-void ptlrpc_fail_import(struct obd_import *imp, int generation)
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 {
         ENTRY;
 
-        LASSERT (!imp->imp_dlm_fake);
+        LASSERT(!imp->imp_dlm_fake);
 
-        if (ptlrpc_set_import_discon(imp)) {
+        if (ptlrpc_set_import_discon(imp, conn_cnt)) {
                 unsigned long flags;
 
                 if (!imp->imp_replayable) {
                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
                                "auto-deactivating\n",
-                               imp->imp_target_uuid.uuid,
+                               obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid,
                                imp->imp_obd->obd_name);
                         ptlrpc_deactivate_import(imp);
                 }
 
                 CDEBUG(D_HA, "%s: waking up pinger\n",
-                       imp->imp_target_uuid.uuid);
+                       obd2cli_tgt(imp->imp_obd));
 
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 imp->imp_force_verify = 1;
@@ -304,11 +311,11 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
         int rc;
         __u64 committed_before_reconnect = 0;
         struct ptlrpc_request *request;
-        int size[] = {sizeof(imp->imp_target_uuid),
+        int size[] = {sizeof(imp->imp_obd->u.cli.cl_target_uuid),
                       sizeof(obd->obd_uuid),
                       sizeof(imp->imp_dlm_handle),
                       sizeof(imp->imp_connect_data)};
-        char *tmp[] = {imp->imp_target_uuid.uuid,
+        char *tmp[] = {obd2cli_tgt(imp->imp_obd),
                        obd->obd_uuid.uuid,
                        (char *)&imp->imp_dlm_handle,
                        (char *)&imp->imp_connect_data};
@@ -359,7 +366,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
             /* last in list */
             (imp->imp_conn_current->oic_item.next == &imp->imp_conn_list)) {
                 CDEBUG(D_HA, "Last connection attempt (%d) for %s\n",
-                       imp->imp_conn_cnt, imp->imp_target_uuid.uuid);
+                       imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd));
                 /* Don't retry if connect fails */
                 rc = 0;
                 obd_set_info(obd->obd_self_export,
@@ -473,7 +480,7 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
         if (aa->pcaa_initial_connect) {
                 if (msg_flags & MSG_CONNECT_REPLAYABLE) {
                         CDEBUG(D_HA, "connected to replayable target: %s\n",
-                               imp->imp_target_uuid.uuid);
+                               obd2cli_tgt(imp->imp_obd));
                         imp->imp_replayable = 1;
                 } else {
                         imp->imp_replayable = 0;
@@ -490,7 +497,7 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 if (!memcmp(&old_hdl, &request->rq_repmsg->handle,
                             sizeof (old_hdl))) {
                         CERROR("%s@%s didn't like our handle "LPX64
-                               ", failed\n", imp->imp_target_uuid.uuid,
+                               ", failed\n", obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid,
                                imp->imp_dlm_handle.cookie);
                         GOTO(out, rc = -ENOTCONN);
@@ -500,14 +507,14 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                            sizeof(imp->imp_remote_handle))) {
                         CERROR("%s@%s changed handle from "LPX64" to "LPX64
                                "; copying, but this may foreshadow disaster\n",
-                               imp->imp_target_uuid.uuid,
+                               obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid,
                                imp->imp_remote_handle.cookie,
                                request->rq_repmsg->handle.cookie);
                         imp->imp_remote_handle = request->rq_repmsg->handle;
                 } else {
                         CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
-                               imp->imp_target_uuid.uuid,
+                               obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid);
                 }
 
@@ -516,7 +523,7 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 } else if (MSG_CONNECT_RECOVERING & msg_flags) {
                         CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
                                imp->imp_obd->obd_name,
-                               imp->imp_target_uuid.uuid);
+                               obd2cli_tgt(imp->imp_obd));
                         imp->imp_resend_replay = 1;
                         IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
                 } else {
@@ -543,7 +550,7 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                        " was previously committed, server now claims "LPD64
                        ")!  See https://bugzilla.clusterfs.com/"
                        "long_list.cgi?buglist=9646\n",
-                       imp->imp_target_uuid.uuid, aa->pcaa_peer_committed,
+                       obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
                        request->rq_repmsg->last_committed);
         }
 
@@ -553,7 +560,7 @@ finish:
                 if (rc == -ENOTCONN) {
                         CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
                                "invalidating and reconnecting\n",
-                               imp->imp_target_uuid.uuid,
+                               obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid);
                         ptlrpc_connect_import(imp, NULL);
                         RETURN(0);
@@ -597,14 +604,14 @@ finish:
                         /* Sigh, some compilers do not like #ifdef in the middle
                            of macro arguments */
 #ifdef __KERNEL__
-                        char *action = "upgrading this client";
+                        const char *action = "upgrading this client";
 #else
-                        char *action = "recompiling this application";
+                        const char *action = "recompiling this application";
 #endif
 
                         CWARN("Server %s version (%d.%d.%d.%d) is much newer. "
                               "Consider %s (%s).\n",
-                              imp->imp_target_uuid.uuid,
+                              obd2cli_tgt(imp->imp_obd),
                               OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
                               OBD_OCD_VERSION_MINOR(ocd->ocd_version),
                               OBD_OCD_VERSION_PATCH(ocd->ocd_version),
@@ -649,12 +656,13 @@ finish:
                                        "refused connection from this client "
                                        "as too old version (%s).  Client must "
                                        "be recompiled\n",
-                                      imp->imp_target_uuid.uuid,
+                                      obd2cli_tgt(imp->imp_obd),
                                       OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
                                       OBD_OCD_VERSION_MINOR(ocd->ocd_version),
                                       OBD_OCD_VERSION_PATCH(ocd->ocd_version),
                                       OBD_OCD_VERSION_FIX(ocd->ocd_version),
                                       LUSTRE_VERSION_STRING);
+                                ptlrpc_deactivate_import(imp);
                                 IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
                         }
                         RETURN(-EPROTO);
@@ -663,7 +671,7 @@ finish:
                 ptlrpc_maybe_ping_import_soon(imp);
 
                 CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
-                       imp->imp_target_uuid.uuid,
+                       obd2cli_tgt(imp->imp_obd),
                        (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
         }
 
@@ -716,22 +724,13 @@ static int signal_completed_replay(struct obd_import *imp)
 static int ptlrpc_invalidate_import_thread(void *data)
 {
         struct obd_import *imp = data;
-        unsigned long flags;
 
         ENTRY;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        THREAD_NAME(current->comm, sizeof(current->comm), "ll_imp_inval");
-        unlock_kernel();
+        ptlrpc_daemonize("ll_imp_inval");
 
         CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
-               imp->imp_obd->obd_name, imp->imp_target_uuid.uuid,
+               imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
                imp->imp_connection->c_remote_uuid.uuid);
 
         ptlrpc_invalidate_import(imp);
@@ -751,13 +750,13 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
         int target_len;
 
         if (imp->imp_state == LUSTRE_IMP_EVICTED) {
-                deuuidify(imp->imp_target_uuid.uuid, NULL,
+                deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
                 LCONSOLE_ERROR("This client was evicted by %.*s; in progress "
                                "operations using this service will fail.\n",
                                target_len, target_start);
                 CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
-                       imp->imp_target_uuid.uuid,
+                       obd2cli_tgt(imp->imp_obd),
                        imp->imp_connection->c_remote_uuid.uuid);
 
 #ifdef __KERNEL__
@@ -777,7 +776,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 
         if (imp->imp_state == LUSTRE_IMP_REPLAY) {
                 CDEBUG(D_HA, "replay requested by %s\n",
-                       imp->imp_target_uuid.uuid);
+                       obd2cli_tgt(imp->imp_obd));
                 rc = ptlrpc_replay_next(imp, &inflight);
                 if (inflight == 0 &&
                     atomic_read(&imp->imp_replay_inflight) == 0) {
@@ -807,7 +806,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 
         if (imp->imp_state == LUSTRE_IMP_RECOVER) {
                 CDEBUG(D_HA, "reconnected to %s@%s\n",
-                       imp->imp_target_uuid.uuid,
+                       obd2cli_tgt(imp->imp_obd),
                        imp->imp_connection->c_remote_uuid.uuid);
 
                 rc = ptlrpc_resend(imp);
@@ -816,7 +815,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
                 ptlrpc_activate_import(imp);
 
-                deuuidify(imp->imp_target_uuid.uuid, NULL,
+                deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
                 LCONSOLE_INFO("%s: Connection restored to service %.*s "
                               "using nid %s.\n", imp->imp_obd->obd_name,
@@ -852,7 +851,7 @@ int ptlrpc_disconnect_import(struct obd_import *imp)
         case MGS_CONNECT: rq_opc = MGS_DISCONNECT; break;
         default:
                 CERROR("don't know how to disconnect from %s (connect_op %d)\n",
-                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                       obd2cli_tgt(imp->imp_obd), imp->imp_connect_op);
                 RETURN(-EINVAL);
         }
 
index 2752ef4..6ddf32b 100644 (file)
@@ -479,13 +479,17 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                        request->rq_reply_portal);
         }
 
-        ptlrpc_request_addref(request);       /* +1 ref for the SENT callback */
+        /* add references on request and import for request_out_callback */
+        ptlrpc_request_addref(request);
+        atomic_inc(&request->rq_import->imp_inflight);
+
+        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
         request->rq_sent = CURRENT_SECONDS;
         ptlrpc_pinger_sending_on_import(request->rq_import);
-        rc = ptl_send_buf(&request->rq_req_md_h, 
+        rc = ptl_send_buf(&request->rq_req_md_h,
                           request->rq_reqmsg, request->rq_reqlen,
-                          LNET_NOACK_REQ, &request->rq_req_cbid, 
+                          LNET_NOACK_REQ, &request->rq_req_cbid,
                           connection,
                           request->rq_request_portal,
                           request->rq_xid);
@@ -494,7 +498,9 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 RETURN(rc);
         }
 
-        ptlrpc_req_finished (request);          /* drop callback ref */
+         /* drop request_out_callback refs, we couldn't start the send */
+        atomic_dec(&request->rq_import->imp_inflight);
+        ptlrpc_req_finished (request);
 
         if (noreply)
                 RETURN(rc);
index 4b52a4a..52ec205 100644 (file)
@@ -1229,6 +1229,16 @@ void lustre_assert_wire_constants(void)
                  (long long)MDS_STATUS_CONN);
         LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n",
                  (long long)MDS_STATUS_LOV);
+        LASSERTF(MGS_CONNECT == 250, " found %lld\n",
+                 (long long)MGS_CONNECT);
+        LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
+                 (long long)MGS_DISCONNECT);
+        LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
+                 (long long)MGS_EXCEPTION);
+        LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
+                 (long long)MGS_TARGET_REG);
+        LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
+                 (long long)MGS_TARGET_DEL);
         LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n",
                  (long long)LDLM_ENQUEUE);
         LASSERTF(LDLM_CONVERT == 102, " found %lld\n",
@@ -1259,16 +1269,10 @@ void lustre_assert_wire_constants(void)
                  (long long)LCK_GROUP);
         LASSERTF(LCK_MAXMODE == 65, " found %lld\n",
                  (long long)LCK_MAXMODE);
-        LASSERTF(MGS_CONNECT == 250, " found %lld\n",
-                 (long long)MGS_CONNECT);
-        LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
-                 (long long)MGS_DISCONNECT);
-        LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
-                 (long long)MGS_EXCEPTION);
-        LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
-                 (long long)MGS_TARGET_REG);
-        LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
-                 (long long)MGS_TARGET_DEL);
+        CLASSERT(LDLM_PLAIN == 10);
+        CLASSERT(LDLM_EXTENT == 11);
+        CLASSERT(LDLM_FLOCK == 12);
+        CLASSERT(LDLM_IBITS == 13);
         LASSERTF(OBD_PING == 400, " found %lld\n",
                  (long long)OBD_PING);
         LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n",
@@ -1281,28 +1285,19 @@ void lustre_assert_wire_constants(void)
                  (long long)QUOTA_DQACQ);
         LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
                  (long long)QUOTA_DQREL);
-        LASSERTF(OBD_CONNECT_RDONLY == 1, " found %lld\n",
-                 (long long)OBD_CONNECT_RDONLY);
-        LASSERTF(OBD_CONNECT_INDEX == 2, " found %lld\n",
-                 (long long)OBD_CONNECT_INDEX);
-        LASSERTF(OBD_CONNECT_GRANT == 8, " found %lld\n",
-                 (long long)OBD_CONNECT_GRANT);
-        LASSERTF(OBD_CONNECT_SRVLOCK == 16, " found %lld\n",
-                 (long long)OBD_CONNECT_SRVLOCK);
-        LASSERTF(OBD_CONNECT_VERSION == 32, " found %lld\n",
-                 (long long)OBD_CONNECT_VERSION);
-        LASSERTF(OBD_CONNECT_REQPORTAL == 64, " found %lld\n",
-                 (long long)OBD_CONNECT_REQPORTAL);
-        LASSERTF(OBD_CONNECT_ACL == 128, " found %lld\n",
-                 (long long)OBD_CONNECT_ACL);
-        LASSERTF(OBD_CONNECT_XATTR == 256, " found %lld\n",
-                 (long long)OBD_CONNECT_XATTR);
-        LASSERTF(OBD_CONNECT_CROW == 512, " found %lld\n",
-                 (long long)OBD_CONNECT_CROW);
-        LASSERTF(OBD_CONNECT_TRUNCLOCK == 1024, " found %lld\n",
-                 (long long)OBD_CONNECT_TRUNCLOCK);
-        LASSERTF(OBD_CONNECT_TRANSNO == 2048, " found %lld\n",
-                 (long long)OBD_CONNECT_TRANSNO);
+        CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+        CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+        CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+        CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+        CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+        CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+        CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+        CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+        CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+        CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+        CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+        CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+        CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         /* Sizes and Offsets */
 
 
@@ -1449,6 +1444,10 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obdo, o_mds));
         LASSERTF((int)sizeof(((struct obdo *)0)->o_mds) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obdo *)0)->o_mds));
+        LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, " found %lld\n",
+                 (long long)(int)offsetof(struct obdo, o_stripe_idx));
+        LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
         LASSERTF((int)offsetof(struct obdo, o_padding_1) == 124, " found %lld\n",
                  (long long)(int)offsetof(struct obdo, o_padding_1));
         LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_1) == 4, " found %lld\n",
@@ -1459,86 +1458,52 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)sizeof(((struct obdo *)0)->o_inline));
         LASSERTF(OBD_INLINESZ == 80, " found %lld\n",
                  (long long)OBD_INLINESZ);
-        LASSERTF(OBD_MD_FLID == 1, " found %lld\n",
-                 (long long)OBD_MD_FLID);
-        LASSERTF(OBD_MD_FLATIME == 2, " found %lld\n",
-                 (long long)OBD_MD_FLATIME);
-        LASSERTF(OBD_MD_FLMTIME == 4, " found %lld\n",
-                 (long long)OBD_MD_FLMTIME);
-        LASSERTF(OBD_MD_FLCTIME == 8, " found %lld\n",
-                 (long long)OBD_MD_FLCTIME);
-        LASSERTF(OBD_MD_FLSIZE == 16, " found %lld\n",
-                 (long long)OBD_MD_FLSIZE);
-        LASSERTF(OBD_MD_FLBLOCKS == 32, " found %lld\n",
-                 (long long)OBD_MD_FLBLOCKS);
-        LASSERTF(OBD_MD_FLBLKSZ == 64, " found %lld\n",
-                 (long long)OBD_MD_FLBLKSZ);
-        LASSERTF(OBD_MD_FLMODE == 128, " found %lld\n",
-                 (long long)OBD_MD_FLMODE);
-        LASSERTF(OBD_MD_FLTYPE == 256, " found %lld\n",
-                 (long long)OBD_MD_FLTYPE);
-        LASSERTF(OBD_MD_FLUID == 512, " found %lld\n",
-                 (long long)OBD_MD_FLUID);
-        LASSERTF(OBD_MD_FLGID == 1024, " found %lld\n",
-                 (long long)OBD_MD_FLGID);
-        LASSERTF(OBD_MD_FLFLAGS == 2048, " found %lld\n",
-                 (long long)OBD_MD_FLFLAGS);
-        LASSERTF(OBD_MD_FLNLINK == 8192, " found %lld\n",
-                 (long long)OBD_MD_FLNLINK);
-        LASSERTF(OBD_MD_FLGENER == 16384, " found %lld\n",
-                 (long long)OBD_MD_FLGENER);
-        LASSERTF(OBD_MD_FLINLINE == 32768, " found %lld\n",
-                 (long long)OBD_MD_FLINLINE);
-        LASSERTF(OBD_MD_FLRDEV == 65536, " found %lld\n",
-                 (long long)OBD_MD_FLRDEV);
-        LASSERTF(OBD_MD_FLEASIZE == 131072, " found %lld\n",
-                 (long long)OBD_MD_FLEASIZE);
-        LASSERTF(OBD_MD_LINKNAME == 262144, " found %lld\n",
-                 (long long)OBD_MD_LINKNAME);
-        LASSERTF(OBD_MD_FLHANDLE == 524288, " found %lld\n",
-                 (long long)OBD_MD_FLHANDLE);
-        LASSERTF(OBD_MD_FLCKSUM == 1048576, " found %lld\n",
-                 (long long)OBD_MD_FLCKSUM);
-        LASSERTF(OBD_MD_FLQOS == 2097152, " found %lld\n",
-                 (long long)OBD_MD_FLQOS);
-        LASSERTF(OBD_MD_FLCOOKIE == 8388608, " found %lld\n",
-                 (long long)OBD_MD_FLCOOKIE);
-        LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n",
-                 (long long)OBD_MD_FLGROUP);
-        LASSERTF(OBD_MD_FLFID == 33554432, " found %lld\n",
-                 (long long)OBD_MD_FLFID);
-        LASSERTF(OBD_MD_FLEPOCH == 67108864, " found %lld\n",
-                 (long long)OBD_MD_FLEPOCH);
-        LASSERTF(OBD_MD_FLGRANT == 134217728, " found %lld\n",
-                 (long long)OBD_MD_FLGRANT);
-        LASSERTF(OBD_MD_FLDIREA == 268435456, " found %lld\n",
-                 (long long)OBD_MD_FLDIREA);
-        LASSERTF(OBD_MD_FLUSRQUOTA == 536870912, " found %lld\n",
-                 (long long)OBD_MD_FLUSRQUOTA);
-        LASSERTF(OBD_MD_FLGRPQUOTA == 1073741824, " found %lld\n",
-                 (long long)OBD_MD_FLGRPQUOTA);
-        LASSERTF(OBD_MD_MDS == 4294967296ULL, " found %lld\n",
-                 (long long)OBD_MD_MDS);
-        LASSERTF(OBD_MD_REINT == 8589934592ULL, " found %lld\n",
-                 (long long)OBD_MD_REINT);
-        LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n",
-                 (long long)OBD_FL_INLINEDATA);
-        LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n",
-                 (long long)OBD_FL_OBDMDEXISTS);
-        LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n",
-                 (long long)OBD_FL_DELORPHAN);
-        LASSERTF(OBD_FL_NORPC == 8, " found %lld\n",
-                 (long long)OBD_FL_NORPC);
-        LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n",
-                 (long long)OBD_FL_IDONLY);
-        LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n",
-                 (long long)OBD_FL_RECREATE_OBJS);
-        LASSERTF(OBD_FL_DEBUG_CHECK == 64, " found %lld\n",
-                 (long long)OBD_FL_DEBUG_CHECK);
-        LASSERTF(OBD_FL_NO_USRQUOTA == 256, " found %lld\n",
-                 (long long)OBD_FL_NO_USRQUOTA);
-        LASSERTF(OBD_FL_NO_GRPQUOTA == 512, " found %lld\n",
-                 (long long)OBD_FL_NO_GRPQUOTA);
+        CLASSERT(OBD_MD_FLID == (0x00000001ULL));
+        CLASSERT(OBD_MD_FLATIME == (0x00000002ULL));
+        CLASSERT(OBD_MD_FLMTIME == (0x00000004ULL));
+        CLASSERT(OBD_MD_FLCTIME == (0x00000008ULL));
+        CLASSERT(OBD_MD_FLSIZE == (0x00000010ULL));
+        CLASSERT(OBD_MD_FLBLOCKS == (0x00000020ULL));
+        CLASSERT(OBD_MD_FLBLKSZ == (0x00000040ULL));
+        CLASSERT(OBD_MD_FLMODE == (0x00000080ULL));
+        CLASSERT(OBD_MD_FLTYPE == (0x00000100ULL));
+        CLASSERT(OBD_MD_FLUID == (0x00000200ULL));
+        CLASSERT(OBD_MD_FLGID == (0x00000400ULL));
+        CLASSERT(OBD_MD_FLFLAGS == (0x00000800ULL));
+        CLASSERT(OBD_MD_FLNLINK == (0x00002000ULL));
+        CLASSERT(OBD_MD_FLGENER == (0x00004000ULL));
+        CLASSERT(OBD_MD_FLINLINE == (0x00008000ULL));
+        CLASSERT(OBD_MD_FLRDEV == (0x00010000ULL));
+        CLASSERT(OBD_MD_FLEASIZE == (0x00020000ULL));
+        CLASSERT(OBD_MD_LINKNAME == (0x00040000ULL));
+        CLASSERT(OBD_MD_FLHANDLE == (0x00080000ULL));
+        CLASSERT(OBD_MD_FLCKSUM == (0x00100000ULL));
+        CLASSERT(OBD_MD_FLQOS == (0x00200000ULL));
+        CLASSERT(OBD_MD_FLCOOKIE == (0x00800000ULL));
+        CLASSERT(OBD_MD_FLGROUP == (0x01000000ULL));
+        CLASSERT(OBD_MD_FLFID == (0x02000000ULL));
+        CLASSERT(OBD_MD_FLEPOCH == (0x04000000ULL));
+        CLASSERT(OBD_MD_FLGRANT == (0x08000000ULL));
+        CLASSERT(OBD_MD_FLDIREA == (0x10000000ULL));
+        CLASSERT(OBD_MD_FLUSRQUOTA == (0x20000000ULL));
+        CLASSERT(OBD_MD_FLGRPQUOTA == (0x40000000ULL));
+        CLASSERT(OBD_MD_FLMODEASIZE == (0x80000000ULL));
+        CLASSERT(OBD_MD_MDS == (0x0000000100000000ULL));
+        CLASSERT(OBD_MD_REINT == (0x0000000200000000ULL));
+        CLASSERT(OBD_MD_FLXATTR == (0x0000001000000000ULL));
+        CLASSERT(OBD_MD_FLXATTRLS == (0x0000002000000000ULL));
+        CLASSERT(OBD_MD_FLXATTRRM == (0x0000004000000000ULL));
+        CLASSERT(OBD_MD_FLACL == (0x0000008000000000ULL));
+        CLASSERT(OBD_FL_INLINEDATA == (0x00000001));
+        CLASSERT(OBD_FL_OBDMDEXISTS == (0x00000002));
+        CLASSERT(OBD_FL_DELORPHAN == (0x00000004));
+        CLASSERT(OBD_FL_NORPC == (0x00000008));
+        CLASSERT(OBD_FL_IDONLY == (0x00000010));
+        CLASSERT(OBD_FL_RECREATE_OBJS == (0x00000020));
+        CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040));
+        CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100));
+        CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200));
+        CLASSERT(OBD_FL_CREATE_CROW == (0x00000400));
 
         /* Checks for struct lov_mds_md_v1 */
         LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",
@@ -1591,13 +1556,29 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
         LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
-        LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n",
-                 (long long)LOV_MAGIC_V1);
+        CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+        CLASSERT(LOV_MAGIC_JOIN == 0x0BD20BD0);
         LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n",
                  (long long)LOV_PATTERN_RAID0);
         LASSERTF(LOV_PATTERN_RAID1 == 2, " found %lld\n",
                  (long long)LOV_PATTERN_RAID1);
 
+        /* Checks for struct lov_mds_md_join */
+        LASSERTF((int)sizeof(struct lov_mds_md_join) == 56, " found %lld\n",
+                 (long long)(int)sizeof(struct lov_mds_md_join));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_md) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_md));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md) == 32, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_array_id) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_array_id));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id) == 20, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_extent_count) == 52, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_extent_count));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count));
+
         /* Checks for struct obd_statfs */
         LASSERTF((int)sizeof(struct obd_statfs) == 144, " found %lld\n",
                  (long long)(int)sizeof(struct obd_statfs));
@@ -1637,6 +1618,42 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_statfs, os_state));
         LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare1));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare1));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare2));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare4));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare5));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare6));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare7));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare8));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare9));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
 
         /* Checks for struct obd_ioobj */
         LASSERTF((int)sizeof(struct obd_ioobj) == 24, " found %lld\n",
@@ -1935,26 +1952,22 @@ void lustre_assert_wire_constants(void)
                  (long long)FMODE_READ);
         LASSERTF(FMODE_WRITE == 2, " found %lld\n",
                  (long long)FMODE_WRITE);
-        LASSERTF(FMODE_EXEC == 4, " found %lld\n",
-                 (long long)FMODE_EXEC);
-        LASSERTF(MDS_OPEN_CREAT == 64, " found %lld\n",
-                 (long long)MDS_OPEN_CREAT);
-        LASSERTF(MDS_OPEN_EXCL == 128, " found %lld\n",
-                 (long long)MDS_OPEN_EXCL);
-        LASSERTF(MDS_OPEN_TRUNC == 512, " found %lld\n",
-                 (long long)MDS_OPEN_TRUNC);
-        LASSERTF(MDS_OPEN_APPEND == 1024, " found %lld\n",
-                 (long long)MDS_OPEN_APPEND);
-        LASSERTF(MDS_OPEN_SYNC == 4096, " found %lld\n",
-                 (long long)MDS_OPEN_SYNC);
-        LASSERTF(MDS_OPEN_DIRECTORY == 65536, " found %lld\n",
-                 (long long)MDS_OPEN_DIRECTORY);
-        LASSERTF(MDS_OPEN_DELAY_CREATE == 16777216, " found %lld\n",
-                 (long long)MDS_OPEN_DELAY_CREATE);
+        LASSERTF(MDS_FMODE_EXEC == 4, " found %lld\n",
+                 (long long)MDS_FMODE_EXEC);
+        CLASSERT(MDS_OPEN_CREAT == 00000100);
+        CLASSERT(MDS_OPEN_EXCL == 00000200);
+        CLASSERT(MDS_OPEN_TRUNC == 00001000);
+        CLASSERT(MDS_OPEN_APPEND == 00002000);
+        CLASSERT(MDS_OPEN_SYNC == 00010000);
+        CLASSERT(MDS_OPEN_DIRECTORY == 00200000);
+        CLASSERT(MDS_OPEN_DELAY_CREATE == 0100000000);
         CLASSERT(MDS_OPEN_OWNEROVERRIDE == 0200000000);
         CLASSERT(MDS_OPEN_JOIN_FILE == 0400000000);
         CLASSERT(MDS_OPEN_HAS_EA == 010000000000);
         CLASSERT(MDS_OPEN_HAS_OBJS == 020000000000);
+        CLASSERT(MDS_INODELOCK_LOOKUP == 0x000001);
+        CLASSERT(MDS_INODELOCK_UPDATE == 0x000002);
+        CLASSERT(MDS_INODELOCK_OPEN == 0x000004);
 
         /* Checks for struct mds_rec_setattr */
         LASSERTF((int)sizeof(struct mds_rec_setattr) == 96, " found %lld\n",
@@ -2188,6 +2201,18 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct mds_rec_rename *)0)->rn_time) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct mds_rec_rename *)0)->rn_time));
 
+        /* Checks for struct mds_rec_join */
+        LASSERTF((int)sizeof(struct mds_rec_join) == 24, " found %lld\n",
+                 (long long)(int)sizeof(struct mds_rec_join));
+        LASSERTF((int)offsetof(struct mds_rec_join, jr_fid) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_rec_join, jr_fid));
+        LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_fid) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_fid));
+        LASSERTF((int)offsetof(struct mds_rec_join, jr_headsize) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_rec_join, jr_headsize));
+        LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_headsize) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_headsize));
+
         /* Checks for struct lov_desc */
         LASSERTF((int)sizeof(struct lov_desc) == 88, " found %lld\n",
                  (long long)(int)sizeof(struct lov_desc));
@@ -2284,6 +2309,14 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct ldlm_flock *)0)->pid) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct ldlm_flock *)0)->pid));
 
+        /* Checks for struct ldlm_inodebits */
+        LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, " found %lld\n",
+                 (long long)(int)sizeof(struct ldlm_inodebits));
+        LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct ldlm_inodebits, bits));
+        LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
+
         /* Checks for struct ldlm_intent */
         LASSERTF((int)sizeof(struct ldlm_intent) == 8, " found %lld\n",
                  (long long)(int)sizeof(struct ldlm_intent));
@@ -2419,24 +2452,16 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct llog_logid, lgl_ogen));
         LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
-        LASSERTF(OST_SZ_REC == 274730752, " found %lld\n",
-                 (long long)OST_SZ_REC);
-        LASSERTF(OST_RAID1_REC == 274731008, " found %lld\n",
-                 (long long)OST_RAID1_REC);
-        LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n",
-                 (long long)MDS_UNLINK_REC);
-        LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n",
-                 (long long)MDS_SETATTR_REC);
-        LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n",
-                 (long long)OBD_CFG_REC);
-        LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n",
-                 (long long)PTL_CFG_REC);
-        LASSERTF(LLOG_GEN_REC == 274989056, " found %lld\n",
-                 (long long)LLOG_GEN_REC);
-        LASSERTF(LLOG_HDR_MAGIC == 275010873, " found %lld\n",
-                 (long long)LLOG_HDR_MAGIC);
-        LASSERTF(LLOG_LOGID_MAGIC == 275010875, " found %lld\n",
-                 (long long)LLOG_LOGID_MAGIC);
+        CLASSERT(OST_SZ_REC == 274730752);
+        CLASSERT(OST_RAID1_REC == 274731008);
+        CLASSERT(MDS_UNLINK_REC == 274801668);
+        CLASSERT(MDS_SETATTR_REC == 274801665);
+        CLASSERT(OBD_CFG_REC == 274857984);
+        CLASSERT(PTL_CFG_REC == 274923520);
+        CLASSERT(LLOG_GEN_REC == 274989056);
+        CLASSERT(LLOG_JOIN_REC == 275054592);
+        CLASSERT(LLOG_HDR_MAGIC == 275010873);
+        CLASSERT(LLOG_LOGID_MAGIC == 275010875);
 
         /* Checks for struct llog_catid */
         LASSERTF((int)sizeof(struct llog_catid) == 32, " found %lld\n",
@@ -2781,20 +2806,15 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
         LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
-        LASSERTF(LLOG_ORIGIN_HANDLE_CREATE == 501, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_CREATE);
-        LASSERTF(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
-        LASSERTF(LLOG_ORIGIN_HANDLE_READ_HEADER == 503, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_READ_HEADER);
-        LASSERTF(LLOG_ORIGIN_HANDLE_WRITE_REC == 504, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_WRITE_REC);
-        LASSERTF(LLOG_ORIGIN_HANDLE_CLOSE == 505, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_CLOSE);
-        LASSERTF(LLOG_ORIGIN_CONNECT == 506, " found %lld\n",
-                 (long long)LLOG_ORIGIN_CONNECT);
-        LASSERTF(LLOG_CATINFO == 507, " found %lld\n",
-                 (long long)LLOG_CATINFO);
+        CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501);
+        CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502);
+        CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503);
+        CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504);
+        CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505);
+        CLASSERT(LLOG_ORIGIN_CONNECT == 506);
+        CLASSERT(LLOG_CATINFO == 507);
+        CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508);
+        CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
 
         /* Checks for struct llogd_conn_body */
         LASSERTF((int)sizeof(struct llogd_conn_body) == 40, " found %lld\n",
@@ -2812,6 +2832,38 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
 
+        /* Checks for struct llog_array_rec */
+        LASSERTF((int)sizeof(struct llog_array_rec) == 72, " found %lld\n",
+                 (long long)(int)sizeof(struct llog_array_rec));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_hdr) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_hdr));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_hdr) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_hdr));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_med) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_med));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_med) == 48, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_med));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_tail) == 64, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_tail));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_tail) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_tail));
+
+        /* Checks for struct mds_extent_desc */
+        LASSERTF((int)sizeof(struct mds_extent_desc) == 48, " found %lld\n",
+                 (long long)(int)sizeof(struct mds_extent_desc));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_start) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_start));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_start) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_start));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_len) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_len));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_len) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_len));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_lmm) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_lmm));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_lmm) == 32, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm));
+
         /* Checks for struct qunit_data */
         LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n",
                  (long long)(int)sizeof(struct qunit_data));
index 7807fef..159ef9b 100644 (file)
@@ -51,14 +51,14 @@ int ptlrpc_ping(struct obd_import *imp)
         if (req) {
                 DEBUG_REQ(D_INFO, req, "pinging %s->%s",
                           imp->imp_obd->obd_uuid.uuid,
-                          imp->imp_target_uuid.uuid);
+                          obd2cli_tgt(imp->imp_obd));
                 req->rq_no_resend = req->rq_no_delay = 1;
                 req->rq_replen = lustre_msg_size(0, NULL);
                 ptlrpcd_add_req(req);
         } else {
                 CERROR("OOM trying to ping %s->%s\n",
                        imp->imp_obd->obd_uuid.uuid,
-                       imp->imp_target_uuid.uuid);
+                       obd2cli_tgt(imp->imp_obd));
                 rc = -ENOMEM;
         }
 
@@ -82,22 +82,9 @@ static int ptlrpc_pinger_main(void *arg)
 {
         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
         struct ptlrpc_thread *thread = data->thread;
-        unsigned long flags;
         ENTRY;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        LASSERTF(strlen(data->name) < sizeof(current->comm),
-                 "name %d > len %d\n",
-                 (int)strlen(data->name), (int)sizeof(current->comm));
-        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
-        unlock_kernel();
+        ptlrpc_daemonize(data->name);
 
         /* Record that the thread is running */
         thread->t_flags = SVC_RUNNING;
@@ -146,7 +133,7 @@ static int ptlrpc_pinger_main(void *arg)
                                         CDEBUG(D_HA, "not pinging %s "
                                                "(in recovery: %s or recovery "
                                                "disabled: %u/%u)\n",
-                                               imp->imp_target_uuid.uuid,
+                                               obd2cli_tgt(imp->imp_obd),
                                                ptlrpc_import_state_name(level),
                                                imp->imp_deactive,
                                                imp->imp_obd->obd_no_recov);
@@ -158,7 +145,7 @@ static int ptlrpc_pinger_main(void *arg)
                                         continue;
                                 CDEBUG(D_INFO,
                                        "don't need to ping %s (%lu > %lu)\n",
-                                       imp->imp_target_uuid.uuid,
+                                       obd2cli_tgt(imp->imp_obd),
                                        imp->imp_next_ping, this_ping);
                         }
 
@@ -171,7 +158,7 @@ static int ptlrpc_pinger_main(void *arg)
 
                 /* Wait until the next ping time, or until we're stopped. */
                 time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies;
-                
+
                 /* The ping sent by ptlrpc_send_rpc may get sent out
                    say .01 second after this.
                    ptlrpc_pinger_eending_on_import will then set the
@@ -279,7 +266,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
 
         down(&pinger_sem);
         CDEBUG(D_HA, "adding pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         ptlrpc_update_next_ping(imp);
         /* XXX sort, blah blah */
         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
@@ -300,7 +287,7 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
         down(&pinger_sem);
         list_del_init(&imp->imp_pinger_chain);
         CDEBUG(D_HA, "removing pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         class_import_put(imp);
         up(&pinger_sem);
         RETURN(0);
@@ -314,6 +301,137 @@ void ptlrpc_pinger_wake_up()
 #endif
 }
 
+/* Ping evictor thread */
+#define PET_READY     1
+#define PET_TERMINATE 2
+
+static int               pet_refcount = 0;
+static int               pet_state;
+static wait_queue_head_t pet_waitq;
+static struct obd_export *pet_exp = NULL;
+static spinlock_t        pet_lock = SPIN_LOCK_UNLOCKED;
+
+int ping_evictor_wake(struct obd_export *exp)
+{
+        spin_lock(&pet_lock);
+        if (pet_exp) {
+                /* eventually the new obd will call here again. */
+                spin_unlock(&pet_lock);
+                return 1;
+        }
+
+        /* We have to make sure the obd isn't destroyed between now and when
+         * the ping evictor runs.  We'll take a reference here, and drop it
+         * when we finish in the evictor.  We don't really care about this
+         * export in particular; we just need one to keep the obd alive. */
+        pet_exp = class_export_get(exp);
+        spin_unlock(&pet_lock);
+
+        wake_up(&pet_waitq);
+        return 0;
+}
+
+static int ping_evictor_main(void *arg)
+{
+        struct obd_device *obd;
+        struct obd_export *exp;
+        struct l_wait_info lwi = { 0 };
+        time_t expire_time;
+        ENTRY;
+
+        ptlrpc_daemonize("ll_evictor");
+
+        CDEBUG(D_HA, "Starting Ping Evictor\n");
+        pet_exp = NULL;
+        pet_state = PET_READY;
+        while (1) {
+                l_wait_event(pet_waitq, pet_exp ||
+                             (pet_state == PET_TERMINATE), &lwi);
+                if (pet_state == PET_TERMINATE)
+                        break;
+
+                /* we only get here if pet_exp != NULL, and the end of this
+                 * loop is the only place which sets it NULL again, so lock
+                 * is not strictly necessary. */
+                spin_lock(&pet_lock);
+                obd = pet_exp->exp_obd;
+                spin_unlock(&pet_lock);
+
+                expire_time = CURRENT_SECONDS - (3 * obd_timeout / 2);
+
+                CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
+                       obd->obd_name, expire_time);
+
+                /* Exports can't be deleted out of the list while we hold
+                 * the obd lock (class_unlink_export), which means we can't
+                 * lose the last ref on the export.  If they've already been
+                 * removed from the list, we won't find them here. */
+                spin_lock(&obd->obd_dev_lock);
+                while (!list_empty(&obd->obd_exports_timed)) {
+                        exp = list_entry(obd->obd_exports_timed.next,
+                                         struct obd_export,exp_obd_chain_timed);
+
+                        if (expire_time > exp->exp_last_request_time) {
+                                class_export_get(exp);
+                                spin_unlock(&obd->obd_dev_lock);
+                                LCONSOLE_WARN("%s: haven't heard from %s in %ld"
+                                              " seconds. Last request was at %ld. "
+                                              "I think it's dead, and I am evicting "
+                                              "it.\n", obd->obd_name,
+                                              obd_export_nid2str(exp),
+                                              (long)(CURRENT_SECONDS -
+                                                     exp->exp_last_request_time),
+                                              exp->exp_last_request_time);
+
+
+                                class_fail_export(exp);
+                                class_export_put(exp);
+
+                                spin_lock(&obd->obd_dev_lock);
+                        } else {
+                                /* List is sorted, so everyone below is ok */
+                                break;
+                        }
+                }
+                spin_unlock(&obd->obd_dev_lock);
+
+                class_export_put(pet_exp);
+
+                spin_lock(&pet_lock);
+                pet_exp = NULL;
+                spin_unlock(&pet_lock);
+        }
+        CDEBUG(D_HA, "Exiting Ping Evictor\n");
+
+        RETURN(0);
+}
+
+void ping_evictor_start(void)
+{
+        int rc;
+
+        if (++pet_refcount > 1)
+                return;
+
+        init_waitqueue_head(&pet_waitq);
+
+        rc = kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FS);
+        if (rc < 0) {
+                pet_refcount--;
+                CERROR("Cannot start ping evictor thread: %d\n", rc);
+        }
+}
+EXPORT_SYMBOL(ping_evictor_start);
+
+void ping_evictor_stop(void)
+{
+        if (--pet_refcount > 0)
+                return;
+
+        pet_state = PET_TERMINATE;
+        wake_up(&pet_waitq);
+}
+EXPORT_SYMBOL(ping_evictor_stop);
 #else /* !__KERNEL__ */
 
 /* XXX
@@ -382,7 +500,7 @@ static int pinger_check_rpcs(void *arg)
                         if (level != LUSTRE_IMP_FULL) {
                                 CDEBUG(D_HA,
                                        "not pinging %s (in recovery)\n",
-                                       imp->imp_target_uuid.uuid);
+                                       obd2cli_tgt(imp->imp_obd));
                                 continue;
                         }
 
@@ -400,7 +518,7 @@ static int pinger_check_rpcs(void *arg)
                         ptlrpc_set_add_req(set, req);
                 } else {
                         CDEBUG(D_HA, "don't need to ping %s (%lu > "
-                               "%lu)\n", imp->imp_target_uuid.uuid,
+                               "%lu)\n", obd2cli_tgt(imp->imp_obd),
                                imp->imp_next_ping, pd->pd_this_ping);
                 }
         }
@@ -417,7 +535,7 @@ static int pinger_check_rpcs(void *arg)
                                    rq_set_chain);
                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
                           req->rq_import->imp_obd->obd_uuid.uuid,
-                          req->rq_import->imp_target_uuid.uuid);
+                          obd2cli_tgt(req->rq_import->imp_obd));
                 (void)ptl_send_rpc(req, 0);
         }
 
@@ -425,7 +543,7 @@ do_check_set:
         rc = ptlrpc_check_set(set);
 
         /* not finished, and we are not expired, simply return */
-        if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)) {
+        if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)){
                 CDEBUG(D_HA, "not finished, but also not expired\n");
                 pd->pd_recursion--;
                 return 0;
@@ -509,7 +627,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
                 RETURN(-EALREADY);
 
         CDEBUG(D_HA, "adding pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         ptlrpc_pinger_sending_on_import(imp);
 
         down(&pinger_sem);
@@ -529,7 +647,7 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
         down(&pinger_sem);
         list_del_init(&imp->imp_pinger_chain);
         CDEBUG(D_HA, "removing pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         class_import_put(imp);
         up(&pinger_sem);
         RETURN(0);
index be8c52c..d4eb69b 100644 (file)
@@ -38,7 +38,7 @@ struct ptlrpc_request_set;
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
 void lustre_assert_wire_constants(void);
 int ptlrpc_import_in_recovery(struct obd_import *imp);
-int ptlrpc_set_import_discon(struct obd_import *imp);
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt);
 void ptlrpc_handle_failed_import(struct obd_import *imp);
 int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
 void ptlrpc_initiate_recovery(struct obd_import *imp);
@@ -56,7 +56,7 @@ void ptlrpc_lprocfs_do_request_stat (struct ptlrpc_request *req,
 #define ptlrpc_lprocfs_unregister_service(params...) do{}while(0)
 #define ptlrpc_lprocfs_rpc_sent(params...) do{}while(0)
 #define ptlrpc_lprocfs_do_request_stat(params...) do{}while(0)
-#endif /* __KERNEL__ */
+#endif /* LPROCFS */
 
 /* recovd_thread.c */
 int llog_init_commit_master(void);
@@ -114,5 +114,10 @@ int ptlrpc_stop_pinger(void);
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
 void ptlrpc_pinger_wake_up(void);
 void ptlrpc_ping_import_soon(struct obd_import *imp);
+#ifdef __KERNEL__
+int ping_evictor_wake(struct obd_export *exp);
+#else
+#define ping_evictor_wake(exp)     1
+#endif
 
 #endif /* PTLRPC_INTERNAL_H */
index f9beb17..1fddaaf 100644 (file)
@@ -63,7 +63,6 @@ __init int ptlrpc_init(void)
         cleanup_phase = 2;
 
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
-        ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
 
         rc = ptlrpc_start_pinger();
         if (rc)
index 6b0062b..2f28528 100644 (file)
@@ -94,10 +94,9 @@ void ptlrpcd_add_req(struct ptlrpc_request *req)
         else
                 pc = &ptlrpcd_recovery_pc;
 
-        ptlrpc_set_add_new_req(pc->pc_set, req);
         req->rq_ptlrpcd_data = pc;
-
-        ptlrpcd_wake(req);
+        ptlrpc_set_add_new_req(pc->pc_set, req);
+        wake_up(&pc->pc_waitq);
 }
 
 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
index df7f97b..c9684fe 100644 (file)
@@ -221,27 +221,20 @@ static int log_commit_thread(void *arg)
         struct llog_commit_master *lcm = arg;
         struct llog_commit_daemon *lcd;
         struct llog_canceld_ctxt *llcd, *n;
-        unsigned long flags;
+        char name[24];
         ENTRY;
 
         OBD_ALLOC(lcd, sizeof(*lcd));
         if (lcd == NULL)
                 RETURN(-ENOMEM);
 
-        lock_kernel();
-        ptlrpc_daemonize(); /* thread never needs to do IO */
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
         spin_lock(&lcm->lcm_thread_lock);
-        THREAD_NAME(current->comm, sizeof(current->comm) - 1,
+        THREAD_NAME(name, sizeof(name) - 1,
                     "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total));
         atomic_inc(&lcm->lcm_thread_total);
         spin_unlock(&lcm->lcm_thread_lock);
-        unlock_kernel();
+
+        ptlrpc_daemonize(name); /* thread never needs to do IO */
 
         INIT_LIST_HEAD(&lcd->lcd_lcm_list);
         INIT_LIST_HEAD(&lcd->lcd_llcd_list);
@@ -348,7 +341,8 @@ static int log_commit_thread(void *arg)
                         }
                         up(&llcd->llcd_ctxt->loc_sem);
 
-                        if (!import || (import == LP_POISON)) {
+                        if (!import || (import == LP_POISON) ||
+                            (import->imp_client == LP_POISON)) {
                                 CERROR("No import %p (llcd=%p, ctxt=%p)\n",
                                        import, llcd, llcd->llcd_ctxt);
                                 llcd_put(llcd);
@@ -501,20 +495,11 @@ static int log_process_thread(void *args)
         void   *cb = data->llpa_cb;
         struct llog_logid logid = *(struct llog_logid *)(data->llpa_arg);
         struct llog_handle *llh = NULL;
-        unsigned long flags;
         int rc;
         ENTRY;
 
         up(&data->llpa_sem);
-        lock_kernel();
-        ptlrpc_daemonize();     /* thread does IO to log files */
-        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "llog_process");
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        unlock_kernel();
+        ptlrpc_daemonize("llog_process");     /* thread does IO to log files */
 
         rc = llog_create(ctxt, &llh, &logid, NULL);
         if (rc) {
index 8513641..30f6aa7 100644 (file)
@@ -97,7 +97,7 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
 
         argv[0] = obd_lustre_upcall;
         argv[1] = "FAILED_IMPORT";
-        argv[2] = imp->imp_target_uuid.uuid;
+        argv[2] = obd2cli_tgt(imp->imp_obd);
         argv[3] = imp->imp_obd->obd_name;
         argv[4] = imp->imp_connection->c_remote_uuid.uuid;
         argv[5] = imp->imp_obd->obd_uuid.uuid;
@@ -135,14 +135,14 @@ void ptlrpc_initiate_recovery(struct obd_import *imp)
 
         if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) {
                 CDEBUG(D_HA, "%s: starting recovery without upcall\n",
-                        imp->imp_target_uuid.uuid);
+                        obd2cli_tgt(imp->imp_obd));
                 ptlrpc_connect_import(imp, NULL);
         } else if (strcmp(obd_lustre_upcall, "NONE") == 0) {
                 CDEBUG(D_HA, "%s: recovery disabled\n",
-                        imp->imp_target_uuid.uuid);
+                        obd2cli_tgt(imp->imp_obd));
         } else {
                 CDEBUG(D_HA, "%s: calling upcall to start recovery\n",
-                        imp->imp_target_uuid.uuid);
+                        obd2cli_tgt(imp->imp_obd));
                 ptlrpc_run_failed_import_upcall(imp);
         }
 
@@ -169,8 +169,8 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
-               imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno,
-               last_transno);
+               imp, obd2cli_tgt(imp->imp_obd),
+               imp->imp_peer_committed_transno, last_transno);
 
         /* Do I need to hold a lock across this iteration?  We shouldn't be
          * racing with any additions to the list, because we're in recovery
@@ -276,15 +276,14 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
         ENTRY;
 
         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
-               imp->imp_obd->obd_name,
-               imp->imp_target_uuid.uuid,
+               imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
                imp->imp_connection->c_remote_uuid.uuid);
 
-        if (ptlrpc_set_import_discon(imp)) {
+        if (ptlrpc_set_import_discon(imp, failed_req->rq_reqmsg->conn_cnt)) {
                 if (!imp->imp_replayable) {
                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
                                "auto-deactivating\n",
-                               imp->imp_target_uuid.uuid,
+                               obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid,
                                imp->imp_obd->obd_name);
                         ptlrpc_deactivate_import(imp);
@@ -321,7 +320,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
          * requests. */
         if (!active) {
                 CWARN("setting import %s INACTIVE by administrator request\n",
-                      imp->imp_target_uuid.uuid);
+                      obd2cli_tgt(imp->imp_obd));
                 ptlrpc_invalidate_import(imp);
                 imp->imp_deactive = 1;
         }
@@ -330,7 +329,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
         if (active) {
                 imp->imp_deactive = 0;
                 CDEBUG(D_HA, "setting import %s VALID\n",
-                       imp->imp_target_uuid.uuid);
+                       obd2cli_tgt(imp->imp_obd));
                 rc = ptlrpc_recover_import(imp, NULL);
         }
 
@@ -344,7 +343,7 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
         ENTRY;
 
         /* force import to be disconnected. */
-        ptlrpc_set_import_discon(imp);
+        ptlrpc_set_import_discon(imp, 0);
 
         imp->imp_deactive = 0;
         rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
@@ -389,13 +388,13 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
                 RETURN(rc);
 
         CDEBUG(D_HA, "%s: recovery started, waiting\n",
-               imp->imp_target_uuid.uuid);
+               obd2cli_tgt(imp->imp_obd));
 
         lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
         rc = l_wait_event(imp->imp_recovery_waitq,
                           !ptlrpc_import_in_recovery(imp), &lwi);
         CDEBUG(D_HA, "%s: recovery finished\n",
-               imp->imp_target_uuid.uuid);
+               obd2cli_tgt(imp->imp_obd));
 
         RETURN(rc);
 }
index 6c1aea3..05d7684 100644 (file)
@@ -430,6 +430,85 @@ ptlrpc_server_free_request(struct ptlrpc_request *req)
 
 }
 
+/* This function makes sure dead exports are evicted in a timely manner.
+   This function is only called when some export receives a message (i.e.,
+   the network is up.) */
+static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
+{
+        struct obd_export *oldest_exp;
+        time_t oldest_time;
+
+        ENTRY;
+
+        LASSERT(exp);
+
+        /* Compensate for slow machines, etc, by faking our request time
+           into the future.  Although this can break the strict time-ordering
+           of the list, we can be really lazy here - we don't have to evict
+           at the exact right moment.  Eventually, all silent exports
+           will make it to the top of the list. */
+        exp->exp_last_request_time = max(exp->exp_last_request_time,
+                                         (time_t)CURRENT_SECONDS + extra_delay);
+
+        CDEBUG(D_INFO, "updating export %s at %ld\n",
+               exp->exp_client_uuid.uuid,
+               exp->exp_last_request_time);
+
+        /* exports may get disconnected from the chain even though the
+           export has references, so we must keep the spin lock while
+           manipulating the lists */
+        spin_lock(&exp->exp_obd->obd_dev_lock);
+
+        if (list_empty(&exp->exp_obd_chain_timed)) {
+                /* this one is not timed */
+                spin_unlock(&exp->exp_obd->obd_dev_lock);
+                EXIT;
+                return;
+        }
+
+        list_move_tail(&exp->exp_obd_chain_timed,
+                       &exp->exp_obd->obd_exports_timed);
+
+        oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
+                                struct obd_export, exp_obd_chain_timed);
+        oldest_time = oldest_exp->exp_last_request_time;
+        spin_unlock(&exp->exp_obd->obd_dev_lock);
+
+        if (exp->exp_obd->obd_recovering) {
+                /* be nice to everyone during recovery */
+                EXIT;
+                return;
+        }
+
+        /* Note - racing to start/reset the obd_eviction timer is safe */
+        if (exp->exp_obd->obd_eviction_timer == 0) {
+                /* Check if the oldest entry is expired. */
+                if (CURRENT_SECONDS > (oldest_time +
+                                       (3 * obd_timeout / 2) + extra_delay)) {
+                        /* We need a second timer, in case the net was down and
+                         * it just came back. Since the pinger may skip every
+                         * other PING_INTERVAL (see note in ptlrpc_pinger_main),
+                         * we better wait for 3. */
+                        exp->exp_obd->obd_eviction_timer = CURRENT_SECONDS +
+                                3 * PING_INTERVAL;
+                        CDEBUG(D_HA, "%s: Think about evicting %s from %ld\n",
+                               exp->exp_obd->obd_name, obd_export_nid2str(exp),
+                               oldest_time);
+                }
+        } else {
+                if (CURRENT_SECONDS > (exp->exp_obd->obd_eviction_timer +
+                                       extra_delay)) {
+                        /* The evictor won't evict anyone who we've heard from
+                         * recently, so we don't have to check before we start
+                         * it. */
+                        if (!ping_evictor_wake(exp))
+                                exp->exp_obd->obd_eviction_timer = 0;
+                }
+        }
+
+        EXIT;
+}
+
 static int
 ptlrpc_server_handle_request(struct ptlrpc_service *svc,
                              struct ptlrpc_thread *thread)
@@ -519,8 +598,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
                         goto put_conn;
                 }
 
-                class_update_export_timer(request->rq_export,
-                                          (time_t)(timediff / 500000));
+                ptlrpc_update_export_timer(request->rq_export, timediff/500000);
         }
 
         /* Discard requests queued for longer than my timeout.  If the
@@ -742,13 +820,14 @@ liblustre_check_services (void *arg)
 #else /* __KERNEL__ */
 
 /* Don't use daemonize, it removes fs struct from new thread (bug 418) */
-void ptlrpc_daemonize(void)
+void ptlrpc_daemonize(char *name)
 {
-        exit_mm(current);
-        lustre_daemonize_helper();
-        set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-        exit_files(current);
-        reparent_to_init();
+        struct fs_struct *fs = current->fs;
+
+        atomic_inc(&fs->count);
+        libcfs_daemonize(name);
+        exit_fs(current);
+        current->fs = fs;
 }
 
 static void
@@ -870,19 +949,7 @@ static int ptlrpc_main(void *arg)
         int rc = 0;
         ENTRY;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        LASSERTF(strlen(data->name) < sizeof(current->comm),
-                 "name %d > len %d\n",
-                 (int)strlen(data->name), (int)sizeof(current->comm));
-        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
-        unlock_kernel();
+        ptlrpc_daemonize(data->name);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) && CONFIG_NUMA
         /* we need to do this before any per-thread allocation is done so that
index 7c5d749..9fd7910 100644 (file)
@@ -70,7 +70,6 @@ static int target_quotacheck_callback(struct obd_export *exp,
 
 static int target_quotacheck_thread(void *data)
 {
-        unsigned long flags;
         struct quotacheck_thread_args *qta = data;
         struct obd_export *exp;
         struct obd_device *obd;
@@ -78,17 +77,7 @@ static int target_quotacheck_thread(void *data)
         struct lvfs_run_ctxt saved;
         int rc;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s",
-                    "quotacheck");
-        unlock_kernel();
+        ptlrpc_daemonize("quotacheck");
 
         exp = qta->qta_exp;
         obd = exp->exp_obd;
@@ -211,7 +200,7 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk)
         if (rc == CL_NOT_QUOTACHECKED)
                 rc = -EINTR;
 
-        qchk->obd_uuid = cli->cl_import->imp_target_uuid;
+        qchk->obd_uuid = cli->cl_target_uuid;
         if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME,
             strlen(LUSTRE_OSC_NAME)))
                 memcpy(qchk->obd_type, LUSTRE_FILTER_NAME,
index 2c93975..5efb2a4 100644 (file)
@@ -685,20 +685,11 @@ static int qslave_recovery_main(void *arg)
         struct qslave_recov_thread_data *data = arg;
         struct obd_device *obd = data->obd;
         struct lustre_quota_ctxt *qctxt = data->qctxt;
-        unsigned long flags;
         unsigned int type; 
         int rc = 0;
         ENTRY;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1, "%s", "qslave_recovd");
-        unlock_kernel();
+        ptlrpc_daemonize("qslave_recovd");
 
         complete(&data->comp);
 
index 4db3a71..9eb3cf3 100644 (file)
@@ -396,10 +396,10 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
 
                 /* lookup quota file */
                 rc = 0;
-                down(&iparent->i_sem);
+                LOCK_INODE_MUTEX(iparent);
                 de = lookup_one_len(quotafiles[i], dparent,
                                     strlen(quotafiles[i]));
-                up(&iparent->i_sem);
+                UNLOCK_INODE_MUTEX(iparent);
                 if (IS_ERR(de) || de->d_inode == NULL || 
                     !S_ISREG(de->d_inode->i_mode))
                         rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT;
@@ -1018,21 +1018,11 @@ static int qmaster_recovery_main(void *arg)
 {
         struct qmaster_recov_thread_data *data = arg;
         struct obd_device *obd = data->obd;
-        unsigned long flags;
         int rc = 0;
         unsigned short type;
         ENTRY;
 
-        lock_kernel();
-        ptlrpc_daemonize();
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1, "%s", 
-                    "qmaster_recovd");
-        unlock_kernel();
+        ptlrpc_daemonize("qmaster_recovd");
 
         complete(&data->comp);
 
index 8da729a..a4bacc2 100755 (executable)
@@ -5,7 +5,7 @@ set -vxe
 
 PATH=`dirname $0`/../utils:$PATH
 
-[ "$CONFIGS" ] || CONFIGS="local lov"
+[ "$CONFIGS" ] || CONFIGS="local"  #"local lov"
 [ "$MAX_THREADS" ] || MAX_THREADS=10
 if [ -z "$THREADS" ]; then
        KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
@@ -19,19 +19,29 @@ fi
 [ "$MOUNT2" ] || MOUNT2=${MOUNT}2
 [ "$TMP" ] || TMP=/tmp
 [ "$COUNT" ] || COUNT=1000
-#[ "$DEBUG_LVL" ] || DEBUG_LVL=0x370200
 [ "$DEBUG_LVL" ] || DEBUG_LVL=0
 [ "$DEBUG_OFF" ] || DEBUG_OFF="sysctl -w lnet.debug=$DEBUG_LVL"
-[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w lnet.debug=0x33f0480"
+[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w lnet.debug=0x33f0484"
 
 LIBLUSTRE=${LIBLUSTRE:-../liblustre}
 LIBLUSTRETESTS=${LIBLUSTRETESTS:-$LIBLUSTRE/tests}
 
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. mountconf.sh
+
+SETUP=${SETUP:-mcsetup}
+FORMAT=${FORMAT:-mcformat}
+CLEANUP=${CLEANUP:-mcstopall}
+
 for NAME in $CONFIGS; do
        export NAME MOUNT START CLEAN
-       [ -e $NAME.sh ] && sh $NAME.sh
-       [ ! -e $NAME.xml ] && [ -z "$LDAPURL" ] && \
-               echo "no config '$NAME.xml'" 1>&2 && exit 1
+       . $LUSTRE/tests/cfg/$NAME.sh
+       
+       assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
+       assert_env ost_HOST ost2_HOST OST_MKFS_OPTS OSTDEV
+       assert_env FSNAME
 
        if [ "$RUNTESTS" != "no" ]; then
                sh runtests
@@ -42,7 +52,7 @@ for NAME in $CONFIGS; do
        fi
 
        if [ "$DBENCH" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount_client $MOUNT
                SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
                DB_THREADS=`expr $SPACE / 50000`
                [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
@@ -50,44 +60,44 @@ for NAME in $CONFIGS; do
                $DEBUG_OFF
                sh rundbench 1
                $DEBUG_ON
-               sh llmountcleanup.sh
-               sh llmount.sh
+               $CLEANUP
+               $SETUP
                if [ $DB_THREADS -gt 1 ]; then
                        $DEBUG_OFF
                        sh rundbench $DB_THREADS
                        $DEBUG_ON
-                       sh llmountcleanup.sh
-                       sh llmount.sh
+                       $CLEANUP
+                       $SETUP
                fi
                rm -f /mnt/lustre/`hostname`/client.txt
        fi
 
        chown $UID $MOUNT && chmod 700 $MOUNT
        if [ "$BONNIE" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount_client $MOUNT
                $DEBUG_OFF
                bonnie++ -f -r 0 -s $(($SIZE / 1024)) -n 10 -u $UID -d $MOUNT
                $DEBUG_ON
-               sh llmountcleanup.sh
-               sh llmount.sh
+               $CLEANUP
+               $SETUP
        fi
 
        IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
        IOZFILE="-f $MOUNT/iozone"
        if [ "$IOZONE" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount_client $MOUNT
                $DEBUG_OFF
                iozone $IOZONE_OPTS $IOZFILE
                $DEBUG_ON
-               sh llmountcleanup.sh
-               sh llmount.sh
+               $CLEANUP
+               $SETUP
 
                if [ "$O_DIRECT" != "no" -a "$IOZONE_DIR" != "no" ]; then
                        $DEBUG_OFF
                        iozone -I $IOZONE_OPTS $IOZFILE.odir
                        $DEBUG_ON
-                       sh llmountcleanup.sh
-                       sh llmount.sh
+                       $CLEANUP
+                       $SETUP
                fi
 
                SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
@@ -104,8 +114,8 @@ for NAME in $CONFIGS; do
                        done
                        iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZFILE
                        $DEBUG_ON
-                       sh llmountcleanup.sh
-                       sh llmount.sh
+                       $CLEANUP
+                       $SETUP
                elif [ $IOZVER -lt 3145 ]; then
                        VER=`iozone -v | awk '/Revision:/ { print $3 }'`
                        echo "iozone $VER too old for multi-thread test"
@@ -113,13 +123,13 @@ for NAME in $CONFIGS; do
        fi
 
        if [ "$FSX" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount | grep $MOUNT || $SETUP
                $DEBUG_OFF
                ./fsx -c 50 -p 1000 -P $TMP -l $SIZE \
                        -N $(($COUNT * 100)) $MOUNT/fsxfile
                $DEBUG_ON
-               sh llmountcleanup.sh
-               sh llmount.sh
+               $CLEANUP
+               $SETUP
        fi      
 
        mkdir -p $MOUNT2
@@ -134,11 +144,11 @@ for NAME in $CONFIGS; do
        esac
 
        if [ "$SANITYN" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount_client $MOUNT
                $DEBUG_OFF
 
                if [ "$MDSNODE" -a "$MDSNAME" -a "$CLIENT" ]; then
-                       llmount $MDSNODE:/$MDSNAME/$CLIENT $MOUNT2
+                       mount_client $MOUNT2
                        SANITYLOG=$TMP/sanity.log START=: CLEAN=: sh sanityN.sh
                        umount $MOUNT2
                else
@@ -147,12 +157,12 @@ for NAME in $CONFIGS; do
                fi
 
                $DEBUG_ON
-               sh llmountcleanup.sh
-               sh llmount.sh
+               $CLEANUP
+               $SETUP
        fi
 
        if [ "$LIBLUSTRE" != "no" ]; then
-               mount | grep $MOUNT || sh llmount.sh
+               mount_client $MOUNT
                export LIBLUSTRE_MOUNT_POINT=$MOUNT2
                export LIBLUSTRE_MOUNT_TARGET=$MDSNODE:/$MDSNAME/$CLIENT
                export LIBLUSTRE_TIMEOUT=`cat /proc/sys/lustre/timeout`
@@ -160,11 +170,11 @@ for NAME in $CONFIGS; do
                if [ -x $LIBLUSTRETESTS/sanity ]; then
                        $LIBLUSTRETESTS/sanity --target=$LIBLUSTRE_MOUNT_TARGET
                fi
-               sh llmountcleanup.sh
-               #sh llmount.sh
+               $CLEANUP
+               #$SETUP
        fi
 
-       mount | grep $MOUNT && sh llmountcleanup.sh
+       $CLEANUP
 done
 
 if [ "$REPLAY_SINGLE" != "no" ]; then
index 2b185f9..e8d323e 100644 (file)
@@ -1,33 +1,59 @@
+FSNAME=lustre
 mds_HOST=${mds_HOST:-`hostname`}
+mgs_HOST=${mgs_HOST:-$mds_HOST}
 mdsfailover_HOST=${mdsfailover_HOST:-""}
 ost1_HOST=${ost1_HOST:-"`hostname`"}
 ost2_HOST=${ost2_HOST:-"`hostname`"}
 EXTRA_OSTS=${EXTRA_OSTS:-"`hostname`"}
-client_HOST=${client_HOST:-"'*'"}
 LIVE_CLIENT=${LIVE_CLIENT:-"`hostname`"}
 # This should always be a list, not a regexp
 FAIL_CLIENTS=${FAIL_CLIENTS:-""}
 
+MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt}
+MDSSIZE=${MDSSIZE:-10000} #50000000
+OSTDEV=${OSTDEV:-"$TMP/${FSNAME}-ost%d"}
+OSTSIZE=${OSTSIZE:=10000} #50000000
+
 NETTYPE=${NETTYPE:-tcp}
+MGSNID=`h2$NETTYPE $mgs_HOST`
+FSTYPE=${FSTYPE:-ext3}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
 TIMEOUT=${TIMEOUT:-30}
-PTLDEBUG=${PTLDEBUG:-0x3f0400}
+PTLDEBUG=${PTLDEBUG:-0x33f0404}
 SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
-MOUNT=${MOUNT:-"/mnt/lustre"}
-#CLIENT_UPCALL=${CLIENT_UPCALL:-`pwd`/client-upcall-mdev.sh}
-#UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
 
-MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
-MDSSIZE=${MDSSIZE:-10000} #50000000
-MDSJOURNALSIZE=${MDSJOURNALSIZE:-0}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$MDSJOURNALSIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE"
+[ "x$MDSISIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -i $MDSISIZE"
+[ "x$MKFSOPT" != "x" ] &&
+    MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$mdsfailover_HOST" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
+[ "x$STRIPE_BYTES" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES"
+[ "x$STRIPES_PER_OBJ" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ"
+MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE $MKFSOPT $MOUNTOPT $MDSOPT"
 
-OSTDEV=${OSTDEV:-"$TMP/ost%d-`hostname`"}
-OSTSIZE=${OSTSIZE:=10000} #50000000
-OSTJOURNALSIZE=${OSTJOURNALSIZE:-0}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$OSTJOURNALSIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
+[ "x$MKFSOPT" != "x" ] &&
+    MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$ostfailover_HOST" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
+OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
 
-FSTYPE=${FSTYPE:-ext3}
-STRIPE_BYTES=${STRIPE_BYTES:-65536} #1048576
-STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+MDS_MOUNT_OPTS="-o loop"
+OST_MOUNT_OPTS="-o loop"
+MOUNT=${MOUNT:-"/mnt/lustre"}
 
+PDSH=${PDSH:-no_dsh}
 FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
 POWER_DOWN=${POWER_DOWN:-"powerman --off"}
 POWER_UP=${POWER_UP:-"powerman --on"}
index 47a7b0c..38ad798 100644 (file)
@@ -62,7 +62,6 @@ fi
 OSTJOURNALSIZE=${OSTJOURNALSIZE:-0}
 
 FSTYPE=${FSTYPE:-ext3}
-#STRIPE_BYTES=${STRIPE_BYTES:-65536} 
 STRIPE_BYTES=${STRIPE_BYTES:-1048576} 
 STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
 
index 56fa05b..3a921be 100644 (file)
@@ -6,25 +6,55 @@ CLIENT=${CLIENT:-client}
 FSNAME=lustre
 mds_HOST=${mds_HOST:-$MDSNODE}
 mdsfailover_HOST=${mdsfailover_HOST}
-mgs_HOST=${mgs_HOST:-$MDSNODE}
+mgs_HOST=${mgs_HOST:-$mds_HOST}
 ost_HOST=${ost_HOST:-$OSTNODE}
 ostfailover_HOST=${ostfailover_HOST}
 ost2_HOST=${ost2_HOST:-$ost_HOST}
-client_HOST=${client_HOST:-$CLIENT}
-NETTYPE=${NETTYPE:-tcp}
-MGSNID=`h2$NETTYPE $HOSTNAME`
 
-MDSDEV=${MDSDEV:-$ROOT/tmp/${FSNAME}-mdt}
+TMP=${TMP:-/tmp}
+MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt}
 MDSSIZE=${MDSSIZE:-100000}
 MDSOPT=${MDSOPT:-"--mountfsoptions=acl"}
-OSTDEV=${OSTDEV:-$ROOT/tmp/${FSNAME}-ost0}
+OSTDEV=${OSTDEV:-$TMP/${FSNAME}-ost0}
 OSTSIZE=${OSTSIZE:-200000}
-OSTDEV2=${OSTDEV2:-$ROOT/tmp/${FSNAME}-ost1}
-FSTYPE=${FSTYPE:-ext3}
+OSTDEV2=${OSTDEV2:-$TMP/${FSNAME}-ost1}
+
+NETTYPE=${NETTYPE:-tcp}
+MGSNID=`h2$NETTYPE $mgs_HOST`
+FSTYPE=${FSTYPE:-ldiskfs}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+TIMEOUT=${TIMEOUT:-20}
+UPCALL=${UPCALL:-DEFAULT}
+PTLDEBUG=${PTLDEBUG:-0x33f0404}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
 
-MDS_MKFS_OPTS="--mgs --mdt --index=0 --device-size=$MDSSIZE $MDSOPT"
-OST_MKFS_OPTS="--ost --index=0 --device-size=$OSTSIZE --mgsnode=`h2$NETTYPE $HOSTNAME` $OSTOPT"
-OST2_MKFS_OPTS="--ost --index=1 --device-size=$OSTSIZE --mgsnode=`h2$NETTYPE $HOSTNAME` $OSTOPT"
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$MDSJOURNALSIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE"
+[ "x$MDSISIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -i $MDSISIZE"
+[ "x$MKFSOPT" != "x" ] &&
+    MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$mdsfailover_HOST" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
+[ "x$STRIPE_BYTES" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES"
+[ "x$STRIPES_PER_OBJ" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ"
+MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE $MKFSOPT $MOUNTOPT $MDSOPT"
+
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$OSTJOURNALSIZE" != "x" ] &&
+    MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
+[ "x$MKFSOPT" != "x" ] &&
+    MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$ostfailover_HOST" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
+OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
+OST2_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
 
 MDS_MOUNT_OPTS="-o loop"
 OST_MOUNT_OPTS="-o loop"
@@ -38,15 +68,7 @@ DIR1=${DIR:-$MOUNT1}
 DIR2=${DIR2:-$MOUNT2}
 MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"}
 
-TIMEOUT=${TIMEOUT:-20}
-UPCALL=${UPCALL:-DEFAULT}
-PTLDEBUG=${PTLDEBUG:-0x33f0404}
-SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
 PDSH=${PDSH:-no_dsh}
-
-STRIPE_BYTES=${STRIPE_BYTES:-1048576}
-STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
-
 FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
 POWER_DOWN=${POWER_DOWN:-"powerman --off"}
 POWER_UP=${POWER_UP:-"powerman --on"}
index c7f7674..b0d6101 100644 (file)
@@ -23,7 +23,7 @@ FSTYPE=${FSTYPE:-ext3}
 TIMEOUT=${TIMEOUT:-10}
 #UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
 
-STRIPE_BYTES=${STRIPE_BYTES:-65536}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
 STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
 
 FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
index 25a31c3..711aa35 100644 (file)
@@ -29,20 +29,26 @@ init_test_env $@
 
 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
 
+reformat() {
+        grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+       stop ost -f
+       stop ost2 -f
+       stop mds -f
+       echo Formatting mds, ost, ost2
+       add mds $MDS_MKFS_OPTS --reformat $MDSDEV  > /dev/null
+       add ost $OST_MKFS_OPTS --reformat $OSTDEV  > /dev/null
+       add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2  > /dev/null
+}
 
 gen_config() {
-    grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
-    stop ost -f
-    stop mds -f
-    echo Formatting mds, ost
-    add mds $MDS_MKFS_OPTS --reformat $MDSDEV  > /dev/null
-    add ost $OST_MKFS_OPTS --reformat $OSTDEV  > /dev/null
-    #The MGS must be started before the OSTs for a new fs
-    start_mds
-    start_ost
-    sleep 5
-    stop_ost
-    stop_mds
+        reformat
+        # The MGS must be started before the OSTs for a new fs, so start
+        # and stop to generate the startup logs. 
+       start_mds
+       start_ost
+       sleep 5
+       stop_ost
+       stop_mds
 }
 
 start_mds() {
@@ -67,12 +73,6 @@ stop_ost() {
        stop ost -f  || return 98
 }
 
-add_ost2() {
-       stop ost2 -f
-       echo Formatting ost2
-       add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2  > /dev/null
-}
-
 start_ost2() {
        echo "start ost2 service on `facet_active_host ost2`"
        start ost2 $OSTDEV2 $OST2_MOUNT_OPTS || return 92
@@ -97,7 +97,7 @@ umount_client() {
 }
 
 manual_umount_client(){
-       echo "manual umount lustre on ${MOUNTPATH}...."
+       echo "manual umount lustre on ${MOUNT}...."
        do_facet client "umount -d $MOUNT"
 }
 
@@ -119,7 +119,7 @@ cleanup() {
 }
 
 check_mount() {
-       do_facet client "touch $DIR/a" || return 71
+       do_facet client "cp /etc/passwd $DIR/a" || return 71
        do_facet client "rm $DIR/a" || return 72
        # make sure lustre is actually mounted (touch will block, 
         # but grep won't, so do it after) 
@@ -218,9 +218,14 @@ test_5() {
        echo "waiting for umount to finish"
        wait $UMOUNT_PID
 
-       umount_client $MOUNT 
+       manual_umount_client
        # stop_mds is a no-op here, and should not fail
        cleanup_nocli || return $?
+       # df may have lingering entry
+       manual_umount_client
+       # mtab may have lingering entry
+       grep -v $MOUNT" " /etc/mtab > $TMP/mtabtemp
+       mv $TMP/mtabtemp /etc/mtab
 }
 run_test 5 "force cleanup mds, then cleanup"
 
@@ -239,23 +244,32 @@ run_test 5b "mds down, cleanup after failed mount (bug 2712)"
 test_5c() {
        start_ost
        start_mds
-
        [ -d $MOUNT ] || mkdir -p $MOUNT
-       do_node $client mount -t lustre wrong_mgs@tcp:/$FSNAME $MOUNT && return 1
+       # Bad nid might still work if mgs is on 0@lo
+       mount -t lustre 1.2.3.4@tcp:/wrong.$FSNAME $MOUNT || :
        umount_client $MOUNT
        cleanup_nocli  || return $?
 }
 run_test 5c "cleanup after failed mount (bug 2712)"
 
 test_5d() {
-        df
        start_ost
        start_mds
        stop_ost -f
        mount_client $MOUNT || return 1
        cleanup  || return $?
 }
-run_test 5d "ost down, don't crash during mount attempt"
+run_test 5d "mount with ost down"
+
+test_5e() {
+       start_ost
+       start_mds
+#define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+       do_facet client "sysctl -w lustre.fail_loc=0x80000506"
+       mount_client $MOUNT || echo "mount failed (not fatal)"
+       cleanup  || return $?
+}
+run_test 5e "delayed connect, don't crash (bug 10268)"
 
 test_6() {
        setup
@@ -295,8 +309,7 @@ test_9() {
         # check the result of lmc --ptldebug/subsystem
         start_ost
         start_mds
-        mount_client $MOUNT
-        CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug | sed -e 's/.* = //'`"
+        CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug|cut -d= -f2`"
         if [ "$CHECK_PTLDEBUG" ] && [ $CHECK_PTLDEBUG -eq 1 ]; then
            echo "lmc --debug success"
         else
@@ -311,7 +324,6 @@ test_9() {
            echo "lmc --subsystem: want 2, have $CHECK_SUBSYS"
            return 1
         fi
-        check_mount || return 41
         cleanup || return $?
 
         # the new PTLDEBUG/SUBSYSTEM used for lconf --ptldebug/subsystem
@@ -335,8 +347,6 @@ test_9() {
            echo "lconf --subsystem: want 20, have $CHECK_SUBSYS"
            return 1
         fi
-        mount_client $MOUNT
-        check_mount || return 41
         cleanup || return $?
 
         # resume the old configuration
@@ -578,16 +588,18 @@ cleanup_15() {
 }
 
 test_15() {
-       start_ost
-       start_mds
        echo "mount lustre on ${MOUNT} with $MOUNTLUSTRE....."
        if [ -f "$MOUNTLUSTRE" ]; then
                echo "save $MOUNTLUSTRE to $MOUNTLUSTRE.sav"
-               mv $MOUNTLUSTRE $MOUNTLUSTRE.sav
+               mv $MOUNTLUSTRE $MOUNTLUSTRE.sav && trap cleanup_15 EXIT INT
+               if [ -f $MOUNTLUSTRE ]; then
+                       echo "$MOUNTLUSTRE cannot be moved, skipping test"
+                       return 0
+               fi
        fi
-       [ -f "$MOUNTLUSTRE" ] && echo "can't move $MOUNTLUSTRE" && return 40
-       trap cleanup_15 EXIT INT
        [ ! `cp $(which llmount) $MOUNTLUSTRE` ] || return $?
+       start_ost
+       start_mds
        do_facet client "mkdir -p $MOUNT 2> /dev/null"
        # load llite module on the client if it isn't in /lib/modules
        do_facet client "$LCONF --nosetup --node client_facet $XMLCONFIG"
@@ -609,7 +621,7 @@ test_15() {
 run_test 15 "zconf-mount without /sbin/mount.lustre (should return error)"
 
 test_16() {
-        TMPMTPT="/mnt/conf16"
+        TMPMTPT="${MOUNT%/*}/conf16"
 
         if [ ! -f "$MDSDEV" ]; then
             echo "no $MDSDEV existing, so mount Lustre to create one"
@@ -666,12 +678,11 @@ test_17() {
         fi
 
         echo "Remove mds config log"
-        do_facet mds "debugfs -w -R 'rm CONFIGS/$FSNAME-MDT0000' $MDSDEV || return \$?" || return $?
+        do_facet mds "debugfs -w -R 'unlink CONFIGS/$FSNAME-MDT0000' $MDSDEV || return \$?" || return $?
 
         start_ost
        start_mds && return 42
-       umount_client $MOUNT
-        cleanup_nocli || return $?
+       gen_config
 }
 run_test 17 "Verify failed mds_postsetup won't fail assertion (2936)"
 
@@ -704,86 +715,69 @@ test_18() {
 }
 run_test 18 "check lconf creates large journals"
 
-test_19() {
-        # first format the ost/mdt
-        start_ost
-       start_mds
-       stop_mds
-       stop_ost
+test_19a() {
        start_mds || return 1
        stop_mds -f || return 2
 }
-run_test 19 "start/stop MDS without OSTs"
+run_test 19a "start/stop MDS without OSTs"
 
-test_20() {
-       add_ost2
+test_19b() {
+       start_ost || return 1
+       stop_ost -f || return 2
+}
+run_test 19b "start/stop OSTs without MDS"
 
+test_20a() {
         start_mds
        start_ost
-       start_ost2
-       sleep 5
-       stop_ost2
        stop_ost
-       stop_mds || return 1
+       stop_mds
 }
-run_test 20 "start mds first"
-
-test_21() {
-       add_ost2
+run_test 20a "start mds before ost, stop ost first"
 
+test_20b() {
         start_ost
-       start_ost2
        start_mds
-       sleep 5
+       stop_mds
        stop_ost
-       stop_ost2
-       stop_mds || return 1
 }
-run_test 21 "start mds last"
-
-test_22() {
-       add_ost2
+run_test 20b "start ost before mds, stop mds first"
 
+test_20c() {
         start_ost
        start_mds
        start_ost2
-       sleep 5
        stop_ost
        stop_ost2
-       stop_mds || return 1
+       stop_mds
 }
-run_test 22 "start mds between two osts"
+run_test 20c "start mds between two osts, stop mds last"
 
-test_23() {
-       #setup
-       start_ost
+test_21() {
+        reformat
        start_mds
-       add_ost2
-       start_ost2
-
+       echo Client mount before any osts are in the logs
        mount_client $MOUNT
-       check_mount || return 41
-
-       # cleanup
-       umount_client $MOUNT || return 200
-       stop_ost2 || return 204
-       cleanup_nocli || return $?
-}
-run_test 23 "add a new ost before a client has started"
+       check_mount && return 41
+       pass
 
-test_24() {
-        setup
-       add_ost2
-       start_ost2
+       echo Client mount with ost in logs, but none running
+       start_ost
+       stop_ost
+       mount_client $MOUNT
+       # check_mount will block trying to contact ost
+       umount_client $MOUNT
+       pass
 
+       echo Client mount with a running ost
+       start_ost
+       mount_client $MOUNT
        check_mount || return 41
+       pass
 
-       # cleanup
-       umount_client $MOUNT || return 200
-       stop_ost2 || return 204
-       cleanup_nocli || return $?
+       cleanup
 }
-run_test 24 "add a new ost after a client has started"
+run_test 21 "start a client before osts"
 
 
 umount_client $MOUNT   
index 03a8f7d..d399036 100755 (executable)
@@ -18,7 +18,9 @@ UPCALL=${UPCALL:-DEFAULT}
 
 build_test_filter
 
-assert_env mds_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT 
+assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
+assert_env ost1_HOST ost2_HOST OST_MKFS_OPTS OSTDEV
+assert_env LIVE_CLIENT FSNAME
 
 ####
 # Initialize all the ostN_HOST 
@@ -111,48 +113,36 @@ reintegrate_clients() {
     DOWN_NUM=0
 }
 
-gen_config() {
-    rm -f $XMLCONFIG
-    add_mds mds --dev $MDSDEV --size $MDSSIZE --journal-size $MDSJOURNALSIZE
-
-    if [ ! -z "$mdsfailover_HOST" ]; then
-        add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
-    fi
-
-    add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
-       --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
-    for i in `seq $NUMOST`; do
-       dev=`printf $OSTDEV $i`
-       add_ost ost$i --lov lov1 --dev $dev --size $OSTSIZE \
-           --journal-size $OSTJOURNALSIZE
-    done
-     
-
-    add_client client mds --lov lov1 --path $MOUNT
+start_ost() {
+    local dev=`printf $OSTDEV $1`
+    start ost$1 $dev $OST_MOUNT_OPTS
 }
 
 setup() {
-    gen_config
-
+    cleanup
     rm -rf logs/*
+    wait_for mds
+    add mds $MDS_MKFS_OPTS --reformat $MDSDEV >> /dev/null
+    start mds $MDSDEV $MDS_MOUNT_OPTS
     for i in `seq $NUMOST`; do
+       local dev=`printf $OSTDEV $i`
+       local index=$((i - 1))
        wait_for ost$i
-       start ost$i ${REFORMAT} $OSTLCONFARGS 
+       echo Adding ost$i at index $index dev $dev
+       add ost$i $OST_MKFS_OPTS --reformat --index=$index $dev >> /dev/null
+       start ost$i $dev $OST_MOUNT_OPTS
     done
     [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
-    wait_for mds
-    start mds $MDSLCONFARGS ${REFORMAT}
+
     while ! do_node $CLIENTS "ls -d $LUSTRE" > /dev/null; do sleep 5; done
     grep " $MOUNT " /proc/mounts || zconf_mount $CLIENTS $MOUNT
-
 }
 
 cleanup() {
     zconf_umount $CLIENTS $MOUNT
-
-    stop mds ${FORCE} $MDSLCONFARGS || :
+    stop mds -f
     for i in `seq $NUMOST`; do
-        stop ost$i ${FORCE} $OSTLCONFARGS  || :
+        stop ost$i -f
     done
 }
 
@@ -205,11 +195,9 @@ node_to_ost() {
     done
     echo "No ost found for node; $node"
     return 1
-    
 }
 
 
-
 if [ "$ONLY" == "cleanup" ]; then
     $CLEANUP
     exit
@@ -230,17 +218,14 @@ fi
 echo "Starting Test 17 at `date`"
 
 test_0() {
-    echo "Failover MDS"
     facet_failover mds
     echo "Waiting for df pid: $DFPID"
     wait $DFPID || { echo "df returned $?" && return 1; }
 
-    echo "Failing OST1"
     facet_failover ost1
     echo "Waiting for df pid: $DFPID"
     wait $DFPID || { echo "df returned $?" && return 2; }
 
-    echo "Failing OST2"
     facet_failover ost2
     echo "Waiting for df pid: $DFPID"
     wait $DFPID || { echo "df returned $?" && return 3; }
@@ -261,7 +246,6 @@ test_2() {
     echo "Verify Lustre filesystem is up and running"
     client_df
 
-    echo "Failing MDS"
     shutdown_facet mds
     reboot_facet mds
 
@@ -273,17 +257,15 @@ test_2() {
     DFPID=$!
     sleep 5
 
-    echo "Failing OST"
     shutdown_facet ost1
 
     echo "Reintegrating OST"
     reboot_facet ost1
     wait_for ost1
-    start ost1
+    start_ost 1
 
-    echo "Failover MDS"
     wait_for mds
-    start mds
+    start mds $MDSDEV $MDS_MOUNT_OPTS
 
     #Check FS
     wait $DFPID
@@ -331,7 +313,6 @@ test_4() {
     echo "Fourth Failure Mode: OST/MDS `date`"
 
     #OST Portion
-    echo "Failing OST ost1"
     shutdown_facet ost1
  
     #Check FS
@@ -341,7 +322,6 @@ test_4() {
     sleep 5
 
     #MDS Portion
-    echo "Failing MDS"
     shutdown_facet mds
     reboot_facet mds
 
@@ -357,11 +337,10 @@ test_4() {
     echo "Reintegrating OST"
     reboot_facet ost1
     wait_for ost1
-    start ost1
+    start_ost 1
     
-    echo "Failover MDS"
     wait_for mds
-    start mds
+    start mds $MDSDEV $MDS_MOUNT_OPTS
     #Check FS
     
     wait $DFPIDA
@@ -382,7 +361,6 @@ test_5() {
     client_df
     
     #OST Portion
-    echo "Failing OST"
     shutdown_facet ost1
     reboot_facet ost1
     
@@ -393,7 +371,6 @@ test_5() {
     sleep 5
     
     #OST Portion
-    echo "Failing OST"
     shutdown_facet ost2
     reboot_facet ost2
 
@@ -406,9 +383,9 @@ test_5() {
     #Reintegration
     echo "Reintegrating OSTs"
     wait_for ost1
-    start ost1
+    start_ost 1
     wait_for ost2
-    start ost2
+    start_ost 2
     
     clients_recover_osts ost1
     clients_recover_osts ost2
@@ -431,7 +408,6 @@ test_6() {
     client_touch testfile || return 2
        
     #OST Portion
-    echo "Failing OST"
     shutdown_facet ost1
     reboot_facet ost1
 
@@ -454,7 +430,7 @@ test_6() {
     #Reintegration
     echo "Reintegrating OST/CLIENTs"
     wait_for ost1
-    start ost1
+    start_ost 1
     reintegrate_clients
     sleep 5 
 
@@ -496,7 +472,6 @@ test_7() {
     client_rm testfile
 
     #MDS Portion
-    echo "Failing MDS"
     facet_failover mds
 
     #Check FS
@@ -548,7 +523,6 @@ test_8() {
 
 
     #OST Portion
-    echo "Failing OST"
     shutdown_facet ost1
     reboot_facet ost1
 
@@ -565,7 +539,7 @@ test_8() {
     echo "Reintegrating CLIENTs/OST"
     reintegrate_clients
     wait_for ost1
-    start ost1
+    start_ost 1
     wait $DFPID
     client_df || return 1
     client_touch testfile2 || return 2
@@ -637,5 +611,4 @@ test_10() {
 run_test 10 "Running Availability for 6 hours..."
 
 equals_msg "Done, cleaning up"
-# we need to force cleanup for the stale MDS conns until bug 5921 is fixed
-FORCE=--force $CLEANUP
+$CLEANUP
index 352c2b9..62c3b14 100755 (executable)
@@ -19,7 +19,7 @@ NETTYPE=${NETTYPE:-tcp}
 [ "$ACCEPTOR_PORT" ] && PORT_OPT="--port $ACCEPTOR_PORT"
 
 OSTCOUNT=${OSTCOUNT:-5}
-# OSTDEVN will still override the device for OST N
+# OSTDEVn will still override the device for OST n
 
 OSTSIZE=${OSTSIZE:-150000}
 # 1 to config an echo client instead of llite
index c1e3f7f..425a26d 100755 (executable)
@@ -8,24 +8,23 @@
 #LUSTRE=${LUSTRE:-`dirname $0`/..}
 #. $LUSTRE/tests/test-framework.sh
 #init_test_env $@
-#. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
 
-
-stop_all() {
-    grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+mcstopall() {
+    grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $*
     stop ost -f
     stop ost2 -f
     stop mds -f
+    return 0
 }
 
 mccleanup() {
     echo "mountconf cleanup $*"
-    stop_all
+    mcstopall $*
     unload_modules
 }
 
 mcformat() {
-    stop_all
+    mcstopall
     echo Formatting mds, ost, ost2
     add mds $MDS_MKFS_OPTS --reformat $MDSDEV    > /dev/null || exit 10
     add ost $OST_MKFS_OPTS --reformat $OSTDEV    > /dev/null || exit 10
index 65dd8be..3da2ceb 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 set -e
-set -vx
+#set -vx
 
 export PATH=`dirname $0`/../utils:$PATH
 LFS=${LFS:-lfs}
index 436a8cd..9702681 100755 (executable)
@@ -5,6 +5,12 @@ set -e
 #         bug  2986 5494 7288
 ALWAYS_EXCEPT="20b  24   27 $RECOVERY_SMALL_EXCEPT"
 
+# Tests that always fail with mountconf -- FIXME
+# 16 fails with 1, not evicted
+# 18a,b there is still data in page cache
+EXCEPT="$EXCEPT 16 18a 18b"
+
+
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
@@ -149,23 +155,23 @@ run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"
 
 # Bug 113, check that readdir lost recv timeout works.
 test_13() {
-    mkdir /mnt/lustre/readdir || return 1
-    touch /mnt/lustre/readdir/newentry || return
+    mkdir $MOUNT/readdir || return 1
+    touch $MOUNT/readdir/newentry || return
 # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
     do_facet mds "sysctl -w lustre.fail_loc=0x80000104"
-    ls /mnt/lustre/readdir || return 3
+    ls $MOUNT/readdir || return 3
     do_facet mds "sysctl -w lustre.fail_loc=0"
-    rm -rf /mnt/lustre/readdir || return 4
+    rm -rf $MOUNT/readdir || return 4
 }
 run_test 13 "mdc_readpage restart test (bug 1138)"
 
 # Bug 113, check that readdir lost send timeout works.
 test_14() {
-    mkdir /mnt/lustre/readdir
-    touch /mnt/lustre/readdir/newentry
+    mkdir $MOUNT/readdir
+    touch $MOUNT/readdir/newentry
 # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE
     do_facet mds "sysctl -w lustre.fail_loc=0x80000106"
-    ls /mnt/lustre/readdir || return 1
+    ls $MOUNT/readdir || return 1
     do_facet mds "sysctl -w lustre.fail_loc=0"
 }
 run_test 14 "mdc_readpage resend test (bug 1138)"
@@ -177,15 +183,15 @@ test_15() {
 }
 run_test 15 "failed open (-ENOMEM)"
 
-READ_AHEAD=`cat /proc/fs/lustre/llite/*/max_read_ahead_mb | head -n 1`
+READ_AHEAD=`cat $LPROC/llite/*/max_read_ahead_mb | head -n 1`
 stop_read_ahead() {
-   for f in /proc/fs/lustre/llite/*/max_read_ahead_mb; do 
+   for f in $LPROC/llite/*/max_read_ahead_mb; do 
       echo 0 > $f
    done
 }
 
 start_read_ahead() {
-   for f in /proc/fs/lustre/llite/*/max_read_ahead_mb; do 
+   for f in $LPROC/llite/*/max_read_ahead_mb; do 
       echo $READ_AHEAD > $f
    done
 }
@@ -196,7 +202,7 @@ test_16() {
     stop_read_ahead
 
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
-    sysctl -w lustre.fail_loc=0x80000504
+    do_facet ost sysctl -w lustre.fail_loc=0x80000504
     cancel_lru_locks osc
     # will get evicted here
     do_facet client "cmp /etc/termcap $MOUNT/termcap"  && return 1
@@ -238,8 +244,7 @@ test_18a() {
 
     do_facet client cp /etc/termcap $f
     sync
-    local osc2_dev=`$LCTL device_list | \
-       awk '(/ost2.*client_facet/){print $4}' `
+    local osc2_dev=`awk '(/OST0001-osc-/){print $4}' $LPROC/devices`
     $LCTL --device %$osc2_dev deactivate
     # my understanding is that there should be nothing in the page
     # cache after the client reconnects?     
@@ -369,7 +374,7 @@ test_26() {      # bug 5921 - evict dead exports
            echo "skipping test 26 (local OST)" && return
        [ "`lsmod | grep mds`" ] && \
            echo "skipping test 26 (local MDS)" && return
-       OST_FILE=/proc/fs/lustre/obdfilter/ost_svc/num_exports
+       OST_FILE=$LPROC/obdfilter/ost_svc/num_exports
         OST_EXP="`do_facet ost cat $OST_FILE`"
        OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
        echo starting with $OST_NEXP1 OST exports
@@ -389,6 +394,28 @@ test_26() {      # bug 5921 - evict dead exports
 }
 run_test 26 "evict dead exports"
 
+test_26b() {      # bug 10140 - evict dead exports by pinger
+       zconf_mount `hostname` $MOUNT2
+       MDS_FILE=$LPROC/mds/${mds_svc}/num_exports
+        MDS_NEXP1="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`"
+       OST_FILE=$LPROC/obdfilter/${ost_svc}/num_exports
+        OST_NEXP1="`do_facet ost cat $OST_FILE | cut -d' ' -f2`"
+       echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
+       zconf_umount `hostname` $MOUNT2 -f
+       # evictor takes up to 2.25x to evict.  But if there's a 
+       # race to start the evictor from various obds, the loser
+       # might have to wait for the next ping.
+       echo Waiting for $(($TIMEOUT * 4)) secs
+       sleep $(($TIMEOUT * 4))
+        OST_NEXP2="`do_facet ost cat $OST_FILE | cut -d' ' -f2`"
+        MDS_NEXP2="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`"
+       echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
+        [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
+        [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
+       return 0
+}
+run_test 26b "evict dead exports"
+
 test_27() {
        [ "`lsmod | grep mds`" ] || \
            { echo "skipping test 27 (non-local MDS)" && return 0; }
index d4aed5d..05dfdde 100755 (executable)
@@ -46,6 +46,8 @@ setup() {
     start mds $MDSDEV $MDS_MOUNT_OPTS
     start ost $OSTDEV $OST_MOUNT_OPTS
     start ost2 $OSTDEV2 $OST2_MOUNT_OPTS
+    # client actions will get EIO until MDT contacts OSTs, so give it a sec
+    sleep 5
     zconf_mount `hostname` $MOUNT
     zconf_mount `hostname` $MOUNT2
 }
index 8405e72..cae66d0 100755 (executable)
@@ -15,8 +15,8 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
 
 # Skip these tests
-# bug number: 2766 9930
-ALWAYS_EXCEPT="0b  39   $REPLAY_SINGLE_EXCEPT"
+# bug number: 2766
+ALWAYS_EXCEPT="0b   $REPLAY_SINGLE_EXCEPT"
 
 build_test_filter
 
@@ -902,14 +902,30 @@ run_test 43 "mds osc import failure during recovery; don't LBUG"
 
 test_44() {
     mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
-    do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
-    $LCTL --device $mdcdev recover
-    df $MOUNT
+    for i in `seq 1 10`; do
+        #define OBD_FAIL_TGT_CONN_RACE     0x701
+        do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+        $LCTL --device $mdcdev recover
+        df $MOUNT
+    done
     do_facet mds "sysctl -w lustre.fail_loc=0"
     return 0
 }
 run_test 44 "race in target handle connect"
 
+test_44b() {
+    mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+    for i in `seq 1 10`; do
+        #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+        do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
+        $LCTL --device $mdcdev recover
+        df $MOUNT
+    done
+    do_facet mds "sysctl -w lustre.fail_loc=0"
+    return 0
+}
+run_test 44b "race in target handle connect"
+
 # Handle failed close
 test_45() {
     mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
diff --git a/lustre/tests/runregression-mds.sh b/lustre/tests/runregression-mds.sh
deleted file mode 100755 (executable)
index 1b05df8..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/sh
-
-SRCDIR="`dirname $0`"
-
-ENDRUN=endrun-`hostname`
-
-fail() { 
-       echo "ERROR: $1" 1>&2
-       [ $2 ] && RC=$2 || RC=1
-       exit $RC
-}
-
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
-
-cleanup() {
-       trap 0
-        $LCONF --cleanup $OPTS
-}
-
-[ "$COUNT" ] || COUNT=1000
-
-[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf
-
-[ -z "$*" ] && fail "usage: $0 [--reformat] <conf>.xml" 1
-
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
-if [ -z "$OSCMT" ]; then
-       $LCONF $@ || exit 1
-        trap cleanup EXIT
-       OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -n 1`"
-       [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
-fi
-
-V="-10"
-while [ "$1" ]; do
-       case $1 in
-       -v|--verbose) V="1";;
-       --reformat) : ;;
-       *) OPTS="$OPTS $1" ;;
-       esac
-       shift
-done
-
-OSCTMP=`echo $OSCMT | tr "/" "."`
-USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
-USED=`expr $USED + 16` # Some space for the status file
-
-THREADS=1
-while [ $THREADS -lt 196 ]; do
-       echo "starting $THREADS threads at `date`"
-       [ $V -gt 0 ] || echo 0 > /proc/sys/lnet/debug
-       $SRCDIR/createdestroy /mnt/lustre/file-$$ $COUNT $V $THREADS
-       $SRCDIR/openclose /mnt/lustre/file-$$ $COUNT $THREADS
-       THREADS=`expr $THREADS + 5`
-       $LCONF --cleanup $OPTS || fail 10
-       $LCONF $OPTS || fail 11
-done
-
-rm -f $ENDRUN
-
-NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
-if [ $NOWUSED -gt $USED ]; then
-       echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
-       echo "This is normal on BA OSTs, because of subdirectories." 1>&2
-fi
-
-cleanup
index 997e0a2..7071490 100755 (executable)
@@ -4,7 +4,19 @@
 # Probably a good idea to run this before doing any checkins.
 # In the future this can become more fancy, but it's OK for now.
 
+LUSTRE=${LUSTRE:-`dirname $0`/..}
 SRCDIR="`dirname $0`"
+export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH
+
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh
+
+SETUP=${SETUP:-mcsetup}
+FORMAT=${FORMAT:-mcformat}
+CLEANUP=${CLEANUP:-mcstopall}
+
 fail() { 
        echo "ERROR: $1" 1>&2
        [ $2 ] && RC=$2 || RC=1
@@ -16,14 +28,11 @@ log() {
        lctl mark "$*"
 }
 
-export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH
 
 ERROR=
 SRC=/etc
 [ "$COUNT" ] || COUNT=1000
 
-[ "$LCONF" ] || LCONF=lconf
-
 [ "$MCREATE" ] || MCREATE=mcreate
 
 [ "$MKDIRMANY" ] || MKDIRMANY="createmany -d"
@@ -36,20 +45,23 @@ while [ "$1" ]; do
        shift
 done
 
-EXISTING_MOUNT="`mount | awk '/ lustre(_lite)? / { print $3 }' | tail -n 1`"
+EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts`
 if [ -z "$EXISTING_MOUNT" ]; then
-       sh llmount.sh $OPTS
-       EXISTING_MOUNT="`mount | awk '/ lustre(_lite)? / { print $3 }' | tail -n 1`"
+        $FORMAT
+        $SETUP
+       EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts`
        [ -z "$EXISTING_MOUNT" ] && fail "no lustre filesystem mounted" 1
        I_MOUNTED="yes"
 fi
+MOUNT=$EXISTING_MOUNT
 
 OSCTMP=`echo $MOUNT | tr "/" "."`
 USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1`
 USED=`expr $USED + 16` # Some space for the status file
 
 # let's start slowly here...
-log "touching $MOUNT"
+START=`date +%s`
+log "touching $MOUNT at `date`"
 touch $MOUNT || fail "can't touch $MOUNT" 2
 HOSTS=$MOUNT/hosts.$$
 
@@ -79,19 +91,20 @@ mkdir $DST || fail "can't mkdir $DST" 10
 # ok, that hopefully worked, so let's do a little more, with files that
 # haven't changed in the last day (hopefully they don't change during test)
 FILES=`find $SRC -type f -mtime +1 -ctime +1 | head -n $COUNT`
-log "copying files from $SRC to $DST$SRC"
+log "copying files from $SRC to $DST$SRC at `date`"
 tar cf - $FILES | tar xvf - -C $DST || fail "copying $SRC" 11
 
-log "comparing newly copied files"
+log "comparing newly copied files at `date`"
 for f in $FILES; do
        [ $V ] && log "verifying $DST/$f"
        diff -q $f $DST/$f || ERROR=11
 done
 
 [ "$ERROR" ] && fail "old and new files are different" $ERROR
+log "finished at `date` ($(($(date +%s) - START)))"
 
-sh llmountcleanup.sh || exit 19
-sh llmount.sh $OPTS || exit 20
+$CLEANUP || exit 19
+$SETUP || exit 20
 
 log "comparing previously copied files"
 for f in $FILES; do
@@ -101,8 +114,8 @@ done
 
 [ "$ERROR" ] && fail "old and new files are different on second diff" $ERROR
 
-sh llmountcleanup.sh || exit 19
-sh llmount.sh $OPTS || exit 20
+$CLEANUP || exit 19
+$SETUP || exit 20
 
 log "removing $DST"
 rm -r $V $DST || fail "can't remove $DST" 37
@@ -131,5 +144,5 @@ fi
 
 if [ "$I_MOUNTED" = "yes" ]; then
        sync && sleep 2 && sync     # wait for delete thread
-       sh llmountcleanup.sh || exit 29
+       $CLEANUP
 fi
index 9a05cc9..0c4f5fd 100644 (file)
@@ -11,7 +11,14 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42b  42c  42d  45   68"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
-[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 64b 71 101"
+[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 101"
+# Tests that fail on uml
+[ "$UML" = "no" ] && EXCEPT="$EXCEPT 31d"
+
+# Tests that always fail with mountconf -- FIXME
+# 48a moving the working dir succeeds
+# 104 something is out of sync with b1_4? 'lfs df' needs an arg
+EXCEPT="$EXCEPT 48a 104"
 
 case `uname -r` in
 2.4*) FSTYPE=${FSTYPE:-ext3};    ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
@@ -77,7 +84,7 @@ init_test_env $@
 
 cleanup() {
        echo -n "cln.."
-       $MCCLEANUP ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
+       $MCCLEANUP ${FORCE} $* || { echo "FAILed to clean up"; exit 20; }
 }
 CLEANUP=${CLEANUP:-:}
 
@@ -102,7 +109,6 @@ trace() {
 }
 TRACE=${TRACE:-""}
 
-LPROC=/proc/fs/lustre
 check_kernel_version() {
        VERSION_FILE=$LPROC/kernel_version
        WANT_VER=$1
@@ -239,7 +245,7 @@ rm -rf $DIR/[Rdfs][1-9]*
 build_test_filter
 
 echo "preparing for tests involving mounts"
-EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
+EXT2_DEV=${EXT2_DEV:-$TMP/SANITY.LOOP}
 touch $EXT2_DEV
 mke2fs -j -F $EXT2_DEV 8000 > /dev/null
 echo # add a newline after mke2fs.
@@ -596,7 +602,7 @@ test_22() {
        mkdir $DIR/d22
        chown $RUNAS_ID $DIR/d22
        # Tar gets pissy if it can't access $PWD *sigh*
-       (cd /tmp;
+       (cd $TMP;
        $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \
        $RUNAS tar xfC - $DIR/d22)
        ls -lR $DIR/d22/etc
@@ -1041,7 +1047,7 @@ test_27o() {
        exhaust_all_precreations 0x215
        sleep 5
 
-       touch $DIR/d27/f27o && error
+       touch $DIR/d27/f27o && error "able to create $DIR/d27/f27o"
 
        reset_enospc
 }
@@ -2466,7 +2472,7 @@ run_test 65i "set default striping on root directory (bug 6367)="
 test_65j() { # bug6367
        # if we aren't already remounting for each test, do so for this test
        if [ "$CLEANUP" = ":" ]; then
-               cleanup || error "failed to unmount"
+               cleanup -f || error "failed to unmount"
                setup || error "failed to remount"
        fi
        $LSTRIPE -d $MOUNT || true
@@ -2694,6 +2700,8 @@ test_75() {
                error "files ${F}_join_10 ${F}_join_10_compare are different"
        $LFS getstripe ${F}_join_10
        $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open"
+
+       ls -l $F*
 }
 run_test 75 "TEST join file"
 
@@ -2822,10 +2830,12 @@ test_101() {
        done
 
        #
-       # randomly read 10000 of 64K chunks from 200M file.
+       # randomly read 10000 of 64K chunks from file 3x RAM size
        #
        nreads=10000
-       $RANDOM_READS -f $DIR/f101 -s200000000 -b65536 -C -n$nreads -t 180
+       s=$(($(awk '/MemTotal/ { print $2 }' /proc/meminfo) * 3))
+       echo "nreads: $nreads file size: ${s}kB"
+       $RANDOM_READS -f $DIR/f101 -s${s}000 -b65536 -C -n$nreads -t 180
 
        discard=0
        for s in $LPROC/llite/*/read_ahead_stats ;do
@@ -2935,7 +2945,7 @@ test_104() {
        lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed"
        lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed"
        
-       OSC=`lctl dl | awk '/OSC.*MNT/ {print $4}' | head -n 1`
+       OSC=`awk '/-osc-/ {print $4}' $LPROC/devices | head -n 1`
        lctl --device %$OSC deactivate
        lfs df || error "lfs df with deactivated OSC failed"
        lctl --device %$OSC recover
@@ -2952,7 +2962,7 @@ if [ "`mount | grep ^$NAME`" ]; then
     rm -rf $DIR/[Rdfs][1-9]*
 fi
 if [ "$I_MOUNTED" = "yes" ]; then
-    $MCCLEANUP || error "cleanup failed"
+    $MCCLEANUP -f || error "cleanup failed"
 fi
 
 
index 763e655..b10a0e1 100644 (file)
@@ -42,6 +42,7 @@ init_test_env $@
 
 cleanup() {
        echo -n "cln.."
+       grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2 ${FORCE}
        $MCCLEANUP ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
 }
 CLEANUP=${CLEANUP:-:}
@@ -105,6 +106,26 @@ basetest() {
     IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
 }
 
+build_test_filter() {
+       [ "$ALWAYS_EXCEPT$EXCEPT$SANITYN_EXCEPT" ] && \
+           echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITYN_EXCEPT`"
+
+        for O in $ONLY; do
+            eval ONLY_${O}=true
+        done
+        for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do
+            eval EXCEPT_${E}=true
+        done
+}
+
+_basetest() {
+    echo $*
+}
+
+basetest() {
+    IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+}
+
 run_test() {
          export base=`basetest $1`
          if [ "$ONLY" ]; then
@@ -541,7 +562,7 @@ run_test 23 " others should see updated atime while another read===="
 log "cleanup: ======================================================"
 rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true
 if [ "$I_MOUNTED" = "yes" ]; then
-    $MCCLEANUP || error "cleanup failed"
+    cleanup
 fi
 
 echo '=========================== finished ==============================='
index 79aa820..8ca56d9 100644 (file)
@@ -41,6 +41,7 @@ init_test_env() {
     export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
     export CHECKSTAT="${CHECKSTAT:-checkstat} "
     export FSYTPE=${FSTYPE:-"ext3"}
+    export LPROC=/proc/fs/lustre
 
     if [ "$ACCEPTOR_PORT" ]; then
         export PORT_OPT="--port $ACCEPTOR_PORT"
@@ -70,13 +71,11 @@ init_test_env() {
 }
 
 unload_modules() {
-    $LCTL dk $TMP/debug
-    $LCTL modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 
+    lsmod | grep lnet > /dev/null && $LCTL dk $TMP/debug
+    local MODULES=`$LCTL modules | awk '{ print $2 }'`
+    rmmod $MODULES >/dev/null 2>&1 
      # do it again, in case we tried to unload ksocklnd too early
-    LNET=$(lsmod | grep -c lnet) 
-    if [ $LNET -ne 0 ]; then
-       $LCTL modules | awk '{ print $2 }' | xargs rmmod
-    fi
+    lsmod | grep lnet > /dev/null && rmmod $MODULES >/dev/null 2>&1 
     lsmod | grep lnet && echo "modules still loaded" && return 1
 
     LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked"`
@@ -108,7 +107,9 @@ start() {
        echo mount -t lustre $@ ${device} /mnt/${facet} 
         echo Start of ${device} on ${facet} failed ${RC}
     else 
-       label=`do_facet ${facet} e2label ${device}`
+       do_facet ${facet} sync
+       # need the awk in case running with -v 
+       label=`do_facet ${facet} "e2label ${device}" | awk '{print $(NF)}'`
        eval export ${facet}_svc=${label}
        eval export ${facet}_dev=${device}
        eval export ${facet}_opt=\"$@\"
@@ -120,14 +121,14 @@ start() {
 stop() {
     facet=$1
     shift
-    local running=`do_facet ${facet} "grep -c /mnt/${facet}' ' /proc/mounts"`
+    # the following line fails with VERBOSE set 
+    local running=`do_facet ${facet} "grep -c /mnt/${facet}' ' /proc/mounts" | awk '{print $(NF)}'`
     if [ $running -ne 0 ]; then
        echo "Stopping /mnt/${facet} (opts:$@)"
        do_facet ${facet} umount -d $@ /mnt/${facet}
     fi
-    #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
-    #    --node ${active}_facet  --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
-    #    $@ --cleanup $XMLCONFIG
+    #do_facet ${facet} umount -d $@ /mnt/${facet} >> /dev/null 2>&1 || :
+    [ -e /proc/fs/lustre ] && grep "ST " /proc/fs/lustre/devices && echo "service didn't stop" && exit 1
     return 0
 }
 
@@ -135,22 +136,21 @@ zconf_mount() {
     local OPTIONS
     local client=$1
     local mnt=$2
-    if [ -z "$mnt" ]; then
-       echo No mount point given: zconf_mount $*
-       exit 1
-    fi
     # Only supply -o to mount if we have options
     if [ -n "$MOUNTOPT" ]; then
         OPTIONS="-o $MOUNTOPT"
     fi
+    local device=`facet_nid mgs`:/$FSNAME
+    if [ -z "$mnt" -o -z "$FSNAME" ]; then
+       echo Bad zconf mount command: opt=$OPTIONS dev=$device mnt=$mnt
+       exit 1
+    fi
 
-    echo "Starting client: $OPTIONS `facet_nid mgs`:/$FSNAME $mnt" 
+    echo "Starting client: $OPTIONS $device $mnt" 
     do_node $client mkdir -p $mnt
-    do_node $client mount -t lustre $OPTIONS \
-       `facet_nid mgs`:/$FSNAME $mnt || return 1
+    do_node $client mount -t lustre $OPTIONS $device $mnt || return 1
 
     do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
-
     [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
     return 0
 }
@@ -159,7 +159,11 @@ zconf_umount() {
     client=$1
     mnt=$2
     [ "$3" ] && force=-f
-    do_node $client umount $force $mnt
+    local running=`do_node $client "grep -c $mnt' ' /proc/mounts" | awk '{print $(NF)}'`
+    if [ $running -ne 0 ]; then
+       echo "Stopping client $mnt (opts:$force)"
+       do_node $client umount $force $mnt
+    fi
 }
 
 shutdown_facet() {
@@ -396,16 +400,6 @@ do_facet() {
     do_node $HOST $@
 }
 
-add_facet() {
-    local facet=$1
-    shift
-    echo "add facet $facet: `facet_host $facet`"
-    do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \
-        --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM
-    do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \
-        --nettype lnet $PORT_OPT
-}
-
 add() {
     local facet=$1
     shift
@@ -415,16 +409,6 @@ add() {
     $MKFS $*
 }
 
-add_client() {
-    local MOUNT_OPTS
-    local facet=$1
-    mds=$2
-    shift; shift
-    [ "x$CLIENTOPT" != "x" ] && MOUNT_OPTS="--clientoptions $CLIENTOPT"
-    add_facet $facet --lustre_upcall $UPCALL
-    do_lmc --add mtpt --node ${facet}_facet --mds ${mds}_svc $* $MOUNT_OPTS
-}
-
 
 ####### 
 # General functions
@@ -561,7 +545,7 @@ pgcache_empty() {
 ##################################
 # Test interface 
 error() {
-       sysctl -w lustre.fail_loc=0 > /dev/null 2>&1 || true
+       sysctl -w lustre.fail_loc=0 2> /dev/null || true
        echo "${TESTSUITE}: **** FAIL:" $@
        log "FAIL: $@"
        exit 1
@@ -629,6 +613,7 @@ equals_msg() {
 
 log() {
        echo "$*"
+       lsmod | grep lnet > /dev/null || modprobe lnet
        $LCTL mark "$*" 2> /dev/null || true
 }
 
index d6e9839..d730e27 100644 (file)
@@ -15,12 +15,15 @@ obdbarrier
 lload
 wirecheck
 lfs
+mkfs.lustre
 mkfs_lustre
+mount.lustre
 mount_lustre
+tunefs.lustre
+tunefs_lustre
 llog_reader
 llmount
 l_getgroups
-mount.lustre
 wiretest
 llog_reader
 .*.cmd
index 3283153..82409e1 100644 (file)
@@ -478,6 +478,8 @@ class LustreDB_LDAP(LustreDB):
 
     def _get_val(self, k):
         ret = None
+        if k == 'name':
+            k = 'lustreName'
         if self._attrs.has_key(k):
             v = self._attrs[k]
             if type(v) == types.ListType:
index 49fc5ca..77237d4 100644 (file)
@@ -486,7 +486,7 @@ static int mntdf(char *mntdir, int ishow, int cooked)
         __u32 index;
         __u64 avail_sum, used_sum, total_sum;
         char tbuf[10], ubuf[10], abuf[10], rbuf[10];        
-        double ratio_sum;
+        double ratio_sum = 0;
         int rc;
 
         if (ishow)
@@ -554,7 +554,8 @@ static int mntdf(char *mntdir, int ishow, int cooked)
         }
 
         used_sum = total_sum - avail_sum;
-        ratio_sum = (double)(total_sum - avail_sum) / (double)total_sum;
+        if (total_sum > 0)
+                ratio_sum = (double)(total_sum - avail_sum) / (double)total_sum;
         sprintf(rbuf, RDF, (int)(ratio_sum * 100));
         if (cooked) {
                 int i;
@@ -840,7 +841,7 @@ static int lfs_quotacheck(int argc, char **argv)
 
         if (check_type)
                 check_type--;
-        else /* check both user & group quota by default */
+        else    /* do quotacheck for both user & group quota by default */
                 check_type = 0x02;
 
         if (argc == optind)
index 1c77b0a..a14db82 100644 (file)
@@ -75,6 +75,7 @@ void usage(FILE *out)
                 "\t\t\trequired for all targets other than the mgs node\n"
                 "\t\t--fsname=<filesystem_name> : default is 'lustre'\n"
                 "\t\t--failnode=<nid>[,<...>] : NID(s) of a failover partner\n"
+                "\t\t--param <key>=<value> : set a permanent parameter\n"
                 "\t\t--index=#N : target index\n"
                 /* FIXME implement 1.6.x
                 "\t\t--configdev=<altdevice|file>: store configuration info\n"
@@ -88,6 +89,7 @@ void usage(FILE *out)
                 "\t\t--reformat: overwrite an existing disk\n"
                 "\t\t--stripe-count-hint=#N : used for optimizing MDT inode size\n"
 #else
+                "\t\t--erase-params : erase all old parameter settings\n"
                 "\t\t--nomgs: turn off MGS service on this MDT\n"
                 "\t\t--writeconf: erase all config logs for this fs.\n"
 #endif
index 8d1d4b3..8631dc1 100644 (file)
@@ -381,11 +381,19 @@ int main(int argc, char *const argv[])
                 if (errno == ENOTBLK)
                         fprintf(stderr,"Does this filesystem have any OSTs?\n");
                 if (errno == ENOENT)
-                        fprintf(stderr,"Is the mgs specification correct? "
+                        fprintf(stderr,"Is the MGS specification correct? "
                                 "(%s)\n", source);
                 if (errno == EALREADY)
-                        fprintf(stderr,"This service is already running. "
+                        fprintf(stderr,"The target service is already running. "
                                 "(%s)\n", source);
+                if (errno == ENXIO)
+                        fprintf(stderr,"The target service failed to start "
+                                "(bad config log?) (%s)\n", source);
+                if (errno == EIO)
+                        fprintf(stderr,"Is the MGS running? (%s)\n", source);
+                if (errno == EADDRINUSE)
+                        fprintf(stderr,"The target service's index is already "
+                                "in use. (%s)\n", source);
                 rc = errno;
         } else if (!nomtab) {
                 rc = update_mtab_entry(source, target, "lustre", options,0,0,0);
index 62d526b..9ae82bb 100755 (executable)
@@ -1,5 +1,8 @@
 #!/bin/sh
 
-./lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
+
+lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 
 # do it again, in case we tried to unload ksocklnd too early
-./lctl modules | awk '{ print $2 }' | xargs rmmod
+lctl modules | awk '{ print $2 }' | xargs rmmod
index 69e5aef..3b781cc 100644 (file)
@@ -123,52 +123,59 @@ check_obdo(void)
         CHECK_MEMBER(obdo, o_misc);
         CHECK_MEMBER(obdo, o_easize);
         CHECK_MEMBER(obdo, o_mds);
+        CHECK_MEMBER(obdo, o_stripe_idx);
         CHECK_MEMBER(obdo, o_padding_1);
         CHECK_MEMBER(obdo, o_inline);
 
         CHECK_VALUE(OBD_INLINESZ);
 
-        CHECK_VALUE(OBD_MD_FLID);
-        CHECK_VALUE(OBD_MD_FLATIME);
-        CHECK_VALUE(OBD_MD_FLMTIME);
-        CHECK_VALUE(OBD_MD_FLCTIME);
-        CHECK_VALUE(OBD_MD_FLSIZE);
-        CHECK_VALUE(OBD_MD_FLBLOCKS);
-        CHECK_VALUE(OBD_MD_FLBLKSZ);
-        CHECK_VALUE(OBD_MD_FLMODE);
-        CHECK_VALUE(OBD_MD_FLTYPE);
-        CHECK_VALUE(OBD_MD_FLUID);
-        CHECK_VALUE(OBD_MD_FLGID);
-        CHECK_VALUE(OBD_MD_FLFLAGS);
-        CHECK_VALUE(OBD_MD_FLNLINK);
-        CHECK_VALUE(OBD_MD_FLGENER);
-        CHECK_VALUE(OBD_MD_FLINLINE);
-        CHECK_VALUE(OBD_MD_FLRDEV);
-        CHECK_VALUE(OBD_MD_FLEASIZE);
-        CHECK_VALUE(OBD_MD_LINKNAME);
-        CHECK_VALUE(OBD_MD_FLHANDLE);
-        CHECK_VALUE(OBD_MD_FLCKSUM);
-        CHECK_VALUE(OBD_MD_FLQOS);
-        CHECK_VALUE(OBD_MD_FLCOOKIE);
-        CHECK_VALUE(OBD_MD_FLGROUP);
-        CHECK_VALUE(OBD_MD_FLFID);
-        CHECK_VALUE(OBD_MD_FLEPOCH);
-        CHECK_VALUE(OBD_MD_FLGRANT);
-        CHECK_VALUE(OBD_MD_FLDIREA);
-        CHECK_VALUE(OBD_MD_FLUSRQUOTA);
-        CHECK_VALUE(OBD_MD_FLGRPQUOTA);
-        CHECK_VALUE_64(OBD_MD_MDS);
-        CHECK_VALUE_64(OBD_MD_REINT);
-
-        CHECK_VALUE(OBD_FL_INLINEDATA);
-        CHECK_VALUE(OBD_FL_OBDMDEXISTS);
-        CHECK_VALUE(OBD_FL_DELORPHAN);
-        CHECK_VALUE(OBD_FL_NORPC);
-        CHECK_VALUE(OBD_FL_IDONLY);
-        CHECK_VALUE(OBD_FL_RECREATE_OBJS);
-        CHECK_VALUE(OBD_FL_DEBUG_CHECK);
-        CHECK_VALUE(OBD_FL_NO_USRQUOTA);
-        CHECK_VALUE(OBD_FL_NO_GRPQUOTA);
+        CHECK_CDEFINE(OBD_MD_FLID);
+        CHECK_CDEFINE(OBD_MD_FLATIME);
+        CHECK_CDEFINE(OBD_MD_FLMTIME);
+        CHECK_CDEFINE(OBD_MD_FLCTIME);
+        CHECK_CDEFINE(OBD_MD_FLSIZE);
+        CHECK_CDEFINE(OBD_MD_FLBLOCKS);
+        CHECK_CDEFINE(OBD_MD_FLBLKSZ);
+        CHECK_CDEFINE(OBD_MD_FLMODE);
+        CHECK_CDEFINE(OBD_MD_FLTYPE);
+        CHECK_CDEFINE(OBD_MD_FLUID);
+        CHECK_CDEFINE(OBD_MD_FLGID);
+        CHECK_CDEFINE(OBD_MD_FLFLAGS);
+        CHECK_CDEFINE(OBD_MD_FLNLINK);
+        CHECK_CDEFINE(OBD_MD_FLGENER);
+        CHECK_CDEFINE(OBD_MD_FLINLINE);
+        CHECK_CDEFINE(OBD_MD_FLRDEV);
+        CHECK_CDEFINE(OBD_MD_FLEASIZE);
+        CHECK_CDEFINE(OBD_MD_LINKNAME);
+        CHECK_CDEFINE(OBD_MD_FLHANDLE);
+        CHECK_CDEFINE(OBD_MD_FLCKSUM);
+        CHECK_CDEFINE(OBD_MD_FLQOS);
+        CHECK_CDEFINE(OBD_MD_FLCOOKIE);
+        CHECK_CDEFINE(OBD_MD_FLGROUP);
+        CHECK_CDEFINE(OBD_MD_FLFID);
+        CHECK_CDEFINE(OBD_MD_FLEPOCH);
+        CHECK_CDEFINE(OBD_MD_FLGRANT);
+        CHECK_CDEFINE(OBD_MD_FLDIREA);
+        CHECK_CDEFINE(OBD_MD_FLUSRQUOTA);
+        CHECK_CDEFINE(OBD_MD_FLGRPQUOTA);
+        CHECK_CDEFINE(OBD_MD_FLMODEASIZE);
+        CHECK_CDEFINE(OBD_MD_MDS);
+        CHECK_CDEFINE(OBD_MD_REINT);
+        CHECK_CDEFINE(OBD_MD_FLXATTR);
+        CHECK_CDEFINE(OBD_MD_FLXATTRLS);
+        CHECK_CDEFINE(OBD_MD_FLXATTRRM);
+        CHECK_CDEFINE(OBD_MD_FLACL);
+
+        CHECK_CDEFINE(OBD_FL_INLINEDATA);
+        CHECK_CDEFINE(OBD_FL_OBDMDEXISTS);
+        CHECK_CDEFINE(OBD_FL_DELORPHAN);
+        CHECK_CDEFINE(OBD_FL_NORPC);
+        CHECK_CDEFINE(OBD_FL_IDONLY);
+        CHECK_CDEFINE(OBD_FL_RECREATE_OBJS);
+        CHECK_CDEFINE(OBD_FL_DEBUG_CHECK);
+        CHECK_CDEFINE(OBD_FL_NO_USRQUOTA);
+        CHECK_CDEFINE(OBD_FL_NO_GRPQUOTA);
+        CHECK_CDEFINE(OBD_FL_CREATE_CROW);
 }
 
 static void
@@ -191,13 +198,24 @@ check_lov_mds_md_v1(void)
         CHECK_MEMBER(lov_ost_data_v1, l_ost_gen);
         CHECK_MEMBER(lov_ost_data_v1, l_ost_idx);
 
-        CHECK_VALUE(LOV_MAGIC_V1);
+        CHECK_CDEFINE(LOV_MAGIC_V1);
+        CHECK_CDEFINE(LOV_MAGIC_JOIN);
 
         CHECK_VALUE(LOV_PATTERN_RAID0);
         CHECK_VALUE(LOV_PATTERN_RAID1);
 }
 
 static void
+check_lov_mds_md_join(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(lov_mds_md_join);
+        CHECK_MEMBER(lov_mds_md_join, lmmj_md);
+        CHECK_MEMBER(lov_mds_md_join, lmmj_array_id);
+        CHECK_MEMBER(lov_mds_md_join, lmmj_extent_count);
+}
+
+static void
 check_obd_statfs(void)
 {
         BLANK_LINE();
@@ -211,6 +229,15 @@ check_obd_statfs(void)
         CHECK_MEMBER(obd_statfs, os_bsize);
         CHECK_MEMBER(obd_statfs, os_namelen);
         CHECK_MEMBER(obd_statfs, os_state);
+        CHECK_MEMBER(obd_statfs, os_spare1);
+        CHECK_MEMBER(obd_statfs, os_spare2);
+        CHECK_MEMBER(obd_statfs, os_spare3);
+        CHECK_MEMBER(obd_statfs, os_spare4);
+        CHECK_MEMBER(obd_statfs, os_spare5);
+        CHECK_MEMBER(obd_statfs, os_spare6);
+        CHECK_MEMBER(obd_statfs, os_spare7);
+        CHECK_MEMBER(obd_statfs, os_spare8);
+        CHECK_MEMBER(obd_statfs, os_spare9);
 }
 
 static void
@@ -340,19 +367,23 @@ check_mds_body(void)
 
         CHECK_VALUE(FMODE_READ);
         CHECK_VALUE(FMODE_WRITE);
-        CHECK_VALUE(FMODE_EXEC);
-
-        CHECK_VALUE(MDS_OPEN_CREAT);
-        CHECK_VALUE(MDS_OPEN_EXCL);
-        CHECK_VALUE(MDS_OPEN_TRUNC);
-        CHECK_VALUE(MDS_OPEN_APPEND);
-        CHECK_VALUE(MDS_OPEN_SYNC);
-        CHECK_VALUE(MDS_OPEN_DIRECTORY);
-        CHECK_VALUE(MDS_OPEN_DELAY_CREATE);
+        CHECK_VALUE(MDS_FMODE_EXEC);
+
+        CHECK_CDEFINE(MDS_OPEN_CREAT);
+        CHECK_CDEFINE(MDS_OPEN_EXCL);
+        CHECK_CDEFINE(MDS_OPEN_TRUNC);
+        CHECK_CDEFINE(MDS_OPEN_APPEND);
+        CHECK_CDEFINE(MDS_OPEN_SYNC);
+        CHECK_CDEFINE(MDS_OPEN_DIRECTORY);
+        CHECK_CDEFINE(MDS_OPEN_DELAY_CREATE);
         CHECK_CDEFINE(MDS_OPEN_OWNEROVERRIDE);
         CHECK_CDEFINE(MDS_OPEN_JOIN_FILE);
         CHECK_CDEFINE(MDS_OPEN_HAS_EA);
         CHECK_CDEFINE(MDS_OPEN_HAS_OBJS);
+
+        CHECK_CDEFINE(MDS_INODELOCK_LOOKUP);
+        CHECK_CDEFINE(MDS_INODELOCK_UPDATE);
+        CHECK_CDEFINE(MDS_INODELOCK_OPEN);
 }
 
 static void
@@ -444,6 +475,15 @@ check_mds_rec_rename(void)
 }
 
 static void
+check_mds_rec_join(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(mds_rec_join);
+        CHECK_MEMBER(mds_rec_join, jr_fid);
+        CHECK_MEMBER(mds_rec_join, jr_headsize);
+}
+
+static void
 check_lov_desc(void)
 {
         BLANK_LINE();
@@ -481,6 +521,14 @@ check_ldlm_extent(void)
 }
 
 static void
+check_ldlm_inodebits(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(ldlm_inodebits);
+        CHECK_MEMBER(ldlm_inodebits, bits);
+}
+
+static void
 check_ldlm_flock(void)
 {
         BLANK_LINE();
@@ -567,15 +615,16 @@ check_llog_logid(void)
         CHECK_MEMBER(llog_logid, lgl_ogr);
         CHECK_MEMBER(llog_logid, lgl_ogen);
 
-        CHECK_VALUE(OST_SZ_REC);
-        CHECK_VALUE(OST_RAID1_REC);
-        CHECK_VALUE(MDS_UNLINK_REC);
-        CHECK_VALUE(MDS_SETATTR_REC);
-        CHECK_VALUE(OBD_CFG_REC);
-        CHECK_VALUE(PTL_CFG_REC);
-        CHECK_VALUE(LLOG_GEN_REC);
-        CHECK_VALUE(LLOG_HDR_MAGIC);
-        CHECK_VALUE(LLOG_LOGID_MAGIC);
+        CHECK_CVALUE(OST_SZ_REC);
+        CHECK_CVALUE(OST_RAID1_REC);
+        CHECK_CVALUE(MDS_UNLINK_REC);
+        CHECK_CVALUE(MDS_SETATTR_REC);
+        CHECK_CVALUE(OBD_CFG_REC);
+        CHECK_CVALUE(PTL_CFG_REC);
+        CHECK_CVALUE(LLOG_GEN_REC);
+        CHECK_CVALUE(LLOG_JOIN_REC);
+        CHECK_CVALUE(LLOG_HDR_MAGIC);
+        CHECK_CVALUE(LLOG_LOGID_MAGIC);
 }
 
 static void
@@ -747,13 +796,15 @@ check_llogd_body(void)
         CHECK_MEMBER(llogd_body, lgd_len);
         CHECK_MEMBER(llogd_body, lgd_cur_offset);
 
-        CHECK_VALUE(LLOG_ORIGIN_HANDLE_CREATE);
-        CHECK_VALUE(LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
-        CHECK_VALUE(LLOG_ORIGIN_HANDLE_READ_HEADER);
-        CHECK_VALUE(LLOG_ORIGIN_HANDLE_WRITE_REC);
-        CHECK_VALUE(LLOG_ORIGIN_HANDLE_CLOSE);
-        CHECK_VALUE(LLOG_ORIGIN_CONNECT);
-        CHECK_VALUE(LLOG_CATINFO);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_CREATE);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_READ_HEADER);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_WRITE_REC);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_CLOSE);
+        CHECK_CVALUE(LLOG_ORIGIN_CONNECT);
+        CHECK_CVALUE(LLOG_CATINFO);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+        CHECK_CVALUE(LLOG_ORIGIN_HANDLE_DESTROY);
 }
 
 static void
@@ -767,6 +818,26 @@ check_llogd_conn_body(void)
 }
 
 static void
+check_mds_extent_desc(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(mds_extent_desc);
+        CHECK_MEMBER(mds_extent_desc, med_start);
+        CHECK_MEMBER(mds_extent_desc, med_len);
+        CHECK_MEMBER(mds_extent_desc, med_lmm);
+}
+
+static void
+check_llog_array_rec(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(llog_array_rec);
+        CHECK_MEMBER(llog_array_rec, lmr_hdr);
+        CHECK_MEMBER(llog_array_rec, lmr_med);
+        CHECK_MEMBER(llog_array_rec, lmr_tail);
+}
+
+static void
 check_qunit_data(void)
 {
         BLANK_LINE();
@@ -911,6 +982,12 @@ main(int argc, char **argv)
         CHECK_VALUE(REINT_OPEN);
         CHECK_VALUE(REINT_MAX);
 
+        CHECK_VALUE(MGS_CONNECT);
+        CHECK_VALUE(MGS_DISCONNECT);
+        CHECK_VALUE(MGS_EXCEPTION);
+        CHECK_VALUE(MGS_TARGET_REG);
+        CHECK_VALUE(MGS_TARGET_DEL);
+
         CHECK_VALUE(DISP_IT_EXECD);
         CHECK_VALUE(DISP_LOOKUP_EXECD);
         CHECK_VALUE(DISP_LOOKUP_NEG);
@@ -938,11 +1015,10 @@ main(int argc, char **argv)
         CHECK_VALUE(LCK_GROUP);
         CHECK_VALUE(LCK_MAXMODE);
 
-        CHECK_VALUE(MGS_CONNECT);
-        CHECK_VALUE(MGS_DISCONNECT);
-        CHECK_VALUE(MGS_EXCEPTION);
-        CHECK_VALUE(MGS_TARGET_REG);
-        CHECK_VALUE(MGS_TARGET_DEL);
+        CHECK_CVALUE(LDLM_PLAIN);
+        CHECK_CVALUE(LDLM_EXTENT);
+        CHECK_CVALUE(LDLM_FLOCK);
+        CHECK_CVALUE(LDLM_IBITS);
 
         CHECK_VALUE(OBD_PING);
         CHECK_VALUE(OBD_LOG_CANCEL);
@@ -952,17 +1028,19 @@ main(int argc, char **argv)
         CHECK_VALUE(QUOTA_DQACQ);
         CHECK_VALUE(QUOTA_DQREL);
 
-        CHECK_VALUE(OBD_CONNECT_RDONLY);
-        CHECK_VALUE(OBD_CONNECT_INDEX);
-        CHECK_VALUE(OBD_CONNECT_GRANT);
-        CHECK_VALUE(OBD_CONNECT_SRVLOCK);
-        CHECK_VALUE(OBD_CONNECT_VERSION);
-        CHECK_VALUE(OBD_CONNECT_REQPORTAL);
-        CHECK_VALUE(OBD_CONNECT_ACL);
-        CHECK_VALUE(OBD_CONNECT_XATTR);
-        CHECK_VALUE(OBD_CONNECT_CROW);
-        CHECK_VALUE(OBD_CONNECT_TRUNCLOCK);
-        CHECK_VALUE(OBD_CONNECT_TRANSNO);
+        CHECK_CDEFINE(OBD_CONNECT_RDONLY);
+        CHECK_CDEFINE(OBD_CONNECT_INDEX);
+        CHECK_CDEFINE(OBD_CONNECT_GRANT);
+        CHECK_CDEFINE(OBD_CONNECT_SRVLOCK);
+        CHECK_CDEFINE(OBD_CONNECT_VERSION);
+        CHECK_CDEFINE(OBD_CONNECT_REQPORTAL);
+        CHECK_CDEFINE(OBD_CONNECT_ACL);
+        CHECK_CDEFINE(OBD_CONNECT_XATTR);
+        CHECK_CDEFINE(OBD_CONNECT_CROW);
+        CHECK_CDEFINE(OBD_CONNECT_TRUNCLOCK);
+        CHECK_CDEFINE(OBD_CONNECT_TRANSNO);
+        CHECK_CDEFINE(OBD_CONNECT_IBITS);
+        CHECK_CDEFINE(OBD_CONNECT_JOIN);
 
         COMMENT("Sizes and Offsets");
         BLANK_LINE();
@@ -970,6 +1048,7 @@ main(int argc, char **argv)
         check_lustre_msg();
         check_obdo();
         check_lov_mds_md_v1();
+        check_lov_mds_md_join();
         check_obd_statfs();
         check_obd_ioobj();
         check_obd_quotactl();
@@ -983,10 +1062,12 @@ main(int argc, char **argv)
         check_mds_rec_link();
         check_mds_rec_unlink();
         check_mds_rec_rename();
+        check_mds_rec_join();
         check_lov_desc();
         check_ldlm_res_id();
         check_ldlm_extent();
         check_ldlm_flock();
+        check_ldlm_inodebits();
         check_ldlm_intent();
         check_ldlm_resource_desc();
         check_ldlm_lock_desc();
@@ -1009,6 +1090,8 @@ main(int argc, char **argv)
         check_llog_cookie();
         check_llogd_body();
         check_llogd_conn_body();
+        check_llog_array_rec();
+        check_mds_extent_desc();
         check_qunit_data();
 
         printf("}\n\n");
index a44712c..021a1de 100644 (file)
@@ -159,6 +159,16 @@ void lustre_assert_wire_constants(void)
                  (long long)MDS_STATUS_CONN);
         LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n",
                  (long long)MDS_STATUS_LOV);
+        LASSERTF(MGS_CONNECT == 250, " found %lld\n",
+                 (long long)MGS_CONNECT);
+        LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
+                 (long long)MGS_DISCONNECT);
+        LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
+                 (long long)MGS_EXCEPTION);
+        LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
+                 (long long)MGS_TARGET_REG);
+        LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
+                 (long long)MGS_TARGET_DEL);
         LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n",
                  (long long)LDLM_ENQUEUE);
         LASSERTF(LDLM_CONVERT == 102, " found %lld\n",
@@ -189,16 +199,10 @@ void lustre_assert_wire_constants(void)
                  (long long)LCK_GROUP);
         LASSERTF(LCK_MAXMODE == 65, " found %lld\n",
                  (long long)LCK_MAXMODE);
-        LASSERTF(MGS_CONNECT == 250, " found %lld\n",
-                 (long long)MGS_CONNECT);
-        LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
-                 (long long)MGS_DISCONNECT);
-        LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
-                 (long long)MGS_EXCEPTION);
-        LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
-                 (long long)MGS_TARGET_REG);
-        LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
-                 (long long)MGS_TARGET_DEL);
+        CLASSERT(LDLM_PLAIN == 10);
+        CLASSERT(LDLM_EXTENT == 11);
+        CLASSERT(LDLM_FLOCK == 12);
+        CLASSERT(LDLM_IBITS == 13);
         LASSERTF(OBD_PING == 400, " found %lld\n",
                  (long long)OBD_PING);
         LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n",
@@ -211,28 +215,19 @@ void lustre_assert_wire_constants(void)
                  (long long)QUOTA_DQACQ);
         LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
                  (long long)QUOTA_DQREL);
-        LASSERTF(OBD_CONNECT_RDONLY == 1, " found %lld\n",
-                 (long long)OBD_CONNECT_RDONLY);
-        LASSERTF(OBD_CONNECT_INDEX == 2, " found %lld\n",
-                 (long long)OBD_CONNECT_INDEX);
-        LASSERTF(OBD_CONNECT_GRANT == 8, " found %lld\n",
-                 (long long)OBD_CONNECT_GRANT);
-        LASSERTF(OBD_CONNECT_SRVLOCK == 16, " found %lld\n",
-                 (long long)OBD_CONNECT_SRVLOCK);
-        LASSERTF(OBD_CONNECT_VERSION == 32, " found %lld\n",
-                 (long long)OBD_CONNECT_VERSION);
-        LASSERTF(OBD_CONNECT_REQPORTAL == 64, " found %lld\n",
-                 (long long)OBD_CONNECT_REQPORTAL);
-        LASSERTF(OBD_CONNECT_ACL == 128, " found %lld\n",
-                 (long long)OBD_CONNECT_ACL);
-        LASSERTF(OBD_CONNECT_XATTR == 256, " found %lld\n",
-                 (long long)OBD_CONNECT_XATTR);
-        LASSERTF(OBD_CONNECT_CROW == 512, " found %lld\n",
-                 (long long)OBD_CONNECT_CROW);
-        LASSERTF(OBD_CONNECT_TRUNCLOCK == 1024, " found %lld\n",
-                 (long long)OBD_CONNECT_TRUNCLOCK);
-        LASSERTF(OBD_CONNECT_TRANSNO == 2048, " found %lld\n",
-                 (long long)OBD_CONNECT_TRANSNO);
+        CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+        CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+        CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+        CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+        CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+        CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+        CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+        CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+        CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+        CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+        CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+        CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+        CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         /* Sizes and Offsets */
 
 
@@ -379,6 +374,10 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obdo, o_mds));
         LASSERTF((int)sizeof(((struct obdo *)0)->o_mds) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obdo *)0)->o_mds));
+        LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, " found %lld\n",
+                 (long long)(int)offsetof(struct obdo, o_stripe_idx));
+        LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
         LASSERTF((int)offsetof(struct obdo, o_padding_1) == 124, " found %lld\n",
                  (long long)(int)offsetof(struct obdo, o_padding_1));
         LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_1) == 4, " found %lld\n",
@@ -389,86 +388,52 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)sizeof(((struct obdo *)0)->o_inline));
         LASSERTF(OBD_INLINESZ == 80, " found %lld\n",
                  (long long)OBD_INLINESZ);
-        LASSERTF(OBD_MD_FLID == 1, " found %lld\n",
-                 (long long)OBD_MD_FLID);
-        LASSERTF(OBD_MD_FLATIME == 2, " found %lld\n",
-                 (long long)OBD_MD_FLATIME);
-        LASSERTF(OBD_MD_FLMTIME == 4, " found %lld\n",
-                 (long long)OBD_MD_FLMTIME);
-        LASSERTF(OBD_MD_FLCTIME == 8, " found %lld\n",
-                 (long long)OBD_MD_FLCTIME);
-        LASSERTF(OBD_MD_FLSIZE == 16, " found %lld\n",
-                 (long long)OBD_MD_FLSIZE);
-        LASSERTF(OBD_MD_FLBLOCKS == 32, " found %lld\n",
-                 (long long)OBD_MD_FLBLOCKS);
-        LASSERTF(OBD_MD_FLBLKSZ == 64, " found %lld\n",
-                 (long long)OBD_MD_FLBLKSZ);
-        LASSERTF(OBD_MD_FLMODE == 128, " found %lld\n",
-                 (long long)OBD_MD_FLMODE);
-        LASSERTF(OBD_MD_FLTYPE == 256, " found %lld\n",
-                 (long long)OBD_MD_FLTYPE);
-        LASSERTF(OBD_MD_FLUID == 512, " found %lld\n",
-                 (long long)OBD_MD_FLUID);
-        LASSERTF(OBD_MD_FLGID == 1024, " found %lld\n",
-                 (long long)OBD_MD_FLGID);
-        LASSERTF(OBD_MD_FLFLAGS == 2048, " found %lld\n",
-                 (long long)OBD_MD_FLFLAGS);
-        LASSERTF(OBD_MD_FLNLINK == 8192, " found %lld\n",
-                 (long long)OBD_MD_FLNLINK);
-        LASSERTF(OBD_MD_FLGENER == 16384, " found %lld\n",
-                 (long long)OBD_MD_FLGENER);
-        LASSERTF(OBD_MD_FLINLINE == 32768, " found %lld\n",
-                 (long long)OBD_MD_FLINLINE);
-        LASSERTF(OBD_MD_FLRDEV == 65536, " found %lld\n",
-                 (long long)OBD_MD_FLRDEV);
-        LASSERTF(OBD_MD_FLEASIZE == 131072, " found %lld\n",
-                 (long long)OBD_MD_FLEASIZE);
-        LASSERTF(OBD_MD_LINKNAME == 262144, " found %lld\n",
-                 (long long)OBD_MD_LINKNAME);
-        LASSERTF(OBD_MD_FLHANDLE == 524288, " found %lld\n",
-                 (long long)OBD_MD_FLHANDLE);
-        LASSERTF(OBD_MD_FLCKSUM == 1048576, " found %lld\n",
-                 (long long)OBD_MD_FLCKSUM);
-        LASSERTF(OBD_MD_FLQOS == 2097152, " found %lld\n",
-                 (long long)OBD_MD_FLQOS);
-        LASSERTF(OBD_MD_FLCOOKIE == 8388608, " found %lld\n",
-                 (long long)OBD_MD_FLCOOKIE);
-        LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n",
-                 (long long)OBD_MD_FLGROUP);
-        LASSERTF(OBD_MD_FLFID == 33554432, " found %lld\n",
-                 (long long)OBD_MD_FLFID);
-        LASSERTF(OBD_MD_FLEPOCH == 67108864, " found %lld\n",
-                 (long long)OBD_MD_FLEPOCH);
-        LASSERTF(OBD_MD_FLGRANT == 134217728, " found %lld\n",
-                 (long long)OBD_MD_FLGRANT);
-        LASSERTF(OBD_MD_FLDIREA == 268435456, " found %lld\n",
-                 (long long)OBD_MD_FLDIREA);
-        LASSERTF(OBD_MD_FLUSRQUOTA == 536870912, " found %lld\n",
-                 (long long)OBD_MD_FLUSRQUOTA);
-        LASSERTF(OBD_MD_FLGRPQUOTA == 1073741824, " found %lld\n",
-                 (long long)OBD_MD_FLGRPQUOTA);
-        LASSERTF(OBD_MD_MDS == 4294967296ULL, " found %lld\n",
-                 (long long)OBD_MD_MDS);
-        LASSERTF(OBD_MD_REINT == 8589934592ULL, " found %lld\n",
-                 (long long)OBD_MD_REINT);
-        LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n",
-                 (long long)OBD_FL_INLINEDATA);
-        LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n",
-                 (long long)OBD_FL_OBDMDEXISTS);
-        LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n",
-                 (long long)OBD_FL_DELORPHAN);
-        LASSERTF(OBD_FL_NORPC == 8, " found %lld\n",
-                 (long long)OBD_FL_NORPC);
-        LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n",
-                 (long long)OBD_FL_IDONLY);
-        LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n",
-                 (long long)OBD_FL_RECREATE_OBJS);
-        LASSERTF(OBD_FL_DEBUG_CHECK == 64, " found %lld\n",
-                 (long long)OBD_FL_DEBUG_CHECK);
-        LASSERTF(OBD_FL_NO_USRQUOTA == 256, " found %lld\n",
-                 (long long)OBD_FL_NO_USRQUOTA);
-        LASSERTF(OBD_FL_NO_GRPQUOTA == 512, " found %lld\n",
-                 (long long)OBD_FL_NO_GRPQUOTA);
+        CLASSERT(OBD_MD_FLID == (0x00000001ULL));
+        CLASSERT(OBD_MD_FLATIME == (0x00000002ULL));
+        CLASSERT(OBD_MD_FLMTIME == (0x00000004ULL));
+        CLASSERT(OBD_MD_FLCTIME == (0x00000008ULL));
+        CLASSERT(OBD_MD_FLSIZE == (0x00000010ULL));
+        CLASSERT(OBD_MD_FLBLOCKS == (0x00000020ULL));
+        CLASSERT(OBD_MD_FLBLKSZ == (0x00000040ULL));
+        CLASSERT(OBD_MD_FLMODE == (0x00000080ULL));
+        CLASSERT(OBD_MD_FLTYPE == (0x00000100ULL));
+        CLASSERT(OBD_MD_FLUID == (0x00000200ULL));
+        CLASSERT(OBD_MD_FLGID == (0x00000400ULL));
+        CLASSERT(OBD_MD_FLFLAGS == (0x00000800ULL));
+        CLASSERT(OBD_MD_FLNLINK == (0x00002000ULL));
+        CLASSERT(OBD_MD_FLGENER == (0x00004000ULL));
+        CLASSERT(OBD_MD_FLINLINE == (0x00008000ULL));
+        CLASSERT(OBD_MD_FLRDEV == (0x00010000ULL));
+        CLASSERT(OBD_MD_FLEASIZE == (0x00020000ULL));
+        CLASSERT(OBD_MD_LINKNAME == (0x00040000ULL));
+        CLASSERT(OBD_MD_FLHANDLE == (0x00080000ULL));
+        CLASSERT(OBD_MD_FLCKSUM == (0x00100000ULL));
+        CLASSERT(OBD_MD_FLQOS == (0x00200000ULL));
+        CLASSERT(OBD_MD_FLCOOKIE == (0x00800000ULL));
+        CLASSERT(OBD_MD_FLGROUP == (0x01000000ULL));
+        CLASSERT(OBD_MD_FLFID == (0x02000000ULL));
+        CLASSERT(OBD_MD_FLEPOCH == (0x04000000ULL));
+        CLASSERT(OBD_MD_FLGRANT == (0x08000000ULL));
+        CLASSERT(OBD_MD_FLDIREA == (0x10000000ULL));
+        CLASSERT(OBD_MD_FLUSRQUOTA == (0x20000000ULL));
+        CLASSERT(OBD_MD_FLGRPQUOTA == (0x40000000ULL));
+        CLASSERT(OBD_MD_FLMODEASIZE == (0x80000000ULL));
+        CLASSERT(OBD_MD_MDS == (0x0000000100000000ULL));
+        CLASSERT(OBD_MD_REINT == (0x0000000200000000ULL));
+        CLASSERT(OBD_MD_FLXATTR == (0x0000001000000000ULL));
+        CLASSERT(OBD_MD_FLXATTRLS == (0x0000002000000000ULL));
+        CLASSERT(OBD_MD_FLXATTRRM == (0x0000004000000000ULL));
+        CLASSERT(OBD_MD_FLACL == (0x0000008000000000ULL));
+        CLASSERT(OBD_FL_INLINEDATA == (0x00000001));
+        CLASSERT(OBD_FL_OBDMDEXISTS == (0x00000002));
+        CLASSERT(OBD_FL_DELORPHAN == (0x00000004));
+        CLASSERT(OBD_FL_NORPC == (0x00000008));
+        CLASSERT(OBD_FL_IDONLY == (0x00000010));
+        CLASSERT(OBD_FL_RECREATE_OBJS == (0x00000020));
+        CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040));
+        CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100));
+        CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200));
+        CLASSERT(OBD_FL_CREATE_CROW == (0x00000400));
 
         /* Checks for struct lov_mds_md_v1 */
         LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",
@@ -521,13 +486,29 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
         LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
-        LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n",
-                 (long long)LOV_MAGIC_V1);
+        CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+        CLASSERT(LOV_MAGIC_JOIN == 0x0BD20BD0);
         LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n",
                  (long long)LOV_PATTERN_RAID0);
         LASSERTF(LOV_PATTERN_RAID1 == 2, " found %lld\n",
                  (long long)LOV_PATTERN_RAID1);
 
+        /* Checks for struct lov_mds_md_join */
+        LASSERTF((int)sizeof(struct lov_mds_md_join) == 56, " found %lld\n",
+                 (long long)(int)sizeof(struct lov_mds_md_join));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_md) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_md));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md) == 32, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_array_id) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_array_id));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id) == 20, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id));
+        LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_extent_count) == 52, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_mds_md_join, lmmj_extent_count));
+        LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count));
+
         /* Checks for struct obd_statfs */
         LASSERTF((int)sizeof(struct obd_statfs) == 144, " found %lld\n",
                  (long long)(int)sizeof(struct obd_statfs));
@@ -567,6 +548,42 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_statfs, os_state));
         LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare1));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare1));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare2));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare3));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare4));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare5));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare6));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare7));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare8));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
+        LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_statfs, os_spare9));
+        LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
 
         /* Checks for struct obd_ioobj */
         LASSERTF((int)sizeof(struct obd_ioobj) == 24, " found %lld\n",
@@ -865,26 +882,22 @@ void lustre_assert_wire_constants(void)
                  (long long)FMODE_READ);
         LASSERTF(FMODE_WRITE == 2, " found %lld\n",
                  (long long)FMODE_WRITE);
-        LASSERTF(FMODE_EXEC == 4, " found %lld\n",
-                 (long long)FMODE_EXEC);
-        LASSERTF(MDS_OPEN_CREAT == 64, " found %lld\n",
-                 (long long)MDS_OPEN_CREAT);
-        LASSERTF(MDS_OPEN_EXCL == 128, " found %lld\n",
-                 (long long)MDS_OPEN_EXCL);
-        LASSERTF(MDS_OPEN_TRUNC == 512, " found %lld\n",
-                 (long long)MDS_OPEN_TRUNC);
-        LASSERTF(MDS_OPEN_APPEND == 1024, " found %lld\n",
-                 (long long)MDS_OPEN_APPEND);
-        LASSERTF(MDS_OPEN_SYNC == 4096, " found %lld\n",
-                 (long long)MDS_OPEN_SYNC);
-        LASSERTF(MDS_OPEN_DIRECTORY == 65536, " found %lld\n",
-                 (long long)MDS_OPEN_DIRECTORY);
-        LASSERTF(MDS_OPEN_DELAY_CREATE == 16777216, " found %lld\n",
-                 (long long)MDS_OPEN_DELAY_CREATE);
+        LASSERTF(MDS_FMODE_EXEC == 4, " found %lld\n",
+                 (long long)MDS_FMODE_EXEC);
+        CLASSERT(MDS_OPEN_CREAT == 00000100);
+        CLASSERT(MDS_OPEN_EXCL == 00000200);
+        CLASSERT(MDS_OPEN_TRUNC == 00001000);
+        CLASSERT(MDS_OPEN_APPEND == 00002000);
+        CLASSERT(MDS_OPEN_SYNC == 00010000);
+        CLASSERT(MDS_OPEN_DIRECTORY == 00200000);
+        CLASSERT(MDS_OPEN_DELAY_CREATE == 0100000000);
         CLASSERT(MDS_OPEN_OWNEROVERRIDE == 0200000000);
         CLASSERT(MDS_OPEN_JOIN_FILE == 0400000000);
         CLASSERT(MDS_OPEN_HAS_EA == 010000000000);
         CLASSERT(MDS_OPEN_HAS_OBJS == 020000000000);
+        CLASSERT(MDS_INODELOCK_LOOKUP == 0x000001);
+        CLASSERT(MDS_INODELOCK_UPDATE == 0x000002);
+        CLASSERT(MDS_INODELOCK_OPEN == 0x000004);
 
         /* Checks for struct mds_rec_setattr */
         LASSERTF((int)sizeof(struct mds_rec_setattr) == 96, " found %lld\n",
@@ -1118,6 +1131,18 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct mds_rec_rename *)0)->rn_time) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct mds_rec_rename *)0)->rn_time));
 
+        /* Checks for struct mds_rec_join */
+        LASSERTF((int)sizeof(struct mds_rec_join) == 24, " found %lld\n",
+                 (long long)(int)sizeof(struct mds_rec_join));
+        LASSERTF((int)offsetof(struct mds_rec_join, jr_fid) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_rec_join, jr_fid));
+        LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_fid) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_fid));
+        LASSERTF((int)offsetof(struct mds_rec_join, jr_headsize) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_rec_join, jr_headsize));
+        LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_headsize) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_headsize));
+
         /* Checks for struct lov_desc */
         LASSERTF((int)sizeof(struct lov_desc) == 88, " found %lld\n",
                  (long long)(int)sizeof(struct lov_desc));
@@ -1214,6 +1239,14 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct ldlm_flock *)0)->pid) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct ldlm_flock *)0)->pid));
 
+        /* Checks for struct ldlm_inodebits */
+        LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, " found %lld\n",
+                 (long long)(int)sizeof(struct ldlm_inodebits));
+        LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct ldlm_inodebits, bits));
+        LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
+
         /* Checks for struct ldlm_intent */
         LASSERTF((int)sizeof(struct ldlm_intent) == 8, " found %lld\n",
                  (long long)(int)sizeof(struct ldlm_intent));
@@ -1349,24 +1382,16 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct llog_logid, lgl_ogen));
         LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
-        LASSERTF(OST_SZ_REC == 274730752, " found %lld\n",
-                 (long long)OST_SZ_REC);
-        LASSERTF(OST_RAID1_REC == 274731008, " found %lld\n",
-                 (long long)OST_RAID1_REC);
-        LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n",
-                 (long long)MDS_UNLINK_REC);
-        LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n",
-                 (long long)MDS_SETATTR_REC);
-        LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n",
-                 (long long)OBD_CFG_REC);
-        LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n",
-                 (long long)PTL_CFG_REC);
-        LASSERTF(LLOG_GEN_REC == 274989056, " found %lld\n",
-                 (long long)LLOG_GEN_REC);
-        LASSERTF(LLOG_HDR_MAGIC == 275010873, " found %lld\n",
-                 (long long)LLOG_HDR_MAGIC);
-        LASSERTF(LLOG_LOGID_MAGIC == 275010875, " found %lld\n",
-                 (long long)LLOG_LOGID_MAGIC);
+        CLASSERT(OST_SZ_REC == 274730752);
+        CLASSERT(OST_RAID1_REC == 274731008);
+        CLASSERT(MDS_UNLINK_REC == 274801668);
+        CLASSERT(MDS_SETATTR_REC == 274801665);
+        CLASSERT(OBD_CFG_REC == 274857984);
+        CLASSERT(PTL_CFG_REC == 274923520);
+        CLASSERT(LLOG_GEN_REC == 274989056);
+        CLASSERT(LLOG_JOIN_REC == 275054592);
+        CLASSERT(LLOG_HDR_MAGIC == 275010873);
+        CLASSERT(LLOG_LOGID_MAGIC == 275010875);
 
         /* Checks for struct llog_catid */
         LASSERTF((int)sizeof(struct llog_catid) == 32, " found %lld\n",
@@ -1711,20 +1736,15 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
         LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
-        LASSERTF(LLOG_ORIGIN_HANDLE_CREATE == 501, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_CREATE);
-        LASSERTF(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
-        LASSERTF(LLOG_ORIGIN_HANDLE_READ_HEADER == 503, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_READ_HEADER);
-        LASSERTF(LLOG_ORIGIN_HANDLE_WRITE_REC == 504, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_WRITE_REC);
-        LASSERTF(LLOG_ORIGIN_HANDLE_CLOSE == 505, " found %lld\n",
-                 (long long)LLOG_ORIGIN_HANDLE_CLOSE);
-        LASSERTF(LLOG_ORIGIN_CONNECT == 506, " found %lld\n",
-                 (long long)LLOG_ORIGIN_CONNECT);
-        LASSERTF(LLOG_CATINFO == 507, " found %lld\n",
-                 (long long)LLOG_CATINFO);
+        CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501);
+        CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502);
+        CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503);
+        CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504);
+        CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505);
+        CLASSERT(LLOG_ORIGIN_CONNECT == 506);
+        CLASSERT(LLOG_CATINFO == 507);
+        CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508);
+        CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
 
         /* Checks for struct llogd_conn_body */
         LASSERTF((int)sizeof(struct llogd_conn_body) == 40, " found %lld\n",
@@ -1742,6 +1762,38 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
 
+        /* Checks for struct llog_array_rec */
+        LASSERTF((int)sizeof(struct llog_array_rec) == 72, " found %lld\n",
+                 (long long)(int)sizeof(struct llog_array_rec));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_hdr) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_hdr));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_hdr) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_hdr));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_med) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_med));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_med) == 48, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_med));
+        LASSERTF((int)offsetof(struct llog_array_rec, lmr_tail) == 64, " found %lld\n",
+                 (long long)(int)offsetof(struct llog_array_rec, lmr_tail));
+        LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_tail) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_tail));
+
+        /* Checks for struct mds_extent_desc */
+        LASSERTF((int)sizeof(struct mds_extent_desc) == 48, " found %lld\n",
+                 (long long)(int)sizeof(struct mds_extent_desc));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_start) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_start));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_start) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_start));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_len) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_len));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_len) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_len));
+        LASSERTF((int)offsetof(struct mds_extent_desc, med_lmm) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_extent_desc, med_lmm));
+        LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_lmm) == 32, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm));
+
         /* Checks for struct qunit_data */
         LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n",
                  (long long)(int)sizeof(struct qunit_data));