Whamcloud - gitweb
Merge b_md into HEAD
author pschwan <pschwan>
Fri, 7 Feb 2003 18:01:04 +0000 (18:01 +0000)
committer pschwan <pschwan>
Fri, 7 Feb 2003 18:01:04 +0000 (18:01 +0000)
* bug fixes
 - Fix ldlm_lock_match on the MDS to avoid matching remote locks (592)
 - Fix fsfilt_extN_readpage() to read a full page of directory
   entries, or fake the remainder if PAGE_SIZE != blocksize (500)
 - Avoid extra mdc_getattr() in ll_intent_lock when possible (534, 604)
 - Fix imbalanced LOV object allocation and out-of-bound access (469)
 - Most intent operations were removed, in favour of a new RPC mode
   that does a single RPC to the server and bypasses most of the VFS
 - All LDLM resource ID arrays were removed in favour of ldlm_res_id
 - Aggressively cancel local locks on DLM servers
 - mds_reint_unlink sends EA to the client if it's the last nlink.
   client uses that EA to unlink OST objects.
 - mds_reint_{rename,unlink,link} were rewritten to take ordered locks
 - recursive symlinks were fixed (440)
 - fixed NULL deref in DEBUG_REQ
 - filter_update_lastobjid no longer calls sync, which annoyed extN
 - fixed multi-client small-writes to a single file problem (445)
 - fixed mtime updates during file writes (607)
 - fixed vector writes on obdfilter causing problems when ENOSPC (670)
 - fixed bug in obd_brw_read/write() (under guise of testing 367)
 - fixed Linux OST size reporting problem (444, 656)
 - OST now updates object mtime with writes or setattr (607, 619)
 - client verifies file size before zeroing page past EOF (445)
 - OST now writes last allocated objid to disk with allocation (108)
 - LOV on echo now works (409)
 * protocol changes
 - mds_reint_unlink sends a new buffer, with the EA included.  this
   buffer is only valid if body->valid & OBD_MD_FLEASIZE, which is only
   set if a regular file was being unlinked, and it was the last link
 - use PtlGet from the target for bulk writes (315)
 - OST now updates object mtime with writes or setattr (607, 619)
 - LDLM now has a grant-time callback to revalidate locked items, if
   necessary (604)
 - Many MDS operations were reorganized to combat race conditions
* other changes
 - Merge b_intel branch (updated lprocfs code) - now at /proc/fs/lustre
 - configure check to avoid gcc version 2.96 20000731-2.96-98 (606)

179 files changed:
lustre/ChangeLog
lustre/Rules
lustre/archdep.m4
lustre/cobd/cache_obd.c
lustre/cobd/lproc_cache.c
lustre/conf/Makefile.am
lustre/conf/lustre2ldif.xsl
lustre/configure.in
lustre/extN/Makefile.am
lustre/extN/ext3-unmount_sync.diff [new file with mode: 0644]
lustre/extN/ext3-use-after-free.diff [new file with mode: 0644]
lustre/extN/extN-iget-debug.diff [new file with mode: 0644]
lustre/extN/extN-misc-fixup.diff
lustre/extN/extN-wantedi.diff
lustre/extN/htree-ext3-2.4.18.diff
lustre/extN/linux-2.4.18ea-0.8.26.diff
lustre/include/linux/lprocfs_status.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_export.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_import.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_mds.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_echo.h
lustre/include/linux/obd_filter.h
lustre/include/linux/obd_support.h
lustre/kernel_patches/README [moved from lustre/kernel_patches/scripts/docco.txt with 100% similarity]
lustre/kernel_patches/patches/dev_read_only.patch
lustre/kernel_patches/patches/dev_read_only_hp.patch [new file with mode: 0644]
lustre/kernel_patches/patches/exports.patch
lustre/kernel_patches/patches/exports_hp.patch [new file with mode: 0644]
lustre/kernel_patches/patches/invalidate_show.patch [new file with mode: 0644]
lustre/kernel_patches/patches/iod-rmap-exports.patch [new file with mode: 0644]
lustre/kernel_patches/patches/jbd-transno-cb.patch [new file with mode: 0644]
lustre/kernel_patches/patches/kmem_cache_validate_hp.patch [new file with mode: 0644]
lustre/kernel_patches/patches/lustre_version.patch
lustre/kernel_patches/patches/vanilla-2.4.19.patch
lustre/kernel_patches/patches/vfs_intent-2.4.18-18.patch
lustre/kernel_patches/patches/vfs_intent.patch
lustre/kernel_patches/patches/vfs_intent_hp.patch [new file with mode: 0644]
lustre/kernel_patches/pc/dev_read_only_hp.pc [new file with mode: 0644]
lustre/kernel_patches/pc/exports_hp.pc [new file with mode: 0644]
lustre/kernel_patches/pc/invalidate_show.pc [new file with mode: 0644]
lustre/kernel_patches/pc/iod-rmap-exports.pc [new file with mode: 0644]
lustre/kernel_patches/pc/jbd-transno-cb.pc [new file with mode: 0644]
lustre/kernel_patches/pc/kmem_cache_validate.pc
lustre/kernel_patches/pc/kmem_cache_validate_hp.pc [new file with mode: 0644]
lustre/kernel_patches/pc/vanilla-2.4.19.pc
lustre/kernel_patches/pc/vfs_intent_hp.pc [new file with mode: 0644]
lustre/kernel_patches/series/chaos
lustre/kernel_patches/series/hp-pnnl
lustre/kernel_patches/series/rh-2.4.18-18
lustre/kernel_patches/series/rh-8.0
lustre/kernel_patches/series/vanilla-2.4.18
lustre/kernel_patches/series/vanilla-2.4.19
lustre/kernel_patches/txt/exports.txt
lustre/kernel_patches/txt/exports_hp.txt [new file with mode: 0644]
lustre/kernel_patches/txt/invalidate_show.txt [new file with mode: 0644]
lustre/kernel_patches/which_patch
lustre/ldlm/Makefile.am
lustre/ldlm/ldlm_extent.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/ldlm/ldlm_test.c
lustre/lib/client.c
lustre/lib/mds_updates.c
lustre/lib/simple.c
lustre/lib/target.c
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/super.c
lustre/llite/super25.c
lustre/llite/symlink.c
lustre/lov/Makefile.am
lustre/lov/lov_obd.c
lustre/lov/lov_pack.c
lustre/lov/lproc_lov.c
lustre/mdc/lproc_mdc.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mds/Makefile.am
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c [new file with mode: 0644]
lustre/mds/mds_reint.c
lustre/obdclass/class_obd.c
lustre/obdclass/fsfilt_ext3.c
lustre/obdclass/fsfilt_extN.c
lustre/obdclass/fsfilt_reiserfs.c
lustre/obdclass/genops.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/statfs_pack.c
lustre/obdclass/sysctl.c
lustre/obdclass/uuid.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdecho/lproc_echo.c
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/lproc_osc.c
lustre/osc/osc_request.c
lustre/ost/lproc_ost.c
lustre/ost/ost_handler.c
lustre/ptlbd/blk.c
lustre/ptlbd/client.c
lustre/ptlbd/rpc.c
lustre/ptlbd/server.c
lustre/ptlrpc/client.c
lustre/ptlrpc/connection.c
lustre/ptlrpc/events.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/recovd.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/rpc.c
lustre/ptlrpc/service.c
lustre/scripts/lustre.spec.in
lustre/tests/.cvsignore
lustre/tests/Makefile.am
lustre/tests/acceptance-metadata-single.sh [new file with mode: 0644]
lustre/tests/acceptance-small.sh
lustre/tests/ba-echo.sh
lustre/tests/busy.sh [new file with mode: 0644]
lustre/tests/create.pl
lustre/tests/createmany.c
lustre/tests/createtest.c [new file with mode: 0644]
lustre/tests/echo.sh
lustre/tests/leak_finder.pl
lustre/tests/lkcdmap [new file with mode: 0755]
lustre/tests/llmount.sh
lustre/tests/llmount2-hack.sh [deleted file]
lustre/tests/llmount2-hackcleanup.sh [deleted file]
lustre/tests/llmountcleanup.sh
lustre/tests/llmountcleanup2-hack.sh [deleted file]
lustre/tests/llrmount.sh
lustre/tests/local.sh
lustre/tests/local2-hack.xml [deleted file]
lustre/tests/mkdirmany.c
lustre/tests/mount2.sh
lustre/tests/open_delay.c [new file with mode: 0644]
lustre/tests/openunlink.c
lustre/tests/recovery-small.sh [new file with mode: 0755]
lustre/tests/rename.pl [new file with mode: 0644]
lustre/tests/runiozone
lustre/tests/runregression-brw.sh
lustre/tests/runregression-net.sh
lustre/tests/runtests
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/uml.sh
lustre/utils/.cvsignore
lustre/utils/Makefile.am
lustre/utils/lconf.in
lustre/utils/lctl.c
lustre/utils/lfind.c
lustre/utils/llparser.pm [new file with mode: 0644]
lustre/utils/lmc
lustre/utils/lstripe.c
lustre/utils/obd.c
lustre/utils/obdbarrier.c [new file with mode: 0644]
lustre/utils/obdctl.h
lustre/utils/obdio.c [new file with mode: 0644]
lustre/utils/obdiolib.c [new file with mode: 0644]
lustre/utils/obdiolib.h [new file with mode: 0644]
lustre/utils/obdstat.c [new file with mode: 0644]

index 41e712f..120deef 100644 (file)
@@ -1,3 +1,43 @@
+TBD
+       * version v0_5_20
+       * bug fixes
+        - Fix ldlm_lock_match on the MDS to avoid matching remote locks (592)
+        - Fix fsfilt_extN_readpage() to read a full page of directory
+          entries, or fake the remainder if PAGE_SIZE != blocksize (500)
+         - Avoid extra mdc_getattr() in ll_intent_lock when possible (534, 604)
+        - Fix imbalanced LOV object allocation and out-of-bound access (469)
+        - Most intent operations were removed, in favour of a new RPC mode
+          that does a single RPC to the server and bypasses most of the VFS
+        - All LDLM resource ID arrays were removed in favour of ldlm_res_id
+        - Aggressively cancel local locks on DLM servers
+        - mds_reint_unlink sends EA to the client if it's the last nlink.
+          client uses that EA to unlink OST objects.
+        - mds_reint_{rename,unlink,link} were rewritten to take ordered locks
+        - recursive symlinks were fixed (439)
+        - fixed NULL deref in DEBUG_REQ
+        - filter_update_lastobjid no longer calls sync, which annoyed extN
+        - fixed multi-client small-writes to a single file problem (445)
+        - fixed mtime updates during file writes (607)
+        - fixed vector writes on obdfilter causing problems when ENOSPC (670)
+        - fixed bug in obd_brw_read/write() (under guise of testing 367)
+        - fixed Linux OST size reporting problem (444, 656)
+        - OST now updates object mtime with writes or setattr (607, 619)
+        - client verifies file size before zeroing page past EOF (445)
+        - OST now writes last allocated objid to disk with allocation (108)
+        - LOV on echo now works (409)
+        * protocol changes
+        - mds_reint_unlink sends a new buffer, with the EA included.  this
+          buffer is only valid if body->valid & OBD_MD_FLEASIZE, which is only
+          set if a regular file was being unlinked, and it was the last link
+        - use PtlGet from the target for bulk writes (315)
+        - OST now updates object mtime with writes or setattr (607, 619)
+        - LDLM now has a grant-time callback to revalidate locked items, if
+          necessary (604)
+        - Many MDS operations were reorganized to combat race conditions
+       * other changes
+        - Merge b_intel branch (updated lprocfs code) - now at /proc/fs/lustre
+        - configure check to avoid gcc version 2.96 20000731-2.96-98 (606)
+
 2003-01-06  Andreas Dilger  <adilger@clusterfs.com>
        * version v0_5_19
        * bug fixes
index 0f2fa56..069e89a 100644 (file)
@@ -17,8 +17,8 @@ tags:
        rm -f $(top_srcdir)/TAGS
        rm -f $(top_srcdir)/tags
        find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a
-       find $(top_srcdir) -name '*.[hc]' | xargs etags -a
+       find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a
        find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a
-       find $(top_srcdir) -name '*.[hc]' | xargs ctags -a
+       find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a
 
 AM_CPPFLAGS=-I$(top_builddir)/include
index b11266c..58a6576 100644 (file)
@@ -49,7 +49,7 @@ case ${host_cpu} in
 
        ia64 )
        AC_MSG_RESULT($host_cpu)
-        KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
+        KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
        KCPPFLAGS='-D__KERNEL__ -DMODULE'
         MOD_LINK=elf64_ia64
 ;;
index ac921d8..72a05cc 100644 (file)
@@ -1,10 +1,22 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_COBD
 #include <linux/obd_class.h>
 #include <linux/obd_cache.h>
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
-static int
-cobd_attach (struct obd_device *dev, obd_count len, void *data)
+static int cobd_attach(struct obd_device *dev, obd_count len, void *data)
 {
-       return (lprocfs_reg_obd (dev, status_var_nm_1, dev));
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+       return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
-static int
-cobd_detach (struct obd_device *dev)
+static int cobd_detach(struct obd_device *dev)
 {
-       return (lprocfs_dereg_obd (dev));
+       return lprocfs_obd_detach(dev);
 }
 
 static int
@@ -38,26 +48,30 @@ cobd_setup (struct obd_device *dev, obd_count len, void *buf)
         struct cache_obd  *cobd = &dev->u.cobd;
         struct obd_device *target;
         struct obd_device *cache;
+        struct obd_uuid target_uuid;
+        struct obd_uuid cache_uuid;
         int                rc;
-        
+
         if (data->ioc_inlbuf1 == NULL ||
             data->ioc_inlbuf2 == NULL)
                 return (-EINVAL);
-        
-        target = class_uuid2obd (data->ioc_inlbuf1);
-        cache  = class_uuid2obd (data->ioc_inlbuf2);
+
+        obd_str2uuid(&target_uuid, data->ioc_inlbuf1);
+        target = class_uuid2obd (&target_uuid);
+
+        obd_str2uuid(&cache_uuid, data->ioc_inlbuf2);
+        cache  = class_uuid2obd (&cache_uuid);
         if (target == NULL ||
             cache == NULL)
                 return (-EINVAL);
-        
-        /* don't bother checking attached/setup; 
-         * obd_connect() should, and it can change underneath us */
 
-        rc = obd_connect (&cobd->cobd_target, target, NULL, NULL, NULL);
+        /* don't bother checking attached/setup;
+         * obd_connect() should, and it can change underneath us */
+        rc = obd_connect (&cobd->cobd_target, target, &target_uuid, NULL, NULL);
         if (rc != 0)
                 return (rc);
 
-        rc = obd_connect (&cobd->cobd_cache, cache, NULL, NULL, NULL);
+        rc = obd_connect (&cobd->cobd_cache, cache, &cache_uuid, NULL, NULL);
         if (rc != 0)
                 goto fail_0;
 
@@ -73,14 +87,14 @@ cobd_cleanup (struct obd_device *dev)
 {
         struct cache_obd  *cobd = &dev->u.cobd;
         int                rc;
-        
+
         if (!list_empty (&dev->obd_exports))
                 return (-EBUSY);
-        
+
         rc = obd_disconnect (&cobd->cobd_cache);
         if (rc != 0)
                 CERROR ("error %d disconnecting cache\n", rc);
-        
+
         rc = obd_disconnect (&cobd->cobd_target);
         if (rc != 0)
                 CERROR ("error %d disconnecting target\n", rc);
@@ -90,7 +104,7 @@ cobd_cleanup (struct obd_device *dev)
 
 static int
 cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
-              obd_uuid_t cluuid, struct recovd_obd *recovd,
+              struct obd_uuid *cluuid, struct recovd_obd *recovd,
               ptlrpc_recovery_cb_t recover)
 {
         int rc = class_connect (conn, obd, cluuid);
@@ -103,12 +117,12 @@ static int
 cobd_disconnect (struct lustre_handle *conn)
 {
        int rc = class_disconnect (conn);
-       
+
         CERROR ("rc %d\n", rc);
        return (rc);
 }
 
-static int 
+static int
 cobd_get_info(struct lustre_handle *conn, obd_count keylen,
               void *key, obd_count *vallen, void **val)
 {
@@ -124,11 +138,11 @@ cobd_get_info(struct lustre_handle *conn, obd_count keylen,
 
         /* intercept cache utilisation info? */
 
-        return (obd_get_info (&cobd->cobd_target, 
+        return (obd_get_info (&cobd->cobd_target,
                               keylen, key, vallen, val));
 }
 
-static int 
+static int
 cobd_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
 {
         struct obd_device *obd = class_conn2obd(conn);
@@ -143,7 +157,7 @@ cobd_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
         return (obd_statfs (&cobd->cobd_target, osfs));
 }
 
-static int 
+static int
 cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
              struct lov_stripe_md *lsm)
 {
@@ -159,9 +173,9 @@ cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
         return (obd_getattr (&cobd->cobd_target, oa, lsm));
 }
 
-static int 
+static int
 cobd_open(struct lustre_handle *conn, struct obdo *oa,
-          struct lov_stripe_md *lsm)
+          struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -172,12 +186,12 @@ cobd_open(struct lustre_handle *conn, struct obdo *oa,
         }
 
         cobd = &obd->u.cobd;
-        return (obd_open (&cobd->cobd_target, oa, lsm));
+        return (obd_open (&cobd->cobd_target, oa, lsm, oti));
 }
 
-static int 
+static int
 cobd_close(struct lustre_handle *conn, struct obdo *oa,
-           struct lov_stripe_md *lsm)
+           struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -188,14 +202,15 @@ cobd_close(struct lustre_handle *conn, struct obdo *oa,
         }
 
         cobd = &obd->u.cobd;
-        return (obd_close (&cobd->cobd_target, oa, lsm));
+        return (obd_close (&cobd->cobd_target, oa, lsm, oti));
 }
 
-static int 
+static int
 cobd_preprw(int cmd, struct lustre_handle *conn,
             int objcount, struct obd_ioobj *obj,
             int niocount, struct niobuf_remote *nb,
-            struct niobuf_local *res, void **desc_private)
+            struct niobuf_local *res, void **desc_private, 
+            struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -207,19 +222,19 @@ cobd_preprw(int cmd, struct lustre_handle *conn,
 
         if ((cmd & OBD_BRW_WRITE) != 0)
                 return -EOPNOTSUPP;
-        
+
         cobd = &obd->u.cobd;
-        return (obd_preprw (cmd, &cobd->cobd_target, 
-                            objcount, obj, 
-                            niocount, nb, 
-                            res, desc_private));
+        return (obd_preprw (cmd, &cobd->cobd_target,
+                            objcount, obj,
+                            niocount, nb,
+                            res, desc_private, oti));
 }
 
-static int 
+static int
 cobd_commitrw(int cmd, struct lustre_handle *conn,
               int objcount, struct obd_ioobj *obj,
               int niocount, struct niobuf_local *local,
-              void *desc_private)
+              void *desc_private, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -231,18 +246,19 @@ cobd_commitrw(int cmd, struct lustre_handle *conn,
 
         if ((cmd & OBD_BRW_WRITE) != 0)
                 return -EOPNOTSUPP;
-        
+
         cobd = &obd->u.cobd;
         return (obd_commitrw (cmd, &cobd->cobd_target,
                               objcount, obj,
                               niocount, local,
-                              desc_private));
+                              desc_private, oti));
 }
 
-static inline int 
+static inline int
 cobd_brw(int cmd, struct lustre_handle *conn,
          struct lov_stripe_md *lsm, obd_count oa_bufs,
-         struct brw_page *pga, struct obd_brw_set *set)
+         struct brw_page *pga, struct obd_brw_set *set, 
+         struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
         struct cache_obd  *cobd;
@@ -254,13 +270,13 @@ cobd_brw(int cmd, struct lustre_handle *conn,
 
         if ((cmd & OBD_BRW_WRITE) != 0)
                 return -EOPNOTSUPP;
-        
+
         cobd = &obd->u.cobd;
-        return (obd_brw (cmd, &cobd->cobd_target, 
-                         lsm, oa_bufs, pga, set));
+        return (obd_brw (cmd, &cobd->cobd_target,
+                         lsm, oa_bufs, pga, set, oti));
 }
 
-static int 
+static int
 cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                void *karg, void *uarg)
 {
@@ -301,29 +317,26 @@ static struct obd_ops cobd_ops = {
         o_iocontrol:            cobd_iocontrol,
 };
 
-static int __init
-cobd_init (void)
+static int __init cobd_init(void)
 {
-       int   rc;
-       
-       printk (KERN_INFO "Lustre Caching OBD driver\n");
-       
-       rc = class_register_type (&cobd_ops, status_class_var,
-                                 OBD_CACHE_DEVICENAME);
-       return (rc);
+        struct lprocfs_static_vars lvars;
+        ENTRY;
+
+       printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+
+        lprocfs_init_vars(&lvars);
+        RETURN(class_register_type(&cobd_ops, lvars.module_vars,
+                                   OBD_CACHE_DEVICENAME));
 }
 
-static void __exit
-cobd_exit (void)
+static void __exit cobd_exit(void)
 {
-       class_unregister_type (OBD_CACHE_DEVICENAME);
+       class_unregister_type(OBD_CACHE_DEVICENAME);
 }
 
-MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Caching OBD driver");
 MODULE_LICENSE("GPL");
 
 module_init(cobd_init);
 module_exit(cobd_exit);
-
-       
index 5adcaf8..5170829 100644 (file)
 #include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
-/*
- * Common STATUS namespace
- */
-
-static int rd_uuid (char *page, char **start, off_t off, int count,
-                    int *eof, void *data)
-{
-        struct obd_device* dev = (struct obd_device*)data;
-
-        return (snprintf(page, count, "%s\n", dev->obd_uuid));
-}
-
-static int rd_target (char *page, char **start, off_t off, int count,
-                      int *eof, void *data)
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+/* Common STATUS namespace */
+static int rd_target(char *page, char **start, off_t off, int count,
+                     int *eof, void *data)
 {
         struct obd_device    *dev = (struct obd_device*)data;
-        struct cache_obd     *cobd = &dev->u.cobd;
-       struct lustre_handle *conn = &cobd->cobd_target;
+       struct lustre_handle *conn = &dev->u.cobd.cobd_target;
        struct obd_export    *exp;
        int    rc;
 
@@ -49,8 +41,8 @@ static int rd_target (char *page, char **start, off_t off, int count,
                rc = snprintf (page, count, "not set up\n");
        else {
                exp = class_conn2export (conn);
-               LASSERT (exp != NULL);
-               rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid);
+               LASSERT(exp != NULL);
+               rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
        }
        return (rc);
 }
@@ -59,8 +51,7 @@ static int rd_cache(char *page, char **start, off_t off, int count,
                     int *eof, void *data)
 {
         struct obd_device    *dev = (struct obd_device*)data;
-       struct cache_obd     *cobd = &dev->u.cobd;
-       struct lustre_handle *conn = &cobd->cobd_cache;
+       struct lustre_handle *conn = &dev->u.cobd.cobd_cache;
        struct obd_export    *exp;
        int    rc;
 
@@ -69,27 +60,22 @@ static int rd_cache(char *page, char **start, off_t off, int count,
        else {
                exp = class_conn2export (conn);
                LASSERT (exp != NULL);
-               rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid);
+               rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
        }
        return (rc);
 }
 
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/target_uuid", rd_target, 0, 0},
-        {"status/cache_uuid", rd_cache, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",        lprocfs_rd_uuid,    0, 0 },
+        { "target_uuid", rd_target,          0, 0 },
+        { "cache_uuid",  rd_cache,           0, 0 },
+        { 0 }
 };
 
-int rd_numrefs(char *page, char **start, off_t off, int count,
-               int *eof, void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-
-        return (snprintf(page, count, "%d\n", class->typ_refcnt));
-}
-
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
 };
+#endif /* LPROCFS */
+
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 7f98129..a205d10 100644 (file)
@@ -3,11 +3,13 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-EXTRA_DIST = lustre2ldif.xsl lustre.dtd lustre.schema slapd-lustre.conf
+EXTRA_DIST = lustre.dtd lustre.schema slapd-lustre.conf lustre2ldif.xsl top.ldif
 ldapconfdir = $(sysconfdir)/openldap
 ldapschemadir = $(sysconfdir)/openldap/schema
 ldapconf_SCRIPTS = slapd-lustre.conf
 ldapschema_SCRIPTS = lustre.schema
+pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)'
+pkglib_DATA = top.ldif lustre2ldif.xsl
 
 include $(top_srcdir)/Rules
 
index f5d8098..c7ea957 100644 (file)
@@ -184,6 +184,10 @@ networkRef: <value-of select="@uuidref"/>
 mdsRef: <value-of select="@uuidref"/>
 </template>
 
+<template match="mdsdev_ref">
+mdsdevRef: <value-of select="@uuidref"/>
+</template>
+
 <template match="mountpoint_ref">
 mountpointRef: <value-of select="@uuidref"/>
 </template>
index 6ef9286..d51fb40 100644 (file)
@@ -1,7 +1,7 @@
 AC_INIT
 AC_CANONICAL_SYSTEM
 
-# Copyright (C) 2001  Cluster File Systems, Inc.
+# Copyright (C) 2001-2003 Cluster File Systems, Inc.
 #
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
@@ -9,7 +9,29 @@ AC_CANONICAL_SYSTEM
 # Automake variables.  Steal the version number from lustre.spec.in.
 AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*version //; p; q; }' scripts/lustre.spec.in]))
 #AM_MAINTAINER_MODE
+
 AC_PROG_CC
+AC_MSG_CHECKING(for buggy compiler)
+CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
+bad_cc() {
+       echo
+       echo "   '$CC_VERSION'"
+       echo "  has been known to generate bad code, "
+       echo "  please get an updated compiler."
+       AC_MSG_ERROR(sorry)
+}
+case "$CC_VERSION" in 
+       # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
+       # without "sub    $0xc,%esp" to protect the stack from being
+       # stomped on by interrupts (bug 606)
+       "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
+               bad_cc
+               ;;
+       *)
+               AC_MSG_RESULT(no known problems)
+               ;;
+esac
+
 AC_PROG_RANLIB
 
 # 
index 5ad1642..3fc2b66 100644 (file)
@@ -18,6 +18,7 @@ EXTN_FIXES = patch-2.4.18-chaos22
 EXTNP = htree-ext3-2.4.18.diff linux-2.4.18ea-0.8.26.diff
 EXTNP+= ext3-2.4.18-ino_sb_macro.diff extN-misc-fixup.diff extN-noread.diff
 EXTNP+= extN-wantedi.diff
+#EXTNP+= extN-iget-debug.diff
 EXTNC = balloc.c bitmap.c dir.c file.c fsync.c ialloc.c inode.c ioctl.c
 EXTNC+= namei.c super.c symlink.c
 EXTNI = extN_fs.h extN_fs_i.h extN_fs_sb.h extN_jbd.h quotaops.h
@@ -107,18 +108,21 @@ patch-stamp: sed-stamp $(EXTNP)
        list='$(EXTN_EXTRA)'; for f in $$list; do $(RM) $(top_builddir)/$$f; done
        if [ -f $(srcdir)/extN.patch-$(RELEASE) ]; then                       \
          echo "applying patch $(srcdir)/extN.patch-$(RELEASE)";              \
-         (cd $(top_builddir) && patch -p0) < $(srcdir)/extN.patch-$(RELEASE);  \
+         (cd $(top_builddir) && patch -p0) < $(srcdir)/extN.patch-$(RELEASE);\
        else                                                                  \
-         echo "If first patch fails, read NOTE in extN/Makefile.am";         \
          list='$(EXTNP)'; \
-         sed '/i_version/q' $(extN_orig)/namei.c | tail -2 |          \
-           grep extN_mark_inode_dirty >/dev/null && list="$(EXTN_FIXES) $$list"; \
+         grep -q "err = extN_mark_inode_dirty" $(extN_orig)/namei.c ||       \
+           list="ext3-use-after-free.diff $$list";                           \
+         sed '/i_version/q' $(extN_orig)/namei.c | tail -2 |                 \
+           grep -q extN_mark_inode_dirty && list="$(EXTN_FIXES) $$list";     \
+         grep -q "if (do_sync_supers)" $(extN_orig)/super.c &&               \
+           list="ext3-unmount_sync.diff $$list";                             \
          for p in $$list; do                                                 \
            echo "applying patch $$p";                                        \
            sed $(SUB) $(srcdir)/$$p |                                        \
-             (cd $(top_builddir) && patch -p1) || exit $$?;                    \
+             (cd $(top_builddir) && patch -p1) || exit $$?;                  \
          done;                                                               \
-         echo "It is OK if the next patch says it is already applied";       \
+         echo "It is OK if the next patch says it is skipping this patch";   \
          echo "applying patch $(srcdir)/extN-2.4.18-exports.diff";           \
          (cd $(top_builddir) &&                                              \
            patch -N -p1) < $(srcdir)/extN-2.4.18-exports.diff;               \
diff --git a/lustre/extN/ext3-unmount_sync.diff b/lustre/extN/ext3-unmount_sync.diff
new file mode 100644 (file)
index 0000000..1f9b796
--- /dev/null
@@ -0,0 +1,59 @@
+From adilger@clusterfs.com Mon Dec  2 10:26:44 2002
+Date: Mon, 2 Dec 2002 10:26:44 -0700
+From: Andreas Dilger <adilger@clusterfs.com>
+To: Lustre LLNL Mailing list <lc-lustre@llnl.gov>,
+       Lustre Development Mailing List <lustre-devel@lists.sourceforge.net>
+Subject: Re: data corrupting bug in 2.4.20 ext3, data=journal
+Message-ID: <20021202102644.H1422@schatzie.adilger.int>
+Mail-Followup-To: Lustre LLNL Mailing list <lc-lustre@llnl.gov>,
+       Lustre Development Mailing List <lustre-devel@lists.sourceforge.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+User-Agent: Mutt/1.2.5.1i
+X-GPG-Key: 1024D/0D35BED6
+X-GPG-Fingerprint: 7A37 5D79 BF1B CECA D44F  8A29 A488 39F5 0D35 BED6
+Status: RO
+Content-Length: 1160
+Lines: 39
+
+Here is the new-improved fix for the ext3 discarding data at umount bug
+discovered late last week.  To be used instead of the previous ext3 fix.
+
+Sadly, this is completely unrelated to the problems Mike is having with
+ext3 under UML, since it is an unmount-time problem.
+
+----- Forwarded message from "Stephen C. Tweedie" <sct@redhat.com> -----
+The attached patch seems to fix things for me.
+
+Cheers,
+ Stephen
+
+
+--- linux-2.4-ext3merge/fs/ext3/super.c.=K0027=.orig   2002-12-02 15:35:13.000000000 +0000
++++ linux-2.4-ext3merge/fs/ext3/super.c        2002-12-02 15:35:14.000000000 +0000
+@@ -1640,7 +1640,12 @@
+       sb->s_dirt = 0;
+       target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+-      if (do_sync_supers) {
++      /*
++       * Tricky --- if we are unmounting, the write really does need
++       * to be synchronous.  We can detect that by looking for NULL in
++       * sb->s_root.
++       */
++      if (do_sync_supers || !sb->s_root) {
+               unlock_super(sb);
+               log_wait_commit(EXT3_SB(sb)->s_journal, target);
+               lock_super(sb);
+
+
+----- End forwarded message -----
+
+Cheers, Andreas
+--
+Andreas Dilger
+http://sourceforge.net/projects/ext2resize/
+http://www-mddsp.enel.ucalgary.ca/People/adilger/
+
+
diff --git a/lustre/extN/ext3-use-after-free.diff b/lustre/extN/ext3-use-after-free.diff
new file mode 100644 (file)
index 0000000..8cd673f
--- /dev/null
@@ -0,0 +1,65 @@
+
+
+If ext3_add_nondir() fails it will do an iput() of the inode.  But we
+continue to run ext3_mark_inode_dirty() against the potentially-freed
+inode.  This oopses when slab poisoning is enabled.
+
+Fix it so that we only run ext3_mark_inode_dirty() if the inode was
+successfully instantiated.
+
+This bug was added in 2.4.20-pre9.
+
+
+ fs/ext3/namei.c |   11 +++++------
+ 1 files changed, 5 insertions(+), 6 deletions(-)
+
+--- 24/fs/ext3/namei.c~ext3-use-after-free     Sun Dec 15 11:27:50 2002
++++ 24-akpm/fs/ext3/namei.c    Sun Dec 15 11:27:50 2002
+@@ -429,8 +429,11 @@ static int ext3_add_nondir(handle_t *han
+ {
+       int err = ext3_add_entry(handle, dentry, inode);
+       if (!err) {
+-              d_instantiate(dentry, inode);
+-              return 0;
++              err = ext3_mark_inode_dirty(handle, inode);
++              if (err == 0) {
++                      d_instantiate(dentry, inode);
++                      return 0;
++              }
+       }
+       ext3_dec_count(handle, inode);
+       iput(inode);
+@@ -465,7 +468,6 @@ static int ext3_create (struct inode * d
+               inode->i_fop = &ext3_file_operations;
+               inode->i_mapping->a_ops = &ext3_aops;
+               err = ext3_add_nondir(handle, dentry, inode);
+-              ext3_mark_inode_dirty(handle, inode);
+       }
+       ext3_journal_stop(handle, dir);
+       return err;
+@@ -490,7 +492,6 @@ static int ext3_mknod (struct inode * di
+       if (!IS_ERR(inode)) {
+               init_special_inode(inode, mode, rdev);
+               err = ext3_add_nondir(handle, dentry, inode);
+-              ext3_mark_inode_dirty(handle, inode);
+       }
+       ext3_journal_stop(handle, dir);
+       return err;
+@@ -934,7 +935,6 @@ static int ext3_symlink (struct inode * 
+       }
+       inode->u.ext3_i.i_disksize = inode->i_size;
+       err = ext3_add_nondir(handle, dentry, inode);
+-      ext3_mark_inode_dirty(handle, inode);
+ out_stop:
+       ext3_journal_stop(handle, dir);
+       return err;
+@@ -971,7 +971,6 @@ static int ext3_link (struct dentry * ol
+       atomic_inc(&inode->i_count);
+       err = ext3_add_nondir(handle, dentry, inode);
+-      ext3_mark_inode_dirty(handle, inode);
+       ext3_journal_stop(handle, dir);
+       return err;
+ }
+
+_
diff --git a/lustre/extN/extN-iget-debug.diff b/lustre/extN/extN-iget-debug.diff
new file mode 100644 (file)
index 0000000..9714e35
--- /dev/null
@@ -0,0 +1,48 @@
+--- linux/fs/ext3/namei.c.orig Thu Jan 30 01:15:13 2003
++++ linux/fs/ext3/namei.c      Sat Feb  1 00:33:46 2003
+@@ -710,6 +710,24 @@
+       return ret;
+ }
++static int extN_find_inode(struct inode *inode, unsigned long ino,
++                         void *opaque)
++{
++      const char *name = NULL;
++      int len = 0;
++
++      if (opaque) {
++              struct dentry *dentry = opaque;
++              name = dentry->d_name.name;
++              len = dentry->d_name.len;
++      }
++      printk(KERN_INFO "finding inode %s:%lu (%p) count %d (%p = %*s)\n",
++             kdevname(inode->i_dev), ino, inode, atomic_read(&inode->i_count),
++             opaque, len, name ? name : "");
++
++      return 1;
++}
++
+ static struct dentry *extN_lookup(struct inode * dir, struct dentry *dentry)
+ {
+       struct inode * inode;
+@@ -724,7 +742,7 @@
+       if (bh) {
+               unsigned long ino = le32_to_cpu(de->inode);
+               brelse (bh);
+-              inode = iget(dir->i_sb, ino);
++              inode = iget4(dir->i_sb, ino, extN_find_inode, dentry);
+               if (!inode)
+                       return ERR_PTR(-EACCES);
+--- linux/fs/ext3/inode.c.orig Thu Jan 30 01:15:13 2003
++++ linux/fs/ext3/inode.c      Sat Feb  1 00:34:45 2003
+@@ -166,6 +166,9 @@
+  */
+ void extN_put_inode (struct inode * inode)
+ {
++      printk(KERN_INFO "putting inode %s:%lu (%p) count %d\n",
++             kdevname(inode->i_dev), inode->i_ino, inode,
++             atomic_read(&inode->i_count));
+       extN_discard_prealloc (inode);
+ }
index 29b36fb..db0bc0f 100644 (file)
                goto out_journal;
        }
        EXTN_SB(sb)->journal_bdev = bdev;
+@@ -1560,6 +1560,7 @@
+       unlock_kernel();
+       return ret;
+ }
++EXPORT_SYMBOL(extN_force_commit); /* here to avoid potential patch collisions */
+ /*
+  * Ext3 always journals updates to the superblock itself, so we don't
index 3be559f..a55aec0 100644 (file)
@@ -74,7 +74,7 @@
 +              if (err) goto fail;
 +
 +              if (extN_set_bit(j, bh->b_data)) {
-+                      printk(KERN_ERR "goal inode %lu unavailable", goal);
++                      printk(KERN_ERR "goal inode %lu unavailable\n", goal);
 +                      /* Oh well, we tried. */
 +                      goto repeat;
 +              }
index 9eba30c..4251251 100644 (file)
  static struct buffer_head * ext3_find_entry (struct dentry *dentry,
                                        struct ext3_dir_entry_2 ** res_dir)
  {
-@@ -119,10 +564,76 @@
+@@ -119,10 +564,70 @@
        int num = 0;
        int nblocks, i, err;
        struct inode *dir = dentry->d_parent->d_inode;
-+      int namelen;
-+      const u8 *name;
-+      unsigned blocksize;
 +      ext3_dirent *de, *top;
  
        *res_dir = NULL;
        sb = dir->i_sb;
-+      blocksize = sb->s_blocksize;
-+      namelen = dentry->d_name.len;
-+      name = dentry->d_name.name;
-+      if (namelen > EXT3_NAME_LEN)
++      if (dentry->d_name.len > EXT3_NAME_LEN)
 +              return NULL;
 +      if (ext3_dx && is_dx(dir)) {
-+              u32 hash = dx_hash (name, namelen);
++              u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
 +              struct dx_frame frames[2], *frame;
 +              if (!(frame = dx_probe (dir, hash, frames)))
 +                      return NULL;
 +              if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
 +                      goto dxfail;
 +              de = (ext3_dirent *) bh->b_data;
-+              top = (ext3_dirent *) ((char *) de + blocksize -
++              top = (ext3_dirent *) ((char *) de + sb->s_blocksize -
 +                              EXT3_DIR_REC_LEN(0));
 +              for (; de < top; de = ext3_next_entry(de))
-+                      if (ext3_match (namelen, name, de)) {
++                      if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) {
 +                              if (!ext3_check_dir_entry("ext3_find_entry",
 +                                        dir, de, bh,
 +                                        (block<<EXT3_BLOCK_SIZE_BITS(sb))
        nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
        start = dir->u.ext3_i.i_dir_start_lookup;
        if (start >= nblocks)
-@@ -237,6 +748,92 @@
+@@ -237,6 +748,90 @@
                de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
  }
  
 +                      struct buffer_head **bh,struct dx_frame *frame,
 +                      u32 hash, int *error)
 +{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count, continued;
++      unsigned count;
 +      struct buffer_head *bh2;
 +      u32 newblock;
-+      unsigned MAX_DX_MAP = PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1;
 +      u32 hash2;
 +      struct dx_map_entry *map;
 +      char *data1 = (*bh)->b_data, *data2, *data3;
 +
 +      data2 = bh2->b_data;
 +
-+      map = kmalloc(sizeof(*map) * MAX_DX_MAP, GFP_KERNEL);
++      map = kmalloc(sizeof(*map) * (PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1),
++                    GFP_KERNEL);
 +      if (!map)
 +              panic("no memory for do_split\n");
-+      count = dx_make_map ((ext3_dirent *) data1, blocksize, map);
++      count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map);
 +      split = count/2; // need to adjust to actual middle
 +      dx_sort_map (map, count);
 +      hash2 = map[split].hash;
-+      continued = hash2 == map[split - 1].hash;
 +      dxtrace(printk("Split block %i at %x, %i/%i\n",
 +              dx_get_block(frame->at), hash2, split, count-split));
 +
 +      de = dx_copy_dirents (data1, data3, map, split);
 +      memcpy(data1, data3, (char *) de + de->rec_len - data3);
 +      de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
-+      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+      de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-+      dxtrace(dx_show_leaf ((ext3_dirent *) data1, blocksize, 1));
-+      dxtrace(dx_show_leaf ((ext3_dirent *) data2, blocksize, 1));
++      de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
++      de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
++      dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1));
++      dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1));
 +
 +      /* Which block gets the new entry? */
 +      if (hash >= hash2)
 +              swap(*bh, bh2);
 +              de = de2;
 +      }
-+      dx_insert_block (frame, hash2 + continued, newblock);
++      dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock);
 +      ext3_journal_dirty_metadata (handle, bh2);
 +      brelse (bh2);
 +      ext3_journal_dirty_metadata (handle, frame->bh);
  /*
   *    ext3_add_entry()
   *
-@@ -251,6 +844,7 @@
- /*
-  * AKPM: the journalling code here looks wrong on the error paths
-  */
-+
- static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+@@ -255,118 +849,278 @@
        struct inode *inode)
  {
-@@ -258,117 +852,281 @@
-       const char *name = dentry->d_name.name;
-       int namelen = dentry->d_name.len;
+       struct inode *dir = dentry->d_parent->d_inode;
+-      const char *name = dentry->d_name.name;
+-      int namelen = dentry->d_name.len;
        unsigned long offset;
 -      unsigned short rec_len;
        struct buffer_head * bh;
 +      ext3_dirent *de;
 +      struct super_block * sb = dir->i_sb;
        int     retval;
-+      unsigned short reclen = EXT3_DIR_REC_LEN(namelen);
++      unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len);
  
 -      sb = dir->i_sb;
-+      unsigned blocksize = sb->s_blocksize;
 +      unsigned nlen, rlen;
 +      u32 block, blocks;
 +      char *top;
  
-       if (!namelen)
+-      if (!namelen)
++      if (!dentry->d_name.len)
                return -EINVAL;
 -      bh = ext3_bread (handle, dir, 0, 0, &retval);
 -      if (!bh)
 +              u32 hash;
 +              char *data1;
 +
-+              hash = dx_hash(name, namelen);
++              hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
 +              /* FIXME: do something if dx_probe() fails here */
 +              frame = dx_probe(dir, hash, frames);
 +              entries = frame->entries;
 +
 +              data1 = bh->b_data;
 +              de = (ext3_dirent *) data1;
-+              top = data1 + (0? 200: blocksize);
++              top = data1 + (0? 200: sb->s_blocksize);
 +              while ((char *) de < top)
 +              {
 +                      /* FIXME: check EEXIST and dir */
 +                              goto dxfail2;
 +                      node2 = (struct dx_node *)(bh2->b_data);
 +                      entries2 = node2->entries;
-+                      node2->fake.rec_len = cpu_to_le16(blocksize);
++                      node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
 +                      node2->fake.inode = 0;
 +                      BUFFER_TRACE(frame->bh, "get_write_access");
 +                      ext3_journal_get_write_access(handle, frame->bh);
 +              if(!bh)
 +                      return retval;
 +              de = (ext3_dirent *)bh->b_data;
-+              top = bh->b_data + blocksize - reclen;
++              top = bh->b_data + sb->s_blocksize - reclen;
 +              while ((char *) de <= top) {
 +                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
 +                                                bh, offset)) {
 +                              brelse (bh);
 +                              return -EIO;
 +                      }
-+                      if (ext3_match (namelen, name, de)) {
++                      if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) {
                                brelse (bh);
                                return -EEXIST;
 -              }
 -                      ext3_journal_dirty_metadata(handle, bh);
 +                      nlen = EXT3_DIR_REC_LEN(de->name_len);
 +                      rlen = le16_to_cpu(de->rec_len);
-+                      if ((de->inode? rlen - nlen: rlen) >= reclen)
++                      if ((de->inode ? rlen - nlen: rlen) >= reclen)
 +                              goto add;
 +                      de = (ext3_dirent *)((char *)de + rlen);
 +                      offset += rlen;
 +              return retval;
 +      de = (ext3_dirent *) bh->b_data;
 +      de->inode = 0;
-+      de->rec_len = cpu_to_le16(rlen = blocksize);
++      de->rec_len = cpu_to_le16(rlen = sb->s_blocksize);
 +      nlen = 0;
 +      goto add;
 +
 +              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
 +      } else
 +              de->inode = 0;
-+      de->name_len = namelen;
-+      memcpy (de->name, name, namelen);
++      de->name_len = dentry->d_name.len;
++      memcpy (de->name, dentry->d_name.name, dentry->d_name.len);
 +      /*
 +       * XXX shouldn't update any times until successful
 +       * completion of syscall, but too many callers depend
 +
 +              /* The 0th block becomes the root, move the dirents out */
 +              de = (ext3_dirent *) &root->info;
-+              len = ((char *) root) + blocksize - (char *) de;
++              len = ((char *) root) + sb->s_blocksize - (char *) de;
 +              memcpy (data1, de, len);
 +              de = (ext3_dirent *) data1;
 +              top = data1 + len;
 +              while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
 +                      de = de2;
-+              de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++              de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de);
 +              /* Initialize the root; the dot dirents already exist */
 +              de = (ext3_dirent *) (&root->dotdot);
-+              de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
++              de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2));
 +              memset (&root->info, 0, sizeof(root->info));
 +              root->info.info_length = sizeof(root->info);
 +              entries = root->entries;
 +              dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
 +
 +              /* Initialize as for dx_probe */
-+              hash = dx_hash (name, namelen);
++              hash = dx_hash (dentry->d_name.name, dentry->d_name.len);
 +              frame = frames;
 +              frame->entries = entries;
 +              frame->at = entries;
 +      return -ENOENT;
  }
  
-+
  /*
-  * ext3_delete_entry deletes a directory entry by merging it with the
-  * previous entry
 @@ -451,7 +1212,8 @@
        struct inode * inode;
        int err;
index 15df90c..4c8fb86 100644 (file)
@@ -133,24 +133,14 @@ diff -Nur linux-2.4.18/fs/ext3/namei.c linux-2.4.18ea/fs/ext3/namei.c
  #include <linux/fcntl.h>
  #include <linux/stat.h>
  #include <linux/string.h>
-@@ -465,6 +466,8 @@
-               inode->i_fop = &extN_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
-               err = ext3_add_nondir(handle, dentry, inode);
-+              if (err)
-+                      ext3_xattr_drop_inode(handle, inode);
-               ext3_mark_inode_dirty(handle, inode);
-       }
-       ext3_journal_stop(handle, dir);
-@@ -490,6 +493,8 @@
-       if (!IS_ERR(inode)) {
-               init_special_inode(inode, mode, rdev);
-               err = ext3_add_nondir(handle, dentry, inode);
-+              if (err)
-+                      ext3_xattr_drop_inode(handle, inode);
-               ext3_mark_inode_dirty(handle, inode);
+@@ -435,6 +435,7 @@ static int ext3_add_nondir(handle_t *han
+                       return 0;
+               }
        }
-       ext3_journal_stop(handle, dir);
++      ext3_xattr_drop_inode(handle, inode);
+       ext3_dec_count(handle, inode);
+       iput(inode);
+       return err;
 @@ -514,7 +519,7 @@
        if (IS_SYNC(dir))
                handle->h_sync = 1;
@@ -179,14 +169,6 @@ diff -Nur linux-2.4.18/fs/ext3/namei.c linux-2.4.18ea/fs/ext3/namei.c
        ext3_mark_inode_dirty(handle, inode);
        err = ext3_add_entry (handle, dentry, inode);
        if (err)
-@@ -565,6 +566,7 @@
-       return err;
- out_no_entry:
-+      ext3_xattr_drop_inode(handle, inode);
-       inode->i_nlink = 0;
-       ext3_mark_inode_dirty(handle, inode);
-       iput (inode);
 @@ -917,5 +919,5 @@
                goto out_stop;
  
index e769f43..14a713c 100644 (file)
 #ifndef _LPROCFS_SNMP_H
 #define _LPROCFS_SNMP_H
 
-
-#ifndef LPROC_SNMP
-#define LPROC_SNMP
-#endif
-
+#include <linux/autoconf.h>
 #include <linux/proc_fs.h>
 
-typedef enum {
-        E_LPROC_OK = 0
-} lproc_error_t;
-
-struct lprocfs_vars{
+#ifndef LPROCFS
+#ifdef  CONFIG_PROC_FS  /* Ensure that /proc is configured */
+#define LPROCFS
+#endif
+#endif
 
-        char* name;
-        read_proc_t* read_fptr;
-        write_proc_t* write_fptr;
-        void* data;
+struct lprocfs_vars {
+        char *name;
+        read_proc_t *read_fptr;
+        write_proc_t *write_fptr;
+        void *data;
 };
 
-#ifdef LPROC_SNMP
-
-struct proc_dir_entry* lprocfs_mkdir(const char *dname,
-                                     struct proc_dir_entry *parent);
-struct proc_dir_entry* lprocfs_srch(struct proc_dir_entry *head,
-                                    const char *name);
-void lprocfs_remove_all(struct proc_dir_entry *root);
-struct proc_dir_entry* lprocfs_new_dir(struct proc_dir_entry *root,
-                                       const char *string,
-                                       const char *tok);
-int lprocfs_new_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
-                     const char *tok, void *data);
-
-int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var,
-                     void *data);
-int lprocfs_reg_obd(struct obd_device *device, struct lprocfs_vars *list,
-                    void *data);
-int lprocfs_dereg_obd(struct obd_device *device);
-struct proc_dir_entry* lprocfs_reg_mnt(char *mnt_name);
-int lprocfs_dereg_mnt(struct proc_dir_entry *root);
-
-int lprocfs_reg_class(struct obd_type *type, struct lprocfs_vars *list,
-                      void *data);
-int lprocfs_dereg_class(struct obd_type *class);
-int lprocfs_reg_main(void);
-int lprocfs_dereg_main(void);
-int lprocfs_ll_rd(char *page, char **start, off_t off, int count, int *eof,
-                  void *data);
-#else
-
-
-static inline int lprocfs_add_vars(struct proc_dir_entry *root,
-                                  struct lprocfs_vars *var, void *data)
-{
-        return 0;
-}
-
-static inline int lprocfs_reg_obd(struct obd_device* device,
-                                 struct lprocfs_vars* list, void* data)
-{
-        return 0;
-}
-
-static inline int lprocfs_dereg_obd(struct obd_device* device)
-{
-        return 0;
-}
-
-static inline struct proc_dir_entry* lprocfs_reg_mnt(char *name)
-{
-        return NULL;
-}
-
-static inline int lprocfs_dereg_mnt(struct proc_dir_entry* root)
-{
-        return 0;
-}
-
-static inline int lprocfs_reg_class(struct obd_type* type,
-                                    struct lprocfs_vars* list, void* data)
-{
-        return 0;
-}
-
-static inline int lprocfs_dereg_class(struct obd_type* class)
-{
-        return 0;
-}
+struct lprocfs_static_vars {
+        struct lprocfs_vars *module_vars;
+        struct lprocfs_vars *obd_vars;
+};
 
-static inline int lprocfs_reg_main(void)
-{
-        return 0;
+/* class_obd.c */
+extern struct proc_dir_entry *proc_lustre_root;
+
+extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+extern void lprocfs_init_multi_vars(unsigned int idx, 
+                                    struct lprocfs_static_vars *var);
+
+/*
+ * Expands to the definition of lprocfs_init_multi_vars(), which copies the
+ * module_vars/obd_vars pointers of entry 'idx' of 'array' into *x.
+ * 'array' is cast to a pointer to struct lprocfs_static_vars and 'size' is
+ * its element count; both the pointer and the index are LASSERTed.
+ * The generated function is not static, so expand this macro only once per
+ * linked module.
+ */
+#define LPROCFS_INIT_MULTI_VARS(array, size)                              \
+void lprocfs_init_multi_vars(unsigned int idx,                            \
+                             struct lprocfs_static_vars *x)               \
+{                                                                         \
+   struct lprocfs_static_vars *glob =                                     \
+           (struct lprocfs_static_vars *)(array);                         \
+   LASSERT(glob != NULL);                                                 \
+   LASSERT(idx < (unsigned int)(size));                                   \
+   x->module_vars = glob[idx].module_vars;                                \
+   x->obd_vars = glob[idx].obd_vars;                                      \
+}
+
+/*
+ * Expands to the definition of lprocfs_init_vars(), which stores 'vclass'
+ * in x->module_vars and 'vinstance' in x->obd_vars.
+ * The generated function is not static, so expand this macro only once per
+ * linked module.
+ */
+#define LPROCFS_INIT_VARS(vclass, vinstance)           \
+void lprocfs_init_vars(struct lprocfs_static_vars *x)  \
+{                                                      \
+        x->module_vars = (vclass);                     \
+        x->obd_vars = (vinstance);                     \
+}
+
+#ifdef LPROCFS
+/* lprocfs_status.c */
+extern int lprocfs_add_vars(struct proc_dir_entry *root,
+                            struct lprocfs_vars *var,
+                            void *data);
+
+extern struct proc_dir_entry *lprocfs_register(const char *name,
+                                               struct proc_dir_entry *parent,
+                                               struct lprocfs_vars *list,
+                                               void *data);
+
+extern void lprocfs_remove(struct proc_dir_entry *root);
+
+struct obd_device;
+extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list);
+extern int lprocfs_obd_detach(struct obd_device *dev);
+
+/* Generic callbacks */
+
+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
+                          int count, int *eof, void *data);
+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
+                           int count, int *eof, void *data);
+extern int lprocfs_rd_name(char *page, char **start, off_t off,
+                           int count, int *eof, void *data);
+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+                                int count, int *eof, void *data);
+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+                              int count, int *eof, void *data);
+
+/* Statfs helpers */
+struct statfs;
+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
+                              int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+                                  int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+                                 int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+                                 int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+                                int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+                                 int count, int *eof, struct statfs *sfs);
+
+#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)      \
+int fct_name(char *page, char **start, off_t off,                \
+             int count, int *eof, void *data)                    \
+{                                                                \
+        struct statfs sfs;                                       \
+        int rc = get_statfs_fct((struct obd_device*)data, &sfs); \
+        return (rc==0                                            \
+                ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
+                : rc);                                       \
 }
 
-static inline int lprocfs_dereg_main(void)
-{
-        return 0;
-}
+#else
 
-static inline int lprocfs_ll_rd(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
-{
-        return 0;
-}
-#endif /* LPROC_SNMP */
+static inline struct proc_dir_entry *
+lprocfs_register(const char *name, struct proc_dir_entry *parent,
+                 struct lprocfs_vars *list, void *data) { return NULL; }
+static inline int lprocfs_add_vars(struct proc_dir_entry *root,
+                                   struct lprocfs_vars *var,
+                                   void *data) { return 0; }
+/* No-op stub used when LPROCFS is not defined. */
+static inline void lprocfs_remove(struct proc_dir_entry *root) {}
+struct obd_device;
+static inline int lprocfs_obd_attach(struct obd_device *dev,
+                                     struct lprocfs_vars *list) { return 0; }
+static inline int lprocfs_obd_detach(struct obd_device *dev)  { return 0; }
+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+                                       int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data) { return 0; }
+
+/* Statfs helpers */
+struct statfs;
+static inline
+int lprocfs_rd_blksize(char *page, char **start, off_t off,
+                       int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+                           int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+                          int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+                          int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+                         int count, int *eof, struct statfs *sfs)  { return 0; }
+static inline
+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+                          int count, int *eof, struct statfs *sfs) { return 0; }
+
+#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct)  \
+int fct_name(char *page, char **start, off_t off,            \
+             int count, int *eof, void *data) { *eof = 1; return 0; }
+
+#endif /* LPROCFS */
 
 #endif /* LPROCFS_SNMP_H */
index e552dfd..8c05041 100644 (file)
@@ -24,6 +24,7 @@ typedef enum {
 
         ELDLM_LOCK_CHANGED = 300,
         ELDLM_LOCK_ABORTED = 301,
+        ELDLM_LOCK_REPLACED = 302,
 
         ELDLM_NAMESPACE_EXISTS = 400,
         ELDLM_BAD_NAMESPACE    = 401
@@ -55,6 +56,7 @@ typedef enum {
 #define LDLM_FL_NO_CALLBACK    (1 << 11) /* see ldlm_cli_cancel_unused */
 #define LDLM_FL_HAS_INTENT     (1 << 12) /* lock request has intent */
 #define LDLM_FL_CANCELING      (1 << 13) /* lock cancel has already been sent */
+#define LDLM_FL_LOCAL          (1 << 14) // a local lock (ie, no srv/cli split)
 
 /* The blocking callback is overloaded to perform two functions.  These flags
  * indicate which operation should be performed. */
@@ -140,9 +142,10 @@ struct ldlm_lock;
 
 typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
                                       struct ldlm_lock_desc *new, void *data,
-                                      __u32 data_len, int flag);
-
-typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags);
+                                      int flag);
+typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags, void *data);
+typedef int (*ldlm_granted_callback)(struct ldlm_lock *,
+                                     struct lustre_msg *, int offset);
 
 struct ldlm_lock {
         struct portals_handle l_handle; // must be first in the structure
@@ -162,13 +165,14 @@ struct ldlm_lock {
 
         ldlm_completion_callback l_completion_ast;
         ldlm_blocking_callback   l_blocking_ast;
+        ldlm_granted_callback l_granted_cb;
 
         struct obd_export    *l_export;
         struct lustre_handle *l_connh;
         __u32                 l_flags;
         struct lustre_handle  l_remote_handle;
         void                 *l_data;
-        __u32                 l_data_len;
+        void                 *l_cp_data;
         struct ldlm_extent    l_extent;
         __u32                 l_version[RES_VERSION_SIZE];
 
@@ -183,7 +187,7 @@ struct ldlm_lock {
 };
 
 typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
-typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock *,
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
                                void *req_cookie, ldlm_mode_t mode, int flags,
                                void *data);
 
@@ -210,9 +214,12 @@ struct ldlm_resource {
         ldlm_mode_t            lr_most_restr;
         __u32                  lr_type; /* LDLM_PLAIN or LDLM_EXTENT */
         struct ldlm_resource  *lr_root;
-        __u64                  lr_name[RES_NAME_SIZE];
+        struct ldlm_res_id     lr_name;
         __u32                  lr_version[RES_VERSION_SIZE];
         atomic_t               lr_refcount;
+
+        /* lr_tmp holds a list head temporarily, during the building of a work
+         * queue.  see ldlm_add_ast_work_item and ldlm_run_ast_work */
         void                  *lr_tmp;
 };
 
@@ -232,21 +239,16 @@ struct ldlm_export_data {
         struct obd_import       led_import;
 };
 
-static inline struct ldlm_extent *ldlm_res2extent(struct ldlm_resource *res)
-{
-        return (struct ldlm_extent *)(res->lr_name);
-}
-
 extern struct obd_ops ldlm_obd_ops;
 
 extern char *ldlm_lockname[];
 extern char *ldlm_typename[];
 extern char *ldlm_it2str(int it);
 
-#define LDLM_DEBUG(lock, format, a...)                                        \
+#define __LDLM_DEBUG(level, lock, format, a...)                               \
 do {                                                                          \
         if (lock->l_resource == NULL) {                                       \
-                CDEBUG(D_DLMTRACE, "### " format                              \
+                CDEBUG(level, "### " format                                   \
                        " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
                        "res: \?\? rrc=\?\? type: \?\?\? remote: "LPX64")\n"   \
                        , ## a, lock, lock->l_handle.h_cookie,                 \
@@ -258,7 +260,7 @@ do {                                                                          \
                 break;                                                        \
         }                                                                     \
         if (lock->l_resource->lr_type == LDLM_EXTENT) {                       \
-                CDEBUG(D_DLMTRACE, "### " format                              \
+                CDEBUG(level, "### " format                                   \
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
                        "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
                        "] remote: "LPX64"\n" , ## a,                          \
@@ -267,8 +269,8 @@ do {                                                                          \
                        lock->l_readers, lock->l_writers,                      \
                        ldlm_lockname[lock->l_granted_mode],                   \
                        ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name[0],                          \
-                       lock->l_resource->lr_name[1],                          \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
                        atomic_read(&lock->l_resource->lr_refcount),           \
                        ldlm_typename[lock->l_resource->lr_type],              \
                        lock->l_extent.start, lock->l_extent.end,              \
@@ -276,7 +278,7 @@ do {                                                                          \
                 break;                                                        \
         }                                                                     \
         {                                                                     \
-                CDEBUG(D_DLMTRACE, "### " format                              \
+                CDEBUG(level, "### " format                                   \
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
                        "res: "LPU64"/"LPU64" rrc: %d type: %s remote: "LPX64  \
                        "\n" , ## a, lock->l_resource->lr_namespace->ns_name,  \
@@ -285,14 +287,17 @@ do {                                                                          \
                        lock->l_readers, lock->l_writers,                      \
                        ldlm_lockname[lock->l_granted_mode],                   \
                        ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name[0],                          \
-                       lock->l_resource->lr_name[1],                          \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
                        atomic_read(&lock->l_resource->lr_refcount),           \
                        ldlm_typename[lock->l_resource->lr_type],              \
                        lock->l_remote_handle.cookie);                         \
         }                                                                     \
 } while (0)
 
+/* Lock-debugging front ends for __LDLM_DEBUG: LDLM_DEBUG logs at D_DLMTRACE,
+ * LDLM_ERROR at D_ERROR.  The optional arguments are forwarded with ", ## a"
+ * so that a call with nothing after the format string does not hand
+ * __LDLM_DEBUG a stray empty trailing argument. */
+#define LDLM_DEBUG(lock, format, a...) \
+        __LDLM_DEBUG(D_DLMTRACE, lock, format , ## a)
+#define LDLM_ERROR(lock, format, a...) \
+        __LDLM_DEBUG(D_ERROR, lock, format , ## a)
+
 #define LDLM_DEBUG_NOLOCK(format, a...)                 \
         CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
 
@@ -317,11 +322,15 @@ int ldlm_replay_locks(struct obd_import *imp);
 
 /* ldlm_extent.c */
 int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
-int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock *, void *,
+int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock **, void *,
                        ldlm_mode_t, int flags, void *);
 
 /* ldlm_lockd.c */
-int ldlm_handle_enqueue(struct ptlrpc_request *req);
+int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+                             void *data, int flag);
+int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback,
+                        ldlm_blocking_callback);
 int ldlm_handle_convert(struct ptlrpc_request *req);
 int ldlm_handle_cancel(struct ptlrpc_request *req);
 int ldlm_del_waiting_lock(struct ldlm_lock *lock);
@@ -332,7 +341,7 @@ void ldlm_unregister_intent(void);
 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh);
 struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags);
 void ldlm_cancel_callback(struct ldlm_lock *);
-int ldlm_lock_set_data(struct lustre_handle *, void *data, int datalen);
+int ldlm_lock_set_data(struct lustre_handle *, void *data, void *cp_data);
 void ldlm_lock_remove_from_lru(struct ldlm_lock *);
 
 static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
@@ -342,14 +351,14 @@ static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
 
 #define LDLM_LOCK_PUT(lock)                     \
 do {                                            \
-        /*LDLM_DEBUG(lock, "put");*/            \
+        /*LDLM_DEBUG((lock), "put");*/          \
         ldlm_lock_put(lock);                    \
 } while (0)
 
 #define LDLM_LOCK_GET(lock)                     \
 ({                                              \
         ldlm_lock_get(lock);                    \
-        /*LDLM_DEBUG(lock, "get");*/            \
+        /*LDLM_DEBUG((lock), "get");*/          \
         lock;                                   \
 })
 
@@ -360,16 +369,16 @@ void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
 void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode);
 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_grant_lock(struct ldlm_lock *lock);
-int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
-                    void *cookie, int cookielen, ldlm_mode_t mode,
-                    struct lustre_handle *lockh);
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen);
+int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *,
+                    __u32 type, void *cookie, int cookielen, ldlm_mode_t mode,
+                    struct lustre_handle *);
 struct ldlm_lock *
 ldlm_lock_create(struct ldlm_namespace *ns,
-                 struct lustre_handle *parent_lock_handle,
-                 __u64 *res_id, __u32 type, ldlm_mode_t mode, void *data,
-                 __u32 data_len);
-ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock *,
+                 struct lustre_handle *parent_lock_handle, struct ldlm_res_id,
+                 __u32 type, ldlm_mode_t mode, void *data, void *cp_data);
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **,
                                void *cookie, int cookie_len, int *flags,
                                ldlm_completion_callback completion,
                                ldlm_blocking_callback blocking);
@@ -403,7 +412,8 @@ void ldlm_proc_cleanup(struct obd_device *obd);
 /* resource.c - internal */
 struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
                                         struct ldlm_resource *parent,
-                                        __u64 *name, __u32 type, int create);
+                                        struct ldlm_res_id, __u32 type,
+                                        int create);
 struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
 int ldlm_resource_putref(struct ldlm_resource *res);
 void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
@@ -414,16 +424,16 @@ void ldlm_dump_all_namespaces(void);
 void ldlm_namespace_dump(struct ldlm_namespace *);
 void ldlm_resource_dump(struct ldlm_resource *);
 int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
-                              __u64 new_resid[3]);
+                              struct ldlm_res_id);
 
 /* ldlm_request.c */
 int ldlm_expired_completion_wait(void *data);
-int ldlm_completion_ast(struct ldlm_lock *lock, int flags);
+int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data);
 int ldlm_cli_enqueue(struct lustre_handle *conn,
                      struct ptlrpc_request *req,
                      struct ldlm_namespace *ns,
                      struct lustre_handle *parent_lock_handle,
-                     __u64 *res_id,
+                     struct ldlm_res_id,
                      __u32 type,
                      void *cookie, int cookielen,
                      ldlm_mode_t mode,
@@ -431,13 +441,13 @@ int ldlm_cli_enqueue(struct lustre_handle *conn,
                      ldlm_completion_callback completion,
                      ldlm_blocking_callback callback,
                      void *data,
-                     __u32 data_len,
+                     void *cp_data,
                      struct lustre_handle *lockh);
 int ldlm_match_or_enqueue(struct lustre_handle *connh,
                           struct ptlrpc_request *req,
                           struct ldlm_namespace *ns,
                           struct lustre_handle *parent_lock_handle,
-                          __u64 *res_id,
+                          struct ldlm_res_id,
                           __u32 type,
                           void *cookie, int cookielen,
                           ldlm_mode_t mode,
@@ -445,19 +455,20 @@ int ldlm_match_or_enqueue(struct lustre_handle *connh,
                           ldlm_completion_callback completion,
                           ldlm_blocking_callback callback,
                           void *data,
-                          __u32 data_len,
+                          void *cp_data,
                           struct lustre_handle *lockh);
 int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
                     void *data, __u32 data_len);
 int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags);
 int ldlm_cli_cancel(struct lustre_handle *lockh);
-int ldlm_cli_cancel_unused(struct ldlm_namespace *, __u64 *, int flags);
+int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
+                           int flags);
 int ldlm_cancel_lru(struct ldlm_namespace *ns);
 
 /* mds/handler.c */
 /* This has to be here because recursive inclusion sucks. */
 int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                     void *data, __u32 data_len, int flag);
+                     void *data, int flag);
 
 #endif /* __KERNEL__ */
 
index 342721c..74b8dca 100644 (file)
@@ -23,11 +23,17 @@ struct lov_export_data {
 };
 
 struct ost_export_data {
-        __u8 oed_uuid[37]; /* client UUID */
+        struct obd_uuid oed_uuid; /* client UUID */
+};
+
+struct ec_export_data { /* echo client */
+        struct list_head eced_open_head;
+        struct list_head eced_locks;
 };
 
 struct obd_export {
         __u64                     exp_cookie;
+        struct obd_uuid           exp_client_uuid;
         struct list_head          exp_obd_chain;
         struct list_head          exp_conn_chain;
         struct obd_device        *exp_obd;
@@ -38,6 +44,7 @@ struct obd_export {
                 struct filter_export_data eu_filter_data;
                 struct lov_export_data    eu_lov_data;
                 struct ost_export_data    eu_ost_data;
+                struct ec_export_data     eu_ec_data;
         } u;
 };
 
@@ -45,6 +52,7 @@ struct obd_export {
 #define exp_lov_data    u.eu_lov_data
 #define exp_filter_data u.eu_filter_data
 #define exp_ost_data    u.eu_ost_data
+#define exp_ec_data     u.eu_ec_data
 
 extern struct obd_export *class_conn2export(struct lustre_handle *conn);
 extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
index eeae647..341d082 100644 (file)
@@ -56,6 +56,7 @@ struct fsfilt_operations {
         int     (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
                                      void *handle, fsfilt_cb_t cb_func);
         int     (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
+        int     (* fs_sync)(struct super_block *sb);
 };
 
 extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
@@ -146,6 +147,11 @@ static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
         return obd->obd_fsops->fs_statfs(fs, osfs);
 }
 
+/* Dispatch to the fs_sync method of obd's filesystem operations table for
+ * superblock fs and return whatever that method returns.
+ * NOTE(review): fs_sync is called without a NULL check, so every registered
+ * fsfilt_operations presumably has to provide it -- confirm. */
+static inline int fsfilt_sync(struct obd_device *obd, struct super_block *fs)
+{
+        return obd->obd_fsops->fs_sync(fs);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
index cc194ac..6e11240 100644 (file)
 /*
  *  GENERAL STUFF
  */
-typedef __u8 obd_uuid_t[37];
+/* Fixed-size UUID holder: a 37-byte buffer that obd_str2uuid() fills with a
+ * NUL-terminated string (presumably a 36-character textual UUID plus the
+ * terminator -- confirm against whatever generates the UUIDs). */
+struct obd_uuid {
+        __u8 uuid[37];
+};
+
+/* Copy the C string tmp into uuid.  At most sizeof(uuid->uuid) - 1 characters
+ * are kept; the result is always NUL-terminated and, as with strncpy(), the
+ * unused tail of the buffer is zero-filled.  tmp is only read, hence the
+ * const qualifier; the cast bridges the __u8 buffer to strncpy()'s char *
+ * destination parameter. */
+static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
+{
+        strncpy((char *)uuid->uuid, tmp, sizeof(uuid->uuid));
+        /* strncpy() leaves the buffer unterminated when tmp fills it. */
+        uuid->uuid[sizeof(uuid->uuid) - 1] = '\0';
+}
 
 /* FOO_REQUEST_PORTAL is for incoming requests on the FOO
  * FOO_REPLY_PORTAL   is for incoming replies on the FOO
@@ -75,6 +83,7 @@ typedef __u8 obd_uuid_t[37];
 #define PTLBD_REQUEST_PORTAL           19
 #define PTLBD_REPLY_PORTAL             20
 #define PTLBD_BULK_PORTAL              21
+#define MDS_GETATTR_PORTAL      22
 
 #define SVC_KILLED               1
 #define SVC_EVENT                2
@@ -133,9 +142,6 @@ struct lustre_msg {
 #define MSG_LAST_REPLAY        1
 #define MSG_RESENT             2
 
-/* XXX horrible interim hack -- see bug 578 */
-#define MSG_REPLAY_IN_PROGRESS 4
-
 static inline int lustre_msg_get_flags(struct lustre_msg *msg)
 {
         return (msg->flags & MSG_GEN_FLAG_MASK);
@@ -157,14 +163,24 @@ static inline int lustre_msg_get_op_flags(struct lustre_msg *msg)
         return (msg->flags >> MSG_OP_FLAG_SHIFT);
 }
 
+/* OR additional operation flags into msg->flags without clearing the ones
+ * already set: flags is masked with MSG_GEN_FLAG_MASK and shifted left by
+ * MSG_OP_FLAG_SHIFT before being merged.
+ * NOTE(review): the mask applied is the general-flag mask, not
+ * MSG_OP_FLAG_MASK; presumably both fields have the same width -- confirm. */
+static inline void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+{
+        msg->flags |= ((flags & MSG_GEN_FLAG_MASK) << MSG_OP_FLAG_SHIFT);
+}
+
 static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
 {
         msg->flags &= ~MSG_OP_FLAG_MASK;
-        msg->flags |= ((flags & MSG_GEN_FLAG_MASK) << MSG_OP_FLAG_SHIFT);
+        lustre_msg_add_op_flags(msg, flags);
 }
 
-#define CONNMGR_REPLY  0
-#define CONNMGR_CONNECT        1
+/*
+ * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
+ */
+
+#define MSG_CONNECT_RECOVERING 0x1
+#define MSG_CONNECT_RECONNECT  0x2
+#define MSG_CONNECT_REPLAYABLE  0x4
 
 /*
  *   OST requests: OBDO & OBD request records
@@ -305,16 +321,6 @@ struct niobuf_remote {
         __u32 flags;
 };
 
-#define CONNMGR_REPLY  0
-#define CONNMGR_CONNECT        1
-
-struct connmgr_body {
-        __u64 conn;
-        __u64 conn_token;
-        __u32 generation;
-        obd_uuid_t conn_uuid;
-};
-
 /* request structure for OST's */
 
 #define OST_REQ_HAS_OA1  0x1
@@ -328,24 +334,33 @@ struct ost_body {
  */
 
 /* opcodes */
-#define MDS_GETATTR    1
-#define MDS_OPEN       2
-#define MDS_CLOSE      3
-#define MDS_REINT      4
-#define MDS_READPAGE   6
-#define MDS_CONNECT    7
-#define MDS_DISCONNECT 8
-#define MDS_GETSTATUS  9
-#define MDS_STATFS     10
-#define MDS_GETLOVINFO 11
-#define MDS_GETATTR_NAME 12
+#define MDS_GETATTR      1
+#define MDS_GETATTR_NAME 2
+#define MDS_CLOSE        3
+#define MDS_REINT        4
+#define MDS_READPAGE     6
+#define MDS_CONNECT      7
+#define MDS_DISCONNECT   8
+#define MDS_GETSTATUS    9
+#define MDS_STATFS       10
+#define MDS_GETLOVINFO   11
 
 #define REINT_SETATTR  1
 #define REINT_CREATE   2
 #define REINT_LINK     3
 #define REINT_UNLINK   4
 #define REINT_RENAME   5
-#define REINT_MAX      5
+#define REINT_OPEN     6
+#define REINT_MAX      6
+
+#define IT_INTENT_EXEC   1
+#define IT_OPEN_LOOKUP  (1 << 1)
+#define IT_OPEN_NEG     (1 << 2)
+#define IT_OPEN_POS     (1 << 3)
+#define IT_OPEN_CREATE  (1 << 4)
+#define IT_OPEN_OPEN    (1 << 5)
+
+#define IT_UNLINK (1<<8)
 
 #define REINT_OPCODE_MASK 0xff /* opcodes must fit into this mask */
 #define REINT_REPLAYING 0x1000 /* masked into the opcode to indicate replay */
@@ -383,6 +398,7 @@ struct mds_body {
         struct ll_fid  fid2;
         struct lustre_handle handle;
         __u64          size;
+        __u64          blocks; /* XID, in the case of MDS_READPAGE */
         __u32          ino;   /* make this a __u64 */
         __u32          valid;
         __u32          fsuid;
@@ -398,6 +414,7 @@ struct mds_body {
         __u32          rdev;
         __u32          nlink;
         __u32          generation;
+        __u32          suppgid;
 };
 
 /* This is probably redundant with OBD_MD_FLEASIZE, but we need an audit */
@@ -426,6 +443,7 @@ struct mds_rec_setattr {
         __u64           sa_atime;
         __u64           sa_mtime;
         __u64           sa_ctime;
+        __u32           sa_suppgid;
 };
 
 struct mds_rec_create {
@@ -433,7 +451,7 @@ struct mds_rec_create {
         __u32           cr_fsuid;
         __u32           cr_fsgid;
         __u32           cr_cap;
-        __u32           cr_reserved;
+        __u32           cr_flags; /* for use with open */
         __u32           cr_mode;
         struct ll_fid   cr_fid;
         struct ll_fid   cr_replayfid;
@@ -441,6 +459,7 @@ struct mds_rec_create {
         __u32           cr_gid;
         __u64           cr_time;
         __u64           cr_rdev;
+        __u32           cr_suppgid;
 };
 
 struct mds_rec_link {
@@ -448,6 +467,7 @@ struct mds_rec_link {
         __u32           lk_fsuid;
         __u32           lk_fsgid;
         __u32           lk_cap;
+        __u32           lk_suppgid;
         struct ll_fid   lk_fid1;
         struct ll_fid   lk_fid2;
 };
@@ -459,6 +479,7 @@ struct mds_rec_unlink {
         __u32           ul_cap;
         __u32           ul_reserved;
         __u32           ul_mode;
+        __u32           ul_suppgid;
         struct ll_fid   ul_fid1;
         struct ll_fid   ul_fid2;
 };
@@ -487,7 +508,7 @@ struct lov_desc {
         __u64 ld_default_stripe_size;      /* in bytes */
         __u64 ld_default_stripe_offset;    /* in bytes */
         __u32 ld_pattern;                  /* RAID 0,1 etc */
-        obd_uuid_t ld_uuid;
+        struct obd_uuid ld_uuid;
 };
 
 /*
@@ -503,6 +524,10 @@ struct lov_desc {
 #define RES_NAME_SIZE 3
 #define RES_VERSION_SIZE 4
 
+/* Name of an LDLM resource: RES_NAME_SIZE 64-bit words wrapped in a struct,
+ * which lets a whole name be passed or assigned by value. */
+struct ldlm_res_id {
+        __u64 name[RES_NAME_SIZE];
+};
+
 /* lock types */
 typedef enum {
         LCK_EX = 1,
@@ -526,7 +551,7 @@ struct ldlm_intent {
  * below, we're probably fine. */
 struct ldlm_resource_desc {
         __u32 lr_type;
-        __u64 lr_name[RES_NAME_SIZE];
+        struct ldlm_res_id lr_name;
         __u32 lr_version[RES_VERSION_SIZE];
 };
 
@@ -548,7 +573,7 @@ struct ldlm_request {
 struct ldlm_reply {
         __u32 lock_flags;
         __u32 lock_mode;
-        __u64 lock_resource_name[RES_NAME_SIZE];
+        struct ldlm_res_id lock_resource_name;
         struct lustre_handle lock_handle;
         struct ldlm_extent lock_extent;   /* XXX make this policy 1 &2 */
         __u64  lock_policy_res1;
index 0f0d67d..36cd54f 100644 (file)
@@ -18,6 +18,7 @@
 typedef int (*import_recover_t)(struct obd_import *imp, int phase);
 
 #include <linux/lustre_idl.h>
+
 struct obd_import {
         import_recover_t          imp_recover;
         struct ptlrpc_connection *imp_connection;
@@ -36,11 +37,11 @@ struct obd_import {
         int                       imp_flags;
         int                       imp_level;
         __u64                     imp_last_xid;
+        __u64                     imp_last_bulk_xid;
         __u64                     imp_max_transno;
-        __u64                     imp_peer_last_xid;
         __u64                     imp_peer_committed_transno;
 
-        /* Protects flags, level, *_xid, *_list */
+        /* Protects flags, level, last_xid, *_list */
         spinlock_t                imp_lock;
 };
 
index b1f9288..54750c0 100644 (file)
@@ -18,7 +18,7 @@
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * Basic Lustre library routines. 
+ * Basic Lustre library routines.
  *
  */
 
@@ -59,20 +59,22 @@ struct obd_export;
 int target_handle_connect(struct ptlrpc_request *req);
 int target_handle_disconnect(struct ptlrpc_request *req);
 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
-                            char *cluuid);
+                            struct obd_uuid *cluuid);
 int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover);
 int client_obd_disconnect(struct lustre_handle *conn);
 int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
 int client_obd_cleanup(struct obd_device * obddev);
-struct client_obd *client_conn2cli(struct lustre_handle *conn); 
-struct obd_device *client_tgtuuid2obd(char *tgtuuid);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
 
 int target_revoke_connection(struct recovd_data *rd, int phase);
 
+int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+
 /* l_lock.c */
-struct lustre_lock { 
+struct lustre_lock {
         int l_depth;
         struct task_struct *l_owner;
         struct semaphore l_sem;
@@ -131,9 +133,9 @@ static inline void ll_sleep(int t)
 /* FIXME: This needs to validate pointers and cookies */
 static inline void *lustre_handle2object(struct lustre_handle *handle)
 {
-        if (handle) 
+        if (handle)
                 return (void *)(unsigned long)(handle->addr);
-        return NULL; 
+        return NULL;
 }
 
 static inline void ldlm_object2handle(void *object, struct lustre_handle *handle)
@@ -279,7 +281,7 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
                 printk("OBD ioctl: inlbuf3 not 0 terminated\n");
                 return 1;
         }
-#endif 
+#endif
         return 0;
 }
 
@@ -457,16 +459,24 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
 #define OBD_IOC_RECOVD_FAILCONN        _IOWR('f', 136, long)
 
 #define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139      )
+#define OBD_IOC_NO_TRANSNO             _IOW ('f', 140, long)
+#define OBD_IOC_SET_READONLY           _IOW ('f', 141, long)
 
 #define OBD_GET_VERSION                _IOWR ('f', 144, long)
 
+#define ECHO_IOC_GET_STRIPE            _IOWR('f', 200, long)
+#define ECHO_IOC_SET_STRIPE            _IOWR('f', 201, long)
+#define ECHO_IOC_ENQUEUE               _IOWR('f', 202, long)
+#define ECHO_IOC_CANCEL                _IOWR('f', 203, long)
+
+
 /*
  * l_wait_event is a flexible sleeping function, permitting simple caller
  * configuration of interrupt and timeout sensitivity along with actions to
  * be performed in the event of either exception.
  *
  * Common usage looks like this:
- * 
+ *
  * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
  *                                           intr_handler, callback_data);
  * rc = l_wait_event(waitq, condition, &lwi);
index deb9656..0c56fcd 100644 (file)
@@ -62,7 +62,9 @@ struct ll_inode_info {
 #endif
 };
 
-
+/* interpret return codes from intent lookup */
+#define LL_LOOKUP_POSITIVE 1
+#define LL_LOOKUP_NEGATIVE 2
 
 #define LL_SUPER_MAGIC 0x0BD00BD0
 
@@ -73,7 +75,7 @@ struct ll_inode_info {
 #define LL_SBI_NOLCK   0x1
 
 struct ll_sb_info {
-        obd_uuid_t                ll_sb_uuid;
+        struct obd_uuid           ll_sb_uuid;
         struct lustre_handle      ll_mdc_conn;
         struct lustre_handle      ll_osc_conn;
         struct proc_dir_entry*    ll_proc_root;
@@ -120,6 +122,28 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
         return ll_s2sbi(inode->i_sb);
 }
 
+/* Invalidate every dentry aliasing inode: under dcache_lock, each alias is
+ * taken off its hash chain, flagged DCACHE_LUSTRE_INVALID and re-linked (via
+ * d_hash) onto the ll_orphan_dentry_list of the inode's ll_sb_info. */
+static inline void d_unhash_aliases(struct inode *inode)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct list_head *pos;
+        ENTRY;
+
+        CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+               inode->i_ino, inode->i_generation);
+
+        spin_lock(&dcache_lock);
+        list_for_each(pos, &inode->i_dentry) {
+                struct dentry *alias = list_entry(pos, struct dentry, d_alias);
+
+                /* d_hash is reused as the link on the orphan list. */
+                list_del_init(&alias->d_hash);
+                alias->d_flags |= DCACHE_LUSTRE_INVALID;
+                list_add(&alias->d_hash, &sbi->ll_orphan_dentry_list);
+        }
+        spin_unlock(&dcache_lock);
+
+        EXIT;
+}
 
 // FIXME: replace the name of this with LL_I to conform to kernel stuff
 // static inline struct ll_inode_info *LL_I(struct inode *inode)
@@ -169,7 +193,6 @@ int ll_intent_lock(struct inode *parent, struct dentry **,
 
 /* dcache.c */
 void ll_intent_release(struct dentry *, struct lookup_intent *);
-int ll_set_dd(struct dentry *de);
 
 /****
 
@@ -220,14 +243,15 @@ extern struct inode_operations ll_dir_inode_operations;
 /* file.c */
 extern struct file_operations ll_file_operations;
 extern struct inode_operations ll_file_inode_operations;
+extern struct inode_operations ll_special_inode_operations;
 struct ldlm_lock;
-int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data,
-                     __u32 data_len, int flag);
+int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag);
 int ll_size_lock(struct inode *, struct lov_stripe_md *, obd_off start,
                  int mode, struct lustre_handle *);
 int ll_size_unlock(struct inode *, struct lov_stripe_md *, int mode,
                    struct lustre_handle *);
-int ll_file_size(struct inode *inode, struct lov_stripe_md *md);
+int ll_file_size(struct inode *inode, struct lov_stripe_md *md,
+                 struct lustre_handle *);
 int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid,
                       gid_t gid, struct lov_stripe_md **lsmp);
 
@@ -237,7 +261,7 @@ struct page *ll_getpage(struct inode *inode, unsigned long offset,
 void ll_truncate(struct inode *inode);
 
 /* super.c */
-void ll_update_inode(struct inode *, struct mds_body *);
+void ll_update_inode(struct inode *, struct mds_body *, struct lov_mds_md *);
 
 /* symlink.c */
 extern struct inode_operations ll_fast_symlink_inode_operations;
index 7a02dae..133f7af 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc. <info@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -29,6 +29,7 @@
 #ifdef __KERNEL__
 
 #include <linux/fs.h>
+#include <linux/kp30.h>
 #include <linux/lustre_idl.h>
 
 struct ldlm_lock_desc;
@@ -38,11 +39,57 @@ struct ptlrpc_client;
 struct obd_export;
 struct ptlrpc_request;
 struct obd_device;
+struct ll_file_data;
 
 #define LUSTRE_MDS_NAME "mds"
 #define LUSTRE_MDT_NAME "mdt"
 #define LUSTRE_MDC_NAME "mdc"
 
+/* Lock taken around MDC RPCs (presumably to serialise them, going by the
+ * name): rpcl_sem is initialised to a count of 1 by mdc_init_rpc_lock(),
+ * taken with down() in mdc_get_rpc_lock() and released with up() in
+ * mdc_put_rpc_lock(); rpcl_it records the lookup_intent, if any, that was
+ * passed when the lock was taken. */
+struct mdc_rpc_lock { 
+        struct semaphore rpcl_sem;
+        struct lookup_intent *rpcl_it;
+};
+extern struct mdc_rpc_lock mdc_rpc_lock;
+
+/* Put lck in its unlocked state: no intent recorded and the semaphore count
+ * set to 1. */
+static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
+{
+        lck->rpcl_it = NULL;
+        sema_init(&lck->rpcl_sem, 1);
+}
+
+/* Take lck via down() on its semaphore.  When an intent is supplied it is
+ * recorded in rpcl_it and its it_iattr field is set to the non-NULL marker
+ * (void *)1, which mdc_put_rpc_lock() tests before releasing the lock. */
+static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
+                                    struct lookup_intent *it)
+{
+        down(&lck->rpcl_sem);
+        if (it == NULL)
+                return;
+
+        lck->rpcl_it = it;
+        it->it_iattr = (void *)1;
+}
+
+/* Release lck.  Without an intent the semaphore is released after asserting
+ * that no intent is recorded.  With an intent the lock is released only while
+ * it->it_iattr is still set; the marker and rpcl_it are cleared first, so a
+ * repeated put with the same intent does nothing. */
+static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
+                                    struct lookup_intent *it)
+{
+        if (it == NULL) {
+                LASSERT(it == lck->rpcl_it);
+                up(&lck->rpcl_sem);
+                return;
+        }
+
+        /* Marker already cleared: nothing to release for this intent. */
+        if (!it->it_iattr)
+                return;
+
+        it->it_iattr = NULL;
+        LASSERT(it == lck->rpcl_it);
+        lck->rpcl_it = NULL;
+        up(&lck->rpcl_sem);
+}
+/* Bundle of values describing an unlink.
+ * NOTE(review): the field notes below are inferred from the names only --
+ * confirm against the code that fills this structure in. */
+struct  mdc_unlink_data {
+        struct inode *unl_dir;   /* presumably the parent directory */
+        struct inode *unl_de;    /* presumably the inode being unlinked */
+        int unl_mode;
+        const char *unl_name;    /* presumably the entry name ... */
+        int unl_len;             /* ... and its length */
+};
+
 struct mds_update_record {
         __u32 ur_fsuid;
         __u32 ur_fsgid;
@@ -60,6 +107,8 @@ struct mds_update_record {
         __u32 ur_uid;
         __u32 ur_gid;
         __u64 ur_time;
+        __u32 ur_flags;
+        __u32 ur_suppgid;
 };
 
 #define MDS_LR_CLIENT  8192
@@ -68,6 +117,7 @@ struct mds_update_record {
 #define MDS_CLIENT_SLOTS 17
 
 #define MDS_MOUNT_RECOV 2
+#define MDS_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
 
 /* Data stored per server at the head of the last_rcvd file.  In le32 order. */
 struct mds_server_data {
@@ -94,19 +144,25 @@ struct mds_export_data {
         spinlock_t              med_open_lock;
         struct mds_client_data *med_mcd;
         int                     med_off;
+        __u64                   med_last_xid;
+        struct lustre_msg      *med_last_reply;
+        int                     med_last_replen;
 };
 
 /* file data for open files on MDS */
 struct mds_file_data {
         struct list_head     mfd_list;
-        struct lustre_handle mfd_clienthandle;
         __u64                mfd_servercookie;
         struct file         *mfd_file;
 };
 
 /* mds/mds_reint.c  */
 int mds_reint_rec(struct mds_update_record *r, int offset,
-                  struct ptlrpc_request *req);
+                  struct ptlrpc_request *req, struct lustre_handle *);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req, struct lustre_handle *);
 
 /* lib/mds_updates.c */
 void mds_unpack_body(struct mds_body *b);
@@ -117,16 +173,20 @@ void mds_pack_rep_body(struct ptlrpc_request *);
 int mds_update_unpack(struct ptlrpc_request *, int offset,
                       struct mds_update_record *);
 
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset,
-                      obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int offset,
+void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, obd_id ino,
+                      int type, __u64 xid);
+void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int fl,
                       struct inode *inode, const char *name, int namelen);
-void mds_setattr_pack(struct ptlrpc_request *, int offset, struct inode *,
-                      struct iattr *, const char *name, int namelen);
+void mds_setattr_pack(struct ptlrpc_request *, struct inode *,
+                      struct iattr *, void *ea, int ealen);
 void mds_create_pack(struct ptlrpc_request *, int offset, struct inode *dir,
                      __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
                      const char *name, int namelen, const void *data,
                      int datalen);
+void mds_open_pack(struct ptlrpc_request *, int offset, struct inode *dir,
+                     __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
+                     __u32 flags, const char *name, int namelen,
+                     const void *data, int datalen);
 void mds_unlink_pack(struct ptlrpc_request *, int offset, struct inode *inode,
                      struct inode *child, __u32 mode, const char *name,
                      int namelen);
@@ -149,8 +209,8 @@ struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid,
                                      struct lustre_handle *lockh);
 struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
                               struct vfsmount **mnt);
-int mds_reint(struct ptlrpc_request *req, int offset);
-int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
+int mds_reint(struct ptlrpc_request *req, int offset, struct lustre_handle *);
+int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
                 int offset, struct mds_body *body, struct inode *inode);
 
 /* mds/mds_fs.c */
@@ -173,10 +233,12 @@ int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
                      char *filename, int namelen, unsigned long valid,
                      unsigned int ea_size, struct ptlrpc_request **request);
 int mdc_setattr(struct lustre_handle *conn,
-                struct inode *, struct iattr *iattr, struct ptlrpc_request **);
+                struct inode *, struct iattr *iattr,
+                void *ea, int ealen, struct ptlrpc_request **);
 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
              struct ptlrpc_request **);
+void mdc_set_open_replay_data(struct ll_file_data *fd);
 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
               struct lustre_handle *fh,  struct ptlrpc_request **req);
 int mdc_readpage(struct lustre_handle *conn, obd_id ino,
@@ -189,13 +251,14 @@ int mdc_unlink(struct lustre_handle *, struct inode *dir, struct inode *child,
                __u32 mode, const char *name, int namelen,
                struct ptlrpc_request **);
 int mdc_link(struct lustre_handle *conn,
-             struct dentry *src, struct inode *dir, const char *name,
+             struct inode *src, struct inode *dir, const char *name,
              int namelen, struct ptlrpc_request **);
 int mdc_rename(struct lustre_handle *conn,
                struct inode *src, struct inode *tgt, const char *old,
                int oldlen, const char *new, int newlen,
                struct ptlrpc_request **);
-int mdc_create_client(obd_uuid_t uuid, struct ptlrpc_client *cl);
+int mdc_create_client(struct obd_uuid uuid, struct ptlrpc_client *cl);
+void mdc_lock_set_inode(struct lustre_handle *lock, struct inode *inode);
 
 /* Store the generation of a newly-created inode in |req| for replay. */
 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
index 081492c..e2c9db3 100644 (file)
  */
 
 #define LDLM_NUM_THREADS        4
-#define LDLM_NEVENTS    1024
-#define LDLM_NBUFS      100
+#define LDLM_NEVENT_MAX 8192UL  /* ceiling for LDLM_NEVENTS */
+#define LDLM_NEVENTS    min(num_physpages / 64, LDLM_NEVENT_MAX)
+#define LDLM_NBUF_MAX   256UL   /* ceiling for LDLM_NBUFS */
+#define LDLM_NBUFS      min(LDLM_NEVENTS / 16, LDLM_NBUF_MAX)
 #define LDLM_BUFSIZE    (8 * 1024)
 #define LDLM_MAXREQSIZE 1024
 
 #define MDT_NUM_THREADS 8
-#define MDS_NEVENTS     1024
-#define MDS_NBUFS       100
+#define MDS_NEVENT_MAX  8192UL  /* ceiling for MDS_NEVENTS */
+#define MDS_NEVENTS     min(num_physpages / 64, MDS_NEVENT_MAX)
+#define MDS_NBUF_MAX    512UL   /* ceiling for MDS_NBUFS */
+#define MDS_NBUFS       min(MDS_NEVENTS / 16, MDS_NBUF_MAX)
 #define MDS_BUFSIZE     (8 * 1024)
-#define MDS_MAXREQSIZE  1024
+/* Assume file name length = FNAME_MAX = 256 (true for extN).
+ *        path name length = PATH_MAX = 4096
+ *        LOV MD size max  = EA_MAX = 4000
+ * symlink:  FNAME_MAX + PATH_MAX  <- largest
+ * link:     FNAME_MAX + PATH_MAX  (mds_rec_link < mds_rec_create)
+ * rename:   FNAME_MAX + FNAME_MAX
+ * open:     FNAME_MAX + EA_MAX
+ *
+ * MDS_MAXREQSIZE ~= 4736 bytes =
+ * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX
+ *
+ * Realistic size is about 512 bytes (20 character name + 128 char symlink),
+ * except in the open case where there are a large number of OSTs in a LOV.
+ */
+#define MDS_MAXREQSIZE  (5 * 1024)
 
 #define OST_NUM_THREADS 6
-#define OST_NEVENTS     min(num_physpages / 16, 32768UL)
-#define OST_NBUFS       min(OST_NEVENTS / 128, 1280UL)
-#define OST_BUFSIZE     ((OST_NEVENTS > 4096UL ? 32 : 8) * 1024)
-#define OST_MAXREQSIZE  (8 * 1024)
+#define OST_NEVENT_MAX  32768UL /* ceiling for OST_NEVENTS */
+#define OST_NEVENTS     min(num_physpages / 16, OST_NEVENT_MAX)
+#define OST_NBUF_MAX    1280UL  /* ceiling for OST_NBUFS */
+#define OST_NBUFS       min(OST_NEVENTS / 64, OST_NBUF_MAX)
+#define OST_BUFSIZE     (8 * 1024)
+/* OST_MAXREQSIZE ~= 1896 bytes =
+ * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
+ *
+ * single object with 16 pages is 576 bytes
+ */
+#define OST_MAXREQSIZE  (2 * 1024)
 
 #define PTLBD_NUM_THREADS        4
 #define PTLBD_NEVENTS    1024
 struct ptlrpc_connection {
         struct list_head        c_link;
         struct lustre_peer      c_peer;
-        __u8                    c_local_uuid[37];  /* XXX do we need this? */
-        __u8                    c_remote_uuid[37];
+        struct obd_uuid         c_local_uuid;  /* XXX do we need this? */
+        struct obd_uuid         c_remote_uuid;
 
         __u32                   c_generation;  /* changes upon new connection */
         __u32                   c_epoch;       /* changes when peer changes */
@@ -160,19 +185,25 @@ struct ptlrpc_request {
         struct ptlrpc_service *rq_svc;
 
         void (*rq_replay_cb)(struct ptlrpc_request *);
+        void  *rq_replay_data;
 };
 
 #define DEBUG_REQ(level, req, fmt, args...)                                    \
 do {                                                                           \
 CDEBUG(level,                                                                  \
        "@@@ " fmt " req@%p x"LPD64"/t"LPD64" o%d->%s:%d lens %d/%d ref %d fl " \
-       "%x\n" ,  ## args, req, req->rq_xid, req->rq_reqmsg->transno,           \
+       "%x/%x/%x rc %x\n" ,  ## args, req, req->rq_xid,                        \
+       req->rq_reqmsg ? req->rq_reqmsg->transno : -1,                          \
        req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
-       req->rq_connection ? (char *)req->rq_connection->c_remote_uuid : "<?>", \
+       req->rq_connection ?                                                    \
+          (char *)req->rq_connection->c_remote_uuid.uuid : "<?>",              \
        (req->rq_import && req->rq_import->imp_client) ?                        \
            req->rq_import->imp_client->cli_request_portal : -1,                \
        req->rq_reqlen, req->rq_replen,                                         \
-       atomic_read (&req->rq_refcount), req->rq_flags);                        \
+       atomic_read (&req->rq_refcount), req->rq_flags,                         \
+       req->rq_reqmsg ? req->rq_reqmsg->flags : 0,                             \
+       req->rq_repmsg ? req->rq_repmsg->flags : 0,                             \
+       req->rq_status);                                                        \
 } while (0)
 
 struct ptlrpc_bulk_page {
@@ -277,9 +308,9 @@ typedef void (*bulk_callback_t)(struct ptlrpc_bulk_desc *, void *);
 typedef int (*svc_handler_t)(struct ptlrpc_request *req);
 
 /* rpc/connection.c */
-void ptlrpc_readdress_connection(struct ptlrpc_connection *, obd_uuid_t uuid);
+void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *uuid);
 struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
-                                                obd_uuid_t uuid);
+                                                struct obd_uuid *uuid);
 int ptlrpc_put_connection(struct ptlrpc_connection *c);
 struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
 void ptlrpc_init_connection(void);
@@ -288,8 +319,10 @@ void ptlrpc_cleanup_connection(void);
 /* rpc/niobuf.c */
 int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *bulk);
 int ptlrpc_check_bulk_received(struct ptlrpc_bulk_desc *bulk);
-int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *);
-int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *);
+int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *);
+int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *);
+int ptlrpc_register_bulk_put(struct ptlrpc_bulk_desc *);
+int ptlrpc_register_bulk_get(struct ptlrpc_bulk_desc *);
 int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk);
 struct obd_brw_set *obd_brw_set_new(void);
 void obd_brw_set_add(struct obd_brw_set *, struct ptlrpc_bulk_desc *);
@@ -305,8 +338,8 @@ void ptlrpc_link_svc_me(struct ptlrpc_request_buffer_desc *rqbd);
 void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
                         struct ptlrpc_client *);
 void ptlrpc_cleanup_client(struct obd_import *imp);
-__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req);
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid);
+struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
 
 int ll_brw_sync_wait(struct obd_brw_set *, int phase);
 
@@ -314,22 +347,25 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req);
 void ptlrpc_continue_req(struct ptlrpc_request *req);
 int ptlrpc_replay_req(struct ptlrpc_request *req);
 void ptlrpc_restart_req(struct ptlrpc_request *req);
-void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import);
 
 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
                                        int count, int *lengths, char **bufs);
 void ptlrpc_free_req(struct ptlrpc_request *request);
 void ptlrpc_req_finished(struct ptlrpc_request *request);
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *);
 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc);
 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *page);
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+                                      struct obd_import *imp);
 
 /* rpc/service.c */
 struct ptlrpc_service *
 ptlrpc_init_svc(__u32 nevents, __u32 nbufs, __u32 bufsize, __u32 max_req_size,
                 int req_portal, int rep_portal,
-                obd_uuid_t uuid, svc_handler_t, char *name);
+                struct obd_uuid *uuid, svc_handler_t, char *name);
 void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
 int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
                         char *name);
index 94ffd4f..acc59c2 100644 (file)
@@ -17,11 +17,11 @@ struct lov_oinfo { /* per-child structure */
 };
 
 struct lov_stripe_md {
-        __u64 lsm_object_id;       /* lov object id */
+        __u64 lsm_object_id;        /* lov object id */
         __u32 lsm_magic;
-        __u32 lsm_stripe_size;     /* size of the stripe */
-        int   lsm_stripe_offset;   /* offset of first stripe in lmd_objects */
-        int   lsm_stripe_count;    /* how many objects are being striped on */
+        __u32 lsm_stripe_size;      /* size of the stripe */
+        unsigned lsm_stripe_offset; /* offset of first stripe in lmd_objects */
+        unsigned lsm_stripe_count;  /* how many objects are being striped on */
         struct lov_oinfo lsm_oinfo[0];
 };
 
@@ -72,6 +72,7 @@ struct obd_ucred {
         __u32 ouc_fsuid;
         __u32 ouc_fsgid;
         __u32 ouc_cap;
+        __u32 ouc_suppgid;
 };
 
 #define OBD_RUN_CTXT_MAGIC      0xC0FFEEAA
@@ -95,6 +96,8 @@ struct obd_run_ctxt {
 #define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
 #endif
 
+struct ost_server_data;         /* NOTE(review): filter_obd uses struct filter_server_data; confirm tag */
+
 struct filter_obd {
         char *fo_fstype;
         struct super_block *fo_sb;
@@ -103,8 +106,12 @@ struct filter_obd {
         struct dentry *fo_dentry_O;
         struct dentry *fo_dentry_O_mode[16];
         spinlock_t fo_objidlock;        /* protects fo_lastobjid increment */
-        __u64 fo_lastobjid;
-        __u64 fo_last_committed;
+        struct semaphore fo_transno_sem;
+        struct file *fo_rcvd_filp;
+        struct filter_server_data *fo_fsd;
+
+        __u64 fo_next_recovery_transno;
+        int   fo_recoverable_clients;
         struct file_operations *fo_fop;
         struct inode_operations *fo_iop;
         struct address_space_operations *fo_aops;
@@ -118,7 +125,7 @@ struct client_obd {
         struct obd_import    cl_import;
         struct semaphore     cl_sem;
         int                  cl_conn_count;
-        obd_uuid_t           cl_target_uuid; /* XXX -> lustre_name */
+        struct obd_uuid           cl_target_uuid; /* XXX -> lustre_name */
         /* max_mds_easize is purely a performance thing so we don't have to
          * call obd_size_wiremd() all the time. */
         int                  cl_max_mds_easize;
@@ -127,6 +134,7 @@ struct client_obd {
 
 struct mds_obd {
         struct ptlrpc_service           *mds_service;
+        struct ptlrpc_service           *mds_getattr_service;
 
         struct super_block              *mds_sb;
         struct vfsmount                 *mds_vfsmnt;
@@ -138,7 +146,6 @@ struct mds_obd {
         int                              mds_max_mdsize;
         struct file                     *mds_rcvd_filp;
         struct semaphore                 mds_transno_sem;
-        __u64                            mds_last_committed;
         __u64                            mds_last_rcvd;
         __u64                            mds_mount_count;
         struct ll_fid                    mds_rootfid;
@@ -151,7 +158,8 @@ struct mds_obd {
         struct list_head                 mds_delayed_reply_queue;
         spinlock_t                       mds_processing_task_lock;
         pid_t                            mds_processing_task;
-
+        struct timer_list                mds_recovery_timer;
+        
         int                              mds_has_lov_desc;
         struct lov_desc                  mds_lov_desc;
 };
@@ -184,6 +192,7 @@ struct echo_obd {
 struct ptlbd_obd {
         /* server's */
         struct ptlrpc_service *ptlbd_service;
+        struct file *filp;
         /* client's */
         struct ptlrpc_client bd_client;
         struct obd_import bd_import;
@@ -216,11 +225,15 @@ struct snap_obd {
 
 struct ost_obd {
         struct ptlrpc_service *ost_service;
-        struct lustre_handle ost_conn;   /* the local connection to the OBD */
 };
 
 struct echo_client_obd {
-        struct lustre_handle conn;   /* the local connection to osc/lov */
+        struct lustre_handle ec_conn;   /* the local connection to osc/lov */
+        spinlock_t           ec_lock;
+        struct list_head     ec_objects;
+        int                  ec_lsmsize;
+        int                  ec_nstripes;
+        __u64                ec_unique;
 };
 
 struct cache_obd {
@@ -229,7 +242,7 @@ struct cache_obd {
 };
 
 struct lov_tgt_desc {
-        obd_uuid_t uuid;
+        struct obd_uuid uuid;
         struct lustre_handle conn;
         int active; /* is this target available for requests, etc */
 };
@@ -254,6 +267,10 @@ struct niobuf_local {
         struct dentry *dentry;
 };
 
+struct obd_trans_info {          /* handed to obd_ops methods as their trailing "oti" argument */
+        __u64     oti_transno;   /* presumably a transaction number -- confirm who sets it */
+};
+
 #define N_LOCAL_TEMP_PAGE 0x00000001
 
 /* corresponds to one of the obd's */
@@ -262,7 +279,7 @@ struct obd_device {
 
         /* common and UUID name of this device */
         char *obd_name;
-        obd_uuid_t obd_uuid;
+        struct obd_uuid obd_uuid;
 
         int obd_minor;
         int obd_flags;
@@ -273,6 +290,7 @@ struct obd_device {
         struct ptlrpc_client   obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
         spinlock_t obd_dev_lock;
+        __u64                  obd_last_committed;
         struct fsfilt_operations *obd_fsops;
         union {
                 struct ext2_obd ext2;
@@ -310,7 +328,7 @@ struct obd_ops {
         int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
         int (*o_cleanup)(struct obd_device *dev);
         int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
-                         obd_uuid_t cluuid, struct recovd_obd *recovd,
+                         struct obd_uuid *cluuid, struct recovd_obd *recovd,
                          ptlrpc_recovery_cb_t recover);
         int (*o_disconnect)(struct lustre_handle *conn);
 
@@ -324,41 +342,43 @@ struct obd_ops {
         int (*o_preallocate)(struct lustre_handle *, obd_count *req,
                              obd_id *ids);
         int (*o_create)(struct lustre_handle *conn,  struct obdo *oa,
-                        struct lov_stripe_md **ea);
+                        struct lov_stripe_md **ea, struct obd_trans_info *oti);
         int (*o_destroy)(struct lustre_handle *conn, struct obdo *oa,
-                         struct lov_stripe_md *ea);
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_setattr)(struct lustre_handle *conn, struct obdo *oa,
-                         struct lov_stripe_md *ea);
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
                          struct lov_stripe_md *ea);
         int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
-                      struct lov_stripe_md *ea);
+                      struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *ea);
+                       struct lov_stripe_md *ea, struct obd_trans_info *oti);
         int (*o_brw)(int rw, struct lustre_handle *conn,
                      struct lov_stripe_md *ea, obd_count oa_bufs,
-                     struct brw_page *pgarr, struct obd_brw_set *);
+                     struct brw_page *pgarr, struct obd_brw_set *, 
+                     struct obd_trans_info *oti);
         int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
                        struct lov_stripe_md *ea, obd_size count,
-                       obd_off offset);
+                       obd_off offset, struct obd_trans_info *oti);
         int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
                       obd_size count, obd_off offset);
         int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
                          struct obdo *src, obd_size count, obd_off offset);
         int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
                       struct lustre_handle *srconn, struct obdo *src,
-                      obd_size count, obd_off offset);
+                      obd_size count, obd_off offset, struct obd_trans_info *);
         int (*o_iterate)(struct lustre_handle *conn,
                          int (*)(obd_id, obd_gr, void *),
                          obd_id *startid, obd_gr group, void *data);
         int (*o_preprw)(int cmd, struct lustre_handle *conn,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, void **desc_private);
+                        struct niobuf_local *local, void **desc_private, 
+                        struct obd_trans_info *oti);
         int (*o_commitrw)(int cmd, struct lustre_handle *conn,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
-                          void *desc_private);
+                          void *desc_private, struct obd_trans_info *oti);
         int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
                          struct lustre_handle *parent_lock,
                          __u32 type, void *cookie, int cookielen, __u32 mode,
index ed3eb99..8e160ad 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <linux/lprocfs_status.h>
 #endif
 
-
 /* OBD Device Declarations */
 #define MAX_OBD_DEVICES 128
 extern struct obd_device obd_dev[MAX_OBD_DEVICES];
 
-#define OBD_ATTACHED 0x1
-#define OBD_SET_UP   0x2
+#define OBD_ATTACHED       0x01 /* single-bit masks, presumably for obd_flags */
+#define OBD_SET_UP         0x02
+#define OBD_RECOVERING     0x04
+#define OBD_ABORT_RECOVERY 0x08
+#define OBD_REPLAYABLE     0x10
+#define OBD_NO_TRANSNO     0x20 /* XXX needs better name */
 
 /* OBD Operations Declarations */
 
@@ -104,7 +107,8 @@ do {                                                            \
                                                                 \
         exp = class_conn2export(conn);                          \
         if (!(exp)) {                                           \
-                CERROR("No export\n");                          \
+                CERROR("No export for conn "LPX64":"LPX64"\n",  \
+                       conn->addr, conn->cookie);               \
                 RETURN(-EINVAL);                                \
         }                                                       \
                                                                 \
@@ -276,7 +280,8 @@ static inline int obd_free_memmd(struct lustre_handle *conn,
 }
 
 static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo,
-                             struct lov_stripe_md **ea)
+                             struct lov_stripe_md **ea,
+                             struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -285,12 +290,13 @@ static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, create);
 
-        rc = OBP(exp->exp_obd, create)(conn, obdo, ea);
+        rc = OBP(exp->exp_obd, create)(conn, obdo, ea, oti);
         RETURN(rc);
 }
 
 static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo,
-                              struct lov_stripe_md *ea)
+                              struct lov_stripe_md *ea,
+                              struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -299,7 +305,7 @@ static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, destroy);
 
-        rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea);
+        rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea, oti);
         RETURN(rc);
 }
 
@@ -318,7 +324,8 @@ static inline int obd_getattr(struct lustre_handle *conn, struct obdo *obdo,
 }
 
 static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo,
-                            struct lov_stripe_md *ea)
+                            struct lov_stripe_md *ea,
+                            struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -327,12 +334,12 @@ static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, close);
 
-        rc = OBP(exp->exp_obd, close)(conn, obdo, ea);
+        rc = OBP(exp->exp_obd, close)(conn, obdo, ea, oti);
         RETURN(rc);
 }
 
 static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo,
-                           struct lov_stripe_md *ea)
+                           struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -341,12 +348,13 @@ static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, open);
 
-        rc = OBP(exp->exp_obd, open)(conn, obdo, ea);
+        rc = OBP(exp->exp_obd, open)(conn, obdo, ea, oti);
         RETURN(rc);
 }
 
 static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo,
-                              struct lov_stripe_md *ea)
+                              struct lov_stripe_md *ea,
+                              struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -355,12 +363,12 @@ static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, setattr);
 
-        rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea);
+        rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea, oti);
         RETURN(rc);
 }
 
 static inline int obd_connect(struct lustre_handle *conn,
-                              struct obd_device *obd, obd_uuid_t cluuid,
+                              struct obd_device *obd, struct obd_uuid *cluuid,
                               struct recovd_obd *recovd,
                               ptlrpc_recovery_cb_t recover)
 {
@@ -401,8 +409,8 @@ static inline int obd_statfs(struct lustre_handle *conn,struct obd_statfs *osfs)
 }
 
 static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
-                            struct lov_stripe_md *ea,
-                            obd_size start, obd_size end)
+                            struct lov_stripe_md *ea, obd_size start,
+                            obd_size end, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -411,13 +419,14 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
         OBD_CHECK_SETUP(conn, exp);
         OBD_CHECK_OP(exp->exp_obd, punch);
 
-        rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end);
+        rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end, oti);
         RETURN(rc);
 }
 
 static inline int obd_brw(int cmd, struct lustre_handle *conn,
                           struct lov_stripe_md *ea, obd_count oa_bufs,
-                          struct brw_page *pg, struct obd_brw_set *set)
+                          struct brw_page *pg, struct obd_brw_set *set,
+                          struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -431,14 +440,15 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn,
                 LBUG();
         }
 
-        rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, set);
+        rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, set, oti);
         RETURN(rc);
 }
 
 static inline int obd_preprw(int cmd, struct lustre_handle *conn,
                              int objcount, struct obd_ioobj *obj,
                              int niocount, struct niobuf_remote *remote,
-                             struct niobuf_local *local, void **desc_private)
+                             struct niobuf_local *local, void **desc_private,
+                             struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -448,14 +458,14 @@ static inline int obd_preprw(int cmd, struct lustre_handle *conn,
         OBD_CHECK_OP(exp->exp_obd, preprw);
 
         rc = OBP(exp->exp_obd, preprw)(cmd, conn, objcount, obj, niocount,
-                                       remote, local, desc_private);
+                                       remote, local, desc_private, oti);
         RETURN(rc);
 }
 
 static inline int obd_commitrw(int cmd, struct lustre_handle *conn,
                                int objcount, struct obd_ioobj *obj,
                                int niocount, struct niobuf_local *local,
-                               void *desc_private)
+                               void *desc_private, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         int rc;
@@ -465,7 +475,7 @@ static inline int obd_commitrw(int cmd, struct lustre_handle *conn,
         OBD_CHECK_OP(exp->exp_obd, commitrw);
 
         rc = OBP(exp->exp_obd, commitrw)(cmd, conn, objcount, obj, niocount,
-                                         local, desc_private);
+                                         local, desc_private, oti);
         RETURN(rc);
 }
 
@@ -554,7 +564,7 @@ static inline void obd_oa2handle(struct lustre_handle *handle, struct obdo *oa)
 
 static inline void obd_handle2oa(struct obdo *oa, struct lustre_handle *handle)
 {
-        if (handle->addr) {
+        if (handle && handle->addr) {
                 struct lustre_handle *oa_handle = obdo_handle(oa);
                 memcpy(oa_handle, handle, sizeof(*handle));
                 oa->o_valid |= OBD_MD_FLHANDLE;
@@ -714,7 +724,7 @@ static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
                 dst->i_atime = src->o_atime;
         if (valid & OBD_MD_FLMTIME)
                 dst->i_mtime = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME)
+        if (valid & OBD_MD_FLCTIME && src->o_ctime > dst->i_ctime)
                 dst->i_ctime = src->o_ctime;
         if (valid & OBD_MD_FLSIZE)
                 dst->i_size = src->o_size;
@@ -835,21 +845,23 @@ static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
 /* I'm as embarrassed about this as you are.
  *
  * <shaver> // XXX do not look into _superhack with remaining eye
- * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */ 
+ * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp,
+                                               int dying_import);
 
-int class_register_type(struct obd_ops *ops, struct lprocfs_vars* vars, 
+int class_register_type(struct obd_ops *ops, struct lprocfs_vars* vars,
                         char *nm);
 int class_unregister_type(char *nm);
 int class_name2dev(char *name);
-int class_uuid2dev(char *uuid);
-struct obd_device *class_uuid2obd(char *uuid);
+int class_uuid2dev(struct obd_uuid *uuid);
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
 struct obd_export *class_new_export(struct obd_device *obddev);
 struct obd_type *class_get_type(char *name);
 void class_put_type(struct obd_type *type);
 void class_destroy_export(struct obd_export *exp);
 int class_connect(struct lustre_handle *conn, struct obd_device *obd,
-                  obd_uuid_t cluuid);
+                  struct obd_uuid *cluuid);
 int class_disconnect(struct lustre_handle *conn);
 void class_disconnect_all(struct obd_device *obddev);
 
@@ -872,6 +884,17 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
 void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src);
 void obd_statfs_unpack(struct obd_statfs *tgt, struct obd_statfs *src);
 
+
+struct obd_class_user_state {
+        struct obd_device     *ocus_current_obd;
+        struct list_head       ocus_conns;
+};
+
+struct obd_class_user_conn {
+        struct list_head       ocuc_chain;
+        struct lustre_handle   ocuc_conn;
+};
+
 #endif
 
 /* sysctl.c */
@@ -880,6 +903,6 @@ extern void obd_sysctl_clean (void);
 
 /* uuid.c  */
 typedef __u8 class_uuid_t[16];
-//int class_uuid_parse(obd_uuid_t in, class_uuid_t out);
-void class_uuid_unparse(class_uuid_t in, obd_uuid_t out);
-#endif /* __LINUX_CLASS_OBD_H */
+//int class_uuid_parse(struct obd_uuid in, class_uuid_t out);
+void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+#endif /* __LINUX_OBD_CLASS_H */
index 6bc32f2..273779a 100644 (file)
 #define OBD_ECHO_DEVICENAME "obdecho"
 #define OBD_ECHO_CLIENT_DEVICENAME "echo_client"
 
+struct ec_object
+{
+       struct list_head       eco_obj_chain;
+       struct obd_device     *eco_device;
+       int                    eco_refcount;
+       int                    eco_deleted;
+       obd_id                 eco_id;
+       struct lov_stripe_md  *eco_lsm;
+};
+
+struct ec_open_object
+{
+       struct list_head       ecoo_exp_chain;
+       struct ec_object      *ecoo_object;
+       struct obdo            ecoo_oa;
+        __u64                  ecoo_cookie;
+};
+
+struct ec_lock
+{
+       struct list_head       ecl_exp_chain;
+       struct lustre_handle   ecl_handle;
+       struct ldlm_extent     ecl_extent;
+       __u32                  ecl_mode;
+       struct ec_object      *ecl_object;
+       __u64                  ecl_cookie;
+};
+
 #endif
index fb3d1ff..16a4d03 100644 (file)
 #define OBD_FILTER_DEVICENAME "obdfilter"
 #endif
 
+#define FILTER_LR_SERVER_SIZE    512
+
+#define FILTER_LR_CLIENT_START   8192
+#define FILTER_LR_CLIENT_SIZE    128
+
+#define FILTER_MOUNT_RECOV 2
+#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
+
+/* Data stored per server at the head of the last_rcvd file.  In le32 order. */
+struct filter_server_data {
+        __u8  fsd_uuid[37];        /* server UUID */
+        __u8  fsd_uuid_padding[3]; /* unused */
+        __u64 fsd_last_objid;      /* last allocated object ID */
+        __u64 fsd_last_rcvd;       /* last completed transaction ID */
+        __u64 fsd_mount_count;     /* FILTER incarnation number */
+        __u8  fsd_padding[FILTER_LR_SERVER_SIZE - 64]; /* pad to FILTER_LR_SERVER_SIZE */
+};
+
+/* Data stored per client in the last_rcvd file.  In le32 order. */
+struct filter_client_data {
+        __u8  fcd_uuid[37];        /* client UUID */
+        __u8  fcd_uuid_padding[3]; /* unused */
+        __u64 fcd_last_rcvd;       /* last completed transaction ID */
+        __u64 fcd_mount_count;     /* FILTER incarnation number */
+        __u64 fcd_last_xid;        /* client RPC xid for the last transaction */
+        __u8  fcd_padding[FILTER_LR_CLIENT_SIZE - 64]; 
+};
+
 /* In-memory access to client data from OST struct */
 struct filter_export_data {
         struct list_head  fed_open_head; /* files to close on disconnect */
         spinlock_t        fed_lock;      /* protects fed_open_head */
+        struct filter_client_data  *fed_fcd;
+        int               fed_lr_off;
 };
 
 /* file data for open files on OST */
@@ -47,4 +77,5 @@ struct filter_dentry_data {
 
 #define FILTER_FLAG_DESTROY 0x0001      /* destroy dentry on last file close */
 
+
 #endif
index e3e23f4..69e4126 100644 (file)
@@ -35,6 +35,7 @@ extern int obd_memmax;
 extern unsigned long obd_fail_loc;
 extern unsigned long obd_timeout;
 extern char obd_recovery_upcall[128];
+extern unsigned long obd_sync_filter;
 
 #define OBD_FAIL_MDS                     0x100
 #define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
@@ -68,6 +69,8 @@ extern char obd_recovery_upcall[128];
 #define OBD_FAIL_MDS_STATFS_PACK         0x11d
 #define OBD_FAIL_MDS_STATFS_NET          0x11e
 #define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x120
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x121
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -108,8 +111,9 @@ extern char obd_recovery_upcall[128];
 #define OBD_FAIL_MDS_ALL_NET 0x01000000
 #define OBD_FAIL_OST_ALL_NET 0x02000000
 
-#define OBD_FAIL_CHECK(id)   ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id) &&  \
-                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!=\
+#define OBD_FAIL_CHECK(id)   (((obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
+                              ((id) & OBD_FAIL_MASK_LOC)) &&                   \
+                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
                                 (OBD_FAILED | OBD_FAIL_ONCE)))
 
 #define OBD_FAIL_RETURN(id, ret)                                             \
index 9ff075e..bac5ebf 100644 (file)
 +EXPORT_SYMBOL(dev_clear_rdonly);
 --- linux-2.4.18-17.8.0/drivers/block/loop.c~dev_read_only     2002-12-06 14:52:29.000000000 -0800
 +++ linux-2.4.18-17.8.0-zab/drivers/block/loop.c       2002-12-06 14:52:29.000000000 -0800
-@@ -491,6 +491,11 @@ static int loop_make_request(request_que
+@@ -491,6 +491,9 @@ static int loop_make_request(request_que
        spin_unlock_irq(&lo->lo_lock);
  
        if (rw == WRITE) {
-+#ifdef CONFIG_DEV_RDONLY
 +              if (dev_check_rdonly(rbh->b_rdev))
 +                      goto err;
-+#endif
 +
                if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                        goto err;
        } else if (rw == READA) {
 --- linux-2.4.18-17.8.0/drivers/ide/ide-disk.c~dev_read_only   2002-12-06 14:52:29.000000000 -0800
 +++ linux-2.4.18-17.8.0-zab/drivers/ide/ide-disk.c     2002-12-06 14:52:29.000000000 -0800
-@@ -557,6 +557,12 @@ static ide_startstop_t lba_48_rw_disk (i
+@@ -557,6 +557,10 @@ static ide_startstop_t lba_48_rw_disk (i
   */
  static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
  {
-+#ifdef CONFIG_DEV_RDONLY
 +      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
 +              ide_end_request(1, HWGROUP(drive));
 +              return ide_stopped;
 +      }
-+#endif
        if (IDE_CONTROL_REG)
                OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
  
diff --git a/lustre/kernel_patches/patches/dev_read_only_hp.patch b/lustre/kernel_patches/patches/dev_read_only_hp.patch
new file mode 100644 (file)
index 0000000..b2cf6f0
--- /dev/null
@@ -0,0 +1,77 @@
+ drivers/block/blkpg.c  |   36 ++++++++++++++++++++++++++++++++++++
+ drivers/block/loop.c   |    3 +++
+ drivers/ide/ide-disk.c |    4 ++++
+ 3 files changed, 43 insertions(+)
+
+--- linux-2.4.19-hp2_pnnl2/drivers/block/blkpg.c~dev_read_only_hp      Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/block/blkpg.c  Sun Jan 19 18:52:28 2003
+@@ -310,6 +310,42 @@ int blk_ioctl(kdev_t dev, unsigned int c
+ EXPORT_SYMBOL(blk_ioctl);
++
++#define NUM_DEV_NO_WRITE 16
++static int dev_no_write[NUM_DEV_NO_WRITE];
++
++/*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently).  This is for filesystem crash/recovery testing.
++ */
++void dev_set_rdonly(kdev_t dev, int no_write)
++{
++      if (dev) {
++              printk(KERN_WARNING "Turning device %s read-only\n",
++                     bdevname(dev));
++              dev_no_write[no_write] = 0xdead0000 + dev;
++      }
++}
++
++int dev_check_rdonly(kdev_t dev) {
++      int i;
++
++      for (i = 0; i < NUM_DEV_NO_WRITE; i++) {
++              if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 &&
++                  dev == (dev_no_write[i] & 0xffff))
++                      return 1;
++      }
++      return 0;
++}
++
++void dev_clear_rdonly(int no_write) {
++      dev_no_write[no_write] = 0;
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++
+ /**
+  * get_last_sector()
+  *  
+--- linux-2.4.19-hp2_pnnl2/drivers/block/loop.c~dev_read_only_hp       Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/block/loop.c   Sun Jan 19 18:51:12 2003
+@@ -474,6 +474,9 @@ static int loop_make_request(request_que
+       spin_unlock_irq(&lo->lo_lock);
+       if (rw == WRITE) {
++              if (dev_check_rdonly(rbh->b_rdev))
++                      goto err;
++
+               if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+                       goto err;
+       } else if (rw == READA) {
+--- linux-2.4.19-hp2_pnnl2/drivers/ide/ide-disk.c~dev_read_only_hp     Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/ide/ide-disk.c Sun Jan 19 18:51:12 2003
+@@ -551,6 +551,10 @@ static ide_startstop_t lba_48_rw_disk (i
+  */
+ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+ {
++      if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++              ide_end_request(1, HWGROUP(drive));
++              return ide_stopped;
++      }
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+_
index cdf72f0..716c156 100644 (file)
@@ -1,10 +1,14 @@
 
 
 
- 0 files changed
+ fs/ext3/Makefile   |    2 ++
+ fs/ext3/super.c    |    2 +-
+ include/linux/fs.h |    1 +
+ kernel/ksyms.c     |    5 +++++
+ 4 files changed, 9 insertions(+), 1 deletion(-)
 
---- linux-2.4.18-17.8.0/fs/ext3/Makefile~exports       2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/ext3/Makefile   2002-12-06 14:52:29.000000000 -0800
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/Makefile~exports    Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/Makefile       Sun Jan 19 18:52:38 2003
 @@ -9,6 +9,8 @@
  
  O_TARGET := ext3.o
@@ -14,9 +18,9 @@
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
---- linux-2.4.18-17.8.0/fs/ext3/super.c~exports        2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/ext3/super.c    2002-12-06 14:52:29.000000000 -0800
-@@ -1746,7 +1746,7 @@ static void __exit exit_ext3_fs(void)
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/super.c~exports     Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/super.c        Sun Jan 19 18:52:38 2003
+@@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
  }
  
@@ -25,9 +29,9 @@
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux-2.4.18-17.8.0/include/linux/fs.h~exports     2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/fs.h 2002-12-06 14:52:29.000000000 -0800
-@@ -1046,6 +1046,7 @@ extern int unregister_filesystem(struct 
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~exports  Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h     Sun Jan 19 18:52:38 2003
+@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct 
  extern struct vfsmount *kern_mount(struct file_system_type *);
  extern int may_umount(struct vfsmount *);
  extern long do_mount(char *, char *, char *, unsigned long, void *);
  extern void umount_tree(struct vfsmount *);
  
  #define kern_umount mntput
---- linux-2.4.18-17.8.0/kernel/ksyms.c~exports 2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/kernel/ksyms.c     2002-12-06 14:52:29.000000000 -0800
-@@ -306,6 +306,11 @@ EXPORT_SYMBOL_GPL(buffermem_pages);
- EXPORT_SYMBOL_GPL(nr_free_pages);
- EXPORT_SYMBOL_GPL(page_cache_size);
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~exports      Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 18:52:38 2003
+@@ -308,6 +308,11 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+ EXPORT_SYMBOL(dcache_readdir);
+ EXPORT_SYMBOL(dcache_dir_ops);
  
 +/* lustre */
 +EXPORT_SYMBOL(panic_notifier_list);
diff --git a/lustre/kernel_patches/patches/exports_hp.patch b/lustre/kernel_patches/patches/exports_hp.patch
new file mode 100644 (file)
index 0000000..0222b46
--- /dev/null
@@ -0,0 +1,56 @@
+
+
+
+ fs/ext3/Makefile   |    2 ++
+ fs/ext3/super.c    |    2 +-
+ include/linux/fs.h |    1 +
+ kernel/ksyms.c     |    4 ++++
+ 4 files changed, 8 insertions(+), 1 deletion(-)
+
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/Makefile~exports    Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/Makefile       Sun Jan 19 18:52:38 2003
+@@ -9,6 +9,8 @@
+ O_TARGET := ext3.o
++export-objs :=        super.o
++
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+               ioctl.o namei.o super.o symlink.o
+ obj-m    := $(O_TARGET)
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/super.c~exports     Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/super.c        Sun Jan 19 18:52:38 2003
+@@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
+       unregister_filesystem(&ext3_fs_type);
+ }
+-EXPORT_NO_SYMBOLS;
++EXPORT_SYMBOL(ext3_bread);
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~exports  Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h     Sun Jan 19 18:52:38 2003
+@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct 
+ extern struct vfsmount *kern_mount(struct file_system_type *);
+ extern int may_umount(struct vfsmount *);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
++struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
+ extern void umount_tree(struct vfsmount *);
+ #define kern_umount mntput
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~exports      Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 18:52:38 2003
+@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+ EXPORT_SYMBOL(dcache_readdir);
+ EXPORT_SYMBOL(dcache_dir_ops);
++/* lustre */
++EXPORT_SYMBOL(pagecache_lock_cacheline);
++EXPORT_SYMBOL(do_kern_mount);
++
+ /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
+ EXPORT_SYMBOL(default_llseek);
+ EXPORT_SYMBOL(dentry_open);
+
+_
diff --git a/lustre/kernel_patches/patches/invalidate_show.patch b/lustre/kernel_patches/patches/invalidate_show.patch
new file mode 100644 (file)
index 0000000..c3ae2f5
--- /dev/null
@@ -0,0 +1,104 @@
+--- lum/fs/inode.c     Sat Oct 19 11:42:42 2002
++++ linux-2.4.18-uml35-ext3online/fs/inode.c   Mon Oct 14 00:41:20 2002
+@@ -606,7 +553,8 @@ static void dispose_list(struct list_hea
+ /*
+  * Invalidate all inodes for a device.
+  */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct super_block * sb,
++                         struct list_head * dispose, int show)
+ {
+       struct list_head *next;
+       int busy = 0, count = 0;
+@@ -631,6 +579,11 @@ static int invalidate_list(struct list_h
+                       count++;
+                       continue;
+               }
++              if (show)
++                      printk(KERN_ERR
++                             "inode busy: dev %s:%lu (%p) mode %o count %u\n",
++                             kdevname(sb->s_dev), inode->i_ino, inode,
++                             inode->i_mode, atomic_read(&inode->i_count));
+               busy = 1;
+       }
+       /* only unused inodes may be cached with i_count zero */
+@@ -649,22 +601,23 @@ static int invalidate_list(struct list_h
+ /**
+  *    invalidate_inodes       - discard the inodes on a device
+  *    @sb: superblock
++ *    @show: whether we should display any busy inodes found
+  *
+  *    Discard all of the inodes for a given superblock. If the discard
+  *    fails because there are busy inodes then a non zero value is returned.
+  *    If the discard is successful all the inodes have been discarded.
+  */
+  
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int show)
+ {
+       int busy;
+       LIST_HEAD(throw_away);
+       spin_lock(&inode_lock);
+-      busy = invalidate_list(&inode_in_use, sb, &throw_away);
+-      busy |= invalidate_list(&inode_unused, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
++      busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
++      busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
+       spin_unlock(&inode_lock);
+       dispose_list(&throw_away);
+@@ -690,7 +643,7 @@ int invalidate_device(kdev_t dev, int do
+                * hold).
+                */
+               shrink_dcache_sb(sb);
+-              res = invalidate_inodes(sb);
++              res = invalidate_inodes(sb, 0);
+               drop_super(sb);
+       }
+       invalidate_buffers(dev);
+--- lum/fs/super.c.orig        Sat Oct 19 11:42:42 2002
++++ lum/fs/super.c     Wed Oct 30 17:16:55 2002
+@@ -936,7 +936,7 @@
+       lock_super(sb);
+       lock_kernel();
+       sb->s_flags &= ~MS_ACTIVE;
+-      invalidate_inodes(sb);  /* bad name - it should be evict_inodes() */
++      invalidate_inodes(sb, 0);  /* bad name - it should be evict_inodes() */
+       if (sop) {
+               if (sop->write_super && sb->s_dirt)
+                       sop->write_super(sb);
+@@ -945,7 +945,7 @@
+       }
+       /* Forget any remaining inodes */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 1)) {
+               printk(KERN_ERR "VFS: Busy inodes after unmount. "
+                       "Self-destruct in 5 seconds.  Have a nice day...\n");
+       }
+--- lum/include/linux/fs.h     Wed Oct 30 17:10:42 2002
++++ lum/include/linux/fs.h.orig        Tue Oct 22 23:15:00 2002
+@@ -1261,7 +1261,7 @@
+ extern void set_buffer_flushtime(struct buffer_head *);
+ extern void balance_dirty(void);
+ extern int check_disk_change(kdev_t);
+-extern int invalidate_inodes(struct super_block *);
++extern int invalidate_inodes(struct super_block *, int);
+ extern int invalidate_device(kdev_t, int);
+ extern void invalidate_inode_pages(struct inode *);
+ extern void invalidate_inode_pages2(struct address_space *);
+--- lum/fs/smbfs/inode.c.orig  Mon Feb 25 12:38:09 2002
++++ lum/fs/smbfs/inode.c       Thu Feb  6 21:34:26 2003
+@@ -166,7 +166,7 @@
+ {
+       VERBOSE("\n");
+       shrink_dcache_sb(SB_of(server));
+-      invalidate_inodes(SB_of(server));
++      invalidate_inodes(SB_of(server), 0);
+ }
+ /*
diff --git a/lustre/kernel_patches/patches/iod-rmap-exports.patch b/lustre/kernel_patches/patches/iod-rmap-exports.patch
new file mode 100644 (file)
index 0000000..00eba97
--- /dev/null
@@ -0,0 +1,64 @@
+--- linux-chaos/fs/inode.c.b_io_export Wed Jan 29 16:56:15 2003
++++ linux-chaos/fs/inode.c     Wed Jan 29 16:56:27 2003
+@@ -66,7 +66,8 @@
+  * NOTE! You also have to own the lock if you change
+  * the i_state of an inode while it is in use..
+  */
+-static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(inode_lock);
+ /*
+  * Statistics gathering..
+--- linux-chaos/fs/Makefile.b_io_export        Wed Jan 29 16:56:45 2003
++++ linux-chaos/fs/Makefile    Wed Jan 29 16:56:53 2003
+@@ -7,7 +7,7 @@
+ O_TARGET := fs.o
+-export-objs :=        filesystems.o open.o dcache.o buffer.o
++export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
+ mod-subdirs :=        nls
+ obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
+--- linux-chaos/mm/filemap.c.b_io_export       Wed Jan 29 16:50:39 2003
++++ linux-chaos/mm/filemap.c   Wed Jan 29 16:51:11 2003
+@@ -65,6 +65,7 @@
+  *                    pagecache_lock
+  */
+ spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++EXPORT_SYMBOL(pagemap_lru_lock_cacheline);
+ #define CLUSTER_PAGES         (1 << page_cluster)
+ #define CLUSTER_OFFSET(x)     (((x) >> page_cluster) << page_cluster)
+--- linux-chaos/mm/vmscan.c.b_io_export        Wed Jan 29 16:51:58 2003
++++ linux-chaos/mm/vmscan.c    Wed Jan 29 16:55:16 2003
+@@ -839,6 +839,7 @@
+       set_current_state(TASK_RUNNING);
+       remove_wait_queue(&kswapd_done, &wait);
+ }
++EXPORT_SYMBOL(wakeup_kswapd);
+ static void wakeup_memwaiters(void)
+ {
+--- linux-chaos/mm/Makefile.b_io_export        Wed Jan 29 16:52:46 2003
++++ linux-chaos/mm/Makefile    Wed Jan 29 16:54:23 2003
+@@ -9,7 +9,7 @@
+ O_TARGET := mm.o
+-export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.o
+ obj-y  := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+           vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+--- linux-chaos/mm/page_alloc.c.b_io_export    Wed Jan 29 17:00:32 2003
++++ linux-chaos/mm/page_alloc.c        Wed Jan 29 17:01:31 2003
+@@ -31,6 +31,7 @@
+ int nr_inactive_dirty_pages;
+ int nr_inactive_clean_pages;
+ pg_data_t *pgdat_list;
++EXPORT_SYMBOL(pgdat_list);
+ /*
+  * The zone_table array is used to look up the address of the
diff --git a/lustre/kernel_patches/patches/jbd-transno-cb.patch b/lustre/kernel_patches/patches/jbd-transno-cb.patch
new file mode 100644 (file)
index 0000000..ceb086d
--- /dev/null
@@ -0,0 +1,240 @@
+
+
+
+ fs/jbd/commit.c      |   27 +++++++++++++++++++++---
+ fs/jbd/journal.c     |    1 
+ fs/jbd/transaction.c |   56 ++++++++++++++++++++++++++++++++++++++++-----------
+ include/linux/jbd.h  |   20 ++++++++++++++++++
+ 4 files changed, 90 insertions(+), 14 deletions(-)
+
+--- linux-2.4.19/fs/jbd/commit.c~vanilla-2.4.19        Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/commit.c  Sun Jan 19 19:46:42 2003
+@@ -475,7 +475,7 @@ start_journal_io:
+            transaction's t_log_list queue, and metadata buffers are on
+            the t_iobuf_list queue.
+-         Wait for the transactions in reverse order.  That way we are
++         Wait for the buffers in reverse order.  That way we are
+          less likely to be woken up until all IOs have completed, and
+          so we incur less scheduling load.
+       */
+@@ -566,8 +566,10 @@ start_journal_io:
+       jbd_debug(3, "JBD: commit phase 6\n");
+-      if (is_journal_aborted(journal))
++      if (is_journal_aborted(journal)) {
++              unlock_journal(journal);
+               goto skip_commit;
++      }
+       /* Done it all: now write the commit record.  We should have
+        * cleaned up our previous buffers by now, so if we are in abort
+@@ -577,6 +579,7 @@ start_journal_io:
+       descriptor = journal_get_descriptor_buffer(journal);
+       if (!descriptor) {
+               __journal_abort_hard(journal);
++              unlock_journal(journal);
+               goto skip_commit;
+       }
+       
+@@ -600,7 +603,6 @@ start_journal_io:
+               put_bh(bh);             /* One for getblk() */
+               journal_unlock_journal_head(descriptor);
+       }
+-      lock_journal(journal);
+       /* End of a transaction!  Finally, we can do checkpoint
+            processing: any buffers committed as a result of this
+@@ -609,6 +611,25 @@ start_journal_io:
+ skip_commit:
++      /* Call any callbacks that had been registered for handles in this
++       * transaction.  It is up to the callback to free any allocated
++       * memory.
++       */
++      if (!list_empty(&commit_transaction->t_jcb)) {
++              struct list_head *p, *n;
++              int error = is_journal_aborted(journal);
++
++              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
++                      struct journal_callback *jcb;
++
++                      jcb = list_entry(p, struct journal_callback, jcb_list);
++                      list_del(p);
++                      jcb->jcb_func(jcb, error);
++              }
++      }
++
++      lock_journal(journal);
++
+       jbd_debug(3, "JBD: commit phase 7\n");
+       J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+--- linux-2.4.19/fs/jbd/journal.c~vanilla-2.4.19       Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/journal.c Sun Jan 19 19:46:42 2003
+@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
+ #endif
+ EXPORT_SYMBOL(journal_flush);
+ EXPORT_SYMBOL(journal_revoke);
++EXPORT_SYMBOL(journal_callback_set);
+ EXPORT_SYMBOL(journal_init_dev);
+ EXPORT_SYMBOL(journal_init_inode);
+--- linux-2.4.19/fs/jbd/transaction.c~vanilla-2.4.19   Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/transaction.c     Sun Jan 19 19:46:42 2003
+@@ -57,6 +57,7 @@ static transaction_t * get_transaction (
+       transaction->t_state = T_RUNNING;
+       transaction->t_tid = journal->j_transaction_sequence++;
+       transaction->t_expires = jiffies + journal->j_commit_interval;
++      INIT_LIST_HEAD(&transaction->t_jcb);
+       /* Set up the commit timer for the new transaction. */
+       J_ASSERT (!journal->j_commit_timer_active);
+@@ -201,6 +202,20 @@ repeat_locked:
+       return 0;
+ }
++/* Allocate a new handle.  This should probably be in a slab... */
++static handle_t *new_handle(int nblocks)
++{
++      handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++      if (!handle)
++              return NULL;
++      memset(handle, 0, sizeof (handle_t));
++      handle->h_buffer_credits = nblocks;
++      handle->h_ref = 1;
++      INIT_LIST_HEAD(&handle->h_jcb);
++
++      return handle;
++}
++
+ /*
+  * Obtain a new handle.  
+  *
+@@ -227,14 +242,11 @@ handle_t *journal_start(journal_t *journ
+               handle->h_ref++;
+               return handle;
+       }
+-      
+-      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++
++      handle = new_handle(nblocks);
+       if (!handle)
+               return ERR_PTR(-ENOMEM);
+-      memset (handle, 0, sizeof (handle_t));
+-      handle->h_buffer_credits = nblocks;
+-      handle->h_ref = 1;
+       current->journal_info = handle;
+       err = start_this_handle(journal, handle);
+@@ -333,14 +345,11 @@ handle_t *journal_try_start(journal_t *j
+       
+       if (is_journal_aborted(journal))
+               return ERR_PTR(-EIO);
+-      
+-      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++
++      handle = new_handle(nblocks);
+       if (!handle)
+               return ERR_PTR(-ENOMEM);
+-      memset (handle, 0, sizeof (handle_t));
+-      handle->h_buffer_credits = nblocks;
+-      handle->h_ref = 1;
+       current->journal_info = handle;
+       err = try_start_this_handle(journal, handle);
+@@ -1328,6 +1337,28 @@ out:
+ #endif
+ /*
++ * Register a callback function for this handle.  The function will be
++ * called when the transaction that this handle is part of has been
++ * committed to disk with the original callback data struct and the
++ * error status of the journal as parameters.  There is no guarantee of
++ * ordering between handles within a single transaction, nor between
++ * callbacks registered on the same handle.
++ *
++ * The caller is responsible for allocating the journal_callback struct.
++ * This is to allow the caller to add as much extra data to the callback
++ * as needed, but reduce the overhead of multiple allocations.  The caller
++ * allocated struct must start with a struct journal_callback at offset 0,
++ * and has the caller-specific data afterwards.
++ */
++void journal_callback_set(handle_t *handle,
++                        void (*func)(struct journal_callback *jcb, int error),
++                        struct journal_callback *jcb)
++{
++      list_add(&jcb->jcb_list, &handle->h_jcb);
++      jcb->jcb_func = func;
++}
++
++/*
+  * All done for a particular handle.
+  *
+  * There is not much action needed here.  We just return any remaining
+@@ -1393,7 +1424,10 @@ int journal_stop(handle_t *handle)
+                       wake_up(&journal->j_wait_transaction_locked);
+       }
+-      /* 
++      /* Move callbacks from the handle to the transaction. */
++      list_splice(&handle->h_jcb, &transaction->t_jcb);
++
++      /*
+        * If the handle is marked SYNC, we need to set another commit
+        * going!  We also want to force a commit if the current
+        * transaction is occupying too much of the log, or if the
+--- linux-2.4.19/include/linux/jbd.h~vanilla-2.4.19    Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/jbd.h      Sun Jan 19 19:46:42 2003
+@@ -249,6 +249,13 @@ static inline struct journal_head *bh2jh
+       return bh->b_private;
+ }
++#define HAVE_JOURNAL_CALLBACK_STATUS
++struct journal_callback {
++      struct list_head jcb_list;
++      void (*jcb_func)(struct journal_callback *jcb, int error);
++      /* user data goes here */
++};
++
+ struct jbd_revoke_table_s;
+ /* The handle_t type represents a single atomic update being performed
+@@ -279,6 +286,12 @@ struct handle_s 
+          operations */
+       int                     h_err;
++      /* List of application registered callbacks for this handle.
++       * The function(s) will be called after the transaction that
++       * this handle is part of has been committed to disk.
++       */
++      struct list_head        h_jcb;
++
+       /* Flags */
+       unsigned int    h_sync:         1;      /* sync-on-close */
+       unsigned int    h_jdata:        1;      /* force data journaling */
+@@ -398,6 +411,10 @@ struct transaction_s 
+       /* How many handles used this transaction? */
+       int t_handle_count;
++
++      /* List of registered callback functions for this transaction.
++       * Called when the transaction is committed. */
++      struct list_head        t_jcb;
+ };
+@@ -646,6 +663,9 @@ extern int  journal_flushpage(journal_t 
+ extern int     journal_try_to_free_buffers(journal_t *, struct page *, int);
+ extern int     journal_stop(handle_t *);
+ extern int     journal_flush (journal_t *);
++extern void    journal_callback_set(handle_t *handle,
++                                    void (*fn)(struct journal_callback *,int),
++                                    struct journal_callback *jcb);
+ extern void    journal_lock_updates (journal_t *);
+ extern void    journal_unlock_updates (journal_t *);
diff --git a/lustre/kernel_patches/patches/kmem_cache_validate_hp.patch b/lustre/kernel_patches/patches/kmem_cache_validate_hp.patch
new file mode 100644 (file)
index 0000000..03385a7
--- /dev/null
@@ -0,0 +1,105 @@
+ arch/ia64/mm/init.c  |    6 +++++
+ include/linux/slab.h |    1 
+ kernel/ksyms.c       |    1 
+ mm/slab.c            |   53 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 61 insertions(+)
+
+--- linux-2.4.19-hp2_pnnl2/arch/ia64/mm/init.c~kmem_cache_validate_hp  Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/arch/ia64/mm/init.c    Sun Jan 19 18:59:24 2003
+@@ -44,6 +44,12 @@ unsigned long vmalloc_end = VMALLOC_END_
+ static struct page *vmem_map;
+ static unsigned long num_dma_physpages;
++struct page *check_get_page(unsigned long kaddr)
++{
++#warning FIXME: Lustre team, is this solid?
++      return virt_to_page(kaddr);
++}
++
+ int
+ do_check_pgt_cache (int low, int high)
+ {
+--- linux-2.4.19-hp2_pnnl2/include/linux/slab.h~kmem_cache_validate_hp Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/slab.h   Sun Jan 19 19:01:07 2003
+@@ -56,6 +56,7 @@ extern kmem_cache_t *kmem_cache_create(c
+ extern int kmem_cache_destroy(kmem_cache_t *);
+ extern int kmem_cache_shrink(kmem_cache_t *);
+ extern void *kmem_cache_alloc(kmem_cache_t *, int);
++extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp);
+ extern void kmem_cache_free(kmem_cache_t *, void *);
+ extern unsigned int kmem_cache_size(kmem_cache_t *);
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~kmem_cache_validate_hp       Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 19:00:32 2003
+@@ -118,6 +118,7 @@ EXPORT_SYMBOL(kmem_find_general_cachep);
+ EXPORT_SYMBOL(kmem_cache_create);
+ EXPORT_SYMBOL(kmem_cache_destroy);
+ EXPORT_SYMBOL(kmem_cache_shrink);
++EXPORT_SYMBOL(kmem_cache_validate);
+ EXPORT_SYMBOL(kmem_cache_alloc);
+ EXPORT_SYMBOL(kmem_cache_free);
+ EXPORT_SYMBOL(kmem_cache_size);
+--- linux-2.4.19-hp2_pnnl2/mm/slab.c~kmem_cache_validate_hp    Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/mm/slab.c      Sun Jan 19 18:59:24 2003
+@@ -1207,6 +1207,59 @@ failed:
+  * Called with the cache-lock held.
+  */
++extern struct page *check_get_page(unsigned long kaddr);
++struct page *page_mem_map(struct page *page);
++static int kmem_check_cache_obj (kmem_cache_t * cachep,
++                               slab_t *slabp, void * objp)
++{
++      int i;
++      unsigned int objnr;
++
++#if DEBUG
++      if (cachep->flags & SLAB_RED_ZONE) {
++              objp -= BYTES_PER_WORD;
++              if ( *(unsigned long *)objp != RED_MAGIC2)
++                      /* Either write before start, or a double free. */
++                      return 0;
++              if (*(unsigned long *)(objp+cachep->objsize -
++                              BYTES_PER_WORD) != RED_MAGIC2)
++                      /* Either write past end, or a double free. */
++                      return 0;
++      }
++#endif
++
++      objnr = (objp-slabp->s_mem)/cachep->objsize;
++      if (objnr >= cachep->num)
++              return 0;
++      if (objp != slabp->s_mem + objnr*cachep->objsize)
++              return 0;
++
++      /* Check slab's freelist to see if this obj is there. */
++      for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
++              if (i == objnr)
++                      return 0;
++      }
++      return 1;
++}
++
++
++int kmem_cache_validate(kmem_cache_t *cachep, void *objp)
++{
++      struct page *page = check_get_page((unsigned long)objp);
++
++      if (!VALID_PAGE(page))
++              return 0;
++
++      if (!PageSlab(page))
++              return 0;
++
++      /* Reject out-of-range, misaligned, or currently free objects. */
++      if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp))
++              return 0;
++
++      return (cachep == GET_PAGE_CACHE(page));
++}
++
+ #if DEBUG
+ static int kmem_extra_free_checks (kmem_cache_t * cachep,
+                       slab_t *slabp, void * objp)
+
+_
index 2e69e01..9ed43cf 100644 (file)
@@ -1,11 +1,12 @@
 
 
 
- 0 files changed
+ include/linux/lustre_version.h |    1 +
+ 1 files changed, 1 insertion(+)
 
---- /dev/null  2002-08-30 16:31:37.000000000 -0700
-+++ linux-2.4.18-17.8.0-zab/include/linux/lustre_version.h     2002-12-06 14:52:30.000000000 -0800
+--- /dev/null  Fri Aug 30 17:31:37 2002
++++ linux-2.4.18-18.8.0-l7-root/include/linux/lustre_version.h Mon Jan 20 12:24:45 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 5
++#define LUSTRE_KERNEL_VERSION 10
 
 _
index 72949cd..4ed5bb9 100644 (file)
@@ -1,34 +1,33 @@
- arch/i386/mm/init.c            |    6 +
- arch/ia64/mm/init.c            |    6 +
- drivers/block/blkpg.c          |   35 ++++++
+
+
+
+ arch/i386/mm/init.c            |    6 
+ arch/ia64/mm/init.c            |    6 
+ drivers/block/blkpg.c          |   35 ++++
  drivers/block/loop.c           |    5 
- drivers/ide/ide-disk.c         |    6 +
+ drivers/ide/ide-disk.c         |    6 
  fs/dcache.c                    |    1 
  fs/ext3/Makefile               |    2 
  fs/ext3/super.c                |    2 
- fs/jbd/commit.c                |   27 ++++-
- fs/jbd/journal.c               |    1 
- fs/jbd/transaction.c           |   56 ++++++++--
- fs/namei.c                     |  215 ++++++++++++++++++++++++++++++++---------
+ fs/namei.c                     |  296 ++++++++++++++++++++++++++++++++++-------
  fs/nfsd/vfs.c                  |    2 
- fs/open.c                      |   63 +++++++++---
- fs/stat.c                      |   30 ++++-
+ fs/open.c                      |   63 ++++++--
+ fs/stat.c                      |   30 +++-
  include/linux/blkdev.h         |    4 
- include/linux/dcache.h         |   31 +++++
- include/linux/fs.h             |   14 ++
- include/linux/jbd.h            |   20 +++
+ include/linux/dcache.h         |   31 ++++
+ include/linux/fs.h             |   23 +++
  include/linux/lustre_version.h |    1 
  include/linux/slab.h           |    1 
- kernel/ksyms.c                 |    7 +
- mm/slab.c                      |   53 ++++++++++
- 23 files changed, 502 insertions(+), 86 deletions(-)
+ kernel/ksyms.c                 |    7 
+ mm/slab.c                      |   53 +++++++
+ 19 files changed, 501 insertions(+), 73 deletions(-)
 
 --- /dev/null  Fri Aug 30 17:31:37 2002
-+++ linux-2.4.19-root/include/linux/lustre_version.h   Sun Dec 15 16:58:43 2002
++++ linux-2.4.19-root/include/linux/lustre_version.h   Sun Jan 19 19:54:00 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 5
---- linux-2.4.19/arch/ia64/mm/init.c~vanilla-2.4.19    Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/arch/ia64/mm/init.c      Sun Dec 15 16:58:43 2002
++#define LUSTRE_KERNEL_VERSION 7
+--- linux-2.4.19/arch/ia64/mm/init.c~vanilla-2.4.19    Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/arch/ia64/mm/init.c      Sun Jan 19 19:46:42 2003
 @@ -37,6 +37,12 @@ unsigned long MAX_DMA_ADDRESS = PAGE_OFF
  
  static unsigned long totalram_pages;
@@ -42,8 +41,8 @@
  int
  do_check_pgt_cache (int low, int high)
  {
---- linux-2.4.19/arch/i386/mm/init.c~vanilla-2.4.19    Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/arch/i386/mm/init.c      Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/arch/i386/mm/init.c~vanilla-2.4.19    Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/arch/i386/mm/init.c      Sun Jan 19 19:46:42 2003
 @@ -43,6 +43,12 @@ unsigned long highstart_pfn, highend_pfn
  static unsigned long totalram_pages;
  static unsigned long totalhigh_pages;
@@ -57,8 +56,8 @@
  int do_check_pgt_cache(int low, int high)
  {
        int freed = 0;
---- linux-2.4.19/drivers/block/blkpg.c~vanilla-2.4.19  Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/block/blkpg.c    Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/block/blkpg.c~vanilla-2.4.19  Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/block/blkpg.c    Sun Jan 19 19:46:42 2003
 @@ -296,3 +296,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
  }
  
@@ -98,8 +97,8 @@
 +EXPORT_SYMBOL(dev_set_rdonly);
 +EXPORT_SYMBOL(dev_check_rdonly);
 +EXPORT_SYMBOL(dev_clear_rdonly);
---- linux-2.4.19/drivers/block/loop.c~vanilla-2.4.19   Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/block/loop.c     Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/block/loop.c~vanilla-2.4.19   Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/block/loop.c     Sun Jan 19 19:46:42 2003
 @@ -474,6 +474,11 @@ static int loop_make_request(request_que
        spin_unlock_irq(&lo->lo_lock);
  
                if (lo->lo_flags & LO_FLAGS_READ_ONLY)
                        goto err;
        } else if (rw == READA) {
---- linux-2.4.19/drivers/ide/ide-disk.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/ide/ide-disk.c   Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/ide/ide-disk.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/ide/ide-disk.c   Sun Jan 19 19:46:42 2003
 @@ -551,6 +551,12 @@ static ide_startstop_t lba_48_rw_disk (i
   */
  static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
        if (IDE_CONTROL_REG)
                OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
  
---- linux-2.4.19/fs/ext3/Makefile~vanilla-2.4.19       Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/ext3/Makefile Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/ext3/Makefile~vanilla-2.4.19       Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/ext3/Makefile Sun Jan 19 19:46:42 2003
 @@ -9,6 +9,8 @@
  
  O_TARGET := ext3.o
  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
---- linux-2.4.19/fs/ext3/super.c~vanilla-2.4.19        Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/ext3/super.c  Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/ext3/super.c~vanilla-2.4.19        Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/ext3/super.c  Sun Jan 19 19:46:42 2003
 @@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
  }
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux-2.4.19/fs/jbd/commit.c~vanilla-2.4.19        Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/commit.c  Sun Dec 15 16:58:43 2002
-@@ -475,7 +475,7 @@ start_journal_io:
-            transaction's t_log_list queue, and metadata buffers are on
-            the t_iobuf_list queue.
--         Wait for the transactions in reverse order.  That way we are
-+         Wait for the buffers in reverse order.  That way we are
-          less likely to be woken up until all IOs have completed, and
-          so we incur less scheduling load.
-       */
-@@ -566,8 +566,10 @@ start_journal_io:
-       jbd_debug(3, "JBD: commit phase 6\n");
--      if (is_journal_aborted(journal))
-+      if (is_journal_aborted(journal)) {
-+              unlock_journal(journal);
-               goto skip_commit;
-+      }
-       /* Done it all: now write the commit record.  We should have
-        * cleaned up our previous buffers by now, so if we are in abort
-@@ -577,6 +579,7 @@ start_journal_io:
-       descriptor = journal_get_descriptor_buffer(journal);
-       if (!descriptor) {
-               __journal_abort_hard(journal);
-+              unlock_journal(journal);
-               goto skip_commit;
-       }
-       
-@@ -600,7 +603,6 @@ start_journal_io:
-               put_bh(bh);             /* One for getblk() */
-               journal_unlock_journal_head(descriptor);
-       }
--      lock_journal(journal);
-       /* End of a transaction!  Finally, we can do checkpoint
-            processing: any buffers committed as a result of this
-@@ -609,6 +611,25 @@ start_journal_io:
- skip_commit:
-+      /* Call any callbacks that had been registered for handles in this
-+       * transaction.  It is up to the callback to free any allocated
-+       * memory.
-+       */
-+      if (!list_empty(&commit_transaction->t_jcb)) {
-+              struct list_head *p, *n;
-+              int error = is_journal_aborted(journal);
-+
-+              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+                      struct journal_callback *jcb;
-+
-+                      jcb = list_entry(p, struct journal_callback, jcb_list);
-+                      list_del(p);
-+                      jcb->jcb_func(jcb, error);
-+              }
-+      }
-+
-+      lock_journal(journal);
-+
-       jbd_debug(3, "JBD: commit phase 7\n");
-       J_ASSERT(commit_transaction->t_sync_datalist == NULL);
---- linux-2.4.19/fs/jbd/journal.c~vanilla-2.4.19       Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/journal.c Sun Dec 15 16:58:43 2002
-@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
- #endif
- EXPORT_SYMBOL(journal_flush);
- EXPORT_SYMBOL(journal_revoke);
-+EXPORT_SYMBOL(journal_callback_set);
- EXPORT_SYMBOL(journal_init_dev);
- EXPORT_SYMBOL(journal_init_inode);
---- linux-2.4.19/fs/jbd/transaction.c~vanilla-2.4.19   Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/transaction.c     Sun Dec 15 16:58:43 2002
-@@ -57,6 +57,7 @@ static transaction_t * get_transaction (
-       transaction->t_state = T_RUNNING;
-       transaction->t_tid = journal->j_transaction_sequence++;
-       transaction->t_expires = jiffies + journal->j_commit_interval;
-+      INIT_LIST_HEAD(&transaction->t_jcb);
-       /* Set up the commit timer for the new transaction. */
-       J_ASSERT (!journal->j_commit_timer_active);
-@@ -201,6 +202,20 @@ repeat_locked:
-       return 0;
- }
-+/* Allocate a new handle.  This should probably be in a slab... */
-+static handle_t *new_handle(int nblocks)
-+{
-+      handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+      if (!handle)
-+              return NULL;
-+      memset(handle, 0, sizeof (handle_t));
-+      handle->h_buffer_credits = nblocks;
-+      handle->h_ref = 1;
-+      INIT_LIST_HEAD(&handle->h_jcb);
-+
-+      return handle;
-+}
-+
- /*
-  * Obtain a new handle.  
-  *
-@@ -227,14 +242,11 @@ handle_t *journal_start(journal_t *journ
-               handle->h_ref++;
-               return handle;
-       }
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = start_this_handle(journal, handle);
-@@ -333,14 +345,11 @@ handle_t *journal_try_start(journal_t *j
-       
-       if (is_journal_aborted(journal))
-               return ERR_PTR(-EIO);
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = try_start_this_handle(journal, handle);
-@@ -1328,6 +1337,28 @@ out:
- #endif
- /*
-+ * Register a callback function for this handle.  The function will be
-+ * called when the transaction that this handle is part of has been
-+ * committed to disk with the original callback data struct and the
-+ * error status of the journal as parameters.  There is no guarantee of
-+ * ordering between handles within a single transaction, nor between
-+ * callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations.  The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * and has the caller-specific data afterwards.
-+ */
-+void journal_callback_set(handle_t *handle,
-+                        void (*func)(struct journal_callback *jcb, int error),
-+                        struct journal_callback *jcb)
-+{
-+      list_add(&jcb->jcb_list, &handle->h_jcb);
-+      jcb->jcb_func = func;
-+}
-+
-+/*
-  * All done for a particular handle.
-  *
-  * There is not much action needed here.  We just return any remaining
-@@ -1393,7 +1424,10 @@ int journal_stop(handle_t *handle)
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
--      /* 
-+      /* Move callbacks from the handle to the transaction. */
-+      list_splice(&handle->h_jcb, &transaction->t_jcb);
-+
-+      /*
-        * If the handle is marked SYNC, we need to set another commit
-        * going!  We also want to force a commit if the current
-        * transaction is occupying too much of the log, or if the
---- linux-2.4.19/include/linux/blkdev.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/blkdev.h   Sun Dec 15 17:02:24 2002
+--- linux-2.4.19/include/linux/blkdev.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/blkdev.h   Sun Jan 19 21:05:55 2003
 @@ -240,4 +240,8 @@ static inline unsigned int block_size(kd
        return retval;
  }
 +int dev_check_rdonly(kdev_t);
 +void dev_clear_rdonly(int);
  #endif
---- linux-2.4.19/include/linux/slab.h~vanilla-2.4.19   Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/slab.h     Sun Dec 15 17:02:12 2002
+--- linux-2.4.19/include/linux/slab.h~vanilla-2.4.19   Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/slab.h     Sun Jan 19 21:05:52 2003
 @@ -57,6 +57,7 @@ extern int kmem_cache_destroy(kmem_cache
  extern int kmem_cache_shrink(kmem_cache_t *);
  extern void *kmem_cache_alloc(kmem_cache_t *, int);
  
  extern void *kmalloc(size_t, int);
  extern void kfree(const void *);
---- linux-2.4.19/include/linux/jbd.h~vanilla-2.4.19    Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/jbd.h      Sun Dec 15 16:58:43 2002
-@@ -249,6 +249,13 @@ static inline struct journal_head *bh2jh
-       return bh->b_private;
- }
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+struct journal_callback {
-+      struct list_head jcb_list;
-+      void (*jcb_func)(struct journal_callback *jcb, int error);
-+      /* user data goes here */
-+};
-+
- struct jbd_revoke_table_s;
- /* The handle_t type represents a single atomic update being performed
-@@ -279,6 +286,12 @@ struct handle_s 
-          operations */
-       int                     h_err;
-+      /* List of application registered callbacks for this handle.
-+       * The function(s) will be called after the transaction that
-+       * this handle is part of has been committed to disk.
-+       */
-+      struct list_head        h_jcb;
-+
-       /* Flags */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-@@ -398,6 +411,10 @@ struct transaction_s 
-       /* How many handles used this transaction? */
-       int t_handle_count;
-+
-+      /* List of registered callback functions for this transaction.
-+       * Called when the transaction is committed. */
-+      struct list_head        t_jcb;
- };
-@@ -646,6 +663,9 @@ extern int  journal_flushpage(journal_t 
- extern int     journal_try_to_free_buffers(journal_t *, struct page *, int);
- extern int     journal_stop(handle_t *);
- extern int     journal_flush (journal_t *);
-+extern void    journal_callback_set(handle_t *handle,
-+                                    void (*fn)(struct journal_callback *,int),
-+                                    struct journal_callback *jcb);
- extern void    journal_lock_updates (journal_t *);
- extern void    journal_unlock_updates (journal_t *);
---- linux-2.4.19/kernel/ksyms.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/kernel/ksyms.c   Sun Dec 15 17:03:55 2002
+--- linux-2.4.19/kernel/ksyms.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/kernel/ksyms.c   Sun Jan 19 19:46:42 2003
 @@ -264,6 +264,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
  EXPORT_SYMBOL(default_llseek);
  EXPORT_SYMBOL(dentry_open);
---- linux-2.4.19/include/linux/dcache.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/dcache.h   Sun Dec 15 17:02:11 2002
+--- linux-2.4.19/include/linux/dcache.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/dcache.h   Sun Jan 19 19:46:42 2003
 @@ -6,6 +6,34 @@
  #include <asm/atomic.h>
  #include <linux/mount.h>
  };
  
  /* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.19/include/linux/fs.h~vanilla-2.4.19     Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/fs.h       Sun Dec 15 17:02:11 2002
+--- linux-2.4.19/include/linux/fs.h~vanilla-2.4.19     Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/fs.h       Sun Jan 19 21:05:40 2003
 @@ -541,6 +541,7 @@ struct file {
  
        /* needed for tty driver, and maybe others */
  
  /*
   * File types
-@@ -853,6 +856,7 @@ struct file_operations {
+@@ -853,16 +856,28 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
 +      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
++      int (*link2) (struct inode *,struct inode *, const char *, int);
        int (*unlink) (struct inode *,struct dentry *);
++      int (*unlink2) (struct inode *, char *, int);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -863,6 +867,8 @@ struct inode_operations {
++      int (*symlink2) (struct inode *,const char *, int, const char *);
+       int (*mkdir) (struct inode *,struct dentry *,int);
++      int (*mkdir2) (struct inode *,char *, int,int);
+       int (*rmdir) (struct inode *,struct dentry *);
++      int (*rmdir2) (struct inode *, char *, int);
+       int (*mknod) (struct inode *,struct dentry *,int,int);
++      int (*mknod2) (struct inode *,char *, int,int,int);
+       int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
++      int (*rename2) (struct inode *, struct inode *, 
++                      char *oldname, int oldlen, 
++                      char *newname, int newlen);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
 +      int (*follow_link2) (struct dentry *, struct nameidata *,
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
-@@ -999,6 +1005,7 @@ extern int unregister_filesystem(struct 
+@@ -999,6 +1014,7 @@ extern int unregister_filesystem(struct 
  extern struct vfsmount *kern_mount(struct file_system_type *);
  extern int may_umount(struct vfsmount *);
  extern long do_mount(char *, char *, char *, unsigned long, void *);
-+struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data);
++struct vfsmount *do_kern_mount(const char *fstype, int flags, char *name, void *data);
  extern void umount_tree(struct vfsmount *);
  
  #define kern_umount mntput
-@@ -1329,6 +1336,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1329,6 +1345,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-@@ -1339,6 +1347,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1339,6 +1356,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void iput(struct inode *);
  extern void force_delete(struct inode *);
-@@ -1448,6 +1458,8 @@ extern struct file_operations generic_ro
+@@ -1448,6 +1467,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.19/fs/dcache.c~vanilla-2.4.19    Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/dcache.c      Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/dcache.c~vanilla-2.4.19    Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/dcache.c      Sun Jan 19 19:46:42 2003
 @@ -616,6 +616,7 @@ struct dentry * d_alloc(struct dentry * 
        dentry->d_op = NULL;
        dentry->d_fsdata = NULL;
        INIT_LIST_HEAD(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.19/fs/nfsd/vfs.c~vanilla-2.4.19  Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/nfsd/vfs.c    Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/nfsd/vfs.c~vanilla-2.4.19  Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/nfsd/vfs.c    Sun Jan 19 19:46:42 2003
 @@ -1295,7 +1295,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
                        err = nfserr_perm;
        } else
        if (!err && EX_ISSYNC(tfhp->fh_export)) {
                nfsd_sync_dir(tdentry);
                nfsd_sync_dir(fdentry);
---- linux-2.4.19/fs/namei.c~vanilla-2.4.19     Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/namei.c       Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/namei.c~vanilla-2.4.19     Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/namei.c       Sun Jan 19 19:46:42 2003
 @@ -94,6 +94,12 @@
   * XEmacs seems to be relying on it...
   */
                                break;
                }
                goto return_base;
-@@ -651,10 +683,21 @@ return_err:
+@@ -646,15 +678,28 @@ out_dput:
+               dput(dentry);
+               break;
+       }
++      if (err)
++              intent_release(nd->dentry, it);
+       path_release(nd);
+ return_err:
        return err;
  }
  
  }
  
  /* SMP-safe */
-@@ -757,7 +800,8 @@ int path_init(const char *name, unsigned
+@@ -757,7 +802,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -780,13 +824,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -780,13 +826,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -798,6 +845,12 @@ out:
+@@ -798,6 +847,12 @@ out:
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -819,7 +872,7 @@ struct dentry * lookup_one_len(const cha
+@@ -819,7 +874,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -851,6 +904,23 @@ int __user_walk(const char *name, unsign
+@@ -851,6 +906,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -987,7 +1057,8 @@ exit_lock:
+@@ -987,7 +1059,8 @@ exit_lock:
   * for symlinks (where the permissions are checked later).
   * SMP-safe
   */
  {
        int acc_mode, error = 0;
        struct inode *inode;
-@@ -1002,7 +1073,7 @@ int open_namei(const char * pathname, in
+@@ -1002,7 +1075,7 @@ int open_namei(const char * pathname, in
         */
        if (!(flag & O_CREAT)) {
                if (path_init(pathname, lookup_flags(flag), nd))
                if (error)
                        return error;
                dentry = nd->dentry;
-@@ -1012,6 +1083,10 @@ int open_namei(const char * pathname, in
+@@ -1012,6 +1085,10 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
        if (path_init(pathname, LOOKUP_PARENT, nd))
                error = path_walk(pathname, nd);
        if (error)
-@@ -1028,7 +1103,7 @@ int open_namei(const char * pathname, in
+@@ -1028,7 +1105,7 @@ int open_namei(const char * pathname, in
  
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1037,6 +1112,7 @@ do_last:
+@@ -1037,6 +1114,7 @@ do_last:
                goto exit;
        }
  
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                error = vfs_create(dir->d_inode, dentry,
-@@ -1070,7 +1146,8 @@ do_last:
+@@ -1070,7 +1148,8 @@ do_last:
        error = -ENOENT;
        if (!dentry->d_inode)
                goto exit_dput;
                goto do_link;
  
        dput(nd->dentry);
-@@ -1156,8 +1233,10 @@ ok:
+@@ -1156,8 +1235,10 @@ ok:
        return 0;
  
  exit_dput:
        path_release(nd);
        return error;
  
-@@ -1176,7 +1255,12 @@ do_link:
+@@ -1176,7 +1257,12 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
        dput(dentry);
        if (error)
                return error;
-@@ -1198,13 +1282,20 @@ do_link:
+@@ -1198,13 +1284,20 @@ do_link:
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
  {
        struct dentry *dentry;
  
-@@ -1212,7 +1303,7 @@ static struct dentry *lookup_create(stru
+@@ -1212,7 +1305,7 @@ static struct dentry *lookup_create(stru
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1258,6 +1349,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1258,6 +1351,7 @@ asmlinkage long sys_mknod(const char * f
        char * tmp;
        struct dentry * dentry;
        struct nameidata nd;
  
        if (S_ISDIR(mode))
                return -EPERM;
-@@ -1269,7 +1361,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1269,7 +1363,19 @@ asmlinkage long sys_mknod(const char * f
                error = path_walk(tmp, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
++
++      if (nd.dentry->d_inode->i_op->mknod2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->mknod2(nd.dentry->d_inode, 
++                                 nd.last.name, 
++                                 nd.last.len,
++                                 mode, dev);
++              /* -EOPNOTSUPP: the file system wants the normal VFS path */
++              if (error != -EOPNOTSUPP)
++                      goto out2;
++      }
++      
 +      dentry = lookup_create(&nd, 0, &it);
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1287,6 +1379,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1287,9 +1393,11 @@ asmlinkage long sys_mknod(const char * f
                default:
                        error = -EINVAL;
                }
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
-@@ -1327,6 +1420,7 @@ asmlinkage long sys_mkdir(const char * p
++ out2:
+       path_release(&nd);
+ out:
+       putname(tmp);
+@@ -1327,6 +1435,7 @@ asmlinkage long sys_mkdir(const char * p
  {
        int error = 0;
        char * tmp;
  
        tmp = getname(pathname);
        error = PTR_ERR(tmp);
-@@ -1338,11 +1432,12 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1338,14 +1447,26 @@ asmlinkage long sys_mkdir(const char * p
                        error = path_walk(tmp, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
++              if (nd.dentry->d_inode->i_op->mkdir2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir2(nd.dentry->d_inode, 
++                                         nd.last.name, 
++                                         nd.last.len,
++                                         mode);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
 +              dentry = lookup_create(&nd, 1, &it);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1426,6 +1521,7 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+               path_release(&nd);
+ out:
+               putname(tmp);
+@@ -1426,6 +1547,7 @@ asmlinkage long sys_rmdir(const char * p
        char * name;
        struct dentry *dentry;
        struct nameidata nd;
  
        name = getname(pathname);
        if(IS_ERR(name))
-@@ -1448,10 +1544,11 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1447,11 +1569,21 @@ asmlinkage long sys_rmdir(const char * p
+                       error = -EBUSY;
                        goto exit1;
        }
++      if (nd.dentry->d_inode->i_op->rmdir2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->rmdir2(nd.dentry->d_inode, 
++                                 nd.last.name, 
++                                 nd.last.len);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
-@@ -1495,6 +1592,7 @@ asmlinkage long sys_unlink(const char * 
+@@ -1495,6 +1627,7 @@ asmlinkage long sys_unlink(const char * 
        char * name;
        struct dentry *dentry;
        struct nameidata nd;
  
        name = getname(pathname);
        if(IS_ERR(name))
-@@ -1508,7 +1606,7 @@ asmlinkage long sys_unlink(const char * 
+@@ -1507,8 +1640,17 @@ asmlinkage long sys_unlink(const char * 
+       error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
++      if (nd.dentry->d_inode->i_op->unlink2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink2(nd.dentry->d_inode, 
++                                  nd.last.name, 
++                                  nd.last.len);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
 +      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1516,6 +1614,7 @@ asmlinkage long sys_unlink(const char * 
+@@ -1516,6 +1658,7 @@ asmlinkage long sys_unlink(const char * 
                        goto slashes;
                error = vfs_unlink(nd.dentry->d_inode, dentry);
        exit2:
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
-@@ -1562,6 +1661,7 @@ asmlinkage long sys_symlink(const char *
+@@ -1562,6 +1705,7 @@ asmlinkage long sys_symlink(const char *
        int error = 0;
        char * from;
        char * to;
  
        from = getname(oldname);
        if(IS_ERR(from))
-@@ -1576,10 +1676,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1576,15 +1720,28 @@ asmlinkage long sys_symlink(const char *
                        error = path_walk(to, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
++              if (nd.dentry->d_inode->i_op->symlink2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->symlink2(nd.dentry->d_inode, 
++                                           nd.last.name, 
++                                           nd.last.len,
++                                           from);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
 +              it.it_data = from;
 +              dentry = lookup_create(&nd, 0, &it);
                error = PTR_ERR(dentry);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1645,6 +1747,7 @@ asmlinkage long sys_link(const char * ol
++      out2:
+               path_release(&nd);
+-out:
++      out:
+               putname(to);
+       }
+       putname(from);
+@@ -1645,6 +1802,7 @@ asmlinkage long sys_link(const char * ol
        int error;
        char * from;
        char * to;
  
        from = getname(oldname);
        if(IS_ERR(from))
-@@ -1657,7 +1760,7 @@ asmlinkage long sys_link(const char * ol
+@@ -1657,7 +1815,7 @@ asmlinkage long sys_link(const char * ol
  
                error = 0;
                if (path_init(from, LOOKUP_POSITIVE, &old_nd))
                if (error)
                        goto exit;
                if (path_init(to, LOOKUP_PARENT, &nd))
-@@ -1667,10 +1770,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1667,10 +1825,22 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
++              if (nd.dentry->d_inode->i_op->link2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->link2(old_nd.dentry->d_inode, 
++                                        nd.dentry->d_inode, 
++                                        nd.last.name, 
++                                        nd.last.len);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out_release;
++              }
 +              it.it_op = IT_LINK2;
 +              new_dentry = lookup_create(&nd, 0, &it);
                error = PTR_ERR(new_dentry);
                        dput(new_dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1713,7 +1818,8 @@ exit:
+@@ -1713,7 +1883,8 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
        struct inode *target;
-@@ -1771,6 +1877,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1771,6 +1942,7 @@ int vfs_rename_dir(struct inode *old_dir
                error = -EBUSY;
        else 
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (target) {
                if (!error)
                        target->i_flags |= S_DEAD;
-@@ -1792,7 +1899,8 @@ out_unlock:
+@@ -1792,7 +1964,8 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
  
-@@ -1823,6 +1931,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1823,6 +1996,7 @@ int vfs_rename_other(struct inode *old_d
                error = -EBUSY;
        else
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
        if (error)
                return error;
-@@ -1834,13 +1943,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1834,13 +2008,14 @@ int vfs_rename_other(struct inode *old_d
  }
  
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!error) {
                if (old_dir == new_dir)
                        inode_dir_notify(old_dir, DN_RENAME);
-@@ -1857,6 +1967,7 @@ static inline int do_rename(const char *
+@@ -1857,6 +2032,7 @@ static inline int do_rename(const char *
        int error = 0;
        struct dentry * old_dir, * new_dir;
        struct dentry * old_dentry, *new_dentry;
        struct nameidata oldnd, newnd;
  
        if (path_init(oldname, LOOKUP_PARENT, &oldnd))
-@@ -1885,7 +1996,7 @@ static inline int do_rename(const char *
+@@ -1883,9 +2059,23 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
++      if (old_dir->d_inode->i_op->rename2) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, 
++                                              new_dir->d_inode,
++                                              oldnd.last.name, 
++                                              oldnd.last.len,
++                                              newnd.last.name,
++                                              newnd.last.len);
++              unlock_kernel();
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1901,18 +2012,21 @@ static inline int do_rename(const char *
+@@ -1901,18 +2091,21 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
        dput(old_dentry);
  exit3:
        double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1961,7 +2075,8 @@ out:
+@@ -1961,7 +2154,8 @@ out:
  }
  
  static inline int
  {
        int res = 0;
        char *name;
-@@ -1974,7 +2089,7 @@ __vfs_follow_link(struct nameidata *nd, 
+@@ -1974,7 +2168,7 @@ __vfs_follow_link(struct nameidata *nd, 
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -1996,7 +2111,13 @@ fail:
+@@ -1996,7 +2190,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
  }
  
  /* get the link contents into pagecache */
-@@ -2038,7 +2159,7 @@ int page_follow_link(struct dentry *dent
+@@ -2038,7 +2238,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.19/fs/open.c~vanilla-2.4.19      Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/open.c        Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/open.c~vanilla-2.4.19      Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/open.c        Sun Jan 19 19:46:42 2003
 @@ -19,6 +19,9 @@
  #include <asm/uaccess.h>
  
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.19/fs/stat.c~vanilla-2.4.19      Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/stat.c        Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/stat.c~vanilla-2.4.19      Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/stat.c        Sun Jan 19 19:46:42 2003
 @@ -13,6 +13,7 @@
  
  #include <asm/uaccess.h>
                path_release(&nd);
        }
        return error;
---- linux-2.4.19/mm/slab.c~vanilla-2.4.19      Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/mm/slab.c        Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/mm/slab.c~vanilla-2.4.19      Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/mm/slab.c        Sun Jan 19 19:46:42 2003
 @@ -1207,6 +1207,59 @@ failed:
   * Called with the cache-lock held.
   */
index ba7b7ac..7384675 100644 (file)
@@ -1,25 +1,30 @@
- fs/dcache.c            |    
- fs/namei.c             |  228 ++++++++++++++++++++++++++++++++++++++-----------
+ fs/dcache.c            |    8 +
+ fs/namei.c             |  288 ++++++++++++++++++++++++++++++++++++++++---------
  fs/nfsd/vfs.c          |    2 
- fs/open.c              |   53 +++++++++--
+ fs/open.c              |   53 +++++++--
  fs/stat.c              |    9 +
- include/linux/dcache.h |   31 ++++++
- include/linux/fs.h     |   13 ++
+ include/linux/dcache.h |   25 ++++
+ include/linux/fs.h     |   22 +++
  kernel/ksyms.c         |    1 
- 8 files changed, 278 insertions(+), 62 deletions(-)
+ 8 files changed, 345 insertions(+), 63 deletions(-)
 
---- linux-2.4.18-18.8.0-l4/fs/dcache.c~vfs_intent-2.4.18-18    Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/dcache.c    Sat Dec 14 06:31:22 2002
-@@ -150,6 +150,8 @@ repeat:
- unhash_it:
-       list_del_init(&dentry->d_hash);
+--- linux-2.4.18-49chaos-lustre9/fs/dcache.c~vfs_intent-2.4.18-18      Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/dcache.c      Wed Jan 29 12:43:32 2003
+@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
 +
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
 +
- kill_it: {
-               struct dentry *parent;
-               list_del(&dentry->d_child);
-@@ -645,6 +647,7 @@ struct dentry * d_alloc(struct dentry * 
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -645,6 +652,7 @@ struct dentry * d_alloc(struct dentry * 
        dentry->d_fsdata = NULL;
        dentry->d_extra_attributes = NULL;
        dentry->d_mounted = 0;
        INIT_LIST_HEAD(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.18-18.8.0-l4/fs/namei.c~vfs_intent-2.4.18-18     Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/namei.c     Sat Dec 14 06:37:21 2002
-@@ -1,3 +1,6 @@
-+
-+
-+
- /*
-  *  linux/fs/namei.c
-  *
-@@ -94,6 +97,14 @@
+--- linux-2.4.18-49chaos-lustre9/fs/namei.c~vfs_intent-2.4.18-18       Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/namei.c       Wed Feb  5 16:23:06 2003
+@@ -94,6 +94,13 @@
   * XEmacs seems to be relying on it...
   */
  
 +
 +}
 +
-+
  /* In order to reduce some races, while at the same time doing additional
   * checking and hopefully speeding things up, we copy filenames to the
   * kernel data space before using them..
-@@ -260,10 +271,19 @@ void path_release(struct nameidata *nd)
+@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd)
   * Internal lookup() using the new generic dcache.
   * SMP-safe
   */
@@ -72,7 +69,7 @@
        if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
                if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
                        dput(dentry);
-@@ -281,7 +301,8 @@ static struct dentry * cached_lookup(str
+@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str
   * make sure that nobody added the entry to the dcache in the meantime..
   * SMP-safe
   */
  {
        struct dentry * result;
        struct inode *dir = parent->d_inode;
-@@ -300,6 +321,9 @@ static struct dentry * real_lookup(struc
++again:
++
+       down(&dir->i_sem);
+       /*
+        * First re-do the cached lookup just in case it was created
+@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc
                result = ERR_PTR(-ENOMEM);
                if (dentry) {
                        lock_kernel();
@@ -92,7 +95,7 @@
                        result = dir->i_op->lookup(dir, dentry);
                        unlock_kernel();
                        if (result)
-@@ -321,6 +345,12 @@ static struct dentry * real_lookup(struc
+@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc
                        dput(result);
                        result = ERR_PTR(-ENOENT);
                }
 +              if (!result->d_op->d_revalidate2(result, flags, it) &&
 +                  !d_invalidate(result)) {
 +                      dput(result);
-+                      result = ERR_PTR(-ENOENT);
++                      goto again;
 +              }
        }
        return result;
  }
-@@ -334,7 +364,8 @@ int max_recursive_link = 5;
+@@ -334,7 +362,8 @@ int max_recursive_link = 5;
   * Without that kind of total limit, nasty chains of consecutive
   * symlinks can cause almost arbitrarily long lookups. 
   */
 -static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
-+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, 
-+                                 struct lookup_intent *it)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++                               struct lookup_intent *it)
  {
        int err;
        if (current->link_count >= max_recursive_link)
-@@ -348,10 +379,14 @@ static inline int do_follow_link(struct 
+@@ -348,10 +377,14 @@ static inline int do_follow_link(struct 
        current->link_count++;
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
 -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (dentry->d_inode->i_op->follow_link2)
-+                err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else 
-+                err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (dentry->d_inode->i_op->follow_link2)
++              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++      else
++              err = dentry->d_inode->i_op->follow_link(dentry, nd);
        current->link_count--;
        return err;
  loop:
-+        intent_release(dentry, it);
++      intent_release(dentry, it);
        path_release(nd);
        return -ELOOP;
  }
-@@ -449,7 +484,8 @@ static inline void follow_dotdot(struct 
+@@ -449,7 +482,8 @@ static inline void follow_dotdot(struct 
   *
   * We expect 'base' to be positive and a directory.
   */
  {
        struct dentry *dentry;
        struct inode *inode;
-@@ -526,12 +562,12 @@ int link_path_walk(const char * name, st
+@@ -526,12 +560,12 @@ int link_path_walk(const char * name, st
                                break;
                }
                /* This does the actual lookups.. */
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
-@@ -548,8 +584,8 @@ int link_path_walk(const char * name, st
+@@ -548,8 +582,8 @@ int link_path_walk(const char * name, st
                if (!inode->i_op)
                        goto out_dput;
  
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -565,7 +601,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +599,7 @@ int link_path_walk(const char * name, st
                        nd->dentry = dentry;
                }
                err = -ENOTDIR; 
                        break;
                continue;
                /* here ends the main loop */
-@@ -592,12 +628,12 @@ last_component:
+@@ -592,12 +626,12 @@ last_component:
                        if (err < 0)
                                break;
                }
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
-@@ -606,8 +642,10 @@ last_component:
+@@ -606,8 +640,9 @@ last_component:
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
 -                  && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
-+                  && inode && inode->i_op && 
-+                    (inode->i_op->follow_link || 
-+                     inode->i_op->follow_link2)) {
++                  && inode && inode->i_op &&
++                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                goto return_err;
-@@ -621,7 +659,8 @@ last_component:
+@@ -621,7 +656,8 @@ last_component:
                        goto no_inode;
                if (lookup_flags & LOOKUP_DIRECTORY) {
                        err = -ENOTDIR; 
 -                      if (!inode->i_op || !inode->i_op->lookup)
-+                      if (!inode->i_op || (!inode->i_op->lookup &&
-+                                           !inode->i_op->lookup2))
++                      if (!inode->i_op ||
++                          (!inode->i_op->lookup && !inode->i_op->lookup2))
                                break;
                }
                goto return_base;
-@@ -663,10 +702,21 @@ return_err:
+@@ -658,15 +694,28 @@ out_dput:
+               dput(dentry);
+               break;
+       }
++      if (err)
++              intent_release(nd->dentry, it);
+       path_release(nd);
+ return_err:
        return err;
  }
  
  }
  
  /* SMP-safe */
-@@ -751,6 +801,17 @@ walk_init_root(const char *name, struct 
+@@ -751,6 +800,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -779,7 +840,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +839,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -802,13 +864,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -802,13 +863,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -820,6 +885,12 @@ out:
+@@ -820,6 +884,12 @@ out:
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -841,7 +912,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +911,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -872,6 +943,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +942,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1045,14 +1133,17 @@ int may_open(struct nameidata *nd, int a
+@@ -1045,14 +1132,17 @@ int may_open(struct nameidata *nd, int a
          return get_lease(inode, flag);
  }
  
 +extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it);
++                                 int flags, struct lookup_intent *it);
 +
  struct file *filp_open(const char * pathname, int open_flags, int mode)
  {
        struct dentry *dir;
        int flag = open_flags;
        struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN };
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags };
        int count = 0;
  
        if ((flag+1) & O_ACCMODE)
-@@ -1066,7 +1157,7 @@ struct file *filp_open(const char * path
+@@ -1066,7 +1156,7 @@ struct file *filp_open(const char * path
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
                if (error)
                        return ERR_PTR(error);
                dentry = nd.dentry;
-@@ -1076,6 +1167,8 @@ struct file *filp_open(const char * path
+@@ -1076,6 +1166,8 @@ struct file *filp_open(const char * path
        /*
         * Create - we need to know the parent.
         */
-+        it.it_mode = mode;
-+        it.it_op |= IT_CREAT;
++      it.it_mode = mode;
++      it.it_op |= IT_CREAT;
        error = path_lookup(pathname, LOOKUP_PARENT, &nd);
        if (error)
                return ERR_PTR(error);
-@@ -1091,7 +1184,7 @@ struct file *filp_open(const char * path
+@@ -1091,7 +1183,7 @@ struct file *filp_open(const char * path
  
        dir = nd.dentry;
        down(&dir->d_inode->i_sem);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1100,6 +1193,7 @@ do_last:
+@@ -1100,6 +1192,7 @@ do_last:
                goto exit;
        }
  
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                error = vfs_create(dir->d_inode, dentry,
-@@ -1134,7 +1228,8 @@ do_last:
+@@ -1134,7 +1227,8 @@ do_last:
        error = -ENOENT;
        if (!dentry->d_inode)
                goto exit_dput;
 -      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || 
-+                                      dentry->d_inode->i_op->follow_link2))
++      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
++                                    dentry->d_inode->i_op->follow_link2))
                goto do_link;
  
        dput(nd.dentry);
-@@ -1149,11 +1244,13 @@ ok:
+@@ -1149,11 +1243,13 @@ ok:
        if (!S_ISREG(nd.dentry->d_inode->i_mode))
                open_flags &= ~O_TRUNC;
  
 -        return dentry_open(nd.dentry, nd.mnt, open_flags);
-+        return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
++      return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
  
  exit_dput:
-+        intent_release(dentry, &it);
++      intent_release(dentry, &it);
        dput(dentry);
  exit:
-+        intent_release(nd.dentry, &it);
++      intent_release(nd.dentry, &it);
        path_release(&nd);
        return ERR_PTR(error);
  
-@@ -1172,7 +1269,12 @@ do_link:
+@@ -1172,7 +1268,12 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
 -      error = dentry->d_inode->i_op->follow_link(dentry, &nd);
-+        if (dentry->d_inode->i_op->follow_link2) 
-+                error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
-+        else 
-+                error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++      if (dentry->d_inode->i_op->follow_link2)
++              error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
++      else
++              error = dentry->d_inode->i_op->follow_link(dentry, &nd);
 +      if (error)
 +              intent_release(dentry, &it);
        dput(dentry);
        if (error)
                return error;
-@@ -1194,13 +1296,15 @@ do_link:
+@@ -1194,13 +1295,15 @@ do_link:
        }
        dir = nd.dentry;
        down(&dir->d_inode->i_sem);
  {
        struct dentry *dentry;
  
-@@ -1208,7 +1312,7 @@ static struct dentry *lookup_create(stru
+@@ -1208,7 +1311,7 @@ static struct dentry *lookup_create(stru
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1254,6 +1358,7 @@ asmlinkage long sys_mknod(const char * f
-       char * tmp;
-       struct dentry * dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode };
-       if (S_ISDIR(mode))
-               return -EPERM;
-@@ -1264,7 +1369,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1264,7 +1367,19 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
-+      dentry = lookup_create(&nd, 0, &it);
++
++      if (nd.dentry->d_inode->i_op->mknod2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->mknod2(nd.dentry->d_inode,
++                                 nd.last.name,
++                                 nd.last.len,
++                                 mode, dev);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto out2;
++      }
++
++      dentry = lookup_create(&nd, 0, NULL);
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1282,6 +1387,7 @@ asmlinkage long sys_mknod(const char * f
-               default:
-                       error = -EINVAL;
-               }
-+              intent_release(dentry, &it);
+@@ -1285,6 +1400,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
-@@ -1322,6 +1428,7 @@ asmlinkage long sys_mkdir(const char * p
- {
-       int error = 0;
-       char * tmp;
-+      struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode };
-       tmp = getname(pathname);
-       error = PTR_ERR(tmp);
-@@ -1332,11 +1439,12 @@ asmlinkage long sys_mkdir(const char * p
++out2:
+       path_release(&nd);
+ out:
+       putname(tmp);
+@@ -1332,7 +1448,17 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              dentry = lookup_create(&nd, 1, &it);
++              if (nd.dentry->d_inode->i_op->mkdir2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir2(nd.dentry->d_inode,
++                                         nd.last.name,
++                                         nd.last.len,
++                                         mode);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 1, NULL);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-                                         mode & ~current->fs->umask);
-+                      intent_release(dentry, &it);
+@@ -1340,6 +1466,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1420,6 +1528,7 @@ asmlinkage long sys_rmdir(const char * p
-       char * name;
-       struct dentry *dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_RMDIR };
-       name = getname(pathname);
-       if(IS_ERR(name))
-@@ -1441,10 +1550,11 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+               path_release(&nd);
+ out:
+               putname(tmp);
+@@ -1440,8 +1567,17 @@ asmlinkage long sys_rmdir(const char * p
+                       error = -EBUSY;
                        goto exit1;
        }
++      if (nd.dentry->d_inode->i_op->rmdir2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->rmdir2(nd.dentry->d_inode,
++                                 nd.last.name,
++                                 nd.last.len);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-+              intent_release(dentry, &it);
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-@@ -1488,6 +1598,7 @@ asmlinkage long sys_unlink(const char * 
-       char * name;
-       struct dentry *dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_UNLINK };
-       name = getname(pathname);
-       if(IS_ERR(name))
-@@ -1500,7 +1611,7 @@ asmlinkage long sys_unlink(const char * 
+@@ -1499,8 +1635,17 @@ asmlinkage long sys_unlink(const char * 
+       error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
++      if (nd.dentry->d_inode->i_op->unlink2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink2(nd.dentry->d_inode,
++                                  nd.last.name,
++                                  nd.last.len);
++              /* on -EOPNOTSUPP the fs wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1508,6 +1619,7 @@ asmlinkage long sys_unlink(const char * 
-                       goto slashes;
-               error = vfs_unlink(nd.dentry->d_inode, dentry);
-       exit2:
-+              intent_release(dentry, &it);
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-@@ -1554,6 +1666,7 @@ asmlinkage long sys_symlink(const char *
-       int error = 0;
-       char * from;
-       char * to;
-+      struct lookup_intent it = { .it_op = IT_SYMLINK };
-       from = getname(oldname);
-       if(IS_ERR(from))
-@@ -1567,10 +1680,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1567,15 +1712,26 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              it.it_data = from;
-+              dentry = lookup_create(&nd, 0, &it);
++              if (nd.dentry->d_inode->i_op->symlink2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->symlink2(nd.dentry->d_inode,
++                                           nd.last.name,
++                                           nd.last.len,
++                                           from);
++                      /* on -EOPNOTSUPP the fs wants the normal vfs path */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 0, NULL);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-+                      intent_release(dentry, &it);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1635,6 +1750,7 @@ asmlinkage long sys_link(const char * ol
- {
-       int error;
-       char * to;
-+      struct lookup_intent it = { .it_op = IT_LINK };
-       to = getname(newname);
-       error = PTR_ERR(to);
-@@ -1642,7 +1758,7 @@ asmlinkage long sys_link(const char * ol
++      out2:
+               path_release(&nd);
+-out:
++      out:
+               putname(to);
+       }
+       putname(from);
+@@ -1642,7 +1798,7 @@ asmlinkage long sys_link(const char * ol
                struct dentry *new_dentry;
                struct nameidata nd, old_nd;
  
 -              error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
-+              error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, &it);
++              error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, NULL);
                if (error)
                        goto exit;
                error = path_lookup(to, LOOKUP_PARENT, &nd);
-@@ -1651,10 +1767,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1651,7 +1807,17 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              it.it_op = IT_LINK2;
-+              new_dentry = lookup_create(&nd, 0, &it);
++              if (nd.dentry->d_inode->i_op->link2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->link2(old_nd.dentry->d_inode,
++                                        nd.dentry->d_inode,
++                                        nd.last.name,
++                                        nd.last.len);
++                      /* on -EOPNOTSUPP the fs wants the normal vfs path */
++                      if (error != -EOPNOTSUPP)
++                              goto out_release;
++              }
++              new_dentry = lookup_create(&nd, 0, NULL);
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-+                      intent_release(new_dentry, &it);
-                       dput(new_dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-@@ -1695,7 +1813,8 @@ exit:
+@@ -1695,7 +1861,8 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
        struct inode *target;
-@@ -1753,6 +1872,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1753,6 +1920,7 @@ int vfs_rename_dir(struct inode *old_dir
                error = -EBUSY;
        else 
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (target) {
                if (!error)
                        target->i_flags |= S_DEAD;
-@@ -1774,7 +1894,8 @@ out_unlock:
+@@ -1774,7 +1942,8 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
  
-@@ -1805,6 +1926,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1805,6 +1974,7 @@ int vfs_rename_other(struct inode *old_d
                error = -EBUSY;
        else
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
        if (error)
                return error;
-@@ -1816,13 +1938,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1816,13 +1986,14 @@ int vfs_rename_other(struct inode *old_d
  }
  
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!error) {
                if (old_dir == new_dir)
                        inode_dir_notify(old_dir, DN_RENAME);
-@@ -1839,6 +1962,7 @@ static inline int do_rename(const char *
-       int error = 0;
-       struct dentry * old_dir, * new_dir;
-       struct dentry * old_dentry, *new_dentry;
-+      struct lookup_intent it = { .it_op = IT_RENAME };
-       struct nameidata oldnd, newnd;
-       error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
-@@ -1864,7 +1988,7 @@ static inline int do_rename(const char *
+@@ -1862,9 +2033,23 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
++      if (old_dir->d_inode->i_op->rename2) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++                                                      new_dir->d_inode,
++                                                      oldnd.last.name,
++                                                      oldnd.last.len,
++                                                      newnd.last.name,
++                                                      newnd.last.len);
++              unlock_kernel();
++              /* on -EOPNOTSUPP the fs wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
-+      old_dentry = lookup_hash_it(&oldnd.last, old_dir, &it);
++      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1880,18 +2004,21 @@ static inline int do_rename(const char *
+@@ -1880,14 +2065,14 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
 -      new_dentry = lookup_hash(&newnd.last, new_dir);
-+      it.it_op = IT_RENAME2;
-+      new_dentry = lookup_hash_it(&newnd.last, new_dir, &it);
++      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto exit4;
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
 -                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, &it);
++                                 new_dir->d_inode, new_dentry, NULL);
        unlock_kernel();
  
-+      intent_release(new_dentry, &it);
        dput(new_dentry);
- exit4:
-+      intent_release(old_dentry, &it);
-       dput(old_dentry);
- exit3:
-       double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1940,7 +2067,8 @@ out:
+@@ -1940,7 +2125,8 @@ out:
  }
  
  static inline int
 -__vfs_follow_link(struct nameidata *nd, const char *link)
-+__vfs_follow_link(struct nameidata *nd, const char *link, 
-+                     struct lookup_intent *it)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++                struct lookup_intent *it)
  {
        int res = 0;
        char *name;
-@@ -1953,7 +2081,7 @@ __vfs_follow_link(struct nameidata *nd, 
+@@ -1953,7 +2139,7 @@ __vfs_follow_link(struct nameidata *nd, 
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -1975,7 +2103,13 @@ fail:
+@@ -1975,7 +2161,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
 +      return __vfs_follow_link(nd, link, NULL);
 +}
 +
-+int vfs_follow_link_it(struct nameidata *nd, const char *link, 
-+                       struct lookup_intent *it)
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++                     struct lookup_intent *it)
 +{
 +      return __vfs_follow_link(nd, link, it);
  }
  
  /* get the link contents into pagecache */
-@@ -2017,7 +2151,7 @@ int page_follow_link(struct dentry *dent
+@@ -2017,7 +2209,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.18-18.8.0-l4/fs/nfsd/vfs.c~vfs_intent-2.4.18-18  Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/nfsd/vfs.c  Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/nfsd/vfs.c~vfs_intent-2.4.18-18    Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/nfsd/vfs.c    Wed Jan 29 12:43:32 2003
 @@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
                        err = nfserr_perm;
        } else
        unlock_kernel();
        if (!err && EX_ISSYNC(tfhp->fh_export)) {
                nfsd_sync_dir(tdentry);
---- linux-2.4.18-18.8.0-l4/fs/open.c~vfs_intent-2.4.18-18      Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/open.c      Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/open.c~vfs_intent-2.4.18-18        Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/open.c        Wed Jan 29 12:43:32 2003
 @@ -19,6 +19,9 @@
  #include <asm/uaccess.h>
  
        struct nameidata nd;
        struct inode * inode;
        int error;
-+      struct lookup_intent it = { .it_op = IT_SETATTR };
++      struct lookup_intent it = { .it_op = IT_TRUNC };
  
        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.18-18.8.0-l4/fs/stat.c~vfs_intent-2.4.18-18      Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/stat.c      Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/stat.c~vfs_intent-2.4.18-18        Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/stat.c        Wed Jan 29 12:43:32 2003
 @@ -13,6 +13,7 @@
  
  #include <asm/uaccess.h>
                path_release(&nd);
        }
        return error;
---- linux-2.4.18-18.8.0-l4/include/linux/dcache.h~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/include/linux/dcache.h Sat Dec 14 06:31:22 2002
-@@ -6,6 +6,34 @@
+--- linux-2.4.18-49chaos-lustre9/include/linux/dcache.h~vfs_intent-2.4.18-18   Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/include/linux/dcache.h   Wed Jan 29 12:43:32 2003
+@@ -6,6 +6,27 @@
  #include <asm/atomic.h>
  #include <linux/mount.h>
  
-+#define IT_OPEN  (1)
-+#define IT_CREAT  (1<<1)
-+#define IT_MKDIR  (1<<2)
-+#define IT_LINK  (1<<3)
-+#define IT_LINK2  (1<<4)
-+#define IT_SYMLINK  (1<<5)
-+#define IT_UNLINK  (1<<6)
-+#define IT_RMDIR  (1<<7)
-+#define IT_RENAME  (1<<8)
-+#define IT_RENAME2  (1<<9)
-+#define IT_READDIR  (1<<10)
-+#define IT_GETATTR  (1<<11)
-+#define IT_SETATTR  (1<<12)
-+#define IT_READLINK  (1<<13)
-+#define IT_MKNOD  (1<<14)
-+#define IT_LOOKUP  (1<<15)
++#define IT_OPEN     (1)
++#define IT_CREAT    (1<<1)
++#define IT_READDIR  (1<<2)
++#define IT_GETATTR  (1<<3)
++#define IT_SETATTR  (1<<4)
++#define IT_TRUNC    (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP   (1<<7)
 +
 +struct lookup_intent {
 +      int it_op;
 +      int it_mode;
++      int it_flags;
 +      int it_disposition;
 +      int it_status;
 +      struct iattr *it_iattr;
  /*
   * linux/include/linux/dcache.h
   *
-@@ -78,6 +106,7 @@ struct dentry {
+@@ -78,6 +99,7 @@ struct dentry {
        unsigned long d_time;           /* used by d_revalidate */
        struct dentry_operations  *d_op;
        struct super_block * d_sb;      /* The root of the dentry tree */
        unsigned long d_vfs_flags;
        void * d_fsdata;                /* fs-specific data */
        void * d_extra_attributes;      /* TUX-specific data */
-@@ -91,6 +120,8 @@ struct dentry_operations {
+@@ -91,6 +113,8 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
  };
  
  /* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.18-18.8.0-l4/include/linux/fs.h~vfs_intent-2.4.18-18     Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/include/linux/fs.h     Sat Dec 14 06:33:11 2002
+@@ -124,6 +148,7 @@ d_iput:            no              no              yes
+                                        * s_nfsd_free_path semaphore will be down
+                                        */
+ #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
+ extern spinlock_t dcache_lock;
+--- linux-2.4.18-49chaos-lustre9/include/linux/fs.h~vfs_intent-2.4.18-18       Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/include/linux/fs.h       Wed Jan 29 12:43:32 2003
 @@ -576,6 +576,7 @@ struct file {
  
        /* needed for tty driver, and maybe others */
  
  /*
   * File types
-@@ -897,6 +900,7 @@ struct file_operations {
+@@ -897,16 +900,28 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
 +      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
++      int (*link2) (struct inode *,struct inode *, const char *, int);
        int (*unlink) (struct inode *,struct dentry *);
++      int (*unlink2) (struct inode *, const char *, int);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -907,6 +911,8 @@ struct inode_operations {
++      int (*symlink2) (struct inode *, const char *, int, const char *);
+       int (*mkdir) (struct inode *,struct dentry *,int);
++      int (*mkdir2) (struct inode *, const char *, int,int);
+       int (*rmdir) (struct inode *,struct dentry *);
++      int (*rmdir2) (struct inode *, const char *, int);
+       int (*mknod) (struct inode *,struct dentry *,int,int);
++      int (*mknod2) (struct inode *, const char *, int,int,int);
+       int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
++      int (*rename2) (struct inode *, struct inode *,
++                      const char *oldname, int oldlen,
++                      const char *newname, int newlen);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
-+      int (*follow_link2) (struct dentry *, struct nameidata *, 
-+                            struct lookup_intent *it);
++      int (*follow_link2) (struct dentry *, struct nameidata *,
++                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
-@@ -1381,6 +1387,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1383,6 +1398,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1392,6 +1399,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1394,6 +1410,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void inode_init_once(struct inode *);
  extern void iput(struct inode *);
-@@ -1492,6 +1501,8 @@ extern struct file_operations generic_ro
+@@ -1494,6 +1512,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
-+extern int vfs_follow_link_it(struct nameidata *, const char *, 
-+                              struct lookup_intent *it);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++                            struct lookup_intent *it);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.18-18.8.0-l4/kernel/ksyms.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/kernel/ksyms.c Sat Dec 14 06:31:22 2002
-@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
+--- linux-2.4.18-49chaos-lustre9/kernel/ksyms.c~vfs_intent-2.4.18-18   Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/kernel/ksyms.c   Wed Jan 29 12:43:32 2003
+@@ -294,6 +294,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
  EXPORT_SYMBOL(vfs_follow_link);
index 54c498a..75e404b 100644 (file)
@@ -1,20 +1,30 @@
+ fs/dcache.c            |    8 +
+ fs/namei.c             |  287 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/nfsd/vfs.c          |    2 
+ fs/open.c              |   53 +++++++--
+ fs/stat.c              |    9 +
+ include/linux/dcache.h |   25 ++++
+ include/linux/fs.h     |   22 +++
+ kernel/ksyms.c         |    1 
+ 8 files changed, 344 insertions(+), 63 deletions(-)
 
-
-
- 0 files changed
-
---- linux-2.4.18-17.8.0/fs/dcache.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/dcache.c        2002-12-06 14:52:31.000000000 -0800
-@@ -150,6 +150,8 @@ repeat:
- unhash_it:
-       list_del_init(&dentry->d_hash);
+--- linux-2.4.18-18.8.0-l7/fs/dcache.c~vfs_intent-2.4.18-18    Mon Jan 20 08:28:00 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/dcache.c    Mon Jan 20 08:54:54 2003
+@@ -186,6 +188,13 @@ int d_invalidate(struct dentry * dentry)
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
 +
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { 
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
 +
- kill_it: {
-               struct dentry *parent;
-               list_del(&dentry->d_child);
-@@ -645,6 +647,7 @@ struct dentry * d_alloc(struct dentry * 
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -645,6 +654,7 @@ struct dentry * d_alloc(struct dentry * 
        dentry->d_fsdata = NULL;
        dentry->d_extra_attributes = NULL;
        dentry->d_mounted = 0;
        INIT_LIST_HEAD(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.18-17.8.0/fs/namei.c~vfs_intent  2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/namei.c 2002-12-06 14:52:31.000000000 -0800
-@@ -1,3 +1,6 @@
-+
-+
-+
- /*
-  *  linux/fs/namei.c
-  *
-@@ -94,6 +97,14 @@
+--- linux-2.4.18-18.8.0-l7/fs/namei.c~vfs_intent-2.4.18-18     Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/namei.c     Wed Jan 22 22:53:28 2003
+@@ -94,6 +97,13 @@
   * XEmacs seems to be relying on it...
   */
  
@@ -42,7 +45,6 @@
 +
 +}
 +
-+
  /* In order to reduce some races, while at the same time doing additional
   * checking and hopefully speeding things up, we copy filenames to the
   * kernel data space before using them..
   */
 -static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 +static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, 
-+                                 struct lookup_intent *it)
++                               struct lookup_intent *it)
  {
        int err;
        if (current->link_count >= max_recursive_link)
        current->total_link_count++;
        UPDATE_ATIME(dentry->d_inode);
 -      err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (dentry->d_inode->i_op->follow_link2)
-+                err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else 
-+                err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (dentry->d_inode->i_op->follow_link2)
++              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++      else 
++              err = dentry->d_inode->i_op->follow_link(dentry, nd);
        current->link_count--;
        return err;
  loop:
-+        intent_release(dentry, it);
++      intent_release(dentry, it);
        path_release(nd);
        return -ELOOP;
  }
                        err = PTR_ERR(dentry);
                        if (IS_ERR(dentry))
                                break;
-@@ -606,8 +642,10 @@ last_component:
+@@ -606,8 +642,9 @@ last_component:
                        ;
                inode = dentry->d_inode;
                if ((lookup_flags & LOOKUP_FOLLOW)
 -                  && inode && inode->i_op && inode->i_op->follow_link) {
 -                      err = do_follow_link(dentry, nd);
 +                  && inode && inode->i_op && 
-+                    (inode->i_op->follow_link || 
-+                     inode->i_op->follow_link2)) {
++                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
 +                      err = do_follow_link(dentry, nd, it);
                        dput(dentry);
                        if (err)
                                break;
                }
                goto return_base;
-@@ -663,10 +702,21 @@ return_err:
+@@ -658,15 +697,28 @@ out_dput:
+               dput(dentry);
+               break;
+       }
++      if (err)
++              intent_release(nd->dentry, it);
+       path_release(nd);
+ return_err:
        return err;
  }
  
  }
  
  /* SMP-safe */
-@@ -751,6 +801,17 @@ walk_init_root(const char *name, struct 
+@@ -751,6 +803,17 @@ walk_init_root(const char *name, struct 
  }
  
  /* SMP-safe */
  int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
  {
        int error = 0;
-@@ -779,7 +840,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +842,8 @@ int path_init(const char *name, unsigned
   * needs parent already locked. Doesn't follow mounts.
   * SMP-safe.
   */
  {
        struct dentry * dentry;
        struct inode *inode;
-@@ -802,13 +864,16 @@ struct dentry * lookup_hash(struct qstr 
+@@ -802,13 +866,16 @@ struct dentry * lookup_hash(struct qstr 
                        goto out;
        }
  
                dentry = inode->i_op->lookup(inode, new);
                unlock_kernel();
                if (!dentry)
-@@ -820,6 +885,12 @@ out:
+@@ -820,6 +887,12 @@ out:
        return dentry;
  }
  
  /* SMP-safe */
  struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
  {
-@@ -841,7 +912,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +914,7 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
  access:
        return ERR_PTR(-EACCES);
  }
-@@ -872,6 +943,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +945,23 @@ int __user_walk(const char *name, unsign
        return err;
  }
  
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1010,7 +1098,8 @@ exit_lock:
-  * for symlinks (where the permissions are checked later).
-  * SMP-safe
-  */
--int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
-+int open_namei_it(const char *pathname, int flag, int mode,
-+                struct nameidata *nd, struct lookup_intent *it)
+@@ -1045,14 +1135,17 @@ int may_open(struct nameidata *nd, int a
+         return get_lease(inode, flag);
+ }
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++                          int flags, struct lookup_intent *it);
++
+ struct file *filp_open(const char * pathname, int open_flags, int mode)
  {
        int acc_mode, error = 0;
-       struct inode *inode;
-@@ -1024,7 +1113,7 @@ int open_namei(const char * pathname, in
+-      struct inode *inode;
+       struct dentry *dentry;
+       struct dentry *dir;
+       int flag = open_flags;
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags };
+       int count = 0;
+       if ((flag+1) & O_ACCMODE)
+@@ -1066,7 +1159,7 @@ struct file *filp_open(const char * path
         * The simplest case - just a plain lookup.
         */
        if (!(flag & O_CREAT)) {
--              error = path_lookup(pathname, lookup_flags(flag), nd);
-+              error = path_lookup_it(pathname, lookup_flags(flag), nd, it);
+-              error = path_lookup(pathname, lookup_flags(flag), &nd);
++              error = path_lookup_it(pathname, lookup_flags(flag), &nd, &it);
                if (error)
-                       return error;
-               dentry = nd->dentry;
-@@ -1034,6 +1123,10 @@ int open_namei(const char * pathname, in
+                       return ERR_PTR(error);
+               dentry = nd.dentry;
+@@ -1076,6 +1169,8 @@ struct file *filp_open(const char * path
        /*
         * Create - we need to know the parent.
         */
-+      if (it) {
-+              it->it_mode = mode;
-+              it->it_op |= IT_CREAT;
-+      }
-       error = path_lookup(pathname, LOOKUP_PARENT, nd);
++      it.it_mode = mode;
++      it.it_op |= IT_CREAT;
+       error = path_lookup(pathname, LOOKUP_PARENT, &nd);
        if (error)
-               return error;
-@@ -1049,7 +1142,7 @@ int open_namei(const char * pathname, in
+               return ERR_PTR(error);
+@@ -1091,7 +1186,7 @@ struct file *filp_open(const char * path
  
-       dir = nd->dentry;
+       dir = nd.dentry;
        down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1058,6 +1151,7 @@ do_last:
+@@ -1100,6 +1195,7 @@ do_last:
                goto exit;
        }
  
-+      it->it_mode = mode;
++      it.it_mode = mode;
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
                error = vfs_create(dir->d_inode, dentry,
-@@ -1091,7 +1185,8 @@ do_last:
+@@ -1134,7 +1230,8 @@ do_last:
        error = -ENOENT;
        if (!dentry->d_inode)
                goto exit_dput;
 -      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
 +      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || 
-+                                      dentry->d_inode->i_op->follow_link2))
++                                    dentry->d_inode->i_op->follow_link2))
                goto do_link;
  
-       dput(nd->dentry);
-@@ -1177,8 +1272,10 @@ ok:
-       return 0;
+       dput(nd.dentry);
+@@ -1149,11 +1246,13 @@ ok:
+       if (!S_ISREG(nd.dentry->d_inode->i_mode))
+               open_flags &= ~O_TRUNC;
+-        return dentry_open(nd.dentry, nd.mnt, open_flags);
++      return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
  
  exit_dput:
-+      intent_release(dentry, it);
++      intent_release(dentry, &it);
        dput(dentry);
  exit:
-+      intent_release(nd->dentry, it);
-       path_release(nd);
-       return error;
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+       return ERR_PTR(error);
  
-@@ -1197,7 +1294,12 @@ do_link:
+@@ -1172,7 +1271,12 @@ do_link:
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
        UPDATE_ATIME(dentry->d_inode);
--      error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+        if (dentry->d_inode->i_op->follow_link2) 
-+                error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+        else 
-+                error = dentry->d_inode->i_op->follow_link(dentry, nd);
+-      error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++      if (dentry->d_inode->i_op->follow_link2) 
++              error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
++      else 
++              error = dentry->d_inode->i_op->follow_link(dentry, &nd);
 +      if (error)
-+              intent_release(dentry, it);
++              intent_release(dentry, &it);
        dput(dentry);
        if (error)
                return error;
-@@ -1219,13 +1321,20 @@ do_link:
+@@ -1194,13 +1298,15 @@ do_link:
        }
-       dir = nd->dentry;
+       dir = nd.dentry;
        down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
-       putname(nd->last.name);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+       putname(nd.last.name);
        goto do_last;
  }
  
-+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
-+{
-+      return open_namei_it(pathname, flag, mode, nd, NULL);
-+}
-+
 +
  /* SMP-safe */
 -static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
  {
        struct dentry *dentry;
  
-@@ -1233,7 +1342,7 @@ static struct dentry *lookup_create(stru
+@@ -1208,7 +1314,7 @@ static struct dentry *lookup_create(stru
        dentry = ERR_PTR(-EEXIST);
        if (nd->last_type != LAST_NORM)
                goto fail;
        if (IS_ERR(dentry))
                goto fail;
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1279,6 +1388,7 @@ asmlinkage long sys_mknod(const char * f
-       char * tmp;
-       struct dentry * dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode };
-       if (S_ISDIR(mode))
-               return -EPERM;
-@@ -1289,7 +1399,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1264,7 +1370,19 @@ asmlinkage long sys_mknod(const char * f
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
 -      dentry = lookup_create(&nd, 0);
-+      dentry = lookup_create(&nd, 0, &it);
++
++      if (nd.dentry->d_inode->i_op->mknod2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->mknod2(nd.dentry->d_inode, 
++                                 nd.last.name, 
++                                 nd.last.len,
++                                 mode, dev);
++              /* -EOPNOTSUPP means the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto out2;
++      }
++
++      dentry = lookup_create(&nd, 0, NULL);
        error = PTR_ERR(dentry);
  
        mode &= ~current->fs->umask;
-@@ -1307,6 +1417,7 @@ asmlinkage long sys_mknod(const char * f
-               default:
-                       error = -EINVAL;
-               }
-+              intent_release(dentry, &it);
+@@ -1285,6 +1403,7 @@ asmlinkage long sys_mknod(const char * f
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
-@@ -1347,6 +1458,7 @@ asmlinkage long sys_mkdir(const char * p
- {
-       int error = 0;
-       char * tmp;
-+      struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode };
-       tmp = getname(pathname);
-       error = PTR_ERR(tmp);
-@@ -1357,11 +1469,12 @@ asmlinkage long sys_mkdir(const char * p
++ out2:
+       path_release(&nd);
+ out:
+       putname(tmp);
+@@ -1332,7 +1451,17 @@ asmlinkage long sys_mkdir(const char * p
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 1);
-+              dentry = lookup_create(&nd, 1, &it);
++              if (nd.dentry->d_inode->i_op->mkdir2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir2(nd.dentry->d_inode, 
++                                         nd.last.name, 
++                                         nd.last.len,
++                                         mode);
++                      /* -EOPNOTSUPP means the file system wants the normal vfs path */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 1, NULL);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_mkdir(nd.dentry->d_inode, dentry,
-                                         mode & ~current->fs->umask);
-+                      intent_release(dentry, &it);
+@@ -1340,6 +1469,7 @@ asmlinkage long sys_mkdir(const char * p
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1445,6 +1558,7 @@ asmlinkage long sys_rmdir(const char * p
-       char * name;
-       struct dentry *dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_RMDIR };
-       name = getname(pathname);
-       if(IS_ERR(name))
-@@ -1466,10 +1580,11 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+               path_release(&nd);
+ out:
+               putname(tmp);
+@@ -1440,8 +1570,17 @@ asmlinkage long sys_rmdir(const char * p
+                       error = -EBUSY;
                        goto exit1;
        }
++      if (nd.dentry->d_inode->i_op->rmdir2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->rmdir2(nd.dentry->d_inode, 
++                                 nd.last.name, 
++                                 nd.last.len);
++              /* -EOPNOTSUPP means the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                error = vfs_rmdir(nd.dentry->d_inode, dentry);
-+              intent_release(dentry, &it);
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-@@ -1513,6 +1628,7 @@ asmlinkage long sys_unlink(const char * 
-       char * name;
-       struct dentry *dentry;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_UNLINK };
-       name = getname(pathname);
-       if(IS_ERR(name))
-@@ -1525,7 +1641,7 @@ asmlinkage long sys_unlink(const char * 
+@@ -1499,8 +1638,17 @@ asmlinkage long sys_unlink(const char * 
+       error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
++      if (nd.dentry->d_inode->i_op->unlink2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink2(nd.dentry->d_inode, 
++                                  nd.last.name, 
++                                  nd.last.len);
++              /* -EOPNOTSUPP means the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
 -      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-@@ -1533,6 +1649,7 @@ asmlinkage long sys_unlink(const char * 
-                       goto slashes;
-               error = vfs_unlink(nd.dentry->d_inode, dentry);
-       exit2:
-+              intent_release(dentry, &it);
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-@@ -1579,6 +1696,7 @@ asmlinkage long sys_symlink(const char *
-       int error = 0;
-       char * from;
-       char * to;
-+      struct lookup_intent it = { .it_op = IT_SYMLINK };
-       from = getname(oldname);
-       if(IS_ERR(from))
-@@ -1592,10 +1710,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1567,15 +1715,26 @@ asmlinkage long sys_symlink(const char *
                error = path_lookup(to, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
 -              dentry = lookup_create(&nd, 0);
-+              it.it_data = from;
-+              dentry = lookup_create(&nd, 0, &it);
++              if (nd.dentry->d_inode->i_op->symlink2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->symlink2(nd.dentry->d_inode, 
++                                           nd.last.name, 
++                                           nd.last.len,
++                                           from);
++                      /* -EOPNOTSUPP means the file system wants the normal vfs path */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 0, NULL);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
                        error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-+                      intent_release(dentry, &it);
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
-@@ -1660,6 +1780,7 @@ asmlinkage long sys_link(const char * ol
- {
-       int error;
-       char * to;
-+      struct lookup_intent it = { .it_op = IT_LINK };
-       to = getname(newname);
-       error = PTR_ERR(to);
-@@ -1667,7 +1788,7 @@ asmlinkage long sys_link(const char * ol
++      out2:
+               path_release(&nd);
+-out:
++      out:
+               putname(to);
+       }
+       putname(from);
+@@ -1642,7 +1801,7 @@ asmlinkage long sys_link(const char * ol
                struct dentry *new_dentry;
                struct nameidata nd, old_nd;
  
 -              error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
-+              error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, &it);
++              error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, NULL);
                if (error)
                        goto exit;
                error = path_lookup(to, LOOKUP_PARENT, &nd);
-@@ -1676,10 +1797,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1651,7 +1810,17 @@ asmlinkage long sys_link(const char * ol
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out_release;
 -              new_dentry = lookup_create(&nd, 0);
-+              it.it_op = IT_LINK2;
-+              new_dentry = lookup_create(&nd, 0, &it);
++              if (nd.dentry->d_inode->i_op->link2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->link2(old_nd.dentry->d_inode, 
++                                        nd.dentry->d_inode, 
++                                        nd.last.name, 
++                                        nd.last.len);
++                      /* -EOPNOTSUPP means the file system wants the normal vfs path */
++                      if (error != -EOPNOTSUPP)
++                              goto out_release;
++              }
++              new_dentry = lookup_create(&nd, 0, NULL);
                error = PTR_ERR(new_dentry);
                if (!IS_ERR(new_dentry)) {
                        error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-+                      intent_release(new_dentry, &it);
-                       dput(new_dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-@@ -1720,7 +1843,8 @@ exit:
+@@ -1695,7 +1864,8 @@ exit:
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
        struct inode *target;
-@@ -1778,6 +1902,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1753,6 +1923,7 @@ int vfs_rename_dir(struct inode *old_dir
                error = -EBUSY;
        else 
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (target) {
                if (!error)
                        target->i_flags |= S_DEAD;
-@@ -1799,7 +1924,8 @@ out_unlock:
+@@ -1774,7 +1945,8 @@ out_unlock:
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  {
        int error;
  
-@@ -1830,6 +1956,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1805,6 +1977,7 @@ int vfs_rename_other(struct inode *old_d
                error = -EBUSY;
        else
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
        if (error)
                return error;
-@@ -1841,13 +1968,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1816,13 +1989,14 @@ int vfs_rename_other(struct inode *old_d
  }
  
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (!error) {
                if (old_dir == new_dir)
                        inode_dir_notify(old_dir, DN_RENAME);
-@@ -1864,6 +1992,7 @@ static inline int do_rename(const char *
-       int error = 0;
-       struct dentry * old_dir, * new_dir;
-       struct dentry * old_dentry, *new_dentry;
-+      struct lookup_intent it = { .it_op = IT_RENAME };
-       struct nameidata oldnd, newnd;
-       error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
-@@ -1889,7 +2018,7 @@ static inline int do_rename(const char *
+@@ -1862,9 +2036,23 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
++      if (old_dir->d_inode->i_op->rename2) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, 
++                                                      new_dir->d_inode,
++                                                      oldnd.last.name, 
++                                                      oldnd.last.len,
++                                                      newnd.last.name,
++                                                      newnd.last.len);
++              unlock_kernel();
++              /* -EOPNOTSUPP means the file system wants the normal vfs path */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
        double_lock(new_dir, old_dir);
  
 -      old_dentry = lookup_hash(&oldnd.last, old_dir);
-+      old_dentry = lookup_hash_it(&oldnd.last, old_dir, &it);
++      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
-@@ -1905,18 +2034,21 @@ static inline int do_rename(const char *
+@@ -1880,14 +2068,14 @@ static inline int do_rename(const char *
                if (newnd.last.name[newnd.last.len])
                        goto exit4;
        }
 -      new_dentry = lookup_hash(&newnd.last, new_dir);
-+      it.it_op = IT_RENAME2;
-+      new_dentry = lookup_hash_it(&newnd.last, new_dir, &it);
++      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto exit4;
        lock_kernel();
        error = vfs_rename(old_dir->d_inode, old_dentry,
 -                                 new_dir->d_inode, new_dentry);
-+                                 new_dir->d_inode, new_dentry, &it);
++                                 new_dir->d_inode, new_dentry, NULL);
        unlock_kernel();
  
-+      intent_release(new_dentry, &it);
        dput(new_dentry);
- exit4:
-+      intent_release(old_dentry, &it);
-       dput(old_dentry);
- exit3:
-       double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1965,7 +2097,8 @@ out:
+@@ -1940,7 +2127,8 @@ out:
  }
  
  static inline int
 -__vfs_follow_link(struct nameidata *nd, const char *link)
 +__vfs_follow_link(struct nameidata *nd, const char *link, 
-+                     struct lookup_intent *it)
++                struct lookup_intent *it)
  {
        int res = 0;
        char *name;
-@@ -1978,7 +2111,7 @@ __vfs_follow_link(struct nameidata *nd, 
+@@ -1953,7 +2141,7 @@ __vfs_follow_link(struct nameidata *nd, 
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
                return res;
-@@ -2000,7 +2133,13 @@ fail:
+@@ -1975,7 +2163,13 @@ fail:
  
  int vfs_follow_link(struct nameidata *nd, const char *link)
  {
 +}
 +
 +int vfs_follow_link_it(struct nameidata *nd, const char *link, 
-+                       struct lookup_intent *it)
++                     struct lookup_intent *it)
 +{
 +      return __vfs_follow_link(nd, link, it);
  }
  
  /* get the link contents into pagecache */
-@@ -2042,7 +2181,7 @@ int page_follow_link(struct dentry *dent
+@@ -2017,7 +2211,7 @@ int page_follow_link(struct dentry *dent
  {
        struct page *page = NULL;
        char *s = page_getlink(dentry, &page);
        if (page) {
                kunmap(page);
                page_cache_release(page);
---- linux-2.4.18-17.8.0/fs/nfsd/vfs.c~vfs_intent       2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/nfsd/vfs.c      2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/nfsd/vfs.c~vfs_intent-2.4.18-18  Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/nfsd/vfs.c  Mon Jan 20 12:25:10 2003
 @@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
                        err = nfserr_perm;
        } else
        unlock_kernel();
        if (!err && EX_ISSYNC(tfhp->fh_export)) {
                nfsd_sync_dir(tdentry);
---- linux-2.4.18-17.8.0/fs/open.c~vfs_intent   2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/open.c  2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/open.c~vfs_intent-2.4.18-18      Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/open.c      Wed Jan 22 10:39:31 2003
 @@ -19,6 +19,9 @@
  #include <asm/uaccess.h>
  
        struct nameidata nd;
        struct inode * inode;
        int error;
-+      struct lookup_intent it = { .it_op = IT_SETATTR };
++      struct lookup_intent it = { .it_op = IT_TRUNC };
  
        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                path_release(&nd);
        }
        return error;
-@@ -638,10 +661,16 @@ asmlinkage long sys_fchown(unsigned int 
-  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
-  * used by symlinks.
-  */
-+extern int open_namei_it(const char *filename, int namei_flags, int mode,
-+                       struct nameidata *nd, struct lookup_intent *it);
-+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it);
-+
- struct file *filp_open(const char * filename, int flags, int mode)
- {
-       int namei_flags, error;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN };
-       namei_flags = flags;
-       if ((namei_flags+1) & O_ACCMODE)
-@@ -649,18 +678,19 @@ struct file *filp_open(const char * file
-       if (namei_flags & O_TRUNC)
-               namei_flags |= 2;
--      error = open_namei(filename, namei_flags, mode, &nd);
--      if (!error)
--              return dentry_open(nd.dentry, nd.mnt, flags);
-+      error = open_namei_it(filename, namei_flags, mode, &nd, &it);
-+      if (error)
-+              return ERR_PTR(error);
--      return ERR_PTR(error);
-+      return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
- }
- extern ssize_t do_readahead(struct file *file, unsigned long index, unsigned long nr);
+@@ -628,7 +651,8 @@ extern ssize_t do_readahead(struct file 
  /* for files over a certains size it doesn't pay to do readahead on open */
  #define READAHEAD_CUTOFF 48000
  
  {
        struct file * f;
        struct inode *inode;
-@@ -711,6 +741,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,6 +717,7 @@ struct file *dentry_open(struct dentry *
                do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT);
        
  
        return f;
  
  cleanup_all:
-@@ -725,11 +756,17 @@ cleanup_all:
+@@ -707,11 +732,17 @@ cleanup_all:
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
---- linux-2.4.18-17.8.0/fs/stat.c~vfs_intent   2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/stat.c  2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/stat.c~vfs_intent-2.4.18-18      Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/stat.c      Mon Jan 20 12:25:10 2003
 @@ -13,6 +13,7 @@
  
  #include <asm/uaccess.h>
                path_release(&nd);
        }
        return error;
---- linux-2.4.18-17.8.0/include/linux/dcache.h~vfs_intent      2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/dcache.h     2002-12-06 14:52:31.000000000 -0800
-@@ -6,6 +6,34 @@
+--- linux-2.4.18-18.8.0-l7/include/linux/dcache.h~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/include/linux/dcache.h Wed Jan 22 19:38:12 2003
+@@ -6,6 +6,27 @@
  #include <asm/atomic.h>
  #include <linux/mount.h>
  
-+#define IT_OPEN  (1)
-+#define IT_CREAT  (1<<1)
-+#define IT_MKDIR  (1<<2)
-+#define IT_LINK  (1<<3)
-+#define IT_LINK2  (1<<4)
-+#define IT_SYMLINK  (1<<5)
-+#define IT_UNLINK  (1<<6)
-+#define IT_RMDIR  (1<<7)
-+#define IT_RENAME  (1<<8)
-+#define IT_RENAME2  (1<<9)
-+#define IT_READDIR  (1<<10)
-+#define IT_GETATTR  (1<<11)
-+#define IT_SETATTR  (1<<12)
-+#define IT_READLINK  (1<<13)
-+#define IT_MKNOD  (1<<14)
-+#define IT_LOOKUP  (1<<15)
++#define IT_OPEN     (1)
++#define IT_CREAT    (1<<1)
++#define IT_READDIR  (1<<2)
++#define IT_GETATTR  (1<<3)
++#define IT_SETATTR  (1<<4)
++#define IT_TRUNC    (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP   (1<<7)
 +
 +struct lookup_intent {
 +      int it_op;
 +      int it_mode;
++      int it_flags;
 +      int it_disposition;
 +      int it_status;
 +      struct iattr *it_iattr;
  /*
   * linux/include/linux/dcache.h
   *
-@@ -78,6 +106,7 @@ struct dentry {
+@@ -78,6 +99,7 @@ struct dentry {
        unsigned long d_time;           /* used by d_revalidate */
        struct dentry_operations  *d_op;
        struct super_block * d_sb;      /* The root of the dentry tree */
        unsigned long d_vfs_flags;
        void * d_fsdata;                /* fs-specific data */
        void * d_extra_attributes;      /* TUX-specific data */
-@@ -91,6 +120,8 @@ struct dentry_operations {
+@@ -91,6 +113,8 @@ struct dentry_operations {
        int (*d_delete)(struct dentry *);
        void (*d_release)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
  };
  
  /* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.18-17.8.0/include/linux/fs.h~vfs_intent  2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/fs.h 2002-12-06 14:52:31.000000000 -0800
+@@ -124,6 +148,7 @@ d_iput:            no              no              yes
+                                        * s_nfsd_free_path semaphore will be down
+                                        */
+ #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
+ extern spinlock_t dcache_lock;
+--- linux-2.4.18-18.8.0-l7/include/linux/fs.h~vfs_intent-2.4.18-18     Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/include/linux/fs.h     Wed Jan 22 22:46:13 2003
 @@ -576,6 +576,7 @@ struct file {
  
        /* needed for tty driver, and maybe others */
  
  /*
   * File types
-@@ -897,6 +900,7 @@ struct file_operations {
+@@ -897,16 +900,28 @@ struct file_operations {
  struct inode_operations {
        int (*create) (struct inode *,struct dentry *,int);
        struct dentry * (*lookup) (struct inode *,struct dentry *);
 +      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
++      int (*link2) (struct inode *,struct inode *, const char *, int);
        int (*unlink) (struct inode *,struct dentry *);
++      int (*unlink2) (struct inode *, const char *, int);
        int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -907,6 +911,8 @@ struct inode_operations {
++      int (*symlink2) (struct inode *, const char *, int, const char *);
+       int (*mkdir) (struct inode *,struct dentry *,int);
++      int (*mkdir2) (struct inode *, const char *, int,int);
+       int (*rmdir) (struct inode *,struct dentry *);
++      int (*rmdir2) (struct inode *, const char *, int);
+       int (*mknod) (struct inode *,struct dentry *,int,int);
++      int (*mknod2) (struct inode *, const char *, int,int,int);
+       int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
++      int (*rename2) (struct inode *, struct inode *, 
++                      const char *oldname, int oldlen, 
++                      const char *newname, int newlen);
        int (*readlink) (struct dentry *, char *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
 +      int (*follow_link2) (struct dentry *, struct nameidata *, 
-+                            struct lookup_intent *it);
++                           struct lookup_intent *it);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int);
        int (*revalidate) (struct dentry *);
-@@ -1381,6 +1387,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1381,6 +1396,7 @@ typedef int (*read_actor_t)(read_descrip
  extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
  
  extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
  extern int FASTCALL(path_walk(const char *, struct nameidata *));
  extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1392,6 +1399,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1392,6 +1408,8 @@ extern struct dentry * lookup_one_len(co
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
  #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
  #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
  
  extern void inode_init_once(struct inode *);
  extern void iput(struct inode *);
-@@ -1492,6 +1501,8 @@ extern struct file_operations generic_ro
+@@ -1492,6 +1510,8 @@ extern struct file_operations generic_ro
  
  extern int vfs_readlink(struct dentry *, char *, int, const char *);
  extern int vfs_follow_link(struct nameidata *, const char *);
 +extern int vfs_follow_link_it(struct nameidata *, const char *, 
-+                              struct lookup_intent *it);
++                            struct lookup_intent *it);
  extern int page_readlink(struct dentry *, char *, int);
  extern int page_follow_link(struct dentry *, struct nameidata *);
  extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.18-17.8.0/kernel/ksyms.c~vfs_intent      2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/kernel/ksyms.c     2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/kernel/ksyms.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/kernel/ksyms.c Mon Jan 20 12:25:10 2003
 @@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
  EXPORT_SYMBOL(set_page_dirty);
  EXPORT_SYMBOL(vfs_readlink);
diff --git a/lustre/kernel_patches/patches/vfs_intent_hp.patch b/lustre/kernel_patches/patches/vfs_intent_hp.patch
new file mode 100644 (file)
index 0000000..63f09b3
--- /dev/null
@@ -0,0 +1,1267 @@
+ fs/dcache.c            |    3 
+ fs/namei.c             |  306 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/nfsd/vfs.c          |    2 
+ fs/open.c              |   63 +++++++---
+ fs/stat.c              |   29 +++-
+ include/linux/dcache.h |   31 ++++
+ include/linux/fs.h     |   22 +++
+ kernel/ksyms.c         |    1 
+ 8 files changed, 384 insertions(+), 73 deletions(-)
+
+--- linux-2.4.19-hp2_pnnl2/fs/dcache.c~vfs_intent_hp   Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/dcache.c    Sun Jan 19 19:04:47 2003
+@@ -186,6 +188,13 @@ int d_invalidate(struct dentry * dentry)
+               spin_unlock(&dcache_lock);
+               return 0;
+       }
++
++      /* network invalidation by Lustre */
++      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++              spin_unlock(&dcache_lock);
++              return 0;
++      }
++
+       /*
+        * Check whether to do a partial shrink_dcache
+        * to get rid of unused child entries.
+@@ -616,6 +618,7 @@ struct dentry * d_alloc(struct dentry * 
+       dentry->d_op = NULL;
+       dentry->d_fsdata = NULL;
+       dentry->d_mounted = 0;
++      dentry->d_it = NULL;
+       INIT_LIST_HEAD(&dentry->d_hash);
+       INIT_LIST_HEAD(&dentry->d_lru);
+       INIT_LIST_HEAD(&dentry->d_subdirs);
+--- linux-2.4.19-hp2_pnnl2/fs/namei.c~vfs_intent_hp    Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/namei.c     Sun Jan 19 19:35:55 2003
+@@ -94,6 +97,13 @@
+  * XEmacs seems to be relying on it...
+  */
++/* Call the dentry's d_intent_release op, if an intent and the op both exist. */
++void intent_release(struct dentry *de, struct lookup_intent *it)
++{
++      if (it && de->d_op && de->d_op->d_intent_release)
++              de->d_op->d_intent_release(de, it);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+  * kernel data space before using them..
+@@ -260,10 +271,19 @@ void path_release(struct nameidata *nd)
+  * Internal lookup() using the new generic dcache.
+  * SMP-safe
+  */
+-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
++                                  int flags, struct lookup_intent *it)
+ {
+       struct dentry * dentry = d_lookup(parent, name);
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++              if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++                  !d_invalidate(dentry)) {
++                      dput(dentry);
++                      dentry = NULL;
++              }
++              return dentry;
++      } else
+       if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+               if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+                       dput(dentry);
+@@ -281,11 +301,14 @@ static struct dentry * cached_lookup(str
+  * make sure that nobody added the entry to the dcache in the meantime..
+  * SMP-safe
+  */
+-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
++                                int flags, struct lookup_intent *it)
+ {
+       struct dentry * result;
+       struct inode *dir = parent->d_inode;
++again:
++
+       down(&dir->i_sem);
+       /*
+        * First re-do the cached lookup just in case it was created
+@@ -300,6 +321,9 @@ static struct dentry * real_lookup(struc
+               result = ERR_PTR(-ENOMEM);
+               if (dentry) {
+                       lock_kernel();
++                      if (dir->i_op->lookup2)
++                              result = dir->i_op->lookup2(dir, dentry, it);
++                      else
+                       result = dir->i_op->lookup(dir, dentry);
+                       unlock_kernel();
+                       if (result)
+@@ -321,6 +345,12 @@ static struct dentry * real_lookup(struc
+                       dput(result);
+                       result = ERR_PTR(-ENOENT);
+               }
++      } else if (result->d_op && result->d_op->d_revalidate2) {
++              if (!result->d_op->d_revalidate2(result, flags, it) &&
++                  !d_invalidate(result)) {
++                      dput(result);
++                      goto again;
++              }
+       }
+       return result;
+ }
+@@ -332,7 +362,8 @@ static struct dentry * real_lookup(struc
+  * Without that kind of total limit, nasty chains of consecutive
+  * symlinks can cause almost arbitrarily long lookups. 
+  */
+-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++                               struct lookup_intent *it)
+ {
+       int err;
+       if (current->link_count >= 5)
+@@ -346,10 +377,14 @@ static inline int do_follow_link(struct 
+       current->link_count++;
+       current->total_link_count++;
+       UPDATE_ATIME(dentry->d_inode);
+-      err = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (dentry->d_inode->i_op->follow_link2)
++              err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++      else
++              err = dentry->d_inode->i_op->follow_link(dentry, nd);
+       current->link_count--;
+       return err;
+ loop:
++      intent_release(dentry, it);
+       path_release(nd);
+       return -ELOOP;
+ }
+@@ -447,7 +482,8 @@ static inline void follow_dotdot(struct 
+  *
+  * We expect 'base' to be positive and a directory.
+  */
+-int link_path_walk(const char * name, struct nameidata *nd)
++int link_path_walk_it(const char *name, struct nameidata *nd,
++                    struct lookup_intent *it)
+ {
+       struct dentry *dentry;
+       struct inode *inode;
+@@ -520,9 +556,9 @@ int link_path_walk(const char * name, st
+                               break;
+               }
+               /* This does the actual lookups.. */
+-              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+               if (!dentry) {
+-                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+                       err = PTR_ERR(dentry);
+                       if (IS_ERR(dentry))
+                               break;
+@@ -539,8 +575,8 @@ int link_path_walk(const char * name, st
+               if (!inode->i_op)
+                       goto out_dput;
+-              if (inode->i_op->follow_link) {
+-                      err = do_follow_link(dentry, nd);
++              if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++                      err = do_follow_link(dentry, nd, NULL);
+                       dput(dentry);
+                       if (err)
+                               goto return_err;
+@@ -556,7 +592,7 @@ int link_path_walk(const char * name, st
+                       nd->dentry = dentry;
+               }
+               err = -ENOTDIR; 
+-              if (!inode->i_op->lookup)
++              if (!inode->i_op->lookup && !inode->i_op->lookup2)
+                       break;
+               continue;
+               /* here ends the main loop */
+@@ -583,9 +619,9 @@ last_component:
+                       if (err < 0)
+                               break;
+               }
+-              dentry = cached_lookup(nd->dentry, &this, 0);
++              dentry = cached_lookup(nd->dentry, &this, 0, it);
+               if (!dentry) {
+-                      dentry = real_lookup(nd->dentry, &this, 0);
++                      dentry = real_lookup(nd->dentry, &this, 0, it);
+                       err = PTR_ERR(dentry);
+                       if (IS_ERR(dentry))
+                               break;
+@@ -594,8 +630,9 @@ last_component:
+                       ;
+               inode = dentry->d_inode;
+               if ((lookup_flags & LOOKUP_FOLLOW)
+-                  && inode && inode->i_op && inode->i_op->follow_link) {
+-                      err = do_follow_link(dentry, nd);
++                  && inode && inode->i_op &&
++                  (inode->i_op->follow_link || inode->i_op->follow_link2)) {
++                      err = do_follow_link(dentry, nd, it);
+                       dput(dentry);
+                       if (err)
+                               goto return_err;
+@@ -609,7 +647,8 @@ last_component:
+                       goto no_inode;
+               if (lookup_flags & LOOKUP_DIRECTORY) {
+                       err = -ENOTDIR; 
+-                      if (!inode->i_op || !inode->i_op->lookup)
++                      if (!inode->i_op ||
++                          (!inode->i_op->lookup && !inode->i_op->lookup2))
+                               break;
+               }
+               goto return_base;
+@@ -646,15 +685,28 @@ out_dput:
+               dput(dentry);
+               break;
+       }
++      if (err)
++              intent_release(nd->dentry, it);
+       path_release(nd);
+ return_err:
+       return err;
+ }
++int link_path_walk(const char * name, struct nameidata *nd)
++{
++      return link_path_walk_it(name, nd, NULL);       /* walk with no intent */
++}
++
++int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
++{
++      current->total_link_count = 0;  /* counter bumped by do_follow_link() */
++      return link_path_walk_it(name, nd, it);
++}
++
+ int path_walk(const char * name, struct nameidata *nd)
+ {
+       current->total_link_count = 0;
+-      return link_path_walk(name, nd);
++      return link_path_walk_it(name, nd, NULL);
+ }
+ /* SMP-safe */
+@@ -757,7 +809,8 @@ int path_init(const char *name, unsigned
+  * needs parent already locked. Doesn't follow mounts.
+  * SMP-safe.
+  */
+-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
++                             struct lookup_intent *it)
+ {
+       struct dentry * dentry;
+       struct inode *inode;
+@@ -780,13 +833,16 @@ struct dentry * lookup_hash(struct qstr 
+                       goto out;
+       }
+-      dentry = cached_lookup(base, name, 0);
++      dentry = cached_lookup(base, name, 0, it);
+       if (!dentry) {
+               struct dentry *new = d_alloc(base, name);
+               dentry = ERR_PTR(-ENOMEM);
+               if (!new)
+                       goto out;
+               lock_kernel();
++              if (inode->i_op->lookup2)
++                      dentry = inode->i_op->lookup2(inode, new, it);
++              else
+               dentry = inode->i_op->lookup(inode, new);
+               unlock_kernel();
+               if (!dentry)
+@@ -798,6 +854,12 @@ out:
+       return dentry;
+ }
++/* Intent-less entry point: forwards to lookup_hash_it() with it == NULL. */
++struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++{
++      return lookup_hash_it(name, base, NULL);
++}
++
+ /* SMP-safe */
+ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+ {
+@@ -819,7 +881,7 @@ struct dentry * lookup_one_len(const cha
+       }
+       this.hash = end_name_hash(hash);
+-      return lookup_hash(&this, base);
++      return lookup_hash_it(&this, base, NULL);
+ access:
+       return ERR_PTR(-EACCES);
+ }
+@@ -851,6 +913,23 @@ int __user_walk(const char *name, unsign
+       return err;
+ }
++/* getname() the path, then walk it, handing the intent to path_walk_it(). */
++int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
++                 struct lookup_intent *it)
++{
++      char *kname = getname(name);
++      int rc = 0;
++
++      /* getname() reports failure as an ERR_PTR; hand its error code back */
++      if (IS_ERR(kname))
++              return PTR_ERR(kname);
++      /* a zero return from path_init() skips the walk and leaves rc == 0 */
++      if (path_init(kname, flags, nd))
++              rc = path_walk_it(kname, nd, it);
++      putname(kname);
++      return rc;
++}
++
+ /*
+  * It's inline, so penalty for filesystems that don't use sticky bit is
+  * minimal.
+@@ -987,7 +1066,8 @@ exit_lock:
+  * for symlinks (where the permissions are checked later).
+  * SMP-safe
+  */
+-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
++int open_namei_it(const char *pathname, int flag, int mode,
++                struct nameidata *nd, struct lookup_intent *it)
+ {
+       int acc_mode, error = 0;
+       struct inode *inode;
+@@ -1002,7 +1082,7 @@ int open_namei(const char * pathname, in
+        */
+       if (!(flag & O_CREAT)) {
+               if (path_init(pathname, lookup_flags(flag), nd))
+-                      error = path_walk(pathname, nd);
++                      error = path_walk_it(pathname, nd, it);
+               if (error)
+                       return error;
+               dentry = nd->dentry;
+@@ -1012,6 +1092,10 @@ int open_namei(const char * pathname, in
+       /*
+        * Create - we need to know the parent.
+        */
++      if (it) {
++              it->it_mode = mode;
++              it->it_op |= IT_CREAT;
++      }
+       if (path_init(pathname, LOOKUP_PARENT, nd))
+               error = path_walk(pathname, nd);
+       if (error)
+@@ -1028,7 +1112,7 @@ int open_namei(const char * pathname, in
+       dir = nd->dentry;
+       down(&dir->d_inode->i_sem);
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ do_last:
+       error = PTR_ERR(dentry);
+@@ -1037,6 +1121,7 @@ do_last:
+               goto exit;
+       }
++      if (it) it->it_mode = mode;     /* open_namei() passes it == NULL */
+       /* Negative dentry, just create the file */
+       if (!dentry->d_inode) {
+               if (!IS_POSIXACL(dir->d_inode))
+@@ -1071,7 +1156,8 @@ do_last:
+       error = -ENOENT;
+       if (!dentry->d_inode)
+               goto exit_dput;
+-      if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
++      if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
++                                    dentry->d_inode->i_op->follow_link2))
+               goto do_link;
+       dput(nd->dentry);
+@@ -1157,8 +1243,10 @@ ok:
+       return 0;
+ exit_dput:
++      intent_release(dentry, it);
+       dput(dentry);
+ exit:
++      intent_release(nd->dentry, it);
+       path_release(nd);
+       return error;
+@@ -1177,7 +1265,12 @@ do_link:
+        * are done. Procfs-like symlinks just set LAST_BIND.
+        */
+       UPDATE_ATIME(dentry->d_inode);
+-      error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (dentry->d_inode->i_op->follow_link2)
++              error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++      else
++              error = dentry->d_inode->i_op->follow_link(dentry, nd);
++      if (error)
++              intent_release(dentry, it);
+       dput(dentry);
+       if (error)
+               return error;
+@@ -1199,13 +1292,20 @@ do_link:
+       }
+       dir = nd->dentry;
+       down(&dir->d_inode->i_sem);
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+       putname(nd->last.name);
+       goto do_last;
+ }
++/* Intent-less entry point: forwards to open_namei_it() with it == NULL. */
++int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
++{
++      return open_namei_it(pathname, flag, mode, nd, NULL);
++}
++
+ /* SMP-safe */
+-static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
++static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
++                                  struct lookup_intent *it)
+ {
+       struct dentry *dentry;
+@@ -1213,7 +1313,7 @@ static struct dentry *lookup_create(stru
+       dentry = ERR_PTR(-EEXIST);
+       if (nd->last_type != LAST_NORM)
+               goto fail;
+-      dentry = lookup_hash(&nd->last, nd->dentry);
++      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+       if (IS_ERR(dentry))
+               goto fail;
+       if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
+@@ -1270,7 +1371,19 @@ asmlinkage long sys_mknod(const char * f
+               error = path_walk(tmp, &nd);
+       if (error)
+               goto out;
+-      dentry = lookup_create(&nd, 0);
++
++      if (nd.dentry->d_inode->i_op->mknod2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->mknod2(nd.dentry->d_inode,
++                                 nd.last.name,
++                                 nd.last.len,
++                                 mode, dev);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto out2;
++      }
++
++      dentry = lookup_create(&nd, 0, NULL);
+       error = PTR_ERR(dentry);
+       if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1289,6 +1402,7 @@ asmlinkage long sys_mknod(const char * f
+               dput(dentry);
+       }
+       up(&nd.dentry->d_inode->i_sem);
++out2:
+       path_release(&nd);
+ out:
+       putname(tmp);
+@@ -1340,15 +1456,26 @@ asmlinkage long sys_mkdir(const char * p
+                       error = path_walk(tmp, &nd);
+               if (error)
+                       goto out;
+-              dentry = lookup_create(&nd, 1);
++              if (nd.dentry->d_inode->i_op->mkdir2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir2(nd.dentry->d_inode,
++                                         nd.last.name,
++                                         nd.last.len,
++                                         mode);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 1, NULL);
+               error = PTR_ERR(dentry);
+               if (!IS_ERR(dentry)) {
+                       if (!IS_POSIXACL(nd.dentry->d_inode))
+                               mode &= ~current->fs->umask;
+                       error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+                       dput(dentry);
+               }
+               up(&nd.dentry->d_inode->i_sem);
++out2:
+               path_release(&nd);
+ out:
+               putname(tmp);
+@@ -1450,8 +1578,17 @@ asmlinkage long sys_rmdir(const char * p
+                       error = -EBUSY;
+                       goto exit1;
+       }
++      if (nd.dentry->d_inode->i_op->rmdir2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->rmdir2(nd.dentry->d_inode,
++                                 nd.last.name,
++                                 nd.last.len);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
+       down(&nd.dentry->d_inode->i_sem);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+       error = PTR_ERR(dentry);
+       if (!IS_ERR(dentry)) {
+               error = vfs_rmdir(nd.dentry->d_inode, dentry);
+@@ -1510,8 +1649,17 @@ asmlinkage long sys_unlink(const char * 
+       error = -EISDIR;
+       if (nd.last_type != LAST_NORM)
+               goto exit1;
++      if (nd.dentry->d_inode->i_op->unlink2) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink2(nd.dentry->d_inode,
++                                  nd.last.name,
++                                  nd.last.len);
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
+       down(&nd.dentry->d_inode->i_sem);
+-      dentry = lookup_hash(&nd.last, nd.dentry);
++      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+       error = PTR_ERR(dentry);
+       if (!IS_ERR(dentry)) {
+               /* Why not before? Because we want correct error value */
+@@ -1579,15 +1729,26 @@ asmlinkage long sys_symlink(const char *
+                       error = path_walk(to, &nd);
+               if (error)
+                       goto out;
+-              dentry = lookup_create(&nd, 0);
++              if (nd.dentry->d_inode->i_op->symlink2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->symlink2(nd.dentry->d_inode,
++                                           nd.last.name,
++                                           nd.last.len,
++                                           from);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
++              dentry = lookup_create(&nd, 0, NULL);
+               error = PTR_ERR(dentry);
+               if (!IS_ERR(dentry)) {
+                       error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+                       dput(dentry);
+               }
+               up(&nd.dentry->d_inode->i_sem);
++      out2:
+               path_release(&nd);
+-out:
++      out:
+               putname(to);
+       }
+       putname(from);
+@@ -1660,7 +1824,7 @@ asmlinkage long sys_link(const char * ol
+               error = 0;
+               if (path_init(from, LOOKUP_POSITIVE, &old_nd))
+-                      error = path_walk(from, &old_nd);
++                      error = path_walk_it(from, &old_nd, NULL);
+               if (error)
+                       goto exit;
+               if (path_init(to, LOOKUP_PARENT, &nd))
+@@ -1670,7 +1834,17 @@ asmlinkage long sys_link(const char * ol
+               error = -EXDEV;
+               if (old_nd.mnt != nd.mnt)
+                       goto out_release;
+-              new_dentry = lookup_create(&nd, 0);
++              if (nd.dentry->d_inode->i_op->link2) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->link2(old_nd.dentry->d_inode,
++                                        nd.dentry->d_inode,
++                                        nd.last.name,
++                                        nd.last.len);
++                      /* the file system want to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out_release;
++              }
++              new_dentry = lookup_create(&nd, 0, NULL);
+               error = PTR_ERR(new_dentry);
+               if (!IS_ERR(new_dentry)) {
+                       error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+@@ -1716,7 +1892,8 @@ exit:
+  *       locking].
+  */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+-             struct inode *new_dir, struct dentry *new_dentry)
++                 struct inode *new_dir, struct dentry *new_dentry,
++                 struct lookup_intent *it)
+ {
+       int error;
+       struct inode *target;
+@@ -1753,6 +1923,7 @@ int vfs_rename_dir(struct inode *old_dir
+               error = -EBUSY;
+       else 
+               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
++      intent_release(new_dentry, it);
+       if (target) {
+               if (!error)
+                       target->i_flags |= S_DEAD;
+@@ -1795,7 +1973,8 @@ out_unlock:
+ }
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+-             struct inode *new_dir, struct dentry *new_dentry)
++                   struct inode *new_dir, struct dentry *new_dentry,
++                   struct lookup_intent *it)
+ {
+       int error;
+@@ -1826,6 +2005,7 @@ int vfs_rename_other(struct inode *old_d
+               error = -EBUSY;
+       else
+               error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
++      intent_release(new_dentry, it);
+       double_up(&old_dir->i_zombie, &new_dir->i_zombie);
+       if (error)
+               return error;
+@@ -1837,13 +2017,14 @@ int vfs_rename_other(struct inode *old_d
+ }
+ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+-             struct inode *new_dir, struct dentry *new_dentry)
++             struct inode *new_dir, struct dentry *new_dentry,
++             struct lookup_intent *it)
+ {
+       int error;
+       if (S_ISDIR(old_dentry->d_inode->i_mode))
+-              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
++              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
+       else
+-              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
++              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
+       if (!error) {
+               if (old_dir == new_dir)
+                       inode_dir_notify(old_dir, DN_RENAME);
+@@ -1886,9 +2068,23 @@ static inline int do_rename(const char *
+       if (newnd.last_type != LAST_NORM)
+               goto exit2;
++      if (old_dir->d_inode->i_op->rename2) {
++              lock_kernel();
++              error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++                                                      new_dir->d_inode,
++                                                      oldnd.last.name,
++                                                      oldnd.last.len,
++                                                      newnd.last.name,
++                                                      newnd.last.len);
++              unlock_kernel();
++              /* the file system want to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
++
+       double_lock(new_dir, old_dir);
+-      old_dentry = lookup_hash(&oldnd.last, old_dir);
++      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
+       error = PTR_ERR(old_dentry);
+       if (IS_ERR(old_dentry))
+               goto exit3;
+@@ -1904,14 +2100,14 @@ static inline int do_rename(const char *
+               if (newnd.last.name[newnd.last.len])
+                       goto exit4;
+       }
+-      new_dentry = lookup_hash(&newnd.last, new_dir);
++      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
+       error = PTR_ERR(new_dentry);
+       if (IS_ERR(new_dentry))
+               goto exit4;
+       lock_kernel();
+       error = vfs_rename(old_dir->d_inode, old_dentry,
+-                                 new_dir->d_inode, new_dentry);
++                                 new_dir->d_inode, new_dentry, NULL);
+       unlock_kernel();
+       dput(new_dentry);
+@@ -1964,7 +2163,8 @@ out:
+ }
+ static inline int
+-__vfs_follow_link(struct nameidata *nd, const char *link)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++                struct lookup_intent *it)
+ {
+       int res = 0;
+       char *name;
+@@ -1977,7 +2177,7 @@ __vfs_follow_link(struct nameidata *nd, 
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
+-      res = link_path_walk(link, nd);
++      res = link_path_walk_it(link, nd, it);
+ out:
+       if (current->link_count || res || nd->last_type!=LAST_NORM)
+               return res;
+@@ -1999,7 +2199,13 @@ fail:
+ int vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+-      return __vfs_follow_link(nd, link);
++      return __vfs_follow_link(nd, link, NULL);
++}
++
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++                     struct lookup_intent *it)
++{
++      return __vfs_follow_link(nd, link, it);
+ }
+ /* get the link contents into pagecache */
+@@ -2041,7 +2247,7 @@ int page_follow_link(struct dentry *dent
+ {
+       struct page *page = NULL;
+       char *s = page_getlink(dentry, &page);
+-      int res = __vfs_follow_link(nd, s);
++      int res = __vfs_follow_link(nd, s, NULL);
+       if (page) {
+               kunmap(page);
+               page_cache_release(page);
+--- linux-2.4.19-hp2_pnnl2/fs/nfsd/vfs.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/nfsd/vfs.c  Sun Jan 19 19:37:57 2003
+@@ -1295,7 +1295,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+                       err = nfserr_perm;
+       } else
+ #endif
+-      err = vfs_rename(fdir, odentry, tdir, ndentry);
++      err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
+       if (!err && EX_ISSYNC(tfhp->fh_export)) {
+               nfsd_sync_dir(tdentry);
+               nfsd_sync_dir(fdentry);
+--- linux-2.4.19-hp2_pnnl2/fs/open.c~vfs_intent_hp     Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/open.c      Sun Jan 19 19:41:00 2003
+@@ -19,6 +19,9 @@
+ #include <asm/uaccess.h>
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
++extern int path_walk_it(const char *name, struct nameidata *nd,
++                      struct lookup_intent *it);
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
+ int vfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+@@ -118,12 +121,13 @@ static inline long do_sys_truncate(const
+       struct nameidata nd;
+       struct inode * inode;
+       int error;
++      struct lookup_intent it = { .it_op = IT_TRUNC };
+       error = -EINVAL;
+       if (length < 0) /* sorry, but loff_t says... */
+               goto out;
+-      error = user_path_walk(path, &nd);
++      error = user_path_walk_it(path, &nd, &it);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+@@ -168,6 +172,7 @@ static inline long do_sys_truncate(const
+       put_write_access(inode);
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -259,8 +264,9 @@ asmlinkage long sys_utime(char * filenam
+       struct nameidata nd;
+       struct inode * inode;
+       struct iattr newattrs;
++      struct lookup_intent it = { .it_op = IT_SETATTR };
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+@@ -286,6 +292,7 @@ asmlinkage long sys_utime(char * filenam
+       }
+       error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -303,8 +310,9 @@ asmlinkage long sys_utimes(char * filena
+       struct nameidata nd;
+       struct inode * inode;
+       struct iattr newattrs;
++      struct lookup_intent it = { .it_op = IT_SETATTR };
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (error)
+               goto out;
+@@ -331,6 +339,7 @@ asmlinkage long sys_utimes(char * filena
+       }
+       error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -347,6 +356,7 @@ asmlinkage long sys_access(const char * 
+       int old_fsuid, old_fsgid;
+       kernel_cap_t old_cap;
+       int res;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
+               return -EINVAL;
+@@ -364,13 +374,14 @@ asmlinkage long sys_access(const char * 
+       else
+               current->cap_effective = current->cap_permitted;
+-      res = user_path_walk(filename, &nd);
++      res = user_path_walk_it(filename, &nd, &it);
+       if (!res) {
+               res = permission(nd.dentry->d_inode, mode);
+               /* SuS v2 requires we report a read only fs too */
+               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+                  && !special_file(nd.dentry->d_inode->i_mode))
+                       res = -EROFS;
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+@@ -386,6 +397,7 @@ asmlinkage long sys_chdir(const char * f
+       int error;
+       struct nameidata nd;
+       char *name;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       name = getname(filename);
+       error = PTR_ERR(name);
+@@ -394,7 +406,7 @@ asmlinkage long sys_chdir(const char * f
+       error = 0;
+       if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd))
+-              error = path_walk(name, &nd);
++              error = path_walk_it(name, &nd, &it);
+       putname(name);
+       if (error)
+               goto out;
+@@ -406,6 +418,7 @@ asmlinkage long sys_chdir(const char * f
+       set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -446,6 +459,7 @@ asmlinkage long sys_chroot(const char * 
+       int error;
+       struct nameidata nd;
+       char *name;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       name = getname(filename);
+       error = PTR_ERR(name);
+@@ -454,7 +468,7 @@ asmlinkage long sys_chroot(const char * 
+       path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+                     LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+-      error = path_walk(name, &nd);   
++      error = path_walk_it(name, &nd, &it);
+       putname(name);
+       if (error)
+               goto out;
+@@ -471,6 +485,7 @@ asmlinkage long sys_chroot(const char * 
+       set_fs_altroot();
+       error = 0;
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -515,8 +530,9 @@ asmlinkage long sys_chmod(const char * f
+       struct inode * inode;
+       int error;
+       struct iattr newattrs;
++      struct lookup_intent it = { .it_op = IT_SETATTR };
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+@@ -536,6 +552,7 @@ asmlinkage long sys_chmod(const char * f
+       error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
++      intent_release(nd.dentry, &it);
+       path_release(&nd);
+ out:
+       return error;
+@@ -605,10 +622,12 @@ asmlinkage long sys_chown(const char * f
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_SETATTR };
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+               error = chown_common(nd.dentry, user, group);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -618,10 +637,12 @@ asmlinkage long sys_lchown(const char * 
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_SETATTR };
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+               error = chown_common(nd.dentry, user, group);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -655,10 +676,16 @@ asmlinkage long sys_fchown(unsigned int 
+  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+  * used by symlinks.
+  */
++extern int open_namei_it(const char *filename, int namei_flags, int mode,
++                       struct nameidata *nd, struct lookup_intent *it);
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++                          int flags, struct lookup_intent *it);
++
+ struct file *filp_open(const char * filename, int flags, int mode)
+ {
+       int namei_flags, error;
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -666,14 +693,15 @@ struct file *filp_open(const char * file
+       if (namei_flags & O_TRUNC)
+               namei_flags |= 2;
+-      error = open_namei(filename, namei_flags, mode, &nd);
+-      if (!error)
+-              return dentry_open(nd.dentry, nd.mnt, flags);
++      error = open_namei_it(filename, namei_flags, mode, &nd, &it);
++      if (error)
++              return ERR_PTR(error);
+-      return ERR_PTR(error);
++      return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
+ }
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++                          int flags, struct lookup_intent *it)
+ {
+       struct file * f;
+       struct inode *inode;
+@@ -716,6 +744,7 @@ struct file *dentry_open(struct dentry *
+       }
+       f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
++      intent_release(dentry, it);
+       return f;
+ cleanup_all:
+@@ -730,11 +759,17 @@ cleanup_all:
+ cleanup_file:
+       put_filp(f);
+ cleanup_dentry:
++      intent_release(dentry, it);
+       dput(dentry);
+       mntput(mnt);
+       return ERR_PTR(error);
+ }
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++      return dentry_open_it(dentry, mnt, flags, NULL);
++}
++
+ /*
+  * Find an empty file descriptor entry, and mark it busy.
+  */
+--- linux-2.4.19-hp2_pnnl2/fs/stat.c~vfs_intent_hp     Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/stat.c      Sun Jan 19 19:44:51 2003
+@@ -13,6 +13,7 @@
+ #include <asm/uaccess.h>
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
+ /*
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+@@ -135,13 +136,15 @@ static int cp_new_stat(struct inode * in
+ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -151,13 +154,15 @@ asmlinkage long sys_stat(char * filename
+ asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -172,13 +177,15 @@ asmlinkage long sys_newstat(char * filen
+ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -189,13 +196,15 @@ asmlinkage long sys_lstat(char * filenam
+ asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
+ {
+       struct nameidata nd;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+       int error;
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -247,11 +256,12 @@ asmlinkage long sys_readlink(const char 
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_READLINK };
+       if (bufsiz <= 0)
+               return -EINVAL;
+-      error = user_path_walk_link(path, &nd);
++      error = user_path_walk_link_it(path, &nd, &it);
+       if (!error) {
+               struct inode * inode = nd.dentry->d_inode;
+@@ -261,6 +271,7 @@ asmlinkage long sys_readlink(const char 
+                       UPDATE_ATIME(inode);
+                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+               }
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -333,12 +344,14 @@ asmlinkage long sys_stat64(char * filena
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+-      error = user_path_walk(filename, &nd);
++      error = user_path_walk_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+@@ -348,12 +361,14 @@ asmlinkage long sys_lstat64(char * filen
+ {
+       struct nameidata nd;
+       int error;
++      struct lookup_intent it = { .it_op = IT_GETATTR };
+-      error = user_path_walk_link(filename, &nd);
++      error = user_path_walk_link_it(filename, &nd, &it);
+       if (!error) {
+               error = do_revalidate(nd.dentry);
+               if (!error)
+                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++              intent_release(nd.dentry, &it);
+               path_release(&nd);
+       }
+       return error;
+--- linux-2.4.19-hp2_pnnl2/include/linux/dcache.h~vfs_intent_hp        Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/dcache.h Sun Jan 19 19:04:48 2003
+@@ -6,6 +6,27 @@
+ #include <asm/atomic.h>
+ #include <linux/mount.h>
++#define IT_OPEN     (1)
++#define IT_CREAT    (1<<1)
++#define IT_READDIR  (1<<2)
++#define IT_GETATTR  (1<<3)
++#define IT_SETATTR  (1<<4)
++#define IT_TRUNC    (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP   (1<<7)
++
++struct lookup_intent {
++      int it_op;
++      int it_mode;
++      int it_flags;
++      int it_disposition;
++      int it_status;
++      struct iattr *it_iattr;
++      __u64 it_lock_handle[2];
++      int it_lock_mode;
++      void *it_data;
++};
++
+ /*
+  * linux/include/linux/dcache.h
+  *
+@@ -78,6 +106,7 @@ struct dentry {
+       unsigned long d_time;           /* used by d_revalidate */
+       struct dentry_operations  *d_op;
+       struct super_block * d_sb;      /* The root of the dentry tree */
++      struct lookup_intent *d_it;
+       unsigned long d_vfs_flags;
+       void * d_fsdata;                /* fs-specific data */
+       unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+@@ -90,6 +119,8 @@ struct dentry_operations {
+       int (*d_delete)(struct dentry *);
+       void (*d_release)(struct dentry *);
+       void (*d_iput)(struct dentry *, struct inode *);
++      int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
++      void (*d_intent_release)(struct dentry *, struct lookup_intent *);
+ };
+ /* the dentry parameter passed to d_hash and d_compare is the parent
+@@ -124,6 +148,7 @@ d_iput:            no              no              yes
+                                        * s_nfsd_free_path semaphore will be down
+                                        */
+ #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
+ extern spinlock_t dcache_lock;
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~vfs_intent_hp    Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h     Sun Jan 19 19:04:48 2003
+@@ -575,6 +575,7 @@ struct file {
+       /* needed for tty driver, and maybe others */
+       void                    *private_data;
++      struct lookup_intent    *f_intent;
+       /* preallocated helper kiobuf to speedup O_DIRECT */
+       struct kiobuf           *f_iobuf;
+@@ -815,7 +816,9 @@ extern int vfs_symlink(struct inode *, s
+ extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+ extern int vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_unlink(struct inode *, struct dentry *);
+-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++              struct inode *new_dir, struct dentry *new_dentry,
++              struct lookup_intent *it);
+ /*
+  * File types
+@@ -876,16 +879,28 @@ struct file_operations {
+ struct inode_operations {
+       int (*create) (struct inode *,struct dentry *,int);
+       struct dentry * (*lookup) (struct inode *,struct dentry *);
++      struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
+       int (*link) (struct dentry *,struct inode *,struct dentry *);
++      int (*link2) (struct inode *,struct inode *, const char *, int);
+       int (*unlink) (struct inode *,struct dentry *);
++      int (*unlink2) (struct inode *, const char *, int);
+       int (*symlink) (struct inode *,struct dentry *,const char *);
++      int (*symlink2) (struct inode *, const char *, int, const char *);
+       int (*mkdir) (struct inode *,struct dentry *,int);
++      int (*mkdir2) (struct inode *, const char *, int,int);
+       int (*rmdir) (struct inode *,struct dentry *);
++      int (*rmdir2) (struct inode *, const char *, int);
+       int (*mknod) (struct inode *,struct dentry *,int,int);
++      int (*mknod2) (struct inode *, const char *, int,int,int);
+       int (*rename) (struct inode *, struct dentry *,
+                       struct inode *, struct dentry *);
++      int (*rename2) (struct inode *, struct inode *,
++                      const char *oldname, int oldlen,
++                      const char *newname, int newlen);
+       int (*readlink) (struct dentry *, char *,int);
+       int (*follow_link) (struct dentry *, struct nameidata *);
++      int (*follow_link2) (struct dentry *, struct nameidata *,
++                           struct lookup_intent *it);
+       void (*truncate) (struct inode *);
+       int (*permission) (struct inode *, int);
+       int (*revalidate) (struct dentry *);
+@@ -1354,6 +1369,7 @@ typedef int (*read_actor_t)(read_descrip
+ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+ extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
+ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
+ extern int FASTCALL(path_walk(const char *, struct nameidata *));
+ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
+@@ -1364,6 +1380,8 @@ extern struct dentry * lookup_one_len(co
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
++#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
+ extern void inode_init_once(struct inode *);
+ extern void iput(struct inode *);
+@@ -1499,6 +1517,8 @@ extern struct file_operations generic_ro
+ extern int vfs_readlink(struct dentry *, char *, int, const char *);
+ extern int vfs_follow_link(struct nameidata *, const char *);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++                            struct lookup_intent *it);
+ extern int page_readlink(struct dentry *, char *, int);
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern struct inode_operations page_symlink_inode_operations;
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~vfs_intent_hp        Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 19:04:48 2003
+@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
+
+_
diff --git a/lustre/kernel_patches/pc/dev_read_only_hp.pc b/lustre/kernel_patches/pc/dev_read_only_hp.pc
new file mode 100644 (file)
index 0000000..4760ad1
--- /dev/null
@@ -0,0 +1,3 @@
+drivers/block/blkpg.c
+drivers/block/loop.c
+drivers/ide/ide-disk.c
diff --git a/lustre/kernel_patches/pc/exports_hp.pc b/lustre/kernel_patches/pc/exports_hp.pc
new file mode 100644 (file)
index 0000000..6472a11
--- /dev/null
@@ -0,0 +1,4 @@
+fs/ext3/Makefile
+fs/ext3/super.c
+include/linux/fs.h
+kernel/ksyms.c
diff --git a/lustre/kernel_patches/pc/invalidate_show.pc b/lustre/kernel_patches/pc/invalidate_show.pc
new file mode 100644 (file)
index 0000000..1f565ab
--- /dev/null
@@ -0,0 +1,5 @@
+fs/inode.c
+fs/block_dev.c
+fs/devfs/base.c
+fs/super.c
+include/linux/fs.h
diff --git a/lustre/kernel_patches/pc/iod-rmap-exports.pc b/lustre/kernel_patches/pc/iod-rmap-exports.pc
new file mode 100644 (file)
index 0000000..1218f55
--- /dev/null
@@ -0,0 +1,6 @@
+fs/inode.c
+fs/Makefile
+mm/filemap.c
+mm/vmscan.c
+mm/Makefile
+mm/page_alloc.c
diff --git a/lustre/kernel_patches/pc/jbd-transno-cb.pc b/lustre/kernel_patches/pc/jbd-transno-cb.pc
new file mode 100644 (file)
index 0000000..cde73d8
--- /dev/null
@@ -0,0 +1,4 @@
+fs/jbd/commit.c
+fs/jbd/journal.c
+fs/jbd/transaction.c
+include/linux/jbd.h
index 12f8816..a0a6297 100644 (file)
@@ -2,5 +2,4 @@ arch/i386/mm/init.c
 arch/ia64/mm/init.c
 include/linux/slab.h
 kernel/ksyms.c
-kernel/ksyms.c.validate
 mm/slab.c
diff --git a/lustre/kernel_patches/pc/kmem_cache_validate_hp.pc b/lustre/kernel_patches/pc/kmem_cache_validate_hp.pc
new file mode 100644 (file)
index 0000000..a0a6297
--- /dev/null
@@ -0,0 +1,5 @@
+arch/i386/mm/init.c
+arch/ia64/mm/init.c
+include/linux/slab.h
+kernel/ksyms.c
+mm/slab.c
index c1ed719..bb5c390 100644 (file)
@@ -6,12 +6,8 @@ drivers/block/loop.c
 drivers/ide/ide-disk.c
 fs/ext3/Makefile
 fs/ext3/super.c
-fs/jbd/commit.c
-fs/jbd/journal.c
-fs/jbd/transaction.c
 include/linux/blkdev.h
 include/linux/slab.h
-include/linux/jbd.h
 kernel/ksyms.c
 include/linux/dcache.h
 include/linux/fs.h
diff --git a/lustre/kernel_patches/pc/vfs_intent_hp.pc b/lustre/kernel_patches/pc/vfs_intent_hp.pc
new file mode 100644 (file)
index 0000000..881576c
--- /dev/null
@@ -0,0 +1,8 @@
+fs/dcache.c
+fs/namei.c
+fs/nfsd/vfs.c
+fs/open.c
+fs/stat.c
+include/linux/dcache.h
+include/linux/fs.h
+kernel/ksyms.c
index b35612f..913ae18 100644 (file)
@@ -2,4 +2,6 @@ dev_read_only.patch
 exports.patch
 kmem_cache_validate.patch
 lustre_version.patch
-vfs_intent.patch
+vfs_intent-2.4.18-18.patch
+invalidate_show.patch
+iod-rmap-exports.patch
index d0171e0..6723ab6 100644 (file)
@@ -1 +1,7 @@
-patch-2.4.18-hp1_pnnl18.2.8qsnet
+dev_read_only_hp.patch
+exports_hp.patch
+kmem_cache_validate_hp.patch
+jbd-transno-cb.patch
+lustre_version.patch
+vfs_intent_hp.patch
+invalidate_show.patch
index ec72618..51a833f 100644 (file)
@@ -6,3 +6,5 @@ uml_check_get_page.patch
 uml_no_panic.patch
 vfs_intent-2.4.18-18.patch
 uml_compile_fixes.patch
+invalidate_show.patch
+iod-rmap-exports.patch
index 4c64ad2..2ba39f5 100644 (file)
@@ -6,3 +6,4 @@ uml_check_get_page.patch
 uml_no_panic.patch
 vfs_intent.patch
 uml_compile_fixes.patch
+invalidate_show.patch
index f868802..37cb65e 100644 (file)
@@ -1 +1,3 @@
-vanilla-2.4.19
+vanilla-2.4.19.patch
+jbd-transno-cb.patch
+invalidate_show.patch
index 010cdb7..00b991e 100644 (file)
@@ -1,3 +1,3 @@
 DESC
-(undescribed patch)
+Required kernel function exports for Lustre.
 EDESC
diff --git a/lustre/kernel_patches/txt/exports_hp.txt b/lustre/kernel_patches/txt/exports_hp.txt
new file mode 100644 (file)
index 0000000..00b991e
--- /dev/null
@@ -0,0 +1,3 @@
+DESC
+Required kernel function exports for Lustre.
+EDESC
diff --git a/lustre/kernel_patches/txt/invalidate_show.txt b/lustre/kernel_patches/txt/invalidate_show.txt
new file mode 100644 (file)
index 0000000..88f093a
--- /dev/null
@@ -0,0 +1,3 @@
+DESC
+Prints which inodes are busy at filesystem unmount time.
+EDESC
index 45e259a..4a5e662 100644 (file)
@@ -1,12 +1,8 @@
-series/rh-8.0 
-   redhat 2.4.18-14
-   redhat 2.4.18-17
+series/chaos
+   chaos-39
 series/rh-2.4.18-18
    redhat 2.4.18-18
-series/hp-pnnl              ** NOTE: equivalent to vanilla-2.4.18
-   linux-2.4.18-hp1_pnnl18
-   linux-2.4.18-hp1_pnnl19
-series/vanilla-2.4.18       ** Not officially supported
-   linux-2.4.18
+series/hp-pnnl              ** Note: functionally equivalent to 2.4.19 
+   linux-2.4.18-hp2_pnnl2
 series/vanilla-2.4.19       ** Not officially supported
    linux-2.4.19
index ed5051a..a76ff4a 100644 (file)
@@ -8,7 +8,7 @@ MODULE = ldlm
 modulefs_DATA = ldlm.o
 EXTRA_PROGRAMS = ldlm
 
-ldlm_SOURCES = l_lock.c ldlm_lock.c ldlm_resource.c ldlm_test.c ldlm_lockd.c \
+ldlm_SOURCES = l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lockd.c \
 ldlm_extent.c ldlm_request.c
 
 include $(top_srcdir)/Rules
index ae1153f..5a84909 100644 (file)
@@ -67,10 +67,11 @@ static void policy_internal(struct list_head *queue, struct ldlm_extent *req_ex,
 }
 
 /* apply the internal policy by walking all the lists */
-int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
-                       void *req_cookie,
-                       ldlm_mode_t mode, int flags, void *data)
+int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
+                       void *req_cookie, ldlm_mode_t mode, int flags,
+                       void *data)
 {
+        struct ldlm_lock *lock = *lockp;
         struct ldlm_resource *res = lock->l_resource;
         struct ldlm_extent *req_ex = req_cookie;
         struct ldlm_extent new_ex;
index a1220ab..b1ba4ef 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *   Author: Peter Braam <braam@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
@@ -55,34 +55,18 @@ char *ldlm_it2str(int it)
                 return "creat";
         case (IT_OPEN | IT_CREAT):
                 return "open|creat";
-        case IT_MKDIR:
-                return "mkdir";
-        case IT_LINK:
-                return "link";
-        case IT_LINK2:
-                return "link2";
-        case IT_SYMLINK:
-                return "symlink";
-        case IT_UNLINK:
-                return "unlink";
-        case IT_RMDIR:
-                return "rmdir";
-        case IT_RENAME:
-                return "rename";
-        case IT_RENAME2:
-                return "rename2";
         case IT_READDIR:
                 return "readdir";
         case IT_GETATTR:
                 return "getattr";
+        case IT_TRUNC:
+                return "truncate";
         case IT_SETATTR:
                 return "setattr";
-        case IT_READLINK:
-                return "readlink";
-        case IT_MKNOD:
-                return "mknod";
         case IT_LOOKUP:
                 return "lookup";
+        case IT_UNLINK:
+                return "unlink";
         default:
                 CERROR("Unknown intent %d\n", it);
                 return "UNKNOWN";
@@ -101,7 +85,7 @@ ldlm_res_compat ldlm_res_compat_table[] = {
 
 static ldlm_res_policy ldlm_intent_policy_func;
 
-static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock **lock,
                              void *req_cookie, ldlm_mode_t mode, int flags,
                              void *data)
 {
@@ -186,22 +170,26 @@ void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
         EXIT;
 }
 
-/* Only called with strict == 0 by recovery, to mark in-use locks as
- * should-be-destroyed */
+/* This used to have a 'strict' flag, which recovery would use to mark an
+ * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
+ * shall explain why it's gone: with the new hash table scheme, once you call
+ * ldlm_lock_destroy, you can never drop your final references on this lock.
+ * Because it's not in the hash table anymore.  -phil */
 void ldlm_lock_destroy(struct ldlm_lock *lock)
 {
         ENTRY;
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
 
         if (!list_empty(&lock->l_children)) {
-                LDLM_DEBUG(lock, "still has children (%p)!",
+                LDLM_ERROR(lock, "still has children (%p)!",
                            lock->l_children.next);
                 ldlm_lock_dump(D_ERROR, lock);
                 LBUG();
         }
         if (lock->l_readers || lock->l_writers) {
-                LDLM_DEBUG(lock, "lock still has references");
-                ldlm_lock_dump(D_OTHER, lock);
+                LDLM_ERROR(lock, "lock still has references");
+                ldlm_lock_dump(D_ERROR, lock);
+                LBUG();
         }
 
         if (!list_empty(&lock->l_res_link)) {
@@ -238,7 +226,7 @@ void ldlm_lock_destroy(struct ldlm_lock *lock)
 /* this is called by portals_handle2object with the handle lock taken */
 static void lock_handle_addref(void *lock)
 {
-        ldlm_lock_get(lock);
+        LDLM_LOCK_GET((struct ldlm_lock *)lock);
 }
 
 /*
@@ -288,20 +276,20 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
 }
 
 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
-                              __u64 new_resid[3])
+                              struct ldlm_res_id new_resid)
 {
         struct ldlm_resource *oldres = lock->l_resource;
         ENTRY;
 
         l_lock(&ns->ns_lock);
-        if (memcmp(new_resid, lock->l_resource->lr_name,
+        if (memcmp(&new_resid, &lock->l_resource->lr_name,
                    sizeof(lock->l_resource->lr_name)) == 0) {
                 /* Nothing to do */
                 l_unlock(&ns->ns_lock);
                 RETURN(0);
         }
 
-        LASSERT(new_resid[0] != 0);
+        LASSERT(new_resid.name[0] != 0);
 
         /* This function assumes that the lock isn't on any lists */
         LASSERT(list_empty(&lock->l_res_link));
@@ -326,12 +314,11 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
 
 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
 {
-        //lockh->addr = (__u64)(unsigned long)lock;
         memset(&lockh->addr, 0x69, sizeof(lockh->addr));
         lockh->cookie = lock->l_handle.h_cookie;
 }
 
-/* if flags: atomically get the lock and set the flags. 
+/* if flags: atomically get the lock and set the flags.
  *           Return NULL if flag already set
  */
 
@@ -354,7 +341,7 @@ struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
         /* It's unlikely but possible that someone marked the lock as
          * destroyed after we did handle2object on it */
         if (lock->l_destroyed) {
-                CERROR("lock already destroyed: lock %p\n", lock);
+                CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
                 LDLM_LOCK_PUT(lock);
                 GOTO(out, retval);
         }
@@ -401,7 +388,8 @@ void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
 }
 
 static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
-                                   struct ldlm_lock *new)
+                                   struct ldlm_lock *new, 
+                                   void *data, int datalen)
 {
         struct ldlm_ast_work *w;
         ENTRY;
@@ -416,6 +404,8 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
                 GOTO(out, 0);
         }
 
+        w->w_data = data;
+        w->w_datalen = datalen;
         if (new) {
                 lock->l_flags |= LDLM_FL_AST_SENT;
                 w->w_blocking = 1;
@@ -424,7 +414,8 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
 
         w->w_lock = LDLM_LOCK_GET(lock);
         list_add(&w->w_list, lock->l_resource->lr_tmp);
-      out:
+        EXIT;
+ out:
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
         return;
 }
@@ -454,20 +445,16 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
 
 /* Args: unlocked lock */
 int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
-                                    __u64 *res_id, int flags);
+                                    struct ldlm_res_id, int flags);
 
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 {
-        struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
         struct ldlm_namespace *ns;
         ENTRY;
 
-        if (lock == NULL)
-                LBUG();
-
         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
         ns = lock->l_resource->lr_namespace;
-        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        l_lock(&ns->ns_lock);
         if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) {
                 LASSERT(lock->l_readers > 0);
                 lock->l_readers--;
@@ -476,38 +463,71 @@ void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
                 lock->l_writers--;
         }
 
-        /* If we received a blocked AST and this was the last reference,
-         * run the callback. */
+        if (lock->l_flags & LDLM_FL_LOCAL &&
+            !lock->l_readers && !lock->l_writers) {
+                /* If this is a local lock on a server namespace and this was
+                 * the last reference, cancel the lock. */
+                CDEBUG(D_INFO, "forcing cancel of local lock\n");
+                lock->l_flags |= LDLM_FL_CBPENDING;
+        }
+
         if (!lock->l_readers && !lock->l_writers &&
             (lock->l_flags & LDLM_FL_CBPENDING)) {
-                if (!lock->l_resource->lr_namespace->ns_client &&
-                    lock->l_export)
+                /* If we received a blocking AST and this was the last
+                 * reference, run the callback. */
+                if (!ns->ns_client && lock->l_export)
                         CERROR("FL_CBPENDING set on non-local lock--just a "
                                "warning\n");
 
                 LDLM_DEBUG(lock, "final decref done on cbpending lock");
-                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                l_unlock(&ns->ns_lock);
 
                 /* FIXME: need a real 'desc' here */
                 lock->l_blocking_ast(lock, NULL, lock->l_data,
-                                     lock->l_data_len, LDLM_CB_BLOCKING);
+                                     LDLM_CB_BLOCKING);
         } else if (ns->ns_client && !lock->l_readers && !lock->l_writers) {
+                /* If this is a client-side namespace and this was the last
+                 * reference, put it on the LRU. */
                 LASSERT(list_empty(&lock->l_lru));
                 LASSERT(ns->ns_nr_unused >= 0);
                 list_add_tail(&lock->l_lru, &ns->ns_unused_list);
                 ns->ns_nr_unused++;
-                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                l_unlock(&ns->ns_lock);
                 ldlm_cancel_lru(ns);
         } else {
-                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                l_unlock(&ns->ns_lock);
         }
 
         LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */
-        LDLM_LOCK_PUT(lock);    /* matches the handle2lock above */
 
         EXIT;
 }
 
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+{
+        struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+        LASSERT(lock != NULL);
+        ldlm_lock_decref_internal(lock, mode);
+        LDLM_LOCK_PUT(lock);
+}
+
+/* This will drop a lock reference and mark it for destruction, but will not
+ * necessarily cancel the lock before returning. */
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
+{
+        struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+        ENTRY;
+
+        LASSERT(lock != NULL);
+
+        LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
+        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        lock->l_flags |= LDLM_FL_CBPENDING;
+        ldlm_lock_decref_internal(lock, mode);
+        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+        LDLM_LOCK_PUT(lock);
+}
+
 static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
                                  struct list_head *queue)
 {
@@ -537,7 +557,7 @@ static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
                 if (send_cbs && child->l_blocking_ast != NULL) {
                         CDEBUG(D_OTHER, "lock %p incompatible; sending "
                                "blocking AST.\n", child);
-                        ldlm_add_ast_work_item(child, lock);
+                        ldlm_add_ast_work_item(child, lock, NULL, 0);
                 }
         }
 
@@ -562,9 +582,11 @@ static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
 }
 
 /* NOTE: called by
-   - ldlm_handle_enqueuque - resource
-*/
-void ldlm_grant_lock(struct ldlm_lock *lock)
+ *  - ldlm_lock_enqueue
+ *  - ldlm_reprocess_queue
+ *  - ldlm_lock_convert
+ */
+void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen)
 {
         struct ldlm_resource *res = lock->l_resource;
         ENTRY;
@@ -576,17 +598,18 @@ void ldlm_grant_lock(struct ldlm_lock *lock)
         if (lock->l_granted_mode < res->lr_most_restr)
                 res->lr_most_restr = lock->l_granted_mode;
 
-        if (lock->l_completion_ast) {
-                ldlm_add_ast_work_item(lock, NULL);
-        }
+        if (lock->l_completion_ast != NULL)
+                ldlm_add_ast_work_item(lock, NULL, data, datalen);
+
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
         EXIT;
 }
 
-/* returns a referenced lock or NULL */
+/* returns a referenced lock or NULL.  See the flag descriptions below, in the
+ * comment above ldlm_lock_match */
 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                                       struct ldlm_extent *extent,
-                                      struct ldlm_lock *old_lock)
+                                      struct ldlm_lock *old_lock, int flags)
 {
         struct ldlm_lock *lock;
         struct list_head *tmp;
@@ -595,7 +618,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 
                 if (lock == old_lock)
-                        continue;
+                        break;
 
                 if (lock->l_flags & LDLM_FL_CBPENDING)
                         continue;
@@ -611,6 +634,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                 if (lock->l_destroyed)
                         continue;
 
+                if ((flags & LDLM_FL_LOCAL_ONLY) &&
+                    !(lock->l_flags & LDLM_FL_LOCAL))
+                        continue;
+
                 ldlm_lock_addref_internal(lock, mode);
                 return lock;
         }
@@ -625,12 +652,17 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
  *
  * Otherwise, all of the fields must be filled in, to match against.
  *
+ * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
+ *     server (ie, connh is NULL)
+ * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
+ *     list will be considered
+ *
  * Returns 1 if it finds an already-existing lock that is compatible; in this
  * case, lockh is filled in with a addref()ed lock
  */
-int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
-                    void *cookie, int cookielen, ldlm_mode_t mode,
-                    struct lustre_handle *lockh)
+int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
+                    struct ldlm_res_id *res_id, __u32 type, void *cookie,
+                    int cookielen, ldlm_mode_t mode,struct lustre_handle *lockh)
 {
         struct ldlm_resource *res;
         struct ldlm_lock *lock, *old_lock = NULL;
@@ -642,12 +674,12 @@ int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
                 LASSERT(old_lock);
 
                 ns = old_lock->l_resource->lr_namespace;
-                res_id = old_lock->l_resource->lr_name;
+                res_id = &old_lock->l_resource->lr_name;
                 type = old_lock->l_resource->lr_type;
                 mode = old_lock->l_req_mode;
         }
 
-        res = ldlm_resource_get(ns, NULL, res_id, type, 0);
+        res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
         if (res == NULL) {
                 LASSERT(old_lock == NULL);
                 RETURN(0);
@@ -655,11 +687,16 @@ int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
 
         l_lock(&ns->ns_lock);
 
-        if ((lock = search_queue(&res->lr_granted, mode, cookie, old_lock)))
+        lock = search_queue(&res->lr_granted, mode, cookie, old_lock, flags);
+        if (lock != NULL)
                 GOTO(out, rc = 1);
-        if ((lock = search_queue(&res->lr_converting, mode, cookie, old_lock)))
+        if (flags & LDLM_FL_BLOCK_GRANTED)
+                GOTO(out, rc = 0);
+        lock = search_queue(&res->lr_converting, mode, cookie, old_lock, flags);
+        if (lock != NULL)
                 GOTO(out, rc = 1);
-        if ((lock = search_queue(&res->lr_waiting, mode, cookie, old_lock)))
+        lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, flags);
+        if (lock != NULL)
                 GOTO(out, rc = 1);
 
         EXIT;
@@ -670,7 +707,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
         if (lock) {
                 ldlm_lock2handle(lock, lockh);
                 if (lock->l_completion_ast)
-                        lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
+                        lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
         }
         if (rc)
                 LDLM_DEBUG(lock, "matched");
@@ -686,11 +723,12 @@ int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
 /* Returns a referenced lock */
 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
                                    struct lustre_handle *parent_lock_handle,
-                                   __u64 * res_id, __u32 type,
-                                   ldlm_mode_t mode, void *data, __u32 data_len)
+                                   struct ldlm_res_id res_id, __u32 type,
+                                   ldlm_mode_t mode, void *data, void *cp_data)
 {
         struct ldlm_resource *res, *parent_res = NULL;
         struct ldlm_lock *lock, *parent_lock = NULL;
+        ENTRY;
 
         if (parent_lock_handle) {
                 parent_lock = ldlm_handle2lock(parent_lock_handle);
@@ -712,19 +750,20 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
 
         lock->l_req_mode = mode;
         lock->l_data = data;
-        lock->l_data_len = data_len;
+        lock->l_cp_data = cp_data;
 
-        return lock;
+        RETURN(lock);
 }
 
 ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
-                               struct ldlm_lock *lock,
+                               struct ldlm_lock **lockp,
                                void *cookie, int cookie_len,
                                int *flags,
                                ldlm_completion_callback completion,
                                ldlm_blocking_callback blocking)
 {
         struct ldlm_resource *res;
+        struct ldlm_lock *lock = *lockp;
         int local;
         ldlm_res_policy policy;
         ENTRY;
@@ -740,11 +779,18 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         if (!local && !(*flags & LDLM_FL_REPLAY) &&
             (policy = ldlm_res_policy_table[res->lr_type])) {
                 int rc;
-                rc = policy(ns, lock, cookie, lock->l_req_mode, *flags, NULL);
-
+                rc = policy(ns, lockp, cookie, lock->l_req_mode, *flags, NULL);
                 if (rc == ELDLM_LOCK_CHANGED) {
                         res = lock->l_resource;
                         *flags |= LDLM_FL_LOCK_CHANGED;
+                } else if (rc == ELDLM_LOCK_REPLACED) {
+                        /* The lock that was returned has already been granted,
+                         * and placed into lockp.  Destroy the old one and our
+                         * work here is done. */
+                        ldlm_lock_destroy(lock);
+                        LDLM_LOCK_PUT(lock);
+                        *flags |= LDLM_FL_LOCK_CHANGED;
+                        RETURN(0);
                 } else if (rc == ELDLM_LOCK_ABORTED) {
                         ldlm_lock_destroy(lock);
                         RETURN(rc);
@@ -756,8 +802,8 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
                 /* The server returned a blocked lock, but it was granted before
                  * we got a chance to actually enqueue it.  We don't need to do
                  * anything else. */
-                *flags &= ~(LDLM_FL_BLOCK_GRANTED | 
-                          LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
+                *flags &= ~(LDLM_FL_BLOCK_GRANTED |
+                            LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
                 GOTO(out, ELDLM_OK);
         }
 
@@ -775,23 +821,21 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         ldlm_resource_unlink_lock(lock);
         if (local) {
                 if (*flags & LDLM_FL_BLOCK_CONV)
-                        ldlm_resource_add_lock(res, res->lr_converting.prev,
-                                               lock);
+                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
-                        ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                 else
-                        ldlm_grant_lock(lock);
+                        ldlm_grant_lock(lock, NULL, 0);
                 GOTO(out, ELDLM_OK);
         } else if (*flags & LDLM_FL_REPLAY) {
                 if (*flags & LDLM_FL_BLOCK_CONV) {
-                        ldlm_resource_add_lock(res, res->lr_converting.prev,
-                                               lock);
+                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                         GOTO(out, ELDLM_OK);
                 } else if (*flags & LDLM_FL_BLOCK_WAIT) {
-                        ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                         GOTO(out, ELDLM_OK);
                 } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
-                        ldlm_grant_lock(lock);
+                        ldlm_grant_lock(lock, NULL, 0);
                         GOTO(out, ELDLM_OK);
                 }
                 /* If no flags, fall through to normal enqueue path. */
@@ -799,22 +843,27 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
 
         /* FIXME: We may want to optimize by checking lr_most_restr */
         if (!list_empty(&res->lr_converting)) {
-                ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+                ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                 *flags |= LDLM_FL_BLOCK_CONV;
                 GOTO(out, ELDLM_OK);
         }
         if (!list_empty(&res->lr_waiting)) {
-                ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+                ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                 *flags |= LDLM_FL_BLOCK_WAIT;
                 GOTO(out, ELDLM_OK);
         }
         if (!ldlm_lock_compat(lock, 0)) {
-                ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+                ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                 *flags |= LDLM_FL_BLOCK_GRANTED;
                 GOTO(out, ELDLM_OK);
         }
 
-        ldlm_grant_lock(lock);
+        if (lock->l_granted_cb != NULL && lock->l_data != NULL) {
+                /* We just -know- that l_data is a ptlrpc_request here */
+                struct ptlrpc_request *req = lock->l_data;
+                lock->l_granted_cb(lock, req->rq_repmsg, 0);
+        }
+        ldlm_grant_lock(lock, NULL, 0);
         EXIT;
       out:
         l_unlock(&ns->ns_lock);
@@ -841,7 +890,7 @@ static int ldlm_reprocess_queue(struct ldlm_resource *res,
                         RETURN(1);
 
                 list_del_init(&pending->l_res_link);
-                ldlm_grant_lock(pending);
+                ldlm_grant_lock(pending, NULL, 0);
         }
 
         RETURN(0);
@@ -860,9 +909,10 @@ int ldlm_run_ast_work(struct list_head *rpc_list)
                 if (w->w_blocking)
                         rc = w->w_lock->l_blocking_ast
                                 (w->w_lock, &w->w_desc, w->w_data,
-                                 w->w_datalen, LDLM_CB_BLOCKING);
+                                 LDLM_CB_BLOCKING);
                 else
-                        rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
+                        rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
+                                                         w->w_data);
                 if (rc == -ERESTART)
                         retval = rc;
                 else if (rc)
@@ -886,7 +936,6 @@ void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
         (void)ldlm_namespace_foreach_res(ns, reprocess_one_queue, NULL);
 }
 
-/* Must be called with resource->lr_lock not taken. */
 void ldlm_reprocess_all(struct ldlm_resource *res)
 {
         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
@@ -923,7 +972,6 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
                 lock->l_flags |= LDLM_FL_CANCEL;
                 if (lock->l_blocking_ast)
                         lock->l_blocking_ast(lock, NULL, lock->l_data,
-                                             lock->l_data_len,
                                              LDLM_CB_CANCELING);
                 else
                         LDLM_DEBUG(lock, "no blocking ast");
@@ -937,6 +985,8 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
         struct ldlm_namespace *ns;
         ENTRY;
 
+        ldlm_del_waiting_lock(lock);
+
         res = lock->l_resource;
         ns = res->lr_namespace;
 
@@ -951,14 +1001,13 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
 
         ldlm_cancel_callback(lock);
 
-        ldlm_del_waiting_lock(lock);
         ldlm_resource_unlink_lock(lock);
         ldlm_lock_destroy(lock);
         l_unlock(&ns->ns_lock);
         EXIT;
 }
 
-int ldlm_lock_set_data(struct lustre_handle *lockh, void *data, int datalen)
+int ldlm_lock_set_data(struct lustre_handle *lockh, void *data, void *cp_data)
 {
         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
         ENTRY;
@@ -967,16 +1016,18 @@ int ldlm_lock_set_data(struct lustre_handle *lockh, void *data, int datalen)
                 RETURN(-EINVAL);
 
         lock->l_data = data;
-        lock->l_data_len = datalen;
+        lock->l_cp_data = cp_data;
 
         LDLM_LOCK_PUT(lock);
 
         RETURN(0);
 }
 
+/* This function is only called from one thread (per export); no locking around
+ * the list ops needed */
 void ldlm_cancel_locks_for_export(struct obd_export *exp)
 {
-        struct list_head *iter, *n; /* MUST BE CALLED "n"! */
+        struct list_head *iter, *n;
 
         list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) {
                 struct ldlm_lock *lock;
@@ -999,6 +1050,8 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
         int granted = 0;
         ENTRY;
 
+        LBUG();
+
         res = lock->l_resource;
         ns = res->lr_namespace;
 
@@ -1009,26 +1062,25 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
 
         /* If this is a local resource, put it on the appropriate list. */
         if (res->lr_namespace->ns_client) {
-                if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
-                        ldlm_resource_add_lock(res, res->lr_converting.prev,
-                                               lock);
-                else {
+                if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
+                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
+                } else {
                         /* This should never happen, because of the way the
                          * server handles conversions. */
                         LBUG();
 
                         res->lr_tmp = &rpc_list;
-                        ldlm_grant_lock(lock);
+                        ldlm_grant_lock(lock, NULL, 0);
                         res->lr_tmp = NULL;
                         granted = 1;
                         /* FIXME: completion handling not with ns_lock held ! */
                         if (lock->l_completion_ast)
-                                lock->l_completion_ast(lock, 0);
+                                lock->l_completion_ast(lock, 0, NULL);
                 }
         } else {
                 /* FIXME: We should try the conversion right away and possibly
                  * return success without the need for an extra AST */
-                ldlm_resource_add_lock(res, res->lr_converting.prev, lock);
+                ldlm_resource_add_lock(res, &res->lr_converting, lock);
                 *flags |= LDLM_FL_BLOCK_CONV;
         }
 
@@ -1043,7 +1095,7 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock)
 {
         char ver[128];
 
-        if (!(portal_debug & level))
+        if (!((portal_debug | D_ERROR) & level))
                 return;
 
         if (RES_VERSION_SIZE != 4)
@@ -1058,7 +1110,8 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock)
                  lock->l_version[0], lock->l_version[1],
                  lock->l_version[2], lock->l_version[3]);
 
-        CDEBUG(level, "  -- Lock dump: %p (%s)\n", lock, ver);
+        CDEBUG(level, "  -- Lock dump: %p (%s) (rc: %d)\n", lock, ver,
+               atomic_read(&lock->l_refc));
         if (lock->l_export && lock->l_export->exp_connection)
                 CDEBUG(level, "  Node: NID %x (rhandle: "LPX64")\n",
                        lock->l_export->exp_connection->c_peer.peer_nid,
@@ -1067,7 +1120,7 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock)
                 CDEBUG(level, "  Node: local\n");
         CDEBUG(level, "  Parent: %p\n", lock->l_parent);
         CDEBUG(level, "  Resource: %p ("LPD64")\n", lock->l_resource,
-               lock->l_resource->lr_name[0]);
+               lock->l_resource->lr_name.name[0]);
         CDEBUG(level, "  Requested mode: %d, granted mode: %d\n",
                (int)lock->l_req_mode, (int)lock->l_granted_mode);
         CDEBUG(level, "  Readers: %u ; Writers; %u\n",
index d826db1..803e59d 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *   Author: Peter Braam <braam@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
@@ -42,6 +42,7 @@ inline unsigned long round_timeout(unsigned long timeout)
         return ((timeout / HZ) + 1) * HZ;
 }
 
+/* XXX should this be per-ldlm? */
 static struct list_head waiting_locks_list;
 static spinlock_t waiting_locks_spinlock;
 static struct timer_list waiting_locks_timer;
@@ -129,9 +130,9 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock)
         RETURN(1);
 }
 
-static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
-                                    struct ldlm_lock_desc *desc,
-                                    void *data, __u32 data_len, int flag)
+int ldlm_server_blocking_ast(struct ldlm_lock *lock,
+                             struct ldlm_lock_desc *desc,
+                             void *data, int flag)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
@@ -146,6 +147,13 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         LASSERT(lock);
 
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        /* XXX This is necessary because, with the lock re-tasking, we actually
+         * _can_ get called in here twice.  (bug 830) */
+        if (!list_empty(&lock->l_pending_chain)) {
+                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                RETURN(0);
+        }
+
         if (lock->l_destroyed) {
                 /* What's the point? */
                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
@@ -171,6 +179,7 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         req->rq_level = LUSTRE_CONN_RECOVD;
         rc = ptlrpc_queue_wait(req);
         if (rc == -ETIMEDOUT || rc == -EINTR) {
+                ldlm_del_waiting_lock(lock);
                 ldlm_expired_completion_wait(lock);
         } else if (rc) {
                 CERROR("client returned %d from blocking AST for lock %p\n",
@@ -188,7 +197,7 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         RETURN(rc);
 }
 
-static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags)
+int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
@@ -217,6 +226,7 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags)
         req->rq_level = LUSTRE_CONN_RECOVD;
         rc = ptlrpc_queue_wait(req);
         if (rc == -ETIMEDOUT || rc == -EINTR) {
+                ldlm_del_waiting_lock(lock);
                 ldlm_expired_completion_wait(lock);
         } else if (rc) {
                 CERROR("client returned %d from completion AST for lock %p\n",
@@ -233,7 +243,9 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags)
         RETURN(rc);
 }
 
-int ldlm_handle_enqueue(struct ptlrpc_request *req)
+int ldlm_handle_enqueue(struct ptlrpc_request *req,
+                        ldlm_completion_callback completion_callback,
+                        ldlm_blocking_callback blocking_callback)
 {
         struct obd_device *obddev = req->rq_export->exp_obd;
         struct ldlm_reply *dlm_rep;
@@ -268,8 +280,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                 }
         }
 
-        /* XXX notice that this lock has no callback data: of course the
-           export would be exactly what we may want to use here... */
+        /* The lock's callback data might be set in the policy function */
         lock = ldlm_lock_create(obddev->obd_namespace,
                                 &dlm_req->lock_handle2,
                                 dlm_req->lock_desc.l_resource.lr_name,
@@ -289,10 +300,9 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                  &lock->l_export->exp_ldlm_data.led_held_locks);
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
 
-        err = ldlm_lock_enqueue(obddev->obd_namespace, lock, cookie, cookielen,
-                                &flags, ldlm_server_completion_ast,
-                                ldlm_server_blocking_ast);
-        if (err != ELDLM_OK)
+        err = ldlm_lock_enqueue(obddev->obd_namespace, &lock, cookie, cookielen,
+                                &flags, completion_callback, blocking_callback);
+        if (err)
                 GOTO(out, err);
 
         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
@@ -303,7 +313,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                 memcpy(&dlm_rep->lock_extent, &lock->l_extent,
                        sizeof(lock->l_extent));
         if (dlm_rep->lock_flags & LDLM_FL_LOCK_CHANGED) {
-                memcpy(dlm_rep->lock_resource_name, lock->l_resource->lr_name,
+                memcpy(&dlm_rep->lock_resource_name, &lock->l_resource->lr_name,
                        sizeof(dlm_rep->lock_resource_name));
                 dlm_rep->lock_mode = lock->l_req_mode;
         }
@@ -315,6 +325,8 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req)
                            "(err=%d)", err);
         req->rq_status = err;
 
+        /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
+         * ldlm_reprocess_all.  If this moves, revisit that code. -phil */
         if (lock) {
                 if (!err)
                         ldlm_reprocess_all(lock->l_resource);
@@ -384,9 +396,11 @@ int ldlm_handle_cancel(struct ptlrpc_request *req)
 
         lock = ldlm_handle2lock(&dlm_req->lock_handle1);
         if (!lock) {
-                LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock (lock "
-                                  "%p)", (void *)(unsigned long)
-                                  dlm_req->lock_handle1.addr);
+                CERROR("received cancel for unknown lock cookie "LPX64"\n",
+                       dlm_req->lock_handle1.cookie);
+                LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock "
+                                  "(cookie "LPU64")",
+                                  dlm_req->lock_handle1.cookie);
                 req->rq_status = ESTALE;
         } else {
                 LDLM_DEBUG(lock, "server-side cancel handler START");
@@ -442,8 +456,7 @@ static int ldlm_handle_bl_callback(struct ptlrpc_request *req,
                            "callback (%p)", lock->l_blocking_ast);
                 if (lock->l_blocking_ast != NULL) {
                         lock->l_blocking_ast(lock, &dlm_req->lock_desc,
-                                             lock->l_data, lock->l_data_len,
-                                             LDLM_CB_BLOCKING);
+                                             lock->l_data, LDLM_CB_BLOCKING);
                 }
         } else
                 LDLM_DEBUG(lock, "Lock still has references, will be"
@@ -487,15 +500,15 @@ static int ldlm_handle_cp_callback(struct ptlrpc_request *req,
                 memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
                        sizeof(lock->l_extent));
         ldlm_resource_unlink_lock(lock);
-        if (memcmp(dlm_req->lock_desc.l_resource.lr_name,
-                   lock->l_resource->lr_name,
-                   sizeof(__u64) * RES_NAME_SIZE) != 0) {
+        if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
+                   &lock->l_resource->lr_name,
+                   sizeof(lock->l_resource->lr_name)) != 0) {
                 ldlm_lock_change_resource(ns, lock,
                                          dlm_req->lock_desc.l_resource.lr_name);
                 LDLM_DEBUG(lock, "completion AST, new resource");
         }
         lock->l_resource->lr_tmp = &ast_list;
-        ldlm_grant_lock(lock);
+        ldlm_grant_lock(lock, req, sizeof(*req));
         lock->l_resource->lr_tmp = NULL;
         l_unlock(&ns->ns_lock);
         LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
@@ -618,6 +631,7 @@ static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
 {
         struct obd_device *obddev = class_conn2obd(conn);
         struct ptlrpc_connection *connection;
+        struct obd_uuid uuid = { "ldlm" };
         int err = 0;
         ENTRY;
 
@@ -630,14 +644,15 @@ static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
 
         OBD_ALLOC(obddev->u.ldlm.ldlm_client,
                   sizeof(*obddev->u.ldlm.ldlm_client));
-        connection = ptlrpc_uuid_to_connection("ldlm");
+        connection = ptlrpc_uuid_to_connection(&uuid);
         if (!connection)
                 CERROR("No LDLM UUID found: assuming ldlm is local.\n");
 
         switch (cmd) {
         case IOC_LDLM_TEST:
-                err = ldlm_test(obddev, conn);
-                CERROR("-- done err %d\n", err);
+                //err = ldlm_test(obddev, conn);
+                err = 0;
+                CERROR("-- NO TESTS WERE RUN done err %d\n", err);
                 GOTO(out, err);
         case IOC_LDLM_DUMP:
                 ldlm_dump_all_namespaces();
@@ -657,6 +672,7 @@ static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
 static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct ldlm_obd *ldlm = &obddev->u.ldlm;
+        struct obd_uuid uuid = {"self"};
         int rc, i;
         ENTRY;
 
@@ -670,7 +686,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
         ldlm->ldlm_cb_service =
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL,
-                                LDLM_CB_REPLY_PORTAL, "self",
+                                LDLM_CB_REPLY_PORTAL, &uuid,
                                 ldlm_callback_handler, "ldlm_cbd");
 
         if (!ldlm->ldlm_cb_service) {
@@ -681,7 +697,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
         ldlm->ldlm_cancel_service =
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL,
-                                LDLM_CANCEL_REPLY_PORTAL, "self",
+                                LDLM_CANCEL_REPLY_PORTAL, &uuid,
                                 ldlm_cancel_handler, "ldlm_canceld");
 
         if (!ldlm->ldlm_cancel_service) {
@@ -755,7 +771,7 @@ static int ldlm_cleanup(struct obd_device *obddev)
 }
 
 static int ldlm_connect(struct lustre_handle *conn, struct obd_device *src,
-                        obd_uuid_t cluuid, struct recovd_obd *recovd,
+                        struct obd_uuid *cluuid, struct recovd_obd *recovd,
                         ptlrpc_recovery_cb_t recover)
 {
         return class_connect(conn, src, cluuid);
@@ -804,43 +820,63 @@ static void __exit ldlm_exit(void)
                 CERROR("couldn't free ldlm lock slab\n");
 }
 
-EXPORT_SYMBOL(ldlm_completion_ast);
-EXPORT_SYMBOL(ldlm_handle_enqueue);
-EXPORT_SYMBOL(ldlm_handle_cancel);
-EXPORT_SYMBOL(ldlm_handle_convert);
+/* ldlm_lock.c */
+EXPORT_SYMBOL(ldlm_lock2desc);
 EXPORT_SYMBOL(ldlm_register_intent);
 EXPORT_SYMBOL(ldlm_unregister_intent);
 EXPORT_SYMBOL(ldlm_lockname);
 EXPORT_SYMBOL(ldlm_typename);
-EXPORT_SYMBOL(__ldlm_handle2lock);
 EXPORT_SYMBOL(ldlm_lock2handle);
+EXPORT_SYMBOL(__ldlm_handle2lock);
 EXPORT_SYMBOL(ldlm_lock_put);
 EXPORT_SYMBOL(ldlm_lock_match);
+EXPORT_SYMBOL(ldlm_lock_cancel);
 EXPORT_SYMBOL(ldlm_lock_addref);
 EXPORT_SYMBOL(ldlm_lock_decref);
+EXPORT_SYMBOL(ldlm_lock_decref_and_cancel);
 EXPORT_SYMBOL(ldlm_lock_change_resource);
 EXPORT_SYMBOL(ldlm_lock_set_data);
+EXPORT_SYMBOL(ldlm_it2str);
+EXPORT_SYMBOL(ldlm_lock_dump);
+EXPORT_SYMBOL(ldlm_lock_dump_handle);
+EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
+EXPORT_SYMBOL(ldlm_reprocess_all_ns);
+
+/* ldlm_request.c */
+EXPORT_SYMBOL(ldlm_completion_ast);
+EXPORT_SYMBOL(ldlm_expired_completion_wait);
 EXPORT_SYMBOL(ldlm_cli_convert);
 EXPORT_SYMBOL(ldlm_cli_enqueue);
 EXPORT_SYMBOL(ldlm_cli_cancel);
 EXPORT_SYMBOL(ldlm_cli_cancel_unused);
 EXPORT_SYMBOL(ldlm_match_or_enqueue);
-EXPORT_SYMBOL(ldlm_it2str);
+EXPORT_SYMBOL(ldlm_replay_locks);
+EXPORT_SYMBOL(ldlm_resource_foreach);
+EXPORT_SYMBOL(ldlm_namespace_foreach);
+EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+
+/* ldlm_lockd.c */
+EXPORT_SYMBOL(ldlm_server_blocking_ast);
+EXPORT_SYMBOL(ldlm_server_completion_ast);
+EXPORT_SYMBOL(ldlm_handle_enqueue);
+EXPORT_SYMBOL(ldlm_handle_cancel);
+EXPORT_SYMBOL(ldlm_handle_convert);
+EXPORT_SYMBOL(ldlm_del_waiting_lock);
+
+#if 0
+/* ldlm_test.c */
 EXPORT_SYMBOL(ldlm_test);
 EXPORT_SYMBOL(ldlm_regression_start);
 EXPORT_SYMBOL(ldlm_regression_stop);
-EXPORT_SYMBOL(ldlm_lock_dump);
-EXPORT_SYMBOL(ldlm_lock_dump_handle);
+#endif
+
+/* ldlm_resource.c */
 EXPORT_SYMBOL(ldlm_namespace_new);
 EXPORT_SYMBOL(ldlm_namespace_cleanup);
 EXPORT_SYMBOL(ldlm_namespace_free);
 EXPORT_SYMBOL(ldlm_namespace_dump);
-EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
-EXPORT_SYMBOL(ldlm_replay_locks);
-EXPORT_SYMBOL(ldlm_resource_foreach);
-EXPORT_SYMBOL(ldlm_reprocess_all_ns);
-EXPORT_SYMBOL(ldlm_namespace_foreach);
-EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+
+/* l_lock.c */
 EXPORT_SYMBOL(l_lock);
 EXPORT_SYMBOL(l_unlock);
 
index b71dd20..44122f5 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -47,14 +47,14 @@ int ldlm_expired_completion_wait(void *data)
         else {
                 LDLM_DEBUG(lock, "timed out waiting for completion");
                 CERROR("lock %p timed out from %s\n", lock,
-                       conn->c_remote_uuid);
+                       conn->c_remote_uuid.uuid);
                 ldlm_lock_dump(D_ERROR, lock);
                 class_signal_connection_failure(conn);
         }
         RETURN(0);
 }
 
-int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
+int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct l_wait_info lwi =
                 LWI_TIMEOUT_INTR(obd_timeout * HZ, ldlm_expired_completion_wait,
@@ -102,7 +102,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
 
 static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
                                   struct lustre_handle *parent_lockh,
-                                  __u64 *res_id,
+                                  struct ldlm_res_id res_id,
                                   __u32 type,
                                   void *cookie, int cookielen,
                                   ldlm_mode_t mode,
@@ -110,7 +110,7 @@ static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
                                   ldlm_completion_callback completion,
                                   ldlm_blocking_callback blocking,
                                   void *data,
-                                  __u32 data_len,
+                                  void *cp_data,
                                   struct lustre_handle *lockh)
 {
         struct ldlm_lock *lock;
@@ -122,17 +122,17 @@ static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
                 LBUG();
         }
 
-        lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode, data,
-                                data_len);
+        lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode,
+                                data, cp_data);
         if (!lock)
                 GOTO(out_nolock, err = -ENOMEM);
         LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created");
 
         ldlm_lock_addref_internal(lock, mode);
         ldlm_lock2handle(lock, lockh);
-        lock->l_connh = NULL;
+        lock->l_flags |= LDLM_FL_LOCAL;
 
-        err = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags, completion,
+        err = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags, completion,
                                 blocking);
         if (err != ELDLM_OK)
                 GOTO(out, err);
@@ -140,13 +140,13 @@ static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
         if (type == LDLM_EXTENT)
                 memcpy(cookie, &lock->l_extent, sizeof(lock->l_extent));
         if ((*flags) & LDLM_FL_LOCK_CHANGED)
-                memcpy(res_id, lock->l_resource->lr_name, sizeof(*res_id));
+                memcpy(&res_id, &lock->l_resource->lr_name, sizeof(res_id));
 
         LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)",
                           lock);
 
         if (lock->l_completion_ast)
-                lock->l_completion_ast(lock, *flags);
+                lock->l_completion_ast(lock, *flags, NULL);
 
         LDLM_DEBUG(lock, "client-side local enqueue END");
         EXIT;
@@ -160,7 +160,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                      struct ptlrpc_request *req,
                      struct ldlm_namespace *ns,
                      struct lustre_handle *parent_lock_handle,
-                     __u64 *res_id,
+                     struct ldlm_res_id res_id,
                      __u32 type,
                      void *cookie, int cookielen,
                      ldlm_mode_t mode,
@@ -168,7 +168,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                      ldlm_completion_callback completion,
                      ldlm_blocking_callback blocking,
                      void *data,
-                     __u32 data_len,
+                     void *cp_data,
                      struct lustre_handle *lockh)
 {
         struct ldlm_lock *lock;
@@ -180,11 +180,13 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
         is_replay = *flags & LDLM_FL_REPLAY;
         LASSERT(connh != NULL || !is_replay);
 
-        if (connh == NULL)
-                return ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id,
-                                              type, cookie, cookielen, mode,
-                                              flags, completion, blocking, data,
-                                              data_len, lockh);
+        if (connh == NULL) {
+                rc = ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id,
+                                            type, cookie, cookielen, mode,
+                                            flags, completion, blocking, data,
+                                            cp_data, lockh);
+                RETURN(rc);
+        }
 
         /* If we're replaying this lock, just check some invariants.
          * If we're creating a new lock, get everything all setup nice. */
@@ -194,9 +196,14 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 LASSERT(connh == lock->l_connh);
         } else {
                 lock = ldlm_lock_create(ns, parent_lock_handle, res_id, type,
-                                        mode, data, data_len);
+                                        mode, data, cp_data);
                 if (lock == NULL)
                         GOTO(out_nolock, rc = -ENOMEM);
+                /* ugh.  I set this early (instead of waiting for _enqueue)
+                 * because the completion AST might arrive early, and we need
+                 * (in just this one case) to run the completion_cb even if it
+                 * arrives before the reply. */
+                lock->l_completion_ast = completion;
                 LDLM_DEBUG(lock, "client-side enqueue START");
                 /* for the local lock, add the reference */
                 ldlm_lock_addref_internal(lock, mode);
@@ -240,9 +247,12 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                 LASSERT(!is_replay);
                 LDLM_DEBUG(lock, "client-side enqueue END (%s)",
                            rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED");
+                /* Set a flag to prevent us from sending a CANCEL (bug 407) */
+                l_lock(&ns->ns_lock);
+                lock->l_flags |= LDLM_FL_CANCELING;
+                l_unlock(&ns->ns_lock);
+
                 ldlm_lock_decref(lockh, mode);
-                /* FIXME: if we've already received a completion AST, this will
-                 * LBUG! */
                 ldlm_lock_destroy(lock);
                 GOTO(out_req, rc);
         }
@@ -276,12 +286,12 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
                         lock->l_req_mode = newmode;
                 }
 
-                if (reply->lock_resource_name[0] !=
-                    lock->l_resource->lr_name[0]) {
+                if (reply->lock_resource_name.name[0] !=
+                    lock->l_resource->lr_name.name[0]) {
                         CDEBUG(D_INFO, "remote intent success, locking %ld "
                                "instead of %ld\n",
-                               (long)reply->lock_resource_name[0],
-                               (long)lock->l_resource->lr_name[0]);
+                               (long)reply->lock_resource_name.name[0],
+                               (long)lock->l_resource->lr_name.name[0]);
 
                         ldlm_lock_change_resource(ns, lock,
                                                   reply->lock_resource_name);
@@ -294,10 +304,13 @@ int ldlm_cli_enqueue(struct lustre_handle *connh,
         }
 
         if (!is_replay) {
-                rc = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags,
+                l_lock(&ns->ns_lock);
+                lock->l_completion_ast = NULL;
+                rc = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags,
                                        completion, blocking);
+                l_unlock(&ns->ns_lock);
                 if (lock->l_completion_ast)
-                        lock->l_completion_ast(lock, *flags);
+                        lock->l_completion_ast(lock, *flags, NULL);
         }
 
         LDLM_DEBUG(lock, "client-side enqueue END");
@@ -315,7 +328,7 @@ int ldlm_match_or_enqueue(struct lustre_handle *connh,
                           struct ptlrpc_request *req,
                           struct ldlm_namespace *ns,
                           struct lustre_handle *parent_lock_handle,
-                          __u64 *res_id,
+                          struct ldlm_res_id res_id,
                           __u32 type,
                           void *cookie, int cookielen,
                           ldlm_mode_t mode,
@@ -323,30 +336,39 @@ int ldlm_match_or_enqueue(struct lustre_handle *connh,
                           ldlm_completion_callback completion,
                           ldlm_blocking_callback blocking,
                           void *data,
-                          __u32 data_len,
+                          void *cp_data,
                           struct lustre_handle *lockh)
 {
         int rc;
         ENTRY;
-        rc = ldlm_lock_match(ns, res_id, type, cookie, cookielen, mode, lockh);
+        if (connh == NULL) {
+                /* Just to make sure that I understand things --phil */
+                LASSERT(*flags & LDLM_FL_LOCAL_ONLY);
+        }
+
+        LDLM_DEBUG_NOLOCK("resource "LPU64"/"LPU64, res_id.name[0],
+                          res_id.name[1]);
+        rc = ldlm_lock_match(ns, *flags, &res_id, type, cookie, cookielen, mode,
+                             lockh);
         if (rc == 0) {
-                rc = ldlm_cli_enqueue(connh, req, ns,
-                                      parent_lock_handle, res_id, type, cookie,
-                                      cookielen, mode, flags, completion,
-                                      blocking, data, data_len, lockh);
+                rc = ldlm_cli_enqueue(connh, req, ns, parent_lock_handle,
+                                      res_id, type, cookie, cookielen, mode,
+                                      flags, completion, blocking, data,
+                                      cp_data, lockh);
                 if (rc != ELDLM_OK)
                         CERROR("ldlm_cli_enqueue: err: %d\n", rc);
                 RETURN(rc);
-        } else
-                RETURN(0);
+        }
+        RETURN(0);
 }
 
 int ldlm_cli_replay_enqueue(struct ldlm_lock *lock)
 {
         struct lustre_handle lockh;
+        struct ldlm_res_id junk;
         int flags = LDLM_FL_REPLAY;
         ldlm_lock2handle(lock, &lockh);
-        return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, NULL,
+        return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, junk,
                                 lock->l_resource->lr_type, NULL, 0, -1, &flags,
                                 NULL, NULL, NULL, 0, &lockh);
 }
@@ -421,7 +443,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
         /* Go to sleep until the lock is granted. */
         /* FIXME: or cancelled. */
         if (lock->l_completion_ast)
-                lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
+                lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
         EXIT;
  out:
         LDLM_LOCK_PUT(lock);
@@ -443,13 +465,22 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
                 RETURN(0);
 
         if (lock->l_connh) {
+                int local_only;
+
                 LDLM_DEBUG(lock, "client-side cancel");
                 /* Set this flag to prevent others from getting new references*/
                 l_lock(&lock->l_resource->lr_namespace->ns_lock);
                 lock->l_flags |= LDLM_FL_CBPENDING;
                 ldlm_cancel_callback(lock);
+                local_only = (lock->l_flags & LDLM_FL_LOCAL_ONLY);
                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
 
+                if (local_only) {
+                        CDEBUG(D_INFO, "not sending request (at caller's "
+                               "instruction\n");
+                        goto local_cancel;
+                }
+
                 req = ptlrpc_prep_req(class_conn2cliimp(lock->l_connh),
                                       LDLM_CANCEL, 1, &size, NULL);
                 if (!req)
@@ -467,9 +498,14 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 
                 rc = ptlrpc_queue_wait(req);
                 ptlrpc_req_finished(req);
+                if (rc == ESTALE) {
+                        CERROR("client/server out of sync\n");
+                        LBUG();
+                }
                 if (rc != ELDLM_OK)
-                        GOTO(out, rc);
-
+                        CERROR("Got rc %d from cancel RPC: canceling "
+                               "anyway\n", rc);
+        local_cancel:
                 ldlm_lock_cancel(lock);
         } else {
                 LDLM_DEBUG(lock, "client-side local cancel");
@@ -482,8 +518,6 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
                 LDLM_DEBUG(lock, "client-side local cancel handler END");
         }
 
-        lock->l_flags |= LDLM_FL_CANCELING;
-
         EXIT;
  out:
         LDLM_LOCK_PUT(lock);
@@ -549,7 +583,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns)
 }
 
 int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
-                                    __u64 *res_id, int flags)
+                                    struct ldlm_res_id res_id, int flags)
 {
         struct ldlm_resource *res;
         struct list_head *tmp, *next, list = LIST_HEAD_INIT(list);
@@ -559,7 +593,7 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
         res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
         if (res == NULL) {
                 /* This is not a problem. */
-                CDEBUG(D_INFO, "No resource "LPU64"\n", res_id[0]);
+                CDEBUG(D_INFO, "No resource "LPU64"\n", res_id.name[0]);
                 RETURN(0);
         }
 
@@ -615,8 +649,8 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
  *
  * If 'local_only' is true, throw the locks away without trying to notify the
  * server. */
-int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, __u64 *res_id,
-                           int flags)
+int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
+                           struct ldlm_res_id *res_id, int flags)
 {
         int i;
         ENTRY;
@@ -625,7 +659,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, __u64 *res_id,
                 RETURN(ELDLM_OK);
 
         if (res_id)
-                RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, flags));
+                RETURN(ldlm_cli_cancel_unused_resource(ns, *res_id, flags));
 
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
@@ -641,7 +675,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, __u64 *res_id,
 
                         if (rc)
                                 CERROR("cancel_unused_res ("LPU64"): %d\n",
-                                       res->lr_name[0], rc);
+                                       res->lr_name.name[0], rc);
                         ldlm_resource_putref(res);
                 }
         }
index e5960bd..9e757a6 100644 (file)
@@ -1,12 +1,24 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Peter Braam <braam@clusterfs.com>
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- * by Cluster File Systems, Inc.
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_LDLM
@@ -22,58 +34,59 @@ static struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
 
 int ldlm_proc_setup(struct obd_device *obd)
 {
+        int rc;
         ENTRY;
         LASSERT(ldlm_ns_proc_dir == NULL);
-        ldlm_ns_proc_dir = obd->obd_type->typ_procroot;
+        rc = lprocfs_obd_attach(obd, 0);
+        if (rc) {
+                CERROR("LProcFS failed in ldlm-init\n");
+                RETURN(rc);
+        }
+        ldlm_ns_proc_dir = obd->obd_proc_entry;
         RETURN(0);
 }
 
 void ldlm_proc_cleanup(struct obd_device *obd)
 {
-        ldlm_ns_proc_dir = NULL;
+        if (ldlm_ns_proc_dir) {
+                lprocfs_obd_detach(obd);
+                ldlm_ns_proc_dir = NULL;
+        }
 }
 
 static int lprocfs_uint_rd(char *page, char **start, off_t off,
                            int count, int *eof, void *data)
 {
         unsigned int *temp = (unsigned int *)data;
-        int len;
-        len = snprintf(page, count, "%u\n", *temp);
-        return len;
+        return snprintf(page, count, "%u\n", *temp);
 }
 
-#define MAX_STRING_SIZE 100
+#define MAX_STRING_SIZE 128
 void ldlm_proc_namespace(struct ldlm_namespace *ns)
 {
         struct lprocfs_vars lock_vars[2];
-        char lock_names[MAX_STRING_SIZE + 1];
+        char lock_name[MAX_STRING_SIZE + 1];
+
+        lock_name[MAX_STRING_SIZE] = '\0';
 
         memset(lock_vars, 0, sizeof(lock_vars));
-        snprintf(lock_names, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
-        lock_names[MAX_STRING_SIZE] = '\0';
-        lock_vars[0].name = lock_names;
-        lock_vars[0].read_fptr = lprocfs_ll_rd;
-        lock_vars[0].write_fptr = NULL;
+        lock_vars[0].read_fptr = lprocfs_rd_u64;
+
+        lock_vars[0].name = lock_name;
+
+        snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
+
         lock_vars[0].data = &ns->ns_resources;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
-        memset(lock_vars, 0, sizeof(lock_vars));
-        snprintf(lock_names, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
-        lock_names[MAX_STRING_SIZE] = '\0';
-        lock_vars[0].name = lock_names;
-        lock_vars[0].read_fptr = lprocfs_ll_rd;
-        lock_vars[0].write_fptr = NULL;
+        snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
         lock_vars[0].data = &ns->ns_locks;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
-        memset(lock_vars, 0, sizeof(lock_vars));
-        snprintf(lock_names, MAX_STRING_SIZE, "%s/lock_unused_count",
+        snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count",
                  ns->ns_name);
-        lock_names[MAX_STRING_SIZE] = '\0';
-        lock_vars[0].name = lock_names;
-        lock_vars[0].read_fptr = lprocfs_uint_rd;
-        lock_vars[0].write_fptr = NULL;
         lock_vars[0].data = &ns->ns_nr_unused;
+        lock_vars[0].read_fptr = lprocfs_uint_rd;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 }
 #undef MAX_STRING_SIZE
@@ -136,7 +149,9 @@ extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
 
 /* If 'local_only' is true, don't try to tell the server, just cleanup.
  * This is currently only used for recovery, and we make certain assumptions
- * as a result--notably, that we shouldn't cancel locks with refs. -phil */
+ * as a result--notably, that we shouldn't cancel locks with refs. -phil
+ *
+ * Called with the ns_lock held. */
 static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                              int local_only)
 {
@@ -156,7 +171,9 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                          * will go away ... */
                         lock->l_flags |= LDLM_FL_CBPENDING;
                         /* ... without sending a CANCEL message. */
-                        lock->l_flags |= LDLM_FL_CANCELING;
+                        lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+                        /* ... and without calling the cancellation callback */
+                        lock->l_flags |= LDLM_FL_CANCEL;
                         LDLM_LOCK_PUT(lock);
                         continue;
                 }
@@ -177,7 +194,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                                 ldlm_lock_cancel(lock);
                 } else {
                         LDLM_DEBUG(lock, "Freeing a lock still held by a "
-                                   "client node.\n");
+                                   "client node");
 
                         ldlm_resource_unlink_lock(lock);
                         ldlm_lock_destroy(lock);
@@ -256,13 +273,13 @@ int ldlm_client_free(struct obd_export *exp)
         RETURN(0);
 }
 
-static __u32 ldlm_hash_fn(struct ldlm_resource *parent, __u64 *name)
+static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name)
 {
         __u32 hash = 0;
         int i;
 
         for (i = 0; i < RES_NAME_SIZE; i++)
-                hash += name[i];
+                hash += name.name[i];
 
         hash += (__u32)((unsigned long)parent >> 4);
 
@@ -293,9 +310,9 @@ static struct ldlm_resource *ldlm_resource_new(void)
 
 /* Args: locked namespace
  * Returns: newly-allocated, referenced, unlocked resource */
-static struct ldlm_resource *ldlm_resource_add(struct ldlm_namespace *ns,
-                                               struct ldlm_resource *parent,
-                                               __u64 *name, __u32 type)
+static struct ldlm_resource *
+ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+                  struct ldlm_res_id name, __u32 type)
 {
         struct list_head *bucket;
         struct ldlm_resource *res;
@@ -317,7 +334,7 @@ static struct ldlm_resource *ldlm_resource_add(struct ldlm_namespace *ns,
         spin_unlock(&ns->ns_counter_lock);
 
         l_lock(&ns->ns_lock);
-        memcpy(res->lr_name, name, sizeof(res->lr_name));
+        memcpy(&res->lr_name, &name, sizeof(res->lr_name));
         res->lr_namespace = ns;
         ns->ns_refcount++;
 
@@ -341,9 +358,9 @@ static struct ldlm_resource *ldlm_resource_add(struct ldlm_namespace *ns,
 /* Args: unlocked namespace
  * Locks: takes and releases ns->ns_lock and res->lr_lock
  * Returns: referenced, unlocked ldlm_resource or NULL */
-struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
-                                        struct ldlm_resource *parent,
-                                        __u64 *name, __u32 type, int create)
+struct ldlm_resource *
+ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+                  struct ldlm_res_id name, __u32 type, int create)
 {
         struct list_head *bucket, *tmp;
         struct ldlm_resource *res = NULL;
@@ -358,7 +375,7 @@ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
         list_for_each(tmp, bucket) {
                 res = list_entry(tmp, struct ldlm_resource, lr_hash);
 
-                if (memcmp(res->lr_name, name, sizeof(res->lr_name)) == 0) {
+                if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0) {
                         ldlm_resource_getref(res);
                         l_unlock(&ns->ns_lock);
                         RETURN(res);
@@ -451,12 +468,17 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
         l_lock(&res->lr_namespace->ns_lock);
 
         ldlm_resource_dump(res);
-        CDEBUG(D_OTHER, "About to grant this lock:\n");
+        CDEBUG(D_OTHER, "About to add this lock:\n");
         ldlm_lock_dump(D_OTHER, lock);
 
+        if (lock->l_destroyed) {
+                CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+                return;
+        }
+
         LASSERT(list_empty(&lock->l_res_link));
 
-        list_add(&lock->l_res_link, head);
+        list_add_tail(&lock->l_res_link, head);
         l_unlock(&res->lr_namespace->ns_lock);
 }
 
@@ -470,7 +492,7 @@ void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
 void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
 {
         desc->lr_type = res->lr_type;
-        memcpy(desc->lr_name, res->lr_name, sizeof(desc->lr_name));
+        memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
         memcpy(desc->lr_version, res->lr_version, sizeof(desc->lr_version));
 }
 
@@ -517,9 +539,9 @@ void ldlm_resource_dump(struct ldlm_resource *res)
                 LBUG();
 
         snprintf(name, sizeof(name), "%Lx %Lx %Lx",
-                 (unsigned long long)res->lr_name[0],
-                 (unsigned long long)res->lr_name[1],
-                 (unsigned long long)res->lr_name[2]);
+                 (unsigned long long)res->lr_name.name[0],
+                 (unsigned long long)res->lr_name.name[1],
+                 (unsigned long long)res->lr_name.name[2]);
 
         CDEBUG(D_OTHER, "--- Resource: %p (%s) (rc: %d)\n", res, name,
                atomic_read(&res->lr_refcount));
index b34c9ab..6cf1056 100644 (file)
@@ -75,7 +75,7 @@ static int ldlm_do_convert(void);
  */
 static int ldlm_test_blocking_ast(struct ldlm_lock *lock,
                                   struct ldlm_lock_desc *new,
-                                  void *data, __u32 data_len, int flag)
+                                  void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
@@ -104,7 +104,7 @@ static int ldlm_test_blocking_ast(struct ldlm_lock *lock,
 /* blocking ast for basic tests. noop */
 static int ldlm_blocking_ast(struct ldlm_lock *lock,
                              struct ldlm_lock_desc *new,
-                             void *data, __u32 data_len, int flag)
+                             void *data, int flag)
 {
         ENTRY;
         CERROR("ldlm_blocking_ast: lock=%p, new=%p, flag=%d\n", lock, new,
@@ -115,7 +115,7 @@ static int ldlm_blocking_ast(struct ldlm_lock *lock,
 /* Completion ast for regression test.
  * Does not sleep when blocked.
  */
-static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags)
+static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_test_lock *lock_info;
         ENTRY;
@@ -159,7 +159,7 @@ int ldlm_test_basics(struct obd_device *obddev)
 {
         struct ldlm_namespace *ns;
         struct ldlm_resource *res;
-        __u64 res_id[RES_NAME_SIZE] = {1, 2, 3};
+        struct ldlm_res_id res_id = { .name = {1, 2, 3} };
         ldlm_error_t err;
         struct ldlm_lock *lock1, *lock;
         int flags;
@@ -207,7 +207,7 @@ int ldlm_test_extents(struct obd_device *obddev)
         struct ldlm_namespace *ns;
         struct ldlm_resource *res;
         struct ldlm_lock *lock, *lock1, *lock2;
-        __u64 res_id[RES_NAME_SIZE] = {0, 0, 0};
+        struct ldlm_res_id res_id = { .name = {0} };
         struct ldlm_extent ext1 = {4, 6}, ext2 = {6, 9}, ext3 = {10, 11};
         ldlm_error_t err;
         int flags;
@@ -275,8 +275,7 @@ int ldlm_test_extents(struct obd_device *obddev)
 static int ldlm_test_network(struct obd_device *obddev,
                              struct lustre_handle *connh)
 {
-
-        __u64 res_id[RES_NAME_SIZE] = {1, 2, 3};
+        struct ldlm_res_id res_id = { .name = {1, 2, 3} };
         struct ldlm_extent ext = {4, 6};
         struct lustre_handle lockh1;
         struct ldlm_lock *lock;
@@ -341,7 +340,7 @@ static int ldlm_do_decrement(void)
 static int ldlm_do_enqueue(struct ldlm_test_thread *thread)
 {
         struct lustre_handle lockh;
-        __u64 res_id[3] = {0};
+        struct ldlm_res_id res_id = { .name = {0} };
         __u32 lock_mode;
         struct ldlm_extent ext;
         unsigned char random;
@@ -350,7 +349,7 @@ static int ldlm_do_enqueue(struct ldlm_test_thread *thread)
 
         /* Pick a random resource from 1 to num_resources */
         get_random_bytes(&random, sizeof(random));
-        res_id[0] = random % num_resources;
+        res_id.name[0] = random % num_resources;
 
         /* Pick a random lock mode */
         get_random_bytes(&random, sizeof(random));
@@ -364,7 +363,7 @@ static int ldlm_do_enqueue(struct ldlm_test_thread *thread)
                 (num_extents - (int)ext.start) + ext.start;
 
         LDLM_DEBUG_NOLOCK("about to enqueue with resource "LPX64", mode %d,"
-                          " extent "LPX64" -> "LPX64, res_id[0], lock_mode,
+                          " extent "LPX64" -> "LPX64, res_id.name[0], lock_mode,
                           ext.start, ext.end);
 
         rc = ldlm_match_or_enqueue(&regress_connh, NULL,
index 5bf0d4a..122142b 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter J. Braam <braam@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
  *   Author: Mike Shaver <shaver@clusterfs.com>
@@ -40,7 +40,7 @@ struct client_obd *client_conn2cli(struct lustre_handle *conn)
         return &export->exp_obd->u.cli;
 }
 
-struct obd_device *client_tgtuuid2obd(char *tgtuuid)
+struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid)
 {
         int i;
 
@@ -49,8 +49,8 @@ struct obd_device *client_tgtuuid2obd(char *tgtuuid)
                 if ((strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0) ||
                     (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0)) {
                         struct client_obd *cli = &obd->u.cli;
-                        if (strncmp(tgtuuid, cli->cl_target_uuid,
-                                    sizeof(cli->cl_target_uuid)) == 0)
+                        if (strncmp(tgtuuid->uuid, cli->cl_target_uuid.uuid,
+                                    sizeof(cli->cl_target_uuid.uuid)) == 0)
                                 return obd;
                 }
         }
@@ -65,7 +65,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         char *name;
         struct client_obd *cli = &obddev->u.cli;
         struct obd_import *imp = &cli->cl_import;
-        obd_uuid_t server_uuid;
+        struct obd_uuid server_uuid;
         ENTRY;
 
         if (obddev->obd_type->typ_ops->o_brw) {
@@ -100,11 +100,11 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
 
         sema_init(&cli->cl_sem, 1);
         cli->cl_conn_count = 0;
-        memcpy(cli->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
-        memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
+        memcpy(cli->cl_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1);
+        memcpy(server_uuid.uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
                                                    sizeof(server_uuid)));
 
-        imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+        imp->imp_connection = ptlrpc_uuid_to_connection(&server_uuid);
         if (!imp->imp_connection)
                 RETURN(-ENOENT);
 
@@ -134,17 +134,18 @@ int client_obd_cleanup(struct obd_device * obddev)
 }
 
 int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
         struct client_obd *cli = &obd->u.cli;
         struct ptlrpc_request *request;
         int rc, size[] = {sizeof(cli->cl_target_uuid),
                           sizeof(obd->obd_uuid) };
-        char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid};
+        char *tmp[] = {cli->cl_target_uuid.uuid, obd->obd_uuid.uuid};
         int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
         struct ptlrpc_connection *c;
         struct obd_import *imp = &cli->cl_import;
+        int msg_flags;
 
         ENTRY;
         down(&cli->cl_sem);
@@ -166,7 +167,6 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
         INIT_LIST_HEAD(&imp->imp_chain);
         imp->imp_last_xid = 0;
         imp->imp_max_transno = 0;
-        imp->imp_peer_last_xid = 0;
         imp->imp_peer_committed_transno = 0;
 
         request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 2, size, tmp);
@@ -187,8 +187,11 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
         if (rc)
                 GOTO(out_req, rc);
 
-        if (rq_opc == MDS_CONNECT)
+        msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+        if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
                 imp->imp_flags |= IMP_REPLAYABLE;
+                CDEBUG(D_HA, "connected to replayable target: %s\n", cli->cl_target_uuid.uuid);
+        }
         imp->imp_level = LUSTRE_CONN_FULL;
         imp->imp_handle.addr = request->rq_repmsg->addr;
         imp->imp_handle.cookie = request->rq_repmsg->cookie;
@@ -248,10 +251,12 @@ int client_obd_disconnect(struct lustre_handle *conn)
         if (cli->cl_conn_count)
                 GOTO(out_no_disconnect, rc = 0);
 
-        ldlm_namespace_free(obd->obd_namespace);
-        obd->obd_namespace = NULL;
-        request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 0, NULL,
-                                  NULL);
+        if (obd->obd_namespace != NULL) {
+                ldlm_cli_cancel_unused(obd->obd_namespace, NULL, 0);
+                ldlm_namespace_free(obd->obd_namespace);
+                obd->obd_namespace = NULL;
+        }
+        request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 0, NULL, NULL);
         if (!request)
                 GOTO(out_req, rc = -ENOMEM);
 
index 6a53cb6..4d7f37a 100644 (file)
@@ -1,14 +1,24 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ * Lustre Lite Update Records
  *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
- *   This code is issued under the GNU General Public License.
- *   See the file COPYING in this distribution
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- * Lustre Lite Update Records
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/config.h>
@@ -20,7 +30,7 @@
 #include <linux/errno.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include <linux/locks.h>   // for wait_on_buffer
-#else 
+#else
 #include <linux/buffer_head.h>   // for wait_on_buffer
 #endif
 #include <linux/unistd.h>
@@ -52,20 +62,23 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
 void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
 {
         b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
-                OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLUID | OBD_MD_FLGID |
-                OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
+                OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
+                OBD_MD_FLNLINK | OBD_MD_FLGENER;
         b->ino = HTON__u32(inode->i_ino);
         b->atime = HTON__u32(inode->i_atime);
         b->mtime = HTON__u32(inode->i_mtime);
         b->ctime = HTON__u32(inode->i_ctime);
         b->mode = HTON__u32(inode->i_mode);
         b->size = HTON__u64(inode->i_size);
+        b->blocks = HTON__u64(inode->i_blocks);
         b->uid = HTON__u32(inode->i_uid);
         b->gid = HTON__u32(inode->i_gid);
         b->flags = HTON__u32(inode->i_flags);
         b->rdev = HTON__u32(b->rdev);
         b->nlink = HTON__u32(inode->i_nlink);
         b->generation = HTON__u32(inode->i_generation);
+        b->suppgid = HTON__u32(-1);
 }
 
 
@@ -100,11 +113,12 @@ static void mds_pack_body(struct mds_body *b)
         b->rdev = HTON__u32(b->rdev);
         b->nlink = HTON__u32(b->nlink);
         b->generation = HTON__u32(b->generation);
+        b->suppgid = HTON__u32(b->suppgid);
 }
 
-void mds_getattr_pack(struct ptlrpc_request *req, int offset,
-                      struct inode *inode,
-                      const char *name, int namelen)
+void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+                      int flags,
+                      struct inode *inode, const char *name, int namelen)
 {
         struct mds_body *b;
         b = lustre_msg_buf(req->rq_reqmsg, offset);
@@ -112,6 +126,12 @@ void mds_getattr_pack(struct ptlrpc_request *req, int offset,
         b->fsuid = HTON__u32(current->fsuid);
         b->fsgid = HTON__u32(current->fsgid);
         b->capability = HTON__u32(current->cap_effective);
+        b->valid = HTON__u32(valid);
+        b->flags = HTON__u32(flags);
+        if (in_group_p(inode->i_gid))
+                b->suppgid = HTON__u32(inode->i_gid);
+        else
+                b->suppgid = HTON__u32(-1);
 
         ll_inode2fid(&b->fid1, inode);
         if (name) {
@@ -122,7 +142,7 @@ void mds_getattr_pack(struct ptlrpc_request *req, int offset,
 }
 
 void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset,
-                      obd_id ino, int type)
+                      obd_id ino, int type, __u64 xid)
 {
         struct mds_body *b;
 
@@ -133,6 +153,8 @@ void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset,
         b->fid1.id = HTON__u64(ino);
         b->fid1.f_type = HTON__u32(type);
         b->size = HTON__u64(offset);
+        b->suppgid = HTON__u32(-1);
+        b->blocks = HTON__u64(xid);
 }
 
 
@@ -159,7 +181,6 @@ void mds_create_pack(struct ptlrpc_request *req, int offset, struct inode *dir,
         char *tmp;
         rec = lustre_msg_buf(req->rq_reqmsg, offset);
 
-        /* XXX do something about time, uid, gid */
         rec->cr_opcode = HTON__u32(REINT_CREATE);
         rec->cr_fsuid = HTON__u32(current->fsuid);
         rec->cr_fsgid = HTON__u32(current->fsgid);
@@ -180,34 +201,78 @@ void mds_create_pack(struct ptlrpc_request *req, int offset, struct inode *dir,
                 LOGL0(data, datalen, tmp);
         }
 }
+/* packing of MDS records */
+void mds_open_pack(struct ptlrpc_request *req, int offset, struct inode *dir,
+                     __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
+                     __u32 flags,
+                     const char *name, int namelen,
+                     const void *data, int datalen)
+{
+        struct mds_rec_create *rec;
+        char *tmp;
+        rec = lustre_msg_buf(req->rq_reqmsg, offset);
+
+        /* XXX do something about time, uid, gid */
+        rec->cr_opcode = HTON__u32(REINT_OPEN);
+        rec->cr_fsuid = HTON__u32(current->fsuid);
+        rec->cr_fsgid = HTON__u32(current->fsgid);
+        rec->cr_cap = HTON__u32(current->cap_effective);
+        ll_inode2fid(&rec->cr_fid, dir);
+        memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
+        rec->cr_mode = HTON__u32(mode);
+        rec->cr_flags = HTON__u32(flags);
+        rec->cr_rdev = HTON__u64(rdev);
+        rec->cr_uid = HTON__u32(uid);
+        rec->cr_gid = HTON__u32(gid);
+        rec->cr_time = HTON__u64(time);
+        if (in_group_p(dir->i_gid))
+                rec->cr_suppgid = HTON__u32(dir->i_gid);
+        else
+                rec->cr_suppgid = HTON__u32(-1);
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1);
+        LOGL0(name, namelen, tmp);
+
+        if (data) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2);
+                LOGL0(data, datalen, tmp);
+        }
+}
 
-void mds_setattr_pack(struct ptlrpc_request *req, int offset,
+void mds_setattr_pack(struct ptlrpc_request *req,
                       struct inode *inode, struct iattr *iattr,
-                      const char *name, int namelen)
+                      void *ea, int ealen)
 {
-        struct mds_rec_setattr *rec;
-        rec = lustre_msg_buf(req->rq_reqmsg, offset);
+        struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0);
 
         rec->sa_opcode = HTON__u32(REINT_SETATTR);
         rec->sa_fsuid = HTON__u32(current->fsuid);
         rec->sa_fsgid = HTON__u32(current->fsgid);
         rec->sa_cap = HTON__u32(current->cap_effective);
         ll_inode2fid(&rec->sa_fid, inode);
-        rec->sa_valid = HTON__u32(iattr->ia_valid);
-        rec->sa_mode = HTON__u32(iattr->ia_mode);
-        rec->sa_uid = HTON__u32(iattr->ia_uid);
-        rec->sa_gid = HTON__u32(iattr->ia_gid);
-        rec->sa_size = HTON__u64(iattr->ia_size);
-        rec->sa_atime = HTON__u64(iattr->ia_atime);
-        rec->sa_mtime = HTON__u64(iattr->ia_mtime);
-        rec->sa_ctime = HTON__u64(iattr->ia_ctime);
-        rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags);
-
-        if (namelen) {
-                char *tmp;
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1);
-                LOGL0(name, namelen, tmp);
+
+        if (iattr) {
+                rec->sa_valid = HTON__u32(iattr->ia_valid);
+                rec->sa_mode = HTON__u32(iattr->ia_mode);
+                rec->sa_uid = HTON__u32(iattr->ia_uid);
+                rec->sa_gid = HTON__u32(iattr->ia_gid);
+                rec->sa_size = HTON__u64(iattr->ia_size);
+                rec->sa_atime = HTON__u64(iattr->ia_atime);
+                rec->sa_mtime = HTON__u64(iattr->ia_mtime);
+                rec->sa_ctime = HTON__u64(iattr->ia_ctime);
+                rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags);
+
+                if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
+                        rec->sa_suppgid = HTON__u32(iattr->ia_gid);
+                else if ((iattr->ia_valid & ATTR_MODE) &&
+                         in_group_p(inode->i_gid))
+                        rec->sa_suppgid = HTON__u32(inode->i_gid);
+                else
+                        rec->sa_suppgid = HTON__u32(-1);
         }
+
+        if (ealen)
+                memcpy(lustre_msg_buf(req->rq_reqmsg, 1), ea, ealen);
 }
 
 void mds_unlink_pack(struct ptlrpc_request *req, int offset,
@@ -224,6 +289,10 @@ void mds_unlink_pack(struct ptlrpc_request *req, int offset,
         rec->ul_fsgid = HTON__u32(current->fsgid);
         rec->ul_cap = HTON__u32(current->cap_effective);
         rec->ul_mode = HTON__u32(mode);
+        if (in_group_p(inode->i_gid))
+                rec->ul_suppgid = HTON__u32(inode->i_gid);
+        else
+                rec->ul_suppgid = HTON__u32(-1);
         ll_inode2fid(&rec->ul_fid1, inode);
         if (child)
                 ll_inode2fid(&rec->ul_fid2, child);
@@ -245,6 +314,10 @@ void mds_link_pack(struct ptlrpc_request *req, int offset,
         rec->lk_fsuid = HTON__u32(current->fsuid);
         rec->lk_fsgid = HTON__u32(current->fsgid);
         rec->lk_cap = HTON__u32(current->cap_effective);
+        if (in_group_p(dir->i_gid))
+                rec->lk_suppgid = HTON__u32(dir->i_gid);
+        else
+                rec->lk_suppgid = HTON__u32(-1);
         ll_inode2fid(&rec->lk_fid1, inode);
         ll_inode2fid(&rec->lk_fid2, dir);
 
@@ -294,6 +367,7 @@ void mds_unpack_body(struct mds_body *b)
         mds_unpack_fid(&b->fid1);
         mds_unpack_fid(&b->fid2);
         b->size = NTOH__u64(b->size);
+        b->blocks = NTOH__u64(b->blocks);
         b->valid = NTOH__u32(b->valid);
         b->fsuid = NTOH__u32(b->fsuid);
         b->fsgid = NTOH__u32(b->fsgid);
@@ -309,6 +383,7 @@ void mds_unpack_body(struct mds_body *b)
         b->rdev = NTOH__u32(b->rdev);
         b->nlink = NTOH__u32(b->nlink);
         b->generation = NTOH__u32(b->generation);
+        b->suppgid = NTOH__u32(b->suppgid);
 }
 
 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
@@ -325,6 +400,7 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         r->ur_fsuid = NTOH__u32(rec->sa_fsuid);
         r->ur_fsgid = NTOH__u32(rec->sa_fsgid);
         r->ur_cap = NTOH__u32(rec->sa_cap);
+        r->ur_suppgid = NTOH__u32(rec->sa_suppgid);
         r->ur_fid1 = &rec->sa_fid;
         attr->ia_valid = NTOH__u32(rec->sa_valid);
         attr->ia_mode = NTOH__u32(rec->sa_mode);
@@ -339,8 +415,9 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         if (req->rq_reqmsg->bufcount == offset + 2) {
                 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
                 r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
-        } else
+        } else {
                 r->ur_namelen = 0;
+        }
 
         RETURN(0);
 }
@@ -365,6 +442,8 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uid = NTOH__u32(rec->cr_uid);
         r->ur_gid = NTOH__u32(rec->cr_gid);
         r->ur_time = NTOH__u64(rec->cr_time);
+        r->ur_flags = NTOH__u32(rec->cr_flags);
+        r->ur_suppgid = NTOH__u32(rec->cr_suppgid);
 
         r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
@@ -392,6 +471,7 @@ static int mds_link_unpack(struct ptlrpc_request *req, int offset,
         r->ur_fsuid = NTOH__u32(rec->lk_fsuid);
         r->ur_fsgid = NTOH__u32(rec->lk_fsgid);
         r->ur_cap = NTOH__u32(rec->lk_cap);
+        r->ur_suppgid = NTOH__u32(rec->lk_suppgid);
         r->ur_fid1 = &rec->lk_fid1;
         r->ur_fid2 = &rec->lk_fid2;
 
@@ -414,6 +494,7 @@ static int mds_unlink_unpack(struct ptlrpc_request *req, int offset,
         r->ur_fsgid = NTOH__u32(rec->ul_fsgid);
         r->ur_cap = NTOH__u32(rec->ul_cap);
         r->ur_mode = NTOH__u32(rec->ul_mode);
+        r->ur_suppgid = NTOH__u32(rec->ul_suppgid);
         r->ur_fid1 = &rec->ul_fid1;
         r->ur_fid2 = &rec->ul_fid2;
 
@@ -455,6 +536,7 @@ static update_unpacker mds_unpackers[REINT_MAX + 1] = {
         [REINT_LINK] mds_link_unpack,
         [REINT_UNLINK] mds_unlink_unpack,
         [REINT_RENAME] mds_rename_unpack,
+        [REINT_OPEN] mds_create_unpack,
 };
 
 int mds_update_unpack(struct ptlrpc_request *req, int offset,
@@ -470,8 +552,10 @@ int mds_update_unpack(struct ptlrpc_request *req, int offset,
         realop = rec->ur_opcode = NTOH__u32(*opcode);
         realop &= REINT_OPCODE_MASK;
 
-        if (realop < 0 || realop > REINT_MAX)
+        if (realop < 0 || realop > REINT_MAX) {
+                LBUG();
                 RETURN(-EFAULT);
+        }
 
         rc = mds_unpackers[realop](req, offset, rec);
         RETURN(rc);
index 73a4383..f5627ba 100644 (file)
@@ -1,15 +1,24 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lib/simple.c
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *  Author: Peter Braam <braam@clusterfs.com>
+ *  Author: Andreas Dilger <adilger@clusterfs.com>
  *
- * Copyright (C) 2002  Cluster File Systems, Inc.
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- * by Peter Braam <braam@clusterfs.com>
- * and Andreas Dilger <adilger@clusterfs.com>
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define EXPORT_SYMTAB
@@ -71,6 +80,8 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
                 current->fsuid = uc->ouc_fsuid;
                 current->fsgid = uc->ouc_fsgid;
                 current->cap_effective = uc->ouc_cap;
+                if (uc->ouc_suppgid != -1)
+                        current->groups[current->ngroups++] = uc->ouc_suppgid;
         }
         set_fs(new_ctx->fs);
         set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
@@ -115,6 +126,9 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
                 current->fsuid = saved->fsuid;
                 current->fsgid = saved->fsgid;
                 current->cap_effective = saved->cap;
+
+                if (uc->ouc_suppgid != -1)
+                        current->ngroups--;
         }
 
         /*
@@ -135,7 +149,6 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
         CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
 
-        down(&dir->d_inode->i_sem);
         dchild = lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
@@ -151,14 +164,12 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
         if (err)
                 GOTO(out_err, err);
 
-        up(&dir->d_inode->i_sem);
         RETURN(dchild);
 
 out_err:
         dput(dchild);
         dchild = ERR_PTR(err);
 out_up:
-        up(&dir->d_inode->i_sem);
         return dchild;
 }
 
@@ -171,7 +182,6 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
 
         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
         CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        down(&dir->d_inode->i_sem);
         dchild = lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
@@ -187,14 +197,12 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
         if (err)
                 GOTO(out_err, err);
 
-        up(&dir->d_inode->i_sem);
         RETURN(dchild);
 
 out_err:
         dput(dchild);
         dchild = ERR_PTR(err);
 out_up:
-        up(&dir->d_inode->i_sem);
         return dchild;
 }
 
index 3889f1c..81638f1 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter J. Braam <braam@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
  *   Author: Mike Shaver <shaver@clusterfs.com>
 #include <linux/lustre_dlm.h>
 
 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
-                            char *cluuid)
+                            struct obd_uuid *cluuid)
 {
         if (exp->exp_connection) {
                 struct lustre_handle *hdl;
                 hdl = &exp->exp_ldlm_data.led_import.imp_handle;
                 /* Might be a re-connect after a partition. */
                 if (!memcmp(conn, hdl, sizeof *conn)) {
-                        CERROR("%s reconnecting\n", cluuid);
+                        CERROR("%s reconnecting\n", cluuid->uuid);
                         conn->addr = (__u64) (unsigned long)exp;
                         conn->cookie = exp->exp_cookie;
                         RETURN(EALREADY);
                 } else {
                         CERROR("%s reconnecting from %s, "
                                "handle mismatch (ours "LPX64"/"LPX64", "
-                               "theirs "LPX64"/"LPX64")\n", cluuid,
-                               exp->exp_connection->c_remote_uuid, hdl->addr,
+                               "theirs "LPX64"/"LPX64")\n", cluuid->uuid,
+                               exp->exp_connection->c_remote_uuid.uuid,
+                               hdl->addr,
                                hdl->cookie, conn->addr, conn->cookie);
                         /* XXX disconnect them here? */
                         memset(conn, 0, sizeof *conn);
@@ -62,7 +63,7 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
 
         conn->addr = (__u64) (unsigned long)exp;
         conn->cookie = exp->exp_cookie;
-        CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", cluuid, exp);
+        CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", cluuid->uuid, exp);
         CDEBUG(D_IOCTL,"connect: addr %Lx cookie %Lx\n",
                (long long)conn->addr, (long long)conn->cookie);
         RETURN(0);
@@ -71,28 +72,30 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
 int target_handle_connect(struct ptlrpc_request *req)
 {
         struct obd_device *target;
-        struct obd_export *export;
+        struct obd_export *export = NULL;
         struct obd_import *dlmimp;
         struct lustre_handle conn;
-        char *tgtuuid, *cluuid;
+        struct obd_uuid tgtuuid;
+        struct obd_uuid cluuid;
+        struct list_head *p;
         int rc, i;
         ENTRY;
 
-        tgtuuid = lustre_msg_buf(req->rq_reqmsg, 0);
         if (req->rq_reqmsg->buflens[0] > 37) {
                 CERROR("bad target UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
+        obd_str2uuid(&tgtuuid, lustre_msg_buf(req->rq_reqmsg, 0));
 
-        cluuid = lustre_msg_buf(req->rq_reqmsg, 1);
         if (req->rq_reqmsg->buflens[1] > 37) {
                 CERROR("bad client UUID for connect\n");
                 GOTO(out, rc = -EINVAL);
         }
+        obd_str2uuid(&cluuid, lustre_msg_buf(req->rq_reqmsg, 1));
 
-        i = class_uuid2dev(tgtuuid);
+        i = class_uuid2dev(&tgtuuid);
         if (i == -1) {
-                CERROR("UUID '%s' not found for connect\n", tgtuuid);
+                CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
                 GOTO(out, rc = -ENODEV);
         }
 
@@ -103,18 +106,62 @@ int target_handle_connect(struct ptlrpc_request *req)
         conn.addr = req->rq_reqmsg->addr;
         conn.cookie = req->rq_reqmsg->cookie;
 
-        rc = obd_connect(&conn, target, cluuid, ptlrpc_recovd,
-                         target_revoke_connection);
-        /* EALREADY indicates a reconnection, send the reply normally. */
-        if (rc && rc != EALREADY)
+        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 GOTO(out, rc);
 
+        /* lctl gets a backstage, all-access pass. */
+        if (!strcmp(cluuid.uuid, "OBD_CLASS_UUID"))
+                goto dont_check_exports;
+
+        spin_lock(&target->obd_dev_lock);
+        list_for_each(p, &target->obd_exports) {
+                export = list_entry(p, struct obd_export, exp_obd_chain);
+                if (!memcmp(&cluuid, &export->exp_client_uuid,
+                            sizeof(export->exp_client_uuid))) {
+                        spin_unlock(&target->obd_dev_lock);
+                        LASSERT(export->exp_obd == target);
+
+                        rc = target_handle_reconnect(&conn, export, &cluuid);
+                        break;
+                }
+                export = NULL;
+        }
+        /* If we found an export, we already unlocked. */
+        if (!export)
+                spin_unlock(&target->obd_dev_lock);
+
+        /* Tell the client if we're in recovery. */
+        if (target->obd_flags & OBD_RECOVERING)
+                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+
+        /* Tell the client if we support replayable requests */
+        if (target->obd_flags & OBD_REPLAYABLE)
+                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE);
+
+        if (!export) {
+                if (target->obd_flags & OBD_RECOVERING) {
+                        CERROR("denying connection for new client %s: "
+                               "in recovery\n", cluuid.uuid);
+                        rc = -EBUSY;
+                } else {
+ dont_check_exports:
+                        rc = obd_connect(&conn, target, &cluuid, ptlrpc_recovd,
+                                         target_revoke_connection);
+                }
+        }
+
+        if (rc == EALREADY) {
+                /* We indicate the reconnection in a flag, not an error code. */
+                lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
+                rc = 0;
+        } else if (rc) {
+                GOTO(out, rc);
+        }
+
         /* If all else goes well, this is our RPC return code. */
         req->rq_status = rc;
 
-        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc)
-                GOTO(out, rc);
         req->rq_repmsg->addr = conn.addr;
         req->rq_repmsg->cookie = conn.cookie;
 
@@ -122,7 +169,7 @@ int target_handle_connect(struct ptlrpc_request *req)
         LASSERT(export);
 
         req->rq_export = export;
-        export->exp_connection = ptlrpc_get_connection(&req->rq_peer, cluuid);
+        export->exp_connection = ptlrpc_get_connection(&req->rq_peer, &cluuid);
         if (req->rq_connection != NULL)
                 ptlrpc_put_connection(req->rq_connection);
         req->rq_connection = ptlrpc_connection_addref(export->exp_connection);
@@ -162,7 +209,7 @@ int target_handle_disconnect(struct ptlrpc_request *req)
                 RETURN(rc);
 
         req->rq_status = obd_disconnect(conn);
-
+        req->rq_export = NULL;
         RETURN(0);
 }
 
@@ -200,7 +247,7 @@ static int target_fence_failed_connection(struct ptlrpc_connection *conn)
 int target_revoke_connection(struct recovd_data *rd, int phase)
 {
         struct ptlrpc_connection *conn = class_rd2conn(rd);
-        
+
         LASSERT(conn);
         ENTRY;
 
index a9d4aac..0286cc6 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
 
-extern struct address_space_operations ll_aops;
-
+/* should NOT be called with the dcache lock, see fs/dcache.c */
 void ll_release(struct dentry *de)
 {
         ENTRY;
-
         OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
         EXIT;
 }
 
-extern void d_delete_aliases(struct inode *);
+void ll_set_dd(struct dentry *de)
+{
+        ENTRY;
+        LASSERT(de != NULL);
+
+        lock_kernel();
+
+        if (de->d_fsdata == NULL) {
+                OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
+                sema_init(&ll_d2d(de)->lld_it_sem, 1);
+        }
+
+        unlock_kernel();
+
+        EXIT;
+}
+
 void ll_intent_release(struct dentry *de, struct lookup_intent *it)
 {
         struct lustre_handle *handle;
         ENTRY;
 
         LASSERT(ll_d2d(de) != NULL);
+        mdc_put_rpc_lock(&mdc_rpc_lock, it);
 
         if (it->it_lock_mode) {
                 handle = (struct lustre_handle *)it->it_lock_handle;
-                if (it->it_op == IT_SETATTR) {
-                        int rc;
-                        ldlm_lock_decref(handle, it->it_lock_mode);
-                        rc = ldlm_cli_cancel(handle);
-                        if (rc < 0)
-                                CERROR("ldlm_cli_cancel: %d\n", rc);
-                } else
+                if (it->it_op == IT_SETATTR)
+                        ldlm_lock_decref_and_cancel(handle, it->it_lock_mode);
+                else
                         ldlm_lock_decref(handle, it->it_lock_mode);
 
-                /* intent_release may be called multiple times, and we don't
-                 * want to double-decref this lock (see bug 494) */
+                /* intent_release may be called multiple times from
+                   this thread, and we don't want to double-decref this
+                   lock (see bug 494) */
                 it->it_lock_mode = 0;
         }
 
@@ -72,6 +84,8 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it)
 
         if (de->d_it == it)
                 LL_GET_INTENT(de, it);
+        else 
+                CERROR("STRANGE intent release: %p %p\n", de->d_it, it);
 
         EXIT;
 }
@@ -79,21 +93,33 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it)
 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
 
 static int revalidate2_finish(int flag, struct ptlrpc_request *request,
-                          struct dentry **de,
-                          struct lookup_intent *it,
-                          int offset, obd_id ino)
+                              struct dentry **de, struct lookup_intent *it,
+                              int offset, obd_id ino)
 {
-        ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
-                           (*de)->d_inode, sizeof(*((*de)->d_inode)));
+        struct mds_body *body;
+        struct lov_mds_md *lmm = NULL;
+        int rc = 0; 
+        ENTRY;
+
+        if (!(flag & LL_LOOKUP_NEGATIVE)) {
+                body = lustre_msg_buf(request->rq_repmsg, offset);
+                if (body->valid & OBD_MD_FLEASIZE)
+                        lmm = lustre_msg_buf(request->rq_repmsg, offset + 1);
+                ll_update_inode((*de)->d_inode, body, lmm);
+                mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+                                   (*de)->d_inode);
+        } else 
+                rc = -ENOENT;
+
         ptlrpc_req_finished(request);
-        return 0;
+        RETURN(rc);
 }
 
 int ll_have_md_lock(struct dentry *de)
 {
         struct ll_sb_info *sbi = ll_s2sbi(de->d_sb);
         struct lustre_handle lockh;
-        __u64 res_id[RES_NAME_SIZE] = {0};
+        struct ldlm_res_id res_id = { .name = {0} };
         struct obd_device *obddev;
         ENTRY;
 
@@ -101,19 +127,19 @@ int ll_have_md_lock(struct dentry *de)
                RETURN(0);
 
         obddev = class_conn2obd(&sbi->ll_mdc_conn);
-        res_id[0] = de->d_inode->i_ino;
-        res_id[1] = de->d_inode->i_generation;
+        res_id.name[0] = de->d_inode->i_ino;
+        res_id.name[1] = de->d_inode->i_generation;
 
-        CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id[0]);
+        CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
 
-        if (ldlm_lock_match(obddev->obd_namespace, res_id, LDLM_PLAIN,
-                            NULL, 0, LCK_PR, &lockh)) {
+        if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
+                            &res_id, LDLM_PLAIN, NULL, 0, LCK_PR, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PR);
                 RETURN(1);
         }
 
-        if (ldlm_lock_match(obddev->obd_namespace, res_id, LDLM_PLAIN,
-                            NULL, 0, LCK_PW, &lockh)) {
+        if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
+                            &res_id, LDLM_PLAIN, NULL, 0, LCK_PW, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PW);
                 RETURN(1);
         }
@@ -133,10 +159,62 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
                 RETURN(0);
         }
 
+        if (it && it->it_op == IT_TRUNC)
+                it->it_op = IT_SETATTR;
+
+        if (it == NULL || it->it_op == IT_GETATTR) {
+                /* We could just return 1 immediately, but since we should only
+                 * be called in revalidate2 if we already have a lock, let's
+                 * verify that. */
+                struct inode *inode = de->d_inode;
+                struct ll_sb_info *sbi = ll_i2sbi(inode);
+                struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn);
+                struct ldlm_res_id res_id =
+                        { .name = {inode->i_ino, (__u64)inode->i_generation} };
+                struct lustre_handle lockh;
+                rc = ldlm_lock_match(obddev->obd_namespace,
+                                     LDLM_FL_BLOCK_GRANTED, &res_id,
+                                     LDLM_PLAIN, NULL, 0, LCK_PR, &lockh);
+                if (rc) {
+                        de->d_flags &= ~DCACHE_LUSTRE_INVALID;
+                        if (it && it->it_op == IT_GETATTR) {
+                                memcpy(it->it_lock_handle, &lockh,
+                                       sizeof(lockh));
+                                it->it_lock_mode = LCK_PR;
+                                LL_SAVE_INTENT(de, it);
+                        } else {
+                                ldlm_lock_decref(&lockh, LCK_PR);
+                        }
+                        RETURN(1);
+                }
+                rc = ldlm_lock_match(obddev->obd_namespace,
+                                     LDLM_FL_BLOCK_GRANTED, &res_id,
+                                     LDLM_PLAIN, NULL, 0, LCK_PW, &lockh);
+                if (rc) {
+                        de->d_flags &= ~DCACHE_LUSTRE_INVALID;
+                        if (it && it->it_op == IT_GETATTR) {
+                                memcpy(it->it_lock_handle, &lockh,
+                                       sizeof(lockh));
+                                it->it_lock_mode = LCK_PW;
+                                LL_SAVE_INTENT(de, it);
+                        } else {
+                                ldlm_lock_decref(&lockh, LCK_PW);
+                        }
+                        RETURN(1);
+                }
+                if (S_ISDIR(de->d_inode->i_mode))
+                        ll_invalidate_inode_pages(de->d_inode);
+                d_unhash_aliases(de->d_inode);
+                RETURN(0);
+        }
+
         rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
-        if (rc < 0) {
-                /* Something bad happened; overwrite it_status? */
-                CERROR("ll_intent_lock: %d\n", rc);
+        if (rc == -ESTALE)
+                RETURN(0);
+        if (rc < 0 && it->it_status) {
+                CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
+                       it->it_status);
+                RETURN(0);
         }
         /* unfortunately ll_intent_lock may cause a callback and revoke our
            dentry */
@@ -148,25 +226,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
         RETURN(1);
 }
 
-int ll_set_dd(struct dentry *de)
-{
-        ENTRY;
-        LASSERT(de != NULL);
-
-        lock_kernel();
-
-        if (de->d_fsdata != NULL) {
-                CERROR("dentry %p already has d_fsdata set\n", de);
-        } else {
-                OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
-                sema_init(&ll_d2d(de)->lld_it_sem, 1);
-        }
-
-        unlock_kernel();
-
-        RETURN(0);
-}
-
 struct dentry_operations ll_d_ops = {
         .d_revalidate2 = ll_revalidate2,
         .d_intent_release = ll_intent_release,
index 921eea2..072eeea 100644 (file)
@@ -22,7 +22,7 @@
  *  and moved here. AV
  *
  *  Adapted for Lustre Light
- *  Copyright (C) 2002, Cluster File Systems, Inc.
+ *  Copyright (C) 2002-2003, Cluster File Systems, Inc.
  *
  */
 
@@ -76,6 +76,11 @@ static int ll_dir_readpage(struct file *file, struct page *page)
         ENTRY;
 
         if ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index){
+                /* XXX why do we need this exactly, and why do we think that
+                 *     an all-zero directory page is useful?
+                 */
+                CERROR("memsetting dir page %lu to zero (size %lld)\n",
+                       page->index, inode->i_size);
                 memset(kmap(page), 0, PAGE_CACHE_SIZE);
                 kunmap(page);
                 GOTO(readpage_out, rc);
@@ -86,7 +91,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
         request = (struct ptlrpc_request *)it.it_data;
         if (request)
                 ptlrpc_req_finished(request);
-        if (rc != ELDLM_OK) {
+        if (rc < 0) {
                 CERROR("lock enqueue: err: %d\n", rc);
                 unlock_page(page);
                 RETURN(rc);
@@ -118,7 +123,8 @@ static int ll_dir_readpage(struct file *file, struct page *page)
                 SetPageUptodate(page);
 
         unlock_page(page);
-        rc = ll_unlock(LCK_PR, &lockh);
+        ll_unlock(LCK_PR, &lockh);
+        mdc_put_rpc_lock(&mdc_rpc_lock, &it);
         if (rc != ELDLM_OK)
                 CERROR("ll_unlock: err: %d\n", rc);
         return rc;
@@ -206,7 +212,7 @@ static void ext2_check_page(struct page *page)
                 limit = dir->i_size & ~PAGE_CACHE_MASK;
                 if (limit & (chunk_size - 1)) {
                         CERROR("limit %d dir size %lld index %ld\n",
-                                        limit, dir->i_size, page->index);
+                               limit, dir->i_size, page->index);
                         goto Ebadsize;
                 }
                 for (offs = limit; offs<PAGE_CACHE_SIZE; offs += chunk_size) {
@@ -263,8 +269,8 @@ Espan:
         // error = "inode out of bounds";
 bad_entry:
         CERROR("ext2_check_page: bad entry in directory #%lu: %s - "
-                "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
-                dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+                "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d",
+                dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs,
                 (unsigned long) le32_to_cpu(p->inode),
                 rec_len, p->name_len);
         goto fail;
@@ -281,7 +287,7 @@ fail:
         LBUG();
 }
 
-static struct page * ll_get_page(struct inode *dir, unsigned long n)
+static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
 {
         struct address_space *mapping = dir->i_mapping;
         struct page *page = read_cache_page(mapping, n,
@@ -397,8 +403,10 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
                 char *kaddr, *limit;
                 ext2_dirent *de;
                 struct page *page;
-                
-                page = ll_get_page(inode, n);
+
+                CDEBUG(D_EXT2, "reading %lu of dir %lu page %lu, size %llu\n",
+                       PAGE_CACHE_SIZE, inode->i_ino, n, inode->i_size);
+                page = ll_get_dir_page(inode, n);
 
                 /* size might have been updated by mdc_readpage */
                 npages = dir_pages(inode);
@@ -422,8 +430,8 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
                                 offset = (char *)de - kaddr;
                                 over = filldir(dirent, de->name, de->name_len,
-                                                (n<<PAGE_CACHE_SHIFT) | offset,
-                                                le32_to_cpu(de->inode), d_type);
+                                               (n<<PAGE_CACHE_SHIFT) | offset,
+                                               le32_to_cpu(de->inode), d_type);
                                 if (over) {
                                         ext2_put_page(page);
                                         GOTO(done,0);
@@ -468,7 +476,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
         n = start;
         do {
                 char *kaddr;
-                page = ll_get_page(dir, n);
+                page = ll_get_dir_page(dir, n);
                 if (!IS_ERR(page)) {
                         kaddr = page_address(page);
                         de = (ext2_dirent *) kaddr;
@@ -493,7 +501,7 @@ found:
 
 struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
 {
-        struct page *page = ll_get_page(dir, 0);
+        struct page *page = ll_get_dir_page(dir, 0);
         ext2_dirent *de = NULL;
 
         if (!IS_ERR(page)) {
@@ -559,7 +567,7 @@ int ll_add_link (struct dentry *dentry, struct inode *inode)
 
         /* We take care of directory expansion in the same loop */
         for (n = 0; n <= npages; n++) {
-                page = ll_get_page(dir, n);
+                page = ll_get_dir_page(dir, n);
                 err = PTR_ERR(page);
                 if (IS_ERR(page))
                         goto out;
@@ -711,7 +719,7 @@ int ext2_empty_dir (struct inode * inode)
         for (i = 0; i < npages; i++) {
                 char *kaddr;
                 ext2_dirent * de;
-                page = ll_get_page(inode, i);
+                page = ll_get_dir_page(inode, i);
 
                 if (IS_ERR(page))
                         continue;
index 6b37d99..1e26110 100644 (file)
@@ -1,26 +1,25 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  linux/fs/ext2/file.c
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *  from
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
  *
- *  linux/fs/minix/file.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  ext2 fs regular file handling primitives
- *
- *  64-bit file support on 64-bit platforms by Jakub Jelinek
- *      (jj@sunsite.ms.mff.cuni.cz)
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
 int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
 extern int ll_setattr(struct dentry *de, struct iattr *attr);
 
-static int ll_mdc_open(struct lustre_handle *mdc_conn, struct inode *inode,
-                       struct file *file, struct lov_mds_md *lmm, int lmm_size)
-{
-        struct ptlrpc_request *req = NULL;
-        struct ll_file_data *fd;
-        int rc;
-        ENTRY;
-
-        LASSERT(!file->private_data);
-
-        fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
-        if (!fd)
-                RETURN(-ENOMEM);
-
-        memset(fd, 0, sizeof(*fd));
-        fd->fd_mdshandle.addr = (__u64)(unsigned long)file;
-        get_random_bytes(&fd->fd_mdshandle.cookie,
-                         sizeof(fd->fd_mdshandle.cookie));
-
-        rc = mdc_open(mdc_conn, inode->i_ino, S_IFREG | inode->i_mode,
-                      file->f_flags, lmm, lmm_size, &fd->fd_mdshandle, &req);
-
-        /* This is the "reply" refcount. */
-        ptlrpc_req_finished(req);
-
-        if (rc)
-                GOTO(out_fd, rc);
-
-        fd->fd_req = req;
-        file->private_data = fd;
-
-        if (!fd->fd_mdshandle.addr ||
-            fd->fd_mdshandle.addr == (__u64)(unsigned long)file) {
-                CERROR("hmm, mdc_open didn't assign fd_mdshandle?\n");
-                /* XXX handle this how, abort or is it non-fatal? */
-        }
-
-        file->f_flags &= ~O_LOV_DELAY_CREATE;
-        RETURN(0);
-
-out_fd:
-        fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
-        kmem_cache_free(ll_file_data_slab, fd);
-
-        return -abs(rc);
-}
-
 static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
                         struct file *file)
 {
         struct ll_file_data *fd = file->private_data;
         struct ptlrpc_request *req = NULL;
         unsigned long flags;
-        struct obd_import *imp = fd->fd_req->rq_import;
+        struct obd_import *imp;
         int rc;
+        ENTRY;
 
         /* Complete the open request and remove it from replay list */
-        DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
         rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, inode->i_ino,
                        inode->i_mode, &fd->fd_mdshandle, &req);
-
         if (rc)
                 CERROR("inode %lu close failed: rc = %d\n", inode->i_ino, rc);
-        ptlrpc_req_finished(req);
 
+        imp = fd->fd_req->rq_import;
+        LASSERT(imp != NULL);
         spin_lock_irqsave(&imp->imp_lock, flags);
+
+        DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
+
+        /* We held on to the request for replay until we saw a close for that
+         * file.  Now that we've closed it, it gets replayed on the basis of
+         * its transno only. */
+        fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
+
         if (fd->fd_req->rq_transno) {
-                /* This caused an EA to be written, need to replay as a normal
-                 * transaction now.  Our reference is now effectively owned
-                 * by the imp_replay_list, and we'll be committed just like
-                 * other transno-having requests now.
-                 */
-                fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
+                /* This open created a file, so it needs replay as a
+                 * normal transaction now.  Our reference to it is now
+                 * effectively owned by the imp_replay_list, and it'll
+                 * be committed just like other transno-having
+                 * requests from here on out. */
+
+                /* We now retain this close request, so that it is
+                 * replayed if the open is replayed.  We duplicate the
+                 * transno, so that we get freed at the right time,
+                 * and rely on the difference in xid to keep
+                 * everything ordered correctly.
+                 *
+                 * But! If this close was already given a transno
+                 * (because it caused real unlinking of an
+                 * open-unlinked file, f.e.), then we'll be ordered on
+                 * the basis of that and we don't need to do anything
+                 * magical here. */
+                if (!req->rq_transno) {
+                        req->rq_transno = fd->fd_req->rq_transno;
+                        ptlrpc_retain_replayable_request(req, imp);
+                }
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+                /* Should we free_committed now? we always free before
+                 * replay, so it's probably a wash.  We could check to
+                 * see if the fd_req should already be committed, in
+                 * which case we can avoid the whole retain_replayable
+                 * dance. */
         } else {
                 /* No transno means that we can just drop our ref. */
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 ptlrpc_req_finished(fd->fd_req);
         }
+
+        /* Do this after the fd_req->rq_transno check, because we don't want
+         * to bounce off zero references. */
+        ptlrpc_req_finished(req);
         fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
         file->private_data = NULL;
         kmem_cache_free(ll_file_data_slab, fd);
 
-        return -abs(rc);
+        RETURN(-abs(rc));
+}
+
+/* While this returns an error code, its caller fput() does not, so we need
+ * to make every effort to clean up all of our state here.  Also, applications
+ * rarely check close errors and even if an error is returned they will not
+ * re-try the close call.
+ */
+static int ll_file_release(struct inode *inode, struct file *file)
+{
+        struct ll_file_data *fd;
+        struct obdo oa;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        int rc = 0, rc2;
+
+        ENTRY;
+
+        fd = (struct ll_file_data *)file->private_data;
+        if (!fd) /* no process opened the file after an mcreate */
+                RETURN(rc = 0);
+
+        if (lsm != NULL) {
+                memset(&oa, 0, sizeof(oa));
+                oa.o_id = lsm->lsm_object_id;
+                oa.o_mode = S_IFREG;
+                oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
+                obd_handle2oa(&oa, &fd->fd_osthandle);
+                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (rc)
+                        CERROR("inode %lu object close failed: rc = %d\n",
+                               inode->i_ino, rc);
+        }
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc2 && !rc)
+                rc = rc2;
+
+        if (atomic_dec_and_test(&lli->lli_open_count)) {
+                CDEBUG(D_INFO, "last close, cancelling unused locks\n");
+                rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
+                if (rc2 && !rc) {
+                        rc = rc2;
+                        CERROR("obd_cancel_unused: %d\n", rc);
+                }
+        } else
+                CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
+
+        RETURN(rc);
+}
+
+static int ll_local_open(struct file *file, struct lookup_intent *it)
+{
+        struct ptlrpc_request *req = it->it_data;
+        struct ll_file_data *fd;
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+        ENTRY;
+
+        LASSERT(!file->private_data);
+
+        fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
+        /* We can't handle this well without reorganizing ll_file_open and
+         * ll_mdc_close, so don't even try right now. */
+        LASSERT(fd != NULL);
+
+        memset(fd, 0, sizeof(*fd));
+
+        memcpy(&fd->fd_mdshandle, &body->handle, sizeof(body->handle));
+        fd->fd_req = it->it_data;
+        file->private_data = fd;
+
+        RETURN(0);
 }
 
 static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
                        struct file *file, struct lov_stripe_md *lsm)
 {
-        struct ll_file_data *fd;
+        struct ll_file_data *fd = file->private_data;
         struct obdo *oa;
         int rc;
         ENTRY;
@@ -133,14 +192,15 @@ static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
         oa->o_id = lsm->lsm_object_id;
         oa->o_mode = S_IFREG;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
-                OBD_MD_FLBLOCKS;
-        rc = obd_open(conn, oa, lsm);
+                OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+        rc = obd_open(conn, oa, lsm, NULL);
         if (rc)
                 GOTO(out, rc);
 
-        obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+        file->f_flags &= ~O_LOV_DELAY_CREATE;
+        obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                OBD_MD_FLMTIME | OBD_MD_FLCTIME);
 
-        fd = file->private_data;
         obd_oa2handle(&fd->fd_osthandle, oa);
 
         atomic_inc(&ll_i2info(inode)->lli_open_count);
@@ -154,9 +214,10 @@ out:
  * the mdc open was successful (hence stored stripe MD on MDS), otherwise
  * other nodes could try to create different objects for the same file.
  */
-static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
-                              struct file *file, struct lov_stripe_md *lsm)
+static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
+                         struct file *file, struct lov_stripe_md *lsm)
 {
+        struct ptlrpc_request *req = NULL;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_mds_md *lmm = NULL;
         int lmm_size = 0;
@@ -179,10 +240,14 @@ static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
                 OBD_MD_FLUID | OBD_MD_FLGID;
 
-        rc = obd_create(conn, oa, &lsm);
+        rc = obd_create(conn, oa, &lsm, NULL);
         if (rc) {
                 CERROR("error creating objects for inode %lu: rc = %d\n",
                        inode->i_ino, rc);
+                if (rc > 0) {
+                        CERROR("obd_create returned invalid rc %d\n", rc);
+                        rc = -EIO;
+                }
                 GOTO(out_oa, rc);
         }
 
@@ -193,7 +258,10 @@ static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
 
         lmm_size = rc;
 
-        rc = ll_mdc_open(&ll_i2sbi(inode)->ll_mdc_conn,inode,file,lmm,lmm_size);
+        /* Save the stripe MD with this file on the MDS */
+        rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, inode, NULL,
+                         lmm, lmm_size, &req);
+        ptlrpc_req_finished(req);
 
         obd_free_wiremd(conn, &lmm);
 
@@ -201,7 +269,7 @@ static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
          * MDS, we need to destroy the objects now or they will be leaked.
          */
         if (rc) {
-                CERROR("error MDS opening %lu with delayed create: rc %d\n",
+                CERROR("error: storing stripe MD for %lu: rc %d\n",
                        inode->i_ino, rc);
                 GOTO(out_destroy, rc);
         }
@@ -216,7 +284,7 @@ out_destroy:
         obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
         oa->o_id = lsm->lsm_object_id;
         oa->o_valid |= OBD_MD_FLID;
-        err = obd_destroy(conn, oa, lsm);
+        err = obd_destroy(conn, oa, lsm, NULL);
         obd_free_memmd(conn, &lsm);
         if (err)
                 CERROR("error uncreating inode %lu objects: rc %d\n",
@@ -239,43 +307,55 @@ out_destroy:
  * before returning in the O_LOV_DELAY_CREATE case and dropping it here
  * or in ll_file_release(), but I'm not sure that is desirable/necessary.
  */
+extern int ll_it_open_error(int phase, struct lookup_intent *it);
+
 static int ll_file_open(struct inode *inode, struct file *file)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lustre_handle *conn = ll_i2obdconn(inode);
+        struct lookup_intent *it;
         struct lov_stripe_md *lsm;
         int rc = 0;
         ENTRY;
 
+        LL_GET_INTENT(file->f_dentry, it);
+        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+        if (rc)
+                RETURN(rc);
+
+        rc = ll_local_open(file, it);
+        if (rc)
+                LBUG();
+
+        mdc_set_open_replay_data((struct ll_file_data *)file->private_data);
+
         lsm = lli->lli_smd;
         if (lsm == NULL) {
                 if (file->f_flags & O_LOV_DELAY_CREATE) {
                         CDEBUG(D_INODE, "delaying object creation\n");
                         RETURN(0);
                 }
-
                 down(&lli->lli_open_sem);
                 if (!lli->lli_smd) {
-                        rc = ll_create_open_obj(conn, inode, file, NULL);
+                        rc = ll_create_obj(conn, inode, file, NULL);
                         up(&lli->lli_open_sem);
+                        if (rc)
+                                GOTO(out_close, rc);
                 } else {
-                        CERROR("stripe already set on ino %lu\n", inode->i_ino);
+                        CERROR("warning: stripe already set on ino %lu\n",
+                               inode->i_ino);
                         up(&lli->lli_open_sem);
-                        rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file,NULL,0);
                 }
                 lsm = lli->lli_smd;
-        } else
-                rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file, NULL, 0);
-
-        if (rc)
-                RETURN(rc);
+        }
 
         rc = ll_osc_open(conn, inode, file, lsm);
         if (rc)
                 GOTO(out_close, rc);
         RETURN(0);
-out_close:
+
+ out_close:
         ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
         return rc;
 }
@@ -321,90 +401,40 @@ int ll_size_unlock(struct inode *inode, struct lov_stripe_md *lsm, int mode,
         RETURN(rc);
 }
 
-int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm)
+/* This function is solely "sampling" the file size, and does no explicit
+ * locking on the size itself (see ll_size_lock() and ll_size_unlock()).
+ *
+ * XXX We need to optimize away the obd_getattr for decent performance here,
+ *     by checking if we already have the size lock and considering our size
+ *     authoritative in that case.  In order to do that either the act of
+ *     getting the size lock includes retrieving the file size, or the client
+ *     keeps an atomic flag in the inode which indicates whether the size
+ *     has been updated (see bug 280).
+ */
+int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm,
+                 struct lustre_handle *handle)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
-        //struct lustre_handle lockh = { 0, 0 };
         struct obdo oa;
-        //int err;
         int rc;
         ENTRY;
 
         LASSERT(lsm);
         LASSERT(sbi);
 
-        /* XXX do not yet need size lock - OST size always correct (sync write)
-        rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockh);
-        if (rc != ELDLM_OK) {
-                CERROR("lock enqueue: %d\n", rc);
-                RETURN(rc);
-        }
-        */
-
         memset(&oa, 0, sizeof oa);
         oa.o_id = lsm->lsm_object_id;
         oa.o_mode = S_IFREG;
-        oa.o_valid = OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLSIZE|OBD_MD_FLBLOCKS;
+        oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+                OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+        obd_handle2oa(&oa, handle);
         rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
         if (!rc) {
-                obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
-                CDEBUG(D_INODE, LPX64" size %Lu/%Lu\n",
+                obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                        OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+                CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lu\n",
                        lsm->lsm_object_id, inode->i_size, inode->i_size);
         }
-        /* XXX do not need size lock, because OST size always correct (sync write)
-        err = ll_size_unlock(inode, lsm, LCK_PR, &lockh);
-        if (err != ELDLM_OK) {
-                CERROR("lock cancel: %d\n", err);
-                if (!rc)
-                        rc = err;
-        }
-        */
-        RETURN(rc);
-}
-
-/* While this returns an error code, fput() the caller does not, so we need
- * to make every effort to clean up all of our state here.  Also, applications
- * rarely check close errors and even if an error is returned they will not
- * re-try the close call.
- */
-static int ll_file_release(struct inode *inode, struct file *file)
-{
-        struct ll_file_data *fd;
-        struct obdo oa;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct lov_stripe_md *lsm = lli->lli_smd;
-        int rc, rc2;
-
-        ENTRY;
-
-        fd = (struct ll_file_data *)file->private_data;
-        if (!fd) /* no process opened the file after an mcreate */
-                RETURN(rc = 0);
-
-        memset(&oa, 0, sizeof(oa));
-        oa.o_id = lsm->lsm_object_id;
-        oa.o_mode = S_IFREG;
-        oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-        obd_handle2oa(&oa, &fd->fd_osthandle);
-        rc = obd_close(&sbi->ll_osc_conn, &oa, lsm);
-        if (rc)
-                CERROR("inode %lu object close failed: rc = %d\n",
-                       inode->i_ino, rc);
-
-        rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
-        if (rc2 && !rc)
-                rc = rc2;
-
-        if (atomic_dec_and_test(&lli->lli_open_count)) {
-                CDEBUG(D_INFO, "last close, cancelling unused locks\n");
-                rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
-                if (rc2 && !rc) {
-                        rc = rc2;
-                        CERROR("obd_cancel_unused: %d\n", rc);
-                }
-        } else
-                CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
 
         RETURN(rc);
 }
@@ -426,6 +456,7 @@ static inline void ll_remove_suid(struct inode *inode)
 
 static void ll_update_atime(struct inode *inode)
 {
+#ifdef USE_ATIME
         struct iattr attr;
 
         attr.ia_atime = CURRENT_TIME;
@@ -437,19 +468,20 @@ static void ll_update_atime(struct inode *inode)
 
         /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
         ll_inode_setattr(inode, &attr, 0);
+#else
+        /* update atime in memory, but don't write out just this change */
+        inode->i_atime = CURRENT_TIME;
+#endif
 }
 
 int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-                     void *data, __u32 data_len, int flag)
+                     void *data, int flag)
 {
         struct inode *inode = data;
         struct lustre_handle lockh = { 0, 0 };
         int rc;
         ENTRY;
 
-        if (data_len != sizeof(struct inode))
-                LBUG();
-
         if (inode == NULL)
                 LBUG();
 
@@ -477,7 +509,7 @@ int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                             loff_t *ppos)
 {
-        struct ll_file_data *fd = (struct ll_file_data *)filp->private_data;
+        struct ll_file_data *fd = filp->private_data;
         struct inode *inode = filp->f_dentry->d_inode;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct lustre_handle lockh = { 0, 0 };
@@ -487,14 +519,6 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
         ssize_t retval;
         ENTRY;
 
-        /* If we don't refresh the file size, generic_file_read may not even
-         * call us */
-        retval = ll_file_size(inode, lsm);
-        if (retval < 0) {
-                CERROR("ll_file_size: "LPSZ"\n", retval);
-                RETURN(retval);
-        }
-
         if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
             !(sbi->ll_flags & LL_SBI_NOLCK)) {
                 struct ldlm_extent extent;
@@ -513,6 +537,14 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                 }
         }
 
+        /* If we don't refresh the file size, generic_file_read may not even
+         * call us */
+        retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+        if (retval < 0) {
+                CERROR("ll_file_size: "LPSZ"\n", retval);
+                RETURN(retval);
+        }
+
         CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
                inode->i_ino, count, *ppos);
         retval = generic_file_read(filp, buf, count, ppos);
@@ -538,7 +570,7 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 static ssize_t
 ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 {
-        struct ll_file_data *fd = (struct ll_file_data *)file->private_data;
+        struct ll_file_data *fd = file->private_data;
         struct inode *inode = file->f_dentry->d_inode;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct lustre_handle lockh = { 0, 0 }, eof_lockh = { 0, 0 };
@@ -549,32 +581,16 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
         ENTRY;
 
         if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
-                struct obdo *oa;
-
-                oa = obdo_alloc();
-                if (!oa)
-                        RETURN(-ENOMEM);
-
                 err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockh);
-                if (err) {
-                        obdo_free(oa);
+                if (err)
                         RETURN(err);
-                }
 
-                oa->o_id = lsm->lsm_object_id;
-                oa->o_mode = inode->i_mode;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
-                        OBD_MD_FLBLOCKS;
-                obd_handle2oa(oa, &fd->fd_osthandle);
-                retval = obd_getattr(&sbi->ll_osc_conn, oa, lsm);
-                if (retval) {
-                        obdo_free(oa);
+                /* Get size here so we know extent to enqueue write lock on. */
+                retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+                if (retval)
                         GOTO(out_eof, retval);
-                }
 
-                *ppos = oa->o_size;
-                obdo_to_inode(inode, oa, oa->o_valid);
-                obdo_free(oa);
+                *ppos = inode->i_size;
         }
 
         if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
@@ -600,21 +616,19 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 
         retval = generic_file_write(file, buf, count, ppos);
 
-        if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
-            sbi->ll_flags & LL_SBI_NOLCK) {
+        if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
+            !(sbi->ll_flags & LL_SBI_NOLCK)) {
                 err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PW, &lockh);
-                if (err != ELDLM_OK) {
+                if (err != ELDLM_OK)
                         CERROR("lock cancel: err: %d\n", err);
-                        GOTO(out_eof, retval = err);
-                }
         }
 
         EXIT;
  out_eof:
         if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
                 err = ll_size_unlock(inode, lsm, LCK_PW, &eof_lockh);
-                if (err && !retval)
-                        retval = err;
+                if (err)
+                        CERROR("ll_size_unlock: %d\n", err);
         }
 
         return retval;
@@ -624,7 +638,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
                             unsigned long arg)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
-        struct lustre_handle *conn;
+        struct lustre_handle *conn = ll_i2obdconn(inode);
         struct lov_stripe_md *lsm;
         int rc;
         ENTRY;
@@ -636,7 +650,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
                 CERROR("stripe already set for ino %lu\n", inode->i_ino);
                 /* If we haven't already done the open, do so now */
                 if (file->f_flags & O_LOV_DELAY_CREATE) {
-                        int rc2 = ll_file_open(inode, file);
+                        int rc2 = ll_osc_open(conn, inode, file, lsm);
                         if (rc2)
                                 RETURN(rc2);
                 }
@@ -644,11 +658,12 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
                 RETURN(-EALREADY);
         }
 
-        conn = ll_i2obdconn(inode);
-
         rc = obd_iocontrol(LL_IOC_LOV_SETSTRIPE, conn, 0, &lsm, (void *)arg);
-        if (!rc)
-                rc = ll_create_open_obj(conn, inode, file, lsm);
+        if (rc) {
+                up(&lli->lli_open_sem);
+                RETURN(rc);
+        }
+        rc = ll_create_obj(conn, inode, file, lsm);
         up(&lli->lli_open_sem);
 
         if (rc) {
@@ -673,11 +688,13 @@ static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                   unsigned long arg)
 {
-        struct ll_file_data *fd = (struct ll_file_data *)file->private_data;
+        struct ll_file_data *fd = file->private_data;
         struct lustre_handle *conn;
         int flags;
 
         switch(cmd) {
+        case TCGETS:
+                return -ENOTTY;
         case LL_IOC_GETFLAGS:
                 /* Get the current value of the file flags */
                 return put_user(fd->fd_flags, (int *)arg);
@@ -725,8 +742,9 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
         switch (origin) {
         case 2: {
                 struct ll_inode_info *lli = ll_i2info(inode);
+                struct ll_file_data *fd = file->private_data;
 
-                retval = ll_file_size(inode, lli->lli_smd);
+                retval = ll_file_size(inode, lli->lli_smd, &fd->fd_osthandle);
                 if (retval)
                         RETURN(retval);
 
@@ -757,7 +775,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
         return 0;
 }
 
-static int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate(struct dentry *dentry)
 {
         struct inode *inode = dentry->d_inode;
         struct lov_stripe_md *lsm;
@@ -768,14 +786,21 @@ static int ll_inode_revalidate(struct dentry *dentry)
                 RETURN(0);
         }
 
-        if (!ll_have_md_lock(dentry)) {
+        /* This is very tricky.  It is unsafe to call ll_have_md_lock()
+         * when we have a referenced lock, because it may cause an RPC
+         * below when the lock is marked CB_PENDING.  That RPC may not
+         * go out, because someone else may be in another RPC waiting
+         * for that lock. */
+        if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
+            !ll_have_md_lock(dentry)) {
                 struct ptlrpc_request *req = NULL;
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 struct mds_body *body;
                 unsigned long valid = 0;
-                int datalen = 0;
-                int rc;
+                int datalen = 0, rc;
 
+                /* Why don't we update all valid MDS fields here, if we're
+                 * doing an RPC anyways?  -phil */
                 if (S_ISREG(inode->i_mode)) {
                         datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
                         valid |= OBD_MD_FLEASIZE;
@@ -789,7 +814,11 @@ static int ll_inode_revalidate(struct dentry *dentry)
                 }
 
                 body = lustre_msg_buf(req->rq_repmsg, 0);
-                ll_update_inode(inode, body);
+                if (body->valid & OBD_MD_FLEASIZE)
+                        ll_update_inode(inode, body,
+                                        lustre_msg_buf(req->rq_repmsg, 1));
+                else
+                        ll_update_inode(inode, body, NULL);
                 ptlrpc_req_finished(req);
         }
 
@@ -797,7 +826,10 @@ static int ll_inode_revalidate(struct dentry *dentry)
         if (!lsm)       /* object not yet allocated, don't validate size */
                 RETURN(0);
 
-        RETURN(ll_file_size(inode, lsm));
+        /* XXX this should probably become an unconditional obd_getattr()
+         *     so that we update the blocks count and mtime from the OST too.
+         */
+        RETURN(ll_file_size(inode, lsm, NULL));
 }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
@@ -828,3 +860,12 @@ struct inode_operations ll_file_inode_operations = {
         revalidate: ll_inode_revalidate,
 #endif
 };
+
+struct inode_operations ll_special_inode_operations = {
+        setattr:    ll_setattr,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        getattr:    ll_getattr,
+#else
+        revalidate: ll_inode_revalidate,
+#endif
+};
index 65df985..8989a82 100644 (file)
 #include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
+/* /proc/lustre/llite mount point registration */
 
-int rd_path(char* page, char **start, off_t off, int count, int *eof,
-            void *data)
+#ifndef LPROCFS
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                struct super_block *sb, char *osc, char *mdc)
 {
         return 0;
 }
+#else
 
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        int len = 0;
-        struct super_block *sb = (struct super_block*)data;
-
-        len += snprintf(page, count, "%s\n", sb->s_type->name);
-        return len;
-}
-
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        int len = 0;
-        struct super_block *sb = (struct super_block*)data;
-        struct statfs mystats;
-
-        (sb->s_op->statfs)(sb, &mystats);
-        len += snprintf(page, count, "%lu\n", mystats.f_bsize);
-        return len;
-
-}
+long long mnt_instance;
 
-int rd_kbytestotal(char* page, char **start, off_t off, int count, int *eof,
-                   void *data)
+static inline int lprocfs_llite_statfs(void *data, struct statfs *sfs)
 {
-        int len = 0;
         struct super_block *sb = (struct super_block*)data;
-        struct statfs mystats;
-        __u32 blk_size;
-        __u64 result;
-
-        (sb->s_op->statfs)(sb, &mystats);
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_blocks;
-
-        while(blk_size >>= 1)
-                result <<= 1;
-
-        len += snprintf(page, count, LPU64"\n", result);
-        return len;
+        return (sb->s_op->statfs)(sb, sfs);
 }
 
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_llite_statfs);
 
-int rd_kbytesfree(char* page, char **start, off_t off, int count, int *eof,
-                  void *data)
+int rd_path(char *page, char **start, off_t off, int count, int *eof,
+            void *data)
 {
-        int len = 0;
-        struct super_block *sb = (struct super_block*)data;
-        struct statfs mystats;
-        __u32 blk_size;
-        __u64 result;
-
-        (sb->s_op->statfs)(sb, &mystats);
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_bfree;
-
-        while(blk_size >>= 1)
-                result <<= 1;
-
-        len += snprintf(page, count, LPU64"\n", result);
-        return len;
+        return 0;
 }
 
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
-                  void *data)
+int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+              void *data)
 {
-        int len = 0;
         struct super_block *sb = (struct super_block*)data;
-        struct statfs mystats;
 
-        (sb->s_op->statfs)(sb, &mystats);
-        len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
-        return len;
+        *eof = 1;
+        return snprintf(page, count, "%s\n", sb->s_type->name);
 }
 
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
-                 void *data)
+int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
+               void *data)
 {
-        int len = 0;
-        struct super_block *sb = (struct super_block*)data;
-        struct statfs mystats;
+        struct super_block *sb = (struct super_block *)data;
 
-        (sb->s_op->statfs)(sb, &mystats);
-        len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_ffree));
-        return len;
+        *eof = 1;
+        return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
 }
 
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
-                  void *data)
-{
-        return 0;
-}
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
-            void *data)
-{
-        int len = 0;
-        struct super_block *sb = (struct super_block*)data;
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-
-        len += snprintf(page, count, "%s\n", sbi->ll_sb_uuid);
-
-        return len;
-
-}
-int rd_dev_name(char* page, char **start, off_t off, int count, int *eof,
-                void *data)
-{
-        int len = 0;
-        struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->obd_name);
-        return len;
-}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",        rd_sb_uuid,     0, 0 },
+        { "mntpt_path",  rd_path,        0, 0 },
+        { "fstype",      rd_fstype,      0, 0 },
+        { "blocksize",   rd_blksize,     0, 0 },
+        { "kbytestotal", rd_kbytestotal, 0, 0 },
+        { "kbytesfree",  rd_kbytesfree,  0, 0 },
+        { "filestotal",  rd_filestotal,  0, 0 },
+        { "filesfree",   rd_filesfree,   0, 0 },
+        { "filegroups",  rd_filegroups,  0, 0 },
+        { 0 }
+};
 
-int rd_dev_uuid(char* page, char **start, off_t off, int count, int *eof,
-                void *data)
+#define MAX_STRING_SIZE 128
+/* Register the /proc entries for one llite mount under 'parent'.
+ *
+ * Creates a directory named "fs<N>" (N is mnt_instance, which is then
+ * incremented) and stores it in sbi->ll_proc_root, adds the static entries
+ * from lprocfs_obd_vars with 'sb' as their data, and then adds
+ * "<type>/common_name" and "<type>/uuid" entries for the obd devices found
+ * by the 'mdc' and 'osc' UUID strings.
+ *
+ * Returns 0 on success, -EINVAL if no obd device matches one of the UUIDs,
+ * or the first error reported by the lprocfs calls.  Entries registered
+ * before a failure are left in place.
+ */
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                struct super_block *sb, char *osc, char *mdc)
 {
-        int len = 0;
-        struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->obd_uuid);
-        return len;
-}
+        struct lprocfs_vars lvars[2];
+        struct ll_sb_info *sbi = ll_s2sbi(sb);
+        struct obd_device *obd;
+        char name[MAX_STRING_SIZE + 1];
+        struct obd_uuid uuid;
+        int err;
+        ENTRY;
 
+        /* lvars[1] stays zeroed; presumably the list terminator, as with
+         * the trailing { 0 } of lprocfs_obd_vars */
+        memset(lvars, 0, sizeof(lvars));
 
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/mntpt_path", rd_path, 0, 0},
-        {"status/fstype", rd_fstype, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytestotal",rd_kbytestotal, 0, 0},
-        {"status/kbytesfree", rd_kbytesfree, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {0}
-};
+        name[MAX_STRING_SIZE] = '\0';
+        lvars[0].name = name;
 
-/*
- * Proc registration function for Lustre
- * file system
- */
+        /* Mount info */
+        snprintf(name, MAX_STRING_SIZE, "fs%llu", mnt_instance);
 
+        mnt_instance++;
+        sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
+        if (IS_ERR(sbi->ll_proc_root))
+                RETURN(err = PTR_ERR(sbi->ll_proc_root));
 
-#define MAX_STRING_SIZE 100
-void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
-{
-        char mnt_name[MAX_STRING_SIZE+1];
-        char uuid_name[MAX_STRING_SIZE+1];
-        struct lprocfs_vars d_vars[3];
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct obd_device* obd;
-        int err;
+        /* Static configuration info */
+        err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
+        if (err)
+                RETURN(err);
 
-        /* Register this mount instance with LProcFS */
-        snprintf(mnt_name, MAX_STRING_SIZE, "mount_%s", sbi->ll_sb_uuid);
-        mnt_name[MAX_STRING_SIZE] = '\0';
-        sbi->ll_proc_root = lprocfs_reg_mnt(mnt_name);
-        if (sbi->ll_proc_root == NULL) {
-                CDEBUG(D_OTHER, "Could not register FS");
-                return;
-        }
-        /* Add the static configuration info */
-        err = lprocfs_add_vars(sbi->ll_proc_root,status_var_nm_1, sb);
-        if (err) {
-                CDEBUG(D_OTHER, "Unable to add procfs variables\n");
-                return;
-        }
-        /* MDC */
-        obd = class_uuid2obd(mdc);
-        snprintf(mnt_name, MAX_STRING_SIZE, "status/%s/common_name",
-                 obd->obd_type->typ_name);
-        mnt_name[MAX_STRING_SIZE] = '\0';
-        memset(d_vars, 0, sizeof(d_vars));
-        d_vars[0].read_fptr = rd_dev_name;
-        d_vars[0].write_fptr = NULL;
-        d_vars[0].name = mnt_name;
-        snprintf(uuid_name, MAX_STRING_SIZE, "status/%s/uuid",
+        /* MDC info */
+        /* strncpy() does not terminate when the source fills the buffer,
+         * and 'uuid' is an uninitialised stack variable: terminate it */
+        strncpy(uuid.uuid, mdc, sizeof(uuid.uuid) - 1);
+        uuid.uuid[sizeof(uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&uuid);
+        if (obd == NULL) {
+                CERROR("no obd device for MDC uuid %s\n", uuid.uuid);
+                RETURN(-EINVAL);
+        }
+        snprintf(name, MAX_STRING_SIZE, "%s/common_name",
                  obd->obd_type->typ_name);
-        uuid_name[MAX_STRING_SIZE] = '\0';
-        d_vars[1].read_fptr = rd_dev_uuid;
-        d_vars[1].write_fptr = NULL;
-        d_vars[1].name = uuid_name;
-
-        err = lprocfs_add_vars(sbi->ll_proc_root, d_vars, obd);
-        if (err) {
-                CDEBUG(D_OTHER, "Unable to add fs proc dynamic variables\n");
-                return;
-        }
-        /* OSC or LOV*/
-        obd = class_uuid2obd(osc);
-
-        /* Reuse mnt_name */
-        snprintf(mnt_name, MAX_STRING_SIZE,
-                 "status/%s/common_name", obd->obd_type->typ_name);
-        mnt_name[MAX_STRING_SIZE] = '\0';
-        memset(d_vars, 0, sizeof(d_vars));
-        d_vars[0].read_fptr = rd_dev_name;
-        d_vars[0].write_fptr = NULL;
-        d_vars[0].name = mnt_name;
-
-        snprintf(uuid_name, MAX_STRING_SIZE, "status/%s/uuid",
+        lvars[0].read_fptr = lprocfs_rd_name;
+        err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+        if (err)
+                RETURN(err);
+
+        snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
+        lvars[0].read_fptr = lprocfs_rd_uuid;
+        err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+        if (err < 0)
+                RETURN(err);
+
+        /* OSC */
+        strncpy(uuid.uuid, osc, sizeof(uuid.uuid) - 1);
+        uuid.uuid[sizeof(uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&uuid);
+        if (obd == NULL) {
+                CERROR("no obd device for OSC uuid %s\n", uuid.uuid);
+                RETURN(-EINVAL);
+        }
+
+        snprintf(name, MAX_STRING_SIZE, "%s/common_name",
                  obd->obd_type->typ_name);
-        uuid_name[MAX_STRING_SIZE] = '\0';
-        d_vars[1].read_fptr = rd_dev_uuid;
-        d_vars[1].write_fptr = NULL;
-        d_vars[1].name = uuid_name;
-
-        err = lprocfs_add_vars(sbi->ll_proc_root, d_vars, obd);
-        if (err) {
-                CDEBUG(D_OTHER, "Unable to add fs proc dynamic variables\n");
-                return;
-        }
+        lvars[0].read_fptr = lprocfs_rd_name;
+        err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+        if (err)
+                RETURN(err);
+
+        snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
+        lvars[0].read_fptr = lprocfs_rd_uuid;
+        err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+
+        RETURN(err);
 }
+
 #undef MAX_STRING_SIZE
+#endif /* LPROCFS */
index 81a5aad..f72e6ba 100644 (file)
@@ -1,17 +1,24 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- *  from
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *  linux/fs/ext2/namei.c
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  derived in small part from linux/fs/ext2/namei.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
  *        David S. Miller (davem@caip.rutgers.edu), 1995
  *  Directory entry file type support and forward compatibility hooks
  *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
- *
- *  Changes for use in OBDFS
- *  Copyright (c) 1999, Seagate Technology Inc.
- *  Copyright (C) 2001, Cluster File Systems, Inc.
- *                       Rewritten based on recent ext2 page cache use.
- *
  */
 
 #include <linux/fs.h>
@@ -41,7 +42,8 @@
 #include <linux/lustre_lite.h>
 #include <linux/lustre_dlm.h>
 
-extern struct address_space_operations ll_aops;
+/* from dcache.c */
+extern void ll_set_dd(struct dentry *de);
 
 /* from super.c */
 extern void ll_change_inode(struct inode *inode);
@@ -100,7 +102,7 @@ static int ll_test_inode(struct inode *inode, void *opaque)
                 return 0;
 
         /* Apply the attributes in 'opaque' to this inode */
-        ll_update_inode(inode, body);
+        ll_update_inode(inode, body, lic->lic_lmm);
 
         return 1;
 }
@@ -149,24 +151,45 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 static int ll_intent_to_lock_mode(struct lookup_intent *it)
 {
         /* CREAT needs to be tested before open (both could be set) */
-        if ((it->it_op & (IT_CREAT | IT_MKDIR | IT_SETATTR | IT_MKNOD))) {
+        if (it->it_op & (IT_CREAT | IT_SETATTR))
                 return LCK_PW;
-        } else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_UNLINK |
-                                IT_RMDIR | IT_RENAME | IT_RENAME2 | IT_READLINK|
-                                IT_LINK | IT_LINK2 | IT_LOOKUP | IT_SYMLINK)) {
+        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
                 return LCK_PR;
-        }
 
         LBUG();
         RETURN(-EINVAL);
 }
 
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
+/* Report the status that belongs to one phase of an intent open.
+ *
+ * The disposition bits are tested in the fixed order IT_OPEN_OPEN,
+ * IT_OPEN_CREATE, IT_OPEN_LOOKUP.  Only the first bit found set counts:
+ * if 'phase' equals that bit, it->it_status is returned, otherwise 0.
+ *
+ * With none of the three bits set we LBUG() and return 0.
+ */
+int ll_it_open_error(int phase, struct lookup_intent *it)
+{
+        const int order[] = { IT_OPEN_OPEN, IT_OPEN_CREATE, IT_OPEN_LOOKUP };
+        unsigned int idx;
+
+        for (idx = 0; idx < sizeof(order) / sizeof(order[0]); idx++) {
+                if (!(it->it_disposition & order[idx]))
+                        continue;
+
+                /* first bit set in priority order decides the answer */
+                return phase == order[idx] ? it->it_status : 0;
+        }
+
+        /* none of the three phase bits is set in it_disposition */
+        LBUG();
+        return 0;
+}
+
+#define IT_ENQ_COMPLETE (1<<16)
 
 int ll_intent_lock(struct inode *parent, struct dentry **de,
-                   struct lookup_intent *it,
-                   intent_finish_cb intent_finish)
+                   struct lookup_intent *it, intent_finish_cb intent_finish)
 {
         struct dentry *dentry = *de;
         struct ll_sb_info *sbi = ll_i2sbi(parent);
@@ -174,9 +197,8 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
         struct ptlrpc_request *request = NULL;
         char *data = NULL;
-        int rc, lock_mode, datalen = 0, offset, flag = LL_LOOKUP_POSITIVE;
+        int rc = 0, datalen = 0, offset, flag = 0;
         obd_id ino = 0;
-
         ENTRY;
 
         if (it == NULL)
@@ -188,26 +210,23 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
         if (dentry->d_name.len > EXT2_NAME_LEN)
                 RETURN(-ENAMETOOLONG);
 
-        lock_mode = ll_intent_to_lock_mode(it);
-        if (it->it_op & IT_SYMLINK) {
-                data = it->it_data;
-                datalen = strlen(data) + 1;
-                it->it_data = NULL;
+        if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+                rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
+                                 ll_intent_to_lock_mode(it), parent, dentry,
+                                 &lockh, data, datalen, parent,
+                                 sizeof(*parent));
+                if (rc < 0)
+                        RETURN(rc);
+                memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
         }
 
-        rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it, lock_mode, parent,
-                         dentry, &lockh, data, datalen, parent,sizeof(*parent));
-        if (rc < 0)
-                RETURN(rc);
-        memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
-
         request = (struct ptlrpc_request *)it->it_data;
-        /* it_disposition == 1 indicates that the server performed the
+
+        /* non-zero it_disposition indicates that the server performed the
          * intent on our behalf. */
         if (it->it_disposition) {
                 struct mds_body *mds_body;
                 int mode;
-                obd_flag valid;
 
                 /* This long block is all about fixing up the local
                  * state so that it is correct as of the moment
@@ -237,76 +256,73 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 ino = mds_body->fid1.id;
                 mode = mds_body->mode;
 
-                if (it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK | IT_MKNOD)) {
+                /*We were called from revalidate2: did we find the same inode?*/
+                if ((*de)->d_inode &&
+                    (ino != (*de)->d_inode->i_ino || 
+                   mds_body->fid1.generation != (*de)->d_inode->i_generation)) {
+                        it->it_disposition |= IT_ENQ_COMPLETE;
+                        RETURN(-ESTALE);
+                }
+
+                /* If we're doing an IT_OPEN which did not result in an actual
+                 * successful open, then we need to remove the bit which saves
+                 * this request for unconditional replay. */
+                if (it->it_op & IT_OPEN &&
+                    (!(it->it_disposition & IT_OPEN_OPEN) ||
+                     it->it_status != 0))
+                        request->rq_flags &= ~PTL_RPC_FL_REPLAY;
+
+                if (it->it_op & IT_CREAT) {
                         mdc_store_inode_generation(request, 2, 1);
-                        /* For create ops, we want the lookup to be negative,
-                         * unless the create failed in a way that indicates
-                         * that the file is already there */
-                        if (it->it_status == 0)
-                                atomic_inc(&request->rq_refcount);
-                        if (it->it_status != -EEXIST)
-                                GOTO(out, flag = LL_LOOKUP_NEGATIVE);
-                        /*
-                         * Fall through to update attibutes: it may already
-                         * have appeared in the namespace of another client
-                         */
+                        /* The server will return to us, in it_disposition, an
+                         * indication of exactly what it_status refers to.
+                         *
+                         * If IT_OPEN_OPEN is set, then it_status refers to the
+                         * open() call, otherwise if IT_OPEN_CREATE is set, then
+                         * it_status is the creation failure mode.  In either
+                         * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
+                         * indicating whether the child lookup was successful.
+                         *
+                         * Else, if IT_OPEN_LOOKUP then it_status is the rc
+                         * of the child lookup.
+                         *
+                         * Finally, if none of the bits are set, then the
+                         * failure occurred while looking up the parent. */
+                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
+                        if (rc)
+                                GOTO(drop_req, rc);
+
+                        if (it->it_disposition & IT_OPEN_CREATE)
+                                ptlrpc_request_addref(request);
+
+                        if (it->it_disposition & IT_OPEN_NEG)
+                                flag = LL_LOOKUP_NEGATIVE;
+                        else
+                                flag = LL_LOOKUP_POSITIVE;
+                } else if (it->it_op == IT_OPEN) {
+                        LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
+
+                        rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
+                        if (rc)
+                                GOTO(drop_req, rc);
+
+                        if (it->it_disposition & IT_OPEN_OPEN)
+                                ptlrpc_request_addref(request);
+
+                        if (it->it_disposition & IT_OPEN_NEG)
+                                flag = LL_LOOKUP_NEGATIVE;
+                        else
+                                flag = LL_LOOKUP_POSITIVE;
                 } else if (it->it_op & (IT_GETATTR | IT_SETATTR | IT_LOOKUP |
                                         IT_READLINK)) {
                         /* For check ops, we want the lookup to succeed */
                         it->it_data = NULL;
                         if (it->it_status)
-                                GOTO(out, flag = LL_LOOKUP_NEGATIVE);
-                        /* Fall through to update attibutes. */
-                } else if (it->it_op & (IT_RENAME | IT_LINK)) {
-                        /* For rename, we want the source lookup to succeed */
-                        if (it->it_status) {
-                                it->it_data = NULL;
-                                GOTO(drop_req, rc = it->it_status);
-                        }
-                        /* Fall through to update attibutes. */
-                } else if (it->it_op & (IT_UNLINK | IT_RMDIR)) {
-                        /* For remove ops, we want the lookup to succeed unless
-                         * the file truly doesn't exist */
-                        it->it_data = NULL;
-                        if (it->it_status == -ENOENT)
-                                GOTO(out, flag = LL_LOOKUP_NEGATIVE);
-                        /* No point in updating attributes that we're about to
-                         * unlink.  -phil */
-                        GOTO(out, flag = LL_LOOKUP_POSITIVE);
-                } else if (it->it_op == IT_OPEN) {
-                        it->it_data = NULL;
-                        if (it->it_status && it->it_status != -EEXIST)
-                                GOTO(out, flag = LL_LOOKUP_NEGATIVE);
-                        /* Fall through to update attibutes. */
-                } else if (it->it_op & (IT_RENAME2 | IT_LINK2)) {
-                        it->it_data = NULL;
-                        /* This means the target lookup is negative */
-                        if (mds_body->valid == 0)
-                                GOTO(out, flag = LL_LOOKUP_NEGATIVE);
-                        /* XXX bug 289: should we maybe fall through here? -p */
-                        GOTO(out, flag = LL_LOOKUP_POSITIVE);
-                }
-
-                /* Do a getattr now that we have the lock, and fetch the
-                 * up-to-date stripe MD at the same time.
-                 */
-                valid = OBD_MD_FLNOTOBD;
-                if (it->it_op == IT_READLINK) {
-                        datalen = mds_body->size;
-                        valid |= OBD_MD_LINKNAME;
-                } else if (S_ISREG(mode)) {
-                        datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
-                        valid |= OBD_MD_FLEASIZE;
-                }
-                ptlrpc_req_finished(request);
-                request = NULL;
-                rc = mdc_getattr(&sbi->ll_mdc_conn, ino, mode,
-                                 valid, datalen, &request);
-                if (rc) {
-                        CERROR("failure %d inode "LPX64"\n", rc, ino);
-                        GOTO(drop_req, rc = -abs(rc));
-                }
-                offset = 0;
+                                flag = LL_LOOKUP_NEGATIVE;
+                        else
+                                flag = LL_LOOKUP_POSITIVE;
+                } else
+                        LBUG();
         } else {
                 obd_flag valid;
                 int mode;
@@ -332,6 +348,8 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 if (S_ISREG(mode)) {
                         datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL),
                         valid |= OBD_MD_FLEASIZE;
+                } else {
+                        valid |= OBD_MD_FLBLOCKS;
                 }
 
                 rc = mdc_getattr(&sbi->ll_mdc_conn, ino, mode, valid,
@@ -342,7 +360,6 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 }
         }
 
- out:
         if (intent_finish != NULL) {
                 rc = intent_finish(flag, request, de, it, offset, ino);
                 dentry = *de; /* intent_finish may change *de */
@@ -350,29 +367,19 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 ptlrpc_req_finished(request);
         }
 
-        if (it->it_disposition && it->it_op & (IT_RENAME | IT_LINK))
-                it->it_data = dentry;
-
-        /* this places the intent in the dentry so that the vfs_xxx
-         *  operation can lay its hands on it; but that is not 
-         *  always needed...
-         */
-        if ( // it->it_status == 0 && 
-            it->it_op != IT_RENAME && 
-            it->it_op != IT_LINK && 
-            it->it_op != IT_SETATTR &&
-            it->it_op != IT_GETATTR &&
-            it->it_op != IT_READDIR &&
-            it->it_op != IT_LOOKUP) {
+        /* This places the intent in the dentry so that the vfs_xxx
+         * operation can lay its hands on it; but that is not always
+         * needed...  (we need to save it in the GETATTR case for the
+         * benefit of ll_inode_revalidate -phil) */
+        if (it->it_op & (IT_OPEN | IT_GETATTR))
                 LL_SAVE_INTENT(dentry, it);
-        } else {
+        else
                 CDEBUG(D_DENTRY,
                        "D_IT dentry %p fsdata %p intent: %s status %d\n",
                        dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
                        it->it_status);
-        }
 
-        if (rc < 0 || it->it_op == IT_LOOKUP)
+        if (it->it_op == IT_LOOKUP)
                 ll_intent_release(dentry, it);
 
         RETURN(rc);
@@ -395,7 +402,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
 
                 /* We are called here with 'de' already on the aliases list. */
-                if (dentry == de) { 
+                if (dentry == de) {
                         CERROR("whoops\n");
                         continue;
                 }
@@ -418,6 +425,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
                 d_rehash(dentry);
                 atomic_inc(&dentry->d_count);
                 iput(inode);
+                dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
                 return dentry;
         }
 
@@ -434,7 +442,7 @@ lookup2_finish(int flag, struct ptlrpc_request *request, struct dentry **de,
         struct inode *inode = NULL;
         struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lmm = NULL};
 
-        if (flag == LL_LOOKUP_POSITIVE) {
+        if (!(flag & LL_LOOKUP_NEGATIVE)) {
                 ENTRY;
                 lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset);
 
@@ -460,10 +468,8 @@ lookup2_finish(int flag, struct ptlrpc_request *request, struct dentry **de,
                 /* We asked for a lock on the directory, and may have been
                  * granted a lock on the inode.  Just in case, fixup the data
                  * pointer. */
-                ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
-                                   inode, sizeof(*inode));
-
-                EXIT;
+                mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+                                   inode);
         } else {
                 ENTRY;
         }
@@ -471,9 +477,7 @@ lookup2_finish(int flag, struct ptlrpc_request *request, struct dentry **de,
         ptlrpc_req_finished(request);
 
         dentry->d_op = &ll_d_ops;
-        if (ll_d2d(dentry) == NULL) {
-                ll_set_dd(dentry);
-        }
+        ll_set_dd(dentry);
 
         if (dentry == saved)
                 d_add(dentry, inode);
@@ -488,9 +492,12 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
         int rc;
         ENTRY;
 
+        if (it && it->it_op == IT_TRUNC)
+                it->it_op = IT_SETATTR;
+
         rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
         if (rc < 0) {
-                CERROR("ll_intent_lock: %d\n", rc);
+                CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
                 RETURN(ERR_PTR(rc));
         }
 
@@ -500,6 +507,7 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
                 RETURN(dentry);
 }
 
+/* We depend on "mode" being set with the proper file type/umask by now */
 static struct inode *ll_create_node(struct inode *dir, const char *name,
                                     int namelen, const void *data, int datalen,
                                     int mode, __u64 extra,
@@ -514,12 +522,6 @@ static struct inode *ll_create_node(struct inode *dir, const char *name,
         ENTRY;
 
         if (it && it->it_disposition) {
-                int rc = it->it_status;
-                if (rc) {
-                        CERROR("error creating MDS inode for %*s: rc = %d\n",
-                               namelen, name, rc);
-                        RETURN(ERR_PTR(rc));
-                }
                 ll_invalidate_inode_pages(dir);
                 request = it->it_data;
                 body = lustre_msg_buf(request->rq_repmsg, 1);
@@ -567,8 +569,8 @@ static struct inode *ll_create_node(struct inode *dir, const char *name,
                 /* We asked for a lock on the directory, but were
                  * granted a lock on the inode.  Since we finally have
                  * an inode pointer, stuff it in the lock. */
-                ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
-                                   inode, sizeof(*inode));
+                mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+                                   inode);
         }
 
         EXIT;
@@ -582,47 +584,63 @@ static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
 {
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct mds_body *body;
+        struct lov_stripe_md *lsm = NULL;
+        struct lustre_handle lockh;
+        struct lookup_intent it = { .it_op = IT_UNLINK };
+        struct obdo *oa;
         int err;
-
-        ENTRY;
-
-        err = mdc_unlink(&sbi->ll_mdc_conn, dir, child, mode, name, len,
-                         &request);
-        ptlrpc_req_finished(request);
-
-        RETURN(err);
-}
-
-int ll_mdc_link(struct dentry *src, struct inode *dir,
-                const char *name, int len)
-{
-        struct ptlrpc_request *request = NULL;
-        int err;
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
-
+        struct mdc_unlink_data data;
         ENTRY;
 
-        err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request);
-        ptlrpc_req_finished(request);
-
-        RETURN(err);
-}
-
-int ll_mdc_rename(struct inode *src, struct inode *tgt,
-                  struct dentry *old, struct dentry *new)
-{
-        struct ptlrpc_request *request = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(src);
-        int err;
-
-        ENTRY;
+        data.unl_dir = dir;
+        data.unl_de = child;
+        data.unl_mode = mode;
+        data.unl_name = name;
+        data.unl_len = len;
+
+        err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX, dir,
+                         NULL, &lockh, NULL, 0, &data, sizeof(data));
+        mdc_put_rpc_lock(&mdc_rpc_lock, &it); 
+        request = (struct ptlrpc_request *)it.it_data;
+        if (err < 0)
+                GOTO(out, err);
+        if (it.it_status)
+                GOTO(out, err = it.it_status);
+        err = 0;
+
+        body = lustre_msg_buf(request->rq_repmsg, 1);
+        LASSERT(body != NULL);
+        if (!(body->valid & OBD_MD_FLEASIZE))
+                GOTO(out, 0);
+
+        /* The MDS sent back the EA because we unlinked the last reference
+         * to this file.  Use this EA to unlink the objects on the OST */
+        err = obd_unpackmd(ll_i2obdconn(dir), &lsm,
+                           lustre_msg_buf(request->rq_repmsg, 2));
+        if (err < 0)
+                CERROR("obd_unpackmd: %d\n", err);
+
+        oa = obdo_alloc();
+        if (oa == NULL)
+                GOTO(out_unlock, err = -ENOMEM);
+
+        oa->o_id = lsm->lsm_object_id;
+        oa->o_mode = body->mode & S_IFMT;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
+
+        err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+        obdo_free(oa);
+        if (err)
+                CERROR("obd destroy objid 0x"LPX64" error %d\n",
+                       lsm->lsm_object_id, err);
 
-        err = mdc_rename(&sbi->ll_mdc_conn, src, tgt,
-                         old->d_name.name, old->d_name.len,
-                         new->d_name.name, new->d_name.len, &request);
+        obd_free_memmd(ll_i2obdconn(dir), &lsm);
+ out_unlock:
+        ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
+ out:
         ptlrpc_req_finished(request);
-
-        RETURN(err);
+        return err;
 }
 
 /*
@@ -646,24 +664,66 @@ static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
         int rc = 0;
         ENTRY;
 
-        LL_GET_INTENT(dentry, it);
+        it = dentry->d_it;
+
+        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        if (rc) {
+                LL_GET_INTENT(dentry, it);
+                ptlrpc_req_finished(it->it_data);
+                RETURN(rc);
+        }
 
         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                NULL, 0, mode, 0, it);
 
-        if (IS_ERR(inode))
+        if (IS_ERR(inode)) {
+                LL_GET_INTENT(dentry, it);
                 RETURN(PTR_ERR(inode));
+        }
 
+        /* no directory data updates when intents rule */
         if (it && it->it_disposition) {
                 d_instantiate(dentry, inode);
-        } else {
-                /* no directory data updates when intents rule */
-                rc = ext2_add_nondir(dentry, inode);
+                RETURN(0);
         }
 
+        rc = ext2_add_nondir(dentry, inode);
         RETURN(rc);
 }
 
+/* Create a non-directory node called name (len bytes) in dir through
+ * mdc_create().  A mode with no file type is created as a regular file. */
+static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
+                     int rdev)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct ptlrpc_request *request = NULL;
+        time_t time = CURRENT_TIME;
+        int err;
+        ENTRY;
+
+        if (dir->i_nlink >= EXT2_LINK_MAX)
+                RETURN(-EMLINK);
+
+        mode &= ~current->fs->umask;
+        if ((mode & S_IFMT) == 0)
+                mode |= S_IFREG;
+
+        switch (mode & S_IFMT) {
+        case S_IFDIR:
+                RETURN(-EPERM);
+        case S_IFREG: case S_IFCHR: case S_IFBLK:
+        case S_IFIFO: case S_IFSOCK:
+                break;
+        default:
+                RETURN(-EINVAL);
+        }
+        err = mdc_create(&sbi->ll_mdc_conn, dir, name, len, NULL, 0, mode,
+                         current->fsuid, current->fsgid, time, rdev, &request);
+        ptlrpc_req_finished(request);
+        RETURN(err);
+}
+
 static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
                     int rdev)
 {
@@ -673,6 +733,8 @@ static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
 
         LL_GET_INTENT(dentry, it);
 
+        if ((mode & S_IFMT) == 0)
+                mode |= S_IFREG;
         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
                                NULL, 0, mode, rdev, it);
 
@@ -688,6 +750,25 @@ static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
         return rc;
 }
 
+/* Create symlink name/len in dir pointing at tgt, using mdc_create(). */
+static int ll_symlink2(struct inode *dir, const char *name, int len,
+                       const char *tgt)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct ptlrpc_request *request = NULL;
+        time_t time = CURRENT_TIME;
+        int err;
+        ENTRY;
+
+        if (dir->i_nlink >= EXT2_LINK_MAX)
+                RETURN(-EMLINK);
+        err = mdc_create(&sbi->ll_mdc_conn, dir, name, len, tgt,
+                         strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, current->fsuid,
+                         current->fsgid, time, 0, &request);
+        ptlrpc_req_finished(request);
+        RETURN(err);
+}
+
 static int ll_symlink(struct inode *dir, struct dentry *dentry,
                       const char *symname)
 {
@@ -726,6 +807,21 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
         RETURN(err);
 }
 
+/* Add a hard link name/len in dir to inode src through mdc_link(); the
+ * request is handed to ptlrpc_req_finished() here, only rc is returned. */
+static int ll_link2(struct inode *src, struct inode *dir,
+                    const char *name, int len)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct ptlrpc_request *request = NULL;
+        int err;
+        ENTRY;
+
+        err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request);
+        ptlrpc_req_finished(request);
+        RETURN(err);
+}
+
 static int ll_link(struct dentry *old_dentry, struct inode * dir,
                    struct dentry *dentry)
 {
@@ -752,8 +848,8 @@ static int ll_link(struct dentry *old_dentry, struct inode * dir,
         if (inode->i_nlink >= EXT2_LINK_MAX)
                 return -EMLINK;
 
-        rc = ll_mdc_link(old_dentry, dir,
-                          dentry->d_name.name, dentry->d_name.len);
+        rc = ll_link2(old_dentry->d_inode, dir,
+                      dentry->d_name.name, dentry->d_name.len);
         if (rc)
                 RETURN(rc);
 
@@ -764,6 +860,26 @@ static int ll_link(struct dentry *old_dentry, struct inode * dir,
         return ext2_add_nondir(dentry, inode);
 }
 
+/* Create directory name/len in dir through mdc_create().  Only permission
+ * and sticky bits of mode are kept, less the umask; S_IFDIR is forced. */
+static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct ptlrpc_request *request = NULL;
+        time_t time = CURRENT_TIME;
+        int err;
+        ENTRY;
+
+        if (dir->i_nlink >= EXT2_LINK_MAX)
+                RETURN(-EMLINK);
+        mode = S_IFDIR | (mode & (S_IRWXUGO | S_ISVTX) & ~current->fs->umask);
+        err = mdc_create(&sbi->ll_mdc_conn, dir, name, len, NULL, 0, mode,
+                         current->fsuid, current->fsgid, time, 0, &request);
+        ptlrpc_req_finished(request);
+        RETURN(err);
+}
+
+
 static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
         struct lookup_intent *it;
@@ -812,6 +928,24 @@ out_dir:
         goto out;
 }
 
+/* Remove directory name/len from dir: calls ll_mdc_unlink() with no child
+ * inode and S_IFDIR as the file type, and returns its result. */
+static int ll_rmdir2(struct inode *dir, const char *name, int len)
+{
+        ENTRY;
+
+        RETURN(ll_mdc_unlink(dir, NULL, S_IFDIR, name, len));
+}
+
+/* Unlink name/len from dir: calls ll_mdc_unlink() with no child inode and
+ * S_IFREG as the file type, and returns its result. */
+static int ll_unlink2(struct inode *dir, const char *name, int len)
+{
+        ENTRY;
+
+        RETURN(ll_mdc_unlink(dir, NULL, S_IFREG, name, len));
+}
+
 static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
                             struct lookup_intent *it, __u32 mode)
 {
@@ -819,6 +953,7 @@ static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
         struct ext2_dir_entry_2 * de;
         struct page * page;
         int rc = 0;
+        ENTRY;
 
         if (it && it->it_disposition) {
                 rc = it->it_status;
@@ -846,6 +981,7 @@ static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
         ll_invalidate_inode_pages(dir);
 
         inode->i_ctime = dir->i_ctime;
+        EXIT;
 out_dec:
         ext2_dec_count(inode);
 out:
@@ -855,10 +991,11 @@ out:
 static int ll_unlink(struct inode *dir, struct dentry *dentry)
 {
         struct lookup_intent * it;
+        ENTRY;
 
         LL_GET_INTENT(dentry, it);
 
-        return ll_common_unlink(dir, dentry, it, S_IFREG);
+        RETURN(ll_common_unlink(dir, dentry, it, S_IFREG));
 }
 
 static int ll_rmdir(struct inode *dir, struct dentry *dentry)
@@ -883,6 +1020,24 @@ static int ll_rmdir(struct inode *dir, struct dentry *dentry)
         RETURN(rc);
 }
 
+/* Rename oldname under src to newname under tgt through mdc_rename(). */
+static int ll_rename2(struct inode *src, struct inode *tgt,
+                      const char *oldname, int oldlen,
+                      const char *newname, int newlen)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(src);
+        struct ptlrpc_request *request = NULL;
+        int err;
+        ENTRY;
+
+        err = mdc_rename(&sbi->ll_mdc_conn, src, tgt, oldname, oldlen,
+                         newname, newlen, &request);
+        ptlrpc_req_finished(request);
+        RETURN(err);
+}
+
+
+
 static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
                      struct inode * new_dir, struct dentry * new_dentry)
 {
@@ -907,7 +1062,9 @@ static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
                 GOTO(out, err = it->it_status);
         }
 
-        err = ll_mdc_rename(old_dir, new_dir, old_dentry, new_dentry);
+        err = ll_rename2(old_dir, new_dir,
+                         old_dentry->d_name.name, old_dentry->d_name.len,
+                         new_dentry->d_name.name, new_dentry->d_name.len);
         if (err)
                 goto out;
 
@@ -977,15 +1134,24 @@ out:
         return err;
 }
 
+extern int ll_inode_revalidate(struct dentry *dentry);
 struct inode_operations ll_dir_inode_operations = {
-        create:         ll_create,
-        lookup2:        ll_lookup2,
-        link:           ll_link,
-        unlink:         ll_unlink,
-        symlink:        ll_symlink,
-        mkdir:          ll_mkdir,
-        rmdir:          ll_rmdir,
-        mknod:          ll_mknod,
-        rename:         ll_rename,
-        setattr:        ll_setattr
+        create:          ll_create,
+        lookup2:         ll_lookup2,
+        link:            ll_link,
+        link2:           ll_link2,
+        unlink:          ll_unlink,
+        unlink2:         ll_unlink2,
+        symlink:         ll_symlink,
+        symlink2:        ll_symlink2,
+        mkdir:           ll_mkdir,
+        mkdir2:          ll_mkdir2,
+        rmdir:           ll_rmdir,
+        rmdir2:          ll_rmdir2,
+        mknod:           ll_mknod,
+        mknod2:          ll_mknod2,
+        rename:          ll_rename,
+        rename2:         ll_rename2,
+        setattr:         ll_setattr,
+        revalidate:      ll_inode_revalidate,
 };
index e1402d1..ab3ff86 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Lite I/O Page Cache
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -120,7 +120,7 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int create)
         pg.flag = create ? OBD_BRW_CREATE : 0;
 
         set->brw_callback = ll_brw_sync_wait;
-        rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, set);
+        rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, set, NULL);
         if (rc) {
                 if (rc != -EIO)
                         CERROR("error from obd_brw: rc = %d\n", rc);
@@ -195,7 +195,7 @@ void ll_truncate(struct inode *inode)
 
         /* truncate == punch from new size to absolute end of file */
         err = obd_punch(ll_i2obdconn(inode), &oa, lsm, inode->i_size,
-                        OBD_OBJECT_EOF);
+                        OBD_OBJECT_EOF, NULL);
         if (err)
                 CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
         else
@@ -232,10 +232,24 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         if (from == 0 && to == PAGE_SIZE)
                 RETURN(0);
 
-        /* We are writing to a new page, no need to read old data */
+        /* If we are writing to a new page, no need to read old data.  If we
+         * haven't already gotten the file size in ll_file_write() since
+         * we got our extent lock, we need to verify it here before we
+         * overwrite some other node's write (bug 445).
+         */
         if (inode->i_size <= offset) {
-                memset(addr, 0, PAGE_SIZE);
-                GOTO(prepare_done, rc=0);
+                if (!S_ISBLK(inode->i_mode) && !(file->f_flags & O_APPEND)) {
+                        struct ll_file_data *fd = file->private_data;
+                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+
+                        rc = ll_file_size(inode, lsm, &fd->fd_osthandle);
+                        if (rc)
+                                GOTO(prepare_done, rc);
+                }
+                if (inode->i_size <= offset) {
+                        memset(addr, 0, PAGE_SIZE);
+                        GOTO(prepare_done, rc=0);
+                }
         }
 
         rc = ll_brw(OBD_BRW_READ, inode, page, 0);
@@ -244,7 +258,9 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
  prepare_done:
         if (!rc)
                 SetPageUptodate(page);
-
+        else
+                kunmap (page);
+        
         return rc;
 }
 
@@ -307,7 +323,7 @@ static int ll_commit_write(struct file *file, struct page *page,
                pg.off, pg.count);
 
         set->brw_callback = ll_brw_sync_wait;
-        rc = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md, 1, &pg, set);
+        rc = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md, 1, &pg, set, NULL);
         if (rc)
                 CERROR("error from obd_brw: rc = %d\n", rc);
         else {
@@ -368,7 +384,7 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
 
         set->brw_callback = ll_brw_sync_wait;
         rc = obd_brw(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
-                     ll_i2obdconn(inode), lsm, bufs_per_obdo, pga, set);
+                     ll_i2obdconn(inode), lsm, bufs_per_obdo, pga, set, NULL);
         if (rc)
                 CERROR("error from obd_brw: rc = %d\n", rc);
         else {
index 73b6ea5..8df74f1 100644 (file)
@@ -3,10 +3,22 @@
  *
  * Lustre Light Super operations
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
@@ -27,12 +39,17 @@ extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
 struct super_operations ll_super_operations;
 
+/* /proc/lustre/llite root that tracks llite mount points */
+struct proc_dir_entry *proc_lustre_fs_root;
+/* lproc_llite.c */
+extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                       struct super_block *sb,
+                                       char *osc, char *mdc);
+
 extern int ll_recover(struct recovd_data *, int);
 extern int ll_commitcbd_setup(struct ll_sb_info *);
 extern int ll_commitcbd_cleanup(struct ll_sb_info *);
 
-extern void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
 static char *ll_read_opt(const char *opt, char *data)
 {
         char *value;
@@ -110,6 +127,7 @@ static struct super_block *ll_read_super(struct super_block *sb,
         struct ptlrpc_connection *mdc_conn;
         struct ll_read_inode2_cookie lic;
         class_uuid_t uuid;
+        struct obd_uuid param_uuid;
 
         ENTRY;
 
@@ -120,7 +138,7 @@ static struct super_block *ll_read_super(struct super_block *sb,
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
         INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
         generate_random_uuid(uuid);
-        class_uuid_unparse(uuid, sbi->ll_sb_uuid);
+        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
 
         sb->u.generic_sbp = sbi;
 
@@ -136,13 +154,14 @@ static struct super_block *ll_read_super(struct super_block *sb,
                 GOTO(out_free, sb = NULL);
         }
 
-        obd = class_uuid2obd(mdc);
+        strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
+        obd = class_uuid2obd(&param_uuid);
         if (!obd) {
                 CERROR("MDC %s: not setup or attached\n", mdc);
                 GOTO(out_free, sb = NULL);
         }
 
-        err = obd_connect(&sbi->ll_mdc_conn, obd, sbi->ll_sb_uuid,
+        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid,
                           ptlrpc_recovd, ll_recover);
         if (err) {
                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
@@ -152,13 +171,14 @@ static struct super_block *ll_read_super(struct super_block *sb,
         mdc_conn = sbi2mdc(sbi)->cl_import.imp_connection;
         list_add(&mdc_conn->c_sb_chain, &sbi->ll_conn_chain);
 
-        obd = class_uuid2obd(osc);
+        strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid));
+        obd = class_uuid2obd(&param_uuid);
         if (!obd) {
                 CERROR("OSC %s: not setup or attached\n", osc);
                 GOTO(out_mdc, sb = NULL);
         }
 
-        err = obd_connect(&sbi->ll_osc_conn, obd, sbi->ll_sb_uuid,
+        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid,
                           ptlrpc_recovd, ll_recover);
         if (err) {
                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
@@ -215,7 +235,13 @@ static struct super_block *ll_read_super(struct super_block *sb,
 
         ptlrpc_req_finished(request);
         request = NULL;
-        ll_proc_namespace(sb, osc, mdc);
+
+        if (proc_lustre_fs_root) {
+                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
+                                                  osc, mdc);
+                if (err < 0)
+                        CERROR("could not register mount in /proc/lustre\n");
+        }
 
 out_dev:
         if (mdc)
@@ -257,8 +283,10 @@ static void ll_put_super(struct super_block *sb)
          */
         mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
 
-        lprocfs_dereg_mnt(sbi->ll_proc_root);
-        sbi->ll_proc_root = NULL;
+        if (sbi->ll_proc_root) {
+                lprocfs_remove(sbi->ll_proc_root);
+                sbi->ll_proc_root = NULL;
+        }
 
         obd_disconnect(&sbi->ll_mdc_conn);
 
@@ -303,13 +331,15 @@ static void ll_clear_inode(struct inode *inode)
                 obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
 
         if (lli->lli_symlink_name) {
-                OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
+                OBD_FREE(lli->lli_symlink_name,
+                         strlen(lli->lli_symlink_name) + 1);
                 lli->lli_symlink_name = NULL;
         }
 
         EXIT;
 }
 
+#if 0
 static void ll_delete_inode(struct inode *inode)
 {
         ENTRY;
@@ -335,19 +365,21 @@ static void ll_delete_inode(struct inode *inode)
                 oa->o_id = lsm->lsm_object_id;
                 obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
 
-                err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
+                err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
                 obdo_free(oa);
                 if (err)
-                        CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
-                               lsm->lsm_object_id, err);
+                        CDEBUG(D_INODE,
+                               "inode %lu obd_destroy objid "LPX64" error %d\n",
+                               inode->i_ino, lsm->lsm_object_id, err);
         }
 out:
         clear_inode(inode);
         EXIT;
 }
+#endif
 
 /* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
+static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
 {
         unsigned int ia_valid = attr->ia_valid;
         int error = 0;
@@ -393,11 +425,30 @@ int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
          */
         attr->ia_valid &= ~ATTR_SIZE;
         if (attr->ia_valid) {
-                err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
+                err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, NULL, 0,
+                                  &request);
                 if (err)
-                        CERROR("mdc_setattr fails (%d)\n", err);
+                        CERROR("mdc_setattr fails: err = %d\n", err);
 
                 ptlrpc_req_finished(request);
+                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
+                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+                        struct obdo oa;
+                        int err2; /* OST result; MDS result stays in err */
+                        /* NOTE(review): can lli_smd be NULL here? confirm */
+                        CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
+                               inode->i_ino, attr->ia_mtime);
+                        oa.o_id = lsm->lsm_object_id;
+                        oa.o_mode = S_IFREG;
+                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
+                        oa.o_mtime = attr->ia_mtime;
+                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                        if (err2) {
+                                CERROR("obd_setattr fails: rc=%d\n", err2);
+                                if (!err) /* report the first failure only */
+                                        err = err2;
+                        }
+                }
         }
 
         RETURN(err);
@@ -461,8 +512,14 @@ out:
         RETURN(rc);
 }
 
-void ll_update_inode(struct inode *inode, struct mds_body *body)
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+                     struct lov_mds_md *lmm)
 {
+        struct ll_inode_info *lli = ll_i2info(inode);
+
+        if (lmm != NULL)
+                obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lmm);
+
         if (body->valid & OBD_MD_FLID)
                 inode->i_ino = body->ino;
         if (body->valid & OBD_MD_FLATIME)
@@ -489,6 +546,8 @@ void ll_update_inode(struct inode *inode, struct mds_body *body)
                 inode->i_rdev = body->rdev;
         if (body->valid & OBD_MD_FLSIZE)
                 inode->i_size = body->size;
+        if (body->valid & OBD_MD_FLBLOCKS)
+                inode->i_blocks = body->blocks;
 }
 
 static void ll_read_inode2(struct inode *inode, void *opaque)
@@ -501,18 +560,16 @@ static void ll_read_inode2(struct inode *inode, void *opaque)
         sema_init(&lli->lli_open_sem, 1);
         atomic_set(&lli->lli_open_count, 0);
 
-        /* core attributes first */
-        ll_update_inode(inode, body);
-
         LASSERT(!lli->lli_smd);
-        if (lic && lic->lic_lmm)
-                obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
+
+        /* core attributes first */
+        ll_update_inode(inode, body, lic ? lic->lic_lmm : NULL);
 
         /* Get the authoritative file size */
         if (lli->lli_smd && (inode->i_mode & S_IFREG)) {
                 int rc;
                 LASSERT(lli->lli_smd->lsm_object_id != 0);
-                rc = ll_file_size(inode, lli->lli_smd);
+                rc = ll_file_size(inode, lli->lli_smd, NULL);
                 if (rc) {
                         CERROR("ll_file_size: %d\n", rc);
                         ll_clear_inode(inode);
@@ -536,6 +593,7 @@ static void ll_read_inode2(struct inode *inode, void *opaque)
                 inode->i_op = &ll_fast_symlink_inode_operations;
                 EXIT;
         } else {
+                inode->i_op = &ll_special_inode_operations;
                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
                 EXIT;
         }
@@ -549,7 +607,7 @@ static inline void invalidate_request_list(struct list_head *req_list)
                         list_entry(tmp, struct ptlrpc_request, rq_list);
                 CERROR("invalidating req xid "LPU64" op %d to %s:%d\n",
                        req->rq_xid, req->rq_reqmsg->opc,
-                       req->rq_connection->c_remote_uuid,
+                       req->rq_connection->c_remote_uuid.uuid,
                        req->rq_import->imp_client->cli_request_portal);
                 req->rq_flags |= PTL_RPC_FL_ERR;
                 wake_up(&req->rq_wait_for_rep);
@@ -584,7 +642,7 @@ struct super_operations ll_super_operations =
 {
         read_inode2: ll_read_inode2,
         clear_inode: ll_clear_inode,
-        delete_inode: ll_delete_inode,
+        //        delete_inode: ll_delete_inode,
         put_super: ll_put_super,
         statfs: ll_statfs,
         umount_begin: ll_umount_begin
@@ -599,12 +657,16 @@ static struct file_system_type lustre_lite_fs_type = {
 
 static int __init init_lustre_lite(void)
 {
-        printk(KERN_INFO "Lustre Lite 0.5.14, info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Lite Client File System; "
+               "info@clusterfs.com\n");
         ll_file_data_slab = kmem_cache_create("ll_file_data",
                                               sizeof(struct ll_file_data), 0,
                                               SLAB_HWCACHE_ALIGN, NULL, NULL);
         if (ll_file_data_slab == NULL)
                 return -ENOMEM;
+
+        proc_lustre_fs_root = proc_lustre_root ? proc_mkdir("llite", proc_lustre_root) : NULL;
+
         return register_filesystem(&lustre_lite_fs_type);
 }
 
@@ -612,10 +674,15 @@ static void __exit exit_lustre_lite(void)
 {
         unregister_filesystem(&lustre_lite_fs_type);
         kmem_cache_destroy(ll_file_data_slab);
+
+        if (proc_lustre_fs_root) {
+                lprocfs_remove(proc_lustre_fs_root);
+                proc_lustre_fs_root = NULL;
+        }
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
+MODULE_DESCRIPTION("Lustre Lite Client File System");
 MODULE_LICENSE("GPL");
 
 module_init(init_lustre_lite);
index 557d715..fad4a4d 100644 (file)
@@ -3,10 +3,22 @@
  *
  * Lustre Light Super operations
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
@@ -27,6 +39,13 @@ extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
 struct super_operations ll_super_operations;
 
+/* /proc/lustre/llite root that tracks llite mount points */
+struct proc_dir_entry *proc_lustre_fs_root;
+/* lproc_llite.c */
+extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+                                       struct super_block *sb,
+                                       char *osc, char *mdc);
+
 extern int ll_init_inodecache(void);
 extern void ll_destroy_inodecache(void);
 extern int ll_recover(struct recovd_data *, int);
@@ -34,7 +53,7 @@ extern int ll_commitcbd_setup(struct ll_sb_info *);
 extern int ll_commitcbd_cleanup(struct ll_sb_info *);
 int ll_read_inode2(struct inode *inode, void *opaque);
 
-extern void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
+extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
 
 static char *ll_read_opt(const char *opt, char *data)
 {
@@ -216,7 +235,14 @@ static int ll_fill_super(struct super_block *sb, void *data, int silent)
 
         ptlrpc_req_finished(request);
         request = NULL;
-        ll_proc_namespace(sb, osc, mdc)
+
+        if (proc_lustre_fs_root) {
+                err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
+                                                  osc, mdc);
+                if (err < 0)
+                        CERROR("could not register mount in /proc/lustre\n");
+        }
+
 out_dev:
         if (mdc)
                 OBD_FREE(mdc, strlen(mdc) + 1);
@@ -262,8 +288,10 @@ static void ll_put_super(struct super_block *sb)
          */
         mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
 
-        lprocfs_dereg_mnt(sbi->ll_proc_root);
+        if (sbi->ll_proc_root) {
+                lprocfs_remove(sbi->ll_proc_root);
         sbi->ll_proc_root = NULL;
+        }
 
         obd_disconnect(&sbi->ll_mdc_conn);
         OBD_FREE(sbi, sizeof(*sbi));
@@ -397,9 +425,26 @@ int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
         if (attr->ia_valid) {
                 err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
                 if (err)
-                        CERROR("mdc_setattr fails (%d)\n", err);
+                        CERROR("mdc_setattr fails: err = %d\n", err);
 
                 ptlrpc_req_finished(request);
+                if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
+                        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+                        struct obdo oa;
+                        int err2; /* OST result; MDS result stays in err */
+                        /* NOTE(review): can lli_smd be NULL here? confirm */
+                        CDEBUG(D_ERROR, "setting mtime on OST\n");
+                        oa.o_id = lsm->lsm_object_id;
+                        oa.o_mode = S_IFREG;
+                        oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
+                        oa.o_mtime = attr->ia_mtime;
+                        err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm);
+                        if (err2) {
+                                CERROR("obd_setattr fails: rc=%d\n", err2);
+                                if (!err) /* report the first failure only */
+                                        err = err2;
+                        }
+                }
         }
 
         RETURN(err);
@@ -463,8 +508,14 @@ out:
         RETURN(rc);
 }
 
-void ll_update_inode(struct inode *inode, struct mds_body *body)
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+                     struct lov_mds_md *lmm)
 {
+        struct ll_inode_info *lli = ll_i2info(inode);
+
+        if (lmm != NULL)
+                obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lmm);
+
         if (body->valid & OBD_MD_FLID)
                 inode->i_ino = body->ino;
         if (body->valid & OBD_MD_FLATIME)
@@ -491,6 +542,8 @@ void ll_update_inode(struct inode *inode, struct mds_body *body)
                 inode->i_rdev = to_kdev_t(body->rdev);
         if (body->valid & OBD_MD_FLSIZE)
                 inode->i_size = body->size;
+        if (body->valid & OBD_MD_FLBLOCKS)
+                inode->i_blocks = body->blocks;
 }
 
 int ll_read_inode2(struct inode *inode, void *opaque)
@@ -503,16 +556,14 @@ int ll_read_inode2(struct inode *inode, void *opaque)
 
         sema_init(&lli->lli_open_sem, 1);
 
-        /* core attributes first */
-        ll_update_inode(inode, body);
-
         LASSERT(!lli->lli_smd);
-        if (lic && lic->lic_lmm)
-                obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
+
+        /* core attributes first */
+        ll_update_inode(inode, body, lic ? lic->lic_lmm : NULL);
 
         /* Get the authoritative file size */
         if (lli->lli_smd && S_ISREG(inode->i_mode)) {
-                rc = ll_file_size(inode, lli->lli_smd);
+                rc = ll_file_size(inode, lli->lli_smd, NULL);
                 if (rc) {
                         CERROR("ll_file_size: %d\n", rc);
                         ll_clear_inode(inode);
@@ -652,7 +703,8 @@ struct file_system_type lustre_lite_fs_type = {
 static int __init init_lustre_lite(void)
 {
         int rc;
-        printk(KERN_INFO "Lustre Lite 0.5.14, info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Lite Client File System; "
+               "info@clusterfs.com\n");
         rc = ll_init_inodecache();
         if (rc)
                 return -ENOMEM;
@@ -663,6 +715,10 @@ static int __init init_lustre_lite(void)
                 ll_destroy_inodecache();
                 return -ENOMEM;
         }
+
+        proc_lustre_fs_root = proc_lustre_root ?
+                              proc_mkdir("llite", proc_lustre_root) : NULL;
+
         return register_filesystem(&lustre_lite_fs_type);
 }
 
@@ -671,10 +727,14 @@ static void __exit exit_lustre_lite(void)
         unregister_filesystem(&lustre_lite_fs_type);
         ll_destroy_inodecache();
         kmem_cache_destroy(ll_file_data_slab);
+        if (proc_lustre_fs_root) {
+                lprocfs_remove(proc_lustre_fs_root);
+                proc_lustre_fs_root = NULL;
+        }
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
+MODULE_DESCRIPTION("Lustre Lite Client File System");
 MODULE_LICENSE("GPL");
 
 module_init(init_lustre_lite);
index 5be4717..3c9d646 100644 (file)
@@ -101,8 +101,8 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
         }
 
         down(&lli->lli_open_sem);
-
         rc = ll_readlink_internal(inode, &request, &symname);
+        up(&lli->lli_open_sem);
         if (rc)
                 GOTO(out, rc);
 
@@ -113,15 +113,16 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
 
         rc = vfs_follow_link_it(nd, symname, it);
  out:
-        up(&lli->lli_open_sem);
         ptlrpc_req_finished(request);
 
         RETURN(rc);
 }
 
+extern int ll_inode_revalidate(struct dentry *dentry);
 extern int ll_setattr(struct dentry *de, struct iattr *attr);
 struct inode_operations ll_fast_symlink_inode_operations = {
         readlink:       ll_readlink,
         setattr:        ll_setattr,
-        follow_link2:    ll_follow_link
+        follow_link2:   ll_follow_link,
+        revalidate:     ll_inode_revalidate
 };
index 2320dcc..2070b01 100644 (file)
@@ -8,8 +8,11 @@ DEFS=
 MODULE = lov
 modulefs_DATA = lov.o
 EXTRA_PROGRAMS = lov
-LINX=
+LINX=client.c
 
 lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c $(LINX)
 
+client.c: 
+       test -e client.c || ln -sf $(top_srcdir)/lib/client.c
+
 include $(top_srcdir)/Rules
index 7135743..3e6b2d2 100644 (file)
@@ -1,15 +1,25 @@
  /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lov/lov.c
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@off.net>
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
  *         Peter Braam <braam@clusterfs.com>
- *         Mike Shaver <shaver@off.net>
+ *         Mike Shaver <shaver@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define EXPORT_SYMTAB
@@ -31,8 +41,6 @@
 #include <asm/div64.h>
 #include <linux/lprocfs_status.h>
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
 
 static kmem_cache_t *lov_file_cache;
 
@@ -60,16 +68,19 @@ extern int lov_getstripe(struct lustre_handle *conn, struct lov_mds_md *lmmu,
 /* obd methods */
 int lov_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 int lov_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
         struct ptlrpc_request *req = NULL;
@@ -78,7 +89,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         struct lov_desc *desc = &lov->desc;
         struct obd_export *exp;
         struct lustre_handle mdc_conn;
-        obd_uuid_t *uuidarray;
+        struct obd_uuid lov_mds_uuid = {"LOV_MDS_UUID"};
+        struct obd_uuid uuid;
+        char *tmp;
         int rc, rc2, i;
         ENTRY;
 
@@ -97,7 +110,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
 
         /* retrieve LOV metadata from MDS */
-        rc = obd_connect(&mdc_conn, lov->mdcobd, NULL, recovd, recover);
+        rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid, recovd,recover);
         if (rc) {
                 CERROR("cannot connect to mdc: rc = %d\n", rc);
                 GOTO(out_conn, rc);
@@ -125,14 +138,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
         lov_unpackdesc(desc);
 
-        if (req->rq_repmsg->buflens[1] < sizeof(*uuidarray)*desc->ld_tgt_count){
+        if (req->rq_repmsg->buflens[1] < sizeof(uuid.uuid)*desc->ld_tgt_count){
                 CERROR("LOV desc: invalid uuid array returned\n");
                 GOTO(out_conn, rc = -EINVAL);
         }
 
-        if (memcmp(obd->obd_uuid, desc->ld_uuid, sizeof(desc->ld_uuid))) {
+        if (memcmp(obd->obd_uuid.uuid, desc->ld_uuid.uuid,
+                   sizeof(desc->ld_uuid.uuid))) {
                 CERROR("LOV desc: uuid %s not on mds device (%s)\n",
-                       obd->obd_uuid, desc->ld_uuid);
+                       obd->obd_uuid.uuid, desc->ld_uuid.uuid);
                 GOTO(out_conn, rc = -EINVAL);
         }
 
@@ -163,37 +177,40 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
                 GOTO(out_conn, rc = -ENOMEM);
         }
 
-        uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
-        for (i = 0; i < desc->ld_tgt_count; i++)
-                memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(*uuidarray));
-
+        tmp = lustre_msg_buf(req->rq_repmsg, 1);
         for (i = 0; i < desc->ld_tgt_count; i++) {
-                struct obd_device *tgt = client_tgtuuid2obd(uuidarray[i]);
+                struct obd_device *tgt;
+                struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
+
+                strncpy(uuid.uuid, tmp, sizeof(uuid.uuid));
+                memcpy(&lov->tgts[i].uuid, &uuid, sizeof(uuid));
+                tgt = client_tgtuuid2obd(&uuid);
+                tmp += sizeof(uuid.uuid);
 
                 if (!tgt) {
-                        CERROR("Target %s not attached\n", uuidarray[i]);
+                        CERROR("Target %s not attached\n", uuid.uuid);
                         GOTO(out_disc, rc = -EINVAL);
                 }
 
                 if (!(tgt->obd_flags & OBD_SET_UP)) {
-                        CERROR("Target %s not set up\n", uuidarray[i]);
+                        CERROR("Target %s not set up\n", uuid.uuid);
                         GOTO(out_disc, rc = -EINVAL);
                 }
 
-                rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
+                rc = obd_connect(&lov->tgts[i].conn, tgt, &lov_osc_uuid, recovd,
                                  recover);
 
                 if (rc) {
-                        CERROR("Target %s connect error %d\n", uuidarray[i],
+                        CERROR("Target %s connect error %d\n", uuid.uuid,
                                rc);
                         GOTO(out_disc, rc);
                 }
-                        
+
                 rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
                                     sizeof(struct obd_device *), obd, NULL);
                 if (rc) {
                         CERROR("Target %s REGISTER_LOV error %d\n",
-                               uuidarray[i], rc);
+                               uuid.uuid, rc);
                         GOTO(out_disc, rc);
                 }
 
@@ -212,10 +229,11 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         while (i-- > 0) {
                 desc->ld_active_tgt_count--;
                 lov->tgts[i].active = 0;
+                memcpy(&uuid, &lov->tgts[i].uuid, sizeof(uuid));
                 rc2 = obd_disconnect(&lov->tgts[i].conn);
                 if (rc2)
-                        CERROR("LOV Target %s disconnect error: rc = %d\n",
-                                uuidarray[i], rc2);
+                        CERROR("error: LOV target %s disconnect on OST idx %d: "
+                               "rc = %d\n", uuid.uuid, i, rc2);
         }
         OBD_FREE(lov->tgts, lov->bufsize);
  out_conn:
@@ -244,7 +262,7 @@ static int lov_disconnect(struct lustre_handle *conn)
                 if (rc) {
                         if (lov->tgts[i].active) {
                                 CERROR("Target %s disconnect error %d\n",
-                                       lov->tgts[i].uuid, rc);
+                                       lov->tgts[i].uuid.uuid, rc);
                         }
                         rc = 0;
                 }
@@ -284,7 +302,7 @@ static int lov_disconnect(struct lustre_handle *conn)
  *  -EBADF   : The UUID is found, but the OBD is the wrong type (!)
  *  -EALREADY: The OSC is already marked (in)active
  */
-static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
+static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid,
                               int activate)
 {
         struct obd_device *obd;
@@ -293,13 +311,13 @@ static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
         ENTRY;
 
         CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n",
-               lov, uuid, activate);
+               lov, uuid->uuid, activate);
 
         spin_lock(&lov->lov_lock);
         for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
                 CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
-                       i, tgt->uuid, tgt->conn.addr);
-                if (strncmp(uuid, tgt->uuid, sizeof(tgt->uuid)) == 0)
+                       i, tgt->uuid.uuid, tgt->conn.addr);
+                if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof(uuid->uuid)) == 0)
                         break;
         }
 
@@ -313,7 +331,7 @@ static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
         }
 
         CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LOV idx %d\n",
-               obd->obd_name, obd->obd_uuid, obd->obd_minor, obd,
+               obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
                obd->obd_type->typ_name, i);
         if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
                 LBUG();
@@ -359,6 +377,7 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
 {
         struct obd_ioctl_data *data = buf;
         struct lov_obd *lov = &obd->u.lov;
+        struct obd_uuid uuid;
         int rc = 0;
         ENTRY;
 
@@ -373,9 +392,10 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
         }
 
         spin_lock_init(&lov->lov_lock);
-        lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
+        obd_str2uuid(&uuid, data->ioc_inlbuf1);
+        lov->mdcobd = class_uuid2obd(&uuid);
         if (!lov->mdcobd) {
-                CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
+                CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid.uuid,
                        data->ioc_inlbuf1);
                 rc = -EINVAL;
         }
@@ -401,7 +421,7 @@ static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle)
 
 /* the LOV expects oa->o_id to be set to the LOV object id */
 static int lov_create(struct lustre_handle *conn, struct obdo *oa,
-                      struct lov_stripe_md **ea)
+                      struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
@@ -448,9 +468,9 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
                 int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
                 int stripe_offset = mult % ost_count;
-                int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
+                int sub_offset = (mult / ost_count);
 
-                ost_idx = stripe_offset + sub_offset;
+                ost_idx = (stripe_offset + sub_offset) % ost_count;
         } else
                 ost_idx = lsm->lsm_stripe_offset;
 
@@ -471,12 +491,17 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
                 /* create data objects with "parent" OA */
                 memcpy(tmp, oa, sizeof(*tmp));
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
-                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
+                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
                 if (err) {
                         if (lov->tgts[ost_idx].active) {
                                 CERROR("error creating objid "LPX64" sub-object"
-                                       "on OST idx %d: rc = %d\n",
-                                       oa->o_id, ost_idx, err);
+                                       " on OST idx %d/%d: rc = %d\n", oa->o_id,
+                                       ost_idx, lsm->lsm_stripe_count, err);
+                                if (err > 0) {
+                                        CERROR("obd_create returned invalid "
+                                               "err %d\n", err);
+                                        err = -EIO;
+                                }
                                 if (!rc)
                                         rc = err;
                         }
@@ -525,14 +550,14 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
         return rc;
 
  out_cleanup:
-        while (i-- > 0) {
+        while (obj_alloc-- > 0) {
                 int err;
 
                 --loi;
                 /* destroy already created objects here */
                 memcpy(tmp, oa, sizeof(*tmp));
                 tmp->o_id = loi->loi_id;
-                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+                err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL, NULL);
                 if (err)
                         CERROR("Failed to uncreate objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
@@ -545,7 +570,7 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *lsm)
+                       struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obdo tmp;
         struct obd_export *export = class_conn2export(conn);
@@ -589,9 +614,9 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
                 err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
-                                  NULL);
+                                  NULL, NULL);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
-                        CERROR("Error destroying objid "LPX64" subobj "
-                               LPX64" on OST idx %d\n: rc = %d",
+                        CERROR("error: destroying objid "LPX64" subobj "
+                               LPX64" on OST idx %d: rc = %d\n",
                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
                         if (!rc)
@@ -625,14 +650,9 @@ static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
 }
 
 static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
-                            struct lov_stripe_md *lsm, int stripeno, int *new)
+                            struct lov_stripe_md *lsm, int stripeno, int *set)
 {
-        if (*new) {
-                obdo_cpy_md(tgt, src, valid);
-                if (valid & OBD_MD_FLSIZE)
-                        tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
-                *new = 0;
-        } else {
+        if (*set) {
                 if (valid & OBD_MD_FLSIZE) {
                         /* this handles sparse files properly */
                         obd_size lov_size;
@@ -647,6 +667,11 @@ static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                         tgt->o_ctime = src->o_ctime;
                 if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
                         tgt->o_mtime = src->o_mtime;
+        } else {
+                obdo_cpy_md(tgt, src, valid);
+                if (valid & OBD_MD_FLSIZE)
+                        tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
+                *set = 1;
         }
 }
 
@@ -659,7 +684,7 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
         struct lov_oinfo *loi;
         struct lov_file_handles *lfh = NULL;
         int i;
-        int new = 1;
+        int set = 0;
         ENTRY;
 
         if (!lsm) {
@@ -705,36 +730,31 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa,
                 err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL);
                 if (err) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
-                                CERROR("Error getattr objid "LPX64" subobj "
+                                CERROR("error: getattr objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc = %d\n",
                                        oa->o_id, loi->loi_id, loi->loi_ost_idx,
                                        err);
                                 RETURN(err);
                         }
                 } else {
-                        lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
+                        lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &set);
                 }
         }
 
-        RETURN(0);
+        RETURN(set ? 0 : -EIO);
 }
 
 static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *lsm)
+                       struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obdo *tmp;
         struct obd_export *export = class_conn2export(conn);
         struct lov_obd *lov;
         struct lov_oinfo *loi;
         struct lov_file_handles *lfh = NULL;
-        int rc = 0, i;
+        int rc = 0, i, set = 0;
         ENTRY;
 
-        /* Note that this code is currently unused, hence LBUG(), just
-         * to know when/if it is ever revived that it needs cleanups.
-         */
-        LBUG();
-
         if (!lsm) {
                 CERROR("LOV requires striping ea\n");
                 RETURN(-EINVAL);
@@ -752,6 +772,9 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
         /* size changes should go through punch and not setattr */
         LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
 
+        /* for now, we only expect mtime updates here */
+        LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
+
         tmp = obdo_alloc();
         if (!tmp)
                 RETURN(-ENOMEM);
@@ -763,31 +786,43 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
                 int err;
 
+                if (lov->tgts[loi->loi_ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+                        continue;
+                }
+
                 obdo_cpy_md(tmp, oa, oa->o_valid);
 
                 if (lfh)
                         memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
-                                sizeof(lfh->lfh_handles[i]));
+                               sizeof(lfh->lfh_handles[i]));
                 else
                         tmp->o_valid &= ~OBD_MD_FLHANDLE;
 
                 tmp->o_id = loi->loi_id;
 
-                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+                err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                                  NULL, NULL);
                 if (err) {
-                        CERROR("Error setattr objid "LPX64" subobj "LPX64
-                               " on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
-                        if (!rc)
-                                rc = err;
-                }
+                        if (lov->tgts[loi->loi_ost_idx].active) {
+                                CERROR("error: setattr objid "LPX64" subobj "
+                                       LPX64" on OST idx %d: rc = %d\n",
+                                       oa->o_id, loi->loi_id, loi->loi_ost_idx,
+                                       err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                } else
+                        set = 1;
         }
         obdo_free(tmp);
+        if (!set && !rc)
+                rc = -EIO;
         RETURN(rc);
 }
 
 static int lov_open(struct lustre_handle *conn, struct obdo *oa,
-                    struct lov_stripe_md *lsm)
+                    struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
         struct obd_export *export = class_conn2export(conn);
@@ -795,7 +830,7 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         struct lov_oinfo *loi;
         struct lov_file_handles *lfh = NULL;
         struct lustre_handle *handle;
-        int new = 1;
+        int set = 0;
         int rc = 0, i;
         ENTRY;
 
@@ -829,7 +864,6 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         oa->o_size = 0;
         oa->o_blocks = 0;
         for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
-
                 if (lov->tgts[loi->loi_ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
                         continue;
@@ -839,10 +873,11 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                 memcpy(tmp, oa, sizeof(*tmp));
                 tmp->o_id = loi->loi_id;
 
-                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+                rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                              NULL, NULL);
                 if (rc) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
-                                CERROR("Error open objid "LPX64" subobj "LPX64
+                                CERROR("error: open objid "LPX64" subobj "LPX64
                                        " on OST idx %d: rc = %d\n",
                                        oa->o_id, lsm->lsm_oinfo[i].loi_id,
                                        loi->loi_ost_idx, rc);
@@ -851,7 +886,7 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
                         continue;
                 }
 
-                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
+                lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
 
                 if (tmp->o_valid & OBD_MD_FLHANDLE)
                         memcpy(&lfh->lfh_handles[i], obdo_handle(tmp),
@@ -859,10 +894,10 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         }
 
         handle = obdo_handle(oa);
-        
+
         lfh->lfh_count = lsm->lsm_stripe_count;
         get_random_bytes(&lfh->lfh_cookie, sizeof(lfh->lfh_cookie));
-        
+
         handle->addr = (__u64)(unsigned long)lfh;
         handle->cookie = lfh->lfh_cookie;
         oa->o_valid |= OBD_MD_FLHANDLE;
@@ -870,6 +905,8 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa,
         list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
         spin_unlock(&export->exp_lov_data.led_lock);
 
+        if (!set && !rc)
+                rc = -EIO;
 out_tmp:
         obdo_free(tmp);
         RETURN(rc);
@@ -886,14 +923,15 @@ out_handles:
                 memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
                        sizeof(lfh->lfh_handles[i]));
 
-                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
-                if (err) {
-                        CERROR("Error closing objid "LPX64" subobj "LPX64
-                               " on OST idx %d after open error: rc = %d\n",
+                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+                                NULL, NULL);
+                if (err && lov->tgts[loi->loi_ost_idx].active) {
+                        CERROR("error: closing objid "LPX64" subobj "LPX64
+                               " on OST idx %d after open error: rc=%d\n",
                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
                 }
         }
-       
+
         OBD_FREE(lfh->lfh_handles,
                  lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
 out_lfh:
@@ -903,7 +941,7 @@ out_lfh:
 }
 
 static int lov_close(struct lustre_handle *conn, struct obdo *oa,
-                     struct lov_stripe_md *lsm)
+                     struct lov_stripe_md *lsm, struct obd_trans_info *oti)
 {
         struct obdo tmp;
         struct obd_export *export = class_conn2export(conn);
@@ -948,11 +986,14 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa,
                 else
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
-                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
+                err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
+                                NULL, NULL);
                 if (err) {
-                        CERROR("Error close objid "LPX64" subobj "LPX64
-                               " on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+                        if (lov->tgts[loi->loi_ost_idx].active) {
+                                CERROR("error: close objid "LPX64" subobj "LPX64
+                                       " on OST idx %d: rc = %d\n", oa->o_id,
+                                       loi->loi_id, loi->loi_ost_idx, err);
+                        }
                         if (!rc)
                                 rc = err;
                 }
@@ -1020,7 +1061,7 @@ static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
  * that the punch will affect. */
 static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                      struct lov_stripe_md *lsm,
-                     obd_off start, obd_off end)
+                     obd_off start, obd_off end, struct obd_trans_info *oti)
 {
         struct obdo tmp;
         struct obd_export *export = class_conn2export(conn);
@@ -1066,11 +1107,13 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
                         tmp.o_valid &= ~OBD_MD_FLHANDLE;
 
                 err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL,
-                                starti, endi);
+                                starti, endi, NULL);
                 if (err) {
-                        CERROR("Error punch objid "LPX64" subobj "LPX64
-                               " on OST idx %d: rc = %d\n",
-                               oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+                        if (lov->tgts[loi->loi_ost_idx].active) {
+                                CERROR("error: punch objid "LPX64" subobj "LPX64
+                                       " on OST idx %d: rc = %d\n", oa->o_id,
+                                       loi->loi_id, loi->loi_ost_idx, err);
+                        }
                         if (!rc)
                                 rc = err;
                 }
@@ -1080,7 +1123,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
 
 static inline int lov_brw(int cmd, struct lustre_handle *conn,
                           struct lov_stripe_md *lsm, obd_count oa_bufs,
-                          struct brw_page *pga, struct obd_brw_set *set)
+                          struct brw_page *pga, struct obd_brw_set *set,
+                          struct obd_trans_info *oti)
 {
         struct {
                 int bufct;
@@ -1151,7 +1195,8 @@ static inline int lov_brw(int cmd, struct lustre_handle *conn,
                 if (si->bufct) {
                         LASSERT(shift < oa_bufs);
                         rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
-                                     &si->lsm, si->bufct, &ioarr[shift], set);
+                                     &si->lsm, si->bufct, &ioarr[shift],
+                                     set, oti);
                         if (rc)
                                 GOTO(out_ioarr, rc);
                 }
@@ -1274,7 +1319,7 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                 if (rc)
                         memset(lov_lockhp, 0, sizeof(*lov_lockhp));
                 if (rc && lov->tgts[loi->loi_ost_idx].active) {
-                        CERROR("Error enqueue objid "LPX64" subobj "LPX64
+                        CERROR("error: enqueue objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
                                loi->loi_id, loi->loi_ost_idx, rc);
                         goto out_locks;
@@ -1296,9 +1341,9 @@ out_locks:
                 submd.lsm_stripe_count = 0;
                 err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
                                  mode, lov_lockhp);
-                if (err) {
-                        CERROR("Error cancelling objid "LPX64
-                               " on OST idx %d after enqueue error: rc = %d\n",
+                if (err && lov->tgts[loi->loi_ost_idx].active) {
+                        CERROR("error: cancelling objid "LPX64" on OST "
+                               "idx %d after enqueue error: rc = %d\n",
                                loi->loi_id, loi->loi_ost_idx, err);
                 }
         }
@@ -1370,7 +1415,7 @@ static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                                  mode, lov_lockhp);
                 if (err) {
                         if (lov->tgts[loi->loi_ost_idx].active) {
-                                CERROR("Error cancel objid "LPX64" subobj "
+                                CERROR("error: cancel objid "LPX64" subobj "
                                        LPX64" on OST idx %d: rc = %d\n",
                                        lsm->lsm_object_id,
                                        loi->loi_id, loi->loi_ost_idx, err);
@@ -1419,7 +1464,7 @@ static int lov_cancel_unused(struct lustre_handle *conn,
                 err = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn,
                                        &submd, flags);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
-                        CERROR("Error cancel unused objid "LPX64" subobj "LPX64
+                        CERROR("error: cancel unused objid "LPX64" subobj "LPX64
                                " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
                                loi->loi_id, loi->loi_ost_idx, err);
                         if (!rc)
@@ -1456,11 +1501,14 @@ static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
 
                 err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
                 if (err) {
-                        CERROR("Error statfs OSC %s i %d: err = %d\n",
-                               lov->tgts[i].uuid, i, err);
-                        if (!rc)
-                                rc = err;
-                        continue; /* XXX or break? - probably OK to continue */
+                        if (lov->tgts[i].active) {
+                                CERROR("error: statfs OSC %s on OST idx %d: "
+                                       "err = %d\n",
+                                       lov->tgts[i].uuid.uuid, i, err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                        continue;
                 }
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
@@ -1480,6 +1528,8 @@ static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
                          */
                 }
         }
+        if (!set && !rc)
+                rc = -EIO;
         RETURN(rc);
 }
 
@@ -1489,6 +1539,7 @@ static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         struct obd_device *obddev = class_conn2obd(conn);
         struct lov_obd *lov = &obddev->u.lov;
         int i, count = lov->desc.ld_tgt_count;
+        struct obd_uuid *uuidp;
         int rc;
 
         ENTRY;
@@ -1496,14 +1547,14 @@ static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         switch (cmd) {
         case IOC_LOV_SET_OSC_ACTIVE: {
                 struct obd_ioctl_data *data = karg;
-                rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
+                uuidp = (struct obd_uuid *)data->ioc_inlbuf1;
+                rc = lov_set_osc_active(lov, uuidp, data->ioc_offset);
                 break;
         }
         case OBD_IOC_LOV_GET_CONFIG: {
                 struct obd_ioctl_data *data = karg;
                 struct lov_tgt_desc *tgtdesc;
                 struct lov_desc *desc;
-                obd_uuid_t *uuidp;
                 char *buf = NULL;
 
                 buf = NULL;
@@ -1518,18 +1569,18 @@ static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                         RETURN(-EINVAL);
                 }
 
-                if (sizeof(*uuidp) * count > data->ioc_inllen2) {
+                if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) {
                         OBD_FREE(buf, len);
                         RETURN(-EINVAL);
                 }
 
                 desc = (struct lov_desc *)data->ioc_inlbuf1;
-                uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
                 memcpy(desc, &(lov->desc), sizeof(*desc));
 
+                uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
                 tgtdesc = lov->tgts;
                 for (i = 0; i < count; i++, uuidp++, tgtdesc++)
-                        memcpy(uuidp, tgtdesc->uuid, sizeof(*uuidp));
+                        obd_str2uuid(uuidp, tgtdesc->uuid.uuid);
 
                 rc = copy_to_user((void *)uarg, buf, len);
                 if (rc)
@@ -1543,7 +1594,8 @@ static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         case LL_IOC_LOV_GETSTRIPE:
                 rc = lov_getstripe(conn, karg, uarg);
                 break;
-        default:
+        default: {
+                int set = 0;
                 if (count == 0)
                         RETURN(-ENOTTY);
                 rc = 0;
@@ -1552,9 +1604,20 @@ static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
 
                         err = obd_iocontrol(cmd, &lov->tgts[i].conn,
                                             len, karg, uarg);
-                        if (err && !rc)
-                                rc = err;
+                        if (err) {
+                                if (lov->tgts[i].active) {
+                                        CERROR("error: iocontrol OSC %s on OST"
+                                               " idx %d: err = %d\n",
+                                               lov->tgts[i].uuid.uuid, i, err);
+                                        if (!rc)
+                                                rc = err;
+                                }
+                        } else
+                                set = 1;
                 }
+                if (!set && !rc)
+                        rc = -EIO;
+        }
         }
 
         RETURN(rc);
@@ -1584,21 +1647,21 @@ struct obd_ops lov_obd_ops = {
         o_iocontrol:   lov_iocontrol
 };
 
-
-#define LOV_VERSION "v0.1"
-
 static int __init lov_init(void)
 {
+        struct lprocfs_static_vars lvars;
         int rc;
-        printk(KERN_INFO "Lustre Logical Object Volume driver " LOV_VERSION
-               ", info@clusterfs.com\n");
+
+        printk(KERN_INFO "Lustre Logical Object Volume driver; "
+               "info@clusterfs.com\n");
         lov_file_cache = kmem_cache_create("ll_lov_file_data",
                                            sizeof(struct lov_file_handles),
                                            0, 0, NULL, NULL);
         if (!lov_file_cache)
                 RETURN(-ENOMEM);
 
-        rc = class_register_type(&lov_obd_ops, status_class_var,
+        lprocfs_init_vars(&lvars);
+        rc = class_register_type(&lov_obd_ops, lvars.module_vars,
                                  OBD_LOV_DEVICENAME);
         RETURN(rc);
 }
@@ -1611,7 +1674,7 @@ static void __exit lov_exit(void)
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver " LOV_VERSION);
+MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver");
 MODULE_LICENSE("GPL");
 
 module_init(lov_init);
index 3d4b4b8..9dc4e03 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc. <adilger@clusterfs.com>
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -260,13 +261,14 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 RETURN(-EINVAL);
         }
         if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count) {
-                CERROR("stripe count %d more than OST count %d\n",
-                       (int)lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
+                CERROR("stripe count %u more than OST count %d\n",
+                       lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
                 RETURN(-EINVAL);
         }
-        if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count) {
-                CERROR("stripe offset %d more than max OST index %d\n",
-                       (int)lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
+        if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count &&
+            lmm.lmm_stripe_offset != 0xffffffff) {
+                CERROR("stripe offset %u more than max OST index %d\n",
+                       lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
                 RETURN(-EINVAL);
         }
         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
@@ -274,7 +276,7 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                        lmm.lmm_stripe_size, PAGE_SIZE);
                 RETURN(-EINVAL);
         }
-        if (lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
+        if ((__u64)lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
                 CERROR("stripe width %ux%u > %lu on 32-bit system\n",
-                       lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
+                       lmm.lmm_stripe_size, lmm.lmm_stripe_count, ~0UL);
                 RETURN(-EINVAL);
@@ -288,7 +290,6 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
                 RETURN(-ENOMEM);
 
         lsm->lsm_magic = LOV_MAGIC;
-        /* This is all validated in lov_create() */
         lsm->lsm_stripe_count = stripe_count;
         lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
         lsm->lsm_stripe_size = lmm.lmm_stripe_size;
index 0812e00..648f80b 100644 (file)
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
+#include <linux/obd_class.h>
 
-/*
- * Common STATUS namespace
- */
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+#else
 
-int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
-            void *data)
-{
-        struct obd_device* dev = (struct obd_device*)data;
-        return snprintf(page, count, "%s\n", dev->obd_uuid);
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
 
 int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
                   void *data)
 {
-        struct obd_device *dev = (struct obd_device*)data;
+        struct obd_device *dev = (struct obd_device *)data;
         struct lov_desc *desc = &dev->u.lov.desc;
 
+        *eof = 1;
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
 }
 
 int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
                     void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
-        struct lov_obd* lov = &dev->u.lov;
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc = &dev->u.lov.desc;
 
-        return snprintf(page, count, LPU64"\n",
-                        lov->desc.ld_default_stripe_offset);
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
 }
 
 int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
                   void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
-        struct lov_obd* lov = &dev->u.lov;
+        struct lov_desc *desc = &dev->u.lov.desc;
 
-        return snprintf(page, count, "%u\n", lov->desc.ld_pattern);
+        *eof = 1;
+        return snprintf(page, count, "%u\n", desc->ld_pattern);
 }
 
 int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
                    void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
-        struct lov_obd* lov = &dev->u.lov;
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc = &dev->u.lov.desc;
 
-        return snprintf(page, count, "%u\n", lov->desc.ld_default_stripe_count);
+        *eof = 1;
+        return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
 }
 
 int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
               void *data)
 {
         struct obd_device *dev = (struct obd_device*)data;
-        struct lov_obd *lov = &dev->u.lov;
+        struct lov_desc *desc = &dev->u.lov.desc;
 
-        return snprintf(page, count, "%u\n", lov->desc.ld_tgt_count);
+        *eof = 1;
+        return snprintf(page, count, "%u\n", desc->ld_tgt_count);
 
 }
 
@@ -86,103 +91,64 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
                  void *data)
 {
         struct obd_device* dev = (struct obd_device*)data;
-        struct lov_obd* lov = &dev->u.lov;
-
-        return snprintf(page, count, "%u\n", lov->desc.ld_active_tgt_count);
-}
-
-int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        return 0;
-}
-
-
-int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        return 0;
-}
-
-
-int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        return 0;
-}
-
-int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
-{
-        return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
-                 void *data)
-{
-        return 0;
-}
+        struct lov_desc *desc = &dev->u.lov.desc; /* LOV descriptor embedded in the device */
 
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
-                  void *data)
-{
-        return 0;
+        *eof = 1; /* the whole value is produced by the single snprintf() below */
+        return snprintf(page, count, "%u\n", desc->ld_active_tgt_count); /* active target count as unsigned decimal */
 }
 
 int rd_target(char *page, char **start, off_t off, int count, int *eof,
               void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
-        int len = 0, i = 0;
-        struct lov_obd* lov = &dev->u.lov;
-        struct lov_tgt_desc* tgts = lov->tgts;
-        while (i < lov->desc.ld_tgt_count) {
-                len += snprintf(&page[len], count - len, "%d: %s %sACTIVE\n",
-                                i, tgts->uuid, tgts->active ? "" : "IN");
-                i++;
-                tgts++;
+        struct obd_device *dev = (struct obd_device*) data;
+        int len = 0, i;
+        struct lov_obd *lov = &dev->u.lov;
+        struct lov_tgt_desc *tgts = lov->tgts;
+
+        for (i = 0; i < lov->desc.ld_tgt_count; i++, tgts++) {
+                int cur;
+                cur = snprintf(&page[len], count, "%d: %s %sACTIVE\n",
+                                i, tgts->uuid.uuid, tgts->active ? "" : "IN");
+                len += cur;
+                count -= cur;
         }
 
+        *eof = 1;
         return len;
 }
 
-int rd_mdc(charpage, char **start, off_t off, int count, int *eof, void *data)
+int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
-        struct obd_device* dev = (struct obd_device*)data;
-        int len = 0;
-        struct lov_obd* lov = &dev->u.lov;
-        len += snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid);
-        return len;
-}
+        struct obd_device *dev = (struct obd_device*) data; /* data is interpreted as the obd_device being read */
+        struct lov_obd *lov = &dev->u.lov; /* LOV-specific part of the device */
 
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/stripesize",rd_stripesize, 0, 0},
-        {"status/stripeoffset",rd_stripeoffset, 0, 0},
-        {"status/stripecount",rd_stripecount, 0, 0},
-        {"status/stripetype", rd_stripetype, 0, 0},
-        {"status/numobd",rd_numobd, 0, 0},
-        {"status/activeobd", rd_activeobd, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {"status/blocksize", rd_blksize, 0, 0},
-        {"status/kbytestotal", rd_kbtotal, 0, 0},
-        {"status/kbytesfree", rd_kbfree, 0, 0},
-        {"status/target_obd", rd_target, 0, 0},
-        {"status/target_mdc", rd_mdc, 0, 0},
-        {0}
+        *eof = 1; /* the whole value is produced by the single snprintf() below */
+        return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid); /* NOTE(review): mdcobd is dereferenced unchecked - confirm it is always set */
+}
+
+struct lprocfs_vars lprocfs_obd_vars[] = { /* per-device entries: name, read handler, two fields left zero */
+        { "uuid",         lprocfs_rd_uuid, 0, 0 },
+        { "stripesize",   rd_stripesize,   0, 0 },
+        { "stripeoffset", rd_stripeoffset, 0, 0 },
+        { "stripecount",  rd_stripecount,  0, 0 },
+        { "stripetype",   rd_stripetype,   0, 0 },
+        { "numobd",       rd_numobd,       0, 0 },
+        { "activeobd",    rd_activeobd,    0, 0 },
+        { "filestotal",   rd_filestotal,   0, 0 },
+        { "filesfree",    rd_filesfree,    0, 0 },
+        { "filegroups",   rd_filegroups,   0, 0 },
+        { "blocksize",    rd_blksize,      0, 0 },
+        { "kbytestotal",  rd_kbytestotal,  0, 0 },
+        { "kbytesfree",   rd_kbytesfree,   0, 0 },
+        { "target_obd",   rd_target,       0, 0 },
+        { "target_mdc",   rd_mdc,          0, 0 },
+        { 0 } /* presumably the end-of-table marker - confirm against the lprocfs registration code */
 };
 
-int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-
-        return snprintf(page, count, "%d\n", class->typ_refcnt);
-}
-
-struct lprocfs_vars status_class_var[]={
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = { /* module-level entries; the local rd_numrefs is replaced by lprocfs_rd_numrefs */
+        { "num_refs",     lprocfs_rd_numrefs, 0, 0 },
+        { 0 } /* presumably the end-of-table marker - confirm against the lprocfs registration code */
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index b0fcad6..f5b5b80 100644 (file)
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
-
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
-{
-
-       struct obd_device* temp = (struct obd_device*)data;
-       int len = 0;
-       len += snprintf(page, count, "%s\n",temp->obd_uuid);   
-       return len;
-
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        return 0;
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        return 0;
-}
-
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof, 
-              void *data)
-{
-        return 0;
-}
-
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        return 0;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-int rd_conn_uuid(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct client_obd* cli = &temp->u.cli;
-        struct obd_import* imp = &cli->cl_import;
-        int len = 0;
-        
-        len += snprintf(page, count, "%s\n",imp->imp_connection->c_remote_uuid);   
-        return len;
-}
-
-int rd_server_uuid(char* page, char **start, off_t off, int count, int *eof, 
-                   void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct client_obd* cli = &temp->u.cli;
-        int len = 0;
-        
-        len += snprintf(page, count, "%s\n",cli->cl_target_uuid);   
-        return len;
-}
-
-int rd_server_name(char* page, char **start, off_t off, int count, int *eof, 
-                   void *data)
-{
-        return 0;
-        
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytestotal",rd_kbtotal, 0, 0},
-        {"status/kbytesfree", rd_kbfree, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {"status/mds_server_uuid", rd_server_uuid, 0, 0},
-        {"status/mds_conn_uuid", rd_conn_uuid, 0, 0},
-        {0}
+#ifndef LPROCFS /* without LPROCFS both tables contain only a zeroed entry */
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs); /* presumably expands to a read handler of this name - confirm in lprocfs_status.h */
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
+
+struct lprocfs_vars lprocfs_obd_vars[] = { /* per-device entries: name, read handler, two fields left zero */
+        { "uuid",            lprocfs_rd_uuid,        0, 0 },
+        { "blocksize",       rd_blksize,             0, 0 },
+        { "kbytestotal",     rd_kbytestotal,         0, 0 },
+        { "kbytesfree",      rd_kbytesfree,          0, 0 },
+        { "filestotal",      rd_filestotal,          0, 0 },
+        { "filesfree",       rd_filesfree,           0, 0 },
+        { "filegroups",      rd_filegroups,          0, 0 },
+        { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
+        { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
+        { 0 } /* presumably the end-of-table marker - confirm against the lprocfs registration code */
 };
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
 
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = { /* module-level entries; the local rd_numrefs is replaced by lprocfs_rd_numrefs */
+        { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
+        { 0 } /* presumably the end-of-table marker - confirm against the lprocfs registration code */
 };
+
+#endif /* LPROCFS */
+
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 63c1ef0..1fbd346 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
  *
@@ -17,7 +17,6 @@
  *   You should have received a copy of the GNU General Public License
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
 
 #define EXPORT_SYMTAB
 #include <linux/obd_class.h>
 #include <linux/lustre_mds.h>
 
+extern struct semaphore mdc_sem;
+
 static int mdc_reint(struct ptlrpc_request *request, int level)
 {
         int rc;
+        __u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0); /* request buffer 0 is read as the reint opcode word */
+
         request->rq_level = level;
 
+        if (!(*opcodeptr == REINT_SETATTR)) /* every opcode except REINT_SETATTR takes mdc_rpc_lock */
+                mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+
         rc = ptlrpc_queue_wait(request);
+        if (!(*opcodeptr == REINT_SETATTR)) /* same test as above; the opcode is not modified until after this release */
+                mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
 
         if (rc) {
-                CERROR("error in handling %d\n", rc);
+                CDEBUG(D_INFO, "error in handling %d\n", rc); /* logged via CDEBUG(D_INFO) instead of CERROR */
         } else {
                 /* For future resend/replays. */
-                u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0);
                 *opcodeptr |= REINT_REPLAYING;
         }
         return rc;
 }
 
-int mdc_setattr(struct lustre_handle *conn,
-                struct inode *inode, struct iattr *iattr,
+int mdc_setattr(struct lustre_handle *conn, struct inode *inode,
+                struct iattr *iattr, void *ea, int ealen,
                 struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mds_rec_setattr *rec;
-        int rc, size = sizeof(*rec);
+        int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
         ENTRY;
 
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 1, &size,
-                              NULL);
+        if (ealen > 0)
+                bufcount = 2;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
+                              size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        mds_setattr_pack(req, 0, inode, iattr, NULL, 0);
+        /* XXX FIXME bug 249 */
+        req->rq_request_portal = MDS_GETATTR_PORTAL;
+
+        mds_setattr_pack(req, inode, iattr, ea, ealen);
 
-        size = sizeof(struct mds_body);
-        req->rq_replen = lustre_msg_size(1, &size);
+        size[0] = sizeof(struct mds_body);
+        req->rq_replen = lustre_msg_size(1, size);
 
         rc = mdc_reint(req, LUSTRE_CONN_FULL);
         *request = req;
-        if (rc == -ERESTARTSYS )
+        if (rc == -ERESTARTSYS)
                 rc = 0;
 
         RETURN(rc);
@@ -113,7 +126,8 @@ int mdc_create(struct lustre_handle *conn, struct inode *dir,
                 goto resend;
         }
 
-        mdc_store_inode_generation(req, 0, 0);
+        if (!rc)
+                mdc_store_inode_generation(req, 0, 0);
 
         *request = req;
         RETURN(rc);
@@ -123,47 +137,52 @@ int mdc_unlink(struct lustre_handle *conn, struct inode *dir,
                struct inode *child, __u32 mode, const char *name, int namelen,
                struct ptlrpc_request **request)
 {
-        struct ptlrpc_request *req;
+        struct obd_device *obddev = class_conn2obd(conn); /* used below for cl_max_mds_easize */
+        struct ptlrpc_request *req = *request; /* read only so the LASSERT below can check it */
         int rc, size[2] = {sizeof(struct mds_rec_unlink), namelen + 1};
         ENTRY;
 
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL);
+        LASSERT(req == NULL); /* callers must pass *request == NULL */
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
+                              NULL);
         if (!req)
                 RETURN(-ENOMEM);
-
-        mds_unlink_pack(req, 0, dir, child, mode, name, namelen);
+        *request = req; /* stored before sending, so the caller receives req even when mdc_reint() fails */
 
         size[0] = sizeof(struct mds_body);
-        req->rq_replen = lustre_msg_size(1, size);
+        size[1] = obddev->u.cli.cl_max_mds_easize; /* second reply buffer sized to cl_max_mds_easize */
+        req->rq_replen = lustre_msg_size(2, size); /* reply length now covers two buffers */
+
+        mds_unlink_pack(req, 0, dir, child, mode, name, namelen); /* request is packed after the reply size is set */
 
         rc = mdc_reint(req, LUSTRE_CONN_FULL);
-        *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
-
         RETURN(rc);
 }
 
 int mdc_link(struct lustre_handle *conn,
-             struct dentry *src, struct inode *dir, const char *name,
+             struct inode *src, struct inode *dir, const char *name,
              int namelen, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         int rc, size[2] = {sizeof(struct mds_rec_link), namelen + 1};
         ENTRY;
 
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL);
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
+                              NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        mds_link_pack(req, 0, src->d_inode, dir, name, namelen);
+        mds_link_pack(req, 0, src, dir, name, namelen);
 
         size[0] = sizeof(struct mds_body);
         req->rq_replen = lustre_msg_size(1, size);
 
         rc = mdc_reint(req, LUSTRE_CONN_FULL);
         *request = req;
-        if (rc == -ERESTARTSYS )
+        if (rc == -ERESTARTSYS)
                 rc = 0;
 
         RETURN(rc);
@@ -179,7 +198,8 @@ int mdc_rename(struct lustre_handle *conn,
                            newlen + 1};
         ENTRY;
 
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size, NULL);
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
+                              NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
index a97cfb5..101e63d 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
  *
  *   You should have received a copy of the GNU General Public License
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
 
 #define EXPORT_SYMTAB
 #define DEBUG_SUBSYSTEM S_MDC
 
 #include <linux/module.h>
+#include <linux/pagemap.h>
 #include <linux/miscdevice.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_lite.h>
@@ -34,8 +34,8 @@
 #define REQUEST_MINOR 244
 
 extern int mds_queue_req(struct ptlrpc_request *);
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
+struct mdc_rpc_lock mdc_rpc_lock;
+EXPORT_SYMBOL(mdc_rpc_lock);
 
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
@@ -53,10 +53,12 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         body = lustre_msg_buf(req->rq_reqmsg, 0);
         req->rq_level = level;
         req->rq_replen = lustre_msg_size(1, &size);
-        
+
         mds_pack_req_body(req);
         req->rq_reqmsg->flags |= msg_flags;
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
         rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
 
         if (!rc) {
                 body = lustre_msg_buf(req->rq_repmsg, 0);
@@ -105,8 +107,9 @@ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
         size[0] = 512;
         size[1] = 8192;
         req->rq_replen = lustre_msg_size(2, size);
-
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
         rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
 
  out:
         RETURN(rc);
@@ -129,6 +132,9 @@ int mdc_getattr(struct lustre_handle *conn,
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
+        /* XXX FIXME bug 249 */
+        req->rq_request_portal = MDS_GETATTR_PORTAL;
+
         body = lustre_msg_buf(req->rq_reqmsg, 0);
         ll_ino2fid(&body->fid1, ino, 0, type);
         body->valid = valid;
@@ -143,15 +149,16 @@ int mdc_getattr(struct lustre_handle *conn,
         req->rq_replen = lustre_msg_size(bufcount, size);
         mds_pack_req_body(req);
 
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
         rc = ptlrpc_queue_wait(req);
-
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
         if (!rc) {
                 body = lustre_msg_buf(req->rq_repmsg, 0);
                 mds_unpack_body(body);
                 CDEBUG(D_NET, "mode: %o\n", body->mode);
         }
 
-        EXIT;
+        GOTO(out, rc);
  out:
         *request = req;
         return rc;
@@ -188,8 +195,9 @@ int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
         req->rq_replen = lustre_msg_size(bufcount, size);
         mds_pack_req_body(req);
 
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
         rc = ptlrpc_queue_wait(req);
-
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
         if (!rc) {
                 body = lustre_msg_buf(req->rq_repmsg, 0);
                 mds_unpack_body(body);
@@ -201,32 +209,26 @@ int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
         return rc;
 }
 
-void d_delete_aliases(struct inode *inode)
+/* This should be called with both the request and the reply still packed. */
+void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
+                                int repoff)
 {
-        struct dentry *dentry = NULL;
-       struct list_head *tmp;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        ENTRY;
-
-       spin_lock(&dcache_lock);
-        list_for_each(tmp, &inode->i_dentry) {
-                dentry = list_entry(tmp, struct dentry, d_alias);
-
-                list_del_init(&dentry->d_hash);
-                list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
-        }
+        struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff); /* create record at request buffer reqoff */
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff); /* MDS body at reply buffer repoff */
 
-        spin_unlock(&dcache_lock);
-        EXIT;
+        memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); /* copy the reply's fid1 into the request's cr_replayfid */
+        DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
+                  rec->cr_replayfid.generation, rec->cr_replayfid.id);
 }
 
 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                            void *data, __u32 data_len, int flag)
+                            void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
         ENTRY;
 
+
         switch (flag) {
         case LDLM_CB_BLOCKING:
                 ldlm_lock2handle(lock, &lockh);
@@ -238,15 +240,15 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 break;
         case LDLM_CB_CANCELING: {
                 /* Invalidate all dentries associated with this inode */
-                struct inode *inode;
+                struct inode *inode = lock->l_data;
 
                 LASSERT(data != NULL);
-                LASSERT(data_len == sizeof(*inode));
 
                 /* XXX what tells us that 'data' is a valid inode at all?
                  *     we should probably validate the lock handle first?
                  */
-                inode = igrab(data);
+
+                inode = igrab(inode);
 
                 if (inode == NULL)      /* inode->i_state & I_FREEING */
                         break;
@@ -259,7 +261,7 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 }
 
                 if (inode != inode->i_sb->s_root->d_inode)
-                        d_delete_aliases(inode);
+                        d_unhash_aliases(inode);
 
                 iput(inode);
                 break;
@@ -271,18 +273,6 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         RETURN(0);
 }
 
-/* This should be called with both the request and the reply still packed. */
-void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
-                                int repoff)
-{
-        struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
-        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
-
-        memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
-        DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
-                  rec->cr_replayfid.generation, rec->cr_replayfid.id);
-}
-
 /* We always reserve enough space in the reply packet for a stripe MD, because
  * we don't know in advance the file type.
  *
@@ -295,12 +285,14 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
 {
         struct ptlrpc_request *req;
         struct obd_device *obddev = class_conn2obd(conn);
-        __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
+        struct ldlm_res_id res_id =
+                { .name = {dir->i_ino, dir->i_generation} };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
         int repsize[3] = {sizeof(struct ldlm_reply),
                           sizeof(struct mds_body),
                           obddev->u.cli.cl_max_mds_easize};
+        struct mdc_unlink_data *d = data;
         struct ldlm_reply *dlm_rep;
         struct ldlm_intent *lit;
         struct ldlm_request *lockreq;
@@ -309,79 +301,31 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
         LDLM_DEBUG_NOLOCK("mdsintent %s parent dir %lu",
                           ldlm_it2str(it->it_op), dir->i_ino);
 
-        if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
-                switch (it->it_op) {
-                case IT_MKDIR:
-                        it->it_mode |= S_IFDIR;
-                        break;
-                case (IT_CREAT|IT_OPEN):
-                case IT_CREAT:
-                        it->it_mode |= S_IFREG;
-                        break;
-                case IT_SYMLINK:
-                        it->it_mode |= S_IFLNK;
-                        break;
-                }
+        if (it->it_op & IT_OPEN) {
+                it->it_mode |= S_IFREG;
                 it->it_mode &= ~current->fs->umask;
 
                 size[2] = sizeof(struct mds_rec_create);
                 size[3] = de->d_name.len + 1;
-                size[4] = tgtlen + 1;
-                req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
-                                      size, NULL);
-                if (!req)
-                        RETURN(-ENOMEM);
-
-                /* pack the intent */
-                lit = lustre_msg_buf(req->rq_reqmsg, 1);
-                lit->opc = NTOH__u64((__u64)it->it_op);
-
-                /* pack the intended request */
-                mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
-                                current->fsgid, CURRENT_TIME, de->d_name.name,
-                                de->d_name.len, tgt, tgtlen);
-                req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op == IT_RENAME2) {
-                struct dentry *old_de = it->it_data;
-
-                size[2] = sizeof(struct mds_rec_rename);
-                size[3] = old_de->d_name.len + 1;
-                size[4] = de->d_name.len + 1;
-                req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
-                                      size, NULL);
-                if (!req)
-                        RETURN(-ENOMEM);
-
-                /* pack the intent */
-                lit = lustre_msg_buf(req->rq_reqmsg, 1);
-                lit->opc = NTOH__u64((__u64)it->it_op);
-
-                /* pack the intended request */
-                mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
-                                old_de->d_name.name, old_de->d_name.len,
-                                de->d_name.name, de->d_name.len);
-                req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op == IT_LINK2) {
-                struct dentry *old_de = it->it_data;
-
-                size[2] = sizeof(struct mds_rec_link);
-                size[3] = de->d_name.len + 1;
                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
                                       size, NULL);
                 if (!req)
                         RETURN(-ENOMEM);
 
+                req->rq_flags |= PTL_RPC_FL_REPLAY;
+
                 /* pack the intent */
                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
                 lit->opc = NTOH__u64((__u64)it->it_op);
 
                 /* pack the intended request */
-                mds_link_pack(req, 2, old_de->d_inode, dir,
-                              de->d_name.name, de->d_name.len);
+                mds_open_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
+                              current->fsgid, CURRENT_TIME, it->it_flags,
+                              de->d_name.name, de->d_name.len, tgt, tgtlen);
                 req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
+        } else if (it->it_op & IT_UNLINK) {
                 size[2] = sizeof(struct mds_rec_unlink);
-                size[3] = de->d_name.len + 1;
+                size[3] = d->unl_len + 1;
                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
                                       size, NULL);
                 if (!req)
@@ -392,13 +336,12 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 lit->opc = NTOH__u64((__u64)it->it_op);
 
                 /* pack the intended request */
-                mds_unlink_pack(req, 2, dir, NULL,
-                                it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
-                                de->d_name.name, de->d_name.len);
-
+                mds_unlink_pack(req, 2, d->unl_dir, 
+                                d->unl_de, d->unl_mode,
+                                d->unl_name, d->unl_len);
                 req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
-                   IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
+        } else if (it->it_op & (IT_GETATTR| IT_SETATTR | IT_LOOKUP)) {
+                int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
                 size[3] = de->d_name.len + 1;
 
@@ -412,8 +355,8 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 lit->opc = NTOH__u64((__u64)it->it_op);
 
                 /* pack the intended request */
-                mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
-
+                mds_getattr_pack(req, valid, 2, it->it_flags,  dir,
+                                 de->d_name.name, de->d_name.len);
                 /* get ready for the reply */
                 req->rq_replen = lustre_msg_size(3, repsize);
         } else if (it->it_op == IT_READDIR) {
@@ -424,163 +367,128 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
 
                 /* get ready for the reply */
                 req->rq_replen = lustre_msg_size(1, repsize);
-        } else {
+        }  else {
                 LBUG();
                 RETURN(-EINVAL);
         }
 
+        mdc_get_rpc_lock(&mdc_rpc_lock, it);
         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
                               lock_type, NULL, 0, lock_mode, &flags,
-                              ldlm_completion_ast, mdc_blocking_ast, data,
-                              datalen, lockh);
-
-        if (it->it_op != IT_READDIR) {
-                /* XXX This should become a lustre_msg flag, but for now... */
-                __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
-                *opp |= REINT_REPLAYING;
+                              ldlm_completion_ast, mdc_blocking_ast, dir, NULL,
+                              lockh);
+
+        /* If we successfully created, mark the request so that replay will
+         * do the right thing */
+        if (req->rq_transno) {
+                struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 2);
+                rec->cr_opcode |= REINT_REPLAYING;
         }
-
-        if (rc == -ENOENT) {
-                /* This can go when we're sure that this can never happen */
-                LBUG();
+        /* Similarly, if we're going to replay this request, we don't want to
+         * actually get a lock, just perform the intent. */
+        if (req->rq_transno || (req->rq_flags & PTL_RPC_FL_REPLAY)) {
+                lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
+                lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
         }
+
+        dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
+
+        /* This can go when we're sure that this can never happen */
+        LASSERT(rc != -ENOENT);
         if (rc == ELDLM_LOCK_ABORTED) {
                 lock_mode = 0;
                 memset(lockh, 0, sizeof(*lockh));
-                /* rc = 0 */
         } else if (rc != 0) {
                 CERROR("ldlm_cli_enqueue: %d\n", rc);
                 RETURN(rc);
-        } else {
-                /* The server almost certainly gave us a lock other than the one
-                 * that we asked for.  If we already have a matching lock, then
-                 * cancel this one--we don't need two. */
+        } else { /* rc = 0 */
                 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
                 struct lustre_handle lockh2;
                 LASSERT(lock);
 
+                /* If the server gave us back a different lock mode, we should
+                 * fix up our variables. */
+                if (lock->l_req_mode != lock_mode) {
+                        ldlm_lock_addref(lockh, lock->l_req_mode);
+                        ldlm_lock_decref(lockh, lock_mode);
+                        lock_mode = lock->l_req_mode;
+                }
+
+                /* The server almost certainly gave us a lock other than the
+                 * one that we asked for.  If we already have a matching lock,
+                 * then cancel this one--we don't need two. */
                 LDLM_DEBUG(lock, "matching against this");
 
                 memcpy(&lockh2, lockh, sizeof(lockh2));
-                if (ldlm_lock_match(NULL, NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
-                                    &lockh2)) {
-                        /* We already have a lock; cancel the old one */
-                        ldlm_lock_decref(lockh, lock_mode);
-                        /* FIXME: bug 563 */
-                        //ldlm_cli_cancel(lockh);
+                if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
+                                    LDLM_PLAIN, NULL, 0, LCK_NL, &lockh2)) {
+                        /* We already have a lock; cancel the new one */
+                        ldlm_lock_decref_and_cancel(lockh, lock_mode);
                         memcpy(lockh, &lockh2, sizeof(lockh2));
                 }
                 LDLM_LOCK_PUT(lock);
         }
 
-        /* On replay, we don't want the lock granted. */
-        lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
-        lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
-
-        dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
         it->it_disposition = (int) dlm_rep->lock_policy_res1;
         it->it_status = (int) dlm_rep->lock_policy_res2;
         it->it_lock_mode = lock_mode;
         it->it_data = req;
 
-        RETURN(0);
+        RETURN(rc);
+}
+
+void mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
+{
+        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+        ENTRY;
+
+        LASSERT(lock != NULL);
+        lock->l_data = inode;
+        LDLM_LOCK_PUT(lock);
+        EXIT;
 }
 
 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
                       int flags)
 {
-        __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
+        struct ldlm_res_id res_id =
+                { .name = {inode->i_ino, inode->i_generation} };
         struct obd_device *obddev = class_conn2obd(conn);
         ENTRY;
-        RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
+        RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags));
 }
 
-struct replay_open_data {
-        struct lustre_handle *fh;
-};
-
 static void mdc_replay_open(struct ptlrpc_request *req)
 {
-        int offset;
-        struct replay_open_data *saved;
+        struct lustre_handle old, *file_fh = req->rq_replay_data;
+        struct list_head *tmp;
         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
 
-        if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
-                offset = 2;
-        else
-                offset = 1;
-
-        saved = lustre_msg_buf(req->rq_reqmsg, offset);
         mds_unpack_body(body);
+        memcpy(&old, file_fh, sizeof(old));
         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
-               saved->fh->addr, saved->fh->cookie,
-               body->handle.addr, body->handle.cookie);
-        memcpy(saved->fh, &body->handle, sizeof(body->handle));
+               file_fh->addr, file_fh->cookie, body->handle.addr,
+               body->handle.cookie);
+        memcpy(file_fh, &body->handle, sizeof(body->handle));
+
+        /* A few frames up, ptlrpc_replay holds the lock, so this is safe. */
+        list_for_each(tmp, &req->rq_import->imp_sending_list) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                if (req->rq_reqmsg->opc != MDS_CLOSE)
+                        continue;
+                body = lustre_msg_buf(req->rq_reqmsg, 0);
+                if (memcmp(&body->handle, &old, sizeof(old)))
+                        continue;
+
+                DEBUG_REQ(D_HA, req, "updating close body with new fh");
+                memcpy(&body->handle, file_fh, sizeof(*file_fh));
+        }
 }
 
-/* If lmm is non-NULL and lmm_size is non-zero, the stripe MD is stored on
- * the MDS.  Otherwise, we have already read a copy from the MDS (probably
- * during mdc_enqueue() and we do not need to send it to the MDS again.
- *
- * In the future (when we support the non-intent case) we need to be able
- * to read the stripe MD from the MDS here (need to fix mds_open() too).
- */
-int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
-             struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
-             struct ptlrpc_request **request)
+void mdc_set_open_replay_data(struct ll_file_data *fd)
 {
-        struct mds_body *body;
-        struct replay_open_data *replay_data;
-        int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
-        struct ptlrpc_request *req;
-        ENTRY;
-
-        if (lmm_size) {
-                bufcount = 3;
-                size[2] = size[1]; /* shuffle the replay data along */
-                size[1] = lmm_size;
-        }
-
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
-                              NULL);
-        if (!req)
-                GOTO(out, rc = -ENOMEM);
-
-        req->rq_flags |= PTL_RPC_FL_REPLAY;
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-
-        ll_ino2fid(&body->fid1, ino, 0, type);
-        body->flags = HTON__u32(flags);
-        memcpy(&body->handle, fh, sizeof(body->handle));
-
-        if (lmm_size) {
-                body->flags |= HTON__u32(OBD_MD_FLEASIZE);
-                if (lmm) {
-                        CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
-                               lmm_size, ino);
-                        lustre_msg_set_op_flags(req->rq_reqmsg,MDS_OPEN_HAS_EA);
-                        memcpy(lustre_msg_buf(req->rq_reqmsg,1), lmm, lmm_size);
-                }
-        }
-
-        req->rq_replen = lustre_msg_size(1, size);
-
-        rc = ptlrpc_queue_wait(req);
-        if (!rc) {
-                body = lustre_msg_buf(req->rq_repmsg, 0);
-                mds_unpack_body(body);
-                memcpy(fh, &body->handle, sizeof(*fh));
-
-                /* If open is replayed, we need to fix up the fh. */
-                req->rq_replay_cb = mdc_replay_open;
-                replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
-                replay_data->fh = fh;
-        }
-
-        EXIT;
- out:
-        *request = req;
-        return rc;
+        fd->fd_req->rq_replay_cb = mdc_replay_open;
+        fd->fd_req->rq_replay_data = &fd->fd_mdshandle;
 }
 
 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
@@ -613,12 +521,14 @@ int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
                  char *addr, struct ptlrpc_request **request)
 {
-        struct ptlrpc_connection *connection = 
+        struct obd_import *imp = class_conn2cliimp(conn);
+        struct ptlrpc_connection *connection =
                 client_conn2cli(conn)->cl_import.imp_connection;
         struct ptlrpc_request *req = NULL;
         struct ptlrpc_bulk_desc *desc = NULL;
         struct ptlrpc_bulk_page *bulk = NULL;
         struct mds_body *body;
+        unsigned long flags;
         int rc, size = sizeof(*body);
         ENTRY;
 
@@ -628,25 +538,30 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
         if (desc == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
-                              NULL);
+        req = ptlrpc_prep_req(imp, MDS_READPAGE, 1, &size, NULL);
         if (!req)
                 GOTO(out2, rc = -ENOMEM);
 
         bulk = ptlrpc_prep_bulk_page(desc);
-        bulk->bp_buflen = PAGE_SIZE;
+        if (bulk == NULL)
+                GOTO(out2, rc = -ENOMEM);
+
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        bulk->bp_xid = ++imp->imp_last_bulk_xid;
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+        bulk->bp_buflen = PAGE_CACHE_SIZE;
         bulk->bp_buf = addr;
-        bulk->bp_xid = req->rq_xid;
+
         desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = MDS_BULK_PORTAL;
 
-        rc = ptlrpc_register_bulk(desc);
+        rc = ptlrpc_register_bulk_put(desc);
         if (rc) {
                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
                 GOTO(out2, rc);
         }
 
-        mds_readdir_pack(req, offset, ino, type);
+        mds_readdir_pack(req, offset, ino, type, bulk->bp_xid);
 
         req->rq_replen = lustre_msg_size(1, &size);
         rc = ptlrpc_queue_wait(req);
@@ -679,7 +594,9 @@ static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
 
         req->rq_replen = lustre_msg_size(1, &size);
 
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
         rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
 
         if (rc)
                 GOTO(out, rc);
@@ -695,19 +612,22 @@ out:
 
 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 static int mdc_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 /* Send a mostly-dummy GETSTATUS request and indicate that we're done replay. */
 static int signal_completed_replay(struct obd_import *imp)
 {
         struct ll_fid fid;
-        
+
         return send_getstatus(imp, &fid, LUSTRE_CONN_RECOVD, MSG_LAST_REPLAY);
 }
 
@@ -716,51 +636,63 @@ static int mdc_recover(struct obd_import *imp, int phase)
         int rc;
         unsigned long flags;
         struct ptlrpc_request *req;
+        struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
         ENTRY;
 
         switch(phase) {
             case PTLRPC_RECOVD_PHASE_PREPARE:
-                ldlm_cli_cancel_unused(imp->imp_obd->obd_namespace,
-                                       NULL, LDLM_FL_LOCAL_ONLY);
+                ldlm_cli_cancel_unused(ns, NULL, LDLM_FL_LOCAL_ONLY);
                 RETURN(0);
+
+            case PTLRPC_RECOVD_PHASE_NOTCONN:
+                ldlm_namespace_cleanup(ns, 1);
+                ptlrpc_abort_inflight(imp, 0);
+                /* FALL THROUGH */
             case PTLRPC_RECOVD_PHASE_RECOVER:
         reconnect:
                 rc = ptlrpc_reconnect_import(imp, MDS_CONNECT, &req);
 
-                /* We were still connected, just go about our business. */
-                if (rc == EALREADY)
-                        GOTO(skip_replay, rc);
+                flags = req->rq_repmsg
+                        ? lustre_msg_get_op_flags(req->rq_repmsg)
+                        : 0;
+
+                if (rc == -EBUSY && (flags & MSG_CONNECT_RECOVERING))
+                        CERROR("reconnect denied by recovery; should retry\n");
 
                 if (rc) {
-                        ptlrpc_req_finished(req);
-                        RETURN(rc);
-                }
-                
-                /* We can't replay, which might be a problem. */
-                if (!(lustre_msg_get_flags(req->rq_repmsg) &
-                      MSG_REPLAY_IN_PROGRESS)) {
                         if (phase != PTLRPC_RECOVD_PHASE_NOTCONN) {
-                             CERROR("can't replay, invalidating\n");
-                             ldlm_namespace_cleanup(imp->imp_obd->obd_namespace,
-                                                    1);
-                             ptlrpc_abort_inflight(imp);
+                                CERROR("can't reconnect, invalidating\n");
+                                ldlm_namespace_cleanup(ns, 1);
+                                ptlrpc_abort_inflight(imp, 0);
                         }
-                        goto skip_replay;
-                }
-
-                rc = ptlrpc_replay(imp);
-                if (rc)
-                        RETURN(rc);
-
-                rc = ldlm_replay_locks(imp);
-                if (rc)
+                        ptlrpc_req_finished(req);
                         RETURN(rc);
+                }
 
-                rc = signal_completed_replay(imp);
-                if (rc)
-                        RETURN(rc);
+                if (flags & MSG_CONNECT_RECOVERING) {
+                        /* Replay if they want it. */
+                        DEBUG_REQ(D_HA, req, "MDS wants replay");
+                        rc = ptlrpc_replay(imp);
+                        if (rc)
+                                GOTO(check_rc, rc);
+
+                        rc = ldlm_replay_locks(imp);
+                        if (rc)
+                                GOTO(check_rc, rc);
+
+                        rc = signal_completed_replay(imp);
+                        if (rc)
+                                GOTO(check_rc, rc);
+                } else if (flags & MSG_CONNECT_RECONNECT) {
+                        DEBUG_REQ(D_HA, req, "reconnecting to MDS\n");
+                        /* Nothing else to do here. */
+                } else {
+                        DEBUG_REQ(D_HA, req, "evicted: invalidating\n");
+                        /* Otherwise, clean everything up. */
+                        ldlm_namespace_cleanup(ns, 1);
+                        ptlrpc_abort_inflight(imp, 0);
+                }
 
-        skip_replay:
                 ptlrpc_req_finished(req);
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 imp->imp_level = LUSTRE_CONN_FULL;
@@ -771,14 +703,16 @@ static int mdc_recover(struct obd_import *imp, int phase)
 
                 rc = ptlrpc_resend(imp);
                 if (rc)
-                        RETURN(rc);
+                        GOTO(check_rc, rc);
 
                 RETURN(0);
-
-            case PTLRPC_RECOVD_PHASE_NOTCONN:
-                ldlm_namespace_cleanup(imp->imp_obd->obd_namespace, 1);
-                ptlrpc_abort_inflight(imp);
-                goto reconnect;
+        check_rc:
+                /* If we get disconnected in the middle, recovery has probably
+                 * failed.  Reconnect and find out.
+                 */
+                if (rc == -ENOTCONN)
+                        goto reconnect;
+                RETURN(rc);
 
             default:
                 RETURN(-EINVAL);
@@ -786,7 +720,7 @@ static int mdc_recover(struct obd_import *imp, int phase)
 }
 
 static int mdc_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
         struct obd_import *imp = &obd->u.cli.cl_import;
@@ -807,7 +741,10 @@ struct obd_ops mdc_obd_ops = {
 
 static int __init ptlrpc_request_init(void)
 {
-        return class_register_type(&mdc_obd_ops, status_class_var,
+        struct lprocfs_static_vars lvars;
+        mdc_init_rpc_lock(&mdc_rpc_lock);
+        lprocfs_init_vars(&lvars);
+        return class_register_type(&mdc_obd_ops, lvars.module_vars,
                                    LUSTRE_MDC_NAME);
 }
 
@@ -816,11 +753,10 @@ static void __exit ptlrpc_request_exit(void)
         class_unregister_type(LUSTRE_MDC_NAME);
 }
 
-MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Metadata Client");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(d_delete_aliases);
 EXPORT_SYMBOL(mdc_getstatus);
 EXPORT_SYMBOL(mdc_getlovinfo);
 EXPORT_SYMBOL(mdc_enqueue);
@@ -834,7 +770,8 @@ EXPORT_SYMBOL(mdc_link);
 EXPORT_SYMBOL(mdc_readpage);
 EXPORT_SYMBOL(mdc_setattr);
 EXPORT_SYMBOL(mdc_close);
-EXPORT_SYMBOL(mdc_open);
+EXPORT_SYMBOL(mdc_lock_set_inode);
+EXPORT_SYMBOL(mdc_set_open_replay_data);
 
 EXPORT_SYMBOL(mdc_store_inode_generation);
 
index 12f06fc..f789c22 100644 (file)
@@ -10,7 +10,7 @@ MODULE = mds
 modulefs_DATA = mds.o
 EXTRA_PROGRAMS = mds
 
-LINX= mds_updates.c simple.c target.c
+LINX= mds_updates.c mds_open.c simple.c target.c
 
 mds_updates.c: 
        test -e mds_updates.c || ln -sf $(top_srcdir)/lib/mds_updates.c
index bfdad03..e700a7a 100644 (file)
@@ -4,7 +4,7 @@
  *  lustre/mds/handler.c
  *  Lustre Metadata Server (mds) request handler
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter Braam <braam@clusterfs.com>
  *   Author: Andreas Dilger <adilger@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
 #include <linux/locks.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <linux/buffer_head.h>
+#include <linux/workqueue.h>
 #endif
 #include <linux/obd_lov.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
-static kmem_cache_t *mds_file_cache;
+kmem_cache_t *mds_file_cache;
 
 extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
-                           obd_uuid_t *uuidarray);
+                           struct obd_uuid *uuidarray);
 extern int mds_get_lovdesc(struct mds_obd  *obd, struct lov_desc *desc);
 extern void mds_start_transno(struct mds_obd *mds);
 extern int mds_finish_transno(struct mds_obd *mds, void *handle,
                               struct ptlrpc_request *req, int rc);
 static int mds_cleanup(struct obd_device * obddev);
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
 inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
 {
         return &req->rq_export->exp_obd->u.mds;
@@ -73,7 +71,7 @@ static int mds_bulk_timeout(void *data)
 
 /* Assumes caller has already pushed into the kernel filesystem context */
 static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
-                        __u64 offset)
+                        __u64 offset, __u64 xid)
 {
         struct ptlrpc_bulk_desc *desc;
         struct ptlrpc_bulk_page *bulk;
@@ -90,23 +88,26 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         if (bulk == NULL)
                 GOTO(cleanup_bulk, rc = -ENOMEM);
 
-        OBD_ALLOC(buf, PAGE_SIZE);
+        OBD_ALLOC(buf, PAGE_CACHE_SIZE);
         if (buf == NULL)
                 GOTO(cleanup_bulk, rc = -ENOMEM);
 
-        rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf, PAGE_SIZE,
-                             (loff_t *)&offset);
+        CDEBUG(D_EXT2, "reading %lu@"LPU64" from dir %lu (size %llu)\n",
+               PAGE_CACHE_SIZE, offset, file->f_dentry->d_inode->i_ino,
+               file->f_dentry->d_inode->i_size);
+        rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf,
+                             PAGE_CACHE_SIZE, (loff_t *)&offset);
 
-        if (rc != PAGE_SIZE)
+        if (rc != PAGE_CACHE_SIZE)
                 GOTO(cleanup_buf, rc = -EIO);
 
-        bulk->bp_xid = req->rq_xid;
+        bulk->bp_xid = xid;
         bulk->bp_buf = buf;
-        bulk->bp_buflen = PAGE_SIZE;
+        bulk->bp_buflen = PAGE_CACHE_SIZE;
         desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = MDS_BULK_PORTAL;
 
-        rc = ptlrpc_send_bulk(desc);
+        rc = ptlrpc_bulk_put(desc);
         if (rc)
                 GOTO(cleanup_buf, rc);
 
@@ -135,76 +136,26 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
         return rc;
 }
 
-/*
- * Look up a named entry in a directory, and get an LDLM lock on it.
- * 'dir' is a inode for which an LDLM lock has already been taken.
- *
- * If we do not need an exclusive or write lock on this entry (e.g.
- * a read lock for attribute lookup only) then we do not hold the
- * directory semaphore on return.  It is up to the caller to know what
- * type of lock it is getting, and clean up appropriately.
- */
-struct dentry *mds_name2locked_dentry(struct obd_device *obd,
-                                      struct dentry *dir, struct vfsmount **mnt,
-                                      char *name, int namelen, int lock_mode,
-                                      struct lustre_handle *lockh,
-                                      int dir_lock_mode)
-{
-        struct dentry *dchild;
-        int flags = 0, rc;
-        __u64 res_id[3] = {0};
-        ENTRY;
-
-        down(&dir->d_inode->i_sem);
-        dchild = lookup_one_len(name, dir, namelen);
-        if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                up(&dir->d_inode->i_sem);
-                LBUG();
-                RETURN(dchild);
-        }
-        if (dir_lock_mode != LCK_EX && dir_lock_mode != LCK_PW) {
-                up(&dir->d_inode->i_sem);
-                ldlm_lock_decref(lockh, dir_lock_mode);
-        }
-
-        if (lock_mode == 0 || !dchild->d_inode)
-                RETURN(dchild);
-
-        res_id[0] = dchild->d_inode->i_ino;
-        res_id[1] = dchild->d_inode->i_generation;
-        rc = ldlm_match_or_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                   res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                   &flags, ldlm_completion_ast,
-                                   mds_blocking_ast, NULL, 0, lockh);
-        if (rc != ELDLM_OK) {
-                l_dput(dchild);
-                up(&dir->d_inode->i_sem);
-                RETURN(ERR_PTR(-ENOLCK)); /* XXX translate ldlm code */
-        }
-
-        RETURN(dchild);
-}
-
+/* only valid locked dentries or errors should be returned */
 struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid,
                                      struct vfsmount **mnt, int lock_mode,
                                      struct lustre_handle *lockh)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct dentry *de = mds_fid2dentry(mds, fid, mnt), *retval = de;
+        struct ldlm_res_id res_id = { .name = {0} };
         int flags = 0, rc;
-        __u64 res_id[3] = {0};
         ENTRY;
 
         if (IS_ERR(de))
                 RETURN(de);
 
-        res_id[0] = de->d_inode->i_ino;
-        res_id[1] = de->d_inode->i_generation;
-        rc = ldlm_match_or_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                   res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                   &flags, ldlm_completion_ast,
-                                   mds_blocking_ast, NULL, 0, lockh);
+        res_id.name[0] = de->d_inode->i_ino;
+        res_id.name[1] = de->d_inode->i_generation;
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                              res_id, LDLM_PLAIN, NULL, 0, lock_mode,
+                              &flags, ldlm_completion_ast,
+                              mds_blocking_ast, NULL, NULL, lockh);
         if (rc != ELDLM_OK) {
                 l_dput(de);
                 retval = ERR_PTR(-ENOLCK); /* XXX translate ldlm code */
@@ -217,7 +168,11 @@ struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid,
 #define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED
 #endif
 
+
+
 /* Look up an entry by inode number. */
+/* This function returns ONLY valid, dget'd dentries with an initialized
+ * inode, or an error (callers check the result with IS_ERR()). */
 struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
                               struct vfsmount **mnt)
 {
@@ -277,6 +232,8 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
         return result;
 }
 
+static void mds_abort_recovery(void *data);
+
 /* Establish a connection to the MDS.
  *
  * This will set up an export structure for the client to hold state data
@@ -284,47 +241,25 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
  * on the server, etc.
  */
 static int mds_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
         struct obd_export *exp;
         struct mds_export_data *med;
         struct mds_client_data *mcd;
-        struct list_head *p;
+        struct mds_obd *mds = &obd->u.mds;
         int rc;
         ENTRY;
 
         if (!conn || !obd || !cluuid)
                 RETURN(-EINVAL);
 
-        /* lctl gets a backstage, all-access pass. */
-        if (!strcmp(cluuid, "OBD_CLASS_UUID"))
-                goto dont_check_exports;
-
-        spin_lock(&obd->obd_dev_lock);
-        list_for_each(p, &obd->obd_exports) {
-                exp = list_entry(p, struct obd_export, exp_obd_chain);
-                mcd = exp->exp_mds_data.med_mcd;
-                if (!mcd) {
-                        CERROR("FYI: NULL mcd - simultaneous connects\n");
-                        continue;
-                }
-                if (!memcmp(cluuid, mcd->mcd_uuid, sizeof mcd->mcd_uuid)) {
-                        spin_unlock(&obd->obd_dev_lock);
-                        LASSERT(exp->exp_obd == obd);
-                        
-                        RETURN(target_handle_reconnect(conn, exp, cluuid));
-                }
-        }
-        spin_unlock(&obd->obd_dev_lock);
-
-        if (obd->u.mds.mds_recoverable_clients != 0) {
-                CERROR("denying connection for new client %s: in recovery\n",
-                       cluuid);
-                RETURN(-EBUSY);
-        }
+        /* Check for aborted recovery. */
+        spin_lock_bh(&mds->mds_processing_task_lock);
+        if (obd->obd_flags & OBD_ABORT_RECOVERY)
+                mds_abort_recovery(mds);
+        spin_unlock_bh(&mds->mds_processing_task_lock);
 
- dont_check_exports:
         /* XXX There is a small race between checking the list and adding a
          * new connection for the same UUID, but the real threat (list
          * corruption when multiple different clients connect) is solved.
@@ -372,13 +307,22 @@ out_export:
 inline int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
 {
         struct file *file = mfd->mfd_file;
+        int rc;
+        struct dentry *de = NULL;
         LASSERT(file->private_data == mfd);
 
+        LASSERT(mfd->mfd_servercookie != DEAD_HANDLE_MAGIC);
+
         list_del(&mfd->mfd_list);
         mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
         kmem_cache_free(mds_file_cache, mfd);
 
-        return filp_close(file, 0);
+        if (file->f_dentry->d_parent)
+                de = dget(file->f_dentry->d_parent);
+        rc = filp_close(file, 0);
+        if (de)
+                l_dput(de);
+        RETURN(rc);
 }
 
 static int mds_disconnect(struct lustre_handle *conn)
@@ -490,7 +434,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
         memcpy(desc, &mds->mds_lov_desc, sizeof *desc);
         lov_packdesc(desc);
         tgt_count = le32_to_cpu(desc->ld_tgt_count);
-        if (tgt_count * sizeof(obd_uuid_t) > streq->repbuf) {
+        if (tgt_count * sizeof(struct obd_uuid) > streq->repbuf) {
                 CERROR("too many targets, enlarge client buffers\n");
                 req->rq_status = -ENOSPC;
                 RETURN(0);
@@ -507,7 +451,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req)
 }
 
 int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                     void *data, __u32 data_len, int flag)
+                     void *data, int flag)
 {
         int do_ast;
         ENTRY;
@@ -519,6 +463,16 @@ int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 
         /* XXX layering violation!  -phil */
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        /* Get this: if mds_blocking_ast is racing with ldlm_intent_policy,
+         * such that mds_blocking_ast is called just before l_i_p takes the
+         * ns_lock, then by the time we get the lock, we might not be the
+         * correct blocking function anymore.  So check, and return early, if
+         * so. */
+        if (lock->l_blocking_ast != mds_blocking_ast) {
+                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                RETURN(0);
+        }
+
         lock->l_flags |= LDLM_FL_CBPENDING;
         do_ast = (!lock->l_readers && !lock->l_writers);
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
@@ -532,25 +486,29 @@ int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 rc = ldlm_cli_cancel(&lockh);
                 if (rc < 0)
                         CERROR("ldlm_cli_cancel: %d\n", rc);
-        } else
-                LDLM_DEBUG(lock, "Lock still has references, will be"
+        } else {
+                LDLM_DEBUG(lock, "Lock still has references, will be "
                            "cancelled later");
+        }
         RETURN(0);
 }
 
-int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
+int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg,
                 int offset, struct mds_body *body, struct inode *inode)
 {
+        struct mds_obd *mds = &obd->u.mds;
         struct lov_mds_md *lmm;
-        int lmm_size = req->rq_repmsg->buflens[offset];
+        int lmm_size = msg->buflens[offset];
         int rc;
+        ENTRY;
 
         if (lmm_size == 0) {
-                CDEBUG(D_INFO, "no space reserved for inode %lu MD\n", inode->i_ino);
+                CDEBUG(D_INFO, "no space reserved for inode %lu MD\n",
+                       inode->i_ino);
                 RETURN(0);
         }
 
-        lmm = lustre_msg_buf(req->rq_repmsg, offset);
+        lmm = lustre_msg_buf(msg, offset);
 
         /* I don't really like this, but it is a sanity check on the client
          * MD request.  However, if the client doesn't know how much space
@@ -566,9 +524,9 @@ int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
          * discarded right after unpacking, and the LOV can figure out the
          * size itself from the ost count.
          */
-        if ((rc = fsfilt_get_md(req->rq_export->exp_obd, inode,
-                                lmm, lmm_size)) < 0) {
-                CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino,rc);
+        if ((rc = fsfilt_get_md(obd, inode, lmm, lmm_size)) < 0) {
+                CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n",
+                       inode->i_ino, rc);
         } else if (rc > 0) {
                 body->valid |= OBD_MD_FLEASIZE;
                 rc = 0;
@@ -577,7 +535,7 @@ int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
         return rc;
 }
 
-static int mds_getattr_internal(struct mds_obd *mds, struct dentry *dentry,
+static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
                                 struct ptlrpc_request *req,
                                 struct mds_body *reqbody, int reply_off)
 {
@@ -595,7 +553,8 @@ static int mds_getattr_internal(struct mds_obd *mds, struct dentry *dentry,
         mds_pack_inode2body(body, inode);
 
         if (S_ISREG(inode->i_mode) && reqbody->valid & OBD_MD_FLEASIZE) {
-                rc = mds_pack_md(mds, req, reply_off + 1, body, inode);
+                rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
+                                 body, inode);
         } else if (S_ISLNK(inode->i_mode) && reqbody->valid & OBD_MD_LINKNAME) {
                 char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1);
                 int len = req->rq_repmsg->buflens[reply_off + 1];
@@ -664,19 +623,22 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         return(rc);
 }
 
-static int mds_getattr_name(int offset, struct ptlrpc_request *req)
+static int mds_getattr_name(int offset, struct ptlrpc_request *req,
+                            struct lustre_handle *child_lockh)
 {
+        struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
+        int lock_mode;
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_run_ctxt saved;
         struct mds_body *body;
         struct dentry *de = NULL, *dchild = NULL;
         struct inode *dir;
-        struct lustre_handle lockh;
-        char *name;
-        int namelen, flags = 0, lock_mode, rc = 0;
         struct obd_ucred uc;
-        __u64 res_id[3] = {0, 0, 0};
+        struct ldlm_res_id child_res_id = { .name = {0} };
+        struct lustre_handle parent_lockh;
+        int namelen, flags = 0, rc = 0;
+        char *name;
         ENTRY;
 
         LASSERT(!strcmp(obd->obd_type->typ_name, "mds"));
@@ -696,56 +658,58 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req)
         uc.ouc_fsuid = body->fsuid;
         uc.ouc_fsgid = body->fsgid;
         uc.ouc_cap = body->capability;
+        uc.ouc_suppgid = body->suppgid;
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
-        de = mds_fid2dentry(mds, &body->fid1, NULL);
-        if (IS_ERR(de)) {
+        /* Step 1: Lookup/lock parent */
+        de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
+                                   &parent_lockh);
+        if (IS_ERR(de))
                 GOTO(out_pre_de, rc = PTR_ERR(de));
-        }
-
         dir = de->d_inode;
-        CDEBUG(D_INODE, "parent ino %lu, name %*s\n", dir->i_ino,namelen,name);
+        LASSERT(dir);
 
-        lock_mode = LCK_PR;
-        res_id[0] = dir->i_ino;
-        res_id[1] = dir->i_generation;
-
-        rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
-                             NULL, 0, lock_mode, &lockh);
-        if (rc == 0) {
-                LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &lockh);
-                if (rc != ELDLM_OK) {
-                        CERROR("lock enqueue: err: %d\n", rc);
-                        GOTO(out_create_de, rc = -EIO);
-                }
-        }
-        ldlm_lock_dump_handle(D_OTHER, &lockh);
+        CDEBUG(D_INODE, "parent ino %lu, name %*s\n", dir->i_ino,namelen,name);
 
-        down(&dir->i_sem);
+        /* Step 2: Lookup child */
         dchild = lookup_one_len(name, de, namelen - 1);
-        up(&dir->i_sem);
         if (IS_ERR(dchild)) {
                 CDEBUG(D_INODE, "child lookup error %ld\n", PTR_ERR(dchild));
-                GOTO(out_create_dchild, rc = PTR_ERR(dchild));
+                GOTO(out_step_1, rc = PTR_ERR(dchild));
         } else if (dchild->d_inode == NULL) {
-                GOTO(out_create_dchild, rc = -ENOENT);
+                GOTO(out_step_2, rc = -ENOENT);
+        }
+
+        /* Step 3: Lock child */
+        if (it->opc == IT_SETATTR)
+                lock_mode = LCK_PW;
+        else
+                lock_mode = LCK_PR;
+        child_res_id.name[0] = dchild->d_inode->i_ino;
+        child_res_id.name[1] = dchild->d_inode->i_generation;
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                              child_res_id, LDLM_PLAIN, NULL, 0, lock_mode,
+                              &flags, ldlm_completion_ast, mds_blocking_ast,
+                              NULL, NULL, child_lockh);
+        if (rc != ELDLM_OK) {
+                CERROR("ldlm_cli_enqueue: %d\n", rc);
+                GOTO(out_step_2, rc = -EIO);
         }
 
         if (req->rq_repmsg == NULL)
                 mds_getattr_pack_msg(req, dchild->d_inode, offset);
 
-        rc = mds_getattr_internal(mds, dchild, req, body, offset);
-
-        EXIT;
-out_create_dchild:
+        rc = mds_getattr_internal(obd, dchild, req, body, offset);
+        if (rc)
+                GOTO(out_step_3, rc);
+        GOTO(out_step_2, rc); /* returns the lock to the client */
+ out_step_3:
+        ldlm_lock_decref(child_lockh, LCK_PR);
+ out_step_2:
         l_dput(dchild);
-        ldlm_lock_decref(&lockh, lock_mode);
-out_create_de:
+ out_step_1:
+        ldlm_lock_decref(&parent_lockh, LCK_PR);
         l_dput(de);
-out_pre_de:
+ out_pre_de:
         req->rq_status = rc;
         pop_ctxt(&saved, &mds->mds_ctxt, &uc);
         return rc;
@@ -754,6 +718,7 @@ out_pre_de:
 static int mds_getattr(int offset, struct ptlrpc_request *req)
 {
         struct mds_obd *mds = mds_req2mds(req);
+        struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_run_ctxt saved;
         struct dentry *de;
         struct mds_body *body;
@@ -774,10 +739,10 @@ static int mds_getattr(int offset, struct ptlrpc_request *req)
 
         rc = mds_getattr_pack_msg(req, de->d_inode, offset);
 
-        req->rq_status = mds_getattr_internal(mds, de, req, body, 0);
+        req->rq_status = mds_getattr_internal(obd, de, req, body, 0);
 
         l_dput(de);
-        EXIT;
+        GOTO(out_pop, rc);
 out_pop:
         pop_ctxt(&saved, &mds->mds_ctxt, &uc);
         return rc;
@@ -828,6 +793,8 @@ static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
         RETURN(mfd);
 }
 
+#if 0
+
 static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req,
                         int offset, struct mds_body *body, struct inode *inode)
 {
@@ -875,105 +842,7 @@ out_ea:
         RETURN(rc);
 }
 
-static int mds_open(struct ptlrpc_request *req)
-{
-        struct mds_obd *mds = mds_req2mds(req);
-        struct mds_body *body;
-        struct mds_export_data *med;
-        struct mds_file_data *mfd;
-        struct dentry *de;
-        struct file *file;
-        struct vfsmount *mnt;
-        __u32 flags;
-        struct list_head *tmp;
-        int rc, size = sizeof(*body);
-        ENTRY;
-
-        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
-                CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
-                req->rq_status = -ENOMEM;
-                RETURN(-ENOMEM);
-        }
-
-        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("mds: pack error: rc = %d\n", rc);
-                req->rq_status = rc;
-                RETURN(rc);
-        }
-
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-
-        /* was this animal open already and the client lost the reply? */
-        /* XXX need some way to detect a reopen, to avoid locked list walks */
-        med = &req->rq_export->exp_mds_data;
-        spin_lock(&med->med_open_lock);
-        list_for_each(tmp, &med->med_open_head) {
-                mfd = list_entry(tmp, typeof(*mfd), mfd_list);
-                if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
-                            sizeof(mfd->mfd_clienthandle)) &&
-                    body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
-                        de = mfd->mfd_file->f_dentry;
-                        spin_unlock(&med->med_open_lock);
-                        CERROR("Re opening "LPD64"\n", body->fid1.id);
-                        GOTO(out_pack, rc = 0);
-                }
-        }
-        spin_unlock(&med->med_open_lock);
-
-        mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
-        if (!mfd) {
-                CERROR("mds: out of memory\n");
-                req->rq_status = -ENOMEM;
-                RETURN(0);
-        }
-
-        de = mds_fid2dentry(mds, &body->fid1, &mnt);
-        if (IS_ERR(de))
-                GOTO(out_free, rc = PTR_ERR(de));
-
-        /* check if this inode has seen a delayed object creation */
-        if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA) {
-                rc = mds_store_md(mds, req, 1, body, de->d_inode);
-                if (rc) {
-                        l_dput(de);
-                        mntput(mnt);
-                        GOTO(out_free, rc);
-                }
-        }
-
-        flags = body->flags;
-        /* dentry_open does a dput(de) and mntput(mnt) on error */
-        file = dentry_open(de, mnt, flags & ~O_DIRECT);
-        if (IS_ERR(file)) {
-                rc = PTR_ERR(file);
-                GOTO(out_free, 0);
-        }
-
-        file->private_data = mfd;
-        mfd->mfd_file = file;
-        memcpy(&mfd->mfd_clienthandle, &body->handle, sizeof(body->handle));
-        get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
-        spin_lock(&med->med_open_lock);
-        list_add(&mfd->mfd_list, &med->med_open_head);
-        spin_unlock(&med->med_open_lock);
-
-out_pack:
-        body = lustre_msg_buf(req->rq_repmsg, 0);
-        mds_pack_inode2fid(&body->fid1, de->d_inode);
-        mds_pack_inode2body(body, de->d_inode);
-        body->handle.addr = (__u64)(unsigned long)mfd;
-        body->handle.cookie = mfd->mfd_servercookie;
-        CDEBUG(D_INODE, "llite file "LPX64": addr %p, cookie "LPX64"\n",
-               mfd->mfd_clienthandle.addr, mfd, mfd->mfd_servercookie);
-        RETURN(0);
-
-out_free:
-        mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
-        kmem_cache_free(mds_file_cache, mfd);
-        req->rq_status = rc;
-        RETURN(0);
-}
+#endif
 
 static int mds_close(struct ptlrpc_request *req)
 {
@@ -986,7 +855,7 @@ static int mds_close(struct ptlrpc_request *req)
         body = lustre_msg_buf(req->rq_reqmsg, 0);
 
         mfd = mds_handle2mfd(&body->handle);
-        if (!mfd) {
+        if (mfd == NULL) {
                 DEBUG_REQ(D_ERROR, req, "no handle for file close "LPD64
                           ": addr "LPX64", cookie "LPX64"\n",
                           body->fid1.id, body->handle.addr,
@@ -1054,7 +923,8 @@ static int mds_readpage(struct ptlrpc_request *req)
         /* to make this asynchronous make sure that the handling function
            doesn't send a reply when this function completes. Instead a
            callback function would send the reply */
-        rc = mds_sendpage(req, file, body->size);
+        /* body->blocks is actually the xid -phil */
+        rc = mds_sendpage(req, file, body->size, body->blocks);
 
         filp_close(file, 0);
 out_pop:
@@ -1064,70 +934,149 @@ out:
         RETURN(0);
 }
 
+/* Unpack the update record at buffer @offset of @req and pass it, along with
+ * @lockh, to mds_reint_rec().  Returns -ENOMEM if the record cannot be
+ * allocated.  If unpacking fails (or OBD_FAIL_MDS_REINT_UNPACK fires),
+ * req->rq_status is set to -EINVAL and the rc from mds_update_unpack() is
+ * returned; otherwise the result of mds_reint_rec() is returned. */
-int mds_reint(struct ptlrpc_request *req, int offset)
+int mds_reint(struct ptlrpc_request *req, int offset,
+              struct lustre_handle *lockh)
 {
+        struct mds_update_record *rec; /* 116 bytes on the stack?  no sir! */
         int rc;
-        struct mds_update_record rec;
+        ENTRY; /* pairs with the RETURN()s below */
 
-        rc = mds_update_unpack(req, offset, &rec);
+        OBD_ALLOC(rec, sizeof(*rec));
+        if (rec == NULL)
+                RETURN(-ENOMEM);
+
+        rc = mds_update_unpack(req, offset, rec);
         if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNPACK)) {
                 CERROR("invalid record\n");
-                req->rq_status = -EINVAL;
-                RETURN(0);
+                GOTO(out, req->rq_status = -EINVAL);
         }
         /* rc will be used to interrupt a for loop over multiple records */
-        rc = mds_reint_rec(&rec, offset, req);
+        rc = mds_reint_rec(rec, offset, req, lockh);
+ out:
+        /* single exit so the heap record is freed on every path */
+        OBD_FREE(rec, sizeof(*rec));
-        return rc;
+        RETURN(rc);
 }
 
 /* forward declaration */
 int mds_handle(struct ptlrpc_request *req);
 
+static void abort_delayed_replies(struct mds_obd *mds)
+{ /* Answer every request on mds_delayed_reply_queue with -ENOTCONN, then free it. */
+        struct ptlrpc_request *req;
+        struct list_head *tmp, *n; /* n keeps the walk valid across list_del() */
+        list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                DEBUG_REQ(D_ERROR, req, "aborted:");
+                req->rq_status = -ENOTCONN;
+                req->rq_type = PTL_RPC_MSG_ERR; /* mark the reply as an error message */
+                ptlrpc_reply(req->rq_svc, req);
+                list_del(&req->rq_list); /* unlink before the request is freed */
+                OBD_FREE(req, sizeof *req);
+        }
+}
+
+static void mds_abort_recovery(void *data)
+{ /* Give up on recovery; both callers in this file hold mds_processing_task_lock. */
+        struct mds_obd *mds = data; /* data is the struct mds_obd being recovered */
+        struct obd_device *obd = list_entry(mds, struct obd_device, u.mds); /* enclosing obd */
+        CERROR("disconnecting clients and aborting recovery\n");
+        mds->mds_recoverable_clients = 0;
+        obd->obd_flags &= ~(OBD_RECOVERING | OBD_ABORT_RECOVERY); /* recovery is over */
+        abort_delayed_replies(mds);
+        spin_unlock_bh(&mds->mds_processing_task_lock); /* dropped around class_disconnect_all() */
+        class_disconnect_all(obd);
+        spin_lock_bh(&mds->mds_processing_task_lock); /* re-taken: the caller unlocks it */
+}
+
+static void mds_recovery_expired(unsigned long castmeharder)
+{ /* Recovery timer callback; castmeharder is the mds_obd stored by start_recovery_timer(). */
+        struct mds_obd *mds = (struct mds_obd *)castmeharder;
+        struct obd_device *obd = list_entry(mds, struct obd_device, u.mds); /* enclosing obd */
+        CERROR("recovery timed out, aborting\n");
+        spin_lock_bh(&mds->mds_processing_task_lock);
+        obd->obd_flags |= OBD_ABORT_RECOVERY; /* only requests the abort; mds_abort_recovery() does the work */
+        wake_up(&mds->mds_next_transno_waitq); /* nudge a sleeper in process_recovery_queue() */
+        spin_unlock_bh(&mds->mds_processing_task_lock);
+}
+
+static void reset_recovery_timer(struct mds_obd *mds)
+{ /* (Re)arm the recovery timer to fire MDS_RECOVERY_TIMEOUT jiffies from now. */
+        CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
+               MDS_RECOVERY_TIMEOUT / HZ); /* jiffies converted to seconds for the log */
+        mod_timer(&mds->mds_recovery_timer, jiffies + MDS_RECOVERY_TIMEOUT);
+}
+
+/* Arm the recovery timer for the first time.  When it fires,
+ * mds_recovery_expired() is called with @mds as its argument. */
+static void start_recovery_timer(struct mds_obd *mds)
+{
+        /* Initialise the timer before filling in its fields, so that nothing
+         * init_timer() does can overwrite them. */
+        init_timer(&mds->mds_recovery_timer);
+        mds->mds_recovery_timer.function = mds_recovery_expired;
+        mds->mds_recovery_timer.data = (unsigned long)mds;
+        reset_recovery_timer(mds);
+}
+
+static void cancel_recovery_timer(struct mds_obd *mds)
+{ /* Stop the recovery timer armed by start_recovery_timer()/reset_recovery_timer(). */
+        del_timer(&mds->mds_recovery_timer);
+}
+
+/* Condition for the wait_event() in process_recovery_queue().  Returns
+ * nonzero when the sleeper should wake up: the request at the head of
+ * mds_recovery_queue carries the next expected transno, recovery is no longer
+ * in progress, or an abort of recovery has been requested. */
 static int check_for_next_transno(struct mds_obd *mds)
 {
         struct ptlrpc_request *req;
+        struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
         req = list_entry(mds->mds_recovery_queue.next,
                          struct ptlrpc_request, rq_list);
         LASSERT(req->rq_reqmsg->transno >= mds->mds_next_recovery_transno);
-        return req->rq_reqmsg->transno == mds->mds_next_recovery_transno;
+
+        /* mds_recovery_expired() sets OBD_ABORT_RECOVERY and wakes the waitq
+         * without clearing OBD_RECOVERING, so the abort flag has to satisfy
+         * the wait too or the sleeper never gets to act on it. */
+        if (obd->obd_flags & OBD_ABORT_RECOVERY)
+                return 1;
+
+        return req->rq_reqmsg->transno == mds->mds_next_recovery_transno ||
+                (obd->obd_flags & OBD_RECOVERING) == 0;
 }
 
 static void process_recovery_queue(struct mds_obd *mds)
 {
         struct ptlrpc_request *req;
+        struct obd_device *obd = list_entry(mds, struct obd_device, u.mds); /* enclosing obd */
+        int aborted = 0; /* set when the wait below ends in an aborted recovery */
         ENTRY;
 
         for (;;) {
-                spin_lock(&mds->mds_processing_task_lock);
+                spin_lock_bh(&mds->mds_processing_task_lock); /* _bh: mds_recovery_expired() takes this lock too */
                 LASSERT(mds->mds_processing_task == current->pid);
                 req = list_entry(mds->mds_recovery_queue.next,
                                  struct ptlrpc_request, rq_list);
 
                 if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) {
-                        spin_unlock(&mds->mds_processing_task_lock);
+                        spin_unlock_bh(&mds->mds_processing_task_lock); /* never sleep holding the lock */
                         CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
                                LPD64")\n",
                                mds->mds_next_recovery_transno,
                                req->rq_reqmsg->transno);
                         wait_event(mds->mds_next_transno_waitq,
                                    check_for_next_transno(mds));
+                        spin_lock_bh(&mds->mds_processing_task_lock); /* mds_abort_recovery() unlocks/relocks it */
+                        if (obd->obd_flags & OBD_ABORT_RECOVERY) {
+                                mds_abort_recovery(mds);
+                                aborted = 1;
+                        }
+                        spin_unlock_bh(&mds->mds_processing_task_lock);
+                        if (aborted)
+                                return; /* NOTE(review): skips EXIT and leaves mds_processing_task set - confirm intended */
                         continue;
                 }
                 list_del_init(&req->rq_list);
-                spin_unlock(&mds->mds_processing_task_lock);
+                spin_unlock_bh(&mds->mds_processing_task_lock);
 
                 DEBUG_REQ(D_ERROR, req, "processing: ");
                 (void)mds_handle(req);
+                reset_recovery_timer(mds); /* a request was replayed: restart the timeout */
                 mds_fsync_super(mds->mds_sb);
                 OBD_FREE(req, sizeof *req);
-                spin_lock(&mds->mds_processing_task_lock);
+                spin_lock_bh(&mds->mds_processing_task_lock);
                 mds->mds_next_recovery_transno++;
                 if (list_empty(&mds->mds_recovery_queue)) {
                         mds->mds_processing_task = 0;
-                        spin_unlock(&mds->mds_processing_task_lock);
+                        spin_unlock_bh(&mds->mds_processing_task_lock);
                         break;
                 }
-                spin_unlock(&mds->mds_processing_task_lock);
+                spin_unlock_bh(&mds->mds_processing_task_lock);
         }
         EXIT;
 }
@@ -1142,16 +1091,16 @@ static int queue_recovery_request(struct ptlrpc_request *req,
 
         if (!transno) {
                 INIT_LIST_HEAD(&req->rq_list);
-                DEBUG_REQ(D_ERROR, req, "not queueing");
+                DEBUG_REQ(D_HA, req, "not queueing");
                 return 1;
         }
 
-        spin_lock(&mds->mds_processing_task_lock);
+        spin_lock_bh(&mds->mds_processing_task_lock);
 
         if (mds->mds_processing_task == current->pid) {
                 /* Processing the queue right now, don't re-add. */
                 LASSERT(list_empty(&req->rq_list));
-                spin_unlock(&mds->mds_processing_task_lock);
+                spin_unlock_bh(&mds->mds_processing_task_lock);
                 return 1;
         }
 
@@ -1184,7 +1133,7 @@ static int queue_recovery_request(struct ptlrpc_request *req,
                  */
                 if (transno == mds->mds_next_recovery_transno)
                         wake_up(&mds->mds_next_transno_waitq);
-                spin_unlock(&mds->mds_processing_task_lock);
+                spin_unlock_bh(&mds->mds_processing_task_lock);
                 return 0;
         }
 
@@ -1192,7 +1141,7 @@ static int queue_recovery_request(struct ptlrpc_request *req,
          * now, so we'll do the honours.
          */
         mds->mds_processing_task = current->pid;
-        spin_unlock(&mds->mds_processing_task_lock);
+        spin_unlock_bh(&mds->mds_processing_task_lock);
 
         process_recovery_queue(mds);
         return 0;
@@ -1202,12 +1151,12 @@ static int filter_recovery_request(struct ptlrpc_request *req,
                                    struct mds_obd *mds, int *process)
 {
         switch (req->rq_reqmsg->opc) {
-        case MDS_CONNECT:
+        case MDS_CONNECT: /* This will never get here, but for completeness. */
         case MDS_DISCONNECT:
                *process = 1;
                RETURN(0);
 
-        case MDS_OPEN:
+        case MDS_CLOSE:
         case MDS_GETSTATUS: /* used in unmounting */
         case MDS_REINT:
         case LDLM_ENQUEUE:
@@ -1225,9 +1174,10 @@ static int filter_recovery_request(struct ptlrpc_request *req,
 static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
 {
         struct mds_obd *mds = mds_req2mds(req);
+        struct obd_device *mds_obd = list_entry(mds, struct obd_device, u.mds);
         struct ptlrpc_request *saved_req;
 
-        spin_lock(&mds->mds_processing_task_lock);
+        spin_lock_bh(&mds->mds_processing_task_lock);
         if (rc) {
                 /* Just like ptlrpc_error, but without the sending. */
                 lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
@@ -1245,6 +1195,7 @@ static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
                 ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
                 CDEBUG(D_ERROR,
                        "all clients recovered, sending delayed replies\n");
+                mds_obd->obd_flags &= ~OBD_RECOVERING;
                 list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
                         req = list_entry(tmp, struct ptlrpc_request, rq_list);
                         DEBUG_REQ(D_ERROR, req, "delayed:");
@@ -1252,12 +1203,13 @@ static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
                         list_del(&req->rq_list);
                         OBD_FREE(req, sizeof *req);
                 }
+                cancel_recovery_timer(mds);
         } else {
                 CERROR("%d recoverable clients remain\n",
                        mds->mds_recoverable_clients);
         }
 
-        spin_unlock(&mds->mds_processing_task_lock);
+        spin_unlock_bh(&mds->mds_processing_task_lock);
         return 1;
 }
 
@@ -1266,14 +1218,15 @@ static char *reint_names[] = {
         [REINT_CREATE]  "create",
         [REINT_LINK]    "link",
         [REINT_UNLINK]  "unlink",
-        [REINT_RENAME]  "rename"
+        [REINT_RENAME]  "rename",
+        [REINT_OPEN]    "open",
 };
 
 int mds_handle(struct ptlrpc_request *req)
 {
-        int rc;
-        int should_process;
+        int should_process, rc;
         struct mds_obd *mds = NULL; /* quell gcc overwarning */
+        struct obd_device *mds_obd = NULL;
         ENTRY;
 
         rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
@@ -1282,20 +1235,42 @@ int mds_handle(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
+        OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
+
         LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME));
 
         if (req->rq_reqmsg->opc != MDS_CONNECT) {
+                struct mds_export_data *med;
                 if (req->rq_export == NULL) {
                         req->rq_status = -ENOTCONN;
                         GOTO(out, rc = -ENOTCONN);
                 }
 
-                mds = mds_req2mds(req);
-                if (mds->mds_recoverable_clients != 0) {
+                med = &req->rq_export->exp_mds_data;
+                mds_obd = req->rq_export->exp_obd;
+                mds = &mds_obd->u.mds;
+                spin_lock_bh(&mds->mds_processing_task_lock);
+                if (mds_obd->obd_flags & OBD_ABORT_RECOVERY)
+                        mds_abort_recovery(mds);
+                spin_unlock_bh(&mds->mds_processing_task_lock);
+
+                if (mds_obd->obd_flags & OBD_RECOVERING) {
                         rc = filter_recovery_request(req, mds, &should_process);
                         if (rc || !should_process)
                                 RETURN(rc);
+                } else if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                        if (req->rq_xid == med->med_last_xid) {
+                                DEBUG_REQ(D_HA, req, "resending reply");
+                                OBD_ALLOC(req->rq_repmsg, med->med_last_replen);
+                                req->rq_replen = med->med_last_replen;
+                                memcpy(req->rq_repmsg, med->med_last_reply,
+                                       req->rq_replen);
+                                ptlrpc_reply(req->rq_svc, req);
+                                return 0;
+                        }
+                        DEBUG_REQ(D_HA, req, "no reply for resend, continuing");
                 }
+
         }
 
         switch (req->rq_reqmsg->opc) {
@@ -1309,12 +1284,6 @@ int mds_handle(struct ptlrpc_request *req)
                         mds = mds_req2mds(req);
                         mds_fsync_super(mds->mds_sb);
                 }
-
-                /* Let the client know if it can replay. */
-                if (mds->mds_recoverable_clients) {
-                        lustre_msg_add_flags(req->rq_repmsg,
-                                             MSG_REPLAY_IN_PROGRESS);
-                }
                 break;
 
         case MDS_DISCONNECT:
@@ -1344,12 +1313,15 @@ int mds_handle(struct ptlrpc_request *req)
                 rc = mds_getattr(0, req);
                 break;
 
-        case MDS_GETATTR_NAME:
+        case MDS_GETATTR_NAME: {
+                struct lustre_handle lockh;
                 DEBUG_REQ(D_INODE, req, "getattr_name");
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0);
-                rc = mds_getattr_name(0, req);
+                rc = mds_getattr_name(0, req, &lockh);
+                if (rc == 0)
+                        ldlm_lock_decref(&lockh, LCK_PR);
                 break;
-
+        }
         case MDS_STATFS:
                 DEBUG_REQ(D_INODE, req, "statfs");
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_STATFS_NET, 0);
@@ -1366,32 +1338,30 @@ int mds_handle(struct ptlrpc_request *req)
                 break;
 
         case MDS_REINT: {
-                int size = sizeof(struct mds_body);
-                int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0),
-                        realopc = opc & REINT_OPCODE_MASK;
+                int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0);
+                int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+                int bufcount;
 
                 DEBUG_REQ(D_INODE, req, "reint (%s%s)",
-                          reint_names[realopc],
+                          reint_names[opc & REINT_OPCODE_MASK],
                           opc & REINT_REPLAYING ? "|REPLAYING" : "");
 
                 OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
 
-                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
-                                     &req->rq_repmsg);
-                if (rc) {
-                        req->rq_status = rc;
+                if (opc == REINT_UNLINK)
+                        bufcount = 2;
+                else
+                        bufcount = 1;
+
+                rc = lustre_pack_msg(bufcount, size, NULL,
+                                     &req->rq_replen, &req->rq_repmsg);
+                if (rc)
                         break;
-                }
-                rc = mds_reint(req, 0);
-                OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0);
-                break;
-                }
 
-        case MDS_OPEN:
-                DEBUG_REQ(D_INODE, req, "open");
-                OBD_FAIL_RETURN(OBD_FAIL_MDS_OPEN_NET, 0);
-                rc = mds_open(req);
+                rc = mds_reint(req, 0, NULL);
+                OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0);
                 break;
+        }
 
         case MDS_CLOSE:
                 DEBUG_REQ(D_INODE, req, "close");
@@ -1402,7 +1372,8 @@ int mds_handle(struct ptlrpc_request *req)
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_INODE, req, "enqueue");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
-                rc = ldlm_handle_enqueue(req);
+                rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
+                                         ldlm_server_blocking_ast);
                 break;
         case LDLM_CONVERT:
                 DEBUG_REQ(D_INODE, req, "convert");
@@ -1426,35 +1397,60 @@ int mds_handle(struct ptlrpc_request *req)
         /* If we're DISCONNECTing, the mds_export_data is already freed */
         if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) {
                 struct mds_export_data *med = &req->rq_export->exp_mds_data;
-
+                struct obd_device *obd = list_entry(mds, struct obd_device,
+                                                    u.mds);
                 req->rq_repmsg->last_xid =
                         HTON__u64(le64_to_cpu(med->med_mcd->mcd_last_xid));
-                req->rq_repmsg->last_committed =
-                        HTON__u64(mds->mds_last_committed);
+                if ((obd->obd_flags & OBD_NO_TRANSNO) == 0) {
+                        req->rq_repmsg->last_committed =
+                                HTON__u64(obd->obd_last_committed);
+                } else {
+                        DEBUG_REQ(D_IOCTL, req,
+                                  "not sending last_committed update");
+                }
                 CDEBUG(D_INFO, "last_transno %Lu, last_committed %Lu, xid %d\n",
                        (unsigned long long)mds->mds_last_rcvd,
-                       (unsigned long long)mds->mds_last_committed,
+                       (unsigned long long)obd->obd_last_committed,
                        cpu_to_le32(req->rq_xid));
         }
  out:
 
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
-                struct mds_obd *mds = mds_req2mds(req);
-                LASSERT(mds->mds_recoverable_clients);
-                DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
-                return mds_queue_final_reply(req, rc);
-        }
-
-        /* XXX bug 578 */
-        /* MDS_CONNECT / EALREADY (note: not -EALREADY!) isn't an error */
-        if (rc && (req->rq_reqmsg->opc != MDS_CONNECT ||
-                   rc != EALREADY)) {
-                DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
-                ptlrpc_error(req->rq_svc, req);
+                if (mds_obd && (mds_obd->obd_flags & OBD_RECOVERING)) {
+                        DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
+                        return mds_queue_final_reply(req, rc);
+                }
+                /* Lost a race with recovery; let the error path DTRT. */
+                rc = req->rq_status = -ENOTCONN;
+        }
+
+        if (req->rq_export && mds_obd &&
+            (mds_obd->obd_flags & OBD_RECOVERING) == 0) {
+                struct mds_export_data *med = &req->rq_export->exp_mds_data;
+                if (med->med_last_reply)
+                        OBD_FREE(med->med_last_reply, med->med_last_replen);
+                OBD_ALLOC(med->med_last_reply, req->rq_replen);
+                med->med_last_replen = req->rq_replen;
+                med->med_last_xid = req->rq_xid;
+                memcpy(med->med_last_reply, req->rq_repmsg, req->rq_replen);
+                /* XXX serialize */
+        }
+
+        if (!OBD_FAIL_CHECK(OBD_FAIL_MDS_ALL_REPLY_NET | OBD_FAIL_ONCE)) {
+                if (rc) {
+                        DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
+                        ptlrpc_error(req->rq_svc, req);
+                } else {
+                        DEBUG_REQ(D_NET, req, "sending reply");
+                        ptlrpc_reply(req->rq_svc, req);
+                }
         } else {
-                DEBUG_REQ(D_NET, req, "sending reply");
-                ptlrpc_reply(req->rq_svc, req);
+                obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
+                DEBUG_REQ(D_ERROR, req, "dropping reply");
+                if (req->rq_repmsg)
+                        OBD_FREE(req->rq_repmsg, req->rq_replen);
         }
+
         return 0;
 }
 
@@ -1539,6 +1535,9 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
                 GOTO(err_put, rc);
         }
 
+        if (obddev->obd_flags & OBD_RECOVERING)
+                start_recovery_timer(mds);
+
         obddev->obd_namespace =
                 ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
         if (obddev->obd_namespace == NULL) {
@@ -1598,11 +1597,12 @@ static int mds_cleanup(struct obd_device *obddev)
         RETURN(0);
 }
 
-static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
-                              void *req_cookie, ldlm_mode_t mode, int flags,
-                              void *data)
+static int ldlm_intent_policy(struct ldlm_namespace *ns,
+                              struct ldlm_lock **lockp, void *req_cookie,
+                              ldlm_mode_t mode, int flags, void *data)
 {
         struct ptlrpc_request *req = req_cookie;
+        struct ldlm_lock *lock = *lockp;
         int rc = 0;
         ENTRY;
 
@@ -1613,10 +1613,11 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
                 /* an intent needs to be considered */
                 struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
                 struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
-                struct mds_body *mds_rep;
+                struct mds_body *mds_body;
                 struct ldlm_reply *rep;
-                __u64 new_resid[3] = {0, 0, 0}, old_res;
-                int rc, size[3] = {sizeof(struct ldlm_reply),
+                struct lustre_handle lockh;
+                struct ldlm_lock *new_lock;
+                int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
                                                   sizeof(struct mds_body),
                                                   mds->mds_max_mdsize};
 
@@ -1625,7 +1626,7 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
                 LDLM_DEBUG(lock, "intent policy, opc: %s",
                            ldlm_it2str(it->opc));
 
-                rc = lustre_pack_msg(3, size, NULL, &req->rq_replen,
+                rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
                                      &req->rq_repmsg);
                 if (rc) {
                         rc = req->rq_status = -ENOMEM;
@@ -1633,102 +1634,103 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
                 }
 
                 rep = lustre_msg_buf(req->rq_repmsg, 0);
-                rep->lock_policy_res1 = 1;
+                rep->lock_policy_res1 = IT_INTENT_EXEC;
 
                 /* execute policy */
                 switch ((long)it->opc) {
+                case IT_OPEN:
                 case IT_CREAT|IT_OPEN:
-                        rc = mds_reint(req, 2);
-                        if (rc || (req->rq_status != 0 &&
-                                   req->rq_status != -EEXIST)) {
-                                rep->lock_policy_res2 = req->rq_status;
+                        rc = mds_reint(req, offset, &lockh);
+                        /* We return a dentry to the client if IT_OPEN_POS is
+                         * set, or if we make it to the OPEN portion of the
+                         * programme (which implies that we created) */
+                        if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
+                              rep->lock_policy_res1 & IT_OPEN_OPEN)) {
+                                rep->lock_policy_res2 = rc;
                                 RETURN(ELDLM_LOCK_ABORTED);
                         }
                         break;
-                case IT_CREAT:
-                case IT_MKDIR:
-                case IT_MKNOD:
-                case IT_RENAME2:
-                case IT_LINK2:
-                case IT_RMDIR:
-                case IT_SYMLINK:
                 case IT_UNLINK:
-                        rc = mds_reint(req, 2);
-                        if (rc || (req->rq_status != 0 &&
-                                   req->rq_status != -EISDIR &&
-                                   req->rq_status != -ENOTDIR)) {
+                        rc = mds_reint(req, offset, &lockh);
+                        /* Don't return a lock if the unlink failed, or if we're
+                         * not sending back an EA */
+                        if (rc) {
+                                rep->lock_policy_res2 = rc;
+                                RETURN(ELDLM_LOCK_ABORTED);
+                        }
+                        if (req->rq_status != 0) {
                                 rep->lock_policy_res2 = req->rq_status;
                                 RETURN(ELDLM_LOCK_ABORTED);
                         }
+                        mds_body = lustre_msg_buf(req->rq_repmsg, 1);
+                        if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
+                                rep->lock_policy_res2 = rc;
+                                RETURN(ELDLM_LOCK_ABORTED);
+                        }
                         break;
                 case IT_GETATTR:
                 case IT_LOOKUP:
-                case IT_OPEN:
                 case IT_READDIR:
-                case IT_READLINK:
-                case IT_RENAME:
-                case IT_LINK:
                 case IT_SETATTR:
-                        rc = mds_getattr_name(2, req);
+                        rc = mds_getattr_name(offset, req, &lockh);
                         /* FIXME: we need to sit down and decide on who should
                          * set req->rq_status, who should return negative and
                          * positive return values, and what they all mean. */
-                        if (rc || req->rq_status != 0) {
+                        if (rc) {
+                                rep->lock_policy_res2 = rc;
+                                RETURN(ELDLM_LOCK_ABORTED);
+                        }
+                        if (req->rq_status != 0) {
                                 rep->lock_policy_res2 = req->rq_status;
                                 RETURN(ELDLM_LOCK_ABORTED);
                         }
                         break;
-                case IT_READDIR|IT_OPEN:
-                        LBUG();
-                        break;
                 default:
                         CERROR("Unhandled intent "LPD64"\n", it->opc);
                         LBUG();
                 }
 
-                /* We don't bother returning a lock to the client for a file
-                 * or directory we are removing.
-                 *
-                 * As for link and rename, there is no reason for the client
-                 * to get a lock on the target at this point.  If they are
-                 * going to modify the file/directory later they will get a
-                 * lock at that time.
-                 */
-                if (it->opc & (IT_UNLINK | IT_RMDIR | IT_LINK | IT_LINK2 |
-                               IT_RENAME | IT_RENAME2))
-                        RETURN(ELDLM_LOCK_ABORTED);
-
-                rep->lock_policy_res2 = req->rq_status;
-                mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
-
-                /* If the client is about to open a file that doesn't have an
-                 * MD stripe record, it's going to need a write lock.
-                 */
-                if (it->opc & IT_OPEN && !(mds_rep->valid & OBD_MD_FLEASIZE)) {
-                        LDLM_DEBUG(lock, "open with no EA; returning PW lock");
-                        lock->l_req_mode = LCK_PW;
-                }
-
                 if (flags & LDLM_FL_INTENT_ONLY) {
                         LDLM_DEBUG(lock, "INTENT_ONLY, aborting lock");
                         RETURN(ELDLM_LOCK_ABORTED);
                 }
-                /* Give the client a lock on the child object, instead of the
-                 * parent that it requested. */
-                new_resid[0] = NTOH__u32(mds_rep->ino);
-                new_resid[1] = NTOH__u32(mds_rep->generation);
-                if (new_resid[0] == 0)
-                        LBUG();
-                old_res = lock->l_resource->lr_name[0];
 
-                ldlm_lock_change_resource(ns, lock, new_resid);
-                if (lock->l_resource == NULL) {
-                        LBUG();
-                        RETURN(-ENOMEM);
-                }
-                LDLM_DEBUG(lock, "intent policy, old res %ld",
-                           (long)old_res);
-                RETURN(ELDLM_LOCK_CHANGED);
+                /* By this point, whatever function we called above must have
+                 * filled in 'lockh' or returned an error.  We want to give the
+                 * new lock to the client instead of whatever lock it was about
+                 * to get. */
+                new_lock = ldlm_handle2lock(&lockh);
+                LASSERT(new_lock != NULL);
+                mds_body = lustre_msg_buf(req->rq_repmsg, 1);
+                *lockp = new_lock;
+
+                /* Fixup the lock to be given to the client */
+                l_lock(&new_lock->l_resource->lr_namespace->ns_lock);
+                LASSERT(new_lock->l_readers + new_lock->l_writers == 1);
+                new_lock->l_readers = 0;
+                new_lock->l_writers = 0;
+
+                new_lock->l_export = req->rq_export;
+                list_add(&new_lock->l_export_chain,
+                         &new_lock->l_export->exp_ldlm_data.led_held_locks);
+
+                /* We don't need to worry about completion_ast (which isn't set
+                 * in 'lock' yet anyways), because this lock is already
+                 * granted. */
+                new_lock->l_blocking_ast = lock->l_blocking_ast;
+
+                memcpy(&new_lock->l_remote_handle, &lock->l_remote_handle,
+                       sizeof(lock->l_remote_handle));
+
+                new_lock->l_flags &= ~(LDLM_FL_LOCAL | LDLM_FL_AST_SENT |
+                                       LDLM_FL_CBPENDING);
+
+                LDLM_LOCK_PUT(new_lock);
+                l_unlock(&new_lock->l_resource->lr_namespace->ns_lock);
+
+                rep->lock_policy_res2 = req->rq_status;
+
+                RETURN(ELDLM_LOCK_REPLACED);
         } else {
                 int size = sizeof(struct ldlm_reply);
                 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
@@ -1743,26 +1745,41 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
 
 int mds_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+        /* 0: presumably the MDS slot of lprocfs_array_vars -- confirm */
+        lprocfs_init_multi_vars(0, &lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 int mds_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
+}
+
+int mdt_attach(struct obd_device *dev, obd_count len, void *data)
+{
+        struct lprocfs_static_vars lvars;
+        /* 1: presumably the MDT slot of lprocfs_array_vars -- confirm */
+        lprocfs_init_multi_vars(1, &lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
+}
+
+int mdt_detach(struct obd_device *dev)
+{
+        return lprocfs_obd_detach(dev);
 }
 
 static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
-        int i;
-        //        struct obd_ioctl_data* data = buf;
         struct mds_obd *mds = &obddev->u.mds;
-        int rc = 0;
+        struct obd_uuid uuid = { "self" };
+        int i, rc = 0;
         ENTRY;
 
         mds->mds_service = ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
                                            MDS_BUFSIZE, MDS_MAXREQSIZE,
                                            MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
-                                           "self", mds_handle, "mds");
+                                           &uuid, mds_handle, "mds");
         if (!mds->mds_service) {
                 CERROR("failed to start service\n");
                 RETURN(rc = -ENOMEM);
@@ -1778,12 +1795,37 @@ static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf)
                 }
         }
 
+        mds->mds_getattr_service =
+                ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
+                                MDS_BUFSIZE, MDS_MAXREQSIZE,
+                                MDS_GETATTR_PORTAL, MDC_REPLY_PORTAL,
+                                &uuid, mds_handle, "mds");
+        if (!mds->mds_getattr_service) {
+                CERROR("failed to start getattr service\n");
+                GOTO(err_thread, rc = -ENOMEM);
+        }
+
+        for (i = 0; i < MDT_NUM_THREADS; i++) {
+                char name[32];
+                sprintf(name, "ll_mdt_attr_%02d", i);
+                rc = ptlrpc_start_thread(obddev, mds->mds_getattr_service,
+                                         name);
+                if (rc) {
+                        CERROR("cannot start MDT getattr thread #%d: rc %d\n",
+                               i, rc);
+                        GOTO(err_thread2, rc);
+                }
+        }
+
         RETURN(0);
 
+err_thread2:
+        ptlrpc_stop_all_threads(mds->mds_getattr_service);
+        ptlrpc_unregister_service(mds->mds_getattr_service);
 err_thread:
         ptlrpc_stop_all_threads(mds->mds_service);
         ptlrpc_unregister_service(mds->mds_service);
-        RETURN(rc);
+        return rc;
 }
 
 
@@ -1792,6 +1834,9 @@ static int mdt_cleanup(struct obd_device *obddev)
         struct mds_obd *mds = &obddev->u.mds;
         ENTRY;
 
+        ptlrpc_stop_all_threads(mds->mds_getattr_service);
+        ptlrpc_unregister_service(mds->mds_getattr_service);
+
         ptlrpc_stop_all_threads(mds->mds_service);
         ptlrpc_unregister_service(mds->mds_service);
 
@@ -1815,6 +1860,8 @@ static struct obd_ops mds_obd_ops = {
 
 static struct obd_ops mdt_obd_ops = {
         o_owner:       THIS_MODULE,
+        o_attach:      mdt_attach,
+        o_detach:      mdt_detach,
         o_setup:       mdt_setup,
         o_cleanup:     mdt_cleanup,
 };
@@ -1822,14 +1869,17 @@ static struct obd_ops mdt_obd_ops = {
 
 static int __init mds_init(void)
 {
+        struct lprocfs_static_vars lvars;
         mds_file_cache = kmem_cache_create("ll_mds_file_data",
                                            sizeof(struct mds_file_data),
                                            0, 0, NULL, NULL);
         if (mds_file_cache == NULL)
                 return -ENOMEM;
 
-        class_register_type(&mds_obd_ops, status_class_var, LUSTRE_MDS_NAME);
-        class_register_type(&mdt_obd_ops, 0, LUSTRE_MDT_NAME);
+        lprocfs_init_multi_vars(0, &lvars);
+        class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME);
+        lprocfs_init_multi_vars(1, &lvars);
+        class_register_type(&mdt_obd_ops, lvars.module_vars, LUSTRE_MDT_NAME);
         ldlm_register_intent(ldlm_intent_policy);
 
         return 0;
@@ -1844,8 +1894,8 @@ static void __exit mds_exit(void)
                 CERROR("couldn't free MDS file cache\n");
 }
 
-MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Metadata Server (MDS) v0.01");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Metadata Server (MDS)");
 MODULE_LICENSE("GPL");
 
 module_init(mds_init);
index 37c7bc8..eab0cf7 100644 (file)
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
-int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
-            void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        return snprintf(page, count, "%s\n", temp->obd_uuid);
-}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_mds_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_mds_module_vars[] = { {0} };
+struct lprocfs_vars lprocfs_mdt_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} };
 
-int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        struct statfs mystats;
-        int rc;
+#else
 
-        rc = vfs_statfs(mds->mds_sb, &mystats);
-        if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
-                return 0;
-        }
-        return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_bsize));
-}
-
-int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
+static inline
+int lprocfs_mds_statfs(void *data, struct statfs *sfs)
 {
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        struct statfs mystats;
-        int rc;
-        __u32 blk_size;
-        __u64 result;
-
-        rc = vfs_statfs(mds->mds_sb, &mystats);
-        if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
-                return 0;
-        }
-
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_blocks;
-        while(blk_size >>= 1)
-                result <<= 1;
-
-        return snprintf(page, count, LPU64"\n", result);
+        struct obd_device* dev = (struct obd_device*) data;
+        struct mds_obd *mds = &dev->u.mds;
+        return vfs_statfs(mds->mds_sb, sfs);
 }
 
-int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
-              void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        struct statfs mystats;
-        int rc;
-        __u32 blk_size;
-        __u64 result;
-
-        rc = vfs_statfs(mds->mds_sb, &mystats);
-        if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
-                return 0;
-        }
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_blocks;
-        while (blk_size >>= 1)
-                result <<= 1;
-
-        return snprintf(page, count, LPU64"\n", result);
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_mds_statfs);
 
 int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
               void *data)
@@ -105,66 +56,40 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
         return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
 }
 
-int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        struct statfs mystats;
-        int rc;
 
-        rc = vfs_statfs(mds->mds_sb, &mystats);
-        if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
-                return 0;
-        }
-        return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
-}
-
-int rd_filesfree(char *page, char **start, off_t off, int count, int *eof,
-                 void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct mds_obd *mds = &temp->u.mds;
-        struct statfs mystats;
-        int rc, len = 0;
-
-        rc = vfs_statfs(mds->mds_sb, &mystats);
-        if (rc) {
-                CERROR("mds: statfs failed: rc %d\n", rc);
-                return 0;
-        }
+struct lprocfs_vars lprocfs_mds_obd_vars[] = { /* MDS per-obd entries */
+        { "uuid",        lprocfs_rd_uuid, 0, 0 },
+        { "blocksize",   rd_blksize,      0, 0 },
+        { "kbytestotal", rd_kbytestotal,  0, 0 },
+        { "kbytesfree",  rd_kbytesfree,   0, 0 },
+        { "fstype",      rd_fstype,       0, 0 },
+        { "filestotal",  rd_filestotal,   0, 0 },
+        { "filesfree",   rd_filesfree,    0, 0 },
+        { "filegroups",  rd_filegroups,   0, 0 },
+        { 0 }
+};
 
-        len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_ffree));
-        return len;
-}
+struct lprocfs_vars lprocfs_mds_module_vars[] = {
+        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
+};
 
-int rd_filegroups(char *page, char **start, off_t off, int count, int *eof,
-                  void *data)
-{
-        return 0;
-}
-struct lprocfs_vars status_var_nm_1[]={
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytestotal",rd_kbtotal, 0, 0},
-        {"status/kbytesfree", rd_kbfree, 0, 0},
-        {"status/fstype", rd_fstype, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
+        { "uuid",       lprocfs_rd_uuid, 0, 0 },
+        { 0 }
 };
 
-int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
-               void *data)
-{
-        struct obd_type *class = (struct obd_type*)data;
+struct lprocfs_vars lprocfs_mdt_module_vars[] = {
+        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
+};
 
-        return snprintf(page, count, "%d\n", class->typ_refcnt);
-}
+#endif
+struct lprocfs_static_vars lprocfs_array_vars[] = { {lprocfs_mds_module_vars,
+                                                     lprocfs_mds_obd_vars},
+                                                    {lprocfs_mdt_module_vars,
+                                                     lprocfs_mdt_obd_vars}};
 
-struct lprocfs_vars status_class_var[]={
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
-};
+LPROCFS_INIT_MULTI_VARS(lprocfs_array_vars,
+                        (sizeof(lprocfs_array_vars)/
+                         sizeof(struct lprocfs_static_vars))) 
index 3f6c420..bf04553 100644 (file)
@@ -1,17 +1,26 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  linux/mds/mds_fs.c
- *
+ *  mds/mds_fs.c
  *  Lustre Metadata Server (MDS) filesystem interface code
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- *  This code is issued under the GNU General Public License.
- *  See the file COPYING in this distribution
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *  by Andreas Dilger <adilger@clusterfs.com>
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
  *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define EXPORT_SYMTAB
@@ -79,8 +88,8 @@ int mds_client_add(struct mds_obd *mds, struct mds_export_data *med, int cl_off)
 
                 push_ctxt(&saved, &mds->mds_ctxt, NULL);
                 written = lustre_fwrite(mds->mds_rcvd_filp,
-                                                (char *)med->med_mcd,
-                                                sizeof(*med->med_mcd), &off);
+                                        (char *)med->med_mcd,
+                                        sizeof(*med->med_mcd), &off);
                 pop_ctxt(&saved, &mds->mds_ctxt, NULL);
 
                 if (written != sizeof(*med->med_mcd)) {
@@ -133,6 +142,10 @@ int mds_client_free(struct obd_export *exp)
                        med->med_mcd->mcd_uuid, med->med_off);
         }
 
+        if (med->med_last_reply) {
+                OBD_FREE(med->med_last_reply, med->med_last_replen);
+                med->med_last_reply = NULL;
+        }
         OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
 
         return 0;
@@ -177,7 +190,7 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
         }
 
         CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
-               last_rcvd_size, (last_rcvd_size - sizeof *msd) / sizeof *mcd);
+               last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
 
         /*
          * When we do a clean MDS shutdown, we save the last_rcvd into
@@ -232,6 +245,8 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                                 break;
                         }
 
+                        memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
+                               sizeof exp->exp_client_uuid.uuid);
                         med = &exp->exp_mds_data;
                         med->med_mcd = mcd;
                         mds_client_add(mds, med, cl_off);
@@ -255,11 +270,12 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
                         mds->mds_last_rcvd = last_rcvd;
         }
 
-        mds->mds_last_committed = mds->mds_last_rcvd;
+        obddev->obd_last_committed = mds->mds_last_rcvd;
         if (mds->mds_recoverable_clients) {
                 CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
                        mds->mds_recoverable_clients, mds->mds_last_rcvd);
-                mds->mds_next_recovery_transno = mds->mds_last_committed + 1;
+                mds->mds_next_recovery_transno = obddev->obd_last_committed + 1;
+                obddev->obd_flags |= OBD_RECOVERING;
         }
 
         if (mcd)
index b548792..fef9a0d 100644 (file)
@@ -2,16 +2,25 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  *  linux/mds/mds_lov.c
- *
  *  Lustre Metadata Server (mds) handling of striped file data
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- *  This code is issued under the GNU General Public License.
- *  See the file COPYING in this distribution
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *  by Peter Braam <braam@clusterfs.com> &
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
  *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define EXPORT_SYMTAB
@@ -35,7 +44,7 @@ void lov_packdesc(struct lov_desc *ld)
 }
 
 int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
-                    obd_uuid_t *uuidarray)
+                    struct obd_uuid *uuidarray)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct obd_run_ctxt saved;
@@ -99,11 +108,11 @@ int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
 #warning FIXME: if there is an existing LOVTGTS, verify existing UUIDs same
         rc = 0;
         for (i = 0; i < tgt_count ; i++) {
-                rc = lustre_fwrite(f, uuidarray[i],
+                rc = lustre_fwrite(f, uuidarray[i].uuid,
                                    sizeof(uuidarray[i]), &f->f_pos);
                 if (rc != sizeof(uuidarray[i])) {
                         CERROR("cannot write LOV UUID %s (%d)\n",
-                               uuidarray[i], i);
+                               uuidarray[i].uuid, i);
                         if (rc >= 0)
                                 rc = -EIO;
                         break;
@@ -148,7 +157,7 @@ out:
         return rc;
 }
 
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,obd_uuid_t *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
 {
         struct obd_run_ctxt saved;
         struct file *f;
@@ -188,11 +197,10 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
         struct obd_device *obd = class_conn2obd(conn);
         struct obd_ioctl_data *data = karg;
         struct lov_desc *desc;
-        obd_uuid_t *uuidarray;
+        struct obd_uuid *uuidarray;
         int count;
         int rc;
 
-
         switch (cmd) {
         case OBD_IOC_LOV_SET_CONFIG:
                 desc = (struct lov_desc *)data->ioc_inlbuf1;
@@ -202,7 +210,7 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
                 }
 
                 count = desc->ld_tgt_count;
-                uuidarray = (obd_uuid_t *)data->ioc_inlbuf2;
+                uuidarray = (struct obd_uuid *)data->ioc_inlbuf2;
                 if (sizeof(*uuidarray) * count != data->ioc_inllen2) {
                         CERROR("UUID array size wrong\n");
                         RETURN(-EINVAL);
@@ -218,7 +226,7 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
                 }
 
                 count = desc->ld_tgt_count;
-                uuidarray = (obd_uuid_t *)data->ioc_inlbuf2;
+                uuidarray = (struct obd_uuid *)data->ioc_inlbuf2;
                 if (sizeof(*uuidarray) * count != data->ioc_inllen2) {
                         CERROR("UUID array size wrong\n");
                         RETURN(-EINVAL);
@@ -231,9 +239,15 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn,
                 rc = mds_get_lovtgts(&obd->u.mds, desc->ld_tgt_count, uuidarray);
 
                 RETURN(rc);
+
+            case OBD_IOC_SET_READONLY:
+                CERROR("setting device %s read-only\n",
+                       ll_bdevname(obd->u.mds.mds_sb->s_dev));
+                dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
+                RETURN(0);
+
         default:
                 RETURN(-EINVAL);
         }
-
         RETURN(0);
 }
diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c
new file mode 100644 (file)
index 0000000..2f65384
--- /dev/null
@@ -0,0 +1,238 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/mds/mds_open.c
+ *  Lustre Metadata Server (mds) open handling
+ *
+ *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Mike Shaver <shaver@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/module.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_dlm.h>
+#include <linux/init.h>
+#include <linux/obd_class.h>
+#include <linux/random.h>
+#include <linux/locks.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#endif
+#include <linux/obd_lov.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+
+/* Symbols defined in other MDS source files. */
+extern kmem_cache_t *mds_file_cache;
+extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+extern void mds_start_transno(struct mds_obd *mds);
+extern int mds_finish_transno(struct mds_obd *mds, void *handle,
+                              struct ptlrpc_request *req, int rc);
+/* Keep this prototype in step with the definition in mds_reint.c, which
+ * takes two resource ids and two lock handles. */
+extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
+                                 struct ldlm_res_id *p1_res_id,
+                                 struct ldlm_res_id *p2_res_id,
+                                 struct lustre_handle *p1_lockh,
+                                 struct lustre_handle *p2_lockh);
+
+/*
+ * Open, and with O_CREAT possibly create, the file named in request
+ * buffer 3 under the directory identified by rec->ur_fid1.
+ *
+ * The parent is locked PW when O_CREAT is set and PR otherwise.  The child
+ * is looked up, created if it is negative and O_CREAT was given, and then
+ * locked through child_lockh.  Its attributes (plus, for regular files, its
+ * MD) are packed into the reply and the file is opened.  Progress is
+ * recorded in the IT_OPEN_* bits of rep->lock_policy_res1.
+ *
+ * rec:         unpacked update record (flags, mode, parent fid)
+ * offset:      not used by this function
+ * req:         the request being served; the reply buffers are filled in
+ * child_lockh: receives the handle of the lock taken on the child
+ *
+ * Returns 0 or a negative errno.  The child lock reference is kept on
+ * success, for a non-regular file, and for the O_EXCL -EEXIST case (the
+ * in-line comments say it is returned to the client); the other failure
+ * paths that run after the lock was taken drop it again.
+ */
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req, struct lustre_handle *child_lockh)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct ldlm_reply *rep = lustre_msg_buf(req->rq_repmsg, 0);
+        struct file *file;
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+        struct dentry *dchild, *parent;
+        struct mds_export_data *med;
+        struct mds_file_data *mfd = NULL;
+        struct ldlm_res_id child_res_id = { .name = {0} };
+        struct lustre_handle parent_lockh;
+        int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
+        ENTRY;
+
+#warning replay of open needs to be redone
+        /* was this animal open already and the client lost the reply? */
+        /* XXX need some way to detect a reopen, to avoid locked list walks */
+        med = &req->rq_export->exp_mds_data;
+#if 0
+        spin_lock(&med->med_open_lock);
+        list_for_each(tmp, &med->med_open_head) {
+                mfd = list_entry(tmp, typeof(*mfd), mfd_list);
+                if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
+                            sizeof(mfd->mfd_clienthandle)) &&
+                    body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
+                        dchild = mfd->mfd_file->f_dentry;
+                        spin_unlock(&med->med_open_lock);
+                        CERROR("Re opening "LPD64"\n", body->fid1.id);
+                        GOTO(out_pack, rc = 0);
+                }
+        }
+        spin_unlock(&med->med_open_lock);
+#endif
+        rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
+                CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
+                req->rq_status = -ENOMEM;
+                RETURN(-ENOMEM);
+        }
+
+        /* Step 1: Find and lock the parent */
+        parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
+        parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
+                                       &parent_lockh);
+        if (IS_ERR(parent)) {
+                rc = PTR_ERR(parent);
+                CERROR("parent lookup error %d\n", rc);
+                LBUG();
+                RETURN(rc);
+        }
+        LASSERT(parent->d_inode);
+
+        /* Step 2: Lookup the child */
+        dchild = lookup_one_len(lustre_msg_buf(req->rq_reqmsg, 3),
+                                parent, req->rq_reqmsg->buflens[3] - 1);
+        if (IS_ERR(dchild))
+                GOTO(out_step_2, rc = PTR_ERR(dchild));
+
+        if (dchild->d_inode)
+                rep->lock_policy_res1 |= IT_OPEN_POS;
+        else
+                rep->lock_policy_res1 |= IT_OPEN_NEG;
+
+        /* Step 3: If the child was negative, and we're supposed to,
+         * create it. */
+        if ((rec->ur_flags & O_CREAT) && !dchild->d_inode) {
+                int err;
+                void *handle;
+                mds_start_transno(mds);
+                rep->lock_policy_res1 |= IT_OPEN_CREATE;
+                handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        mds_finish_transno(mds, handle, req, rc);
+                        GOTO(out_step_3, rc);
+                }
+                rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
+                rc = mds_finish_transno(mds, handle, req, rc);
+                err = fsfilt_commit(obd, parent->d_inode, handle);
+                if (rc || err) {
+                        CERROR("error on commit: err = %d\n", err);
+                        if (!rc)
+                                rc = err;
+                        GOTO(out_step_3, rc);
+                }
+                created = 1;
+                child_mode = LCK_PW;
+        } else if (!dchild->d_inode) {
+                /* It's negative and we weren't supposed to create it */
+                GOTO(out_step_3, rc = -ENOENT);
+        }
+
+        /* Step 4: It's positive, so lock the child */
+        child_res_id.name[0] = dchild->d_inode->i_ino;
+        child_res_id.name[1] = dchild->d_inode->i_generation;
+ reacquire:
+        lock_flags = 0;
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                              child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
+                              &lock_flags, ldlm_completion_ast,
+                              mds_blocking_ast, NULL, NULL, child_lockh);
+        if (rc != ELDLM_OK) {
+                CERROR("ldlm_cli_enqueue: %d\n", rc);
+                GOTO(out_step_3, rc = -EIO);
+        }
+
+        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+        mds_pack_inode2body(body, dchild->d_inode);
+        if (S_ISREG(dchild->d_inode->i_mode)) {
+                rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
+                if (rc)
+                        GOTO(out_step_4, rc);
+        } else {
+                /* If this isn't a regular file, we can't open it. */
+                GOTO(out_step_3, rc = 0); /* returns the lock to the client */
+        }
+
+        if (!created && (rec->ur_flags & O_CREAT) && (rec->ur_flags & O_EXCL)) {
+                /* File already exists, we didn't just create it, and we
+                 * were passed O_EXCL; err-or. */
+                GOTO(out_step_3, rc = -EEXIST); // returns a lock to the client
+        }
+
+        /* If we're opening a file without an EA, the client needs a write
+         * lock. */
+        if (child_mode != LCK_PW && S_ISREG(dchild->d_inode->i_mode) &&
+            !(body->valid & OBD_MD_FLEASIZE)) {
+                ldlm_lock_decref(child_lockh, child_mode);
+                child_mode = LCK_PW;
+                goto reacquire;
+        }
+
+        /* Step 5: Open it */
+        rep->lock_policy_res1 |= IT_OPEN_OPEN;
+        mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
+        if (!mfd) {
+                CERROR("mds: out of memory\n");
+                /* Report the failure through rc as well as rq_status, the
+                 * same way the OBD_FAIL_MDS_OPEN_PACK path above does;
+                 * otherwise we would return 0 from a failed open. */
+                req->rq_status = -ENOMEM;
+                GOTO(out_step_4, rc = -ENOMEM);
+        }
+
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mntget(mds->mds_vfsmnt);
+        file = dentry_open(dchild,mds->mds_vfsmnt,
+                           rec->ur_flags & ~(O_DIRECT | O_TRUNC));
+        if (IS_ERR(file)) {
+                rc = PTR_ERR(file);
+                /* Our dchild reference is already gone (see above), so undo
+                 * the mfd and the child lock here and skip l_dput(dchild)
+                 * to avoid putting the dentry a second time. */
+                kmem_cache_free(mds_file_cache, mfd);
+                mfd = NULL;
+                ldlm_lock_decref(child_lockh, child_mode);
+                GOTO(out_step_2, rc);
+        }
+
+        file->private_data = mfd;
+        mfd->mfd_file = file;
+        get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
+        spin_lock(&med->med_open_lock);
+        list_add(&mfd->mfd_list, &med->med_open_head);
+        spin_unlock(&med->med_open_lock);
+
+        body->handle.addr = (__u64)(unsigned long)mfd;
+        body->handle.cookie = mfd->mfd_servercookie;
+        CDEBUG(D_INODE, "file %p: mfd %p, cookie "LPX64"\n",
+               mfd->mfd_file, mfd, mfd->mfd_servercookie);
+        /* The open file now owns the dchild reference, so skip l_dput() */
+        GOTO(out_step_2, rc = 0); /* returns a lock to the client */
+
+ out_step_4:
+        ldlm_lock_decref(child_lockh, child_mode);
+ out_step_3:
+        l_dput(dchild);
+ out_step_2:
+        l_dput(parent);
+        ldlm_lock_decref(&parent_lockh, parent_mode);
+        RETURN(rc);
+}
index 3d340f7..608747f 100644 (file)
@@ -4,7 +4,10 @@
  *  linux/mds/mds_reint.c
  *  Lustre Metadata Server (mds) reintegration routines
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -20,9 +23,6 @@
  *   You should have received a copy of the GNU General Public License
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *  Author: Peter Braam <braam@clusterfs.com>
- *  Author: Andreas Dilger <adilger@clusterfs.com>
  */
 
 #define EXPORT_SYMTAB
@@ -41,12 +41,10 @@ extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
 
 static void mds_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd, int error)
 {       /* callback: logs last_rcvd/error, then records last_rcvd on the obd */
-        struct mds_obd *mds = &obd->u.mds;
-
         CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n",
                last_rcvd, error);
-        if (!error && last_rcvd > mds->mds_last_committed)
-                mds->mds_last_committed = last_rcvd;
+        if (!error && last_rcvd > obd->obd_last_committed) /* only advance */
+                obd->obd_last_committed = last_rcvd;
 }
 
 void mds_start_transno(struct mds_obd *mds)
@@ -102,11 +100,11 @@ int mds_finish_transno(struct mds_obd *mds, void *handle,
         return rc;
 }
 
-/* In the write-back case, the client holds a lock on a subtree.
- * In the intent case, the client holds a lock on the child inode.
- * In the pathname case, the client (may) hold a lock on the child inode. */
+/* In the write-back case, the client holds a lock on a subtree (not supported).
+ * In the intent case, the client holds a lock on the child inode. */
 static int mds_reint_setattr(struct mds_update_record *rec, int offset,
-                             struct ptlrpc_request *req)
+                             struct ptlrpc_request *req,
+                             struct lustre_handle *lh)
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
@@ -114,39 +112,14 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         struct dentry *de;
         struct inode *inode;
         void *handle;
-        struct lustre_handle child_lockh;
         int rc = 0, err;
 
-        if (req->rq_reqmsg->bufcount > offset + 1) {
-                struct dentry *dir;
-                struct lustre_handle dir_lockh;
-                char *name;
-                int namelen;
-
-                /* a name was supplied by the client; fid1 is the directory */
-                dir = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PR,
-                                            &dir_lockh);
-                if (IS_ERR(dir)) {
-                        LBUG();
-                        GOTO(out_setattr, rc = PTR_ERR(dir));
-                }
-
-                name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
-                namelen = req->rq_reqmsg->buflens[offset + 1] - 1;
-                de = mds_name2locked_dentry(obd, dir, NULL, name, namelen,
-                                            0, &child_lockh, LCK_PR);
-                l_dput(dir);
-                if (IS_ERR(de)) {
-                        LBUG();
-                        GOTO(out_setattr_de, rc = PTR_ERR(de));
-                }
-        } else {
-                de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
-                if (!de || IS_ERR(de)) {
-                        GOTO(out_setattr_de, rc = PTR_ERR(de));
-                }
-        }
+        de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+        if (IS_ERR(de))
+                GOTO(out_setattr, rc = PTR_ERR(de));
         inode = de->d_inode;
+
+        LASSERT(inode);
         CDEBUG(D_INODE, "ino %lu\n", inode->i_ino);
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
@@ -161,15 +134,18 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
         }
 
         rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr);
-
-        if (offset) {
-                body = lustre_msg_buf(req->rq_repmsg, 1);
-                mds_pack_inode2fid(&body->fid1, inode);
-                mds_pack_inode2body(body, inode);
+        if (rc == 0 && S_ISREG(inode->i_mode) &&
+            req->rq_reqmsg->bufcount > 1) {
+                rc = fsfilt_set_md(obd, inode, handle,
+                                   lustre_msg_buf(req->rq_reqmsg, 1),
+                                   req->rq_reqmsg->buflens[1]);
         }
 
-        rc = mds_finish_transno(mds, handle, req, rc);
+        body = lustre_msg_buf(req->rq_repmsg, 0);
+        mds_pack_inode2fid(&body->fid1, inode);
+        mds_pack_inode2body(body, inode);
 
+        rc = mds_finish_transno(mds, handle, req, rc);
         err = fsfilt_commit(obd, de->d_inode, handle);
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
@@ -186,7 +162,8 @@ out_setattr:
 }
 
 static int mds_reint_create(struct mds_update_record *rec, int offset,
-                            struct ptlrpc_request *req)
+                            struct ptlrpc_request *req,
+                            struct lustre_handle *lh)
 {
         struct dentry *de = NULL;
         struct mds_obd *mds = mds_req2mds(req);
@@ -195,21 +172,16 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
         struct inode *dir;
         void *handle;
         struct lustre_handle lockh;
-        int rc = 0, err, lock_mode, type = rec->ur_mode & S_IFMT;
+        int rc = 0, err, type = rec->ur_mode & S_IFMT;
         ENTRY;
 
-        /* requests were at offset 2, replies go back at 1 */
-        if (offset)
-                offset = 1;
-
+        LASSERT(offset == 0);
         LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds"));
 
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_CW : LCK_PW;
-
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE))
                 GOTO(out_create, rc = -ESTALE);
 
-        de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, lock_mode, &lockh);
+        de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, &lockh);
         if (IS_ERR(de)) {
                 rc = PTR_ERR(de);
                 CERROR("parent lookup error %d\n", rc);
@@ -217,42 +189,17 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 GOTO(out_create, rc);
         }
         dir = de->d_inode;
-        CDEBUG(D_INODE, "parent ino %lu name %s mode %o\n",
+        LASSERT(dir);
+        CDEBUG(D_INODE, "parent ino %lu creating name %s mode %o\n",
                dir->i_ino, rec->ur_name, rec->ur_mode);
 
         ldlm_lock_dump_handle(D_OTHER, &lockh);
 
-        down(&dir->i_sem);
         dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
         if (IS_ERR(dchild)) {
-                CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                LBUG();
-                GOTO(out_create_de, rc = -ESTALE);
-        }
-
-        if (dchild->d_inode) {
-                struct mds_body *body;
-                struct inode *inode = dchild->d_inode;
-
-                CDEBUG(D_INODE, "child exists (dir %lu, name %s, ino %lu)\n",
-                       dir->i_ino, rec->ur_name, dchild->d_inode->i_ino);
-
-                /* XXX check that mode is correct? */
-
-                body = lustre_msg_buf(req->rq_repmsg, offset);
-                mds_pack_inode2fid(&body->fid1, inode);
-                mds_pack_inode2body(body, inode);
-                if (S_ISREG(inode->i_mode))
-                        mds_pack_md(mds, req, offset + 1, body, inode);
-
-                /* This isn't an error for RECREATE. */
-                if (rec->ur_opcode & REINT_REPLAYING) {
-                        CDEBUG(D_INODE, "EEXIST suppressed for REPLAYING\n");
-                        rc = 0;
-                } else {
-                        rc = -EEXIST;
-                }
-                GOTO(out_create_dchild, rc);
+                rc = PTR_ERR(dchild);
+                CERROR("child lookup error %d\n", rc);
+                GOTO(out_create_de, rc);
         }
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE,
@@ -378,9 +325,8 @@ out_create_commit:
         }
 out_create_dchild:
         l_dput(dchild);
-        ldlm_lock_decref(&lockh, lock_mode);
 out_create_de:
-        up(&dir->i_sem);
+        ldlm_lock_decref(&lockh, LCK_PW);
         l_dput(de);
 out_create:
         req->rq_status = rc;
@@ -414,238 +360,278 @@ out_create_unlink:
         goto out_create_commit;
 }
 
+/* This function doesn't use ldlm_match_or_enqueue because we're always called
+ * with EX or PW locks, and the MDS is no longer allowed to match write locks,
+ * because they take the place of local semaphores.
+ *
+ * Two locks are taken in numerical order of the first resource-name word
+ * (name[0]); the resource with the smaller value is enqueued first.
+ *
+ * lock_mode:             mode used for both locks
+ * obd:                   device whose namespace the locks are taken in
+ * p1_res_id, p2_res_id:  the two resources to lock (must not be NULL)
+ * p1_lockh, p2_lockh:    receive the handle for the matching resource
+ *
+ * If both resource ids are identical only one lock is enqueued; the second
+ * handle is a copy of the first with an extra reference taken on it.
+ *
+ * Returns 0 on success or -EIO if either enqueue fails, in which case no
+ * lock reference is left held. */
+int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
+                          struct ldlm_res_id *p1_res_id,
+                          struct ldlm_res_id *p2_res_id,
+                          struct lustre_handle *p1_lockh,
+                          struct lustre_handle *p2_lockh)
+{
+        struct ldlm_res_id res_id[2];
+        struct lustre_handle *handles[2];
+        int rc, flags;
+        ENTRY;
+
+        LASSERT(p1_res_id != NULL && p2_res_id != NULL);
+
+        CDEBUG(D_INFO, "locks before: "LPU64"/"LPU64"\n",
+               p1_res_id[0].name[0], p2_res_id[0].name[0]);
+
+        if (p1_res_id->name[0] < p2_res_id->name[0]) {
+                handles[0] = p1_lockh;
+                handles[1] = p2_lockh;
+                res_id[0] = *p1_res_id;
+                res_id[1] = *p2_res_id;
+        } else {
+                handles[1] = p1_lockh;
+                handles[0] = p2_lockh;
+                res_id[1] = *p1_res_id;
+                res_id[0] = *p2_res_id;
+        }
+
+        /* Print the ids in the order they will actually be enqueued */
+        CDEBUG(D_INFO, "lock order: "LPU64"/"LPU64"\n",
+               res_id[0].name[0], res_id[1].name[0]);
+
+        flags = LDLM_FL_LOCAL_ONLY;
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, res_id[0],
+                              LDLM_PLAIN, NULL, 0, lock_mode, &flags,
+                              ldlm_completion_ast, mds_blocking_ast, NULL,
+                              NULL, handles[0]);
+        if (rc != ELDLM_OK)
+                RETURN(-EIO);
+        ldlm_lock_dump_handle(D_OTHER, handles[0]);
+
+        if (memcmp(&res_id[0], &res_id[1], sizeof(res_id[0])) == 0) {
+                /* Same resource twice: share the lock, take another ref */
+                memcpy(handles[1], handles[0], sizeof(*(handles[1])));
+                ldlm_lock_addref(handles[1], lock_mode);
+        } else {
+                flags = LDLM_FL_LOCAL_ONLY;
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                                      res_id[1], LDLM_PLAIN, NULL, 0, lock_mode,
+                                      &flags, ldlm_completion_ast,
+                                      mds_blocking_ast, NULL, NULL, handles[1]);
+                if (rc != ELDLM_OK) {
+                        /* Don't leave the first lock held on failure */
+                        ldlm_lock_decref(handles[0], lock_mode);
+                        RETURN(-EIO);
+                }
+        }
+        ldlm_lock_dump_handle(D_OTHER, handles[1]);
+
+        RETURN(0);
+}
+
+/* Unlink (or rmdir) the name in request buffer offset + 1 from the directory
+ * identified by rec->ur_fid1.
+ *
+ * The parent is locked PW and the child EX (through child_lockh).  When the
+ * last link of a regular file is removed, its attributes and MD are packed
+ * into the reply; if the MD carries an EA (OBD_MD_FLEASIZE) the child lock
+ * reference is kept on success, otherwise it is dropped before returning.
+ *
+ * The result is stored in req->rq_status; the function itself returns 0. */
 static int mds_reint_unlink(struct mds_update_record *rec, int offset,
-                            struct ptlrpc_request *req)
+                            struct ptlrpc_request *req,
+                            struct lustre_handle *child_lockh)
 {
-        struct dentry *de = NULL;
+        struct dentry *dir_de = NULL;
         struct dentry *dchild = NULL;
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_body *body = NULL;
+        struct inode *dir_inode, *child_inode;
+        struct lustre_handle *handle, parent_lockh;
+        struct ldlm_res_id child_res_id = { .name = {0} };
         char *name;
-        struct inode *dir, *inode;
-        struct lustre_handle lockh, child_lockh;
-        void *handle;
-        int namelen, lock_mode, err, rc = 0;
+        int namelen, err, rc = 0, flags = 0, return_lock = 0;
         ENTRY;
 
-        /* a name was supplied by the client; fid1 is the directory */
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
-        de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, lock_mode, &lockh);
-        if (IS_ERR(de)) {
-                LBUG();
-                RETURN(PTR_ERR(de));
-        }
-
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
-                GOTO(out_unlink, rc = -ENOENT);
+                GOTO(out, rc = -ENOENT);
+
+        /* Step 1: Lookup the parent by FID */
+        dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+                                       &parent_lockh);
+        if (IS_ERR(dir_de))
+                GOTO(out, rc = PTR_ERR(dir_de));
+        dir_inode = dir_de->d_inode;
+        LASSERT(dir_inode);
 
+        /* Step 2: Lookup the child */
         name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
         namelen = req->rq_reqmsg->buflens[offset + 1] - 1;
-#warning "FIXME: if mds_name2locked_dentry decrefs this lock, we must not"
-        memcpy(&child_lockh, &lockh, sizeof(child_lockh));
-        dchild = mds_name2locked_dentry(obd, de, NULL, name, namelen,
-                                        LCK_EX, &child_lockh, lock_mode);
-
-        if (IS_ERR(dchild)) {
-                LBUG();
-                GOTO(out_unlink, rc = PTR_ERR(dchild));
-        }
-
-        dir = de->d_inode;
-        inode = dchild->d_inode;
-        DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu\n", dir->i_ino,
-                  inode ? inode->i_ino : 0);
 
-        if (!inode) {
+        dchild = lookup_one_len(name, dir_de, namelen);
+        if (IS_ERR(dchild))
+                GOTO(out_step_2a, rc = PTR_ERR(dchild));
+        child_inode = dchild->d_inode;
+        if (child_inode == NULL) {
                 if (rec->ur_opcode & REINT_REPLAYING) {
                         CDEBUG(D_INODE,
                                "child missing (%lu/%s); OK for REPLAYING\n",
-                               dir->i_ino, rec->ur_name);
+                               dir_inode->i_ino, rec->ur_name);
                         rc = 0;
                 } else {
                         CDEBUG(D_INODE,
                                "child doesn't exist (dir %lu, name %s)\n",
-                               dir->i_ino, rec->ur_name);
+                               dir_inode->i_ino, rec->ur_name);
                         rc = -ENOENT;
                 }
-                /* going to out_unlink_cancel causes an LBUG, don't know why */
-                GOTO(out_unlink_dchild, rc);
+                GOTO(out_step_2b, rc);
         }
 
-        if (offset) {
-                /* XXX offset? */
-                offset = 1;
+        DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
+                  dir_inode->i_ino, child_inode->i_ino);
 
-                body = lustre_msg_buf(req->rq_repmsg, offset);
-                mds_pack_inode2fid(&body->fid1, inode);
-                mds_pack_inode2body(body, inode);
-        }
+        /* Step 3: Get a lock on the child */
+        child_res_id.name[0] = child_inode->i_ino;
+        child_res_id.name[1] = child_inode->i_generation;
+
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                              child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
+                              &flags, ldlm_completion_ast, mds_blocking_ast,
+                              NULL, NULL, child_lockh);
+        if (rc != ELDLM_OK)
+                GOTO(out_step_2b, rc);
 
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
-                       to_kdev_t(dir->i_sb->s_dev));
+                       to_kdev_t(dir_inode->i_sb->s_dev));
+
+        /* Slightly magical; see ldlm_intent_policy */
+        if (offset)
+                offset = 1;
+
+        body = lustre_msg_buf(req->rq_repmsg, offset);
 
+        /* Step 4: Do the unlink: client decides between rmdir/unlink!
+         * (bug 72) */
         mds_start_transno(mds);
-        switch (rec->ur_mode /* & S_IFMT ? */) {
+        switch (rec->ur_mode & S_IFMT) {
         case S_IFDIR:
-                handle = fsfilt_start(obd, dir, FSFILT_OP_RMDIR);
+                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
                 if (IS_ERR(handle))
-                        GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
-                rc = vfs_rmdir(dir, dchild);
+                        GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+                rc = vfs_rmdir(dir_inode, dchild);
                 break;
         case S_IFREG:
-                /* get OBD EA data first so client can also destroy object */
-                if ((inode->i_mode & S_IFMT) == S_IFREG && offset)
-                        mds_pack_md(mds, req, offset + 1, body, inode);
+                /* If this is the last reference to this inode, get the OBD EA
+                 * data first so the client can destroy OST objects */
+                if ((child_inode->i_mode & S_IFMT) == S_IFREG &&
+                    child_inode->i_nlink == 1) {
+                        mds_pack_inode2fid(&body->fid1, child_inode);
+                        mds_pack_inode2body(body, child_inode);
+                        mds_pack_md(obd, req->rq_repmsg, offset + 1,
+                                    body, child_inode);
+                        if (body->valid & OBD_MD_FLEASIZE)
+                                return_lock = 1;
+                }
                 /* no break */
         case S_IFLNK:
         case S_IFCHR:
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
-                handle = fsfilt_start(obd, dir, FSFILT_OP_UNLINK);
+                handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
                 if (IS_ERR(handle))
-                        GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
-                rc = vfs_unlink(dir, dchild);
+                        GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+                rc = vfs_unlink(dir_inode, dchild);
                 break;
         default:
                 CERROR("bad file type %o unlinking %s\n", rec->ur_mode, name);
                 handle = NULL;
                 LBUG();
-                GOTO(out_unlink_cancel_transno, rc = -EINVAL);
+                GOTO(out_cancel_transno, rc = -EINVAL);
         }
 
         rc = mds_finish_transno(mds, handle, req, rc);
-        err = fsfilt_commit(obd, dir, handle);
+        err = fsfilt_commit(obd, dir_inode, handle);
+        if (rc != 0 || err != 0) {
+                /* Don't unlink the OST objects if the MDS unlink failed */
+                body->valid = 0;
+        }
         if (err) {
                 CERROR("error on commit: err = %d\n", err);
                 if (!rc)
                         rc = err;
         }
 
-        EXIT;
-
-out_unlink_cancel:
-        ldlm_lock_decref(&child_lockh, LCK_EX);
-        err = ldlm_cli_cancel(&child_lockh);
-        if (err < 0) {
-                CERROR("failed to cancel child inode lock: err = %d\n", err);
-                if (!rc)
-                        rc = -ENOLCK;   /*XXX translate LDLM lock error */
-        }
-out_unlink_dchild:
+        GOTO(out_step_4, rc);
+ out_step_4:
+        if (rc != 0 || return_lock == 0)
+                ldlm_lock_decref(child_lockh, LCK_EX);
+ out_step_2b:
         l_dput(dchild);
-        up(&dir->i_sem);
-out_unlink:
-        ldlm_lock_decref(&lockh, lock_mode);
-        l_dput(de);
+ out_step_2a:
+        /* Release the parent in the mode it was taken in Step 1 (PW) */
+        ldlm_lock_decref(&parent_lockh, LCK_PW);
+        l_dput(dir_de);
+ out:
         req->rq_status = rc;
         return 0;
 
-out_unlink_cancel_transno:
+ out_cancel_transno:
         rc = mds_finish_transno(mds, handle, req, rc);
-        goto out_unlink_cancel;
+        goto out_step_4;
 }
 
 static int mds_reint_link(struct mds_update_record *rec, int offset,
-                          struct ptlrpc_request *req)
+                          struct ptlrpc_request *req, struct lustre_handle *lh)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
         struct dentry *de_src = NULL;
         struct dentry *de_tgt_dir = NULL;
         struct dentry *dchild = NULL;
         struct mds_obd *mds = mds_req2mds(req);
-        struct lustre_handle *handle, tgtlockh, srclockh;
-        int lock_mode;
-        __u64 res_id[3] = { 0 };
-        int flags = 0;
-        int rc = 0, err;
-
+        struct lustre_handle *handle, tgt_dir_lockh, src_lockh;
+        struct ldlm_res_id src_res_id = { .name = {0} };
+        struct ldlm_res_id tgt_dir_res_id = { .name = {0} };
+        int lock_mode, rc = 0, err;
         ENTRY;
-        de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL);
-        if (IS_ERR(de_src) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) {
-                GOTO(out_link, rc = -ESTALE);
-        }
 
-        /* plan to change the link count on this inode: write lock */
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
-        res_id[0] = de_src->d_inode->i_ino;
-        res_id[1] = de_src->d_inode->i_generation;
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK))
+                GOTO(out, rc = -ENOENT);
 
-        rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
-                             NULL, 0, lock_mode, &srclockh);
-        if (rc == 0) {
-                LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &srclockh);
-                if (rc != ELDLM_OK) {
-                        CERROR("lock enqueue: err: %d\n", rc);
-                        GOTO(out_link_src_put, rc = -EIO);
-                }
-        } else {
-                ldlm_lock_dump_handle(D_OTHER, &srclockh);
-        }
+        /* Step 1: Lookup the source inode and target directory by FID */
+        de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+        if (IS_ERR(de_src))
+                GOTO(out, rc = PTR_ERR(de_src));
 
         de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
-        if (IS_ERR(de_tgt_dir)) {
-                GOTO(out_link_src, rc = -ESTALE);
-        }
-
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
-        res_id[0] = de_tgt_dir->d_inode->i_ino;
-        res_id[1] = de_tgt_dir->d_inode->i_generation;
-
-        rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
-                             NULL, 0, lock_mode, &tgtlockh);
-        if (rc == 0) {
-                LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &tgtlockh);
-                if (rc != ELDLM_OK) {
-                        CERROR("lock enqueue: err: %d\n", rc);
-                        GOTO(out_link_tgt_dir_put, rc = -EIO);
-                }
-        } else {
-                ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
-        }
-
-        down(&de_tgt_dir->d_inode->i_sem);
+        if (IS_ERR(de_tgt_dir))
+                GOTO(out_de_src, rc = PTR_ERR(de_tgt_dir));
+
+        CDEBUG(D_INODE, "linking %*s/%s to inode %lu\n",
+               de_tgt_dir->d_name.len, de_tgt_dir->d_name.name, rec->ur_name,
+               de_src->d_inode->i_ino);
+
+        /* Step 2: Take the two locks */
+        lock_mode = LCK_EX;
+        src_res_id.name[0] = de_src->d_inode->i_ino;
+        src_res_id.name[1] = de_src->d_inode->i_generation;
+        tgt_dir_res_id.name[0] = de_tgt_dir->d_inode->i_ino;
+        tgt_dir_res_id.name[1] = de_tgt_dir->d_inode->i_generation;
+
+        rc = enqueue_ordered_locks(LCK_EX, obd, &src_res_id, &tgt_dir_res_id,
+                                   &src_lockh, &tgt_dir_lockh);
+        if (rc != ELDLM_OK)
+                GOTO(out_tgt_dir, rc = -EIO);
+
+        /* Step 3: Lookup the child */
         dchild = lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen - 1);
         if (IS_ERR(dchild)) {
                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
-                GOTO(out_link_tgt_dir, rc = -ESTALE);
+                GOTO(out_drop_locks, rc = PTR_ERR(dchild));
         }
 
         if (dchild->d_inode) {
-                struct inode *inode = dchild->d_inode;
-                /* in intent case ship back attributes to client */
-                if (offset) {
-                        struct mds_body *body =
-                                lustre_msg_buf(req->rq_repmsg, 1);
-
-                        mds_pack_inode2fid(&body->fid1, inode);
-                        mds_pack_inode2body(body, inode);
-                        if (S_ISREG(inode->i_mode))
-                                mds_pack_md(mds, req, 2, body, inode);
-                }
                 if (rec->ur_opcode & REINT_REPLAYING) {
                         /* XXX verify that the link is to the right file? */
-                        rc = 0;
                         CDEBUG(D_INODE,
                                "child exists (dir %lu, name %s) (REPLAYING)\n",
                                de_tgt_dir->d_inode->i_ino, rec->ur_name);
+                        rc = 0;
                 } else {
-                        rc = -EEXIST;
-                        CERROR("child exists (dir %lu, name %s)\n",
+                        CDEBUG(D_INODE, "child exists (dir %lu, name %s)\n",
                                de_tgt_dir->d_inode->i_ino, rec->ur_name);
+                        rc = -EEXIST;
                 }
-                GOTO(out_link_dchild, rc);
+                GOTO(out_drop_child, rc);
         }
 
+        /* Step 4: Do it. */
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
                        to_kdev_t(de_src->d_inode->i_sb->s_dev));
 
@@ -654,7 +640,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 mds_finish_transno(mds, handle, req, rc);
-                GOTO(out_link_dchild, rc);
+                GOTO(out_drop_child, rc);
         }
 
         rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
@@ -668,26 +654,26 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
                 if (!rc)
                         rc = err;
         }
+
         EXIT;
 
-out_link_dchild:
+out_drop_child:
         l_dput(dchild);
-out_link_tgt_dir:
-        ldlm_lock_decref(&tgtlockh, lock_mode);
-out_link_tgt_dir_put:
-        up(&de_tgt_dir->d_inode->i_sem);
+out_drop_locks:
+        ldlm_lock_decref(&src_lockh, lock_mode);
+        ldlm_lock_decref(&tgt_dir_lockh, lock_mode);
+out_tgt_dir:
         l_dput(de_tgt_dir);
-out_link_src:
-        ldlm_lock_decref(&srclockh, lock_mode);
-out_link_src_put:
+out_de_src:
         l_dput(de_src);
-out_link:
+out:
         req->rq_status = rc;
         return 0;
 }
 
 static int mds_reint_rename(struct mds_update_record *rec, int offset,
-                            struct ptlrpc_request *req)
+                            struct ptlrpc_request *req,
+                            struct lustre_handle *lockh)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
         struct dentry *de_srcdir = NULL;
@@ -695,93 +681,88 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
         struct dentry *de_old = NULL;
         struct dentry *de_new = NULL;
         struct mds_obd *mds = mds_req2mds(req);
-        struct lustre_handle tgtlockh, srclockh, oldhandle;
-        int flags = 0, lock_mode, rc = 0, err;
+        struct lustre_handle dlm_handles[4];
+        struct ldlm_res_id p1_res_id = { .name = {0} };
+        struct ldlm_res_id p2_res_id = { .name = {0} };
+        struct ldlm_res_id c1_res_id = { .name = {0} };
+        struct ldlm_res_id c2_res_id = { .name = {0} };
+        int rc = 0, err, lock_count = 3, flags = LDLM_FL_LOCAL_ONLY;
         void *handle;
-        __u64 res_id[3] = { 0 };
         ENTRY;
 
         de_srcdir = mds_fid2dentry(mds, rec->ur_fid1, NULL);
         if (IS_ERR(de_srcdir))
-                GOTO(out_rename, rc = -ESTALE);
-
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
-        res_id[0] = de_srcdir->d_inode->i_ino;
-        res_id[1] = de_srcdir->d_inode->i_generation;
-
-        rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
-                             NULL, 0, lock_mode, &srclockh);
-        if (rc == 0) {
-                LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &srclockh);
-                if (rc != ELDLM_OK) {
-                        CERROR("lock enqueue: err: %d\n", rc);
-                        GOTO(out_rename_srcput, rc = -EIO);
-                }
-        } else {
-                ldlm_lock_dump_handle(D_OTHER, &srclockh);
-        }
-
+                GOTO(out, rc = PTR_ERR(de_srcdir));
         de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
         if (IS_ERR(de_tgtdir))
-                GOTO(out_rename_srcdir, rc = -ESTALE);
+                GOTO(out_put_srcdir, rc = PTR_ERR(de_tgtdir));
+
+        /* The idea here is that we need to get four locks in the end:
+         * one on each parent directory, one on each child.  We need to take
+         * these locks in some kind of order (to avoid deadlocks), and the order
+         * I selected is "increasing resource number" order.  We need to take
+         * the locks on the parent directories, however, before we can lookup
+         * the children.  Thus the following plan:
+         *
+         * 1. Take locks on the parent(s), in order
+         * 2. Lookup the children
+         * 3. Take locks on the children, in order
+         * 4. Execute the rename
+         */
 
-        lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
-        res_id[0] = de_tgtdir->d_inode->i_ino;
-        res_id[1] = de_tgtdir->d_inode->i_generation;
+        /* Step 1: Take locks on the parent(s), in order */
+        p1_res_id.name[0] = de_srcdir->d_inode->i_ino;
+        p1_res_id.name[1] = de_srcdir->d_inode->i_generation;
 
-        rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
-                             NULL, 0, lock_mode, &tgtlockh);
-        if (rc == 0) {
-                flags = 0;
-                LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, lock_mode,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &tgtlockh);
-                if (rc != ELDLM_OK) {
-                        CERROR("lock enqueue: err: %d\n", rc);
-                        GOTO(out_rename_tgtput, rc = -EIO);
-                }
-        } else {
-                ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
-        }
+        p2_res_id.name[0] = de_tgtdir->d_inode->i_ino;
+        p2_res_id.name[1] = de_tgtdir->d_inode->i_generation;
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        double_lock(de_tgtdir, de_srcdir);
-#endif
+        rc = enqueue_ordered_locks(LCK_EX, obd, &p1_res_id, &p2_res_id,
+                                   &(dlm_handles[0]), &(dlm_handles[1]));
+        if (rc != ELDLM_OK)
+                GOTO(out_put_tgtdir, rc);
+
+        /* Step 2: Lookup the children */
         de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1);
         if (IS_ERR(de_old)) {
                 CERROR("old child lookup error (%*s): %ld\n",
                        rec->ur_namelen - 1, rec->ur_name, PTR_ERR(de_old));
-                GOTO(out_rename_tgtdir, rc = -ENOENT);
+                GOTO(out_step_2a, rc = PTR_ERR(de_old));
         }
 
+        if (de_old->d_inode == NULL)
+                GOTO(out_step_2b, rc = -ENOENT);
+
         de_new = lookup_one_len(rec->ur_tgt, de_tgtdir, rec->ur_tgtlen - 1);
         if (IS_ERR(de_new)) {
                 CERROR("new child lookup error (%*s): %ld\n",
                        rec->ur_tgtlen - 1, rec->ur_tgt, PTR_ERR(de_new));
-                GOTO(out_rename_deold, rc = -ENOENT);
+                GOTO(out_step_2b, rc = PTR_ERR(de_new));
         }
 
-        /* in intent case ship back attributes to client */
-        if (offset) {
-                struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
-                struct inode *inode = de_new->d_inode;
-
-                if (!inode) {
-                        body->valid = 0;
-                } else {
-                        mds_pack_inode2fid(&body->fid1, inode);
-                        mds_pack_inode2body(body, inode);
-                        if (S_ISREG(inode->i_mode))
-                                mds_pack_md(mds, req, 2, body, inode);
-                }
+        /* Step 3: Take locks on the children */
+        c1_res_id.name[0] = de_old->d_inode->i_ino;
+        c1_res_id.name[1] = de_old->d_inode->i_generation;
+        if (de_new->d_inode == NULL) {
+                flags = LDLM_FL_LOCAL_ONLY;
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+                                      c1_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
+                                      &flags, ldlm_completion_ast,
+                                      mds_blocking_ast, NULL, NULL,
+                                      &(dlm_handles[2]));
+                lock_count = 3;
+        } else {
+                c2_res_id.name[0] = de_new->d_inode->i_ino;
+                c2_res_id.name[1] = de_new->d_inode->i_generation;
+                rc = enqueue_ordered_locks(LCK_EX, obd, &c1_res_id, &c2_res_id,
+                                           &(dlm_handles[2]),
+                                           &(dlm_handles[3]));
+                lock_count = 4;
         }
+        if (rc != ELDLM_OK)
+                GOTO(out_step_3, rc);
 
+        /* Step 4: Execute the rename */
         OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
                        to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
 
@@ -790,7 +771,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
         if (IS_ERR(handle)) {
                 rc = PTR_ERR(handle);
                 mds_finish_transno(mds, handle, req, rc);
-                GOTO(out_rename_denew, rc);
+                GOTO(out_step_4, rc);
         }
 
         lock_kernel();
@@ -806,56 +787,30 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
                 if (!rc)
                         rc = err;
         }
-        EXIT;
 
-out_rename_denew:
+        EXIT;
+ out_step_4:
+        ldlm_lock_decref(&(dlm_handles[2]), LCK_EX);
+        if (lock_count == 4)
+                ldlm_lock_decref(&(dlm_handles[3]), LCK_EX);
+ out_step_3:
         l_dput(de_new);
-out_rename_deold:
-        if (!rc) {
-                res_id[0] = de_old->d_inode->i_ino;
-                res_id[1] = de_old->d_inode->i_generation;
-                flags = 0;
-                /* Take an exclusive lock on the resource that we're
-                 * about to free, to force everyone to drop their
-                 * locks. */
-                LDLM_DEBUG_NOLOCK("getting EX lock res "LPU64, res_id[0]);
-                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
-                                      res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
-                                      &flags, ldlm_completion_ast,
-                                      mds_blocking_ast, NULL, 0, &oldhandle);
-                if (rc)
-                        CERROR("failed to get child inode lock (child ino "
-                               LPD64" dir ino %lu)\n",
-                               res_id[0], de_old->d_inode->i_ino);
-        }
-
+ out_step_2b:
         l_dput(de_old);
-
-        if (!rc) {
-                ldlm_lock_decref(&oldhandle, LCK_EX);
-                rc = ldlm_cli_cancel(&oldhandle);
-                if (rc < 0)
-                        CERROR("failed to cancel child inode lock ino "
-                               LPD64": %d\n", res_id[0], rc);
-        }
-out_rename_tgtdir:
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        double_up(&de_srcdir->d_inode->i_sem, &de_tgtdir->d_inode->i_sem);
-#endif
-        ldlm_lock_decref(&tgtlockh, lock_mode);
-out_rename_tgtput:
+ out_step_2a:
+        ldlm_lock_decref(&(dlm_handles[0]), LCK_EX);
+        ldlm_lock_decref(&(dlm_handles[1]), LCK_EX);
+ out_put_tgtdir:
         l_dput(de_tgtdir);
-out_rename_srcdir:
-        ldlm_lock_decref(&srclockh, lock_mode);
-out_rename_srcput:
+ out_put_srcdir:
         l_dput(de_srcdir);
-out_rename:
+ out:
         req->rq_status = rc;
         return 0;
 }
 
-typedef int (*mds_reinter) (struct mds_update_record *, int offset,
-                            struct ptlrpc_request *);
+typedef int (*mds_reinter)(struct mds_update_record *, int offset,
+                           struct ptlrpc_request *, struct lustre_handle *);
 
 static mds_reinter reinters[REINT_MAX + 1] = {
         [REINT_SETATTR] mds_reint_setattr,
@@ -863,16 +818,17 @@ static mds_reinter reinters[REINT_MAX + 1] = {
         [REINT_UNLINK] mds_reint_unlink,
         [REINT_LINK] mds_reint_link,
         [REINT_RENAME] mds_reint_rename,
+        [REINT_OPEN] mds_open
 };
 
 int mds_reint_rec(struct mds_update_record *rec, int offset,
-                  struct ptlrpc_request *req)
+                  struct ptlrpc_request *req, struct lustre_handle *lockh)
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_run_ctxt saved;
         struct obd_ucred uc;
-        int realop = rec->ur_opcode & REINT_OPCODE_MASK;
-        int rc;
+        int realop = rec->ur_opcode & REINT_OPCODE_MASK, rc;
+        ENTRY;
 
         if (realop < 1 || realop > REINT_MAX) {
                 CERROR("opcode %d not valid (%sREPLAYING)\n", realop,
@@ -884,10 +840,11 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
         uc.ouc_fsuid = rec->ur_fsuid;
         uc.ouc_fsgid = rec->ur_fsgid;
         uc.ouc_cap = rec->ur_cap;
+        uc.ouc_suppgid = rec->ur_suppgid;
 
         push_ctxt(&saved, &mds->mds_ctxt, &uc);
-        rc = reinters[realop] (rec, offset, req);
+        rc = reinters[realop] (rec, offset, req, lockh);
         pop_ctxt(&saved, &mds->mds_ctxt, &uc);
 
-        return rc;
+        RETURN(rc);
 }
index 61e9114..9512e2a 100644 (file)
@@ -1,15 +1,27 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Object Devices Class Driver
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * These are the only exported functions, they provide some generic
  * infrastructure for managing object devices
- *
- * Object Devices Class Driver
  */
 
 #define EXPORT_SYMTAB
@@ -54,29 +66,92 @@ struct list_head obd_types;
 atomic_t obd_memory;
 int obd_memmax;
 
+/* Root for /proc/lustre */
+struct proc_dir_entry *proc_lustre_root = NULL;
+
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned long obd_fail_loc;
 unsigned long obd_timeout = 100;
 char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
+unsigned long obd_sync_filter; /* = 0, don't sync by default */
 
 /*  opening /dev/obd */
 static int obd_class_open(struct inode * inode, struct file * file)
 {
+        struct obd_class_user_state *ocus;
         ENTRY;
 
-        file->private_data = NULL;
+        OBD_ALLOC (ocus, sizeof (*ocus));
+        if (ocus == NULL)
+                return (-ENOMEM);
+
+        INIT_LIST_HEAD (&ocus->ocus_conns);
+        ocus->ocus_current_obd = NULL;
+        file->private_data = ocus;
+
         MOD_INC_USE_COUNT;
         RETURN(0);
 }
 
+/* Record a connection made through this open file on ocus->ocus_conns so
+ * that obd_class_release() can disconnect it if the file is closed without
+ * an explicit disconnect.
+ *
+ * Returns 0 on success, or -ENOMEM if the tracking entry cannot be
+ * allocated (in which case nothing is added to the list). */
+static int
+obd_class_add_user_conn (struct obd_class_user_state *ocus,
+                         struct lustre_handle *conn)
+{
+        struct obd_class_user_conn *c;
+
+        /* NB holding obd_conf_sem */
+
+        OBD_ALLOC (c, sizeof (*c));
+        /* Test the entry just allocated, not the caller's state: checking
+         * ocus here would let a failed allocation fall through to the
+         * dereference of c below. */
+        if (c == NULL)
+                return (-ENOMEM);
+
+        c->ocuc_conn = *conn;
+        list_add (&c->ocuc_chain, &ocus->ocus_conns);
+        return (0);
+}
+
+static void
+obd_class_remove_user_conn (struct obd_class_user_state *ocus,
+                            struct lustre_handle *conn)
+{
+        struct list_head *e;
+        struct obd_class_user_conn *c;
+
+        /* NB holding obd_conf_sem or last reference.
+         *
+         * Drop the tracking entry for 'conn' from this open file's list.
+         * Handles are compared bytewise; only the first matching entry is
+         * unlinked and freed, and a handle that is not on the list is
+         * silently ignored.  The connection itself is not disconnected
+         * here. */
+
+        list_for_each (e, &ocus->ocus_conns) {
+                c = list_entry (e, struct obd_class_user_conn, ocuc_chain);
+                if (!memcmp (conn, &c->ocuc_conn, sizeof (*conn))) {
+                        list_del (&c->ocuc_chain);
+                        OBD_FREE (c, sizeof (*c));
+                        return;
+                }
+        }
+}
+
 /*  closing /dev/obd */
 static int obd_class_release(struct inode * inode, struct file * file)
 {
+        struct obd_class_user_state *ocus = file->private_data;
+        struct obd_class_user_conn  *c;
         ENTRY;
 
-        // XXX drop lsm, connections here
-        if (file->private_data)
-                file->private_data = NULL;
+        /* Disconnect every connection still recorded on this file's list,
+         * i.e. those that were not removed by an explicit disconnect. */
+        while (!list_empty (&ocus->ocus_conns)) {
+                c = list_entry (ocus->ocus_conns.next,
+                                struct obd_class_user_conn, ocuc_chain);
+                list_del (&c->ocuc_chain);
+
+                CDEBUG (D_IOCTL, "Auto-disconnect %p\n", &c->ocuc_conn);
+
+                /* obd_conf_sem is taken around each disconnect only; the
+                 * result of obd_disconnect() is not checked here. */
+                down (&obd_conf_sem);
+                obd_disconnect (&c->ocuc_conn);
+                up (&obd_conf_sem);
+
+                OBD_FREE (c, sizeof (*c));
+        }
+
+        /* Free the per-open state that obd_class_open() stored in
+         * file->private_data. */
+        OBD_FREE (ocus, sizeof (*ocus));
+
         MOD_DEC_USE_COUNT;
         RETURN(0);
 }
@@ -124,7 +199,8 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
 {
         char *buf = NULL;
         struct obd_ioctl_data *data;
-        struct obd_device *obd = filp->private_data;
+        struct obd_class_user_state *ocus = filp->private_data;
+        struct obd_device *obd = ocus->ocus_current_obd;
         struct lustre_handle conn;
         int err = 0, len = 0, serialised = 0;
         ENTRY;
@@ -133,6 +209,8 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
         case OBD_IOC_BRW_WRITE:
         case OBD_IOC_BRW_READ:
         case OBD_IOC_GETATTR:
+        case ECHO_IOC_ENQUEUE:
+        case ECHO_IOC_CANCEL:
                 break;
         default:
                 down(&obd_conf_sem);
@@ -163,7 +241,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                 }
                 CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
 
-                filp->private_data = &obd_dev[data->ioc_dev];
+                ocus->ocus_current_obd = &obd_dev[data->ioc_dev];
                 GOTO(out, err=0);
         }
 
@@ -192,7 +270,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                                 status = "-";
                         l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
                                      i, status, obd->obd_type->typ_name,
-                                     obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
+                                     obd->obd_name, obd->obd_uuid.uuid, obd->obd_type->typ_refcnt);
                         buf2 +=l;
                         remains -=l;
                         if (remains <= 0) {
@@ -263,6 +341,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                  * currently selected device.
                  */
                 int dev;
+                struct obd_uuid uuid;
 
                 if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
                         CERROR("No UUID passed!\n");
@@ -274,7 +353,8 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                 }
 
                 CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
-                dev = class_uuid2dev(data->ioc_inlbuf1);
+                obd_str2uuid(&uuid, data->ioc_inlbuf1);
+                dev = class_uuid2dev(&uuid);
                 data->ioc_dev = dev;
                 if (dev == -1) {
                         CDEBUG(D_IOCTL, "No device for name %s!\n",
@@ -294,11 +374,11 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                 int dev = -1;
                 int i;
 
-                filp->private_data = NULL;
+                ocus->ocus_current_obd = NULL;
                 for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
                         struct obd_device *obd = &obd_dev[i];
                         if (!obd->obd_type) {
-                                filp->private_data = obd;
+                                ocus->ocus_current_obd = obd;
                                 dev = i;
                                 break;
                         }
@@ -359,6 +439,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                         OBD_ALLOC(obd->obd_name, len);
                         if (!obd->obd_name) {
                                 class_put_type(obd->obd_type);
+                                obd->obd_type = NULL;
                                 GOTO(out, err = -ENOMEM);
                         }
                         memcpy(obd->obd_name, data->ioc_inlbuf2, len);
@@ -374,9 +455,10 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                                         OBD_FREE(obd->obd_name,
                                                  strlen(obd->obd_name) + 1);
                                 class_put_type(obd->obd_type);
+                                obd->obd_type = NULL;
                                 GOTO(out, err=-EINVAL);
                         }
-                        memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
+                        memcpy(obd->obd_uuid.uuid, data->ioc_inlbuf3, len);
                 }
                 /* do the attach */
                 if (OBP(obd, attach))
@@ -407,14 +489,6 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                         CERROR("OBD device %d not attached\n", obd->obd_minor);
                         GOTO(out, err=-ENODEV);
                 }
-                if (!list_empty(&obd->obd_exports)) {
-                        if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
-                                CERROR("OBD device %d (%p) has exports\n",
-                                       obd->obd_minor, obd);
-                                GOTO(out, err=-EBUSY);
-                        }
-                        forcibly_detach_exports(obd);
-                }
                 if (OBP(obd, detach))
                         err = OBP(obd,detach)(obd);
 
@@ -460,41 +534,69 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp,
                         CERROR("Device %d not attached\n", obd->obd_minor);
                         GOTO(out, err=-ENODEV);
                 }
-
-                if ( OBT(obd) && OBP(obd, cleanup) )
+                if (!list_empty(&obd->obd_exports)) {
+                        if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
+                                CERROR("OBD device %d (%p) has exports\n",
+                                       obd->obd_minor, obd);
+                                GOTO(out, err = -EBUSY);
+                        }
+                        forcibly_detach_exports(obd);
+                }
+                if (OBT(obd) && OBP(obd, cleanup))
                         err = obd_cleanup(obd);
 
                 if (!err) {
                         obd->obd_flags &= ~OBD_SET_UP;
                         obd->obd_type->typ_refcnt--;
                 }
-                      GOTO(out, err);
+                GOTO(out, err);
         }
 
         case OBD_IOC_CONNECT: {
-                char * cluuid = "OBD_CLASS_UUID";
+                struct obd_uuid cluuid = { "OBD_CLASS_UUID" };
                 obd_data2conn(&conn, data);
 
-                err = obd_connect(&conn, obd, cluuid, NULL, NULL);
+                err = obd_connect(&conn, obd, &cluuid, NULL, NULL);
 
                 CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
                 obd_conn2data(data, &conn);
                 if (err)
                         GOTO(out, err);
 
+                err = obd_class_add_user_conn (ocus, &conn);
+                if (err != 0) {
+                        obd_disconnect (&conn);
+                        GOTO (out, err);
+                }
+
                 err = copy_to_user((void *)arg, data, sizeof(*data));
-                if (err)
-                        err = -EFAULT;
-                // XXX save connection data into file handle
+                if (err != 0) {
+                        obd_class_remove_user_conn (ocus, &conn);
+                        obd_disconnect (&conn);
+                        GOTO (out, err=-EFAULT);
+                }
                 GOTO(out, err);
         }
 
         case OBD_IOC_DISCONNECT: {
                 obd_data2conn(&conn, data);
+                obd_class_remove_user_conn (ocus, &conn);
                 err = obd_disconnect(&conn);
                 GOTO(out, err);
         }
 
+        case OBD_IOC_NO_TRANSNO: {
+                if (!(obd->obd_flags & OBD_ATTACHED)) {
+                        CERROR("Device %d not attached\n", obd->obd_minor);
+                        GOTO(out, err=-ENODEV);
+                }
+                CDEBUG(D_IOCTL,
+                       "disabling committed-transno notifications on %d\n",
+                       obd->obd_minor);
+                obd->obd_flags |= OBD_NO_TRANSNO;
+                GOTO(out, err = 0);
+        }
+
         default:
                 obd_data2conn(&conn, data);
 
@@ -607,7 +709,10 @@ EXPORT_SYMBOL(obd_memmax);
 EXPORT_SYMBOL(obd_fail_loc);
 EXPORT_SYMBOL(obd_timeout);
 EXPORT_SYMBOL(obd_recovery_upcall);
+EXPORT_SYMBOL(obd_sync_filter);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
+EXPORT_SYMBOL(ptlrpc_abort_inflight_superhack);
+EXPORT_SYMBOL(proc_lustre_root);
 
 EXPORT_SYMBOL(class_register_type);
 EXPORT_SYMBOL(class_unregister_type);
@@ -656,14 +761,19 @@ static int __init init_obdclass(void)
 
         obd_sysctl_init();
 
-        err = lprocfs_reg_main();
-
+#ifdef LPROCFS
+        proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
+        if (!proc_lustre_root)
+                printk(KERN_ERR "error registering /proc/fs/lustre\n");
+#else
+        proc_lustre_root = NULL;
+#endif
         return 0;
 }
 
 static void __exit cleanup_obdclass(void)
 {
-        int i, err;
+        int i;
         ENTRY;
 
         misc_deregister(&obd_psdev);
@@ -679,7 +789,10 @@ static void __exit cleanup_obdclass(void)
         obd_cleanup_caches();
         obd_sysctl_clean();
 
-        err = lprocfs_dereg_main();
+        if (proc_lustre_root) {
+                lprocfs_remove(proc_lustre_root);
+                proc_lustre_root = NULL;
+        }
 
         CERROR("obd mem max: %d leaked: %d\n", obd_memmax,
                atomic_read(&obd_memory));
@@ -689,8 +802,11 @@ static void __exit cleanup_obdclass(void)
 /* Check that we're building against the appropriate version of the Lustre
  * kernel patch */
 #include <linux/lustre_version.h>
-#if (LUSTRE_KERNEL_VERSION != 5)
-# error Cannot continue: Your Lustre kernel patch is out of date
+#define LUSTRE_SOURCE_VERSION 10
+#if (LUSTRE_KERNEL_VERSION < LUSTRE_SOURCE_VERSION)
+# error Cannot continue: Your Lustre kernel patch is older than the sources
+#elif (LUSTRE_KERNEL_VERSION > LUSTRE_SOURCE_VERSION)
+# error Cannot continue: Your Lustre sources are older than the kernel patch
 #endif
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
index 3878315..5c52b43 100644 (file)
@@ -23,6 +23,8 @@
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#error "FIXME: this needs to be updated to match fsfilt_extN.c"
+
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/fs.h>
@@ -269,6 +271,11 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct statfs *sfs)
         return rc;
 }
 
+/* fs_sync method for ext3: hands the superblock to ext3_force_commit()
+ * and returns whatever that call returns. */
+static int fsfilt_ext3_sync(struct super_block *sb)
+{
+        return ext3_force_commit(sb);
+}
+
 static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_type:                "ext3",
         fs_owner:               THIS_MODULE,
@@ -281,6 +288,7 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         fs_journal_data:        fsfilt_ext3_journal_data,
         fs_set_last_rcvd:       fsfilt_ext3_set_last_rcvd,
         fs_statfs:              fsfilt_ext3_statfs,
+        fs_sync:                fsfilt_ext3_sync,
 };
 
 static int __init fsfilt_ext3_init(void)
index 4302392..0984c66 100644 (file)
@@ -4,7 +4,7 @@
  *  lustre/lib/fsfilt_extN.c
  *  Lustre filesystem abstraction routines
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
@@ -124,6 +124,8 @@ static void *fsfilt_extN_start(struct inode *inode, int op)
  * objcount inode blocks
  * 1 superblock
  * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
+ * 
+ * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
  */
 static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
 {
@@ -153,6 +155,9 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
                 ngdblocks = EXTN_SB(sb)->s_gdb_count;
 
         needed += nbitmaps + ngdblocks;
+        
+        /* last_rcvd update */
+        needed += EXTN_DATA_TRANS_BLOCKS;
 
 #ifdef CONFIG_QUOTA
         /* We assume that there will be 1 bit set in s_dquot.flags for each
@@ -351,26 +356,55 @@ static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size)
 }
 
 static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count,
-                                    loff_t *offset)
+                                    loff_t *off)
 {
         struct inode *inode = file->f_dentry->d_inode;
         int rc = 0;
 
         if (S_ISREG(inode->i_mode))
-                rc = file->f_op->read(file, buf, count, offset);
+                rc = file->f_op->read(file, buf, count, off);
         else {
-                struct buffer_head *bh;
-
-                /* FIXME: this assumes the blocksize == count, but the calling
-                 *        function will detect this as an error for now */
-                bh = extN_bread(NULL, inode,
-                                *offset >> inode->i_sb->s_blocksize_bits,
-                                0, &rc);
-
-                if (bh) {
-                        memcpy(buf, bh->b_data, inode->i_blksize);
-                        brelse(bh);
-                        rc = inode->i_blksize;
+                const int blkbits = inode->i_sb->s_blocksize_bits;
+                const int blksize = inode->i_sb->s_blocksize;
+
+                CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n",
+                       count, inode->i_ino, *off);
+                while (count > 0) {
+                        struct buffer_head *bh;
+
+                        bh = NULL;
+                        if (*off < inode->i_size) {
+                                int err = 0;
+
+                                bh = extN_bread(NULL, inode, *off >> blkbits,
+                                                0, &err);
+
+                                CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off);
+
+                                if (bh) {
+                                        memcpy(buf, bh->b_data, blksize);
+                                        brelse(bh);
+                                } else if (err) {
+                                        /* XXX in theory we should just fake
+                                         * this buffer and continue like ext3,
+                                         * especially if this is a partial read
+                                         */
+                                        CERROR("error read dir %lu+%llu: %d\n",
+                                               inode->i_ino, *off, err);
+                                        RETURN(err);
+                                }
+                        }
+                        if (!bh) {
+                                /* No buffer for this block (offset at or past
+                                 * i_size, or extN_bread() returned neither a
+                                 * buffer nor an error): fabricate a single
+                                 * zeroed directory entry whose record length
+                                 * is the block size. */
+                                struct extN_dir_entry_2 *fake = (void *)buf;
+
+                                CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off);
+                                memset(fake, 0, sizeof(*fake));
+                                /* NOTE(review): assumes extN_dir_entry_2 keeps
+                                 * the 16-bit rec_len of the ext2/ext3 on-disk
+                                 * layout; a 32-bit conversion would truncate
+                                 * to 0 on big-endian hosts. */
+                                fake->rec_len = cpu_to_le16(blksize);
+                        }
+                        count -= blksize;
+                        buf += blksize;
+                        *off += blksize;
+                        rc += blksize;
                 }
         }
 
@@ -390,7 +424,6 @@ static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error)
 static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
                                      void *handle, fsfilt_cb_t cb_func)
 {
-#ifdef HAVE_JOURNAL_CALLBACK_STATUS
         struct fsfilt_cb_data *fcb;
 
         fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
@@ -408,17 +441,6 @@ static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
         journal_callback_set(handle, fsfilt_extN_cb_func,
                              (struct journal_callback *)fcb);
         unlock_kernel();
-#else
-#warning "no journal callback kernel patch, faking it..."
-        static long next = 0;
-
-        if (time_after(jiffies, next)) {
-                CERROR("no journal callback kernel patch, faking it...\n");
-                next = jiffies + 300 * HZ;
-        }
-
-        cb_func(obd, last_rcvd, 0);
-#endif
 
         return 0;
 }
@@ -451,6 +473,11 @@ static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
         return rc;
 }
 
+/* fs_sync method for extN: hands the superblock to extN_force_commit()
+ * and returns whatever that call returns. */
+static int fsfilt_extN_sync(struct super_block *sb)
+{
+        return extN_force_commit(sb);
+}
+
 static struct fsfilt_operations fsfilt_extN_ops = {
         fs_type:                "extN",
         fs_owner:               THIS_MODULE,
@@ -464,6 +491,7 @@ static struct fsfilt_operations fsfilt_extN_ops = {
         fs_journal_data:        fsfilt_extN_journal_data,
         fs_set_last_rcvd:       fsfilt_extN_set_last_rcvd,
         fs_statfs:              fsfilt_extN_statfs,
+        fs_sync:                fsfilt_extN_sync,
 };
 
 static int __init fsfilt_extN_init(void)
index 1ec5916..f8d4ac3 100644 (file)
@@ -160,6 +160,12 @@ static int fsfilt_reiserfs_statfs(struct super_block *sb, struct obd_statfs *osf
         return rc;
 }
 
+/* fs_sync method for reiserfs: placeholder only.  Logs an error and
+ * returns -ENOSYS without touching the superblock. */
+static int fsfilt_reiserfs_sync(struct super_block *sb)
+{
+        CERROR("not implemented yet\n");
+        return -ENOSYS;
+}
+
 static struct fsfilt_operations fsfilt_reiserfs_ops = {
         fs_type:                "reiserfs",
         fs_owner:               THIS_MODULE,
@@ -173,6 +179,7 @@ static struct fsfilt_operations fsfilt_reiserfs_ops = {
         fs_journal_data:        fsfilt_reiserfs_journal_data,
         fs_set_last_rcvd:       fsfilt_reiserfs_set_last_rcvd,
         fs_statfs:              fsfilt_reiserfs_statfs,
+        fs_sync:                fsfilt_reiserfs_sync,
 };
 
 static int __init fsfilt_reiserfs_init(void)
index 994949e..e5be2bc 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -36,6 +36,8 @@ kmem_cache_t *import_cachep = NULL;
 kmem_cache_t *export_cachep = NULL;
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp,
+                                        int dying_import);
 
 /*
  * support functions: we could use inter-module communication, but this
@@ -87,7 +89,7 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
                         char *name)
 {
         struct obd_type *type;
-        int rc;
+        int rc = 0;
         ENTRY;
 
         LASSERT(strnlen(name, 1024) < 1024);    /* sanity check */
@@ -111,10 +113,13 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
         strcpy(type->typ_name, name);
         list_add(&type->typ_chain, &obd_types);
 
-        rc = lprocfs_reg_class(type, vars, type);
-        if (rc != 0) {
+        type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
+                                              vars, type);
+        if (IS_ERR(type->typ_procroot)) {
+                rc = PTR_ERR(type->typ_procroot);
+                type->typ_procroot = NULL;
                 list_del(&type->typ_chain);
-                GOTO(failed, rc);
+                GOTO (failed, rc);
         }
 
         RETURN (0);
@@ -144,8 +149,11 @@ int class_unregister_type(char *name)
                 OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
                 RETURN(-EBUSY);
         }
-        if(type->typ_procroot)
-                lprocfs_dereg_class(type);
+
+        if (type->typ_procroot) {
+                lprocfs_remove(type->typ_procroot);
+                type->typ_procroot = NULL;
+        }
 
         list_del(&type->typ_chain);
         OBD_FREE(type->typ_name, strlen(name) + 1);
@@ -174,14 +182,14 @@ int class_name2dev(char *name)
         return res;
 }
 
-int class_uuid2dev(char *uuid)
+int class_uuid2dev(struct obd_uuid *uuid)
 {
         int res = -1;
         int i;
 
         for (i = 0; i < MAX_OBD_DEVICES; i++) {
                 struct obd_device *obd = &obd_dev[i];
-                if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0) {
+                if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0) {
                         res = i;
                         return res;
                 }
@@ -191,13 +199,13 @@ int class_uuid2dev(char *uuid)
 }
 
 
-struct obd_device *class_uuid2obd(char *uuid)
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
 {
         int i;
 
         for (i = 0; i < MAX_OBD_DEVICES; i++) {
                 struct obd_device *obd = &obd_dev[i];
-                if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0)
+                if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0)
                         return obd;
         }
 
@@ -353,6 +361,12 @@ void class_destroy_export(struct obd_export *exp)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
         }
 
+        /* Abort any inflight DLM requests and NULL out their (about to be
+         * freed) import. */
+        if (exp->exp_ldlm_data.led_import.imp_obd)
+                ptlrpc_abort_inflight_superhack(&exp->exp_ldlm_data.led_import,
+                                                1);
+
         exp->exp_cookie = DEAD_HANDLE_MAGIC;
         kmem_cache_free(export_cachep, exp);
 
@@ -362,7 +376,7 @@ void class_destroy_export(struct obd_export *exp)
 /* a connection defines an export context in which preallocation can
    be managed. */
 int class_connect(struct lustre_handle *conn, struct obd_device *obd,
-                  obd_uuid_t cluuid)
+                  struct obd_uuid *cluuid)
 {
         struct obd_export * export;
         if (conn == NULL) {
@@ -375,12 +389,18 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
                 return -EINVAL;
         }
 
+        if (cluuid == NULL) {
+                LBUG();
+                return -EINVAL;
+        }
+
         export = class_new_export(obd);
         if (!export)
                 return -ENOMEM;
 
         conn->addr = (__u64) (unsigned long)export;
         conn->cookie = export->exp_cookie;
+        memcpy(&export->exp_client_uuid, cluuid, sizeof(export->exp_client_uuid));
 
         CDEBUG(D_IOCTL, "connect: addr %Lx cookie %Lx\n",
                (long long)conn->addr, (long long)conn->cookie);
@@ -427,7 +447,7 @@ void class_disconnect_all(struct obd_device *obddev)
                         CERROR("force disconnecting %s:%s export %p\n",
                                export->exp_obd->obd_type->typ_name,
                                export->exp_connection ?
-                               (char *)export->exp_connection->c_remote_uuid :
+                               (char *)export->exp_connection->c_remote_uuid.uuid :
                                "<unconnected>", export);
                         rc = obd_disconnect(&conn);
                         if (rc < 0) {
index f096772..d4be2d6 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Hariharan Thantry <thantry@users.sourceforge.net>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
  *   You should have received a copy of the GNU General Public License
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *   
- *   Author: Hariharan Thantry thantry@users.sourceforge.net
  */
+
 #define EXPORT_SYMTAB
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/version.h>
-#include <linux/proc_fs.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
-#ifdef LPROC_SNMP
-
-#define DEFAULT_MODE 0444
-/*
- * Tokenizer array. Change this array to include special
- * characters for string tokenizing
- */
-const char tok[] = {'/', '\0'};
-
-/*
- * Externs
- */
-extern struct proc_dir_entry proc_root; /* Defined in proc/root.c */
+#ifdef LPROCFS
 
-/*
- * Globals
- */
-struct proc_dir_entry *proc_lustre_root;
-struct proc_dir_entry *proc_lustre_dev_root;
-struct proc_dir_entry *proc_lustre_fs_root;
-
-struct proc_dir_entry* lprocfs_mkdir(const char* dname,
-                                     struct proc_dir_entry *parent)
-{
-        struct proc_dir_entry *child_dir_entry;
-        child_dir_entry = proc_mkdir(dname, parent);
-        if (!child_dir_entry)
-                CERROR("lustre: failed to create /proc entry %s\n", dname);
-        return child_dir_entry;
-}
-
-struct proc_dir_entry* lprocfs_srch(struct proc_dir_entry* head,
-                                    const char* name)
+struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
+                                    const char *name)
 {
         struct proc_dir_entry* temp;
+
         if (!head)
                 return NULL;
+
         temp = head->subdir;
         while (temp != NULL) {
                 if (!strcmp(temp->name, name))
                         return temp;
+
                 temp = temp->next;
         }
         return NULL;
 }
 
-void lprocfs_remove_all(struct proc_dir_entry* root)
+/* lprocfs API calls */
+
+/*
+ * Create one /proc entry below @root for each element of @list.  The list
+ * is terminated by the first element whose name is NULL.
+ *
+ * A name may contain "/" separators.  Every component that is followed by
+ * another separator is treated as a directory: it is looked up with
+ * lprocfs_srch() and created with proc_mkdir() when absent.  The final
+ * component is created with create_proc_entry() (mode 0444) unless an entry
+ * of that name already exists, in which case the existing entry is reused.
+ *
+ * Each resulting entry gets the element's read/write callbacks; its private
+ * data is the element's own data pointer when non-NULL, otherwise @data.
+ *
+ * Returns 0 on success, -EINVAL if @root or @list is NULL, or -ENOMEM if
+ * the path copy cannot be allocated or an entry cannot be created.  Entries
+ * created before a failure are not removed here.
+ */
+int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
+                     void *data)
+{
+        if ((root == NULL) || (list == NULL))
+                return -EINVAL;
+
+        while (list->name) {
+                struct proc_dir_entry *cur_root, *proc;
+                char *pathcopy, *cur, *next;
+                int pathsize = strlen(list->name)+1;
+
+                proc = NULL;
+                cur_root = root;
+
+                /* need copy of path for strsep */
+                OBD_ALLOC(pathcopy, pathsize);
+                if (!pathcopy)
+                        return -ENOMEM;
+
+                next = pathcopy;
+                strcpy(pathcopy, list->name);
+
+                while (cur_root && (cur = strsep(&next, "/"))) {
+                        if (*cur =='\0') /* skip double/trailing "/" */
+                                continue;
+
+                        proc = lprocfs_srch(cur_root, cur);
+                        CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
+                               cur_root->name, cur, next,
+                               (proc ? "exists" : "new"));
+                        /* strsep() leaves next non-NULL while more of the
+                         * path remains, so cur is a directory component;
+                         * otherwise cur is the leaf. */
+                        if (next)
+                                cur_root = (proc ? proc :
+                                                   proc_mkdir(cur, cur_root));
+                        else if (!proc)
+                                proc = create_proc_entry(cur, 0444, cur_root);
+                }
+
+                OBD_FREE(pathcopy, pathsize);
+
+                if ((cur_root==NULL) || (proc==NULL)) {
+                        CERROR("LprocFS: No memory to create /proc entry %s\n",
+                               list->name);
+                        return -ENOMEM;
+                }
+
+                proc->read_proc = list->read_fptr;
+                proc->write_proc = list->write_fptr;
+                proc->data = (list->data ? list->data : data);
+                list++;
+        }
+        return 0;
+}
+
+void lprocfs_remove(struct proc_dir_entry* root)
 {
         struct proc_dir_entry *temp = root;
         struct proc_dir_entry *rm_entry;
@@ -96,235 +124,179 @@ void lprocfs_remove_all(struct proc_dir_entry* root)
         }
 }
 
-#define MAX_STRING_SIZE 100
-struct proc_dir_entry* lprocfs_new_dir(struct proc_dir_entry* root,
-                                       const char* string, const char* tok)
+struct proc_dir_entry *lprocfs_register(const char *name,
+                                        struct proc_dir_entry *parent,
+                                        struct lprocfs_vars *list, void *data)
 {
-        struct proc_dir_entry* new_root;
-        struct proc_dir_entry* temp_entry;
-        char temp_string[MAX_STRING_SIZE+1];
-        char* my_str;
-        char* mover_str;
-
-        strncpy(temp_string, string, MAX_STRING_SIZE);
-        temp_string[MAX_STRING_SIZE] = '\0';
-
-        new_root = root;
-        mover_str = temp_string;
-        while ((my_str = strsep(&mover_str, tok))) {
-                if (!*my_str)
-                        continue;
-                CDEBUG(D_OTHER, "SEARCH= %s\t, ROOT=%s\n", my_str,
-                       new_root->name);
-                temp_entry = lprocfs_srch(new_root, my_str);
-                if (temp_entry == NULL) {
-                        CDEBUG(D_OTHER, "Adding: %s\n", my_str);
-                        temp_entry = lprocfs_mkdir(my_str, new_root);
-                        if (temp_entry == NULL) {
-                                CDEBUG(D_OTHER,
-                                       "! Did not create new dir %s !!\n",
-                                       my_str);
-                                return temp_entry;
-                        }
+        struct proc_dir_entry *newchild;
+
+        /* Refuse to register the same name twice under one parent.  Report
+         * this as an ERR_PTR like the other failure path below, so that
+         * callers testing IS_ERR() do not mistake it for success. */
+        newchild = lprocfs_srch(parent, name);
+        if (newchild) {
+                CERROR(" Lproc: Attempting to register %s more than once \n",
+                                name);
+                return ERR_PTR(-EALREADY);
+        }
+
+        /* Create the directory, then populate it from the variable list.
+         * If populating fails, remove the new directory again and pass
+         * the error code back to the caller. */
+        newchild = proc_mkdir(name, parent);
+        if (newchild && list) {
+                int rc = lprocfs_add_vars(newchild, list, data);
+                if (rc) {
+                        lprocfs_remove(newchild);
+                        return ERR_PTR(rc);
                 }
-                new_root = temp_entry;
         }
-        return new_root;
+        return newchild;
 }
 
-int lprocfs_new_vars(struct proc_dir_entry* root, struct lprocfs_vars* list,
-                     const char* tok, void* data)
-{
-        struct proc_dir_entry *temp_root;
-        struct proc_dir_entry *new_leaf;
-        struct proc_dir_entry *new_parent;
-        char temp_string[MAX_STRING_SIZE+1];
-
-        if (list == NULL)
-                return 0;
+/* Generic callbacks */
 
-        while (list->name) {
-                temp_root = lprocfs_new_dir(root, list->name, tok);
-                if (temp_root == NULL) {
-                        CDEBUG(D_OTHER, "!LProcFS: Mods: No root!");
-                        return -ENOMEM;
-                }
+/* Generic read callback: formats the __u64 that @data points to as a
+ * decimal line in @page (at most @count bytes) and flags end-of-file.
+ * Returns the snprintf() result. */
+int lprocfs_rd_u64(char *page, char **start, off_t off,
+                   int count, int *eof, void *data)
+{
+        __u64 *value = (__u64 *)data;
+
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", *value);
+}
 
-                /* Convert the last element into a leaf-node */
-                strncpy(temp_string, temp_root->name, MAX_STRING_SIZE);
-                temp_string[MAX_STRING_SIZE] = '\0';
-                new_parent = temp_root->parent;
-                remove_proc_entry(temp_root->name, new_parent);
-                new_leaf = create_proc_entry(temp_string, DEFAULT_MODE,
-                                             new_parent);
-                if (new_leaf == NULL) {
-                        CERROR("LprocFS: No memory to create /proc entry %s",
-                                temp_string);
-                        return -ENOMEM;
-                }
-                new_leaf->read_proc = list->read_fptr;
-                new_leaf->write_proc = list->write_fptr;
-                if (data)
-                        new_leaf->data=data;
-                else
-                        new_leaf->data=list->data;
-                list++;
-        }
-        return 0;
+int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+                    int *eof, void *data)
+{
+        struct obd_device* dev = (struct obd_device*)data;
 
+        *eof = 1;
+        return snprintf(page, count, "%s\n", dev->obd_uuid.uuid);
 }
-#undef MAX_STRING_SIZE
-/*
- *  API implementations
- */
-int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var,
-                     void *data)
+
+int lprocfs_rd_name(char *page, char **start, off_t off, int count,
+                    int *eof, void *data)
 {
-        return lprocfs_new_vars(root, var, tok, data);
+        struct obd_device* dev = (struct obd_device *)data;
+
+        *eof = 1;
+        return snprintf(page, count, "%s\n", dev->obd_name);
 }
 
-int lprocfs_reg_obd(struct obd_device *device, struct lprocfs_vars *list,
-                    void *data)
+int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
+                       int *eof, struct statfs *sfs)
 {
-        struct proc_dir_entry* this_dev_root;
-        int retval;
+        *eof = 1;
 
-        if (lprocfs_srch(device->obd_type->typ_procroot, device->obd_name)) {
-                CDEBUG(D_OTHER, "Device with name [%s] exists!",
-                                device->obd_name);
-                return 0;
-        }
+        return snprintf(page, count, "%lu\n", sfs->f_bsize);
+}
 
-        /* Obtain this device root */
-        this_dev_root = lprocfs_mkdir(device->obd_name,
-                                      device->obd_type->typ_procroot);
+int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
+                           int *eof, struct statfs *sfs)
+{
+        __u32 blk_size = sfs->f_bsize >> 10;
+        __u64 result = sfs->f_blocks;
 
-        device->obd_proc_entry = this_dev_root;
-        retval = lprocfs_add_vars(this_dev_root, list, data);
+        while (blk_size >>= 1)
+                result <<= 1;
 
-        return retval;
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", result);
 }
 
-int lprocfs_dereg_obd(struct obd_device* device)
+int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
+                          int *eof, struct statfs *sfs)
 {
-        CDEBUG(D_OTHER, "LPROCFS removing device = %s\n", device->obd_name);
+        __u32 blk_size = sfs->f_bsize >> 10;
+        __u64 result = sfs->f_bfree;
 
-        if (device == NULL) {
-                CDEBUG(D_OTHER, "! LProcfs:  Null pointer !\n");
-                return 0;
-        }
-        if (device->obd_proc_entry == NULL) {
-                CDEBUG(D_OTHER, "! Proc entry non-existent !");
-                return 0;
-        }
-        lprocfs_remove_all(device->obd_proc_entry);
-        device->obd_proc_entry = NULL;
-        if (device->counters)
-                OBD_FREE(device->counters, device->cntr_mem_size);
+        while (blk_size >>= 1)
+                result <<= 1;
 
-        return 0;
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", result);
 }
 
-struct proc_dir_entry* lprocfs_reg_mnt(char* mnt_name)
+int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
+                          int *eof, struct statfs *sfs)
 {
-        if (lprocfs_srch(proc_lustre_fs_root, mnt_name)) {
-                CDEBUG(D_OTHER, "Mount with same name exists!");
-                return 0;
-        }
-        return lprocfs_mkdir(mnt_name, proc_lustre_fs_root);
+        *eof = 1;
+        return snprintf(page, count, "%ld\n", sfs->f_files);
 }
 
-int lprocfs_dereg_mnt(struct proc_dir_entry* root)
+int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
+                         int *eof, struct statfs *sfs)
 {
-        if (root == NULL) {
-                CDEBUG(D_OTHER, "Non-existent root!");
-                return 0;
-        }
-        lprocfs_remove_all(root);
-        return 0;
+        *eof = 1;
+        return snprintf(page, count, "%ld\n", sfs->f_ffree);
 }
 
-int lprocfs_reg_class(struct obd_type* type, struct lprocfs_vars* list,
-                      void* data)
+int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
+                          int *eof, struct statfs *sfs)
 {
-        struct proc_dir_entry* root;
-        int retval;
-        root = lprocfs_mkdir(type->typ_name, proc_lustre_dev_root);
-        lprocfs_add_vars(root, list, data);
-        type->typ_procroot = root;
-        retval = lprocfs_add_vars(root, list, data);
-        return retval;
+        *eof = 1;
+        return snprintf(page, count, "unimplemented\n");
 }
 
-int lprocfs_dereg_class(struct obd_type* class)
+int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+                           int *eof, void *data)
 {
-        if (class == NULL) {
-                CDEBUG(D_OTHER, "Non-existent class");
-                return 0;
-        }
-        lprocfs_remove_all(class->typ_procroot);
-        class->typ_procroot = NULL;
-        CDEBUG(D_OTHER, "LPROCFS removed = %s\n", class->typ_name);
-        return 0;
-
+        /* @data is the obd_device; print the target UUID of its client
+         * state as a single line. */
+        struct obd_device* obd = (struct obd_device*)data;
+        struct client_obd* cli = &obd->u.cli;
+
+        /* The whole value fits in one read, so flag end-of-file like the
+         * other lprocfs_rd_* callbacks do. */
+        *eof = 1;
+        return snprintf(page, count, "%s\n", cli->cl_target_uuid.uuid);
 }
 
-int lprocfs_reg_main()
+int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
+                         int *eof,  void *data)
 {
-        proc_lustre_root = lprocfs_mkdir("lustre", &proc_root);
-        if (proc_lustre_root == NULL) {
-                CERROR(" !! Cannot create /proc/lustre !! \n");
-                return -EINVAL;
-        }
+        struct obd_device *obd = (struct obd_device*)data;
+        struct ptlrpc_connection *conn = obd->u.cli.cl_import.imp_connection;
 
-        proc_lustre_dev_root = lprocfs_mkdir("devices", proc_lustre_root);
-        if (proc_lustre_dev_root == NULL) {
-                CERROR(" !! Cannot create /proc/lustre/devices !! \n");
-                return -EINVAL;
-        }
-        proc_lustre_fs_root = lprocfs_mkdir("mnt_pnt", proc_lustre_root);
+        *eof = 1;
+        return snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
+}
 
-        if (proc_lustre_fs_root == NULL) {
-                CERROR(" !! Cannot create /proc/lustre/mnt_pnt !! \n");
-                return -EINVAL;
-        }
+int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
+                       int *eof, void *data)
+{
+        struct obd_type* class = (struct obd_type*) data;
 
-        return 0;
+        *eof = 1;
+        return snprintf(page, count, "%d\n", class->typ_refcnt);
 }
 
-int lprocfs_dereg_main()
+int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list)
 {
-        lprocfs_remove_all(proc_lustre_root);
-        proc_lustre_root = NULL;
-        proc_lustre_dev_root = NULL;
-        proc_lustre_fs_root = NULL;
-        return 0;
+        struct proc_dir_entry *entry;
+
+        /* Register a directory named after the device under its type's
+         * proc root; the device itself is the default private data. */
+        entry = lprocfs_register(dev->obd_name, dev->obd_type->typ_procroot,
+                                 list, dev);
+        if (IS_ERR(entry)) {
+                /* Never leave an error cookie in the device. */
+                dev->obd_proc_entry = NULL;
+                return PTR_ERR(entry);
+        }
+
+        dev->obd_proc_entry = entry;
+        return 0;
 }
 
-
-/*
- * Needs to go...
- */
-int lprocfs_ll_rd(char *page, char **start, off_t off,
-                  int count, int *eof, void *data)
+int lprocfs_obd_detach(struct obd_device *dev)
 {
-        __u64 *temp = (__u64 *)data;
-        int len;
-        len = snprintf(page, count, LPU64"\n", *temp);
-        return len;
+        if (dev && dev->obd_proc_entry) {
+                lprocfs_remove(dev->obd_proc_entry);
+                dev->obd_proc_entry = NULL;
+        }
+        return 0;
 }
 
-#endif /* LPROC_SNMP */
+#endif /* LPROCFS */
 
-EXPORT_SYMBOL(lprocfs_reg_obd);
-EXPORT_SYMBOL(lprocfs_dereg_obd);
-EXPORT_SYMBOL(lprocfs_reg_main);
-EXPORT_SYMBOL(lprocfs_dereg_main);
-EXPORT_SYMBOL(lprocfs_reg_mnt);
-EXPORT_SYMBOL(lprocfs_dereg_mnt);
+EXPORT_SYMBOL(lprocfs_register);
+EXPORT_SYMBOL(lprocfs_remove);
 EXPORT_SYMBOL(lprocfs_add_vars);
-EXPORT_SYMBOL(lprocfs_reg_class);
-EXPORT_SYMBOL(lprocfs_dereg_class);
-EXPORT_SYMBOL(lprocfs_ll_rd);
-
-
+EXPORT_SYMBOL(lprocfs_obd_attach);
+EXPORT_SYMBOL(lprocfs_obd_detach);
+
+EXPORT_SYMBOL(lprocfs_rd_u64);
+EXPORT_SYMBOL(lprocfs_rd_uuid);
+EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_server_uuid);
+EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
+EXPORT_SYMBOL(lprocfs_rd_numrefs);
+
+EXPORT_SYMBOL(lprocfs_rd_blksize);
+EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
+EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
+EXPORT_SYMBOL(lprocfs_rd_filestotal);
+EXPORT_SYMBOL(lprocfs_rd_filesfree);
+EXPORT_SYMBOL(lprocfs_rd_filegroups);
index 876d41c..4efffa5 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc. <adilger@clusterfs.com>
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -27,6 +28,7 @@
 #define EXPORT_SYMTAB
 #include <linux/lustre_net.h>
 #include <linux/obd_support.h>
+#include <linux/obd_class.h>
 
 void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src)
 {
@@ -69,7 +71,35 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
         sfs->f_namelen = osfs->os_namelen;
 }
 
+/*
+ * Run obd_statfs() against @obd through one of its own exports and unpack
+ * the result into @sfs.
+ *
+ * The first export on obd->obd_exports is used; when the list is empty a
+ * temporary export is created for the call and destroyed before returning.
+ *
+ * Returns -ENOMEM if the temporary export cannot be created, otherwise the
+ * return code of obd_statfs().  @sfs is only written when that code is 0.
+ */
+int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
+{
+        struct lustre_handle conn;
+        struct obd_export *export, *my_export = NULL;
+        struct obd_statfs osfs = { 0 };
+        int rc;
+        ENTRY;
+
+        /* my_export stays NULL unless we had to create an export here */
+        if (list_empty(&obd->obd_exports)) {
+                export = my_export = class_new_export(obd);
+                if (export == NULL)
+                        RETURN(-ENOMEM);
+        } else
+                export = list_entry(obd->obd_exports.next, typeof(*export),
+                                    exp_obd_chain);
+        /* build the connection handle by hand from the chosen export */
+        conn.addr = (unsigned long)export;
+        conn.cookie = export->exp_cookie;
+
+        rc = obd_statfs(&conn, &osfs);
+        if (!rc)
+                statfs_unpack(sfs, &osfs);
+
+        /* only an export created above is destroyed */
+        if (my_export)
+                class_destroy_export(my_export);
+        RETURN(rc);
+}
+
 EXPORT_SYMBOL(obd_statfs_pack);
 EXPORT_SYMBOL(obd_statfs_unpack);
 EXPORT_SYMBOL(statfs_pack);
 EXPORT_SYMBOL(statfs_unpack);
+EXPORT_SYMBOL(obd_self_statfs);
index 8e74aab..d1388d6 100644 (file)
@@ -60,6 +60,8 @@ static int obd_sctl_reset( ctl_table * table, int write, struct file
 #define OBD_TIMEOUT         6       /* RPC timeout before recovery/intr */
 /* XXX move to /proc/sys/lustre/recovery? */
 #define OBD_UPCALL          7       /* path to recovery upcall */
+/* XXX temporary, as we play with sync osts.. */
+#define OBD_SYNCFILTER      8
 
 #define OBD_VARS_SLOT       2
 
@@ -72,6 +74,8 @@ static ctl_table obd_table[] = {
         /* XXX need to lock so we avoid update races with the recovery upcall! */
         {OBD_UPCALL, "recovery_upcall", obd_recovery_upcall, 128, 0644, NULL,
          &proc_dostring, &sysctl_string },
+        {OBD_SYNCFILTER, "filter_sync_on_commit", &obd_sync_filter, sizeof(int),
+                0644, NULL, &proc_dointvec},
        { 0 }
 };
 
index 7048baa..0e279fb 100644 (file)
@@ -86,7 +86,7 @@ static void uuid_pack(struct uuid *uu, class_uuid_t ptr)
        memcpy(out+10, uu->node, 6);
 }
 
-int class_uuid_parse(obd_uuid_t in, class_uuid_t uu)
+int class_uuid_parse(struct obd_uuid in, class_uuid_t uu)
 {
        struct uuid uuid;
        int i;
@@ -122,12 +122,12 @@ int class_uuid_parse(obd_uuid_t in, class_uuid_t uu)
 }
 #endif
 
-void class_uuid_unparse(class_uuid_t uu, obd_uuid_t out)
+void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out)
 {
        struct uuid uuid;
 
        uuid_unpack(uu, &uuid);
-       sprintf(out,
+       sprintf(out->uuid,
                "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
                uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
index 8339327..281166e 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter Braam <braam@clusterfs.com>
  *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
@@ -21,8 +21,6 @@
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#define OBDECHO_VERSION "1.0"
-
 #define EXPORT_SYMTAB
 
 #include <linux/version.h>
 #include <linux/lustre_dlm.h>
 #include <linux/lprocfs_status.h>
 
-static atomic_t echo_page_rws;
-static atomic_t echo_getattrs;
-
-#define ECHO_PROC_STAT "sys/obdecho"
-#define ECHO_INIT_OBJID 0x1000000000000000ULL
+#define ECHO_INIT_OBJID      0x1000000000000000ULL
+#define ECHO_HANDLE_MAGIC    0xabcd0123fedc9876ULL
+
+#define ECHO_OBJECT0_NPAGES  16
+static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES];
+
+/* should be generic per-obd stats... */
+/* One set of request/byte counters; xprocfs_iostats[] below holds NR_CPUS
+ * of these and each field is totalled by its xprocfs_sum_<field>(). */
+struct xprocfs_io_stat {
+        __u64    st_read_bytes;
+        __u64    st_read_reqs;
+        __u64    st_write_bytes;
+        __u64    st_write_reqs;
+        __u64    st_getattr_reqs;
+        __u64    st_setattr_reqs;
+        __u64    st_create_reqs;
+        __u64    st_destroy_reqs;
+        __u64    st_statfs_reqs;
+        __u64    st_open_reqs;
+        __u64    st_close_reqs;
+        __u64    st_punch_reqs;
+};
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
+static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
+static struct proc_dir_entry *xprocfs_dir;
+
+/*
+ * Add @count to counter @field in the xprocfs_iostats[] slot selected by
+ * smp_processor_id().
+ * NOTE(review): the update is a plain += with no locking or atomic op
+ * visible here -- confirm that is acceptable for these statistics.
+ */
+#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count)                 \
+do {                                                            \
+        xprocfs_iostats[smp_processor_id()].field += (count);   \
+} while (0)
+
+/*
+ * Define a static function xprocfs_sum_<field>() that returns, as a
+ * long long, the sum of <field> over xprocfs_iostats[0..smp_num_cpus-1].
+ */
+#define DECLARE_XPROCFS_SUM_STAT(field)                 \
+static long long                                        \
+xprocfs_sum_##field (void)                              \
+{                                                       \
+        long long stat = 0;                             \
+        int       i;                                    \
+                                                        \
+        for (i = 0; i < smp_num_cpus; i++)              \
+                stat += xprocfs_iostats[i].field;       \
+        return (stat);                                  \
+}
 
-int echo_proc_read(char *page, char **start, off_t off, int count, int *eof,
-                   void *data)
+DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+
+/*
+ * read_proc handler shared by all stat files.  @data is the summing
+ * function stored in the entry by xprocfs_add_stat(); its value is printed
+ * into @page as one decimal line.
+ *
+ * *eof is always set.  A read at a non-zero offset returns 0 bytes;
+ * otherwise the snprintf() return value is returned.
+ */
+static int
+xprocfs_rd_stat (char *page, char **start, off_t off, int count,
+                 int  *eof, void *data)
 {
-        long long attrs = atomic_read(&echo_getattrs);
-        long long pages = atomic_read(&echo_page_rws);
-        int len;
-
+        long long (*fn)(void) = (long long(*)(void))data;
+        int         len;
+        
         *eof = 1;
         if (off != 0)
                 return (0);
 
-        len = sprintf(page, "%Ld %Ld\n", attrs, pages);
-
+        len = snprintf (page, count, "%Ld\n", fn());
         *start = page;
         return (len);
 }
+        
 
-int echo_proc_write(struct file *file, const char *ubuffer,
-                    unsigned long count, void *data)
+/*
+ * Create a regular, read-only file @name under xprocfs_dir whose reads are
+ * served by xprocfs_rd_stat() with @fn as the value source.  If the entry
+ * cannot be created an error is logged and nothing else happens; the
+ * caller gets no indication of the failure.
+ */
+static void
+xprocfs_add_stat(char *name, long long (*fn)(void))
 {
-        /* Ignore what we've been asked to write, and just zero the counters */
-        atomic_set (&echo_page_rws, 0);
-        atomic_set (&echo_getattrs, 0);
+        struct proc_dir_entry *entry;
+
+        entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
+        if (entry == NULL) {
+                CERROR ("Can't add procfs stat %s\n", name);
+                return;
+        }
 
-        return (count);
+        /* the function pointer travels to the read handler via ->data */
+        entry->data = fn;
+        entry->read_proc = xprocfs_rd_stat;
+        entry->write_proc = NULL;
 }
 
-void echo_proc_init(void)
+/*
+ * Create the proc directory "sys/<name>" and add one read-only file per
+ * counter of struct xprocfs_io_stat.  If the directory cannot be created
+ * an error is logged and no files are added; xprocfs_dir is then NULL,
+ * which xprocfs_fini() treats as nothing to remove.
+ */
+static void
+xprocfs_init (char *name)
 {
-        struct proc_dir_entry *entry;
+        char  dirname[64];
+        
+        snprintf (dirname, sizeof (dirname), "sys/%s", name);
 
-        entry = create_proc_entry(ECHO_PROC_STAT, S_IFREG|S_IRUGO|S_IWUSR,NULL);
-
-        if (entry == NULL) {
-                CERROR("couldn't create proc entry %s\n", ECHO_PROC_STAT);
+        xprocfs_dir = proc_mkdir (dirname, NULL);
+        if (xprocfs_dir == NULL) {
+                CERROR ("Can't make dir\n");
                 return;
         }
 
-        entry->data = NULL;
-        entry->read_proc = echo_proc_read;
-        entry->write_proc = echo_proc_write;
+        /* file names here must match the list removed in xprocfs_fini() */
+        xprocfs_add_stat ("read_bytes",   xprocfs_sum_st_read_bytes);
+        xprocfs_add_stat ("read_reqs",    xprocfs_sum_st_read_reqs);
+        xprocfs_add_stat ("write_bytes",  xprocfs_sum_st_write_bytes);
+        xprocfs_add_stat ("write_reqs",   xprocfs_sum_st_write_reqs);
+        xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
+        xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
+        xprocfs_add_stat ("create_reqs",  xprocfs_sum_st_create_reqs);
+        xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
+        xprocfs_add_stat ("statfs_reqs",  xprocfs_sum_st_statfs_reqs);
+        xprocfs_add_stat ("open_reqs",    xprocfs_sum_st_open_reqs);
+        xprocfs_add_stat ("close_reqs",   xprocfs_sum_st_close_reqs);
+        xprocfs_add_stat ("punch_reqs",   xprocfs_sum_st_punch_reqs);
 }
 
-void echo_proc_fini(void)
+/*
+ * Remove every stat file and then the directory itself, and reset
+ * xprocfs_dir to NULL.  Does nothing when xprocfs_dir is already NULL, so
+ * it can be called after a failed xprocfs_init() or more than once.
+ */
+void xprocfs_fini (void)
 {
-        remove_proc_entry(ECHO_PROC_STAT, 0);
+        if (xprocfs_dir == NULL)
+                return;
+
+        remove_proc_entry ("read_bytes",   xprocfs_dir);
+        remove_proc_entry ("read_reqs",    xprocfs_dir);
+        remove_proc_entry ("write_bytes",  xprocfs_dir);
+        remove_proc_entry ("write_reqs",   xprocfs_dir);
+        remove_proc_entry ("getattr_reqs", xprocfs_dir);
+        remove_proc_entry ("setattr_reqs", xprocfs_dir);
+        remove_proc_entry ("create_reqs",  xprocfs_dir);
+        remove_proc_entry ("destroy_reqs", xprocfs_dir);
+        remove_proc_entry ("statfs_reqs",  xprocfs_dir);
+        remove_proc_entry ("open_reqs",    xprocfs_dir);
+        remove_proc_entry ("close_reqs",   xprocfs_dir);
+        remove_proc_entry ("punch_reqs",   xprocfs_dir);
+
+        /* the directory is removed by its own name from its parent */
+        remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
+        xprocfs_dir = NULL;
 }
 
 static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
-                        obd_uuid_t cluuid, struct recovd_obd *recovd,
+                        struct obd_uuid *cluuid, struct recovd_obd *recovd,
                         ptlrpc_recovery_cb_t recover)
 {
+        /* recovd and recover are not used; the connect is handed straight
+         * to class_connect() and its return value is passed back. */
         return class_connect(conn, obd, cluuid);
 }
 
+/*
+ * Disconnect @conn: first cancel the DLM locks belonging to its export,
+ * then call class_disconnect() and return its result.  A handle that does
+ * not resolve to an export trips the LASSERT rather than returning an
+ * error.
+ */
+static int echo_disconnect(struct lustre_handle *conn)
+{
+        struct obd_export *exp = class_conn2export(conn);
+        
+        LASSERT (exp != NULL);
+        
+        ldlm_cancel_locks_for_export (exp);
+        return (class_disconnect (conn));
+}
+
 static __u64 echo_next_id(struct obd_device *obddev)
 {
         obd_id id;
@@ -124,17 +212,19 @@ static __u64 echo_next_id(struct obd_device *obddev)
 }
 
 int echo_create(struct lustre_handle *conn, struct obdo *oa,
-                struct lov_stripe_md **ea)
+                struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
+
         if (!obd) {
                 CERROR("invalid client "LPX64"\n", conn->addr);
                 return -EINVAL;
         }
 
-        if (!(oa->o_mode && S_IFMT)) {
+        /* Test the file-type bits with a bitwise mask.  The previous logical
+         * && only rejected o_mode == 0, because S_IFMT is a non-zero
+         * constant, so a mode with no type bits set was accepted. */
+        if (!(oa->o_mode & S_IFMT)) {
-                CERROR("filter obd: no type!\n");
+                CERROR("echo obd: no type!\n");
                 return -ENOENT;
         }
         }
 
@@ -151,10 +241,12 @@ int echo_create(struct lustre_handle *conn, struct obdo *oa,
 }
 
 int echo_destroy(struct lustre_handle *conn, struct obdo *oa,
-                 struct lov_stripe_md *ea)
+                 struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
+
         if (!obd) {
                 CERROR("invalid client "LPX64"\n", conn->addr);
                 RETURN(-EINVAL);
@@ -176,14 +268,53 @@ int echo_destroy(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int echo_open(struct lustre_handle *conn, struct obdo *oa,
-                     struct lov_stripe_md *md)
+                     struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
+        /* "Open" an echo object by writing a handle into the obdo: addr is
+         * the object id and cookie is ECHO_HANDLE_MAGIC.  Returns 0, or
+         * -EINVAL for a bad connection or an obdo without OBD_MD_FLID.
+         * md and oti are not used.  The open counter is bumped before the
+         * checks, so rejected opens are counted as well. */
+        struct lustre_handle *fh = obdo_handle (oa);
+        struct obd_device    *obd = class_conn2obd (conn);
+
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
+
+        if (!obd) {
+                CERROR ("invalid client "LPX64"\n", conn->addr);
+                return (-EINVAL);
+        }
+
+        if (!(oa->o_valid & OBD_MD_FLID)) {
+                CERROR ("obdo missing FLID valid flag: %08x\n", oa->o_valid);
+                return (-EINVAL);
+        }
+
+        fh->addr = oa->o_id;
+        fh->cookie = ECHO_HANDLE_MAGIC;
+        
+        /* tell the caller the handle in the obdo is now meaningful */
+        oa->o_valid |= OBD_MD_FLHANDLE;
         return 0;
 }
 
 static int echo_close(struct lustre_handle *conn, struct obdo *oa,
-                      struct lov_stripe_md *md)
+                      struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
+        /* Validate a close request: the connection must resolve to a
+         * device, the obdo must have OBD_MD_FLHANDLE set, and the handle
+         * cookie must be ECHO_HANDLE_MAGIC (the value echo_open() stores).
+         * Returns 0 when all hold, -EINVAL otherwise.  No state is changed
+         * apart from the close counter, which is bumped before the checks.
+         * md and oti are not used. */
+        struct lustre_handle *fh = obdo_handle (oa);
+        struct obd_device    *obd = class_conn2obd(conn);
+
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
+
+        if (!obd) {
+                CERROR("invalid client "LPX64"\n", conn->addr);
+                return (-EINVAL);
+        }
+
+        if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
+                CERROR("obdo missing FLHANDLE valid flag: %08x\n", oa->o_valid);
+                return (-EINVAL);
+        }
+
+        if (fh->cookie != ECHO_HANDLE_MAGIC) {
+                CERROR ("invalid file handle on close: "LPX64"\n", fh->cookie);
+                return (-EINVAL);
+        }
+        
         return 0;
 }
 
@@ -193,6 +324,8 @@ static int echo_getattr(struct lustre_handle *conn, struct obdo *oa,
         struct obd_device *obd = class_conn2obd(conn);
         obd_id id = oa->o_id;
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
+        
         if (!obd) {
                 CERROR("invalid client "LPX64"\n", conn->addr);
                 RETURN(-EINVAL);
@@ -203,20 +336,19 @@ static int echo_getattr(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
-        memcpy(oa, &obd->u.echo.oa, sizeof(*oa));
+        obdo_cpy_md(oa, &obd->u.echo.oa, oa->o_valid);
         oa->o_id = id;
-        oa->o_valid |= OBD_MD_FLID;
-
-        atomic_inc(&echo_getattrs);
 
         return 0;
 }
 
 static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
-                        struct lov_stripe_md *md)
+                        struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct obd_device *obd = class_conn2obd(conn);
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
+        
         if (!obd) {
                 CERROR("invalid client "LPX64"\n", conn->addr);
                 RETURN(-EINVAL);
@@ -239,15 +371,19 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
 
 int echo_preprw(int cmd, struct lustre_handle *conn, int objcount,
                 struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb,
-                struct niobuf_local *res, void **desc_private)
+                struct niobuf_local *res, void **desc_private, struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
         int rc = 0;
         int i;
-
         ENTRY;
 
+        if ((cmd & OBD_BRW_WRITE) != 0)
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
+        else
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
+
         obd = class_conn2obd(conn);
         if (!obd) {
                 CERROR("invalid client "LPX64"\n", conn->addr);
@@ -265,16 +401,26 @@ int echo_preprw(int cmd, struct lustre_handle *conn, int objcount,
 
         for (i = 0; i < objcount; i++, obj++) {
                 int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
-                int verify = obj->ioo_id != 0;
+                int isobj0 = obj->ioo_id == 0;
+                int verify = !isobj0;
                 int j;
 
                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++, r++) {
-                        r->page = alloc_pages(gfp_mask, 0);
-                        if (!r->page) {
-                                CERROR("can't get page %d/%d for id "LPU64"\n",
-                                       j, obj->ioo_bufcnt, obj->ioo_id);
-                                GOTO(preprw_cleanup, rc = -ENOMEM);
+
+                        if (isobj0 &&
+                            (nb->offset >> PAGE_SHIFT) < ECHO_OBJECT0_NPAGES) {
+                                r->page = echo_object0_pages[nb->offset >> PAGE_SHIFT];
+                                /* Take extra ref so __free_pages() can be called OK */
+                                get_page (r->page);
+                        } else {
+                                r->page = alloc_pages(gfp_mask, 0);
+                                if (r->page == NULL) {
+                                        CERROR("can't get page %d/%d for id "LPU64"\n",
+                                               j, obj->ioo_bufcnt, obj->ioo_id);
+                                        GOTO(preprw_cleanup, rc = -ENOMEM);
+                                }
                         }
+
                         atomic_inc(&obd->u.echo.eo_prep);
 
                         r->offset = nb->offset;
@@ -284,13 +430,18 @@ int echo_preprw(int cmd, struct lustre_handle *conn, int objcount,
                         CDEBUG(D_PAGE, "$$$$ get page %p, addr %p@"LPU64"\n",
                                r->page, r->addr, r->offset);
 
-                        if (verify && cmd == OBD_BRW_READ)
-                                page_debug_setup(r->addr, r->len, r->offset,
-                                                 obj->ioo_id);
-                        else if (verify)
-                                page_debug_setup(r->addr, r->len,
-                                                 0xecc0ecc0ecc0ecc0,
-                                                 0xecc0ecc0ecc0ecc0);
+                        if (cmd == OBD_BRW_READ) {
+                                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes, r->len);
+                                if (verify)
+                                        page_debug_setup(r->addr, r->len, r->offset,
+                                                         obj->ioo_id);
+                        } else {
+                                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes, r->len);
+                                if (verify)
+                                        page_debug_setup(r->addr, r->len,
+                                                         0xecc0ecc0ecc0ecc0,
+                                                         0xecc0ecc0ecc0ecc0);
+                        }
                 }
         }
         CDEBUG(D_PAGE, "%d pages allocated after prep\n",
@@ -307,6 +458,8 @@ preprw_cleanup:
         CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount);
         while (r-- > res) {
                 kunmap(r->page);
+                /* NB if this is an 'object0' page, __free_pages will just
+                 * lose the extra ref gained above */
                 __free_pages(r->page, 0);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
@@ -318,11 +471,12 @@ preprw_cleanup:
 
 int echo_commitrw(int cmd, struct lustre_handle *conn, int objcount,
                   struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
-                  void *desc_private)
+                  void *desc_private, struct obd_trans_info *oti)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
         int rc = 0;
+        int vrc = 0;
         int i;
         ENTRY;
 
@@ -363,16 +517,19 @@ int echo_commitrw(int cmd, struct lustre_handle *conn, int objcount,
                                 GOTO(commitrw_cleanup, rc = -EFAULT);
                         }
 
-                        atomic_inc(&echo_page_rws);
-
                         CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
                                r->page, addr, r->offset);
 
-                        if (verify)
-                                page_debug_check("echo", addr, r->len,
-                                                 r->offset, obj->ioo_id);
-
+                        if (verify) {
+                                vrc = page_debug_check("echo", addr, r->len,
+                                                       r->offset, obj->ioo_id);
+                                /* check all the pages always */
+                                if (vrc != 0 && rc == 0)
+                                        rc = vrc;
+                        }
+                        
                         kunmap(page);
+                        /* NB see comment above regarding object0 pages */
                         obd_kmap_put(1);
                         __free_pages(page, 0);
                         atomic_dec(&obd->u.echo.eo_prep);
@@ -380,7 +537,7 @@ int echo_commitrw(int cmd, struct lustre_handle *conn, int objcount,
         }
         CDEBUG(D_PAGE, "%d pages remain after commit\n",
                atomic_read(&obd->u.echo.eo_prep));
-        RETURN(0);
+        RETURN(rc);
 
 commitrw_cleanup:
         CERROR("cleaning up %ld pages (%d obdos)\n",
@@ -390,6 +547,7 @@ commitrw_cleanup:
 
                 kunmap(page);
                 obd_kmap_put(1);
+                /* NB see comment above regarding object0 pages */
                 __free_pages(page, 0);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
@@ -400,15 +558,18 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         ENTRY;
 
+        spin_lock_init(&obddev->u.echo.eo_lock);
+        obddev->u.echo.eo_lastino = ECHO_INIT_OBJID;
+
         obddev->obd_namespace =
                 ldlm_namespace_new("echo-tgt", LDLM_NAMESPACE_SERVER);
         if (obddev->obd_namespace == NULL) {
                 LBUG();
                 RETURN(-ENOMEM);
         }
-        spin_lock_init(&obddev->u.echo.eo_lock);
-        obddev->u.echo.eo_lastino = ECHO_INIT_OBJID;
 
+        ptlrpc_init_client (LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+                            "echo_ldlm_cb_client", &obddev->obd_ldlm_client);
         RETURN(0);
 }
 
@@ -425,12 +586,15 @@ static int echo_cleanup(struct obd_device *obddev)
 
 int echo_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        /* Attach hook: register the device's proc entries from
+         * lvars.obd_vars and return lprocfs_obd_attach()'s result.
+         * len and data are not used.
+         * NOTE(review): lprocfs_init_vars() is defined elsewhere and is
+         * presumably what fills in lvars -- confirm. */
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 int echo_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        /* Detach hook: passes dev to lprocfs_obd_detach() and returns its
+         * result. */
+        return lprocfs_obd_detach(dev);
 }
 
 static struct obd_ops echo_obd_ops = {
@@ -438,7 +602,7 @@ static struct obd_ops echo_obd_ops = {
         o_attach:      echo_attach,
         o_detach:      echo_detach,
         o_connect:     echo_connect,
-        o_disconnect:  class_disconnect,
+        o_disconnect:  echo_disconnect,
         o_create:      echo_create,
         o_destroy:     echo_destroy,
         o_open:        echo_open,
@@ -454,35 +618,85 @@ static struct obd_ops echo_obd_ops = {
 extern int echo_client_init(void);
 extern void echo_client_cleanup(void);
 
+/*
+ * Free every page still held in echo_object0_pages[] and clear its slot.
+ * NULL slots are skipped, so this is safe on a partly filled array and
+ * when called more than once.
+ */
+static void
+echo_object0_pages_fini (void) 
+{
+        int     i;
+        
+        for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) 
+                if (echo_object0_pages[i] != NULL) {
+                        __free_pages (echo_object0_pages[i], 0);
+                        echo_object0_pages[i] = NULL;
+                }
+}
+
+/*
+ * Fill echo_object0_pages[] with ECHO_OBJECT0_NPAGES zero-filled pages.
+ * The first half of the array is allocated with GFP_KERNEL, the second
+ * half with GFP_HIGHUSER.
+ *
+ * Returns 0 on success.  If an allocation fails, the pages obtained so far
+ * are released via echo_object0_pages_fini() and -ENOMEM is returned.
+ */
+static int
+echo_object0_pages_init (void)
+{
+        struct page *pg;
+        int          i;
+        
+        for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) {
+                int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ? GFP_KERNEL : GFP_HIGHUSER;
+                
+                pg = alloc_pages (gfp_mask, 0);
+                if (pg == NULL) {
+                        echo_object0_pages_fini ();
+                        return (-ENOMEM);
+                }
+                
+                /* map the page only long enough to zero it */
+                memset (kmap (pg), 0, PAGE_SIZE);
+                kunmap (pg);
+
+                echo_object0_pages[i] = pg;
+        }
+        
+        return (0);
+}
+
 static int __init obdecho_init(void)
 {
+        /* Module init.  Sets up, in order: the stat files, the shared
+         * object-0 pages, the echo obd type, and the echo client.  If a
+         * step fails, the steps already done are undone in reverse order
+         * through the failed_* labels and the error code is returned. */
+        struct lprocfs_static_vars lvars;
         int rc;
 
-        printk(KERN_INFO "Echo OBD driver " OBDECHO_VERSION
-               " info@clusterfs.com\n");
+        printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
+
+        lprocfs_init_vars(&lvars);
 
-        echo_proc_init();
-        rc = class_register_type(&echo_obd_ops, status_class_var,
+        /* returns void; a failure here is only logged, not treated as fatal */
+        xprocfs_init ("echo");
+
+        rc = echo_object0_pages_init ();
+        if (rc != 0)
+                goto failed_0;
+        
+        rc = class_register_type(&echo_obd_ops, lvars.module_vars,
                                  OBD_ECHO_DEVICENAME);
-        if (rc)
-                RETURN(rc);
+        if (rc != 0)
+                goto failed_1;
 
         rc = echo_client_init();
-        if (rc)
-                class_unregister_type(OBD_ECHO_DEVICENAME);
+        if (rc == 0)
+                RETURN (0);
 
+        /* echo_client_init() failed: fall through the whole unwind chain */
+        class_unregister_type(OBD_ECHO_DEVICENAME);
+ failed_1:
+        echo_object0_pages_fini ();
+ failed_0:
+        xprocfs_fini ();
+        
         RETURN(rc);
 }
 
 static void __exit obdecho_exit(void)
 {
-        echo_proc_fini();
+        /* Module exit: undo obdecho_init() in reverse order -- echo client,
+         * obd type, object-0 pages, then the stat files. */
         echo_client_cleanup();
         class_unregister_type(OBD_ECHO_DEVICENAME);
+        echo_object0_pages_fini ();
+        xprocfs_fini ();
 }
 
-MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Testing Echo OBD driver " OBDECHO_VERSION);
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
 MODULE_LICENSE("GPL");
 
 module_init(obdecho_init);
index e9c0e90..6c4eb6d 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -22,6 +22,8 @@
 #include <linux/version.h>
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/iobuf.h>
+#include <asm/div64.h>
 
 #define DEBUG_SUBSYSTEM S_ECHO
 
 #include <linux/obd_echo.h>
 #include <linux/lustre_debug.h>
 #include <linux/lprocfs_status.h>
+#include <linux/lustre_lite.h>                  /* for LL_IOC_LOV_SETSTRIPE */
 
-static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn, int len,
-                          void *karg, void *uarg)
+#if 0   /* compiled out: debugging helper that dumps one echo object */
+static void
+echo_printk_object (char *msg, struct ec_object *eco)
+{       /* Print 'msg', the object's id, refcount and deleted flag, then the
+         * stripe size, count and offset of its lsm, followed by one line
+         * per stripe giving that stripe's OST index and object id.
+         */
+        struct lov_stripe_md *lsm = eco->eco_lsm;
+        int                   i;
+
+        printk (KERN_INFO "%s: object %p: "LPX64", refs %d%s: "LPX64
+                "=%u!%u@%d\n", msg, eco, eco->eco_id, eco->eco_refcount,
+                eco->eco_deleted ? "(deleted) " : "",
+                lsm->lsm_object_id, lsm->lsm_stripe_size,
+                lsm->lsm_stripe_count, lsm->lsm_stripe_offset);
+
+        for (i = 0; i < lsm->lsm_stripe_count; i++)
+                printk (KERN_INFO "   [%2u]"LPX64"\n",
+                        lsm->lsm_oinfo[i].loi_ost_idx,
+                        lsm->lsm_oinfo[i].loi_id);
+}
+#endif
+
+static struct ec_object *
+echo_find_object_locked (struct obd_device *obd, obd_id id)     /* Walk
+ * the echo client's ec_objects list and return the first object whose
+ * eco_id equals 'id', or NULL if there is none.  No reference is taken.
+ * The list is not locked here; every caller visible in this file holds
+ * ec->ec_lock around the call.
+ */
 {
-        struct obd_device *obd = class_conn2obd(obdconn);
         struct echo_client_obd *ec = &obd->u.echo_client;
-        struct obd_ioctl_data *data = karg;
-        int rw = OBD_BRW_READ, rc = 0;
-        struct lov_stripe_md *lsm = NULL;
+        struct ec_object       *eco = NULL;
+        struct list_head       *el;
+
+        list_for_each (el, &ec->ec_objects) {
+                eco = list_entry (el, struct ec_object, eco_obj_chain);
+
+                if (eco->eco_id == id)
+                        return (eco);
+        }
+        return (NULL);
+}
+
+/*
+ * Copy a stripe descriptor out to user space.
+ *
+ * The number of bytes copied is the offset of the slot just past the last
+ * used lsm_oinfo[] entry, i.e. the fixed part of the descriptor plus one
+ * entry per stripe.
+ *
+ * Returns 0 on success, -EINVAL if the user buffer (ulsm_nob bytes) is
+ * too small, or -EFAULT if copy_to_user() reports uncopied bytes.
+ */
+static int
+echo_copyout_lsm (struct lov_stripe_md *lsm, void *ulsm, int ulsm_nob)
+{
+        int lsm_nob = offsetof (struct lov_stripe_md,
+                                lsm_oinfo[lsm->lsm_stripe_count]);
+
+        if (ulsm_nob < lsm_nob)
+                return (-EINVAL);
+
+        return (copy_to_user (ulsm, lsm, lsm_nob) ? -EFAULT : 0);
+}
+
+/*
+ * Copy a stripe descriptor in from user space and validate it.
+ *
+ * obd       echo client device; its ec_nstripes bounds the stripe count
+ *           and stripe offset, and its ec_lsmsize is the size 'lsm' was
+ *           allocated with
+ * lsm       kernel destination buffer
+ * ulsm      user-space source
+ * ulsm_nob  size in bytes the caller claims for 'ulsm'
+ *
+ * Returns 0 on success, -EFAULT if a user copy fails, or -EINVAL if the
+ * user buffer is too small or the descriptor is inconsistent (bad magic,
+ * too many stripes, stripe offset out of range, stripe size not a
+ * multiple of PAGE_SIZE, or total stripe width not representable in an
+ * unsigned long).
+ */
+static int
+echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm,
+                 void *ulsm, int ulsm_nob)
+{
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        int                     nob;
+
+        /* Reject negative lengths explicitly: compared directly against
+         * sizeof(), a negative int is promoted to a huge unsigned value
+         * and would slip through. */
+        if (ulsm_nob < 0 ||
+            (size_t)ulsm_nob < sizeof (*lsm))
+                return (-EINVAL);
+
+        if (copy_from_user (lsm, ulsm, sizeof (*lsm)))
+                return (-EFAULT);
+
+        /* Bound the stripe count before deriving any size from it */
+        if (lsm->lsm_stripe_count > ec->ec_nstripes)
+                return (-EINVAL);
+
+        nob = lsm->lsm_stripe_count * sizeof (lsm->lsm_oinfo[0]);
+
+        /* The stripe array is read from the user buffer starting at the
+         * offset of lsm_oinfo, so the buffer must cover that offset plus
+         * the array itself, not just the array. */
+        if ((size_t)ulsm_nob < offsetof (struct lov_stripe_md, lsm_oinfo) + nob ||
+            lsm->lsm_magic != LOV_MAGIC ||
+            (lsm->lsm_stripe_offset != 0 &&
+             lsm->lsm_stripe_offset != 0xffffffff &&
+             lsm->lsm_stripe_offset >= ec->ec_nstripes) ||
+            (lsm->lsm_stripe_size & (PAGE_SIZE - 1)) != 0 ||
+            ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
+                return (-EINVAL);
+
+        LASSERT (ec->ec_lsmsize >= sizeof (*lsm) + nob);
+
+        if (copy_from_user(lsm->lsm_oinfo,
+                           ((struct lov_stripe_md *)ulsm)->lsm_oinfo, nob))
+                return (-EFAULT);
+
+        return (0);
+}
+
+/*
+ * Allocate an echo object together with its stripe descriptor buffer
+ * (ec_lsmsize bytes).  The new object points back at 'obd', has a zero
+ * reference count, is not marked deleted, and its descriptor carries
+ * LOV_MAGIC.  It is not linked into any list.
+ *
+ * Returns the object, or NULL if either allocation fails.
+ */
+static struct ec_object *
+echo_allocate_object (struct obd_device *obd)
+{
+        struct ec_object *obj;
+
+        OBD_ALLOC (obj, sizeof (*obj));
+        if (obj != NULL) {
+                OBD_ALLOC (obj->eco_lsm, obd->u.echo_client.ec_lsmsize);
+                if (obj->eco_lsm != NULL) {
+                        obj->eco_device = obd;
+                        obj->eco_deleted = 0;
+                        obj->eco_refcount = 0;
+                        /* stripe count is deliberately left at 0 */
+                        obj->eco_lsm->lsm_magic = LOV_MAGIC;
+                        return (obj);
+                }
+
+                /* no descriptor: give the half-built object back */
+                OBD_FREE (obj, sizeof (*obj));
+        }
+
+        return (NULL);
+}
+
+/*
+ * Release an echo object and its stripe descriptor.  The object must no
+ * longer be referenced (asserted).  The descriptor is freed with the
+ * owning device's ec_lsmsize, matching how it was allocated.
+ */
+static void
+echo_free_object (struct ec_object *eco)
+{
+        LASSERT (eco->eco_refcount == 0);
+
+        OBD_FREE (eco->eco_lsm, eco->eco_device->u.echo_client.ec_lsmsize);
+        OBD_FREE (eco, sizeof (*eco));
+}
+
+static int
+echo_create_object (struct obd_device *obd, int on_target, struct obdo *oa,
+                    void *ulsm, int ulsm_nob)
+{       /* Create a new echo object and link it into ec->ec_objects.
+         *
+         * obd        echo client device
+         * on_target  non-zero: also create the object on the connected
+         *            target via obd_create()
+         * oa         object attributes; o_id names the object when
+         *            OBD_MD_FLID is set in o_valid
+         * ulsm       optional user-space stripe descriptor (may be NULL)
+         * ulsm_nob   size of the buffer at 'ulsm'
+         *
+         * Returns 0 on success; -EINVAL, -ENOMEM, -EEXIST, or whatever
+         * echo_copyin_lsm()/obd_create() returned on failure.  On every
+         * failure path the newly allocated object is freed.
+         */
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct ec_object       *eco2;
+        struct ec_object       *eco;
+        struct lov_stripe_md   *lsm;
+        int                     rc;
+        int                     i;
+        
+        if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
+            (on_target ||                       /* set_stripe */
+             ec->ec_nstripes != 0)) {           /* LOV */
+                CERROR ("No valid oid\n");
+                return (-EINVAL);
+        }
+
+        eco = echo_allocate_object (obd);
+        if (eco == NULL)
+                return (-ENOMEM);
+        
+        lsm = eco->eco_lsm;
+
+        if (ulsm != NULL) {
+                rc = echo_copyin_lsm (obd, lsm, ulsm, ulsm_nob);
+                if (rc != 0)
+                        goto failed;
+        }
+        
+        /* setup object ID here for !on_target and LOV hint */
+        if ((oa->o_valid & OBD_MD_FLID) != 0)
+                eco->eco_id = lsm->lsm_object_id = oa->o_id;
+        
+        /* defaults -> actual values */
+        if (lsm->lsm_stripe_offset == 0xffffffff)
+                lsm->lsm_stripe_offset = 0;
+
+        if (lsm->lsm_stripe_count == 0)
+                lsm->lsm_stripe_count = ec->ec_nstripes;
+
+        if (lsm->lsm_stripe_size == 0)
+                lsm->lsm_stripe_size = PAGE_SIZE;
+
+        /* setup stripes: indices + default ids if required */
+        for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                if (lsm->lsm_oinfo[i].loi_id == 0)
+                        lsm->lsm_oinfo[i].loi_id = lsm->lsm_object_id;
+
+                lsm->lsm_oinfo[i].loi_ost_idx =
+                        (lsm->lsm_stripe_offset + i) % ec->ec_nstripes;
+        }
+        
+        if (on_target) {
+                rc = obd_create (&ec->ec_conn, oa, &lsm, NULL);
+                if (rc != 0)
+                        goto failed;
+                
+                /* See what object ID we were given */
+                LASSERT ((oa->o_valid & OBD_MD_FLID) != 0);
+                eco->eco_id = lsm->lsm_object_id = oa->o_id;
+        }
+        
+        spin_lock (&ec->ec_lock);
+
+        eco2 = echo_find_object_locked (obd, oa->o_id); /* NOTE(review): keyed on oa->o_id even when OBD_MD_FLID was not set (possible if !on_target and ec_nstripes == 0) - confirm intended */
+        if (eco2 != NULL) {                     /* conflict */
+                spin_unlock (&ec->ec_lock);
+                
+                CERROR ("Can't create object id "LPX64": id already exists%s\n", 
+                        oa->o_id, on_target ? " (undoing create)" : "");
+                
+                if (on_target)
+                        obd_destroy (&ec->ec_conn, oa, lsm, NULL);      /* return value ignored: best-effort undo of obd_create() above */
+                
+                rc = -EEXIST;
+                goto failed;
+        }
+        
+        list_add (&eco->eco_obj_chain, &ec->ec_objects);       /* published with eco_refcount still 0 */
+        spin_unlock (&ec->ec_lock);
+        CDEBUG (D_INFO,
+                "created %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+                eco, eco->eco_id,
+                eco->eco_lsm->lsm_stripe_size,
+                eco->eco_lsm->lsm_stripe_count,
+                eco->eco_lsm->lsm_stripe_offset,
+                eco->eco_refcount, eco->eco_deleted);
+        return (0);
+
+ failed:
+        echo_free_object (eco);
+        return (rc);
+}
+
+/*
+ * Look up the echo object named by oa->o_id and take a reference on it.
+ *
+ * If no such object exists and the client has no striping configured
+ * (ec_nstripes == 0), a default object is allocated and added to
+ * ec_objects on the fly.
+ *
+ * ecop  out: the referenced object (set only when 0 is returned)
+ * obd   echo client device
+ * oa    object attributes; OBD_MD_FLID must be set in o_valid
+ *
+ * Returns 0 on success, -EINVAL if oa carries no object id, -EAGAIN if
+ * the object is marked deleted, -ENOENT if it does not exist and
+ * striping is required, or -ENOMEM.
+ *
+ * ec_lock is released on every return path.
+ */
+static int
+echo_get_object (struct ec_object **ecop, struct obd_device *obd, struct obdo *oa)
+{
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct ec_object       *eco;
+        struct ec_object       *eco2;
+        int                     rc;
+
+        if ((oa->o_valid & OBD_MD_FLID) == 0) {
+                CERROR ("No valid oid\n");
+                return (-EINVAL);
+        }
+
+        spin_lock (&ec->ec_lock);
+        eco = echo_find_object_locked (obd, oa->o_id);
+        if (eco != NULL) {
+                if (eco->eco_deleted) {         /* being deleted */
+                        /* drop the lock before bailing out; returning
+                         * with it held would deadlock the next taker */
+                        spin_unlock (&ec->ec_lock);
+                        return (-EAGAIN);       /* (see comment in cleanup) */
+                }
+
+                eco->eco_refcount++;
+                spin_unlock (&ec->ec_lock);
+                *ecop = eco;
+                CDEBUG (D_INFO,
+                        "found %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+                        eco, eco->eco_id,
+                        eco->eco_lsm->lsm_stripe_size,
+                        eco->eco_lsm->lsm_stripe_count,
+                        eco->eco_lsm->lsm_stripe_offset,
+                        eco->eco_refcount, eco->eco_deleted);
+                return (0);
+        }
+        spin_unlock (&ec->ec_lock);
+
+        if (ec->ec_nstripes != 0)               /* striping required */
+                return (-ENOENT);
+
+        /* allocate outside the lock, then re-check for a racing creator */
+        eco = echo_allocate_object (obd);
+        if (eco == NULL)
+                return (-ENOMEM);
+
+        eco->eco_id = eco->eco_lsm->lsm_object_id = oa->o_id;
+
+        spin_lock (&ec->ec_lock);
+
+        eco2 = echo_find_object_locked (obd, oa->o_id);
+        if (eco2 == NULL) {                     /* didn't race */
+                /* take the caller's reference before the object becomes
+                 * visible on the list, so a concurrent lookup cannot
+                 * have its increment overwritten */
+                eco->eco_refcount = 1;
+                list_add (&eco->eco_obj_chain, &ec->ec_objects);
+                spin_unlock (&ec->ec_lock);
+                *ecop = eco;
+                CDEBUG (D_INFO,
+                        "created %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+                        eco, eco->eco_id,
+                        eco->eco_lsm->lsm_stripe_size,
+                        eco->eco_lsm->lsm_stripe_count,
+                        eco->eco_lsm->lsm_stripe_offset,
+                        eco->eco_refcount, eco->eco_deleted);
+                return (0);
+        }
+
+        if (eco2->eco_deleted)
+                rc = -EAGAIN;                   /* lose race */
+        else {
+                eco2->eco_refcount++;           /* take existing */
+                *ecop = eco2;
+                rc = 0;
+                LASSERT (eco2->eco_id == eco2->eco_lsm->lsm_object_id);
+                CDEBUG (D_INFO,
+                        "found(2) %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+                        eco2, eco2->eco_id,
+                        eco2->eco_lsm->lsm_stripe_size,
+                        eco2->eco_lsm->lsm_stripe_count,
+                        eco2->eco_lsm->lsm_stripe_offset,
+                        eco2->eco_refcount, eco2->eco_deleted);
+        }
+
+        spin_unlock (&ec->ec_lock);
+
+        /* our speculative allocation was never published; discard it */
+        echo_free_object (eco);
+        return (rc);
+}
+
+/*
+ * Drop the caller's reference on an echo object.
+ *
+ * Nothing more happens unless this was the last reference AND the object
+ * has been marked deleted.  In that case unused locks on the object are
+ * cancelled, the object is unlinked from ec_objects and then freed.
+ */
+static void
+echo_put_object (struct ec_object *eco)
+{
+        struct echo_client_obd *ec = &eco->eco_device->u.echo_client;
+        int                     reap;
+
+        spin_lock (&ec->ec_lock);
+        eco->eco_refcount--;
+        LASSERT (eco->eco_refcount >= 0);
+        reap = (eco->eco_refcount == 0 && eco->eco_deleted);
+        spin_unlock (&ec->ec_lock);
+
+        if (!reap)
+                return;
+
+        /* NB the object stays on the object list while locks are being
+         * cancelled.  Nobody may enqueue on this object number until we
+         * can be sure there will be no more lock callbacks.
+         */
+        obd_cancel_unused (&ec->ec_conn, eco->eco_lsm, 0);
+
+        /* safe to unlink it now */
+        spin_lock (&ec->ec_lock);
+        list_del (&eco->eco_obj_chain);
+        spin_unlock (&ec->ec_lock);
+
+        LASSERT (eco->eco_refcount == 0);
+
+        echo_free_object (eco);
+}
+
+/*
+ * Translate a file offset into the stripe-local offset and the id of the
+ * stripe object that holds it.
+ *
+ * lsm   stripe descriptor; may be NULL
+ * offp  in: offset within the whole object; out: offset within the stripe
+ * idp   out: loi_id of the stripe containing the offset
+ *
+ * *offp and *idp are left untouched when there is no descriptor or it
+ * describes at most one stripe, so callers can preload them with the
+ * unstriped values.
+ */
+static void
+echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
+{
+        unsigned long stripe_count;
+        unsigned long stripe_size;
+        unsigned long width;
+        unsigned long woffset;
+        int           stripe_index;
+        obd_off       offset;
+
+        /* echo_client_kbrw() accepts a NULL lsm and still calls in here
+         * when verifying, so tolerate it rather than dereference it */
+        if (lsm == NULL || lsm->lsm_stripe_count <= 1)
+                return;
+
+        offset       = *offp;
+        stripe_size  = lsm->lsm_stripe_size;
+        stripe_count = lsm->lsm_stripe_count;
+
+        /* width = # bytes in all stripes */
+        width = stripe_size * stripe_count;
+
+        /* woffset = offset within a width; offset = whole number of widths */
+        woffset = do_div (offset, width);
+
+        stripe_index = woffset / stripe_size;
+
+        *idp = lsm->lsm_oinfo[stripe_index].loi_id;
+        *offp = offset * stripe_size + woffset % stripe_size;
+}
+
+static int
+echo_client_kbrw (struct obd_device *obd, int rw,
+                  struct obdo *oa, struct lov_stripe_md *lsm,
+                  obd_off offset, obd_size count)
+{       /* Bulk read or write using pages allocated here in the kernel.
+         *
+         * obd     echo client device
+         * rw      OBD_BRW_WRITE or OBD_BRW_READ (asserted)
+         * oa      object attributes; o_id also selects the test mode
+         *         described below
+         * lsm     stripe descriptor, or NULL; if given, its object id
+         *         must equal oa->o_id
+         * offset  starting offset of the transfer
+         * count   transfer size in bytes; must be a non-zero multiple of
+         *         PAGE_SIZE
+         *
+         * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM, the
+         * error from obd_brw()/ll_brw_sync_wait(), or the first non-zero
+         * page_debug_check() result when verification is enabled.
+         */
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct obd_brw_set     *set;
+        obd_count               npages;
+        struct brw_page        *pga;
+        struct brw_page        *pgp;
+        obd_off                 off;
+        int                     i;
+        int                     rc;
+        int                     verify;
+        int                     gfp_mask;
+
+        /* oa_id  == 0    => speed test (no verification) else...
+         * oa & 1         => use HIGHMEM
+         */
+        verify = (oa->o_id != 0);
+        gfp_mask = ((oa->o_id & 1) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
+
+        LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
+
+        if (count <= 0 ||
+            (count & (PAGE_SIZE - 1)) != 0 ||
+            (lsm != NULL &&
+             lsm->lsm_object_id != oa->o_id))
+                return (-EINVAL);
+
+        set = obd_brw_set_new();
+        if (set == NULL)
+                return (-ENOMEM);
+
+        /* XXX think again with misaligned I/O */
+        npages = count >> PAGE_SHIFT;
+
+        rc = -ENOMEM;
+        OBD_ALLOC(pga, npages * sizeof(*pga));
+        if (pga == NULL)
+                goto out_0;
+
+        for (i = 0, pgp = pga, off = offset;
+             i < npages;
+             i++, pgp++, off += PAGE_SIZE) {
+
+                LASSERT (pgp->pg == NULL);      /* for cleanup */
+
+                rc = -ENOMEM;
+                pgp->pg = alloc_pages (gfp_mask, 0);
+                if (pgp->pg == NULL)
+                        goto out_1;
+
+                pgp->count = PAGE_SIZE;
+                pgp->off = off;
+                pgp->flag = 0;
+
+                if (verify) {
+                        void *addr = kmap(pgp->pg);
+                        obd_off      stripe_off = off;
+                        obd_id       stripe_id = oa->o_id;
+
+                        if (rw == OBD_BRW_WRITE) {
+                                echo_get_stripe_off_id(lsm, &stripe_off,        /* NOTE(review): lsm may be NULL here per the argument check above - confirm the callee tolerates that */
+                                                       &stripe_id);
+                                page_debug_setup(addr, pgp->count,
+                                                 stripe_off, stripe_id);
+                        } else {
+                                page_debug_setup(addr, pgp->count,      /* reads: pre-fill with a pattern that differs from what the check below expects */
+                                                 0xdeadbeef00c0ffee,
+                                                 0xdeadbeef00c0ffee);
+                        }
+                        kunmap(pgp->pg);
+                }
+        }
+
+        set->brw_callback = ll_brw_sync_wait;
+        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, set, NULL);
+        if (rc == 0)
+                rc = ll_brw_sync_wait(set, CB_PHASE_START);
+
+ out_1:
+        if (rc != 0)                            /* failed (I/O or page allocation): release pages without checking contents */
+                verify = 0;
+
+        for (i = 0, pgp = pga; i < npages; i++, pgp++) {
+                if (pgp->pg == NULL)            /* never allocated: the setup loop bailed out before reaching this slot */
+                        continue;
+
+                if (verify) {
+                        void    *addr = kmap(pgp->pg);
+                        obd_off  stripe_off = pgp->off;
+                        obd_id   stripe_id  = oa->o_id;
+                        int      vrc;
+
+                        echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
+                        vrc = page_debug_check("test_brw", addr, pgp->count,
+                                               stripe_off, stripe_id);
+                        if (vrc != 0 && rc == 0)        /* keep only the first verification failure */
+                                rc = vrc;
+                        
+                        kunmap(pgp->pg);
+                }
+                __free_pages(pgp->pg, 0);
+        }
+        OBD_FREE(pga, npages * sizeof(*pga));
+ out_0:
+        obd_brw_set_free(set);
+        return (rc);
+}
+
+static int
+echo_client_ubrw (struct obd_device *obd, int rw,
+                  struct obdo *oa, struct lov_stripe_md *lsm,
+                  obd_off offset, obd_size count, char *buffer)
+{       /* Bulk read or write directly to/from a user-space buffer, whose
+         * pages are mapped through a kiobuf.
+         *
+         * obd     echo client device
+         * rw      OBD_BRW_WRITE or OBD_BRW_READ (asserted)
+         * oa      object attributes
+         * lsm     stripe descriptor, or NULL; if given, its object id
+         *         must equal oa->o_id
+         * offset  starting offset of the transfer
+         * count   transfer size in bytes; must be a non-zero multiple of
+         *         PAGE_SIZE
+         * buffer  user-space address; must be page aligned
+         *
+         * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM, or
+         * the error from alloc_kiovec(), map_user_kiobuf(), obd_brw() or
+         * ll_brw_sync_wait().
+         */
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct obd_brw_set     *set;
+        obd_count               npages;
+        struct brw_page        *pga;
+        struct brw_page        *pgp;
+        obd_off                 off;
+        struct kiobuf          *kiobuf;
+        int                     i;
+        int                     rc;
+
+        LASSERT (rw == OBD_BRW_WRITE ||
+                 rw == OBD_BRW_READ);
+
+        /* NB: for now, only whole pages, page aligned */
+
+        if (count <= 0 ||
+            ((long)buffer & (PAGE_SIZE - 1)) != 0 ||
+            (count & (PAGE_SIZE - 1)) != 0 ||
+            (lsm != NULL && lsm->lsm_object_id != oa->o_id))
+                return (-EINVAL);
+
+        set = obd_brw_set_new();
+        if (set == NULL)
+                return (-ENOMEM);
+
+        /* XXX think again with misaligned I/O */
+        npages = count >> PAGE_SHIFT;
+
+        rc = -ENOMEM;
+        OBD_ALLOC(pga, npages * sizeof(*pga));
+        if (pga == NULL)
+                goto out_0;
+
+        rc = alloc_kiovec (1, &kiobuf);
+        if (rc != 0)
+                goto out_1;
+
+        rc = map_user_kiobuf ((rw == OBD_BRW_READ) ? READ : WRITE,
+                              kiobuf, (unsigned long)buffer, count);
+        if (rc != 0)
+                goto out_2;
+
+        LASSERT (kiobuf->offset == 0);          /* expected because 'buffer' was checked to be page aligned */
+        LASSERT (kiobuf->nr_pages == npages);
+
+        for (i = 0, off = offset, pgp = pga;
+             i < npages;
+             i++, off += PAGE_SIZE, pgp++) {
+                pgp->off = off;
+                pgp->pg = kiobuf->maplist[i];   /* I/O goes straight to the mapped user page */
+                pgp->count = PAGE_SIZE;
+                pgp->flag = 0;
+        }
+
+        set->brw_callback = ll_brw_sync_wait;
+        rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, set, NULL);
+
+        if (rc == 0)
+                rc = ll_brw_sync_wait(set, CB_PHASE_START);
+
+        //        if (rw == OBD_BRW_READ)
+        //                mark_dirty_kiobuf (kiobuf, count);
+
+        unmap_kiobuf (kiobuf);                  /* the labels below unwind in reverse order of setup */
+ out_2:
+        free_kiovec (1, &kiobuf);
+ out_1:
+        OBD_FREE(pga, npages * sizeof(*pga));
+ out_0:
+        obd_brw_set_free(set);
+        return (rc);
+}
+
+static int
+echo_open (struct obd_export *exp, struct obdo *oa)
+{       /* Open the object named by 'oa' on the target and record the open
+         * on the export's eced_open_head list.
+         *
+         * On success the handle inside 'oa' is overwritten with the
+         * kernel address of the open record and a cookie drawn from
+         * ec_unique; echo_close() matches on both.
+         *
+         * Returns 0 on success, -ENOMEM, or the error from
+         * echo_get_object()/obd_open().
+         */
+        struct obd_device      *obd = exp->exp_obd;
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct lustre_handle   *ufh = obdo_handle (oa);
+        struct ec_open_object  *ecoo;
+        struct ec_object       *eco;
+        int                     rc;
+        
+        rc = echo_get_object (&eco, obd, oa);
+        if (rc != 0)
+                return (rc);
+        
+        rc = -ENOMEM;
+        OBD_ALLOC (ecoo, sizeof (*ecoo));
+        if (ecoo == NULL)
+                goto failed_0;
+
+        rc = obd_open (&ec->ec_conn, oa, eco->eco_lsm, NULL);
+        if (rc != 0)
+                goto failed_1;
+        
+        memcpy (&ecoo->ecoo_oa, oa, sizeof (*oa));      /* snapshot taken before the handle is overwritten below; echo_close() passes this copy to obd_close() */
+        ecoo->ecoo_object = eco;
+        /* ecoo takes ref from echo_get_object() above */
+
+        spin_lock (&ec->ec_lock);
+
+        list_add (&ecoo->ecoo_exp_chain,
+                  &exp->exp_ec_data.eced_open_head);
+        
+        ufh->addr = (__u64)((long) ecoo);
+        ufh->cookie = ecoo->ecoo_cookie = ec->ec_unique++;
+        
+        spin_unlock (&ec->ec_lock);
+        return (0);
+        
+ failed_1:
+        OBD_FREE (ecoo, sizeof (*ecoo));
+ failed_0:
+        echo_put_object (eco);                  /* drop the reference taken by echo_get_object() */
+        return (rc);
+}
+
+/*
+ * Close an object previously opened through echo_open().
+ *
+ * The handle carried in 'oa' must name an open record on this export's
+ * eced_open_head list: its addr must equal the record's address and its
+ * cookie must equal the record's cookie.  A matching record is unlinked
+ * under ec_lock, closed on the target, and its object reference dropped.
+ *
+ * Returns -EINVAL if 'oa' has no handle or no record matches, otherwise
+ * the result of obd_close().
+ */
+static int
+echo_close (struct obd_export *exp, struct obdo *oa)
+{
+        struct echo_client_obd *ec = &exp->exp_obd->u.echo_client;
+        struct lustre_handle   *ufh = obdo_handle (oa);
+        struct ec_open_object  *match = NULL;
+        struct list_head       *pos;
+        int                     rc;
+
+        if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
+                return (-EINVAL);
+
+        spin_lock (&ec->ec_lock);
+
+        list_for_each (pos, &exp->exp_ec_data.eced_open_head) {
+                struct ec_open_object *cand =
+                        list_entry (pos, struct ec_open_object, ecoo_exp_chain);
+
+                if ((__u64)((long)cand) != ufh->addr)
+                        continue;
+
+                /* address matched: the cookie decides, look no further */
+                if (cand->ecoo_cookie == ufh->cookie) {
+                        list_del (&cand->ecoo_exp_chain);
+                        match = cand;
+                }
+                break;
+        }
+
+        spin_unlock (&ec->ec_lock);
+
+        if (match == NULL)
+                return (-EINVAL);
+
+        rc = obd_close (&ec->ec_conn, &match->ecoo_oa,
+                        match->ecoo_object->eco_lsm, NULL);
+
+        echo_put_object (match->ecoo_object);
+        OBD_FREE (match, sizeof (*match));
+
+        return (rc);
+}
+
+/*
+ * Lock callback passed to obd_enqueue() by echo_enqueue(); 'data' is the
+ * echo object the lock was taken on.
+ *
+ * A blocking callback cancels the lock (a failure is only logged); a
+ * canceling callback is just traced.  Any other flag is a bug.
+ * Always returns 0.
+ */
+static int
+echo_ldlm_callback (struct ldlm_lock *lock, struct ldlm_lock_desc *new,
+                    void *data, int flag)
+{
+        struct ec_object       *eco = (struct ec_object *)data;
+        struct echo_client_obd *ec = &(eco->eco_device->u.echo_client);
+        struct lustre_handle    lockh;
+        struct list_head       *pos;
+        int                     found = 0;
+        int                     rc;
+
+        ldlm_lock2handle (lock, &lockh);
+
+        /* paranoia: check the object is still on the client's list
+         * (#ifdef this out if we're not feeling paranoid) */
+        spin_lock (&ec->ec_lock);
+        list_for_each (pos, &ec->ec_objects) {
+                if (list_entry (pos, struct ec_object, eco_obj_chain) == eco) {
+                        found = 1;
+                        break;
+                }
+        }
+        spin_unlock (&ec->ec_lock);
+        LASSERT (found);
+
+        if (flag == LDLM_CB_BLOCKING) {
+                CDEBUG (D_INFO, "blocking callback on "LPX64", handle "LPX64"."LPX64"\n", 
+                        eco->eco_id, lockh.addr, lockh.cookie);
+                rc = ldlm_cli_cancel (&lockh);
+                if (rc != ELDLM_OK)
+                        CERROR ("ldlm_cli_cancel failed: %d\n", rc);
+        } else if (flag == LDLM_CB_CANCELING) {
+                CDEBUG (D_INFO, "canceling callback on "LPX64", handle "LPX64"."LPX64"\n", 
+                        eco->eco_id, lockh.addr, lockh.cookie);
+        } else {
+                LBUG ();
+        }
+
+        return (0);
+}
+
+static int
+echo_enqueue (struct obd_export *exp, struct obdo *oa,
+              int mode, obd_off offset, obd_size nob)
+{       /* Take an extent lock on the object named by 'oa' and record it on
+         * the export's eced_locks list.
+         *
+         * mode    LCK_PR or LCK_PW
+         * offset  start of the extent; must be page aligned
+         * nob     extent length in bytes, a multiple of PAGE_SIZE;
+         *         0 makes the extent run to the maximum offset
+         *
+         * On success the handle inside 'oa' is set to the kernel address
+         * of the lock record plus a cookie drawn from ec_unique, and
+         * OBD_MD_FLHANDLE is set in oa->o_valid; echo_cancel() matches on
+         * that handle.
+         *
+         * Returns 0 on success, -EINVAL for bad arguments, -ENOMEM, or
+         * the error from echo_get_object()/obd_enqueue().
+         */
+        struct obd_device      *obd = exp->exp_obd;
+        struct echo_client_obd *ec = &obd->u.echo_client;
+        struct lustre_handle   *ulh = obdo_handle (oa);
+        struct ec_object       *eco;
+        struct ec_lock         *ecl;
+        int                     flags;
+        int                     rc;
+
+        if (!(mode == LCK_PR || mode == LCK_PW))
+                return (-EINVAL);
+
+        if ((offset & (PAGE_SIZE - 1)) != 0 ||
+            (nob & (PAGE_SIZE - 1)) != 0)
+                return (-EINVAL);
+
+        rc = echo_get_object (&eco, obd, oa);
+        if (rc != 0)
+                return (rc);
+
+        rc = -ENOMEM;
+        OBD_ALLOC (ecl, sizeof (*ecl));
+        if (ecl == NULL)
+                goto failed_0;
+
+        ecl->ecl_mode = mode;
+        ecl->ecl_object = eco;
+        ecl->ecl_extent.start = offset;
+        ecl->ecl_extent.end = (nob == 0) ? ((obd_off)-1) : (offset + nob - 1);   /* 'end' is inclusive */
+        
+        flags = 0;
+        rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL,
+                          LDLM_EXTENT, &ecl->ecl_extent, sizeof (ecl->ecl_extent),
+                          mode, &flags, echo_ldlm_callback, eco, sizeof (*eco),        /* eco is the 'data' argument echo_ldlm_callback() receives */
+                          &ecl->ecl_handle);
+        if (rc != 0)
+                goto failed_1;
+
+        CDEBUG (D_INFO, "enqueue handle "LPX64"."LPX64"\n",
+                ecl->ecl_handle.addr, ecl->ecl_handle.cookie);
+
+        /* NB ecl takes object ref from echo_get_object() above */
+
+        spin_lock (&ec->ec_lock);
+
+        list_add (&ecl->ecl_exp_chain, &exp->exp_ec_data.eced_locks);
+
+        ulh->addr = (__u64)((long)ecl);
+        ulh->cookie = ecl->ecl_cookie = ec->ec_unique++;
+        
+        spin_unlock (&ec->ec_lock);
+
+        oa->o_valid |= OBD_MD_FLHANDLE;
+        return (0);
+
+ failed_1:
+        OBD_FREE (ecl, sizeof (*ecl));
+ failed_0:
+        echo_put_object (eco);                  /* drop the reference taken by echo_get_object() */
+        return (rc);
+}
+
+/*
+ * Cancel a lock previously taken through echo_enqueue().
+ *
+ * The handle carried in 'oa' must name a lock record on this export's
+ * eced_locks list: its addr must equal the record's address and its
+ * cookie must equal the record's cookie.  A matching record is unlinked
+ * under ec_lock, the lock is cancelled on the target, and the record's
+ * object reference is dropped.
+ *
+ * Returns -EINVAL if 'oa' has no handle, -ENOENT if no record matches,
+ * otherwise the result of obd_cancel().
+ */
+static int
+echo_cancel (struct obd_export *exp, struct obdo *oa)
+{
+        struct echo_client_obd *ec = &exp->exp_obd->u.echo_client;
+        struct lustre_handle   *ulh = obdo_handle (oa);
+        struct ec_lock         *match = NULL;
+        struct list_head       *pos;
+        int                     rc;
+
+        if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
+                return (-EINVAL);
+
+        spin_lock (&ec->ec_lock);
+
+        list_for_each (pos, &exp->exp_ec_data.eced_locks) {
+                struct ec_lock *cand =
+                        list_entry (pos, struct ec_lock, ecl_exp_chain);
+
+                if ((__u64)((long)cand) != ulh->addr)
+                        continue;
+
+                /* address matched: the cookie decides, look no further */
+                if (cand->ecl_cookie == ulh->cookie) {
+                        list_del (&cand->ecl_exp_chain);
+                        match = cand;
+                }
+                break;
+        }
+
+        spin_unlock (&ec->ec_lock);
+
+        if (match == NULL)
+                return (-ENOENT);
+
+        rc = obd_cancel (&ec->ec_conn,
+                         match->ecl_object->eco_lsm,
+                         match->ecl_mode,
+                         &match->ecl_handle);
+
+        echo_put_object (match->ecl_object);
+        OBD_FREE (match, sizeof (*match));
+
+        return (rc);
+}
+
+static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn,
+                          int len, void *karg, void *uarg)
+{
+        struct obd_export      *exp = class_conn2export (obdconn);
+        struct obd_device      *obd;
+        struct echo_client_obd *ec;
+        struct ec_object       *eco;
+        struct obd_ioctl_data  *data = karg;
+        int                     rw = OBD_BRW_READ;
+        int                     rc = 0;
         ENTRY;
 
-        if (obd == NULL) {
+        if (exp == NULL) {
                 CERROR("ioctl: No device\n");
                 GOTO(out, rc = -EINVAL);
         }
 
-        if (data->ioc_inllen1 == sizeof(*lsm)) {
-                lsm = (struct lov_stripe_md *)data->ioc_inlbuf1;
-        } else if (data->ioc_inllen1 != 0) {
-                CERROR("nonzero ioc_inllen1 != sizeof(struct lov_stripe_md)\n");
-                GOTO(out, rc = -EINVAL);
-        }
+        obd = exp->exp_obd;
+        ec = &obd->u.echo_client;
 
         switch (cmd) {
-        case OBD_IOC_CREATE: {
-                struct lov_stripe_md *tmp_lsm = NULL;
-                rc = obd_create(&ec->conn, &data->ioc_obdo1, &tmp_lsm);
-                if (lsm && tmp_lsm ) {
-                        memcpy(lsm, tmp_lsm, sizeof(*tmp_lsm));
-                        data->ioc_conn2 = 1;
-                }
+        case OBD_IOC_CREATE:                    /* may create echo object */
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
+                
+                rc = echo_create_object (obd, 1, &data->ioc_obdo1,
+                                         data->ioc_pbuf1, data->ioc_plen1);
+                GOTO(out, rc);
 
+        case OBD_IOC_DESTROY:
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
+       
+                rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+                if (rc == 0) {
+                        rc = obd_destroy(&ec->ec_conn, &data->ioc_obdo1,
+                                         eco->eco_lsm, NULL);
+                        if (rc == 0)
+                                eco->eco_deleted = 1;
+                        echo_put_object(eco);
+                }
                 GOTO(out, rc);
-        }
 
         case OBD_IOC_GETATTR:
-                rc = obd_getattr(&ec->conn, &data->ioc_obdo1, lsm);
+                rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+                if (rc == 0) {
+                        rc = obd_getattr(&ec->ec_conn, &data->ioc_obdo1,
+                                         eco->eco_lsm);
+                        echo_put_object(eco);
+                }
                 GOTO(out, rc);
 
         case OBD_IOC_SETATTR:
-                rc = obd_setattr(&ec->conn, &data->ioc_obdo1, lsm);
-                GOTO(out, rc);
-
-        case OBD_IOC_DESTROY:
-                rc = obd_destroy(&ec->conn, &data->ioc_obdo1, lsm);
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
+       
+                rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+                if (rc == 0) {
+                        rc = obd_setattr(&ec->ec_conn, &data->ioc_obdo1,
+                                         eco->eco_lsm, NULL);
+                        echo_put_object(eco);
+                }
                 GOTO(out, rc);
 
         case OBD_IOC_OPEN:
-                rc = obd_open(&ec->conn, &data->ioc_obdo1, lsm);
+                rc = echo_open (exp, &data->ioc_obdo1);
                 GOTO(out, rc);
 
         case OBD_IOC_CLOSE:
-                rc = obd_close(&ec->conn, &data->ioc_obdo1, lsm);
+                rc = echo_close (exp, &data->ioc_obdo1);
                 GOTO(out, rc);
 
         case OBD_IOC_BRW_WRITE:
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
+       
                 rw = OBD_BRW_WRITE;
-        case OBD_IOC_BRW_READ: {
-                struct lov_stripe_md tmp_lsm;
-                struct obd_brw_set *set;
-                obd_count pages = 0;
-                struct brw_page *pga, *pgp;
-                __u64 off, id = data->ioc_obdo1.o_id;
-                int gfp_mask = (id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
-                int j, verify = (id != 0);
-
-                if (lsm && lsm->lsm_object_id != id) {
-                        CERROR("LSM object ID ("LPU64") != id ("LPU64")\n",
-                               lsm->lsm_object_id, id);
-                        GOTO(out, rc = -EINVAL);
-                }
-
-                if (!lsm) {
-                        memset(&tmp_lsm, 0, sizeof(tmp_lsm));
-                        lsm = &tmp_lsm;
-                        lsm->lsm_object_id = id;
+                /* fall through */
+        case OBD_IOC_BRW_READ:
+                rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+                if (rc == 0) {
+                        if (data->ioc_pbuf2 == NULL) // NULL user data pointer
+                                rc = echo_client_kbrw(obd, rw, &data->ioc_obdo1,
+                                                      eco->eco_lsm,
+                                                      data->ioc_offset,
+                                                      data->ioc_count);
+                        else
+                                rc = echo_client_ubrw(obd, rw, &data->ioc_obdo1,
+                                                      eco->eco_lsm,
+                                                      data->ioc_offset,
+                                                      data->ioc_count,
+                                                      data->ioc_pbuf2);
+                        echo_put_object(eco);
                 }
+                GOTO(out, rc);
 
-                if (data->ioc_count < 0) {
-                        CERROR("invalid buffer size: "LPD64"\n",
-                               data->ioc_count);
-                        GOTO(out, rc = -EINVAL);
+        case ECHO_IOC_GET_STRIPE:
+                rc = echo_get_object(&eco, obd, &data->ioc_obdo1);
+                if (rc == 0) {
+                        rc = echo_copyout_lsm(eco->eco_lsm, data->ioc_pbuf1,
+                                              data->ioc_plen1);
+                        echo_put_object(eco);
                 }
+                GOTO(out, rc);
 
-                set = obd_brw_set_new();
-                if (set == NULL)
-                        GOTO(out, rc = -ENOMEM);
-
-                pages = data->ioc_count / PAGE_SIZE;
-                off = data->ioc_offset;
-
-                CDEBUG(D_INODE, "BRW %s with %d pages @ "LPX64"\n",
-                       rw == OBD_BRW_READ ? "read" : "write", pages, off);
-                OBD_ALLOC(pga, pages * sizeof(*pga));
-                if (!pga) {
-                        CERROR("no memory for %d BRW per-page data\n", pages);
-                        GOTO(brw_free, rc = -ENOMEM);
-                }
+        case ECHO_IOC_SET_STRIPE:
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
 
-                for (j = 0, pgp = pga; j < pages; j++, off += PAGE_SIZE, pgp++){
-                        pgp->pg = alloc_pages(gfp_mask, 0);
-                        if (!pgp->pg) {
-                                CERROR("no memory for brw pages\n");
-                                GOTO(brw_cleanup, rc = -ENOMEM);
-                        }
-                        pgp->count = PAGE_SIZE;
-                        pgp->off = off;
-                        pgp->flag = 0;
-
-                        if (verify) {
-                                void *addr = kmap(pgp->pg);
-
-                                if (rw == OBD_BRW_WRITE)
-                                        page_debug_setup(addr, pgp->count,
-                                                         pgp->off, id);
-                                else
-                                        page_debug_setup(addr, pgp->count,
-                                                         0xdeadbeef00c0ffee,
-                                                         0xdeadbeef00c0ffee);
-                                kunmap(pgp->pg);
+                if (data->ioc_pbuf1 == NULL) {  /* unset */
+                        rc = echo_get_object(&eco, obd, &data->ioc_obdo1);
+                        if (rc == 0) {
+                                eco->eco_deleted = 1;
+                                echo_put_object(eco);
                         }
+                } else {
+                        rc = echo_create_object(obd, 0, &data->ioc_obdo1,
+                                                data->ioc_pbuf1, data->ioc_plen1);
                 }
+                GOTO (out, rc);
+
+        case ECHO_IOC_ENQUEUE:
+                if (!capable (CAP_SYS_ADMIN))
+                        GOTO (out, rc = -EPERM);
+       
+                rc = echo_enqueue (exp, &data->ioc_obdo1, 
+                                   data->ioc_conn1, /* lock mode */
+                                   data->ioc_offset, data->ioc_count); /* extent */
+                GOTO (out, rc);
+
+        case ECHO_IOC_CANCEL:
+                rc = echo_cancel (exp, &data->ioc_obdo1);
+                GOTO (out, rc);
 
-                set->brw_callback = ll_brw_sync_wait;
-                rc = obd_brw(rw, &ec->conn, lsm, j, pga, set);
-                if (rc)
-                        CERROR("test_brw: error from obd_brw: rc = %d\n", rc);
-                else {
-                        rc = ll_brw_sync_wait(set, CB_PHASE_START);
-                        if (rc)
-                                CERROR("test_brw: error from callback: rc = "
-                                       "%d\n", rc);
-                }
-                EXIT;
-        brw_cleanup:
-                for (j = 0, pgp = pga; j < pages; j++, pgp++) {
-                        if (pgp->pg == NULL)
-                                continue;
-
-                        if (verify && !rc) {
-                                void *addr = kmap(pgp->pg);
-
-                                rc = page_debug_check("test_brw", addr,
-                                                       pgp->count, pgp->off, id);
-                                kunmap(pgp->pg);
-                        }
-                        __free_pages(pgp->pg, 0);
-                }
-        brw_free:
-                obd_brw_set_free(set);
-                OBD_FREE(pga, pages * sizeof(*pga));
-                GOTO(out, rc);
-        }
         default:
                 CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
                 GOTO (out, rc = -ENOTTY);
@@ -197,6 +975,9 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct obd_ioctl_data* data = buf;
         struct echo_client_obd *ec = &obddev->u.echo_client;
         struct obd_device *tgt;
+        struct obd_uuid uuid;
+        struct lov_stripe_md *lsm = NULL;
+        struct obd_uuid echo_uuid = { "ECHO_UUID" };
         int rc;
         ENTRY;
 
@@ -209,7 +990,8 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
                 RETURN(-EINVAL);
         }
 
-        tgt = class_uuid2obd(data->ioc_inlbuf1);
+        obd_str2uuid(&uuid, data->ioc_inlbuf1);
+        tgt = class_uuid2obd(&uuid);
         if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
             !(tgt->obd_flags & OBD_SET_UP)) {
                 CERROR("device not attached or not set up (%d)\n",
@@ -217,14 +999,33 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf)
                 RETURN(rc = -EINVAL);
         }
 
-        rc = obd_connect(&ec->conn, tgt, NULL, NULL, NULL);
-        if (rc)
+        spin_lock_init (&ec->ec_lock);
+        INIT_LIST_HEAD (&ec->ec_objects);
+        ec->ec_unique = 0;
+
+        rc = obd_connect(&ec->ec_conn, tgt, &echo_uuid, NULL, NULL);
+        if (rc) {
                 CERROR("fail to connect to device %d\n", data->ioc_dev);
+                return (rc);
+        }
+
+        ec->ec_lsmsize = obd_alloc_memmd (&ec->ec_conn, &lsm);
+        if (ec->ec_lsmsize < 0) {
+                CERROR ("Can't get # stripes: %d\n", rc);
+                obd_disconnect (&ec->ec_conn);
+                rc = ec->ec_lsmsize;
+        } else {
+                ec->ec_nstripes = lsm->lsm_stripe_count;
+                obd_free_memmd (&ec->ec_conn, &lsm);
+        }
+
         RETURN(rc);
 }
 
 static int echo_cleanup(struct obd_device * obddev)
 {
+        struct list_head       *el;
+        struct ec_object       *eco;
         struct echo_client_obd *ec = &obddev->u.echo_client;
         int rc;
         ENTRY;
@@ -234,20 +1035,90 @@ static int echo_cleanup(struct obd_device * obddev)
                 RETURN(-EBUSY);
         }
 
-        rc = obd_disconnect(&ec->conn);
-        if (rc) {
-                CERROR("fail to disconnect device: %d\n", rc);
-                RETURN(-EINVAL);
+        /* XXX assuming sole access */
+        while (!list_empty (&ec->ec_objects)) {
+                el = ec->ec_objects.next;
+                eco = list_entry (el, struct ec_object, eco_obj_chain);
+
+                LASSERT (eco->eco_refcount == 0);
+                eco->eco_refcount = 1;
+                eco->eco_deleted = 1;
+                echo_put_object (eco);
         }
 
-        RETURN(0);
+        rc = obd_disconnect (&ec->ec_conn);
+        if (rc != 0)
+                CERROR("fail to disconnect device: %d\n", rc);
+
+        RETURN (rc);
 }
 
+/*
+ * Connect a client to the echo client device.
+ *
+ * Performs the generic class_connect() and, on success, initialises the
+ * per-export lists of open handles and held locks that echo_disconnect()
+ * later tears down.  'recovd' and 'recover' are not used here.
+ *
+ * Returns the result of class_connect().
+ */
 static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
-                        obd_uuid_t cluuid, struct recovd_obd *recovd,
+                        struct obd_uuid *cluuid, struct recovd_obd *recovd,
                         ptlrpc_recovery_cb_t recover)
 {
-        return class_connect(conn, src, cluuid);
+        struct obd_export *exp;
+        int                rc;
+        ENTRY;          /* pairs with the RETURN() below */
+
+        rc = class_connect(conn, src, cluuid);
+        if (rc == 0) {
+                exp = class_conn2export (conn);
+                INIT_LIST_HEAD (&exp->exp_ec_data.eced_open_head);
+                INIT_LIST_HEAD (&exp->exp_ec_data.eced_locks);
+        }
+
+        RETURN (rc);
+}
+
+/*
+ * Disconnect an export from the echo client.
+ *
+ * Any locks still held and any objects still open through this export are
+ * cancelled/closed against the target connection (ec_conn), their object
+ * references dropped and their bookkeeping freed, before the generic
+ * class_disconnect() is performed.
+ *
+ * Returns -EINVAL if the handle does not map to an export, otherwise the
+ * result of class_disconnect().  Failures of individual cancels/closes are
+ * logged but do not abort the disconnect.
+ */
+static int echo_disconnect(struct lustre_handle *conn)
+{
+        struct obd_export      *exp = class_conn2export (conn);
+        struct obd_device      *obd;
+        struct echo_client_obd *ec;
+        struct ec_open_object  *ecoo;
+        struct ec_lock         *ecl;
+        int                     rc;
+        ENTRY;          /* pairs with the RETURN()s below */
+
+        if (exp == NULL)
+                RETURN (-EINVAL);
+
+        obd = exp->exp_obd;
+        ec = &obd->u.echo_client;
+
+        /* no more contention on export's lock list */
+        while (!list_empty (&exp->exp_ec_data.eced_locks)) {
+                ecl = list_entry (exp->exp_ec_data.eced_locks.next,
+                                  struct ec_lock, ecl_exp_chain);
+                list_del (&ecl->ecl_exp_chain);
+
+                rc = obd_cancel (&ec->ec_conn, ecl->ecl_object->eco_lsm,
+                                 ecl->ecl_mode, &ecl->ecl_handle);
+
+                /* a successful cancel is routine (same level as the close
+                 * path below); only a failed one is reported as an error */
+                if (rc != 0)
+                        CERROR ("Cancel lock on object "LPX64" on disconnect (%d)\n",
+                                ecl->ecl_object->eco_id, rc);
+                else
+                        CDEBUG (D_INFO, "Cancel lock on object "LPX64" on disconnect (%d)\n",
+                                ecl->ecl_object->eco_id, rc);
+
+                echo_put_object (ecl->ecl_object);
+                OBD_FREE (ecl, sizeof (*ecl));
+        }
+
+        /* no more contention on export's open handle list  */
+        while (!list_empty (&exp->exp_ec_data.eced_open_head)) {
+                ecoo = list_entry (exp->exp_ec_data.eced_open_head.next,
+                                   struct ec_open_object, ecoo_exp_chain);
+                list_del (&ecoo->ecoo_exp_chain);
+
+                rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa,
+                                ecoo->ecoo_object->eco_lsm, NULL);
+
+                CDEBUG (D_INFO, "Closed object "LPX64" on disconnect (%d)\n",
+                        ecoo->ecoo_oa.o_id, rc);
+
+                echo_put_object (ecoo->ecoo_object);
+                OBD_FREE (ecoo, sizeof (*ecoo));
+        }
+
+        rc = class_disconnect (conn);
+        RETURN (rc);
 }
 
 static struct obd_ops echo_obd_ops = {
@@ -256,14 +1127,15 @@ static struct obd_ops echo_obd_ops = {
         o_cleanup:     echo_cleanup,
         o_iocontrol:   echo_iocontrol,
         o_connect:     echo_connect,
-        o_disconnect:  class_disconnect
+        o_disconnect:  echo_disconnect
 };
 
+/*
+ * Register the echo client OBD type (echo_obd_ops) under
+ * OBD_ECHO_CLIENT_DEVICENAME, passing the module-level lprocfs variables
+ * obtained from lprocfs_init_vars().  Returns class_register_type()'s result.
+ */
 int echo_client_init(void)
 {
-        extern struct lprocfs_vars status_class_var[];
+        struct lprocfs_static_vars lvars;
 
-        return class_register_type(&echo_obd_ops, status_class_var,
+        lprocfs_init_vars(&lvars);
+        return class_register_type(&echo_obd_ops, lvars.module_vars,
                                    OBD_ECHO_CLIENT_DEVICENAME);
 }
 
index 449f9c5..bb2870a 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
  */
 #define DEBUG_SUBSYSTEM S_ECHO
 
-#include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
+#include <linux/obd_class.h>
 
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
 
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
-{
-        int len = 0;
-        struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->obd_uuid);
-        return len;
-
-}
-
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof, 
+int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
               void *data)
 {
-        int len = 0;
         struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
-        return len;
-
+        int rc = snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
+        *eof = 1;
+        return rc;
 }
 
-
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/fstype", rd_fstype, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",     lprocfs_rd_uuid,    0, 0 },
+        { "fstype",   rd_fstype,          0, 0 },
+        { 0 }
 };
 
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
-
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 2d495b2..591005e 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  linux/fs/obdfilter/filter.c
  *
- *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter Braam <braam@clusterfs.com>
  *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
  */
 
 /*
- * Invariant: get O/R i_sem for lookup, if needed, before any journal ops
+ * Invariant: Get O/R i_sem for lookup, if needed, before any journal ops
  *            (which need to get journal_lock, may block if journal full).
+ *
+ * Invariant: Call filter_start_transno() before any journal ops to avoid the
+ *            same deadlock problem.  We can (and want to) get rid of the
+ *            transno sem in favour of the dir/inode i_sem to avoid single
+ *            threaded operation on the OST.
  */
 
 #define EXPORT_SYMTAB
 #include <linux/lustre_fsfilt.h>
 #include <linux/lprocfs_status.h>
 
-extern struct lprocfs_vars status_class_var[];
-extern struct lprocfs_vars status_var_nm_1[];
 
 static kmem_cache_t *filter_open_cache;
 static kmem_cache_t *filter_dentry_cache;
 
+/* should be generic per-obd stats...
+ *
+ * One set of I/O counters.  An array of these, one slot per CPU, is kept in
+ * xprocfs_iostats[]; each field is summed over the slots and exported as a
+ * read-only procfs file by xprocfs_init().
+ *
+ * NOTE(review): field meanings are inferred from their names (bytes moved
+ * and requests seen, per operation type); the code that bumps them is not
+ * in this part of the file -- confirm against the callers of
+ * XPROCFS_BUMP_MYCPU_IOSTAT().
+ */
+struct xprocfs_io_stat {
+        __u64    st_read_bytes;
+        __u64    st_read_reqs;
+        __u64    st_write_bytes;
+        __u64    st_write_reqs;
+        __u64    st_getattr_reqs;
+        __u64    st_setattr_reqs;
+        __u64    st_create_reqs;
+        __u64    st_destroy_reqs;
+        __u64    st_statfs_reqs;
+        __u64    st_open_reqs;
+        __u64    st_close_reqs;
+        __u64    st_punch_reqs;
+};
+
+/* per-CPU counter slots, indexed by CPU number */
+static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
+/* procfs directory holding the stat files; NULL when not set up */
+static struct proc_dir_entry *xprocfs_dir;
+
+/* Add 'count' to counter 'field' in the slot of the CPU we are running on. */
+#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count)                 \
+do {                                                            \
+        xprocfs_iostats[smp_processor_id()].field += (count);   \
+} while (0)
+
+/*
+ * Define xprocfs_sum_<field>(), which returns the total of counter 'field'
+ * over slots 0 .. smp_num_cpus-1.
+ *
+ * NOTE(review): the bump macro indexes by smp_processor_id() while the sum
+ * walks the first smp_num_cpus slots; this presumably assumes CPU ids are
+ * dense from 0 -- confirm for the target kernels.
+ */
+#define DECLARE_XPROCFS_SUM_STAT(field)                 \
+static long long                                        \
+xprocfs_sum_##field (void)                              \
+{                                                       \
+        long long stat = 0;                             \
+        int       i;                                    \
+                                                        \
+        for (i = 0; i < smp_num_cpus; i++)              \
+                stat += xprocfs_iostats[i].field;       \
+        return (stat);                                  \
+}
+
+/* one summing function per counter; registered in xprocfs_init() */
+DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+
+/*
+ * procfs read handler shared by every xprocfs stat file.
+ *
+ * 'data' is the summing function registered for the file by
+ * xprocfs_add_stat().  A read at offset 0 formats that function's result
+ * as a decimal line into 'page'; a read at any other offset yields nothing.
+ * End-of-file is signalled in both cases.
+ *
+ * Returns the snprintf() result for an offset-0 read, 0 otherwise.
+ */
+static int
+xprocfs_rd_stat (char *page, char **start, off_t off, int count,
+                 int  *eof, void *data)
+{
+        long long (*sum_fn)(void);
+        int         nchars = 0;
+
+        /* the whole value is produced by the first read */
+        *eof = 1;
+
+        if (off == 0) {
+                sum_fn = (long long(*)(void))data;
+                nchars = snprintf (page, count, "%Ld\n", sum_fn());
+                *start = page;
+        }
+
+        return (nchars);
+}
+
+
+/*
+ * Create the world-readable proc file 'name' under xprocfs_dir and wire it
+ * to xprocfs_rd_stat(), with 'fn' as the function that supplies its value.
+ * A failure to create the entry is logged and otherwise ignored.
+ */
+static void
+xprocfs_add_stat(char *name, long long (*fn)(void))
+{
+        struct proc_dir_entry *pde;
+
+        pde = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
+        if (pde != NULL) {
+                pde->data = fn;
+                pde->read_proc = xprocfs_rd_stat;
+                pde->write_proc = NULL;
+                return;
+        }
+
+        CERROR ("Can't add procfs stat %s\n", name);
+}
+
+/*
+ * Create the procfs directory "sys/obdfilter" and, inside it, one read-only
+ * file per I/O statistic.
+ *
+ * 'name' is currently ignored: the directory path is fixed.  No buffer is
+ * built from it, since nothing would consume the result.
+ * NOTE(review): if a per-name directory ("sys/<name>") is wanted, build the
+ * path here and pass it to proc_mkdir(); xprocfs_fini() removes the
+ * directory by its recorded name, so it would not need to change.
+ *
+ * If the directory cannot be created an error is logged, no files are
+ * added, and xprocfs_dir is left NULL so that xprocfs_fini() does nothing.
+ */
+static void
+xprocfs_init (char *name)
+{
+        xprocfs_dir = proc_mkdir ("sys/obdfilter", NULL);
+        if (xprocfs_dir == NULL) {
+                CERROR ("Can't make dir\n");
+                return;
+        }
+
+        xprocfs_add_stat ("read_bytes",   xprocfs_sum_st_read_bytes);
+        xprocfs_add_stat ("read_reqs",    xprocfs_sum_st_read_reqs);
+        xprocfs_add_stat ("write_bytes",  xprocfs_sum_st_write_bytes);
+        xprocfs_add_stat ("write_reqs",   xprocfs_sum_st_write_reqs);
+        xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
+        xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
+        xprocfs_add_stat ("create_reqs",  xprocfs_sum_st_create_reqs);
+        xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
+        xprocfs_add_stat ("statfs_reqs",  xprocfs_sum_st_statfs_reqs);
+        xprocfs_add_stat ("open_reqs",    xprocfs_sum_st_open_reqs);
+        xprocfs_add_stat ("close_reqs",   xprocfs_sum_st_close_reqs);
+        xprocfs_add_stat ("punch_reqs",   xprocfs_sum_st_punch_reqs);
+}
+
+/*
+ * Undo xprocfs_init(): remove every stat file, then the directory itself,
+ * and forget the directory.  Does nothing if the directory was never made.
+ */
+void xprocfs_fini (void)
+{
+        /* same files, same order, as registered by xprocfs_init() */
+        static const char *stat_names[] = {
+                "read_bytes",
+                "read_reqs",
+                "write_bytes",
+                "write_reqs",
+                "getattr_reqs",
+                "setattr_reqs",
+                "create_reqs",
+                "destroy_reqs",
+                "statfs_reqs",
+                "open_reqs",
+                "close_reqs",
+                "punch_reqs",
+        };
+        unsigned int idx;
+
+        if (xprocfs_dir == NULL)
+                return;
+
+        for (idx = 0; idx < sizeof (stat_names) / sizeof (stat_names[0]); idx++)
+                remove_proc_entry (stat_names[idx], xprocfs_dir);
+
+        remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
+        xprocfs_dir = NULL;
+}
+
 #define S_SHIFT 12
 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
         [0]                     NULL,
@@ -67,6 +201,83 @@ static inline const char *obd_mode_to_type(int mode)
         return obd_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
 }
 
+/*
+ * Callback handed to fsfilt_set_last_rcvd() by filter_finish_transno().
+ * When invoked without error it advances obd_last_committed to 'last_rcvd';
+ * the stored value is never moved backwards.
+ */
+static void filter_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd,
+                                int error)
+{
+        CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n",
+               last_rcvd, error);
+
+        if (error)
+                return;
+
+        if (obd->obd_last_committed < last_rcvd)
+                obd->obd_last_committed = last_rcvd;
+}
+
+/*
+ * Take the transno semaphore of the filter device behind 'export'.
+ * filter_finish_transno() releases it.
+ */
+void filter_start_transno(struct obd_export *export)
+{
+        struct filter_obd *filter = &export->exp_obd->u.filter;
+        ENTRY;
+
+        down(&filter->fo_transno_sem);
+}
+
+/* Assumes caller has already pushed us into the kernel context.
+ *
+ * Finish a transaction begun with filter_start_transno(): allocate the next
+ * transaction number, record it (and the mount count) in this client's slot
+ * of the last_rcvd file, and release the transno semaphore.
+ *
+ * export: export of the client the transaction belongs to
+ * handle: journal handle passed through to fsfilt_set_last_rcvd()
+ * oti:    if non-NULL, receives the new transaction number
+ * rc:     result of the operation so far; if non-zero nothing is written
+ *         and it is returned unchanged
+ *
+ * Returns 0 on success, the incoming rc if it was non-zero, -EIO on a short
+ * write, or the negative result of lustre_fwrite() if the write failed.
+ * The transno semaphore is released on every path.
+ */
+int filter_finish_transno(struct obd_export *export, void *handle,
+                          struct obd_trans_info *oti, int rc)
+{
+        __u64 last_rcvd;
+        struct obd_device *obd = export->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_export_data *fed = &export->exp_filter_data;
+        struct filter_client_data *fcd = fed->fed_fcd;
+        loff_t off;
+        ssize_t written;
+
+        /* Propagate error code. */
+        if (rc)
+                GOTO(out, rc);
+
+        /* we don't allocate new transnos for replayed requests */
+#if 0
+        /* perhaps if transno already set? or should level be in oti? */
+        if (req->rq_level == LUSTRE_CONN_RECOVD)
+                GOTO(out, rc = 0);
+#endif
+
+        off = FILTER_LR_CLIENT_START + fed->fed_lr_off * FILTER_LR_CLIENT_SIZE;
+
+        last_rcvd = ++filter->fo_fsd->fsd_last_rcvd;
+        if (oti)
+                oti->oti_transno = last_rcvd;
+        fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
+        fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count);
+
+        /* get this from oti */
+#if 0
+        if (oti)
+                fcd->fcd_last_xid = cpu_to_le64(oti->oti_xid);
+        else
+#else
+        fcd->fcd_last_xid = 0;
+#endif
+        fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_last_rcvd_cb);
+        written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, sizeof(*fcd),
+                                &off);
+        CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
+               LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_off, written);
+
+        if (written == sizeof(*fcd))
+                GOTO(out, rc = 0);
+
+        /* rc is necessarily 0 here; the write result is what went wrong */
+        CERROR("error writing to last_rcvd file: rc = %d\n", (int)written);
+        if (written >= 0)
+                GOTO(out, rc = -EIO);
+
+        /* a negative write result is the error code: hand it to the caller
+         * instead of reporting success */
+        rc = written;
+
+        EXIT;
+ out:
+
+        up(&filter->fo_transno_sem);
+        return rc;
+}
+
 /* write the pathname into the string */
 static int filter_id(char *buf, obd_id id, obd_mode mode)
 {
@@ -95,6 +306,336 @@ struct dentry_operations filter_dops = {
 };
 
 #define LAST_RCVD "last_rcvd"
+#define INIT_OBJID 2
+
+/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
+#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
+/* FILTER_LR_MAX_CLIENTS counts bits, so the number of longs needed to hold
+ * them is bits / bits-per-long (not bits / bytes-per-long, which would make
+ * the bitmap eight times larger than the one page intended above) */
+#define FILTER_LR_MAX_CLIENT_WORDS \
+        (FILTER_LR_MAX_CLIENTS / (8 * sizeof(unsigned long)))
+
+/* one bit per client slot in the last_rcvd file; set = slot in use */
+static unsigned long filter_last_rcvd_slots[FILTER_LR_MAX_CLIENT_WORDS];
+
+/* Add client data to the FILTER.  We use a bitmap to locate a free space
+ * in the last_rcvd file if cl_off is -1 (i.e. a new client).
+ * Otherwise, we have just read the data from the last_rcvd file and
+ * we know its offset.
+ *
+ * filter: filter device whose last_rcvd file is written for a new client
+ * fed:    export data; fed->fed_fcd must already point at the client data,
+ *         and fed->fed_lr_off is set to the slot chosen/claimed here
+ * cl_off: slot index, or -1 to allocate one
+ *
+ * Returns 0 on success, -ENOMEM if no slot is free, and for a new client
+ * the negative result of lustre_fwrite() or -EIO on a short write.
+ * Claiming an already-claimed slot for an existing client is fatal (LBUG).
+ * Note that on a write failure the slot bit is left set and fed_lr_off
+ * stays assigned.
+ */
+int filter_client_add(struct filter_obd *filter,
+                      struct filter_export_data *fed, int cl_off)
+{
+        int new_client = (cl_off == -1);
+
+        /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+         * there's no need for extra complication here
+         */
+        if (new_client) {
+                cl_off = find_first_zero_bit(filter_last_rcvd_slots,
+                                             FILTER_LR_MAX_CLIENTS);
+        repeat:
+                if (cl_off >= FILTER_LR_MAX_CLIENTS) {
+                        CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
+                        return -ENOMEM;
+                }
+                /* the bit may have been taken since we looked; if so, search
+                 * onwards from here and try again */
+                if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+                        CERROR("FILTER client %d: found bit is set in bitmap\n",
+                               cl_off);
+                        cl_off = find_next_zero_bit(filter_last_rcvd_slots,
+                                                    FILTER_LR_MAX_CLIENTS,
+                                                    cl_off);
+                        goto repeat;
+                }
+        } else {
+                if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+                        CERROR("FILTER client %d: bit already set in bitmap!\n",
+                               cl_off);
+                        LBUG();
+                }
+        }
+
+        CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
+               cl_off, fed->fed_fcd->fcd_uuid);
+
+        fed->fed_lr_off = cl_off;
+
+        /* only a new client needs its record written; an existing one was
+         * just read from the file */
+        if (new_client) {
+                struct obd_run_ctxt saved;
+                loff_t off = FILTER_LR_CLIENT_START +
+                        (cl_off * FILTER_LR_CLIENT_SIZE);
+                ssize_t written;
+
+                push_ctxt(&saved, &filter->fo_ctxt, NULL);
+                written = lustre_fwrite(filter->fo_rcvd_filp,
+                                                (char *)fed->fed_fcd,
+                                                sizeof(*fed->fed_fcd), &off);
+                pop_ctxt(&saved, &filter->fo_ctxt, NULL);
+
+                if (written != sizeof(*fed->fed_fcd)) {
+                        if (written < 0)
+                                RETURN(written);
+                        RETURN(-EIO);
+                }
+                CDEBUG(D_INFO, "wrote client fcd at off %u (len %u)\n",
+                       FILTER_LR_CLIENT_START + (cl_off*FILTER_LR_CLIENT_SIZE),
+                       (unsigned int)sizeof(*fed->fed_fcd));
+        }
+        return 0;
+}
+
+/*
+ * Release the last_rcvd slot held by the client behind 'exp'.
+ *
+ * Clears the slot's bit in the bitmap, overwrites the client's record in
+ * the last_rcvd file with zeroes (and syncs, see below), then frees the
+ * in-memory client data and resets fed_fcd so that a repeated call is
+ * caught by the check at the top instead of freeing twice.
+ *
+ * Always returns 0; a failure to zero the on-disk record is only logged.
+ * Finding the slot's bit already clear is fatal (LBUG).
+ */
+int filter_client_free(struct obd_export *exp)
+{
+        struct filter_export_data *fed = &exp->exp_filter_data;
+        struct filter_obd *filter = &exp->exp_obd->u.filter;
+        struct filter_client_data zero_fcd;
+        struct obd_run_ctxt saved;
+        int written;
+        loff_t off;
+        ENTRY;          /* pairs with the RETURN()s below */
+
+        /* nothing to release */
+        if (!fed->fed_fcd)
+                RETURN(0);
+
+        off = FILTER_LR_CLIENT_START + (fed->fed_lr_off*FILTER_LR_CLIENT_SIZE);
+
+        CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
+               fed->fed_lr_off, off, fed->fed_fcd->fcd_uuid);
+
+        if (!test_and_clear_bit(fed->fed_lr_off, filter_last_rcvd_slots)) {
+                CERROR("FILTER client %u: bit already clear in bitmap!!\n",
+                       fed->fed_lr_off);
+                LBUG();
+        }
+
+        memset(&zero_fcd, 0, sizeof zero_fcd);
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
+        written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
+                                sizeof(zero_fcd), &off);
+
+        /* XXX: this write gets lost sometimes, unless this sync is here. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
+#else
+        file_fsync(filter->fo_rcvd_filp,  filter->fo_rcvd_filp->f_dentry, 1);
+#endif
+        pop_ctxt(&saved, &filter->fo_ctxt, NULL);
+
+        if (written != sizeof(zero_fcd)) {
+                CERROR("error zeroing out client %s off %d in %s: %d\n",
+                       fed->fed_fcd->fcd_uuid, fed->fed_lr_off, LAST_RCVD,
+                       written);
+        } else {
+                CDEBUG(D_INFO,
+                       "zeroed disconnecting client %s at off %d ("LPX64")\n",
+                       fed->fed_fcd->fcd_uuid, fed->fed_lr_off, off);
+        }
+
+        OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd));
+        /* as filter_free_server_data() does for fo_fsd: do not leave a
+         * stale pointer behind for the NULL check above to miss */
+        fed->fed_fcd = NULL;
+
+        RETURN(0);
+}
+
+/*
+ * Convert the three 64-bit counters of a server data record, as read from
+ * disk (little-endian), to CPU byte order.  The record is updated in place;
+ * the uuid is left untouched.
+ */
+static void filter_unpack_fsd(struct filter_server_data *fsd)
+{
+        __u64 objid  = le64_to_cpu(fsd->fsd_last_objid);
+        __u64 rcvd   = le64_to_cpu(fsd->fsd_last_rcvd);
+        __u64 mounts = le64_to_cpu(fsd->fsd_mount_count);
+
+        fsd->fsd_last_objid  = objid;
+        fsd->fsd_last_rcvd   = rcvd;
+        fsd->fsd_mount_count = mounts;
+}
+
+/*
+ * Build the on-disk form of server data 'fsd' in 'disk_fsd': the record is
+ * zeroed first, then the uuid is copied and the three 64-bit counters are
+ * stored little-endian.
+ */
+static void filter_pack_fsd(struct filter_server_data *disk_fsd,
+                            struct filter_server_data *fsd)
+{
+        /* start from all-zero so unused bytes are written as zeroes */
+        memset(disk_fsd, 0, sizeof(*disk_fsd));
+
+        disk_fsd->fsd_mount_count = cpu_to_le64(fsd->fsd_mount_count);
+        disk_fsd->fsd_last_rcvd   = cpu_to_le64(fsd->fsd_last_rcvd);
+        disk_fsd->fsd_last_objid  = cpu_to_le64(fsd->fsd_last_objid);
+        memcpy(disk_fsd->fsd_uuid, fsd->fsd_uuid, sizeof(fsd->fsd_uuid));
+}
+
+/*
+ * Free the in-memory server data of 'filter' and clear the pointer to it.
+ * Always returns 0.
+ */
+static int filter_free_server_data(struct filter_obd *filter)
+{
+        OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
+        filter->fo_fsd = NULL;
+
+        return 0;
+}
+
+
+/*
+ * Write server data 'fsd', packed into its on-disk form, at offset 0 of
+ * 'filp'.  Assumes the caller is already in the kernel context.
+ *
+ * Returns 0 if the whole record was written, -EIO otherwise.
+ */
+static int filter_update_server_data(struct file *filp,
+                                     struct filter_server_data *fsd)
+{
+        struct filter_server_data packed;
+        loff_t pos = 0;
+        int nwritten;
+
+        CDEBUG(D_INODE, "server uuid      : %s\n", fsd->fsd_uuid);
+        CDEBUG(D_INODE, "server last_objid: "LPU64"\n", fsd->fsd_last_objid);
+        CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n", fsd->fsd_last_rcvd);
+        CDEBUG(D_INODE, "server last_mount: "LPU64"\n", fsd->fsd_mount_count);
+
+        filter_pack_fsd(&packed, fsd);
+        nwritten = lustre_fwrite(filp, (char *)&packed, sizeof(packed), &pos);
+        if (nwritten == sizeof(packed))
+                RETURN(0);
+
+        CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
+               nwritten);
+        RETURN(-EIO);
+}
+
+/* Load last_rcvd server data, or initialize it if the file is empty; caller must already be in kernel ctxt */
+static int filter_init_server_data(struct obd_device *obd,
+                                   struct file * filp,
+                                   __u64 init_lastobjid)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_server_data *fsd;
+        struct filter_client_data *fcd = NULL;
+        struct inode *inode = filp->f_dentry->d_inode;
+        unsigned long last_rcvd_size = inode->i_size;
+        int cl_off;
+        loff_t off = 0;
+        int rc;
+        ENTRY;
+        /* ensure padding in the struct is the correct size */
+        LASSERT (offsetof(struct filter_server_data, fsd_padding) +
+                 sizeof(fsd->fsd_padding) == FILTER_LR_SERVER_SIZE);
+        LASSERT (offsetof(struct filter_client_data, fcd_padding) +
+                 sizeof(fcd->fcd_padding) == FILTER_LR_CLIENT_SIZE);
+        /* fsd hangs off filter->fo_fsd below; every failure exit must go through err_fsd */
+        OBD_ALLOC(fsd, sizeof(*fsd));
+        if (!fsd)
+                RETURN(-ENOMEM);
+        filter->fo_fsd = fsd;
+
+        if (last_rcvd_size == 0) {
+                CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+
+                memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
+                fsd->fsd_last_objid = init_lastobjid;
+                fsd->fsd_last_rcvd = 0;
+                fsd->fsd_mount_count = 0;
+
+        } else {
+                ssize_t  retval = lustre_fread(filp, (char *)fsd,
+                                              sizeof(*fsd),
+                                              &off);
+                if (retval != sizeof(*fsd)) {
+                        CDEBUG(D_INODE,"OBD filter: error reading lastobjid\n");
+                        GOTO(err_fsd, rc = -EIO); /* was "out": leaked fo_fsd */
+                }
+                filter_unpack_fsd(fsd);
+        }
+
+        CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
+               obd->obd_name, fsd->fsd_last_objid);
+        CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
+               obd->obd_name, fsd->fsd_last_rcvd);
+        CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
+               obd->obd_name, fsd->fsd_mount_count);
+
+        /*
+         * When we do a clean FILTER shutdown, we save the last_rcvd into
+         * the header.  If we find clients with higher last_rcvd values
+         * then those clients may need recovery done.
+         */
+        /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
+       for (off = FILTER_LR_CLIENT_START, cl_off = 0; off < last_rcvd_size;
+            cl_off++) {
+                __u64 last_rcvd;
+                int mount_age;
+
+                if (!fcd) {
+                        OBD_ALLOC(fcd, sizeof(*fcd));
+                        if (!fcd)
+                                GOTO(err_fsd, rc = -ENOMEM);
+                }
+
+                rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+                if (rc != sizeof(*fcd)) {
+                        CERROR("error reading FILTER %s offset %d: rc = %d\n",
+                               LAST_RCVD, cl_off, rc);
+                        if (rc > 0) /* XXX fatal error or just abort reading? */
+                                rc = -EIO;
+                        break; /* NOTE(review): rc is overwritten below, so this only stops the scan */
+                }
+
+                if (fcd->fcd_uuid[0] == '\0') {
+                        CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
+                               cl_off);
+                        continue;
+                }
+
+                last_rcvd = le64_to_cpu(fcd->fcd_last_rcvd);
+
+                /* These exports are cleaned up by filter_disconnect(), so they
+                 * need to be set up like real exports as filter_connect() does.
+                 */
+                mount_age = fsd->fsd_mount_count -
+                        le64_to_cpu(fcd->fcd_mount_count);
+                if (mount_age < FILTER_MOUNT_RECOV) {
+                        CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+                               "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+                               "\n", fcd->fcd_uuid, cl_off,
+                               last_rcvd, fsd->fsd_last_rcvd,
+                               le64_to_cpu(fcd->fcd_mount_count),
+                               fsd->fsd_mount_count);
+#if 0
+                        /* disabled until OST recovery is actually working */
+                        struct obd_export *exp = class_new_export(obd);
+                        struct filter_export_data *fed;
+
+                        if (!exp) {
+                                rc = -ENOMEM;
+                                break;
+                        }
+
+                        fed = &exp->exp_filter_data;
+                        fed->fed_fcd = fcd;
+                        filter_client_add(filter, fed, cl_off);
+                        /* create helper if export init gets more complex */
+                        INIT_LIST_HEAD(&fed->fed_open_head);
+                        spin_lock_init(&fed->fed_lock);
+
+                        fcd = NULL;
+                        filter->fo_recoverable_clients++;
+#endif
+                } else {
+                        CDEBUG(D_INFO,
+                               "discarded client %d, UUID '%s', count %Ld\n",
+                               cl_off, fcd->fcd_uuid,
+                               (long long)le64_to_cpu(fcd->fcd_mount_count));
+                }
+
+                CDEBUG(D_OTHER, "client at offset %d has last_rcvd = %Lu\n",
+                       cl_off, (unsigned long long)last_rcvd);
+
+                if (last_rcvd > filter->fo_fsd->fsd_last_rcvd)
+                        filter->fo_fsd->fsd_last_rcvd = last_rcvd;
+        }
+
+        obd->obd_last_committed = filter->fo_fsd->fsd_last_rcvd;
+        if (filter->fo_recoverable_clients) {
+                CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
+                       filter->fo_recoverable_clients,
+                       filter->fo_fsd->fsd_last_rcvd);
+                filter->fo_next_recovery_transno = obd->obd_last_committed + 1;
+                obd->obd_flags |= OBD_RECOVERING;
+        }
+
+        if (fcd)
+                OBD_FREE(fcd, sizeof(*fcd));
+
+        fsd->fsd_mount_count++;
+
+        /* save it,so mount count and last_recvd is current */
+        rc = filter_update_server_data(filp, filter->fo_fsd);
+        if (rc)
+                GOTO(err_fsd, rc);
+        RETURN(rc);
+
+err_fsd:
+        filter_free_server_data(filter);
+        RETURN(rc);
+}
 
 /* setup the object store with correct subdirectories */
 static int filter_prep(struct obd_device *obd)
@@ -105,7 +646,6 @@ static int filter_prep(struct obd_device *obd)
         struct file *file;
         struct inode *inode;
         int rc = 0;
-        __u64 lastobjid = 2;
         int mode = 0;
 
         push_ctxt(&saved, &filter->fo_ctxt, NULL);
@@ -147,36 +687,29 @@ static int filter_prep(struct obd_device *obd)
                 GOTO(out_O_mode, rc);
         }
 
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+                CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
+                       file->f_dentry->d_inode->i_mode);
+                GOTO(err_filp, rc = -ENOENT);
+        }
+
+        rc = fsfilt_journal_data(obd, file);
+        if (rc) {
+                CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
+                GOTO(err_filp, rc);
+        }
         /* steal operations */
         inode = file->f_dentry->d_inode;
         filter->fo_fop = file->f_op;
         filter->fo_iop = inode->i_op;
         filter->fo_aops = inode->i_mapping->a_ops;
 
-        if (inode->i_size == 0) {
-                __u64 disk_lastobjid = cpu_to_le64(lastobjid);
-                ssize_t retval = file->f_op->write(file,(char *)&disk_lastobjid,
-                                                   sizeof(disk_lastobjid),
-                                                   &file->f_pos);
-                if (retval != sizeof(disk_lastobjid)) {
-                        CDEBUG(D_INODE,"OBD filter: error writing lastobjid\n");
-                        filp_close(file, 0);
-                        GOTO(out_O_mode, rc = -EIO);
-                }
-        } else {
-                __u64 disk_lastobjid;
-                ssize_t retval = file->f_op->read(file, (char *)&disk_lastobjid,
-                                                  sizeof(disk_lastobjid),
-                                                  &file->f_pos);
-                if (retval != sizeof(disk_lastobjid)) {
-                        CDEBUG(D_INODE,"OBD filter: error reading lastobjid\n");
-                        filp_close(file, 0);
-                        GOTO(out_O_mode, rc = -EIO);
-                }
-                lastobjid = le64_to_cpu(disk_lastobjid);
+        rc = filter_init_server_data(obd, file, INIT_OBJID);
+        if (rc) {
+                CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
+                GOTO(err_client, rc);
         }
-        filter->fo_lastobjid = lastobjid;
-        filp_close(file, 0);
+        filter->fo_rcvd_filp = file;
 
         rc = 0;
  out:
@@ -184,6 +717,12 @@ static int filter_prep(struct obd_device *obd)
 
         return(rc);
 
+err_client:
+        class_disconnect_all(obd);
+err_filp:
+        if (filp_close(file, 0))
+                CERROR("can't close %s after error\n", LAST_RCVD);
+        filter->fo_rcvd_filp = NULL;
  out_O_mode:
         while (mode-- > 0) {
                 struct dentry *dentry = filter->fo_dentry_O_mode[mode];
@@ -202,28 +741,33 @@ static void filter_post(struct obd_device *obd)
 {
         struct obd_run_ctxt saved;
         struct filter_obd *filter = &obd->u.filter;
-        __u64 disk_lastobjid;
         long rc;
-        struct file *file;
         int mode;
 
-        push_ctxt(&saved, &filter->fo_ctxt, NULL);
-        file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
-        if (IS_ERR(file)) {
-                CERROR("OBD filter: cannot create %s\n", LAST_RCVD);
-                goto out;
-        }
+        /* XXX: filter_update_lastobjid used to call fsync_dev.  It might be
+         * best to start a transaction with h_sync, because we removed this
+         * from lastobjid */
 
-        file->f_pos = 0;
-        disk_lastobjid = cpu_to_le64(filter->fo_lastobjid);
-        rc = file->f_op->write(file, (char *)&disk_lastobjid,
-                       sizeof(disk_lastobjid), &file->f_pos);
-        if (rc != sizeof(disk_lastobjid))
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
+        rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        if (rc)
                 CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+        filter_free_server_data(filter);
 
-        rc = filp_close(file, NULL);
-        if (rc)
-                CERROR("OBD filter: cannot close status file: rc = %ld\n", rc);
+
+        if (filter->fo_rcvd_filp) {
+                /* broken sync at umount bug workaround  */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                rc = fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
+#else
+                rc = file_fsync(filter->fo_rcvd_filp,
+                                filter->fo_rcvd_filp->f_dentry, 1);
+#endif
+                filp_close(filter->fo_rcvd_filp, 0);
+                filter->fo_rcvd_filp = NULL;
+                if (rc)
+                        CERROR("last_rcvd file won't closek rc = %ld\n", rc);
+        }
 
         for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
                 struct dentry *dentry = filter->fo_dentry_O_mode[mode];
@@ -233,7 +777,6 @@ static void filter_post(struct obd_device *obd)
                 }
         }
         f_dput(filter->fo_dentry_O);
-out:
         pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 }
 
@@ -241,9 +784,10 @@ out:
 static __u64 filter_next_id(struct obd_device *obd)
 {
         obd_id id;
+        LASSERT(obd->u.filter.fo_fsd != NULL);
 
         spin_lock(&obd->u.filter.fo_objidlock);
-        id = ++obd->u.filter.fo_lastobjid;
+        id = ++obd->u.filter.fo_fsd->fsd_last_objid;
         spin_unlock(&obd->u.filter.fo_objidlock);
 
         return id;
@@ -253,7 +797,7 @@ static __u64 filter_next_id(struct obd_device *obd)
 /* parent i_sem is already held if needed for exclusivity */
 static struct dentry *filter_fid2dentry(struct obd_device *obd,
                                         struct dentry *dparent,
-                                        __u64 id, int locked)
+                                        __u64 id, int lockit)
 {
         struct super_block *sb = obd->u.filter.fo_sb;
         struct dentry *dchild;
@@ -273,13 +817,13 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd,
         }
 
         len = sprintf(name, LPU64, id);
-        CDEBUG(D_INODE, "opening object O/%*s/%s\n",
+        CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
                dparent->d_name.len, dparent->d_name.name, name);
-        //if (!locked)
-                //down(&dparent->d_inode->i_sem);
+        if (lockit)
+                down(&dparent->d_inode->i_sem);
         dchild = lookup_one_len(name, dparent, len);
-        //if (!locked)
-                //up(&dparent->d_inode->i_sem);
+        if (lockit)
+                up(&dparent->d_inode->i_sem);
         if (IS_ERR(dchild)) {
                 CERROR("child lookup error %ld\n", PTR_ERR(dchild));
                 RETURN(dchild);
@@ -389,9 +933,9 @@ static struct file *filter_obj_open(struct obd_export *export,
         spin_unlock(&fed->fed_lock);
 
         CDEBUG(D_INODE, "opened objid "LPX64": rc = %p\n", id, file);
-
+        EXIT;
 out:
-        RETURN(file);
+        return file;
 
 out_fdd:
         kmem_cache_free(filter_dentry_cache, fdd);
@@ -402,11 +946,11 @@ out_ffd:
 }
 
 /* Caller must hold i_sem on dir_dentry->d_inode */
+/* Caller must push us into kernel context */
 static int filter_destroy_internal(struct obd_device *obd,
                                    struct dentry *dir_dentry,
                                    struct dentry *object_dentry)
 {
-        struct obd_run_ctxt saved;
         struct inode *inode = object_dentry->d_inode;
         int rc;
         ENTRY;
@@ -418,9 +962,7 @@ static int filter_destroy_internal(struct obd_device *obd,
                        inode->i_nlink, atomic_read(&inode->i_count));
         }
 
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
         rc = vfs_unlink(dir_dentry->d_inode, object_dentry);
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
 
         if (rc)
                 CERROR("error unlinking objid %*s: rc %d\n",
@@ -430,13 +972,16 @@ static int filter_destroy_internal(struct obd_device *obd,
         RETURN(rc);
 }
 
-static int filter_close_internal(struct obd_device *obd,
-                                 struct filter_file_data *ffd)
+static int filter_close_internal(struct obd_export *export,
+                                 struct filter_file_data *ffd,
+                                 struct obd_trans_info *oti)
 {
+        struct obd_device *obd = export->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
         struct file *filp = ffd->ffd_file;
         struct dentry *object_dentry = dget(filp->f_dentry);
         struct filter_dentry_data *fdd = object_dentry->d_fsdata;
-        int rc, rc2 = 0;
+        int rc, rc2;
         ENTRY;
 
         LASSERT(filp->private_data == ffd);
@@ -447,14 +992,32 @@ static int filter_close_internal(struct obd_device *obd,
         if (atomic_dec_and_test(&fdd->fdd_open_count) &&
             fdd->fdd_flags & FILTER_FLAG_DESTROY) {
                 struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
+                struct obd_run_ctxt saved;
+                void *handle;
 
                 down(&dir_dentry->d_inode->i_sem);
-                /* XXX start transaction */
+                push_ctxt(&saved, &filter->fo_ctxt, NULL);
+                filter_start_transno(export);
+                handle = fsfilt_start(obd, dir_dentry->d_inode,
+                                      FSFILT_OP_UNLINK);
+                if (IS_ERR(handle)) {
+                        rc = filter_finish_transno(export, handle, oti,
+                                                   PTR_ERR(handle));
+                        GOTO(out, rc);
+                }
                 /* XXX unlink from PENDING directory now too */
                 rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry);
-                /* XXX finish transaction */
                 if (rc2 && !rc)
                         rc = rc2;
+                rc = filter_finish_transno(export, handle, oti, rc);
+                rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+                if (rc2) {
+                        CERROR("error on commit, err = %d\n", rc2);
+                        if (!rc)
+                                rc = rc2;
+                }
+        out:
+                pop_ctxt(&saved, &filter->fo_ctxt, NULL);
                 up(&dir_dentry->d_inode->i_sem);
         }
 
@@ -474,20 +1037,22 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
         int rc = 0;
         ENTRY;
 
-        MOD_INC_USE_COUNT;
         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
-                GOTO(err_dec, rc = -EINVAL);
+                RETURN(rc = -EINVAL);
 
         obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
         if (IS_ERR(obd->obd_fsops))
-                GOTO(err_dec, rc = PTR_ERR(obd->obd_fsops));
+                RETURN(rc = PTR_ERR(obd->obd_fsops));
 
         mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
         rc = PTR_ERR(mnt);
         if (IS_ERR(mnt))
                 GOTO(err_ops, rc);
 
+        obd->obd_flags |= OBD_REPLAYABLE;
+
         filter = &obd->u.filter;;
+        init_MUTEX(&filter->fo_transno_sem);
         filter->fo_vfsmnt = mnt;
         filter->fo_fstype = strdup(data->ioc_inlbuf2);
         filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
@@ -526,8 +1091,6 @@ err_kfree:
         lock_kernel();
 err_ops:
         fsfilt_put_ops(obd->obd_fsops);
-err_dec:
-        MOD_DEC_USE_COUNT;
         return rc;
 }
 
@@ -563,45 +1126,73 @@ static int filter_cleanup(struct obd_device *obd)
 
         lock_kernel();
 
-        MOD_DEC_USE_COUNT;
         RETURN(0);
 }
 
 int filter_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+        /* len and data are unused; only lvars.obd_vars is handed on to lprocfs_obd_attach() */
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 int filter_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev); /* status is passed back to the caller unchanged */
 }
 
+/* Connect a client and give its export fresh filter_client_data; nearly identical to mds_connect */
 static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
-                          obd_uuid_t cluuid, struct recovd_obd *recovd,
+                          struct obd_uuid *cluuid, struct recovd_obd *recovd,
                           ptlrpc_recovery_cb_t recover)
 {
         struct obd_export *exp;
+        struct filter_export_data *fed;
+        struct filter_client_data *fcd;
+        struct filter_obd *filter; /* set only after obd is known to be non-NULL */
         int rc;
 
         ENTRY;
-        MOD_INC_USE_COUNT;
+
+        if (!conn || !obd || !cluuid)
+                RETURN(-EINVAL);
+        filter = &obd->u.filter;
         rc = class_connect(conn, obd, cluuid);
         if (rc)
-                GOTO(out_dec, rc);
+                RETURN(rc);
         exp = class_conn2export(conn);
         LASSERT(exp);
+        fed = &exp->exp_filter_data;
+
+        OBD_ALLOC(fcd, sizeof(*fcd));
+        if (!fcd) {
+                CERROR("filter: out of memory for client data\n");
+                GOTO(out_export, rc = -ENOMEM);
+        }
+
+        memcpy(fcd->fcd_uuid, cluuid, sizeof(fcd->fcd_uuid));
+        fed->fed_fcd = fcd;
+        fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count);
 
         INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head);
         spin_lock_init(&exp->exp_filter_data.fed_lock);
-out:
+
+        rc = filter_client_add(filter, fed, -1); /* -1: presumably "no known slot yet" - confirm */
+        if (rc)
+                GOTO(out_fcd, rc);
+
         RETURN(rc);
 
-out_dec:
-        MOD_DEC_USE_COUNT;
-        goto out;
+out_fcd:
+        OBD_FREE(fcd, sizeof(*fcd));
+        fed->fed_fcd = NULL; /* don't leave the export pointing at freed memory */
+out_export:
+        class_disconnect(conn);
+        RETURN(rc);
 }
 
+/* also incredibly similar to mds_disconnect */
 static int filter_disconnect(struct lustre_handle *conn)
 {
         struct obd_export *exp = class_conn2export(conn);
@@ -620,19 +1211,20 @@ static int filter_disconnect(struct lustre_handle *conn)
                 list_del(&ffd->ffd_export_list);
                 spin_unlock(&fed->fed_lock);
 
-                CERROR("force closing file %*s on disconnect\n",
+                CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
                        ffd->ffd_file->f_dentry->d_name.len,
-                       ffd->ffd_file->f_dentry->d_name.name);
+                       ffd->ffd_file->f_dentry->d_name.name,
+                       ffd, ffd->ffd_servercookie);
 
-                filter_close_internal(exp->exp_obd, ffd);
+                filter_close_internal(exp, ffd, NULL);
                 spin_lock(&fed->fed_lock);
         }
         spin_unlock(&fed->fed_lock);
 
         ldlm_cancel_locks_for_export(exp);
+        filter_client_free(exp);
+
         rc = class_disconnect(conn);
-        if (!rc)
-                MOD_DEC_USE_COUNT;
 
         /* XXX cleanup preallocated inodes */
         RETURN(rc);
@@ -709,6 +1301,7 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
         if (!dentry->d_inode) {
                 CERROR("%s on non-existent object: "LPX64"\n", what, oa->o_id);
                 f_dput(dentry);
+                LBUG();
                 RETURN(ERR_PTR(-ENOENT));
         }
 
@@ -725,7 +1318,9 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
         int rc = 0;
         ENTRY;
 
-        dentry = filter_oa2dentry(conn, oa, 0);
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
+
+        dentry = filter_oa2dentry(conn, oa, 1);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
@@ -737,16 +1332,21 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
 
 /* this is called from filter_truncate() until we have filter_punch() */
 static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
-                          struct lov_stripe_md *md)
+                          struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
+        struct obd_export *export = class_conn2export(conn);
         struct obd_device *obd = class_conn2obd(conn);
+        struct filter_obd *filter = &obd->u.filter;
         struct dentry *dentry;
         struct iattr iattr;
         struct inode *inode;
-        int rc;
+        void * handle;
+        int rc, rc2;
         ENTRY;
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
+
         dentry = filter_oa2dentry(conn, oa, 0);
 
         if (IS_ERR(dentry))
@@ -756,17 +1356,29 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
         iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
         inode = dentry->d_inode;
 
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
         lock_kernel();
         if (iattr.ia_valid & ATTR_SIZE)
                 down(&inode->i_sem);
 
-        /* XXX start transaction */
+        filter_start_transno(export);
+        handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+        if (IS_ERR(handle)) {
+                rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+                GOTO(out_unlock, rc);
+        }
+
         if (inode->i_op->setattr)
                 rc = inode->i_op->setattr(dentry, &iattr);
         else
                 rc = inode_setattr(inode, &iattr);
-        /* XXX update last_rcvd, finish transaction */
+        rc = filter_finish_transno(export, handle, oti, rc);
+        rc2 = fsfilt_commit(obd, dentry->d_inode, handle);
+        if (rc2) {
+                CERROR("error on commit, err = %d\n", rc2);
+                if (!rc)
+                        rc = rc2;
+        }
 
         if (iattr.ia_valid & ATTR_SIZE) {
                 up(&inode->i_sem);
@@ -774,15 +1386,16 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
                 obdo_from_inode(oa, inode, oa->o_valid);
         }
 
+out_unlock:
         unlock_kernel();
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        pop_ctxt(&saved, &filter->fo_ctxt, NULL);
 
         f_dput(dentry);
         RETURN(rc);
 }
 
 static int filter_open(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *ea)
+                       struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *export;
         struct lustre_handle *handle;
@@ -797,6 +1410,8 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
+
         filp = filter_obj_open(export, oa->o_id, oa->o_mode);
         if (IS_ERR(filp))
                 GOTO(out, rc = PTR_ERR(filp));
@@ -814,7 +1429,7 @@ out:
 } /* filter_open */
 
 static int filter_close(struct lustre_handle *conn, struct obdo *oa,
-                        struct lov_stripe_md *ea)
+                        struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct obd_export *exp;
         struct filter_file_data *ffd;
@@ -828,6 +1443,8 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
+
         if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
                 CERROR("no handle for close of objid "LPX64"\n", oa->o_id);
                 RETURN(-EINVAL);
@@ -846,20 +1463,23 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa,
         list_del(&ffd->ffd_export_list);
         spin_unlock(&fed->fed_lock);
 
-        rc = filter_close_internal(exp->exp_obd, ffd);
+        rc = filter_close_internal(exp, ffd, oti);
 
         RETURN(rc);
 } /* filter_close */
 
 static int filter_create(struct lustre_handle *conn, struct obdo *oa,
-                         struct lov_stripe_md **ea)
+                         struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
+        struct obd_export *export = class_conn2export(conn);
         struct obd_device *obd = class_conn2obd(conn);
+        struct filter_obd *filter = &obd->u.filter;
         struct obd_run_ctxt saved;
         struct dentry *dir_dentry;
         struct dentry *new;
         struct iattr;
-        int rc;
+        void *handle;
+        int err, rc;
         ENTRY;
 
         if (!obd) {
@@ -867,12 +1487,14 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 return -EINVAL;
         }
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
+
         oa->o_id = filter_next_id(obd);
 
-        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
         dir_dentry = filter_parent(obd, oa->o_mode);
         down(&dir_dentry->d_inode->i_sem);
-        new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 1);
+        new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 0);
         if (IS_ERR(new))
                 GOTO(out, rc = PTR_ERR(new));
 
@@ -885,11 +1507,32 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa,
                 GOTO(out, rc = -EEXIST);
         }
 
-        /* XXX start transaction */
+        filter_start_transno(export);
+        handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_CREATE);
+        if (IS_ERR(handle)) {
+                rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+                GOTO(out_put, rc);
+        }
         rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode);
         if (rc)
+                CERROR("create failed rc = %d\n", rc);
+
+        rc = filter_finish_transno(export, handle, oti, rc);
+        err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        if (err) {
+                CERROR("unable to write lastobjid but file created\n");
+                if (!rc)
+                        rc = err;
+        }
+        err = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+        if (err) {
+                CERROR("error on commit, err = %d\n", err);
+                if (!rc)
+                        rc = err;
+        }
+
+        if (rc)
                 GOTO(out_put, rc);
-        /* XXX update last_rcvd+lastobjid on disk, finish transaction */
 
         /* Set flags for fields we have set in the inode struct */
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
@@ -901,17 +1544,21 @@ out_put:
         f_dput(new);
 out:
         up(&dir_dentry->d_inode->i_sem);
-        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        pop_ctxt(&saved, &filter->fo_ctxt, NULL);
         return rc;
 }
 
 static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
-                          struct lov_stripe_md *ea)
+                          struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
+        struct obd_export *export = class_conn2export(conn);
         struct obd_device *obd = class_conn2obd(conn);
+        struct filter_obd *filter = &obd->u.filter;
         struct dentry *dir_dentry, *object_dentry;
         struct filter_dentry_data *fdd;
-        int rc;
+        struct obd_run_ctxt saved;
+        void *handle;
+        int rc, rc2;
         ENTRY;
 
         if (!obd) {
@@ -919,17 +1566,26 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
+
         CDEBUG(D_INODE, "destroying objid "LPX64"\n", oa->o_id);
 
         dir_dentry = filter_parent(obd, oa->o_mode);
         down(&dir_dentry->d_inode->i_sem);
 
-        object_dentry = filter_oa2dentry(conn, oa, 1);
+        object_dentry = filter_oa2dentry(conn, oa, 0);
         if (IS_ERR(object_dentry))
                 GOTO(out, rc = -ENOENT);
 
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
+        filter_start_transno(export);
+        handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_UNLINK);
+        if (IS_ERR(handle)) {
+                rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+                GOTO(out_ctxt, rc);
+        }
+
         fdd = object_dentry->d_fsdata;
-        /* XXX start transaction */
         if (fdd && atomic_read(&fdd->fdd_open_count)) {
                 if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
                         fdd->fdd_flags |= FILTER_FLAG_DESTROY;
@@ -941,12 +1597,22 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
                         CDEBUG(D_INODE,
                                "repeat destroy of %dx open objid "LPX64"\n",
                                atomic_read(&fdd->fdd_open_count), oa->o_id);
-                GOTO(out_dput, rc = 0);
+                GOTO(out_commit, rc = 0);
         }
 
         rc = filter_destroy_internal(obd, dir_dentry, object_dentry);
-out_dput:
-        /* XXX update last_rcvd on disk, finish transaction */
+
+out_commit:
+        /* XXX save last_rcvd on disk */
+        rc = filter_finish_transno(export, handle, oti, rc);
+        rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+        if (rc2) {
+                CERROR("error on commit, err = %d\n", rc2);
+                if (!rc)
+                        rc = rc2;
+        }
+out_ctxt:
+        pop_ctxt(&saved, &filter->fo_ctxt, NULL);
         f_dput(object_dentry);
 
         EXIT;
@@ -958,18 +1624,21 @@ out:
 /* NB start and end are used for punch, but not truncate */
 static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
                            struct lov_stripe_md *lsm,
-                           obd_off start, obd_off end)
+                           obd_off start, obd_off end,
+                           struct obd_trans_info *oti)
 {
         int error;
         ENTRY;
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_punch_reqs, 1);
+        /* a bounded punch (end != EOF) is only logged; o_size is still set to start below */
         if (end != OBD_OBJECT_EOF)
                 CERROR("PUNCH not supported, only truncate works\n");
 
         CDEBUG(D_INODE, "calling truncate for object "LPX64", valid = %x, "
                "o_size = "LPD64"\n", oa->o_id, oa->o_valid, start);
         oa->o_size = start;
-        error = filter_setattr(conn, oa, NULL);
+        error = filter_setattr(conn, oa, NULL, oti); /* oti is handed straight through */
         RETURN(error);
 }
 
@@ -1054,11 +1723,31 @@ int waitfor_one_page(struct page *page)
 }
 #endif
 
-static int lustre_commit_write(struct page *page, unsigned from, unsigned to)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/* Stamp i_mtime (plus i_ctime when ctime_too is set) with the current time.
+ * Data-only changes should touch just mtime, unlike update_inode_times() in
+ * generic_file_write().  Nothing is dirtied if the stamps are already current. */
+static inline void inode_update_time(struct inode *inode, int ctime_too)
+{
+        time_t stamp = CURRENT_TIME;
+        if (inode->i_mtime != stamp || (ctime_too && inode->i_ctime != stamp)) {
+                inode->i_mtime = stamp;
+                if (ctime_too)
+                        inode->i_ctime = stamp;
+                mark_inode_dirty_sync(inode);
+        }
+}
+#endif
+
+static int lustre_commit_write(struct niobuf_local *lnb)
 {
+        struct page *page = lnb->page;
+        unsigned from = lnb->offset & ~PAGE_MASK;
+        unsigned to = from + lnb->len;
         struct inode *inode = page->mapping->host;
         int err;
 
+        LASSERT(to <= PAGE_SIZE);
         err = page->mapping->a_ops->commit_write(NULL, page, from, to);
         if (!err && IS_SYNC(inode))
                 err = waitfor_one_page(page);
@@ -1098,7 +1787,7 @@ struct page *filter_get_page_write(struct inode *inode,
          */
         if (!page) {
                 unsigned long addr;
-                CDEBUG(D_PAGE, "ino %lu page %ld locked\n", inode->i_ino,index);
+                CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
                 addr = __get_free_pages(GFP_KERNEL, 0); /* locked page */
                 if (!addr) {
                         CERROR("no memory for a temp page\n");
@@ -1151,38 +1840,38 @@ err:
  *     pages, and the filesystems mark these buffers as BH_New if they
  *     were newly allocated from disk. We use the BH_New flag similarly.
  */
-static int filter_commit_write(struct page *page, unsigned from, unsigned to,
-                               int err)
+static int filter_commit_write(struct niobuf_local *lnb, int err)
 {
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         if (err) {
                 unsigned block_start, block_end;
-                struct buffer_head *bh, *head = page->buffers;
+                struct buffer_head *bh, *head = lnb->page->buffers;
                 unsigned blocksize = head->b_size;
-                void *addr = page_address(page);
 
                 /* debugging: just seeing if this ever happens */
                 CERROR("called filter_commit_write for ino %lu:%lu on err %d\n",
-                       page->mapping->host->i_ino, page->index, err);
+                       lnb->page->mapping->host->i_ino, lnb->page->index, err);
 
                 /* Currently one buffer per page, but in the future... */
                 for (bh = head, block_start = 0; bh != head || !block_start;
                      block_start = block_end, bh = bh->b_this_page) {
                         block_end = block_start + blocksize;
                         if (buffer_new(bh))
-                                memset(addr + block_start, 0, blocksize);
+                                memset(lnb->addr + block_start, 0, blocksize);
                 }
         }
 #endif
-        return lustre_commit_write(page, from, to);
+        return lustre_commit_write(lnb);
 }
 
 static int filter_preprw(int cmd, struct lustre_handle *conn,
                          int objcount, struct obd_ioobj *obj,
                          int niocount, struct niobuf_remote *nb,
-                         struct niobuf_local *res, void **desc_private)
+                         struct niobuf_local *res, void **desc_private,
+                         struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
+        struct obd_export *export;
         struct obd_device *obd;
         struct obd_ioobj *o;
         struct niobuf_remote *rnb = nb;
@@ -1194,8 +1883,14 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
         int i;
         ENTRY;
 
+        if ((cmd & OBD_BRW_WRITE) != 0)
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
+        else
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
+
         memset(res, 0, niocount * sizeof(*res));
 
+        export = class_conn2export(conn);
         obd = class_conn2obd(conn);
         if (!obd) {
                 CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr);
@@ -1238,6 +1933,22 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
         }
 
         if (cmd & OBD_BRW_WRITE) {
+#warning "FIXME: we need to get inode->i_sem for each object here"
+                /* Even worse, we need to get locks on multiple inodes (in
+                 * order) or use the DLM to do the locking for us (and use
+                 * the same locking in filter_setattr() for truncate).  The
+                 * handling gets very ugly when dealing with locked pages.
+                 * It may be easier to just get rid of the locked page code
+                 * (which has problems of its own) and either discover we do
+                 * not need it anymore (i.e. it was a symptom of another bug)
+                 * or ensure we get the page locks in an appropriate order.
+                 */
+                /* Danger, Will Robinson! You are taking a lock here and also
+                 * starting a transaction and releasing/finishing them in
+                 * filter_commitrw(), so you must call fsfilt_commit() and
+                 * finish_transno() if an error occurs in this function.
+                 */
+                filter_start_transno(export);
                 *desc_private = fsfilt_brw_start(obd, objcount, fso,
                                                  niocount, nb);
                 if (IS_ERR(*desc_private))
@@ -1247,10 +1958,13 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
         obd_kmap_get(niocount, 1);
 
         for (i = 0, o = obj; i < objcount; i++, o++) {
-                struct dentry *dentry = fso->fso_dentry;
-                struct inode *inode = dentry->d_inode;
+                struct dentry *dentry;
+                struct inode *inode;
                 int j;
 
+                dentry = fso[i].fso_dentry;
+                inode = dentry->d_inode;
+
                 for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
                         struct page *page;
 
@@ -1259,18 +1973,23 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
                         else
                                 lnb->dentry = dget(dentry);
 
-                        if (cmd & OBD_BRW_WRITE)
+                        if (cmd & OBD_BRW_WRITE) {
                                 page = filter_get_page_write(inode, rnb, lnb,
                                                              &pglocked);
-                        else
+
+                                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes,
+                                                           rnb->len);
+                        } else {
                                 page = lustre_get_page_read(inode, rnb);
 
-                        if (IS_ERR(page)) {
-                                if (cmd & OBD_BRW_WRITE)
-                                        fsfilt_commit(obd, dir_dentry->d_inode,
-                                                      *desc_private);
+                                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes,
+                                                           rnb->len);
+                        }
 
-                                GOTO(out_pages, rc = PTR_ERR(page));
+                        if (IS_ERR(page)) {
+                                rc = PTR_ERR(page);
+                                f_dput(dentry);
+                                GOTO(out_pages, rc);
                         }
 
                         lnb->addr = page_address(page);
@@ -1280,13 +1999,6 @@ static int filter_preprw(int cmd, struct lustre_handle *conn,
                 }
         }
 
-        if (cmd & OBD_BRW_WRITE) {
-                int err = fsfilt_commit(obd, dir_dentry->d_inode,
-                                        *desc_private);
-                if (err)
-                        GOTO(out_pages, rc = err);
-        }
-
         EXIT;
 out:
         OBD_FREE(fso, objcount * sizeof(*fso));
@@ -1296,17 +2008,24 @@ out:
 
 out_pages:
         while (lnb-- > res) {
-                CERROR("error cleanup on brw\n");
+                CERROR("%d error cleanup on brw\n", rc);
                 if (cmd & OBD_BRW_WRITE)
-                        filter_commit_write(lnb->page, 0, PAGE_SIZE, rc);
+                        filter_commit_write(lnb, rc);
                 else
                         lustre_put_page(lnb->page);
+                f_dput(lnb->dentry);
         }
         obd_kmap_put(niocount);
+        goto out_err; /* dropped the dentry refs already (one per page) */
+
 out_objinfo:
         for (i = 0; i < objcount && fso[i].fso_dentry; i++)
                 f_dput(fso[i].fso_dentry);
-
+out_err:
+        if (cmd & OBD_BRW_WRITE) {
+                filter_finish_transno(export, *desc_private, oti, rc);
+                fsfilt_commit(obd, dir_dentry->d_inode, *desc_private);
+        }
         goto out;
 }
 
@@ -1314,6 +2033,7 @@ static int filter_write_locked_page(struct niobuf_local *lnb)
 {
         struct page *lpage;
         int rc;
+        ENTRY;
 
         lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
         if (IS_ERR(lpage)) {
@@ -1331,7 +2051,9 @@ static int filter_write_locked_page(struct niobuf_local *lnb)
                 rc = PTR_ERR(lpage);
                 CERROR("error getting locked page index %ld: rc = %d\n",
                        lnb->page->index, rc);
-                GOTO(out, rc);
+                LBUG();
+                lustre_commit_write(lnb);
+                RETURN(rc);
         }
 
         /* lpage is kmapped in lustre_get_page_write() above and kunmapped in
@@ -1339,24 +2061,31 @@ static int filter_write_locked_page(struct niobuf_local *lnb)
          * filter_get_page_write() and kunmapped in lustre_put_page() below.
          */
         memcpy(page_address(lpage), page_address(lnb->page), PAGE_SIZE);
-        rc = lustre_commit_write(lpage, 0, PAGE_SIZE);
+        lustre_put_page(lnb->page);
+
+        lnb->page = lpage;
+        rc = lustre_commit_write(lnb);
         if (rc)
                 CERROR("error committing locked page %ld: rc = %d\n",
                        lnb->page->index, rc);
-out:
-        lustre_put_page(lnb->page);
 
-        return rc;
+        RETURN(rc);
+}
+
+static int filter_sync(struct obd_device *obd)
+{
+        RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
 }
 
 static int filter_commitrw(int cmd, struct lustre_handle *conn,
                            int objcount, struct obd_ioobj *obj,
                            int niocount, struct niobuf_local *res,
-                           void *private)
+                           void *desc_private, struct obd_trans_info *oti)
 {
         struct obd_run_ctxt saved;
         struct obd_ioobj *o;
-        struct niobuf_local *r;
+        struct niobuf_local *lnb;
+        struct obd_export *export = class_conn2export(conn);
         struct obd_device *obd = class_conn2obd(conn);
         int found_locked = 0;
         int rc = 0;
@@ -1366,57 +2095,65 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn,
         push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
 
         LASSERT(!current->journal_info);
-        current->journal_info = private;
+        current->journal_info = desc_private;
 
-        for (i = 0, o = obj, r = res; i < objcount; i++, o++) {
+        for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
                 int j;
 
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
-                        struct page *page = r->page;
-
-                        if (!page)
-                                LBUG();
-
-                        if (r->flags & N_LOCAL_TEMP_PAGE) {
+                if (cmd & OBD_BRW_WRITE)
+                        inode_update_time(lnb->dentry->d_inode, 1);
+                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
+                        if (lnb->flags & N_LOCAL_TEMP_PAGE) {
                                 found_locked++;
                                 continue;
                         }
 
                         if (cmd & OBD_BRW_WRITE) {
-                                int err = filter_commit_write(page, 0,
-                                                              r->len, 0);
+                                int err = filter_commit_write(lnb, 0);
 
                                 if (!rc)
                                         rc = err;
                         } else
-                                lustre_put_page(page);
+                                lustre_put_page(lnb->page);
 
                         obd_kmap_put(1);
-                        f_dput(r->dentry);
+                        f_dput(lnb->dentry);
                 }
         }
 
-        if (!found_locked)
-                goto out_ctxt;
-
-        for (i = 0, o = obj, r = res; i < objcount; i++, o++) {
+        for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
+                        i++, o++) {
                 int j;
-                for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
+                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
                         int err;
-                        if (!(r->flags & N_LOCAL_TEMP_PAGE))
+                        if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
                                 continue;
 
-                        err = filter_write_locked_page(r);
+                        err = filter_write_locked_page(lnb);
                         obd_kmap_put(1);
                         if (!rc)
                                 rc = err;
-                        f_dput(r->dentry);
+                        f_dput(lnb->dentry);
+                        found_locked--;
                 }
         }
 
-out_ctxt:
+        if (cmd & OBD_BRW_WRITE) {
+                int err;
+                struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
+
+                rc = filter_finish_transno(export, desc_private, oti, rc);
+                err = fsfilt_commit(obd, dir_dentry->d_inode, desc_private);
+                if (err)
+                        rc = err;
+                if (obd_sync_filter) {
+                        /* this can fail with ENOMEM, what should we do then? */
+                        filter_sync(obd);
+                }
+                /* XXX <adilger> LASSERT(last_rcvd == last_committed)*/
+        }
+
         LASSERT(!current->journal_info);
-        current->journal_info = NULL;
 
         pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
         RETURN(rc);
@@ -1424,7 +2161,8 @@ out_ctxt:
 
 static int filter_brw(int cmd, struct lustre_handle *conn,
                       struct lov_stripe_md *lsm, obd_count oa_bufs,
-                      struct brw_page *pga, struct obd_brw_set *set)
+                      struct brw_page *pga, struct obd_brw_set *set,
+                      struct obd_trans_info *oti)
 {
         struct obd_ioobj        ioo;
         struct niobuf_local     *lnb;
@@ -1451,7 +2189,7 @@ static int filter_brw(int cmd, struct lustre_handle *conn,
         ioo.ioo_bufcnt = oa_bufs;
 
         ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb,
-                            &desc_private);
+                            &desc_private, oti);
         if (ret != 0)
                 GOTO(out, ret);
 
@@ -1467,7 +2205,8 @@ static int filter_brw(int cmd, struct lustre_handle *conn,
                 kunmap(virt);
         }
 
-        ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private);
+        ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private,
+                              oti);
 
 out:
         if (lnb)
@@ -1484,6 +2223,8 @@ static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
 
         obd = class_conn2obd(conn);
 
+        XPROCFS_BUMP_MYCPU_IOSTAT (st_statfs_reqs, 1);
+
         RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
 }
 
@@ -1519,7 +2260,7 @@ static int filter_get_info(struct lustre_handle *conn, obd_count keylen,
 
 int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
                   struct lustre_handle *src_conn, struct obdo *src,
-                  obd_size count, obd_off offset)
+                  obd_size count, obd_off offset, struct obd_trans_info *oti)
 {
         struct page *page;
         struct lov_stripe_md srcmd, dstmd;
@@ -1568,7 +2309,7 @@ int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
 
                 page->index = index;
                 set->brw_callback = ll_brw_sync_wait;
-                err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, set);
+                err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, set,NULL);
                 obd_brw_set_free(set);
                 if (err) {
                         EXIT;
@@ -1585,7 +2326,7 @@ int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
                 CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
 
                 set->brw_callback = ll_brw_sync_wait;
-                err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, set);
+                err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, set,oti);
                 obd_brw_set_free(set);
 
                 /* XXX should handle dst->o_size, dst->o_blocks here */
@@ -1638,7 +2379,9 @@ static struct obd_ops filter_obd_ops = {
 
 static int __init obdfilter_init(void)
 {
-        printk(KERN_INFO "Filtering OBD driver  v0.001, info@clusterfs.com\n");
+        struct lprocfs_static_vars lvars;
+
+        printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
         filter_open_cache = kmem_cache_create("ll_filter_fdata",
                                               sizeof(struct filter_file_data),
                                               0, 0, NULL, NULL);
@@ -1653,7 +2396,10 @@ static int __init obdfilter_init(void)
                 RETURN(-ENOMEM);
         }
 
-        return class_register_type(&filter_obd_ops, status_class_var,
+        xprocfs_init ("filter");
+
+        lprocfs_init_vars(&lvars);
+        return class_register_type(&filter_obd_ops, lvars.module_vars,
                                    OBD_FILTER_DEVICENAME);
 }
 
@@ -1664,10 +2410,11 @@ static void __exit obdfilter_exit(void)
                 CERROR("couldn't free obdfilter dentry cache\n");
         if (kmem_cache_destroy(filter_open_cache))
                 CERROR("couldn't free obdfilter open cache\n");
+        xprocfs_fini ();
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Filtering OBD driver v1.0");
+MODULE_DESCRIPTION("Lustre Filtering OBD driver");
 MODULE_LICENSE("GPL");
 
 module_init(obdfilter_init);
index e680784..ad92f83 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
+#include <linux/obd.h>
 
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
 
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
+static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
 {
-        int len = 0;
-        struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->obd_uuid);
-        return len;
+        struct obd_device *dev = (struct obd_device *) data;
+        return vfs_statfs(dev->u.filter.fo_sb, sfs);
 }
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct statfs mystats;
-        int len = 0;
 
-        vfs_statfs(temp->u.filter.fo_sb, &mystats);
-        len+=snprintf(page, count, "%ld\n", mystats.f_bsize); 
-        return len;
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct statfs mystats;
-        int len = 0;
-        __u32 blk_size;
-        __u64 result;
-
-        vfs_statfs(temp->u.filter.fo_sb, &mystats);
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_blocks;
-        while(blk_size >>= 1){
-                result <<= 1;
-        }
-        len+=snprintf(page, count, LPU64"\n", result); 
-        return len;   
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  lprocfs_filter_statfs);
 
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof, 
+int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
               void *data)
 {
-        struct obd_device* temp = (struct obd_device*)data;
-        struct statfs mystats;
-        int len = 0;
-        __u32 blk_size;
-        __u64 result;
-
-        vfs_statfs(temp->u.filter.fo_sb, &mystats);
-        blk_size = mystats.f_bsize;
-        blk_size >>= 10;
-        result = mystats.f_bfree;
-        while(blk_size >>= 1){
-                result <<= 1;
-        }
-        len += snprintf(page, count, LPU64"\n", result); 
-        return len;     
+        struct obd_device *dev = (struct obd_device *)data;
+        return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
 }
 
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof, 
-              void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        int len = 0;
-        len += snprintf(page, count, "%s\n", temp->u.filter.fo_fstype);
-        return len;
-}
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{ 
-        struct obd_device* temp = (struct obd_device*)data;
-        struct statfs mystats;
-        int len = 0;
-        vfs_statfs(temp->u.filter.fo_sb, &mystats);
-        len += snprintf(page, count, "%ld\n", mystats.f_files); 
-        return len;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct statfs mystats;
-        int len = 0;
-        vfs_statfs(temp->u.filter.fo_sb, &mystats);
-        len += snprintf(page, count, "%ld\n", mystats.f_ffree); 
-        return len;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytestotal",rd_kbtotal, 0, 0},
-        {"status/kbytesfree", rd_kbfree, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {"status/fstype", rd_fstype, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",        lprocfs_rd_uuid,    0, 0 },
+        { "blocksize",   rd_blksize,         0, 0 },
+        { "kbytestotal", rd_kbytestotal,     0, 0 },
+        { "kbytesfree",  rd_kbytesfree,      0, 0 },
+        { "filestotal",  rd_filestotal,      0, 0 },
+        { "filesfree",   rd_filesfree,       0, 0 },
+        { "filegroups",  rd_filegroups,      0, 0 },
+        { "fstype",      rd_fstype,          0, 0 },
+        { 0 }
 };
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
 
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",    lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 58e9097..69af4bc 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
-{
-        int len = 0;
-        struct obd_device* dev = (struct obd_device*)data;
-        len += snprintf(page, count, "%s\n", dev->obd_uuid);
-        return len;
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        return 0;
-}
-int rd_kbytestotal(char* page, char **start, off_t off, int count, int *eof, 
-                   void *data)
-{
-        return 0;
-}
-
-int rd_kbytesfree(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        return 0;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-int rd_server_uuid(char* page, char **start, off_t off, int count, int *eof, 
-                   void *data)
-{
-        
-        struct obd_device* temp = (struct obd_device*)data;
-        struct client_obd* cli = &temp->u.cli;
-        int len = 0;
-        len += snprintf(page, count, "%s\n",cli->cl_target_uuid);   
-        return len;
-
-        
-}
-int rd_conn_uuid(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        struct obd_device* temp=(struct obd_device*)data;
-        struct client_obd* cli=&temp->u.cli;
-        struct obd_import* imp=&cli->cl_import;
-        int len = 0;
-        len += snprintf(page, count, "%s\n", 
-                        imp->imp_connection->c_remote_uuid);   
-        return len;  
-        
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytestotal", rd_kbytestotal, 0, 0},
-        {"status/kbytesfree", rd_kbytesfree, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {"status/ost_server_uuid", rd_server_uuid, 0, 0},
-        {"status/ost_conn_uuid", rd_conn_uuid, 0, 0},
-        {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize,     obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal,  obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree,   obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups,  obd_self_statfs);
+
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",            lprocfs_rd_uuid, 0, 0 },
+        { "blocksize",       rd_blksize, 0, 0 },
+        { "kbytestotal",     rd_kbytestotal, 0, 0 },
+        { "kbytesfree",      rd_kbytesfree, 0, 0 },
+        { "filestotal",      rd_filestotal, 0, 0 },
+        { "filesfree",       rd_filesfree, 0, 0   },
+        { "filegroups",      rd_filegroups, 0, 0 },
+        { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
+        { "ost_conn_uuid",   lprocfs_rd_conn_uuid, 0, 0 },
+        { 0 }
 };
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
 
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",        lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 85b1694..1abd150 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *   Author Peter Braam <braam@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
 #include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
 #include <linux/lprocfs_status.h>
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
 static int osc_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 static int osc_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 /* Pack OSC object metadata for shipment to the MDS. */
@@ -123,6 +123,13 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
         RETURN(lsm_size);
 }
 
+inline void oti_from_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+{
+        if (oti && req->rq_repmsg)
+                oti->oti_transno = NTOH__u64(req->rq_repmsg->transno);
+        EXIT;
+}
+
 static int osc_getattr(struct lustre_handle *conn, struct obdo *oa,
                        struct lov_stripe_md *md)
 {
@@ -150,8 +157,7 @@ static int osc_getattr(struct lustre_handle *conn, struct obdo *oa,
 
         body = lustre_msg_buf(request->rq_repmsg, 0);
         CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
-        if (oa)
-                memcpy(oa, &body->oa, sizeof(*oa));
+        memcpy(oa, &body->oa, sizeof(*oa));
 
         EXIT;
  out:
@@ -160,7 +166,7 @@ static int osc_getattr(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_open(struct lustre_handle *conn, struct obdo *oa,
-                    struct lov_stripe_md *md)
+                    struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
@@ -172,6 +178,7 @@ static int osc_open(struct lustre_handle *conn, struct obdo *oa,
         if (!request)
                 RETURN(-ENOMEM);
 
+#warning FIXME: request->rq_flags |= PTL_RPC_FL_REPLAY;
         body = lustre_msg_buf(request->rq_reqmsg, 0);
 #warning FIXME: pack only valid fields instead of memcpy, endianness
         memcpy(&body->oa, oa, sizeof(*oa));
@@ -194,7 +201,7 @@ static int osc_open(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_close(struct lustre_handle *conn, struct obdo *oa,
-                     struct lov_stripe_md *md)
+                     struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
@@ -228,7 +235,7 @@ static int osc_close(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_setattr(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *md)
+                       struct lov_stripe_md *md, struct obd_trans_info *oti)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
@@ -252,11 +259,12 @@ static int osc_setattr(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_create(struct lustre_handle *conn, struct obdo *oa,
-                      struct lov_stripe_md **ea)
+                      struct lov_stripe_md **ea, struct obd_trans_info *oti_in)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
         struct lov_stripe_md *lsm;
+        struct obd_trans_info *oti, trans_info;
         int rc, size = sizeof(*body);
         ENTRY;
 
@@ -270,6 +278,11 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
                         RETURN(rc);
         }
 
+        if (oti_in)
+                oti = oti_in;
+        else
+                oti = &trans_info;
+
         request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_CREATE, 1, &size,
                                   NULL);
         if (!request)
@@ -290,6 +303,9 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa,
         lsm->lsm_object_id = oa->o_id;
         lsm->lsm_stripe_count = 0;
         *ea = lsm;
+
+        oti_from_request(oti, request);
+        CDEBUG(D_HA, "transno: "LPD64"\n", oti->oti_transno);
         EXIT;
 out_req:
         ptlrpc_req_finished(request);
@@ -301,7 +317,7 @@ out:
 
 static int osc_punch(struct lustre_handle *conn, struct obdo *oa,
                      struct lov_stripe_md *md, obd_size start,
-                     obd_size end)
+                     obd_size end, struct obd_trans_info *oti)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
@@ -343,7 +359,7 @@ static int osc_punch(struct lustre_handle *conn, struct obdo *oa,
 }
 
 static int osc_destroy(struct lustre_handle *conn, struct obdo *oa,
-                       struct lov_stripe_md *ea)
+                       struct lov_stripe_md *ea, struct obd_trans_info *oti)
 {
         struct ptlrpc_request *request;
         struct ost_body *body;
@@ -398,6 +414,7 @@ static void unmap_and_decref_bulk_desc(void *data)
         EXIT;
 }
 
+
 /*  this is the callback function which is invoked by the Portals
  *  event handler associated with the bulk_sink queue and bulk_source queue.
  */
@@ -488,7 +505,7 @@ static int osc_brw_read(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                 CERROR("obd_fail_loc=%x, skipping register_bulk\n",
                        OBD_FAIL_OSC_BRW_READ_BULK);
         } else {
-                rc = ptlrpc_register_bulk(desc);
+                rc = ptlrpc_register_bulk_put(desc);
                 if (rc)
                         GOTO(out_unmap, rc);
                 obd_brw_set_add(set, desc);
@@ -525,19 +542,18 @@ out_unmap:
 
 static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *lsm,
                          obd_count page_count, struct brw_page *pga,
-                         struct obd_brw_set *set)
+                         struct obd_brw_set *set, struct obd_trans_info *oti)
 {
         struct obd_import *imp = class_conn2cliimp(conn);
         struct ptlrpc_connection *connection = imp->imp_connection;
         struct ptlrpc_request *request = NULL;
         struct ptlrpc_bulk_desc *desc = NULL;
         struct ost_body *body;
-        struct niobuf_local *local = NULL;
-        struct niobuf_remote *remote;
         int rc, size[3] = {sizeof(*body)}, mapped = 0;
-        int j;
+        unsigned long flags;
         struct obd_ioobj *iooptr;
         void *nioptr;
+        __u32 xid;
         ENTRY;
 
         size[1] = sizeof(struct obd_ioobj);
@@ -561,73 +577,62 @@ static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *lsm,
         ost_pack_ioo(&iooptr, lsm, page_count);
         /* end almost identical to brw_read case */
 
-        OBD_ALLOC(local, page_count * sizeof(*local));
-        if (!local)
-                GOTO(out_desc, rc = -ENOMEM);
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        xid = ++imp->imp_last_xid;       /* single xid for all pages */
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         obd_kmap_get(page_count, 0);
 
         for (mapped = 0; mapped < page_count; mapped++) {
-                local[mapped].addr = kmap(pga[mapped].pg);
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_unmap, rc = -ENOMEM);
 
-                CDEBUG(D_INFO, "kmap(pg) = %p ; pg->flags = %lx ; pg->refcount = "
-                       "%d ; page %d of %d\n",
-                       local[mapped].addr, pga[mapped].pg->flags,
-                       page_count(pga[mapped].pg),
-                       mapped, page_count - 1);
+                bulk->bp_xid = xid;           /* single xid for all pages */
 
-                local[mapped].offset = pga[mapped].off;
-                local[mapped].len = pga[mapped].count;
+                bulk->bp_buf = kmap(pga[mapped].pg);
+                bulk->bp_page = pga[mapped].pg;
+                bulk->bp_buflen = PAGE_SIZE;
                 ost_pack_niobuf(&nioptr, pga[mapped].off, pga[mapped].count,
-                                pga[mapped].flag, 0);
-        }
-
-        size[1] = page_count * sizeof(*remote);
-        request->rq_replen = lustre_msg_size(2, size);
-        rc = ptlrpc_queue_wait(request);
-        if (rc)
-                GOTO(out_unmap, rc);
-
-        nioptr = lustre_msg_buf(request->rq_repmsg, 1);
-        if (!nioptr)
-                GOTO(out_unmap, rc = -EINVAL);
-
-        if (request->rq_repmsg->buflens[1] != size[1]) {
-                CERROR("buffer length wrong (%d vs. %d)\n",
-                       request->rq_repmsg->buflens[1], size[1]);
-                GOTO(out_unmap, rc = -EINVAL);
+                                pga[mapped].flag, bulk->bp_xid);
         }
 
-        for (j = 0; j < page_count; j++) {
-                struct ptlrpc_bulk_page *bulk;
-
-                ost_unpack_niobuf(&nioptr, &remote);
-
-                bulk = ptlrpc_prep_bulk_page(desc);
-                if (!bulk)
-                        GOTO(out_unmap, rc = -ENOMEM);
-
-                bulk->bp_buf = local[j].addr;
-                bulk->bp_buflen = local[j].len;
-                bulk->bp_xid = remote->xid;
-                bulk->bp_page = pga[j].pg;
+        /*
+         * Register the bulk first, because the reply could arrive out of
+         * order, and we want to be ready for the bulk data.
+         *
+         * One reference is released when brw_finish is complete, the other
+         * when the caller removes us from the "set" list.
+         *
+         * On error, we never do the brw_finish, so we handle all decrefs.
+         */
+        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_WRITE_BULK)) {
+                CERROR("obd_fail_loc=%x, skipping register_bulk\n",
+                OBD_FAIL_OSC_BRW_WRITE_BULK);
+        } else {
+                rc = ptlrpc_register_bulk_get(desc);
+                if (rc)
+                        GOTO(out_unmap, rc);
+                obd_brw_set_add(set, desc);
         }
 
-        if (desc->bd_page_count != page_count)
-                LBUG();
-
-        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_WRITE_BULK))
-                GOTO(out_unmap, rc = 0);
-
-        OBD_FREE(local, page_count * sizeof(*local));
-
-        /* One reference is released when brw_finish is complete, the other
-         * when the caller removes it from the "set" list. */
-        obd_brw_set_add(set, desc);
-        rc = ptlrpc_send_bulk(desc);
+        request->rq_replen = lustre_msg_size(1, size);
+        rc = ptlrpc_queue_wait(request);
 
-        /* XXX: Mike, same question as in osc_brw_read. */
-out_req:
+        /*
+         * XXX: If there is an error during the processing of the callback,
+         *      such as a timeout in a sleep that it performs, brw_finish
+         *      will never get called, and we'll leak the desc, fail to kunmap
+         *      things, cats will live with dogs.  One solution would be to
+         *      export brw_finish as osc_brw_finish, so that the timeout case
+         *      and its kin could call it for proper cleanup.  An alternative
+         *      would be for an error return from the callback to cause us to
+         *      clean up, but that doesn't help the truly async cases (like
+         *      LOV), which will immediately return from their PHASE_START
+         *      callback, before any such cleanup-requiring error condition can
+         *      be detected.
+         */
+ out_req:
         ptlrpc_req_finished(request);
         RETURN(rc);
 
@@ -635,18 +640,15 @@ out_req:
 out_unmap:
         while (mapped-- > 0)
                 kunmap(pga[mapped].pg);
-
         obd_kmap_put(page_count);
-
-        OBD_FREE(local, page_count * sizeof(*local));
-out_desc:
         ptlrpc_bulk_decref(desc);
         goto out_req;
 }
 
 static int osc_brw(int cmd, struct lustre_handle *conn,
                    struct lov_stripe_md *md, obd_count page_count,
-                   struct brw_page *pga, struct obd_brw_set *set)
+                   struct brw_page *pga, struct obd_brw_set *set, 
+                   struct obd_trans_info *oti)
 {
         ENTRY;
 
@@ -660,7 +662,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn,
                         pages_per_brw = page_count;
 
                 if (cmd & OBD_BRW_WRITE)
-                        rc = osc_brw_write(conn, md, pages_per_brw, pga, set);
+                        rc = osc_brw_write(conn, md, pages_per_brw, pga, set, oti);
                 else
                         rc = osc_brw_read(conn, md, pages_per_brw, pga, set);
 
@@ -679,7 +681,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
                        int *flags, void *callback, void *data, int datalen,
                        struct lustre_handle *lockh)
 {
-        __u64 res_id[RES_NAME_SIZE] = { lsm->lsm_object_id };
+        struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
         struct obd_device *obddev = class_conn2obd(connh);
         struct ldlm_extent *extent = extentp;
         int rc;
@@ -694,7 +696,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
         }
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obddev->obd_namespace, res_id, type, extent,
+        rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type, extent,
                              sizeof(extent), mode, lockh);
         if (rc == 1)
                 /* We already have a lock, and it's referenced */
@@ -713,7 +715,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
          * locks out from other users right now, too. */
 
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obddev->obd_namespace, res_id, type,
+                rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type,
                                      extent, sizeof(extent), LCK_PW, lockh);
                 if (rc == 1) {
                         /* FIXME: This is not incredibly elegant, but it might
@@ -728,7 +730,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
 
         rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
                               res_id, type, extent, sizeof(extent), mode, flags,
-                              ldlm_completion_ast, callback, data, datalen,
+                              ldlm_completion_ast, callback, data, NULL,
                               lockh);
         RETURN(rc);
 }
@@ -747,9 +749,9 @@ static int osc_cancel_unused(struct lustre_handle *connh,
                              struct lov_stripe_md *lsm, int flags)
 {
         struct obd_device *obddev = class_conn2obd(connh);
-        __u64 res_id[RES_NAME_SIZE] = { lsm->lsm_object_id };
+        struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
 
-        return ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags);
+        return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags);
 }
 
 static int osc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
@@ -832,6 +834,7 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         ENTRY;
 
         switch (cmd) {
+#if 0
         case IOC_LDLM_TEST: {
                 err = ldlm_test(obddev, conn);
                 CERROR("-- done err %d\n", err);
@@ -879,6 +882,7 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 CERROR("-- done err %d\n", err);
                 GOTO(out, err);
         }
+#endif
         case IOC_OSC_REGISTER_LOV: {
                 if (obddev->u.cli.cl_containing_lov)
                         GOTO(out, err = -EALREADY);
@@ -888,7 +892,7 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
         case OBD_IOC_LOV_GET_CONFIG: {
                 char *buf;
                 struct lov_desc *desc;
-                obd_uuid_t *uuidp;
+                struct obd_uuid uuid;
 
                 buf = NULL;
                 len = 0;
@@ -902,7 +906,7 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                         GOTO(out, err = -EINVAL);
                 }
 
-                if (data->ioc_inllen2 < sizeof(*uuidp)) {
+                if (data->ioc_inllen2 < sizeof(uuid.uuid)) {
                         OBD_FREE(buf, len);
                         GOTO(out, err = -EINVAL);
                 }
@@ -914,10 +918,10 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
                 desc->ld_default_stripe_size = 0;
                 desc->ld_default_stripe_offset = 0;
                 desc->ld_pattern = 0;
-                memcpy(desc->ld_uuid,  obddev->obd_uuid, sizeof(*uuidp));
+                memcpy(desc->ld_uuid.uuid,  obddev->obd_uuid.uuid, sizeof(uuid.uuid));
 
-                uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
-                memcpy(uuidp,  obddev->obd_uuid, sizeof(*uuidp));
+                memcpy(data->ioc_inlbuf2,  obddev->obd_uuid.uuid, 
+                       sizeof(uuid.uuid));
 
                 err = copy_to_user((void *)uarg, buf, len);
                 if (err)
@@ -943,7 +947,11 @@ out:
 
 static void set_osc_active(struct obd_import *imp, int active)
 {
-        struct obd_device *notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
+        struct obd_device *notify_obd;
+
+        LASSERT(imp->imp_obd);
+
+        notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
 
         if (notify_obd == NULL)
                 return;
@@ -952,25 +960,26 @@ static void set_osc_active(struct obd_import *imp, int active)
         if (!list_empty(&notify_obd->obd_exports)) {
                 int rc;
                 struct lustre_handle fakeconn;
-                struct obd_ioctl_data ioc_data;
+                struct obd_ioctl_data ioc_data = { 0 };
                 struct obd_export *exp =
                         list_entry(notify_obd->obd_exports.next,
                                    struct obd_export, exp_obd_chain);
 
                 fakeconn.addr = (__u64)(unsigned long)exp;
                 fakeconn.cookie = exp->exp_cookie;
-                ioc_data.ioc_inlbuf1 = imp->imp_obd->u.cli.cl_target_uuid;
+                ioc_data.ioc_inlbuf1 = &imp->imp_obd->u.cli.cl_target_uuid;
                 ioc_data.ioc_offset = active;
                 rc = obd_iocontrol(IOC_LOV_SET_OSC_ACTIVE, &fakeconn,
                                    sizeof ioc_data, &ioc_data, NULL);
-                if (rc)
+                if (rc) {
                         CERROR("disabling %s on LOV %p/%s: %d\n",
-                               imp->imp_obd->obd_uuid, notify_obd,
-                               notify_obd->obd_uuid, rc);
+                               imp->imp_obd->u.cli.cl_target_uuid.uuid,
+                               notify_obd, notify_obd->obd_uuid.uuid, rc);
+                }
         } else {
                 CDEBUG(D_HA, "No exports for obd %p/%s, can't notify about "
-                       "%p\n", notify_obd, notify_obd->obd_uuid,
-                       imp->imp_obd->obd_uuid);
+                       "%p\n", notify_obd, notify_obd->obd_uuid.uuid,
+                       imp->imp_obd->obd_uuid.uuid);
         }
 }
 
@@ -986,7 +995,7 @@ static int osc_recover(struct obd_import *imp, int phase)
             case PTLRPC_RECOVD_PHASE_PREPARE: {
                 struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
                 ldlm_namespace_cleanup(ns, 1 /* no network ops */);
-                ptlrpc_abort_inflight(imp);
+                ptlrpc_abort_inflight(imp, 0);
                 set_osc_active(imp, 0 /* inactive */);
                 RETURN(0);
             }
@@ -1022,7 +1031,7 @@ static int osc_recover(struct obd_import *imp, int phase)
 }
 
 static int osc_connect(struct lustre_handle *conn, struct obd_device *obd,
-                       obd_uuid_t cluuid, struct recovd_obd *recovd,
+                       struct obd_uuid *cluuid, struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
         struct obd_import *imp = &obd->u.cli.cl_import;
@@ -1057,7 +1066,10 @@ struct obd_ops osc_obd_ops = {
 
 static int __init osc_init(void)
 {
-        RETURN(class_register_type(&osc_obd_ops, status_class_var,
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        RETURN(class_register_type(&osc_obd_ops, lvars.module_vars,
                                    LUSTRE_OSC_NAME));
 }
 
@@ -1067,7 +1079,7 @@ static void __exit osc_exit(void)
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Object Storage Client (OSC) v1.0");
+MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
 MODULE_LICENSE("GPL");
 
 module_init(osc_init);
index 1fa1c59..c44093c 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
  */
 #define DEBUG_SUBSYSTEM S_OST
 
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
 #include <linux/lprocfs_status.h>
 
-
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
-{
-         
-        struct obd_device* temp = (struct obd_device*)data;
-        int len = 0;
-        len += snprintf(page, count, "%s\n", temp->obd_uuid); 
-        return len;
-        
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        
-        struct obd_device* temp = (struct obd_device*)data;
-        struct ost_obd *ost = &temp->u.ost;
-        struct lustre_handle *conn = &ost->ost_conn;
-        struct obd_statfs mystats;
-        int len = 0;
-        
-        obd_statfs(conn, &mystats);
-        len += snprintf(page, count, "%d\n", mystats.os_bsize); 
-        return len;
-        
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct ost_obd *ost = &temp->u.ost;
-        struct lustre_handle *conn = &ost->ost_conn;
-        struct obd_statfs mystats;
-        int len = 0;
-        __u32 blk_size;
-        __u64 result;
-                
-        obd_statfs(conn, &mystats);
-        blk_size = mystats.os_bsize;
-        blk_size >>= 10;
-        result = mystats.os_blocks;
-        while(blk_size >>= 1){
-                result <<= 1;
-        }
-        len += snprintf(page, count, LPU64"\n", result);
-        return len;
-                
-}
-
-
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof, 
-              void *data)
-{
-        
-        struct obd_device* temp = (struct obd_device*)data;
-        struct ost_obd *ost = &temp->u.ost;
-        struct lustre_handle *conn = &ost->ost_conn;
-        struct obd_statfs mystats;
-        int len = 0;
-        __u32 blk_size;
-        __u64 result;
-
-        obd_statfs(conn, &mystats);
-        blk_size = mystats.os_bsize;
-        blk_size >>= 10;
-        result = mystats.os_bfree;
-        while(blk_size >>= 1){
-                result <<= 1;
-        }
-        len += snprintf(page, count, LPU64"\n", result);
-        return len;  
-}
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        struct obd_device* temp = (struct obd_device*)data;
-        struct ost_obd *ost = &temp->u.ost;
-        struct lustre_handle *conn = &ost->ost_conn;
-        struct obd_statfs mystats;
-        int len = 0;
-        
-        obd_statfs(conn, &mystats);
-        len += snprintf(page, count, LPU64"\n",mystats.os_files); 
-        return len;
-        
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof, 
-                 void *data)
-{
-        
-        struct obd_device* temp = (struct obd_device*)data;
-        struct ost_obd *ost = &temp->u.ost;
-        struct lustre_handle *conn = &ost->ost_conn;
-        struct obd_statfs mystats;
-        int len = 0;
-        
-        obd_statfs(conn, &mystats);
-        len += snprintf(page, count, LPU64"\n", mystats.os_ffree); 
-        return len;
-        
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        return 0;
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {"status/blocksize",rd_blksize, 0, 0},
-        {"status/kbytesfree", rd_kbfree, 0, 0},
-        {"status/kbytestotal", rd_kbtotal, 0, 0},
-        {"status/filestotal", rd_filestotal, 0, 0},
-        {"status/filesfree", rd_filesfree, 0, 0},
-        {"status/filegroups", rd_filegroups, 0, 0},
-        {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",        lprocfs_rd_uuid,   0, 0 },
+        { 0 }
 };
 
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-               void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
-
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
+        { 0 }
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index db7857c..d595757 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
  *   Author: Peter J. Braam <braam@clusterfs.com>
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
 #include <linux/init.h>
 #include <linux/lprocfs_status.h>
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
 
-static int ost_destroy(struct ptlrpc_request *req)
+static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body;
@@ -57,7 +55,7 @@ static int ost_destroy(struct ptlrpc_request *req)
         if (rc)
                 RETURN(rc);
 
-        req->rq_status = obd_destroy(conn, &body->oa, NULL);
+        req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
         RETURN(0);
 }
 
@@ -106,7 +104,7 @@ static int ost_statfs(struct ptlrpc_request *req)
         RETURN(0);
 }
 
-static int ost_open(struct ptlrpc_request *req)
+static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body, *repbody;
@@ -122,11 +120,11 @@ static int ost_open(struct ptlrpc_request *req)
         repbody = lustre_msg_buf(req->rq_repmsg, 0);
         /* FIXME: unpack only valid fields instead of memcpy, endianness */
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_open(conn, &repbody->oa, NULL);
+        req->rq_status = obd_open(conn, &repbody->oa, NULL, oti);
         RETURN(0);
 }
 
-static int ost_close(struct ptlrpc_request *req)
+static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body, *repbody;
@@ -142,11 +140,11 @@ static int ost_close(struct ptlrpc_request *req)
         repbody = lustre_msg_buf(req->rq_repmsg, 0);
         /* FIXME: unpack only valid fields instead of memcpy, endianness */
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_close(conn, &repbody->oa, NULL);
+        req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
         RETURN(0);
 }
 
-static int ost_create(struct ptlrpc_request *req)
+static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body, *repbody;
@@ -162,11 +160,11 @@ static int ost_create(struct ptlrpc_request *req)
         repbody = lustre_msg_buf(req->rq_repmsg, 0);
         /* FIXME: unpack only valid fields instead of memcpy, endianness */
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_create(conn, &repbody->oa, NULL);
+        req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
         RETURN(0);
 }
 
-static int ost_punch(struct ptlrpc_request *req)
+static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body, *repbody;
@@ -187,11 +185,11 @@ static int ost_punch(struct ptlrpc_request *req)
         /* FIXME: unpack only valid fields instead of memcpy, endianness */
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_punch(conn, &repbody->oa, NULL,
-                                   repbody->oa.o_size, repbody->oa.o_blocks);
+                                   repbody->oa.o_size, repbody->oa.o_blocks, oti);
         RETURN(0);
 }
 
-static int ost_setattr(struct ptlrpc_request *req)
+static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ost_body *body, *repbody;
@@ -207,7 +205,7 @@ static int ost_setattr(struct ptlrpc_request *req)
         repbody = lustre_msg_buf(req->rq_repmsg, 0);
         /* FIXME: unpack only valid fields instead of memcpy, endianness */
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_setattr(conn, &repbody->oa, NULL);
+        req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
         RETURN(0);
 }
 
@@ -232,7 +230,8 @@ static int ost_brw_read(struct ptlrpc_request *req)
         struct ost_body *body;
         struct l_wait_info lwi;
         void *desc_priv = NULL;
-        int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
+        int cmd, i, j, objcount, niocount, size = sizeof(*body);
+        int rc = 0;
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, 0);
@@ -244,7 +243,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
         cmd = OBD_BRW_READ;
 
         if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
-                GOTO(out, rc = 0);
+                GOTO(out, req->rq_status = -EIO);
 
         for (i = 0; i < objcount; i++) {
                 ost_unpack_ioo(&tmp1, &ioo);
@@ -252,8 +251,10 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         LBUG();
                         GOTO(out, rc = -EFAULT);
                 }
-                for (j = 0; j < ioo->ioo_bufcnt; j++)
+                for (j = 0; j < ioo->ioo_bufcnt; j++) {
+                        /* XXX verify niobuf[j].offset > niobuf[j-1].offset */
                         ost_unpack_niobuf(&tmp2, &remote_nb);
+                }
         }
 
         OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
@@ -264,10 +265,10 @@ static int ost_brw_read(struct ptlrpc_request *req)
         ioo = lustre_msg_buf(req->rq_reqmsg, 1);
         remote_nb = lustre_msg_buf(req->rq_reqmsg, 2);
         req->rq_status = obd_preprw(cmd, conn, objcount, ioo, niocount,
-                                    remote_nb, local_nb, &desc_priv);
+                                    remote_nb, local_nb, &desc_priv, NULL);
 
         if (req->rq_status)
-                GOTO(out, rc = 0);
+                GOTO(out, req->rq_status);
 
         desc = ptlrpc_prep_bulk(req->rq_connection);
         if (desc == NULL)
@@ -285,7 +286,7 @@ static int ost_brw_read(struct ptlrpc_request *req)
                 bulk->bp_buflen = remote_nb[i].len;
         }
 
-        rc = ptlrpc_send_bulk(desc);
+        rc = ptlrpc_bulk_put(desc);
         if (rc)
                 GOTO(out_bulk, rc);
 
@@ -298,15 +299,19 @@ static int ost_brw_read(struct ptlrpc_request *req)
         }
 
         req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
-                                      local_nb, desc_priv);
-
-        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+                                      local_nb, desc_priv, NULL);
 
 out_bulk:
         ptlrpc_bulk_decref(desc);
 out_local:
         OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
 out:
+        if (!rc)
+                /* Hmm, we don't return anything in this reply buffer?
+                 * We should be returning per-page status codes and also
+                 * per-object size, blocks count, mtime, ctime.  (bug 593) */
+                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+                                     &req->rq_repmsg);
         if (rc)
                 ptlrpc_error(req->rq_svc, req);
         else
@@ -314,7 +319,7 @@ out:
         RETURN(rc);
 }
 
-static int ost_brw_write(struct ptlrpc_request *req)
+static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
         struct ptlrpc_bulk_desc *desc;
@@ -322,16 +327,12 @@ static int ost_brw_write(struct ptlrpc_request *req)
         void *tmp2, *end2;
         struct niobuf_remote *remote_nb;
         struct niobuf_local *local_nb = NULL;
-        struct niobuf_local *lnb;
         struct obd_ioobj *ioo;
         struct ost_body *body;
         struct l_wait_info lwi;
-        int rc, cmd, i, j, objcount, niocount;
-        int size[2] = {sizeof(*body)};
         void *desc_priv = NULL;
-        int reply_sent = 0;
-        struct ptlrpc_service *srv;
-        __u32 xid;
+        int cmd, i, j, objcount, niocount, size = sizeof(*body);
+        int rc = 0;
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, 0);
@@ -342,117 +343,97 @@ static int ost_brw_write(struct ptlrpc_request *req)
         niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
         cmd = OBD_BRW_WRITE;
 
+        if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
+                GOTO(out, req->rq_status = -EIO);
+
         for (i = 0; i < objcount; i++) {
-                ost_unpack_ioo((void *)&tmp1, &ioo);
+                ost_unpack_ioo(&tmp1, &ioo);
                 if (tmp2 + ioo->ioo_bufcnt > end2) {
-                        rc = -EFAULT;
-                        break;
+                        LBUG();
+                        GOTO(out, rc = -EFAULT);
+                }
+                for (j = 0; j < ioo->ioo_bufcnt; j++) {
+                        /* XXX verify niobuf[j].offset > niobuf[j-1].offset */
+                        ost_unpack_niobuf(&tmp2, &remote_nb);
                 }
-                for (j = 0; j < ioo->ioo_bufcnt; j++)
-                        ost_unpack_niobuf((void *)&tmp2, &remote_nb);
         }
 
-        size[1] = niocount * sizeof(*remote_nb);
-        rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc)
-                GOTO(out, rc);
-        remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
-
-        OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
+        OBD_ALLOC(local_nb, sizeof(*local_nb)* niocount);
         if (local_nb == NULL)
                 GOTO(out, rc = -ENOMEM);
 
         /* The unpackers move tmp1 and tmp2, so reset them before using */
-        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
-        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
-        req->rq_status = obd_preprw(cmd, conn, objcount, tmp1, niocount, tmp2,
-                                    local_nb, &desc_priv);
-        if (req->rq_status)
-                GOTO(out_free, rc = 0); /* XXX is this correct? */
+        ioo = lustre_msg_buf(req->rq_reqmsg, 1);
+        remote_nb = lustre_msg_buf(req->rq_reqmsg, 2);
+        req->rq_status = obd_preprw(cmd, conn, objcount, ioo, niocount,
+                                    remote_nb, local_nb, &desc_priv, oti);
 
-        if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
-                GOTO(fail_preprw, rc = 0);
+        /* local_nb was allocated above, so a failed preprw has to leave via
+         * out_local to release it; jumping straight to "out" leaked it.
+         * rc stays 0 so that a reply is still packed and sent. */
+        if (req->rq_status)
+                GOTO(out_local, rc = 0);
 
         desc = ptlrpc_prep_bulk(req->rq_connection);
         if (desc == NULL)
-                GOTO(fail_preprw, rc = -ENOMEM);
+                GOTO(out_local, rc = -ENOMEM);
         desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = OSC_BULK_PORTAL;
-        desc->bd_desc_private = desc_priv;
-        memcpy(&(desc->bd_conn), &conn, sizeof(conn));
-
-        srv = req->rq_obd->u.ost.ost_service;
-        spin_lock(&srv->srv_lock);
-        xid = srv->srv_xid++;                   /* single xid for all pages */
-        spin_unlock(&srv->srv_lock);
 
-        for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
-                struct ptlrpc_bulk_page *bulk;
+        for (i = 0; i < niocount; i++) {
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
 
-                bulk = ptlrpc_prep_bulk_page(desc);
                 if (bulk == NULL)
-                        GOTO(fail_bulk, rc = -ENOMEM);
-
-                bulk->bp_xid = xid;              /* single xid for all pages */
-
-                bulk->bp_buf = lnb->addr;
-                bulk->bp_page = lnb->page;
-                bulk->bp_flags = lnb->flags;
-                bulk->bp_dentry = lnb->dentry;
-                bulk->bp_buflen = lnb->len;
-                bulk->bp_cb = NULL;
-
-                /* this advances remote_nb */
-                ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
-                                bulk->bp_xid);
+                        GOTO(out_bulk, rc = -ENOMEM);
+                bulk->bp_xid = remote_nb[i].xid;
+                bulk->bp_buf = local_nb[i].addr;
+                bulk->bp_buflen = remote_nb[i].len;
         }
 
-        rc = ptlrpc_register_bulk(desc);
+        rc = ptlrpc_bulk_get(desc);
         if (rc)
-                GOTO(fail_bulk, rc);
-
-        reply_sent = 1;
-        ptlrpc_reply(req->rq_svc, req);
+                GOTO(out_bulk, rc);
 
         lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
         rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_RCVD,
                           &lwi);
         if (rc) {
-                if (rc != -ETIMEDOUT)
-                        LBUG();
+                LASSERT(rc == -ETIMEDOUT);
                 ptlrpc_abort_bulk(desc);
                 recovd_conn_fail(desc->bd_connection);
-                obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
-                             desc->bd_desc_private);
-        } else {
-                rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
-                                  desc->bd_desc_private);
+                obd_commitrw(cmd, conn, objcount, ioo, niocount, local_nb,
+                             desc_priv, oti);
+                GOTO(out_bulk, rc);
         }
 
+        req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
+                                      local_nb, desc_priv, oti);
+
+ out_bulk:
         ptlrpc_bulk_decref(desc);
-        EXIT;
-out_free:
-        OBD_FREE(local_nb, niocount * sizeof(*local_nb));
-out:
-        if (!reply_sent) {
-                if (rc) {
-                        OBD_FREE(req->rq_repmsg, req->rq_replen);
-                        req->rq_repmsg = NULL;
-                        ptlrpc_error(req->rq_svc, req);
-                } else
-                        ptlrpc_reply(req->rq_svc, req);
-        }
-        return rc;
+ out_local:
+        OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
+ out:
+        if (!rc)
+                /* Hmm, we don't return anything in this reply buffer?
+                 * We should be returning per-page status codes and also
+                 * per-object size, blocks count, mtime, ctime.  (bug 593) */
+                rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+                                     &req->rq_repmsg);
+        if (rc)
+                ptlrpc_error(req->rq_svc, req);
+        else
+                rc = ptlrpc_reply(req->rq_svc, req);
+        RETURN(rc);
+}
 
-fail_bulk:
-        ptlrpc_free_bulk(desc);
-fail_preprw:
-        /* FIXME: how do we undo the preprw? - answer = call commitrw */
-        goto out_free;
+/* Copy the transaction number held in @oti into the reply message of @req,
+ * converted with HTON__u64().  Does nothing when @oti is NULL or when no
+ * reply message (rq_repmsg) is attached to the request. */
+inline void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+{
+        if (oti && req->rq_repmsg)
+                req->rq_repmsg->transno = HTON__u64(oti->oti_transno);
+        EXIT;
 }
 
 static int ost_handle(struct ptlrpc_request *req)
 {
+        struct obd_trans_info trans_info = { 0, }, *oti = &trans_info;
         int rc;
         ENTRY;
 
@@ -462,8 +443,7 @@ static int ost_handle(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
-        if (req->rq_reqmsg->opc != OST_CONNECT &&
-            req->rq_export == NULL) {
+        if (req->rq_reqmsg->opc != OST_CONNECT && req->rq_export == NULL) {
                 CERROR("lustre_ost: operation %d on unconnected OST\n",
                        req->rq_reqmsg->opc);
                 req->rq_status = -ENOTCONN;
@@ -487,12 +467,12 @@ static int ost_handle(struct ptlrpc_request *req)
         case OST_CREATE:
                 CDEBUG(D_INODE, "create\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
-                rc = ost_create(req);
+                rc = ost_create(req, oti);
                 break;
         case OST_DESTROY:
                 CDEBUG(D_INODE, "destroy\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
-                rc = ost_destroy(req);
+                rc = ost_destroy(req, oti);
                 break;
         case OST_GETATTR:
                 CDEBUG(D_INODE, "getattr\n");
@@ -502,22 +482,22 @@ static int ost_handle(struct ptlrpc_request *req)
         case OST_SETATTR:
                 CDEBUG(D_INODE, "setattr\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
-                rc = ost_setattr(req);
+                rc = ost_setattr(req, oti);
                 break;
         case OST_OPEN:
                 CDEBUG(D_INODE, "open\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
-                rc = ost_open(req);
+                rc = ost_open(req, oti);
                 break;
         case OST_CLOSE:
                 CDEBUG(D_INODE, "close\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
-                rc = ost_close(req);
+                rc = ost_close(req, oti);
                 break;
         case OST_WRITE:
                 CDEBUG(D_INODE, "write\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
-                rc = ost_brw_write(req);
+                rc = ost_brw_write(req, oti);
                 /* ost_brw sends its own replies */
                 RETURN(rc);
         case OST_READ:
@@ -529,7 +509,7 @@ static int ost_handle(struct ptlrpc_request *req)
         case OST_PUNCH:
                 CDEBUG(D_INODE, "punch\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
-                rc = ost_punch(req);
+                rc = ost_punch(req, oti);
                 break;
         case OST_STATFS:
                 CDEBUG(D_INODE, "statfs\n");
@@ -539,7 +519,8 @@ static int ost_handle(struct ptlrpc_request *req)
         case LDLM_ENQUEUE:
                 CDEBUG(D_INODE, "enqueue\n");
                 OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
-                rc = ldlm_handle_enqueue(req);
+                rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
+                                         ldlm_server_blocking_ast);
                 break;
         case LDLM_CONVERT:
                 CDEBUG(D_INODE, "convert\n");
@@ -565,6 +546,20 @@ static int ost_handle(struct ptlrpc_request *req)
         }
 
         EXIT;
+        /* If we're DISCONNECTing, the export_data is already freed.
+         * Also skip the update when the handler returned 0 without packing
+         * a reply: the reply path below reports that case, and writing
+         * last_committed through a NULL rq_repmsg would oops first. */
+        if (!rc && req->rq_repmsg != NULL &&
+            req->rq_reqmsg->opc != OST_DISCONNECT) {
+                struct obd_device *obd  = req->rq_export->exp_obd;
+                if ((obd->obd_flags & OBD_NO_TRANSNO) == 0) {
+                        req->rq_repmsg->last_committed =
+                                HTON__u64(obd->obd_last_committed);
+                } else {
+                        DEBUG_REQ(D_IOCTL, req,
+                                  "not sending last_committed update");
+                }
+                CDEBUG(D_INFO, "last_committed "LPU64", xid "LPX64"\n",
+                       obd->obd_last_committed, HTON__u64(req->rq_xid));
+        }
+
 out:
         //req->rq_status = rc;
         if (rc) {
@@ -575,51 +570,28 @@ out:
                 CDEBUG(D_INODE, "sending reply\n");
                 if (req->rq_repmsg == NULL)
                         CERROR("handler for opcode %d returned rc=0 without "
-                               "creating rq_repmsg; needs to return rc != "
-                               "0!\n", req->rq_reqmsg->opc);
+                               "creating rq_repmsg; needs to return rc != 0!\n",
+                               req->rq_reqmsg->opc);
+                else
+                        oti_to_request(oti, req);
                 ptlrpc_reply(req->rq_svc, req);
         }
 
         return 0;
 }
 
-/* mount the file system (secretly) */
 static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
-        struct obd_ioctl_data* data = buf;
         struct ost_obd *ost = &obddev->u.ost;
-        struct obd_device *tgt;
+        struct obd_uuid self = { "self" };
         int err;
         int i;
         ENTRY;
 
-        if (data->ioc_inllen1 < 1) {
-                CERROR("requires a TARGET OBD UUID\n");
-                RETURN(-EINVAL);
-        }
-        if (data->ioc_inllen1 > 37) {
-                CERROR("OBD UUID must be less than 38 characters\n");
-                RETURN(-EINVAL);
-        }
-
-        tgt = class_uuid2obd(data->ioc_inlbuf1);
-        if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
-            !(tgt->obd_flags & OBD_SET_UP)) {
-                CERROR("device not attached or not set up (%d)\n",
-                       data->ioc_dev);
-                RETURN(err = -EINVAL);
-        }
-
-        err = obd_connect(&ost->ost_conn, tgt, NULL, NULL, NULL);
-        if (err) {
-                CERROR("fail to connect to device %d\n", data->ioc_dev);
-                RETURN(err);
-        }
-
         ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
                                            OST_BUFSIZE, OST_MAXREQSIZE,
                                            OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
-                                           "self", ost_handle, "ost");
+                                           &self, ost_handle, "ost");
         if (!ost->ost_service) {
                 CERROR("failed to start service\n");
                 GOTO(error_disc, err = -ENOMEM);
@@ -638,40 +610,33 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
         RETURN(0);
 
 error_disc:
-        obd_disconnect(&ost->ost_conn);
         RETURN(err);
 }
 
+/* Tear down the OST request service of @obddev: stop all of its service
+ * threads, then unregister the service.  With the export check and the
+ * obd_disconnect() call removed, err is never reassigned, so this
+ * returns 0. */
 static int ost_cleanup(struct obd_device * obddev)
 {
         struct ost_obd *ost = &obddev->u.ost;
-        int err;
+        int err = 0;
 
         ENTRY;
 
-        if ( !list_empty(&obddev->obd_exports) ) {
-                CERROR("still has clients!\n");
-                RETURN(-EBUSY);
-        }
-
         ptlrpc_stop_all_threads(ost->ost_service);
         ptlrpc_unregister_service(ost->ost_service);
 
-        err = obd_disconnect(&ost->ost_conn);
-        if (err)
-                CERROR("lustre ost: fail to disconnect device\n");
-
         RETURN(err);
 }
 
+/* Initialise a local lprocfs_static_vars with lprocfs_init_vars() and pass
+ * its obd_vars member to lprocfs_obd_attach() for @dev.  @len and @data are
+ * unused.  Returns the result of lprocfs_obd_attach(). */
 int ost_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
+/* Counterpart of ost_attach(): returns the result of
+ * lprocfs_obd_detach() for @dev. */
 int ost_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 /* This is so similar to mds_connect that it makes my heart weep: we should
@@ -679,7 +644,7 @@ int ost_detach(struct obd_device *dev)
  * target_handle_connect.
  */
 static int ost_connect(struct lustre_handle *conn,
-                       struct obd_device *obd, obd_uuid_t cluuid,
+                       struct obd_device *obd, struct obd_uuid *cluuid,
                        struct recovd_obd *recovd,
                        ptlrpc_recovery_cb_t recover)
 {
@@ -693,14 +658,15 @@ static int ost_connect(struct lustre_handle *conn,
                 RETURN(-EINVAL);
 
         /* lctl gets a backstage, all-access pass. */
-        if (!strcmp(cluuid, "OBD_CLASS_UUID"))
+        if (!strcmp(cluuid->uuid, "OBD_CLASS_UUID"))
                 goto dont_check_exports;
 
         spin_lock(&obd->obd_dev_lock);
         list_for_each(p, &obd->obd_exports) {
                 exp = list_entry(p, struct obd_export, exp_obd_chain);
                 oed = &exp->exp_ost_data;
-                if (!memcmp(cluuid, oed->oed_uuid, sizeof oed->oed_uuid)) {
+                if (!memcmp(cluuid->uuid, oed->oed_uuid.uuid, 
+                            sizeof(oed->oed_uuid.uuid))) {
                         spin_unlock(&obd->obd_dev_lock);
                         LASSERT(exp->exp_obd == obd);
 
@@ -716,12 +682,11 @@ static int ost_connect(struct lustre_handle *conn,
         LASSERT(exp);
 
         oed = &exp->exp_ost_data;
-        memcpy(oed->oed_uuid, cluuid, sizeof oed->oed_uuid);
+        memcpy(oed->oed_uuid.uuid, cluuid->uuid, sizeof(oed->oed_uuid.uuid));
 
         RETURN(0);
 }
 
-
 /* use obd ops to offer management infrastructure */
 static struct obd_ops ost_obd_ops = {
         o_owner:        THIS_MODULE,
@@ -734,12 +699,12 @@ static struct obd_ops ost_obd_ops = {
 
+/* Module init: register ost_obd_ops under LUSTRE_OST_NAME with
+ * class_register_type(), passing the module_vars member of a local
+ * lprocfs_static_vars set up by lprocfs_init_vars().  Returns the result
+ * of class_register_type(). */
 static int __init ost_init(void)
 {
-        int rc;
-
-        rc = class_register_type(&ost_obd_ops, status_class_var,
-                                 LUSTRE_OST_NAME);
-        RETURN(rc);
+        struct lprocfs_static_vars lvars;
+        ENTRY;
 
+        lprocfs_init_vars(&lvars);
+        RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
+                                   LUSTRE_OST_NAME));
 }
 
 static void __exit ost_exit(void)
index 4a79343..70ea9e4 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -49,6 +50,7 @@
 #define LOCAL_END_REQUEST
 #include <linux/blk.h>
 #include <linux/blkdev.h>
+#include <linux/blkpg.h>
 #include <linux/devfs_fs_kernel.h>
 
 static int ptlbd_size_size[PTLBD_MAX_MINOR];
@@ -106,6 +108,7 @@ static int ptlbd_ioctl(struct inode *inode, struct file *file,
                 unsigned int cmd, unsigned long arg)
 {
         struct ptlbd_obd *ptlbd;
+        int ret;
 
         if ( ! capable(CAP_SYS_ADMIN) )
                 RETURN(-EPERM);
@@ -114,9 +117,16 @@ static int ptlbd_ioctl(struct inode *inode, struct file *file,
         if ( IS_ERR(ptlbd) )
                 RETURN( PTR_ERR(ptlbd) );
 
-        /* XXX getattr{,64} */
+        switch(cmd) {
+                case BLKFLSBUF:
+                        ret = blk_ioctl(inode->i_rdev, cmd, arg);
+                        break;
+                default:
+                        ret = -EINVAL;
+                        break;
+        }
 
-        RETURN(-EINVAL);
+        RETURN(ret);
 }
 
 static int ptlbd_release(struct inode *inode, struct file *file)
index d57e001..a6580e0 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -36,7 +37,7 @@ static int ptlbd_cl_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
         struct obd_import *imp = &ptlbd->bd_import;
         struct obd_ioctl_data* data = buf;
-        obd_uuid_t server_uuid;
+        struct obd_uuid server_uuid;
         ENTRY;
 
         if ( ptlbd->bd_import.imp_connection != NULL )
@@ -52,10 +53,9 @@ static int ptlbd_cl_setup(struct obd_device *obddev, obd_count len, void *buf)
                 RETURN(-EINVAL);
         }
 
-        memcpy(server_uuid, data->ioc_inlbuf1, MIN(data->ioc_inllen1,
-                                                   sizeof(server_uuid)));
+        obd_str2uuid(&server_uuid, data->ioc_inlbuf1);
 
-        imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+        imp->imp_connection = ptlrpc_uuid_to_connection(&server_uuid);
         if (!imp->imp_connection)
                 RETURN(-ENOENT);
 
@@ -69,7 +69,6 @@ static int ptlbd_cl_setup(struct obd_device *obddev, obd_count len, void *buf)
         INIT_LIST_HEAD(&imp->imp_chain);
         imp->imp_last_xid = 0;
         imp->imp_max_transno = 0;
-        imp->imp_peer_last_xid = 0;
         imp->imp_peer_committed_transno = 0;
         imp->imp_level = LUSTRE_CONN_FULL;
 
@@ -95,7 +94,7 @@ static int ptlbd_cl_cleanup(struct obd_device *obddev)
 
 #if 0
 static int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
-                        obd_uuid_t cluuid, struct recovd_obd *recovd,
+                        struct obd_uuid cluuid, struct recovd_obd *recovd,
                         ptlrpc_recovery_cb_t recover)
 {
         struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
@@ -104,7 +103,7 @@ static int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
         ENTRY;
 
         rc = class_connect(conn, obd, cluuid);
-        if (rc) 
+        if (rc)
                 RETURN(rc);
 
         INIT_LIST_HEAD(&imp->imp_chain);
@@ -130,9 +129,10 @@ static struct obd_ops ptlbd_cl_obd_ops = {
 
+/* Register ptlbd_cl_obd_ops under OBD_PTLBD_CL_DEVICENAME with
+ * class_register_type(), passing the module_vars member of a local
+ * lprocfs_static_vars set up by lprocfs_init_vars().  Returns the result
+ * of class_register_type(). */
 int ptlbd_cl_init(void)
 {
-        extern struct lprocfs_vars status_class_var[];
+        struct lprocfs_static_vars lvars;
 
-        return class_register_type(&ptlbd_cl_obd_ops, status_class_var,
+        lprocfs_init_vars(&lvars);
+        return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_CL_DEVICENAME);
 }
 
index 5ff5177..62c0236 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <linux/lprocfs_status.h>
 #include <linux/obd_ptlbd.h>
 
-static __u32 get_next_xid(struct obd_import *imp)
-{
-        unsigned long flags;
-        __u32 xid;
-        spin_lock_irqsave(&imp->imp_lock, flags);
-        xid = ++imp->imp_last_xid;
-        spin_unlock_irqrestore(&imp->imp_lock, flags);
-        return xid;
-}
-
-static int ptlbd_brw_callback(struct obd_brw_set *set, int phase)
-{
-        ENTRY;
-        RETURN(0);
-}
-
-static void decref_bulk_desc(void *data)
-{
-        struct ptlrpc_bulk_desc *desc = data;
-        ENTRY;
-
-        ptlrpc_bulk_decref(desc);
-        EXIT;
-}
-
-/*  this is the callback function which is invoked by the Portals
- *  event handler associated with the bulk_sink queue and bulk_source queue. 
- */
-static void ptlbd_ptl_ev_hdlr(struct ptlrpc_bulk_desc *desc)
-{
-        ENTRY;
-
-        LASSERT(desc->bd_brw_set != NULL);
-        LASSERT(desc->bd_brw_set->brw_callback != NULL);
-
-        desc->bd_brw_set->brw_callback(desc->bd_brw_set, CB_PHASE_FINISH);
-
-        prepare_work(&desc->bd_queue, decref_bulk_desc, desc);
-        schedule_work(&desc->bd_queue);
-
-        EXIT;
-}
-
-
-int ptlbd_write_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd, 
-                struct buffer_head *first_bh, unsigned int page_count)
-{
-        struct obd_import *imp = &ptlbd->bd_import;
-        struct ptlbd_op *op;
-        struct ptlbd_niob *niob, *niobs;
-        struct ptlbd_rsp *rsp;
-        struct ptlrpc_request *req;
-        struct ptlrpc_bulk_desc *desc;
-        struct buffer_head *bh;
-        int rc, size[2];
-        struct obd_brw_set *set;
-        ENTRY;
-
-        size[0] = sizeof(struct ptlbd_op);
-        size[1] = page_count * sizeof(struct ptlbd_niob);
-
-        req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
-        if (!req)
-                GOTO(out, rc = -ENOMEM);
-        /* XXX might not need these */
-        req->rq_request_portal = PTLBD_REQUEST_PORTAL;
-        req->rq_reply_portal = PTLBD_REPLY_PORTAL;
-
-        op = lustre_msg_buf(req->rq_reqmsg, 0);
-        niobs = lustre_msg_buf(req->rq_reqmsg, 1);
-
-        /* XXX pack */
-        op->op_cmd = cmd;
-        op->op_lun = 0;
-        op->op_niob_cnt = page_count;
-        op->op__padding = 0;
-        op->op_block_cnt = page_count;
-
-        desc = ptlrpc_prep_bulk(imp->imp_connection);
-        if ( desc == NULL )
-                GOTO(out_req, rc = -ENOMEM);
-        desc->bd_portal = PTLBD_BULK_PORTAL;
-        desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
-
-        /* XXX someone needs to free this */
-        set = obd_brw_set_new();
-        if (set == NULL)
-                GOTO(out_desc, rc = -ENOMEM);
-
-        set->brw_callback = ptlbd_brw_callback;
-#if 0
-        xid = get_next_xid(imp);
-#endif
-
-        for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
-#if 0
-                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
-                if (bulk == NULL)
-                        GOTO(out_set, rc = -ENOMEM);
-#endif
-
-#if 0
-                niob->n_xid = xid;
-#endif
-                niob->n_block_nr = bh->b_blocknr;
-                niob->n_offset = bh_offset(bh);
-                niob->n_length = bh->b_size;
-
-
-#if 0
-                bulk->bp_xid = xid;
-                bulk->bp_buf = bh->b_data;
-                bulk->bp_page = bh->b_page;
-                bulk->bp_buflen = bh->b_size;
-#endif
-        }
-
-
-        size[0] = sizeof(struct ptlbd_rsp);
-        size[1] = sizeof(struct ptlbd_niob) * page_count;
-        req->rq_replen = lustre_msg_size(2, size);
-
-        /* XXX find out how we're really supposed to manage levels */
-        req->rq_level = imp->imp_level;
-        rc = ptlrpc_queue_wait(req);
-
-        rsp = lustre_msg_buf(req->rq_repmsg, 0);
-
-        niob = lustre_msg_buf(req->rq_repmsg, 1);
-        /* XXX check that op->num matches ours */
-        for ( bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
-                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
-                if (bulk == NULL)
-                        GOTO(out_set, rc = -ENOMEM);
-
-                bulk->bp_xid = niob->n_xid;
-                bulk->bp_page = bh->b_page;
-                bulk->bp_buf = bh->b_data;
-                bulk->bp_buflen = bh->b_size;
-        }
-
-        obd_brw_set_add(set, desc);
-        rc = ptlrpc_send_bulk(desc);
-
-        /* if there's an error, no brw_finish called, just like
-         * osc_brw_read */
-
-        GOTO(out_req, rc);
-
-out_set:
-        obd_brw_set_free(set);
-out_desc:
-        ptlrpc_bulk_decref(desc);
-out_req:
-        ptlrpc_req_finished(req);
-out:
-        RETURN(rc);
-}
-
-int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd, 
-                struct buffer_head *first_bh, unsigned int page_count)
+int ptlbd_send_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd, 
+                struct buffer_head *first_bh)
 {
         struct obd_import *imp = &ptlbd->bd_import;
         struct ptlbd_op *op;
@@ -201,20 +42,23 @@ int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
         struct ptlrpc_request *req;
         struct ptlrpc_bulk_desc *desc;
         struct buffer_head *bh;
+        unsigned long flags;
+        unsigned int page_count;
         int rc, rep_size, size[2];
-        struct obd_brw_set *set;
         __u32 xid;
         ENTRY;
 
+        LASSERT(cmd == PTLBD_READ || cmd == PTLBD_WRITE);
+
+        for ( page_count = 0, bh = first_bh ; bh ; bh = bh->b_next )
+                page_count++;
+
         size[0] = sizeof(struct ptlbd_op);
         size[1] = page_count * sizeof(struct ptlbd_niob);
 
         req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
         if (!req)
-                GOTO(out, rc = -ENOMEM);
-        /* XXX might not need these? */
-        req->rq_request_portal = PTLBD_REQUEST_PORTAL;
-        req->rq_reply_portal = PTLBD_REPLY_PORTAL;
+                RETURN(-ENOMEM);
 
         op = lustre_msg_buf(req->rq_reqmsg, 0);
         niobs = lustre_msg_buf(req->rq_reqmsg, 1);
@@ -230,21 +74,16 @@ int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
         if ( desc == NULL )
                 GOTO(out_req, rc = -ENOMEM);
         desc->bd_portal = PTLBD_BULK_PORTAL;
-        desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
-
-        /* XXX someone needs to free this */
-        set = obd_brw_set_new();
-        if (set == NULL)
-                GOTO(out_desc, rc = -ENOMEM);
-
-        set->brw_callback = ptlbd_brw_callback;
+        desc->bd_ptl_ev_hdlr = NULL;
 
-        xid = get_next_xid(imp);
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        xid = ++imp->imp_last_xid;
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
                 struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
                 if (bulk == NULL)
-                        GOTO(out_set, rc = -ENOMEM);
+                        GOTO(out_req, rc = -ENOMEM);
 
                 niob->n_xid = xid;
                 niob->n_block_nr = bh->b_blocknr;
@@ -257,12 +96,13 @@ int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
                 bulk->bp_buflen = bh->b_size;
         }
 
-        /* XXX put in OBD_FAIL_CHECK for ptlbd? */
-        rc = ptlrpc_register_bulk(desc);
-        if (rc)
-                GOTO(out_set, rc);
+        if ( cmd == PTLBD_READ )
+                rc = ptlrpc_register_bulk_put(desc);
+        else
+                rc = ptlrpc_register_bulk_get(desc);
 
-        obd_brw_set_add(set, desc);
+        if (rc)
+                GOTO(out_desc, rc);
 
         rep_size = sizeof(struct ptlbd_rsp);
         req->rq_replen = lustre_msg_size(1, &rep_size);
@@ -271,48 +111,15 @@ int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
         req->rq_level = imp->imp_level;
         rc = ptlrpc_queue_wait(req);
 
-        rsp = lustre_msg_buf(req->rq_repmsg, 0);
-
-        /* if there's an error, no brw_finish called, just like
-         * osc_brw_read */
-
-        GOTO(out_req, rc);
+        if ( rc == 0 ) {
+                rsp = lustre_msg_buf(req->rq_repmsg, 0);
+                /* XXX do stuff */
+        }
 
-out_set:
-        obd_brw_set_free(set);
 out_desc:
         ptlrpc_bulk_decref(desc);
 out_req:
         ptlrpc_req_finished(req);
-out:
-        RETURN(rc);
-}
-
-int ptlbd_send_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd, 
-                struct buffer_head *first_bh)
-{
-        unsigned int page_count = 0;
-        struct buffer_head *bh;
-        int rc;
-        ENTRY;
-
-        for ( page_count = 0, bh = first_bh ; bh ; bh = bh->b_next )
-                page_count++;
-
-        switch (cmd) {
-                case PTLBD_READ:
-                        rc = ptlbd_read_put_req(ptlbd, cmd, 
-                                        first_bh, page_count);
-                        break;
-                case PTLBD_WRITE:
-                        rc = ptlbd_write_put_req(ptlbd, cmd, 
-                                        first_bh, page_count);
-                        break;
-                default:
-                        rc = -EINVAL;
-                        break;
-        };
-
         RETURN(rc);
 }
 
@@ -326,108 +133,63 @@ static int ptlbd_bulk_timeout(void *data)
         RETURN(1);
 }
 
-#define SILLY_MAX 2048
-static struct page *pages[SILLY_MAX] = {NULL,};
-
-static struct page * fake_page(int block_nr)
-{
-        if ( block_nr >= SILLY_MAX )
-                return NULL;
-
-        if (pages[block_nr] == NULL) {
-                void *vaddr = (void *)get_free_page(GFP_KERNEL);
-                pages[block_nr] = virt_to_page(vaddr);
-        } 
-        return pages[block_nr];
-}
-
-static int ptlbd_put_write(struct ptlrpc_request *req)
+void ptlbd_do_filp(struct file *filp, int op, struct ptlbd_niob *niobs, 
+                int page_count, struct list_head *page_list)
 {
-        struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
-        struct ptlbd_op *op;
-        struct ptlbd_niob *reply_niob, *request_niob;
-        struct ptlbd_rsp *rsp;
-        struct ptlrpc_bulk_desc *desc;
-        struct ptlrpc_service *srv;
-        struct l_wait_info lwi;
-        int size[2];
-        int i, page_count, rc;
-        __u32 xid;
+        mm_segment_t old_fs;
+        struct list_head *pos;
+        ENTRY;
 
-        op = lustre_msg_buf(req->rq_reqmsg, 0);
-        request_niob = lustre_msg_buf(req->rq_reqmsg, 1);
-        page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
+        old_fs = get_fs();
+        set_fs(KERNEL_DS);
 
-        size[0] = sizeof(struct ptlbd_rsp);
-        size[1] = sizeof(struct ptlbd_niob) * page_count;
-        rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc)
-                GOTO(out, rc);
-        reply_niob = lustre_msg_buf(req->rq_repmsg, 1);
+        list_for_each(pos, page_list) {
+                ssize_t ret;
+                struct page *page = list_entry(pos, struct page, list);
+                loff_t offset = (niobs->n_block_nr << PAGE_SHIFT) + 
+                        niobs->n_offset;
 
-        desc = ptlrpc_prep_bulk(req->rq_connection);
-        if (desc == NULL)
-                GOTO(out, rc = -ENOMEM);
-        desc->bd_ptl_ev_hdlr = NULL;
-        desc->bd_portal = PTLBD_BULK_PORTAL;
-        memcpy(&(desc->bd_conn), &conn, sizeof(conn)); /* XXX what? */
-
-        srv = req->rq_obd->u.ptlbd.ptlbd_service;
-        spin_lock(&srv->srv_lock);
-        xid = srv->srv_xid++;                   /* single xid for all pages */
-        spin_unlock(&srv->srv_lock);
-
-        for ( i = 0; i < page_count; i++) {
-                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
-                if (bulk == NULL)
-                        GOTO(out_desc, rc = -ENOMEM);
-                        
-                reply_niob[i] = request_niob[i];
-                reply_niob[i].n_xid = xid;
+                if ( op == PTLBD_READ )
+                        ret = filp->f_op->read(filp, page_address(page), 
+                                        niobs->n_length, &offset);
+                else
+                        ret = filp->f_op->write(filp, page_address(page), 
+                                        niobs->n_length, &offset);
 
-                bulk->bp_xid = xid;
-                bulk->bp_page = fake_page(request_niob[i].n_block_nr);
-                bulk->bp_buf = page_address(bulk->bp_page);
-                bulk->bp_buflen = request_niob[i].n_length;
+                niobs++;
         }
 
-        rc = ptlrpc_register_bulk(desc);
-        if ( rc )
-                GOTO(out_desc, rc);
-
-        rsp = lustre_msg_buf(req->rq_reqmsg, 0);
-        rsp->r_status = 42;
-        rsp->r_error_cnt = 13;
-        ptlrpc_reply(req->rq_svc, req);
-
-        /* this synchronization probably isn't good enough */
-        lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
-        rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_RCVD, 
-                        &lwi);
-
-out_desc:
-        ptlrpc_free_bulk(desc);
-out:
-        RETURN(rc);
+        set_fs(old_fs);
+        EXIT;
 }
 
-static int ptlbd_put_read(struct ptlrpc_request *req)
+int ptlbd_parse_req(struct ptlrpc_request *req)
 {
         struct ptlbd_op *op;
         struct ptlbd_niob *niob, *niobs;
         struct ptlbd_rsp *rsp;
         struct ptlrpc_bulk_desc *desc;
+        struct file *filp = req->rq_obd->u.ptlbd.filp;
         struct l_wait_info lwi;
-        int size[1];
-        int i, page_count, rc;
+        int size[1], wait_flag, i, page_count, rc;
+        struct list_head *pos, *n;
+        LIST_HEAD(tmp_pages);
+        ENTRY;
+
+        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
+        if ( rc )
+                RETURN(rc);
 
         op = lustre_msg_buf(req->rq_reqmsg, 0);
+        LASSERT(op->op_cmd == PTLBD_READ || op->op_cmd == PTLBD_WRITE);
+
         niobs = lustre_msg_buf(req->rq_reqmsg, 1);
         page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
 
         desc = ptlrpc_prep_bulk(req->rq_connection);
         if (desc == NULL)
                 GOTO(out, rc = -ENOMEM);
+        desc->bd_ptl_ev_hdlr = NULL;
         desc->bd_portal = PTLBD_BULK_PORTAL;
 
         for ( i = 0, niob = niobs ; i < page_count; niob++, i++) {
@@ -435,23 +197,34 @@ static int ptlbd_put_read(struct ptlrpc_request *req)
                 if (bulk == NULL)
                         GOTO(out_bulk, rc = -ENOMEM);
 
+                bulk->bp_page = alloc_page(GFP_KERNEL);
+                if (bulk->bp_page == NULL)
+                        GOTO(out_bulk, rc = -ENOMEM);
+                list_add(&bulk->bp_page->list, &tmp_pages);
+
                 /* 
                  * XXX what about the block number? 
                  */
                 bulk->bp_xid = niob->n_xid;
-                bulk->bp_page = fake_page(niob->n_block_nr);
                 bulk->bp_buf = page_address(bulk->bp_page);
                 bulk->bp_buflen = niob->n_length;
         }
 
-        rc = ptlrpc_send_bulk(desc);
+        if ( op->op_cmd == PTLBD_READ ) {
+                ptlbd_do_filp(filp, PTLBD_READ, niobs, page_count, &tmp_pages);
+                rc = ptlrpc_bulk_put(desc);
+                wait_flag = PTL_BULK_FL_SENT;
+        } else {
+                rc = ptlrpc_bulk_get(desc);
+                wait_flag = PTL_BULK_FL_RCVD;
+        }
+
         if ( rc )
                 GOTO(out_bulk, rc);
 
         /* this synchronization probably isn't good enough */
         lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
-        rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_SENT, 
-                        &lwi);
+        rc = l_wait_event(desc->bd_waitq, desc->bd_flags & wait_flag, &lwi);
 
         size[0] = sizeof(struct ptlbd_rsp);
         rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
@@ -461,6 +234,8 @@ static int ptlbd_put_read(struct ptlrpc_request *req)
         rsp = lustre_msg_buf(req->rq_repmsg, 0);
         if ( rsp == NULL )
                 GOTO(out, rc = -EINVAL);
+        
+        ptlbd_do_filp(filp, PTLBD_WRITE, niobs, page_count, &tmp_pages);
 
         rsp->r_error_cnt = 42;
         rsp->r_status = 69;
@@ -469,82 +244,12 @@ static int ptlbd_put_read(struct ptlrpc_request *req)
         ptlrpc_reply(req->rq_svc, req);
 
 out_bulk:
-        ptlrpc_free_bulk(desc);
-out:
-        RETURN(rc);
-}
-
-
-int ptlbd_parse_req(struct ptlrpc_request *req)
-{
-        struct ptlbd_op *op;
-        int rc;
-        ENTRY;
-
-        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
-        if ( rc )
-                RETURN(rc);
-
-        op = lustre_msg_buf(req->rq_reqmsg, 0);
-
-        switch(op->op_cmd) {
-                case PTLBD_READ:
-                        ptlbd_put_read(req);
-                        break;
-                case PTLBD_WRITE:
-                        ptlbd_put_write(req);
-                        break;
-                default:
-                        CERROR("fix this %d\n", op->op_cmd);
-                        break;
+        list_for_each_safe(pos, n, &tmp_pages) {
+                struct page *page = list_entry(pos, struct page, list);
+                list_del(&page->list);
+                __free_page(page);
         }
-
-        RETURN(0);
-}
-
-
-#if 0
-int ptlbd_bh_req(int cmd, struct ptlbd_state *st, struct buffer_head *first_bh)
-{
-        struct obd_brw_set *set = NULL;
-        struct brw_page *pg = NULL;
-        struct buffer_head *bh;
-        int rc, i, pg_bytes = 0;
-        ENTRY;
-
-        for ( bh = first_bh ; bh ; bh = bh->b_reqnext ) 
-                pg_bytes += sizeof(struct brw_page);
-
-        OBD_ALLOC(pg, pg_bytes);
-        if ( pg == NULL )
-                GOTO(out, rc = -ENOMEM);
-
-        set = obd_brw_set_new();
-        if (set == NULL)
-                GOTO(out, rc = -ENOMEM);
-
-        for ( i = 0, bh = first_bh ; bh ; bh = bh->b_reqnext, i++) {
-                pg[i].pg = bh->b_page;
-                pg[i].off = bh_offset(bh);
-                pg[i].count = bh->b_size;
-                pg[i].flag = 0;
-        }
-
-        set->brw_callback = ll_brw_sync_wait;
-        rc = obd_brw(cmd, /* lsm */NULL, num_pages, pg, set);
-        if ( rc )
-                GOTO(out, rc);
-
-        rc = ll_brw_sync_wait(set, CB_PHASE_START);
-        if (rc)
-                CERROR("error from callback: rc = %d\n", rc);
-
+        ptlrpc_bulk_decref(desc);
 out:
-        if ( pg != NULL )
-                OBD_FREE(pg, pg_bytes);
-        if ( set != NULL )
-                obd_brw_set_free(set);
-
-        RETURN(rc); 
+        RETURN(rc);
 }
-#endif
index 422f0e1..78d01a6 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <linux/lprocfs_status.h>
 #include <linux/obd_ptlbd.h>
 
-#if 0
-static int ptlbd_sv_callback(struct ptlrpc_request *req)
-{
-        int rc;
-        ENTRY;
-
-        rc = ptlbd_parse_request(req);
-
-        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
-        if ( rc )
-                GOTO(out, rc);
-
-        printk("callback got a friggin opc %d\n", req->rq_reqmsg->opc);
-
-out:
-        RETURN(rc);
-}
-#endif
-
 static int ptlbd_sv_already_setup = 1;
 
 static int ptlbd_sv_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
-#if 0
-        struct obd_ioctl_data* data = buf;
-        obd_uuid_t server_uuid;
-#endif
+        struct obd_uuid self_uuid = { "self" };
         struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
         int rc;
         ENTRY;
 
-#if 0
-        if (data->ioc_inllen1 < 1) {
-                CERROR("requires a PTLBD server UUID\n");
-                RETURN(rc = -EINVAL);
-        }
-
-        if (data->ioc_inllen1 > 37) {
-                CERROR("PTLBD server UUID must be less than 38 characters\n");
-                RETURN(rc = -EINVAL);
-        }
-
-        memcpy(server_uuid, data->ioc_inlbuf1, MIN(data->ioc_inllen1,
-                                                   sizeof(server_uuid)));
+        ptlbd->filp = filp_open("/tmp/ptlbd-backing-file-la-la-la", 
+                                        O_RDWR|O_CREAT, 0600);
+        if ( IS_ERR(ptlbd->filp) )
+                RETURN(PTR_ERR(ptlbd->filp));
 
-#endif
         ptlbd->ptlbd_service =
                 ptlrpc_init_svc(PTLBD_NEVENTS, PTLBD_NBUFS, PTLBD_BUFSIZE,
                                 PTLBD_MAXREQSIZE, PTLBD_REQUEST_PORTAL,
-                                PTLBD_REPLY_PORTAL, "self", 
+                                PTLBD_REPLY_PORTAL, &self_uuid,
                                 ptlbd_parse_req, "ptlbd_sv");
 
-        if (!ptlbd->ptlbd_service) {
-                CERROR("failed to start service\n");
-                RETURN(rc = -ENOMEM);
-        }
+        if (ptlbd->ptlbd_service == NULL) 
+                GOTO(out_filp, rc = -ENOMEM);
 
         rc = ptlrpc_start_thread(obddev, ptlbd->ptlbd_service, "ptldb");
-        if (rc) {
-                CERROR("cannot start PTLBD thread: rc %d\n", rc);
-                LBUG();
+        if (rc != 0) 
                 GOTO(out_thread, rc);
-        }
 
         ptlbd_sv_already_setup = 1;
 
         RETURN(0);
 
- out_thread:
+out_thread:
         ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
         ptlrpc_unregister_service(ptlbd->ptlbd_service);
+out_filp:
+        filp_close(ptlbd->filp, NULL);
 
-        return rc;
+        RETURN(rc);
 }
 
 static int ptlbd_sv_cleanup(struct obd_device *obddev)
@@ -115,36 +81,25 @@ static int ptlbd_sv_cleanup(struct obd_device *obddev)
 
         ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
         ptlrpc_unregister_service(ptlbd->ptlbd_service);
+        if ( ! IS_ERR(ptlbd->filp) )
+                filp_close(ptlbd->filp, NULL);
 
         ptlbd_sv_already_setup = 0;
         RETURN(0);
 }
 
-#if 0
-static int ptlbd_sv_connect(struct lustre_handle *conn, struct obd_device *src,
-                        obd_uuid_t cluuid, struct recovd_obd *recovd,
-                        ptlrpc_recovery_cb_t recover)
-{
-        return class_connect(conn, src, cluuid);
-}
-#endif
-
 static struct obd_ops ptlbd_sv_obd_ops = {
         o_owner:        THIS_MODULE,
-/*        o_iocontrol:    ptlbd_iocontrol,*/
         o_setup:        ptlbd_sv_setup,
         o_cleanup:      ptlbd_sv_cleanup,
-#if 0
-        o_connect:      ptlbd_sv_connect,
-        o_disconnect:   class_disconnect
-#endif
 };
 
 int ptlbd_sv_init(void)
 {
-        extern struct lprocfs_vars status_class_var[];
+        struct lprocfs_static_vars lvars;
 
-        return class_register_type(&ptlbd_sv_obd_ops, status_class_var,
+        lprocfs_init_vars(&lvars);
+        return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
                                    OBD_PTLBD_SV_DEVICENAME);
 }
 
index 1d6c719..48e11b5 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -36,42 +36,43 @@ void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
         cl->cli_name           = name;
 }
 
-__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
+struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
 {
-        return req->rq_connection->c_remote_uuid;
+        return &req->rq_connection->c_remote_uuid;
 }
 
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid)
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
 {
         struct ptlrpc_connection *c;
         struct lustre_peer peer;
         int err;
 
-        err = kportal_uuid_to_peer(uuid, &peer);
+        err = kportal_uuid_to_peer(uuid->uuid, &peer);
         if (err != 0) {
-                CERROR("cannot find peer %s!\n", uuid);
+                CERROR("cannot find peer %s!\n", uuid->uuid);
                 return NULL;
         }
 
         c = ptlrpc_get_connection(&peer, uuid);
         if (c) {
-                memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
+                memcpy(c->c_remote_uuid.uuid,
+                       uuid->uuid, sizeof(c->c_remote_uuid.uuid));
                 c->c_epoch++;
         }
 
-        CDEBUG(D_INFO, "%s -> %p\n", uuid, c);
+        CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
 
         return c;
 }
 
-void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,obd_uuid_t uuid)
+void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,struct obd_uuid *uuid)
 {
         struct lustre_peer peer;
         int err;
 
-        err = kportal_uuid_to_peer(uuid, &peer);
+        err = kportal_uuid_to_peer(uuid->uuid, &peer);
         if (err != 0) {
-                CERROR("cannot find peer %s!\n", uuid);
+                CERROR("cannot find peer %s!\n", uuid->uuid);
                 return;
         }
 
@@ -189,12 +190,13 @@ static int ll_sync_brw_timeout(void *data)
                 if (PtlMDUnlink(desc->bd_md_h) != 0) {
                         CERROR("Near-miss on OST %s -- need to adjust "
                                "obd_timeout?\n",
-                               desc->bd_connection->c_remote_uuid);
+                               desc->bd_connection->c_remote_uuid.uuid);
                         continue;
                 }
 
                 CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
-                       desc->bd_page_count, desc->bd_connection->c_remote_uuid,
+                       desc->bd_page_count,
+                       desc->bd_connection->c_remote_uuid.uuid,
                        desc->bd_portal, desc->bd_connection);
 
                 /* This one will "never" arrive, don't wait for it. */
@@ -259,7 +261,6 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
 {
         struct ptlrpc_connection *conn;
         struct ptlrpc_request *request;
-        unsigned long flags;
         int rc;
         ENTRY;
 
@@ -284,7 +285,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
         request->rq_type = PTL_RPC_MSG_REQUEST;
         request->rq_import = imp;
 
-        /* XXX FIXME bug 625069 */
+        /* XXX FIXME bug 625069, now 249 */
         request->rq_request_portal = imp->imp_client->cli_request_portal;
         request->rq_reply_portal = imp->imp_client->cli_reply_portal;
 
@@ -293,10 +294,6 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
         INIT_LIST_HEAD(&request->rq_list);
         atomic_set(&request->rq_refcount, 1);
 
-        spin_lock_irqsave(&imp->imp_lock, flags);
-        request->rq_xid = HTON__u32(++imp->imp_last_xid);
-        spin_unlock_irqrestore(&imp->imp_lock, flags);
-
         request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
         request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
         request->rq_reqmsg->opc = HTON__u32(opcode);
@@ -317,7 +314,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
         if (atomic_read(&request->rq_refcount) != 0) {
                 CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
                        request, request->rq_reqmsg->opc,
-                       request->rq_connection->c_remote_uuid,
+                       request->rq_connection->c_remote_uuid.uuid,
                        request->rq_import->imp_client->cli_request_portal,
                        atomic_read (&request->rq_refcount));
                 /* LBUG(); */
@@ -428,7 +425,7 @@ static int ptlrpc_check_status(struct ptlrpc_request *req)
         }
 
         if (err < 0) {
-                DEBUG_REQ(D_ERROR, req, "status is %d", err);
+                DEBUG_REQ(D_INFO, req, "status is %d", err);
         } else if (err > 0) {
                 /* XXX: translate this error from net to host */
                 DEBUG_REQ(D_INFO, req, "status is %d", err);
@@ -466,12 +463,14 @@ void ptlrpc_free_committed(struct obd_import *imp)
         struct ptlrpc_request *req;
         ENTRY;
 
+        LASSERT(imp != NULL);
+
 #ifdef CONFIG_SMP
         LASSERT(spin_is_locked(&imp->imp_lock));
 #endif
 
-        CDEBUG(D_HA, "committing for xid "LPU64", last_committed "LPU64"\n",
-               imp->imp_peer_last_xid, imp->imp_peer_committed_transno);
+        CDEBUG(D_HA, "committing for last_committed "LPU64"\n",
+               imp->imp_peer_committed_transno);
 
         list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
@@ -489,6 +488,7 @@ void ptlrpc_free_committed(struct obd_import *imp)
 
                 DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")",
                           imp->imp_peer_committed_transno);
+                list_del_init(&req->rq_list);
                 __ptlrpc_req_finished(req, 1);
         }
 
@@ -517,7 +517,7 @@ void ptlrpc_cleanup_client(struct obd_import *imp)
                 __ptlrpc_req_finished(req, 0);
         }
         spin_unlock_irqrestore(&imp->imp_lock, flags);
-        
+
         EXIT;
         return;
 }
@@ -573,9 +573,8 @@ static int expired_request(void *data)
         req->rq_flags |= PTL_RPC_FL_TIMEOUT;
 
         if (!req->rq_import) {
-                DEBUG_REQ(D_ERROR, req, "NULL import");
-                LBUG();
-                RETURN(0);
+                DEBUG_REQ(D_HA, req, "NULL import; already cleaned up?");
+                RETURN(1);
         }
 
         if (!req->rq_import->imp_connection) {
@@ -605,6 +604,50 @@ static int interrupted_request(void *data)
         RETURN(1); /* ignored, as of this writing */
 }
 
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
+{
+        ENTRY;
+        atomic_inc(&req->rq_refcount);
+        RETURN(req);
+}
+
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+                                      struct obd_import *imp)
+{
+        struct list_head *tmp;
+
+#ifdef CONFIG_SMP
+        LASSERT(spin_is_locked(&imp->imp_lock));
+#endif
+
+        LASSERT(imp->imp_flags & IMP_REPLAYABLE);
+        /* Balanced in ptlrpc_free_committed, usually. */
+        ptlrpc_request_addref(req);
+        list_for_each_prev(tmp, &imp->imp_replay_list) {
+                struct ptlrpc_request *iter =
+                        list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                /* We may have duplicate transnos if we create and then
+                 * open a file, or for closes retained to match creating
+                 * opens, so use req->rq_xid as a secondary key.
+                 * (See bugs 684, 685, and 428.)
+                 */
+                if (iter->rq_transno > req->rq_transno)
+                        continue;
+
+                if (iter->rq_transno == req->rq_transno) {
+                        LASSERT(iter->rq_xid != req->rq_xid);
+                        if (iter->rq_xid > req->rq_xid)
+                                continue;
+                }
+
+                list_add(&req->rq_list, &iter->rq_list);
+                return;
+        }
+
+        list_add_tail(&req->rq_list, &imp->imp_replay_list);
+}
+
 int ptlrpc_queue_wait(struct ptlrpc_request *req)
 {
         int rc = 0;
@@ -616,15 +659,19 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 
         init_waitqueue_head(&req->rq_wait_for_rep);
 
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        req->rq_xid = HTON__u32(++imp->imp_last_xid);
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
         /* for distributed debugging */
-        req->rq_reqmsg->status = HTON__u32(current->pid); 
+        req->rq_reqmsg->status = HTON__u32(current->pid);
         CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%x:%d\n",
                NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
                conn->c_peer.peer_nid, NTOH__u32(req->rq_reqmsg->opc));
 
         spin_lock_irqsave(&imp->imp_lock, flags);
 
-        /* 
+        /*
          * If the import has been invalidated (such as by an OST failure), the
          * request must fail with -EIO.
          */
@@ -646,12 +693,15 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                                   (req->rq_level <= imp->imp_level) ||
                                   (req->rq_flags & PTL_RPC_FL_ERR), &lwi);
 
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                list_del_init(&req->rq_list);
-
                 if (req->rq_flags & PTL_RPC_FL_ERR)
                         rc = -EIO;
 
+                if (!req->rq_import)
+                        RETURN(rc);
+
+                spin_lock_irqsave(&imp->imp_lock, flags);
+                list_del_init(&req->rq_list);
+
                 if (rc) {
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
                         RETURN(rc);
@@ -756,24 +806,18 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                 spin_lock_irqsave(&imp->imp_lock, flags);
                 if ((req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0)
                     && rc >= 0) {
-                        /* Balanced in ptlrpc_free_committed, usually. */
-                        atomic_inc(&req->rq_refcount);
-                        list_add_tail(&req->rq_list, &imp->imp_replay_list);
+                        ptlrpc_retain_replayable_request(req, imp);
                 }
 
                 if (req->rq_transno > imp->imp_max_transno) {
                         imp->imp_max_transno = req->rq_transno;
-                } else if (req->rq_transno != 0 &&
-                           imp->imp_level == LUSTRE_CONN_FULL) {
-                        CDEBUG(D_HA, "got transno "LPD64" after "LPD64
-                               ": recovery may not work\n", req->rq_transno,
-                               imp->imp_max_transno);
                 }
 
                 /* Replay-enabled imports return commit-status information. */
-                imp->imp_peer_last_xid = req->rq_repmsg->last_xid;
-                imp->imp_peer_committed_transno =
-                        req->rq_repmsg->last_committed;
+                if (req->rq_repmsg->last_committed) {
+                        imp->imp_peer_committed_transno =
+                                req->rq_repmsg->last_committed;
+                }
                 ptlrpc_free_committed(imp);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
         }
@@ -847,7 +891,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
 }
 
 /* XXX looks a lot like super.c:invalidate_request_list, don't it? */
-void ptlrpc_abort_inflight(struct obd_import *imp)
+void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import)
 {
         unsigned long flags;
         struct list_head *tmp, *n;
@@ -866,6 +910,8 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
 
                 DEBUG_REQ(D_HA, req, "inflight");
                 req->rq_flags |= PTL_RPC_FL_ERR;
+                if (dying_import)
+                        req->rq_import = NULL;
                 wake_up(&req->rq_wait_for_rep);
         }
 
@@ -875,6 +921,8 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
 
                 DEBUG_REQ(D_HA, req, "aborting waiting req");
                 req->rq_flags |= PTL_RPC_FL_ERR;
+                if (dying_import)
+                        req->rq_import = NULL;
                 wake_up(&req->rq_wait_for_rep);
         }
 }
index 2182591..b2d204d 100644 (file)
@@ -32,18 +32,20 @@ static struct list_head conn_unused_list;
 
 /* If UUID is NULL, c->c_remote_uuid must be all zeroes
  * If UUID is non-NULL, c->c_remote_uuid must match. */
-static int match_connection_uuid(struct ptlrpc_connection *c, obd_uuid_t uuid)
+static int match_connection_uuid(struct ptlrpc_connection *c, struct obd_uuid *uuid)
 {
-        obd_uuid_t zero_uuid = {0};
+        struct obd_uuid zero_uuid;
+        memset(&zero_uuid, 0, sizeof(zero_uuid));
 
         if (uuid)
-                return memcmp(c->c_remote_uuid, uuid, sizeof(uuid));
+                return memcmp(c->c_remote_uuid.uuid, uuid->uuid, 
+                              sizeof(uuid->uuid));
 
-        return memcmp(c->c_remote_uuidzero_uuid, sizeof(zero_uuid));
+        return memcmp(c->c_remote_uuid.uuid, &zero_uuid, sizeof(zero_uuid));
 }
 
 struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
-                                                obd_uuid_t uuid)
+                                                struct obd_uuid *uuid)
 {
         struct list_head *tmp, *pos;
         struct ptlrpc_connection *c;
@@ -83,8 +85,8 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
         c->c_epoch = 1;
         c->c_bootcount = 0;
         c->c_flags = 0;
-        if (uuid)
-                strcpy(c->c_remote_uuid, uuid);
+        if (uuid->uuid)
+                obd_str2uuid(&c->c_remote_uuid, uuid->uuid);
         INIT_LIST_HEAD(&c->c_imports);
         INIT_LIST_HEAD(&c->c_exports);
         INIT_LIST_HEAD(&c->c_sb_chain);
@@ -160,7 +162,7 @@ void ptlrpc_cleanup_connection(void)
         list_for_each_safe(tmp, pos, &conn_list) {
                 c = list_entry(tmp, struct ptlrpc_connection, c_link);
                 CERROR("Connection %p/%s has refcount %d (nid=%lu)\n",
-                       c, c->c_remote_uuid, atomic_read(&c->c_refcount),
+                       c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount),
                        (unsigned long)c->c_peer.peer_nid);
                 list_del(&c->c_link);
                 OBD_FREE(c, sizeof(*c));
index c260f5d..e7a1e08 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -26,8 +26,9 @@
 #include <linux/obd_support.h>
 #include <linux/lustre_net.h>
 
-ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq, bulk_source_eq,
-        bulk_sink_eq;
+ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq, 
+        bulk_put_source_eq, bulk_put_sink_eq, 
+        bulk_get_source_eq, bulk_get_sink_eq;
 static const ptl_handle_ni_t *socknal_nip = NULL, *toenal_nip = NULL, 
         *qswnal_nip = NULL, *gmnal_nip = NULL;
 
@@ -149,7 +150,7 @@ int request_in_callback(ptl_event_t *ev)
         return 0;
 }
 
-static int bulk_source_callback(ptl_event_t *ev)
+static int bulk_put_source_callback(ptl_event_t *ev)
 {
         struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
         struct ptlrpc_bulk_page *bulk;
@@ -196,7 +197,7 @@ static int bulk_source_callback(ptl_event_t *ev)
         RETURN(0);
 }
 
-static int bulk_sink_callback(ptl_event_t *ev)
+static int bulk_put_sink_callback(ptl_event_t *ev)
 {
         struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
         struct ptlrpc_bulk_page *bulk;
@@ -241,6 +242,100 @@ static int bulk_sink_callback(ptl_event_t *ev)
         RETURN(1);
 }
 
+static int bulk_get_source_callback(ptl_event_t *ev)
+{
+        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+        struct ptlrpc_bulk_page *bulk;
+        struct list_head        *tmp;
+        struct list_head        *next;
+        ptl_size_t               total = 0;
+        void                   (*event_handler)(struct ptlrpc_bulk_desc *);
+        ENTRY;
+
+        LASSERT(ev->type == PTL_EVENT_GET);
+
+        /* get with zero offset */
+        LASSERT(ev->offset == 0);
+        /* used iovs */
+        LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
+        /* 1 fragment for each page always */
+        LASSERT(ev->mem_desc.niov == desc->bd_page_count);
+
+        list_for_each_safe (tmp, next, &desc->bd_page_list) {
+                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
+
+                total += bulk->bp_buflen;
+
+                if (bulk->bp_cb != NULL)
+                        bulk->bp_cb(bulk);
+        }
+
+        LASSERT(ev->mem_desc.length == total);
+
+        /* We need to make a note of whether there's an event handler
+         * before we call wake_up, because if there is no event
+         * handler, 'desc' might be freed before we're scheduled again. */
+        event_handler = desc->bd_ptl_ev_hdlr;
+
+        desc->bd_flags |= PTL_BULK_FL_SENT;
+        wake_up(&desc->bd_waitq);
+        if (event_handler) {
+                LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+                event_handler(desc);
+        }
+
+        RETURN(1);
+}
+
+
+static int bulk_get_sink_callback(ptl_event_t *ev)
+{
+        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+        struct ptlrpc_bulk_page *bulk;
+        struct list_head        *tmp;
+        struct list_head        *next;
+        ENTRY;
+
+        CDEBUG(D_NET, "got %s event %d\n",
+               (ev->type == PTL_EVENT_SENT) ? "SENT" :
+               (ev->type == PTL_EVENT_REPLY)  ? "REPLY"  : "UNEXPECTED", 
+               ev->type);
+
+        LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_REPLY);
+
+        LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
+                atomic_read(&desc->bd_source_callback_count) <= 2);
+
+        /* 1 fragment for each page always */
+        LASSERT(ev->mem_desc.niov == desc->bd_page_count);
+
+        if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
+                void (*event_handler)(struct ptlrpc_bulk_desc *);
+
+                list_for_each_safe(tmp, next, &desc->bd_page_list) {
+                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+                                          bp_link);
+
+                        if (bulk->bp_cb != NULL)
+                                bulk->bp_cb(bulk);
+                }
+
+                /* We need to make a note of whether there's an event handler
+                 * before we call wake_up, because if there is no event handler,
+                 * 'desc' might be freed before we're scheduled again. */
+                event_handler = desc->bd_ptl_ev_hdlr;
+
+                desc->bd_flags |= PTL_BULK_FL_RCVD;
+                wake_up(&desc->bd_waitq);
+                if (event_handler) {
+                        LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+                        event_handler(desc);
+                }
+        }
+
+        RETURN(0);
+}
+
 int ptlrpc_init_portals(void)
 {
         int rc;
@@ -272,11 +367,21 @@ int ptlrpc_init_portals(void)
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 1024, bulk_source_callback, &bulk_source_eq);
+        rc = PtlEQAlloc(ni, 1024, bulk_put_source_callback, 
+                        &bulk_put_source_eq);
+        if (rc != PTL_OK)
+                CERROR("PtlEQAlloc failed: %d\n", rc);
+
+        rc = PtlEQAlloc(ni, 1024, bulk_put_sink_callback, &bulk_put_sink_eq);
+        if (rc != PTL_OK)
+                CERROR("PtlEQAlloc failed: %d\n", rc);
+
+        rc = PtlEQAlloc(ni, 1024, bulk_get_source_callback, 
+                        &bulk_get_source_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 1024, bulk_sink_callback, &bulk_sink_eq);
+        rc = PtlEQAlloc(ni, 1024, bulk_get_sink_callback, &bulk_get_sink_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
@@ -288,8 +393,10 @@ void ptlrpc_exit_portals(void)
         PtlEQFree(request_out_eq);
         PtlEQFree(reply_out_eq);
         PtlEQFree(reply_in_eq);
-        PtlEQFree(bulk_source_eq);
-        PtlEQFree(bulk_sink_eq);
+        PtlEQFree(bulk_put_source_eq);
+        PtlEQFree(bulk_put_sink_eq);
+        PtlEQFree(bulk_get_source_eq);
+        PtlEQFree(bulk_get_sink_eq);
 
         if (qswnal_nip != NULL)
                 inter_module_put("kqswnal_ni");
index a778b57..1b3532e 100644 (file)
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/lustre_lite.h>
 #include <linux/lprocfs_status.h>
 
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof, 
-            void *data)
-{
-        int len = 0;
-        len += snprintf(page, count, "%s\n", 
-                        ((struct obd_device*)data)->obd_uuid);
-        return len;
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
-        {"status/uuid", rd_uuid, 0, 0},
-        {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[]  = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+struct lprocfs_vars lprocfs_obd_vars[] = {
+        { "uuid",     lprocfs_rd_uuid,    0, 0},
+        { 0 }
 };
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof, 
-                  void *data)
-{
-        struct obd_type* class = (struct obd_type*)data;
-        int len = 0;
-        len += snprintf(page, count, "%d\n", class->typ_refcnt);
-        return len;
-}
 
-struct lprocfs_vars status_class_var[] = {
-        {"status/num_refs", rd_numrefs, 0, 0},
-        {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+        { "num_refs", lprocfs_rd_numrefs, 0, 0},
+        { 0 }
 };
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
index 1d6284e..ef3a215 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
@@ -28,7 +28,8 @@
 #include <linux/obd.h>
 
 extern ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq,
-        bulk_source_eq, bulk_sink_eq;
+        bulk_put_source_eq, bulk_put_sink_eq, 
+        bulk_get_source_eq, bulk_get_sink_eq;
 
 static int ptl_send_buf(struct ptlrpc_request *request,
                         struct ptlrpc_connection *conn, int portal)
@@ -113,7 +114,7 @@ ptlrpc_put_bulk_iov (struct ptlrpc_bulk_desc *desc, struct iovec *iov)
         OBD_FREE (iov, desc->bd_page_count * sizeof (struct iovec));
 }
 
-int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc)
+int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *desc)
 {
         int rc;
         struct list_head *tmp, *next;
@@ -129,7 +130,7 @@ int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc)
         desc->bd_md.start = iov;
         desc->bd_md.niov = 0;
         desc->bd_md.length = 0;
-        desc->bd_md.eventq = bulk_source_eq;
+        desc->bd_md.eventq = bulk_put_source_eq;
         desc->bd_md.threshold = 2; /* SENT and ACK */
         desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
         desc->bd_md.user_ptr = desc;
@@ -194,7 +195,87 @@ int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc)
         RETURN(0);
 }
 
-int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *desc)
+/* Start a bulk GET for 'desc'.
+ *
+ * Builds one iovec entry per page on desc->bd_page_list, binds the result
+ * as a PTL_MD_OP_GET | PTL_MD_IOV memory descriptor whose events are
+ * delivered to bulk_get_sink_eq, and issues PtlGet() to the connection's
+ * peer on desc->bd_portal, passing the xid shared by all pages.
+ *
+ * Returns 0 once the GET has been issued, -ENOMEM if the iovec cannot be
+ * allocated, or the non-PTL_OK code from PtlMDBind()/PtlGet() (those two
+ * paths LBUG() first). */
+int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *desc)
+{
+        int rc;
+        struct list_head *tmp, *next;
+        ptl_process_id_t remote_id;
+        __u32 xid = 0;
+        struct iovec *iov;
+        ENTRY;
+
+        iov = ptlrpc_get_bulk_iov (desc);
+        if (iov == NULL)
+                RETURN (-ENOMEM);
+
+        desc->bd_md.start = iov;
+        desc->bd_md.niov = 0;
+        desc->bd_md.length = 0;
+        desc->bd_md.eventq = bulk_get_sink_eq;
+        desc->bd_md.threshold = 2; /* SENT and REPLY */
+        desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_IOV;
+        desc->bd_md.user_ptr = desc;
+
+        /* One count per expected event; bulk_get_sink_callback() finishes
+         * the transfer when this reaches zero. */
+        atomic_set(&desc->bd_source_callback_count, 2);
+
+        /* Describe every page in the iovec, accumulating the total length. */
+        list_for_each_safe(tmp, next, &desc->bd_page_list) {
+                struct ptlrpc_bulk_page *bulk;
+                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
+
+                LASSERT(desc->bd_md.niov < desc->bd_page_count);
+
+                /* The first page supplies the xid used for the whole GET. */
+                if (desc->bd_md.niov == 0)
+                        xid = bulk->bp_xid;
+                LASSERT(xid == bulk->bp_xid);   /* should all be the same */
+
+                iov[desc->bd_md.niov].iov_base = bulk->bp_buf;
+                iov[desc->bd_md.niov].iov_len = bulk->bp_buflen;
+                if (iov[desc->bd_md.niov].iov_len <= 0) {
+                        CERROR("bad bp_buflen[%d] @ %p: %d\n", desc->bd_md.niov,
+                               bulk->bp_buf, bulk->bp_buflen);
+                        CERROR("desc: xid %u, pages %d, ptl %d, ref %d\n",
+                               xid, desc->bd_page_count, desc->bd_portal,
+                               atomic_read(&desc->bd_refcount));
+                        LBUG();
+                }
+                desc->bd_md.niov++;
+                desc->bd_md.length += bulk->bp_buflen;
+        }
+
+        LASSERT(desc->bd_md.niov == desc->bd_page_count);
+        LASSERT(desc->bd_md.niov != 0);
+
+        rc = PtlMDBind(desc->bd_connection->c_peer.peer_ni, desc->bd_md,
+                       &desc->bd_md_h);
+
+        ptlrpc_put_bulk_iov (desc, iov); /*move down to reduce latency to send*/
+
+        if (rc != PTL_OK) {
+                CERROR("PtlMDBind failed: %d\n", rc);
+                LBUG();
+                RETURN(rc);
+        }
+
+        remote_id.nid = desc->bd_connection->c_peer.peer_nid;
+        remote_id.pid = 0;
+
+        /* xid is a __u32, so it is printed with %u here and below. */
+        CDEBUG(D_NET, "Fetching %u pages %u bytes from portal %d nid "LPX64
+               " pid %d xid %u\n", desc->bd_md.niov, desc->bd_md.length,
+               desc->bd_portal, remote_id.nid, remote_id.pid, xid);
+
+        rc = PtlGet(desc->bd_md_h, remote_id, desc->bd_portal, 0, xid, 0);
+        if (rc != PTL_OK) {
+                CERROR("PtlGet("LPU64", %d, %u) failed: %d\n",
+                       remote_id.nid, desc->bd_portal, xid, rc);
+                /* The GET never went out, so drop the MD bound above. */
+                PtlMDUnlink(desc->bd_md_h);
+                LBUG();
+                RETURN(rc);
+        }
+
+        RETURN(0);
+}
+
+static int ptlrpc_register_bulk_shared(struct ptlrpc_bulk_desc *desc)
 {
         struct list_head *tmp, *next;
         int rc;
@@ -217,9 +298,7 @@ int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *desc)
         desc->bd_md.niov = 0;
         desc->bd_md.length = 0;
         desc->bd_md.threshold = 1;
-        desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
         desc->bd_md.user_ptr = desc;
-        desc->bd_md.eventq = bulk_sink_eq;
 
         list_for_each_safe(tmp, next, &desc->bd_page_list) {
                 struct ptlrpc_bulk_page *bulk;
@@ -276,6 +355,22 @@ int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *desc)
         return rc;
 }
 
+/* Register 'desc' for the GET flavour of bulk transfer: events are routed
+ * to bulk_get_source_eq and the memory descriptor is marked
+ * PTL_MD_OP_GET | PTL_MD_IOV before ptlrpc_register_bulk_shared() does the
+ * remaining setup.  Returns that routine's result unchanged. */
+int ptlrpc_register_bulk_get(struct ptlrpc_bulk_desc *desc)
+{
+        int rc;
+
+        desc->bd_md.eventq = bulk_get_source_eq;
+        desc->bd_md.options = PTL_MD_IOV | PTL_MD_OP_GET;
+        rc = ptlrpc_register_bulk_shared(desc);
+
+        return rc;
+}
+
+/* Register 'desc' for the PUT flavour of bulk transfer: events are routed
+ * to bulk_put_sink_eq and the memory descriptor is marked
+ * PTL_MD_OP_PUT | PTL_MD_IOV before ptlrpc_register_bulk_shared() does the
+ * remaining setup.  Returns that routine's result unchanged. */
+int ptlrpc_register_bulk_put(struct ptlrpc_bulk_desc *desc)
+{
+        int rc;
+
+        desc->bd_md.eventq = bulk_put_sink_eq;
+        desc->bd_md.options = PTL_MD_IOV | PTL_MD_OP_PUT;
+        rc = ptlrpc_register_bulk_shared(desc);
+
+        return rc;
+}
+
 int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
 {
         /* This should be safe: these handles are initialized to be
@@ -356,14 +451,13 @@ int ptlrpc_error(struct ptlrpc_service *svc, struct ptlrpc_request *req)
         int rc;
         ENTRY;
 
-        if (req->rq_repmsg) {
-                CERROR("req already has repmsg\n");
-                LBUG();
+        if (!req->rq_repmsg) {
+                rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
+                                     &req->rq_repmsg);
+                if (rc)
+                        RETURN(rc);
         }
 
-        rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc)
-                RETURN(rc);
 
         req->rq_type = PTL_RPC_MSG_ERR;
 
@@ -390,7 +484,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         source_id.pid = PTL_PID_ANY;
 
         /* add a ref, which will be balanced in request_out_callback */
-        atomic_inc(&request->rq_refcount);
+        ptlrpc_request_addref(request);
         if (request->rq_replen != 0) {
                 if (request->rq_reply_md.start != NULL) {
                         rc = PtlMEUnlink(request->rq_reply_me_h);
index 49d79dc..10e8200 100644 (file)
@@ -100,6 +100,9 @@ int lustre_unpack_msg(struct lustre_msg *m, int len)
 
         if (len < required_len) {
                 CERROR("len: %d, required_len %d\n", len, required_len);
+                CERROR("bufcount: %d\n", m->bufcount);
+                for (i = 0; i < m->bufcount; i++)
+                        CERROR("buffer %d length %d\n", i, m->buflens[i]);
                 RETURN(-EINVAL);
         }
 
@@ -117,15 +120,15 @@ void *lustre_msg_buf(struct lustre_msg *m, int n)
         }
 
         if (n < 0 || n >= m->bufcount) {
-                CERROR("referencing bad sub buffer in %p (want %d, count %d)!\n",
-                       m, n, m->bufcount);
+                CERROR("referencing bad sub buffer in %p (want %d, count "
+                       "%d)!\n", m, n, m->bufcount);
                 LBUG();
                 return NULL;
         }
 
         if (m->buflens[n] == 0) {
-                CERROR("zero-length buffer requested for buffer %d in %p\n", n,
-                       m);
+                CERROR("zero-length buffer requested for buffer %d in %p\n",
+                       n, m);
                 return NULL;
         }
 
index d544a19..279c903 100644 (file)
@@ -29,7 +29,8 @@ static void d_c_l(struct list_head *head)
                 struct ptlrpc_connection *conn =
                         list_entry(tmp, struct ptlrpc_connection,
                                    c_recovd_data.rd_managed_chain);
-                CDEBUG(D_HA, "   %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
+                CDEBUG(D_HA, "   %p = %s (%d/%d)\n", conn, 
+                       conn->c_remote_uuid.uuid,
                        conn->c_recovd_data.rd_phase,
                        conn->c_recovd_data.rd_next_phase);
         }
@@ -56,13 +57,13 @@ void recovd_conn_manage(struct ptlrpc_connection *conn,
         if (!list_empty(&rd->rd_managed_chain)) {
                 if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
                         CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
-                               conn, conn->c_remote_uuid);
+                               conn, conn->c_remote_uuid.uuid);
                         EXIT;
                         return;
                 }
                 CDEBUG(D_HA,
                        "conn %p/%s has recovery items %p/%p, making %p/%p\n",
-                       conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
+                       conn, conn->c_remote_uuid.uuid, rd->rd_recovd, rd->rd_recover,
                        recovd, recover);
                 spin_lock(&rd->rd_recovd->recovd_lock);
                 list_del_init(&rd->rd_managed_chain);
@@ -115,21 +116,21 @@ void recovd_conn_fail(struct ptlrpc_connection *conn)
         spin_lock(&recovd->recovd_lock);
         if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
                 CDEBUG(D_HA, "connection %p to %s already in recovery\n",
-                       conn, conn->c_remote_uuid);
+                       conn, conn->c_remote_uuid.uuid);
                 spin_unlock(&recovd->recovd_lock);
                 EXIT;
                 return;
         }
 
         CERROR("connection %p to %s (%08x %08lx %08lx) failed\n", conn,
-               conn->c_remote_uuid, conn->c_peer.peer_nid,
+               conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
                conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
         list_del(&rd->rd_managed_chain);
         list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
         if (rd->rd_phase != RD_IDLE) {
                 CDEBUG(D_HA,
                        "connection %p to %s failed in recovery: restarting\n",
-                       conn, conn->c_remote_uuid);
+                       conn, conn->c_remote_uuid.uuid);
                 /* XXX call callback with PHASE_FAILED? */
                 rd->rd_next_phase = RD_TROUBLED;
         }
@@ -148,7 +149,7 @@ void recovd_conn_fixed(struct ptlrpc_connection *conn)
         ENTRY;
 
         CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
-               conn, conn->c_remote_uuid);
+               conn, conn->c_remote_uuid.uuid);
         spin_lock(&rd->rd_recovd->recovd_lock);
         list_del(&rd->rd_managed_chain);
         rd->rd_phase = RD_IDLE;
index b4f3c85..1c99fed 100644 (file)
@@ -3,15 +3,23 @@
  *
  * Portal-RPC reconnection and replay operations, for use in recovery.
  *
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Mike Shaver <shaver@clusterfs.com>
  *
- * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
- * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
- * Copyright (C) 1999 Seagate Technology Inc.
- * Copyright (C) 2001 Mountain View Data, Inc.
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ *   This file is part of Lustre, http://www.lustre.org.
  *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/config.h>
@@ -30,18 +38,18 @@ int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc,
         struct obd_device *obd = imp->imp_obd;
         struct client_obd *cli = &obd->u.cli;
         int size[] = { sizeof(cli->cl_target_uuid), sizeof(obd->obd_uuid) };
-        char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+        char *tmp[] = {cli->cl_target_uuid.uuid, obd->obd_uuid.uuid};
         struct ptlrpc_connection *conn = imp->imp_connection;
-        struct lustre_handle old_hdl;
-        struct ptlrpc_request *request;
+        struct ptlrpc_request *req;
         struct obd_export *ldlmexp;
+        struct lustre_handle old_hdl;
         int rc;
 
-        request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
-        if (!request)
+        req = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+        if (!req)
                 RETURN(-ENOMEM);
-        request->rq_level = LUSTRE_CONN_NEW;
-        request->rq_replen = lustre_msg_size(0, NULL);
+        req->rq_level = LUSTRE_CONN_NEW;
+        req->rq_replen = lustre_msg_size(0, NULL);
         /*
          * This address is the export that represents our client-side LDLM
          * service (for ASTs).  We should only have one on this list, so we
@@ -51,58 +59,59 @@ int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc,
          */
         ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
                              exp_obd_chain);
-        request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
-        request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
-        rc = ptlrpc_queue_wait(request);
-        switch (rc) {
-            case EALREADY:
-            case -EALREADY:
-                /* already connected! */
+        req->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
+        req->rq_reqmsg->cookie = ldlmexp->exp_cookie;
+        rc = ptlrpc_queue_wait(req);
+        if (rc) {
+                CERROR("cannot connect to %s@%s: rc = %d\n",
+                       cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid, rc);
+                GOTO(out_disc, rc);
+        }
+        if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) {
                 memset(&old_hdl, 0, sizeof(old_hdl));
-                if (!memcmp(&old_hdl.addr, &request->rq_repmsg->addr,
+                if (!memcmp(&old_hdl.addr, &req->rq_repmsg->addr,
                             sizeof (old_hdl.addr)) &&
-                    !memcmp(&old_hdl.cookie, &request->rq_repmsg->cookie,
+                    !memcmp(&old_hdl.cookie, &req->rq_repmsg->cookie,
                             sizeof (old_hdl.cookie))) {
-                        CERROR("%s@%s didn't like our handle "LPX64"/"LPX64", failed\n",
-                               cli->cl_target_uuid, conn->c_remote_uuid,
+                        CERROR("%s@%s didn't like our handle "LPX64"/"LPX64
+                               ", failed\n", cli->cl_target_uuid.uuid,
+                               conn->c_remote_uuid.uuid,
                                (__u64)(unsigned long)ldlmexp,
                                ldlmexp->exp_cookie);
                         GOTO(out_disc, rc = -ENOTCONN);
                 }
 
-                old_hdl.addr = request->rq_repmsg->addr;
-                old_hdl.cookie = request->rq_repmsg->cookie;
+                old_hdl.addr = req->rq_repmsg->addr;
+                old_hdl.cookie = req->rq_repmsg->cookie;
                 if (memcmp(&imp->imp_handle, &old_hdl, sizeof(old_hdl))) {
-                        CERROR("%s@%s changed handle from "LPX64"/"LPX64" to "LPX64"/"LPX64"; "
+                        CERROR("%s@%s changed handle from "LPX64"/"LPX64
+                               " to "LPX64"/"LPX64"; "
                                "copying, but this may foreshadow disaster\n",
-                               cli->cl_target_uuid, conn->c_remote_uuid,
+                               cli->cl_target_uuid.uuid, 
+                               conn->c_remote_uuid.uuid,
                                old_hdl.addr, old_hdl.cookie,
                                imp->imp_handle.addr, imp->imp_handle.cookie);
-                        imp->imp_handle.addr = request->rq_repmsg->addr;
-                        imp->imp_handle.cookie = request->rq_repmsg->cookie;
-                        GOTO(out_disc, rc = EALREADY);
+                        imp->imp_handle.addr = req->rq_repmsg->addr;
+                        imp->imp_handle.cookie = req->rq_repmsg->cookie;
+                        GOTO(out_disc, rc = 0);
                 }
 
                 CERROR("reconnected to %s@%s after partition\n",
-                       cli->cl_target_uuid, conn->c_remote_uuid);
-                GOTO(out_disc, rc = EALREADY);
-            case 0:
-                old_hdl = imp->imp_handle;
-                imp->imp_handle.addr = request->rq_repmsg->addr;
-                imp->imp_handle.cookie = request->rq_repmsg->cookie;
-                CERROR("now connected to %s@%s ("LPX64"/"LPX64", was "LPX64"/"LPX64")!\n",
-                       cli->cl_target_uuid, conn->c_remote_uuid,
-                       imp->imp_handle.addr, imp->imp_handle.cookie,
-                       old_hdl.addr, old_hdl.cookie);
+                       cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid);
                 GOTO(out_disc, rc = 0);
-            default:
-                CERROR("cannot connect to %s@%s: rc = %d\n",
-                       cli->cl_target_uuid, conn->c_remote_uuid, rc);
-                GOTO(out_disc, rc = -ENOTCONN); /* XXX preserve rc? */
         }
 
+        old_hdl = imp->imp_handle;
+        imp->imp_handle.addr = req->rq_repmsg->addr;
+        imp->imp_handle.cookie = req->rq_repmsg->cookie;
+        CERROR("reconnected to %s@%s ("LPX64"/"LPX64", was "LPX64"/"
+               LPX64")!\n", cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid,
+               imp->imp_handle.addr, imp->imp_handle.cookie,
+               old_hdl.addr, old_hdl.cookie);
+        GOTO(out_disc, rc = 0);
+
  out_disc:
-        *reqptr = request;
+        *reqptr = req;
         return rc;
 }
 
@@ -114,7 +123,7 @@ int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn)
 
         ENTRY;
         argv[0] = obd_recovery_upcall;
-        argv[1] = conn->c_remote_uuid;
+        argv[1] = conn->c_remote_uuid.uuid;
         argv[2] = NULL;
 
         envp[0] = "HOME=/";
@@ -156,7 +165,7 @@ int ptlrpc_replay(struct obd_import *imp)
         ptlrpc_free_committed(imp);
 
         CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
-               imp, imp->imp_obd->u.cli.cl_target_uuid, committed);
+               imp, imp->imp_obd->u.cli.cl_target_uuid.uuid, committed);
 
         list_for_each(tmp, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
index 200c029..95fe7ec 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <linux/init.h>
 #include <linux/lprocfs_status.h>
 
-
-
 extern int ptlrpc_init_portals(void);
 extern void ptlrpc_exit_portals(void);
 
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
 
 int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
@@ -83,8 +79,8 @@ int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
                                   c_recovd_data.rd_managed_chain);
 
                 LASSERT(conn->c_recovd_data.rd_recovd == recovd); /* sanity */
-
-                if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+#warning check buffer overflow in next line
+                if (!strcmp(conn->c_remote_uuid.uuid, data->ioc_inlbuf1))
                         break;
                 conn = NULL;
         }
@@ -99,7 +95,8 @@ int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
 
                         LASSERT(conn->c_recovd_data.rd_recovd == recovd);
 
-                        if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+#warning check buffer overflow in next line
+                        if (!strcmp(conn->c_remote_uuid.uuid, data->ioc_inlbuf1))
                                 break;
                         conn = NULL;
                 }
@@ -111,9 +108,6 @@ int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
                 spin_unlock(&recovd->recovd_lock);
                 recovd_conn_fail(conn);
                 spin_lock(&recovd->recovd_lock);
-
-                /* Jump straight to the "failed" phase of recovery. */
-                conn->c_recovd_data.rd_phase = RD_FAILED;
                 goto out;
         }
 
@@ -134,13 +128,13 @@ int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
 
         if (data->ioc_inllen2) {
                 CERROR("conn %p UUID change %s -> %s\n",
-                       conn, conn->c_remote_uuid, data->ioc_inlbuf2);
-                strcpy(conn->c_remote_uuid, data->ioc_inlbuf2);
+                       conn, conn->c_remote_uuid.uuid, data->ioc_inlbuf2);
+                obd_str2uuid(&conn->c_remote_uuid, data->ioc_inlbuf2);
         } else {
                 CERROR("conn %p UUID %s reconnected\n", conn,
-                       conn->c_remote_uuid);
+                       conn->c_remote_uuid.uuid);
         }
-        ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
+        ptlrpc_readdress_connection(conn, &conn->c_remote_uuid);
         spin_unlock(&conn->c_lock);
 
         conn->c_recovd_data.rd_phase = RD_PREPARED;
@@ -151,7 +145,7 @@ int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
 }
 
 static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
-                           obd_uuid_t cluuid, struct recovd_obd *recovd,
+                           struct obd_uuid *cluuid, struct recovd_obd *recovd,
                            ptlrpc_recovery_cb_t recover)
 {
         return class_connect(conn, src, cluuid);
@@ -159,12 +153,15 @@ static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
 
 int connmgr_attach(struct obd_device *dev, obd_count len, void *data)
 {
-        return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+        struct lprocfs_static_vars lvars;
+
+        lprocfs_init_vars(&lvars);
+        return lprocfs_obd_attach(dev, lvars.obd_vars);
 }
 
 int conmgr_detach(struct obd_device *dev)
 {
-        return lprocfs_dereg_obd(dev);
+        return lprocfs_obd_detach(dev);
 }
 
 /* use obd ops to offer management infrastructure */
@@ -181,17 +178,23 @@ static struct obd_ops recovd_obd_ops = {
 
 static int __init ptlrpc_init(void)
 {
+        struct lprocfs_static_vars lvars;
         int rc;
+        ENTRY;
+
         rc = ptlrpc_init_portals();
         if (rc)
                 RETURN(rc);
         ptlrpc_init_connection();
-        rc = class_register_type(&recovd_obd_ops, status_class_var,
+
+        lprocfs_init_vars(&lvars);
+        rc = class_register_type(&recovd_obd_ops, lvars.module_vars,
                                  LUSTRE_HA_NAME);
         if (rc)
                 RETURN(rc);
         ptlrpc_put_connection_superhack = ptlrpc_put_connection;
-        return 0;
+        ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
+        RETURN(0);
 }
 
 static void __exit ptlrpc_exit(void)
@@ -218,8 +221,10 @@ EXPORT_SYMBOL(ptlrpc_init_connection);
 EXPORT_SYMBOL(ptlrpc_cleanup_connection);
 
 /* niobuf.c */
-EXPORT_SYMBOL(ptlrpc_send_bulk);
-EXPORT_SYMBOL(ptlrpc_register_bulk);
+EXPORT_SYMBOL(ptlrpc_bulk_put);
+EXPORT_SYMBOL(ptlrpc_bulk_get);
+EXPORT_SYMBOL(ptlrpc_register_bulk_put);
+EXPORT_SYMBOL(ptlrpc_register_bulk_get);
 EXPORT_SYMBOL(ptlrpc_abort_bulk);
 EXPORT_SYMBOL(ptlrpc_reply);
 EXPORT_SYMBOL(ptlrpc_error);
@@ -242,12 +247,14 @@ EXPORT_SYMBOL(ptlrpc_restart_req);
 EXPORT_SYMBOL(ptlrpc_prep_req);
 EXPORT_SYMBOL(ptlrpc_free_req);
 EXPORT_SYMBOL(ptlrpc_req_finished);
+EXPORT_SYMBOL(ptlrpc_request_addref);
 EXPORT_SYMBOL(ptlrpc_prep_bulk);
 EXPORT_SYMBOL(ptlrpc_free_bulk);
 EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
 EXPORT_SYMBOL(ptlrpc_free_bulk_page);
 EXPORT_SYMBOL(ll_brw_sync_wait);
 EXPORT_SYMBOL(ptlrpc_abort_inflight);
+EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
 
 /* service.c */
 EXPORT_SYMBOL(ptlrpc_init_svc);
@@ -268,8 +275,8 @@ EXPORT_SYMBOL(ptlrpc_replay);
 EXPORT_SYMBOL(ptlrpc_resend);
 EXPORT_SYMBOL(ptlrpc_wake_delayed);
 
-MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Request Processor v1.0");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Request Processor");
 MODULE_LICENSE("GPL");
 
 module_init(ptlrpc_init);
index c20fc48..0ea29b3 100644 (file)
@@ -65,7 +65,7 @@ struct ptlrpc_service *
 ptlrpc_init_svc(__u32 nevents, __u32 nbufs,
                 __u32 bufsize, __u32 max_req_size,
                 int req_portal, int rep_portal,
-                obd_uuid_t uuid, svc_handler_t handler, char *name)
+                struct obd_uuid *uuid, svc_handler_t handler, char *name)
 {
         int err;
         int rc, i;
@@ -91,9 +91,10 @@ ptlrpc_init_svc(__u32 nevents, __u32 nbufs,
         service->srv_req_portal = req_portal;
         service->srv_handler = handler;
 
-        err = kportal_uuid_to_peer(uuid, &service->srv_self);
+        err = kportal_uuid_to_peer(uuid->uuid, &service->srv_self);
         if (err) {
-                CERROR("%s: cannot get peer for uuid '%s'\n", name, uuid);
+                CERROR("%s: cannot get peer for uuid '%s'\n", name, 
+                       uuid->uuid);
                 OBD_FREE(service, sizeof(*service));
                 RETURN(NULL);
         }
@@ -165,13 +166,13 @@ static int handle_incoming_request(struct obd_device *obddev,
 
         if (request->rq_reqlen < sizeof(struct lustre_msg)) {
                 CERROR("incomplete request (%d): ptl %d from "LPX64" xid "
-                       LPD64"\n",
+                       LPU64"\n",
                        request->rq_reqlen, svc->srv_req_portal,
                        event->initiator.nid, request->rq_xid);
                 goto out;
         }
 
-        CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPX64":"LPX64":%d\n",
+        CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPU64":"LPX64":%d\n",
                NTOH__u32(request->rq_reqmsg->status),
                request->rq_xid,
                event->initiator.nid,
index dd1e33c..92cd1c2 100644 (file)
@@ -4,7 +4,7 @@
 %define linuxdir @LINUX@
 %define portalsdir @PORTALS@
 %define portalslibdir @PORTALSLIB@
-Release: 0208282230chaos
+Release: 0301070810ltutor3
 
 Summary: Lustre Lite File System
 Name: lustre-lite
@@ -43,6 +43,14 @@ Group: Documentation
 %description -n lustre-doc
 Documentation and sample configuration files for Lustre
 
+%package -n lustre-ldap
+Summary: Configures openldap server for LDAP Lustre config database
+Group: Configuration
+Requires: openldap-servers, openldap-clients, python-ldap, 4Suite
+
+%description -n lustre-ldap
+Configures openldap server for LDAP Lustre config database
+
 %prep
 %setup -qn lustre-%{version}
 
@@ -62,10 +70,17 @@ rm -f lustre-source
 ln -s $RPM_BUILD_ROOT/usr/src lustre-source
 make distdir distdir=lustre-source/lustre-%{version}
 
+# ldap database directory
+mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre
+
 %files
 %attr(-, root, root) /usr/sbin/lmc
 %attr(-, root, root) /usr/sbin/lctl
 %attr(-, root, root) /usr/sbin/lconf
+%attr(-, root, root) /usr/sbin/llanalyze
+%attr(-, root, root) /usr/sbin/lfind
+%attr(-, root, root) /usr/sbin/lstripe
+%attr(-, root, root) /usr/sbin/mcreate
 %attr(-, root, root) /usr/lib/lustre/examples/llmount.sh
 %attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh
 %attr(-, root, root) /usr/lib/lustre/examples/llecho.sh
@@ -104,6 +119,14 @@ make distdir distdir=lustre-source/lustre-%{version}
 %files -n lustre-source
 %attr(-, root, root) /usr/src/lustre-%{version}
 
+%files -n lustre-ldap
+%attr(-, root, root) /etc/openldap/slapd-lustre.conf
+%attr(-, root, root) /etc/openldap/schema/lustre.schema
+%attr(-, root, root) /usr/lib/lustre/lustre2ldif.xsl
+%attr(-, root, root) /usr/lib/lustre/top.ldif
+%dir /var/lib/ldap/lustre
+%attr(700, ldap, ldap) /var/lib/ldap/lustre
+
 %post
 if [ ! -e /dev/obd ]; then
    mknod /dev/obd c 10 241
@@ -122,6 +145,20 @@ grep -q '/dev/lustre' /etc/modules.conf || \
 %postun
 depmod -ae || exit 0
 
+%post -n lustre-ldap
+if ! grep -q slapd-lustre /etc/openldap/slapd.conf; then 
+  echo "include /etc/openldap/slapd-lustre.conf" >> /etc/openldap/slapd.conf
+fi
+
+%postun -n lustre-ldap
+slapd=/etc/openldap/slapd.conf   # file the post-install scriptlet added our include line to
+if [ "$1" = 0 ] && grep -q slapd-lustre $slapd; then   # $1 is 0 only on final erase, not on upgrade
+   tmp=`mktemp /tmp/lustre-ldap.XXXXXX` || exit 0   # unpredictable temp name; never fail the uninstall
+   sed "/slapd-lustre/d" $slapd > $tmp && \
+   cp $tmp $slapd   # cp rather than mv, so slapd.conf keeps its owner and mode
+   rm -f $tmp
+fi
+
 %clean
 #rm -rf $RPM_BUILD_ROOT
 
index b9e1962..239e0fd 100644 (file)
@@ -31,3 +31,5 @@ setuid
 multifstat
 checkstat
 wantedi
+createtest
+open_delay
index c7f411e..ee22c80 100644 (file)
@@ -22,11 +22,12 @@ noinst_SCRIPTS += fs.sh intent-test.sh intent-test2.sh leak_finder.pl \
        ostreq.sh runfailure-client-mds-recover.sh runfailure-mds \
        runfailure-net runfailure-ost runiozone runregression-net.sh \
        runtests runvmstat snaprun.sh tbox.sh  common.sh
-noinst_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate
+noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
 noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany mkdirmany multifstat
+noinst_PROGRAMS += stat createmany statmany mkdirmany multifstat createtest
 # noinst_PROGRAMS += ldaptest 
 noinst_PROGRAMS += checkstat wantedi
+sbin_PROGRAMS = mcreate
 
 # ldaptest_SOURCES = ldaptest.c
 tchmod_SOURCES = tchmod.c
@@ -50,5 +51,7 @@ mkdirmany_SOURCES = mkdirmany.c
 multifstat_SOURCES = multifstat.c
 checkstat_SOURCES = checkstat.c
 wantedi_SOURCES = wantedi.c
+createtest_SOURCES = createtest.c
+open_delay_SOURCES = open_delay.c
 
 include $(top_srcdir)/Rules
diff --git a/lustre/tests/acceptance-metadata-single.sh b/lustre/tests/acceptance-metadata-single.sh
new file mode 100644 (file)
index 0000000..501d2be
--- /dev/null
@@ -0,0 +1,130 @@
+#!/bin/sh
+set -e
+
+#
+# Runs create.pl and rename.pl on a single mountpoint with increasing
+# load, varying debug levels
+#
+
+SRCDIR="`dirname $0`/"
+. $SRCDIR/common.sh
+
+MNT=${MNT:-/mnt/lustre}
+
+debug_client_on
+echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
+perl create.pl -- $MNT -1 10
+echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
+perl create.pl --silent -- $MNT -1 100
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
+perl create.pl --mcreate=0 -- $MNT -1 10
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
+perl create.pl --mcreate=0 --silent -- $MNT -1 100
+echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
+perl rename.pl $MNT 10
+echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
+perl rename.pl --silent $MNT 100
+
+debug_client_off
+echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
+perl create.pl --silent -- $MNT -1 1000
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
+perl create.pl --silent --mcreate=0 -- $MNT -1 1000
+echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
+perl rename.pl --silent $MNT 1000
+
+debug_client_on
+echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
+perl create.pl --silent -- $MNT -1 100 &
+perl create.pl --silent -- $MNT -1 100 &
+wait
+echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
+perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+wait
+echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
+perl rename.pl --silent $MNT 1000 &
+perl rename.pl --silent $MNT 1000 &
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
+perl create.pl --silent -- $MNT -1 2000 &
+perl create.pl --silent -- $MNT -1 2000 &
+wait
+echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
+perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+wait
+echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
+perl rename.pl --silent $MNT 2000 &
+perl rename.pl --silent $MNT 2000 &
+wait
+
+debug_client_on
+echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+  perl create.pl --silent -- $MNT -1 100 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+  perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+done
+wait
+echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
+for i in `seq 1 4`; do
+  perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+  perl create.pl --silent -- $MNT -1 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+  perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+done
+wait
+echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+  perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_on
+echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+  perl create.pl --silent -- $MNT -1 500 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+  perl create.pl --silent --mcreate=0 -- $MNT -1 500 &
+done
+wait
+echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
+for i in `seq 1 8`; do
+  perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+  perl create.pl --silent -- $MNT -1 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+  perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+done
+wait
+echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+  perl rename.pl --silent $MNT 2000 &
+done
+wait
index 5202052..286f417 100755 (executable)
@@ -23,6 +23,7 @@ for NAME in $CONFIGS; do
        fi
 
        [ "$SANITY" != "no" ] && sh sanity.sh
+       [ "$SANITY" != "no" ] && START=" " CLEAN=" " sh sanity.sh
 
        if [ "$DBENCH" != "no" ]; then
                mount | grep $MNT || sh llmount.sh
@@ -88,3 +89,6 @@ for NAME in $CONFIGS; do
        fi      
        mount | grep $MNT && sh llmountcleanup.sh
 done
+
+[ "$SANITYN" != "no" ] && NAME=mount2 sh sanityN.sh
+
index 9f31edc..6dc6124 100644 (file)
@@ -32,7 +32,7 @@ ${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp
 ${LMC} --add ost --node $OST --obd obd1 --obdtype=obdecho -obduuid $OBD_UUID 
 
 # osc on client
-${LMC} --add oscref --node $CLIENT --echo_client obd1
+${LMC} --add echo_client --node $CLIENT --obd obd1
 
 $LMC_REAL --batch $BATCH
 rm -f $BATCH
diff --git a/lustre/tests/busy.sh b/lustre/tests/busy.sh
new file mode 100644 (file)
index 0000000..2f90986
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+ mkdir /mnt/lustre/d22
+ mkdir /mnt/lustre/d22/etc
+ ./mcreate /mnt/lustre/d22/etc/foo
+ ls -ld /mnt/lustre/etc
+ ls -ld /mnt/lustre/d22/etc
index 341d31b..6156869 100644 (file)
@@ -1,16 +1,21 @@
 #!/usr/bin/perl
 use Getopt::Long;
 
-GetOptions("silent!"=> \$silent);
+my $silent = 0;
+my $mcreate = 1; # should we use mcreate or open?
+my $files = 5;
+
+GetOptions("silent!" => \$silent,
+           "mcreate=i" => \$mcreate,
+           "files=i" => \$files);
 
 my $mtpt = shift || usage();
 my $mount_count = shift || usage();
 my $i = shift || usage();
-my $files = 5;
-my $mcreate = 0; # should we use mcreate or open?
+my $count = $i;
 
 sub usage () {
-    print "Usage: $0 <mount point prefix> <mount count> <iterations>\n";
+    print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
     print "example: $0 /mnt/lustre 2 50\n";
     print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
     print "         $0 /mnt/lustre -1 50\n";
@@ -57,5 +62,17 @@ while ($i--) {
     } else {
         print  "Unlink done [$$] $path: $!\n"if !$silent;
     }
+    if (($count - $i) % 100 == 0) {
+        print STDERR ($count - $i) . " operations [" . $$ . "]\n";
+    }
 }
+
+my $which = "";
+if ($mount_count > 0) {
+    $which = int(rand() * $mount_count) + 1;
+}
+for ($d = 0; $d < $files; $d++) {
+    unlink("$mtpt$which/$d");
+}
+
 print "Done.\n";
index c56eda8..8399824 100644 (file)
@@ -8,15 +8,23 @@
 #include <unistd.h>
 #include <stdlib.h>
 
+void usage(char *prog)
+{
+       printf("usage: %s {-o|-m} filenamefmt count\n", prog);
+       printf("       %s {-o|-m} filenamefmt -seconds\n", prog);
+       printf("       %s {-o|-m} filenamefmt start count\n", prog);
+}
+
 int main(int argc, char ** argv)
 {
         int i, rc = 0, do_open;
+        char format[4096], *fmt;
         char filename[4096];
-        long int start, last, end, count;
+        long start, last, end;
+       long begin = 0, count;
 
-        if (argc != 4) {
-                printf("Usage %s <-o|-m> filenamebase <count|-time>\n",
-                       argv[0]);
+        if (argc < 4 || argc > 5) {
+               usage(argv[0]);
                 return 1;
         }
 
@@ -25,8 +33,7 @@ int main(int argc, char ** argv)
         } else if (strcmp(argv[1], "-m") == 0) {
                 do_open = 0;
         } else {
-                printf("Usage %s {-o|-m} filenamebase <count|-time>\n",
-                       argv[0]);
+               usage(argv[0]);
                 return 1;
         }
 
@@ -37,18 +44,29 @@ int main(int argc, char ** argv)
 
         start = last = time(0);
 
-        end = strtol(argv[3], NULL, 0);
-
-        if (end > 0) {
-                count = end;
-                end = -1UL >> 1;
-        } else {
-                end = start - end;
-                count = -1UL >> 1;
-        }
+       if (argc == 4) {
+               end = strtol(argv[3], NULL, 0);
+               if (end > 0) {
+                       count = end;
+                       end = -1UL >> 1;
+               } else {
+                       end = start - end;
+                       count = -1UL >> 1;
+               }
+       } else {
+               end = -1UL >> 1;
+               begin = strtol(argv[3], NULL, 0);
+               count = strtol(argv[4], NULL, 0);
+       }
 
-        for (i = 0; i < count && time(0) < end; i++) {
-                sprintf(filename, "%s%d", argv[2], i);
+       if (strchr(argv[2], '%'))
+               fmt = argv[2];
+       else {
+               sprintf(format, "%s%%d", argv[2]);
+               fmt = format;
+       }
+        for (i = 0; i < count && time(0) < end; i++, begin++) {
+                sprintf(filename, fmt, begin);
                 if (do_open) {
                         int fd = open(filename, O_CREAT|O_RDWR, 0644);
                         if (fd < 0) {
diff --git a/lustre/tests/createtest.c b/lustre/tests/createtest.c
new file mode 100644 (file)
index 0000000..5404f13
--- /dev/null
@@ -0,0 +1,142 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#ifndef S_SHIFT
+#define S_SHIFT 12
+#endif
+
+int usage(char *prog)	/* print the one-argument calling convention to stderr, then exit(1); never returns */
+{
+       fprintf(stderr, "usage: %s <basename>\n", prog);
+       exit(1);
+}
+
+int main(int argc, char *argv[])	/* returns 0 after all three passes; each failed check exits with its own code */
+{
+       char name[4096];
+       int i;
+
+       if (argc != 2)
+               usage(argv[0]);
+
+       umask(0);	/* pass 1 compares st_mode for exact equality, so no permission bit may be masked */
+       for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {	/* pass 1: mknod() with every file-type value */
+               struct stat st;
+               int mode = i | 0644;
+               int rc;
+
+               snprintf(name, sizeof(name), "%s-mknod%06o", argv[1], mode);
+               rc = mknod(name, mode, 0x1234);
+               switch (i) {
+               case 0:
+                       mode |= S_IFREG;	/* type 0 is expected to stat as a regular file; fall through */
+               case S_IFREG:
+               case S_IFCHR: case S_IFBLK:
+                       if (rc < 0 && getuid() != 0)
+                               continue;	/* a non-root failure here is tolerated; otherwise fall through */
+               case S_IFSOCK: case S_IFIFO:
+                       if (rc < 0) {
+                               fprintf(stderr, "%s: ERROR mknod %s: %s\n",
+                                       argv[0], name, strerror(errno));
+                               exit(10);
+                       }
+                       rc = stat(name, &st);
+                       if (rc < 0) {
+                               fprintf(stderr, "%s: ERROR stat %s: %s\n",
+                                       argv[0], name, strerror(errno));
+                               exit(11);
+                       }
+                       if (st.st_mode != mode) {
+                               fprintf(stderr, "%s: ERROR mode %s: %o != %o\n",
+                                       argv[0], name, st.st_mode, mode);
+                               exit(12);
+                       }
+                       rc = unlink(name);
+                       if (rc < 0) {
+                               fprintf(stderr, "%s: ERROR unlink %s: %s\n",
+                                       argv[0], name, strerror(errno));
+                               exit(13);
+                       }
+                       break;
+               default:	/* every other type value: mknod() succeeding is the error */
+                       if (rc == 0) {
+                               fprintf(stderr, "%s: ERROR: %s created\n",
+                                       argv[0], name);
+                               exit(14);
+                       }
+               }
+       }
+
+       for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {	/* pass 2: open(O_CREAT) must always yield a regular file */
+               struct stat st;
+               int mode;
+               int fd;
+               int rc;
+
+               mode = i | 0644;
+               snprintf(name, sizeof(name), "%s-creat%06o", argv[1], mode);
+               fd = open(name, O_CREAT|O_RDONLY, mode);
+               if (fd < 0) {
+                       fprintf(stderr, "%s: ERROR creat %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(21);
+               }
+               close(fd);
+               rc = stat(name, &st);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: ERROR stat %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(11);
+               }
+               if ((st.st_mode & S_IFMT) != S_IFREG) {
+                       fprintf(stderr, "%s: ERROR mode %s: %o != %o\n",
+                               argv[0], name, st.st_mode & S_IFMT, S_IFREG);
+                       exit(12);
+               }
+               rc = unlink(name);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: ERROR unlink %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(20);
+               }
+       }
+
+       for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {	/* pass 3: mkdir() must always yield a directory */
+               struct stat st;
+               int rc;
+
+               snprintf(name, sizeof(name), "%s-mkdir%06o", argv[1], i | 0644);
+               rc = mkdir(name, i | 0664);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: ERROR mkdir %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(30);
+               }
+               rc = stat(name, &st);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: ERROR stat %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(11);
+               }
+               if ((st.st_mode & S_IFMT) != S_IFDIR) {
+                       fprintf(stderr, "%s: ERROR mode %s: %o != %o\n",
+                               argv[0], name, st.st_mode & S_IFMT, S_IFDIR);
+                       exit(12);
+               }
+               rc = rmdir(name);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: ERROR rmdir %s: %s\n",
+                               argv[0], name, strerror(errno));
+                       exit(31);
+               }
+       }
+
+       printf("%s: SUCCESS\n", argv[0]);
+       return 0;
+}
index f30f056..99e026f 100755 (executable)
@@ -1,47 +1,49 @@
 #!/bin/bash
 
-config=${1:-$(basename $0 .sh).xml}
+LOV=${LOV:-0}
+while [ "$1" ]; do
+        case $1 in
+        --lov) LOV="1" ;;
+       *) [ -z $config ] && config=$1 || OPTS="$OPTS $1" ;;
+        esac
+        shift
+done
+
+config=${config:-$(basename $0 .sh).xml}
 LMC=${LMC:-../utils/lmc -m $config}
+TMP=${TMP:-/tmp}
 
-SERVER=localhost
-CLIENT=localhost
+SERVER=${SERVER:-localhost}
+CLIENT=${CLIENT:-localhost}
+NET=${NET:-tcp}
 
 # FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1}
 MDSSIZE=10000
 
 STRIPE_BYTES=65536
 STRIPES_PER_OBJ=2      # 0 means stripe over all OSTs
 
-LOV=0
-while [ "$1" ]; do
-        case $1 in
-        --lov) LOV="1" ;;
-       *) OPTS="$OPTS $1" ;;
-        esac
-        shift
-done
-
 rm -f $config
 # create nodes
 $LMC --add node --node $SERVER  || exit 1
-$LMC --add net --node $SERVER --nid $SERVER --nettype tcp || exit 2
+$LMC --add net --node $SERVER --nid $SERVER --nettype $NET || exit 2
 
 if (($LOV)); then
     $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
     $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
-    $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 12
-    $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 13
+    $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
+    $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
     OBD_NAME=lov1
 else
-    $LMC --add ost --obd obd1 --node $SERVER --obdtype=obdecho || exit 2
+    $LMC --add ost --ost obd1 --node $SERVER --osdtype=obdecho || exit 12
     OBD_NAME=obd1
 fi
 
 if [ "$SERVER" != "$CLIENT" ]; then
    $LMC --add node --node $CLIENT  || exit 1
-   $LMC --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 2
+   $LMC --add net --node $CLIENT --nid $CLIENT --nettype $NET || exit 2
 fi
 
-$LMC --add echo_client --node $CLIENT --obd ${OBD_NAME} || exit 3
+$LMC --add echo_client --node $CLIENT --ost ${OBD_NAME} || exit 3
 
index fbf1d00..b8d234b 100644 (file)
@@ -19,6 +19,12 @@ while ($line = <>) {
         $name = $6;
         $size = $7;
         $addr = $8;
+
+       # we can't dump the log after portals has exited, so skip "leaks"
+       # from memory freed in the portals module unloading.
+       if ($func eq 'portals_handle_init') {
+           next;
+       }
         printf("%8s %6d bytes at %s called %s (%s:%s:%d)\n", $type, $size,
                $addr, $name, $file, $func, $lno);
     } else {
diff --git a/lustre/tests/lkcdmap b/lustre/tests/lkcdmap
new file mode 100755 (executable)
index 0000000..f8a1fd5
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/sh
+TMP=${TMP:-/tmp}
+cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
+       MOD="../$M"
+       MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
+       MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+
+       nm $MOD > $MAP
+       echo namelist -a $PWD/$MOD 
+       echo symtab -a $PWD/$MAP $MODNAME
+done
index bc30630..1e2bd6a 100755 (executable)
@@ -7,6 +7,12 @@ NAME=${NAME:-local}
 config=$NAME.xml
 mkconfig=$NAME.sh
 
+if [ "$PORTALS" ]; then
+  portals_opt="--portals=$PORTALS"
+fi
+
+[ -x $LCONF ] || chmod a+rx $LCONF
+
 sh $mkconfig $config || exit 1
 
-${LCONF} --reformat --gdb $config || exit 2
+${LCONF} $portals_opt --reformat --gdb $config || exit 2
diff --git a/lustre/tests/llmount2-hack.sh b/lustre/tests/llmount2-hack.sh
deleted file mode 100644 (file)
index 495626c..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-# suggested boilerplate for test script
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local2-hack}
-
-config=$NAME.xml
-
-${LCONF}  --reformat --gdb $config || exit 2
-
-../utils/lctl <<EOF
-newdev
-attach osc OSC2_localhost OSC2_localhost_UUID
-setup OBD_localhost_UUID NET_localhost_tcp_UUID
-newdev
-attach mdc MDC2_mds1 MDC2_uuid
-setup mds1_UUID NET_localhost_tcp_UUID
-quit
-EOF
-
-mount -t lustre_lite -o osc=OSC2_localhost_UUID,mdc=MDC2_uuid none /mnt/lustre2
diff --git a/lustre/tests/llmount2-hackcleanup.sh b/lustre/tests/llmount2-hackcleanup.sh
deleted file mode 100644 (file)
index 21c915d..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-# suggested boilerplate for test script
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local2-hack}
-
-config=$NAME.xml
-
-umount /mnt/lustre1
-umount /mnt/lustre2
-../utils/lctl <<EOF
-name2dev OSC2_localhost
-cleanup
-detach
-name2dev MDC2_mds1
-cleanup
-detach
-quit
-EOF
-
-${LCONF} --cleanup $config
index 2bd991f..8d4fa52 100755 (executable)
@@ -7,17 +7,22 @@ TMP=${TMP:-/tmp}
 config=$NAME.xml
 mkconfig=$NAME.sh
 
+if [ "$PORTALS" ]; then
+  portals_opt="--portals=$PORTALS"
+fi
+
 if [ ! -f $config ]; then
    sh $mkconfig $config || exit 1
 fi
 
 sync; sleep 2; sync
-${LCONF} --cleanup --dump $TMP/debug $config
+${LCONF} $portals_opt --cleanup --dump $TMP/debug $config
+rc=$?
 BUSY=`dmesg | grep -i destruct`
 if [ "$BUSY" ]; then
        echo "$BUSY" 1>&2
        mv $TMP/debug $TMP/debug-busy.`date +%s`
-       exit -1
+       exit 255
 fi
 LEAK_LUSTRE=`dmesg | tail -20 | grep -v "leaked: 0" | grep leaked`
 LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"`
@@ -25,5 +30,7 @@ if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
        echo "$LEAK_LUSTRE" 1>&2
        echo "$LEAK_PORTALS" 1>&2
        mv $TMP/debug $TMP/debug-leak.`date +%s`
-       exit -2
+       exit 254
 fi
+
+exit $rc
diff --git a/lustre/tests/llmountcleanup2-hack.sh b/lustre/tests/llmountcleanup2-hack.sh
deleted file mode 100644 (file)
index b2996cf..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/sh
-
-umount /mnt/lustre2
-umount /mnt/lustre1
-../utils/lctl <<EOF
-name2dev OSC2_localhost
-cleanup
-detach
-name2dev MDC2_mds1
-cleanup
-detach
-quit
-EOF
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local}
-
-config=$NAME.xml
-mkconfig=./$NAME.sh
-
-if [ ! -f $config -o $mkconfig -nt $config ]; then
-   sh $mkconfig $config || exit 1
-fi
-
-${LCONF} --cleanup --dump /tmp/debug $config
index c559821..9bf287e 100755 (executable)
@@ -6,8 +6,12 @@ NAME=${NAME:-local}
 config=$NAME.xml
 mkconfig=$NAME.sh
 
+if [ "$PORTALS" ]; then
+  portals_opt="--portals=$PORTALS"
+fi
+
 if [ ! -f $config -o $mkconfig -nt $config ]; then
    sh $mkconfig $config || exit 1
 fi
 
-${LCONF} --gdb $config || exit 2
+${LCONF} $portals_opt --gdb $config || exit 2
index d892b58..13af9d6 100755 (executable)
@@ -32,7 +32,7 @@ ${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 ${LMC} --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
-${LMC} --add ost --node localhost --obd obd1 --dev $OSTDEV --size  $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost obd1 --dev $OSTDEV --size  $OSTSIZE || exit 30
 
 # create client config
-${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --ost obd1 || exit 40
diff --git a/lustre/tests/local2-hack.xml b/lustre/tests/local2-hack.xml
deleted file mode 100644 (file)
index 6ae2086..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<lustre>
-  <ldlm name='ldlm' uuid='ldlm_UUID'/>
-  <node name='localhost' uuid='localhost_UUID'>
-    <profile>
-      <ldlm_ref uuidref='ldlm_UUID'/>
-      <network_ref uuidref='NET_localhost_tcp_UUID'/>
-      <mds_ref uuidref='mds1_UUID'/>
-      <obd_ref uuidref='OBD_localhost_UUID'/>
-      <ost_ref uuidref='OST_localhost_UUID'/>
-      <mountpoint_ref uuidref='MNT_localhost_UUID'/>
-    </profile>
-    <network type='tcp' name='NET_localhost_tcp' uuid='NET_localhost_tcp_UUID'>
-      <server>localhost</server>
-      <port>988</port>
-    </network>
-  </node>
-  <mds name='mds1' uuid='mds1_UUID'>
-    <fstype>extN</fstype>
-    <device size='50000'>/tmp/mds1</device>
-    <autoformat>yes</autoformat>
-    <network_ref uuidref='NET_localhost_tcp_UUID'/>
-    <node_ref uuidref='localhost_UUID'/>
-  </mds>
-  <obd type='obdfilter' name='OBD_localhost' uuid='OBD_localhost_UUID'>
-    <fstype>extN</fstype>
-    <device size='200000'>/tmp/ost1</device>
-    <autoformat>yes</autoformat>
-  </obd>
-  <osc name='OSC_localhost' uuid='OSC_localhost_UUID'>
-    <ost_ref uuidref='OST_localhost_UUID'/>
-    <obd_ref uuidref='OBD_localhost_UUID'/>
-  </osc>
-  <ost name='OST_localhost' uuid='OST_localhost_UUID'>
-    <network_ref uuidref='NET_localhost_tcp_UUID'/>
-    <obd_ref uuidref='OBD_localhost_UUID'/>
-  </ost>
-  <mountpoint name='MNT_localhost' uuid='MNT_localhost_UUID'>
-    <mds_ref uuidref='mds1_UUID'/>
-    <osc_ref uuidref='OSC_localhost_UUID'/>
-    <path>/mnt/lustre1</path>
-  </mountpoint>
-</lustre>
index f90327a..26c3016 100755 (executable)
@@ -27,7 +27,7 @@ int main(int argc, char ** argv)
 
         for (i=0 ; i < count ; i++) {
                 sprintf(dirname, "%s-%d", argv[1], i);
-                rc = mkdir(dirname, S_IFREG| 0444);
+                rc = mkdir(dirname, 0444);
                 if (rc) {
                         printf("mkdir(%s) error: %s\n",
                                dirname, strerror(errno));
index 6ae6e70..f1c00b4 100644 (file)
@@ -2,14 +2,14 @@
 
 config=${1:-mount2.xml}
 
-LMC=${LMC:-../utils/lmc}
+LMC="${LMC:-../utils/lmc} -m $config"
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
 
-OSTDEV=$TMP/ost1
-OSTSIZE=100000
+OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTSIZE=${OSTSIZE:-200000}
 
 kver=`uname -r | cut -d "." -f 1,2`
 
@@ -21,15 +21,19 @@ case $kver in
      ;;
 esac
 
+
+rm -f $config
+
 # create nodes
-${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
+${LMC} --add node --node localhost || exit 10
+${LMC} --add net --node  localhost --nid localhost --nettype tcp || exit 11
 
 # configure mds server
-${LMC} -m $config --add mds --format --node localhost $FSTYPE --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 2
+${LMC} --add mds  --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
 
 # configure ost
-${LMC} -m $config --add ost --format --obd obd1 --node localhost $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 3
+${LMC} --add ost --node localhost --obd obd1 --dev $OSTDEV --size  $OSTSIZE || exit 30
 
 # create client config
-${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 4
-${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 4
+${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 40
diff --git a/lustre/tests/open_delay.c b/lustre/tests/open_delay.c
new file mode 100644 (file)
index 0000000..2f41884
--- /dev/null
@@ -0,0 +1,25 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_lite.h>
+#include <linux/obd_lov.h>
+
+int main(int argc, char **argv)
+{
+        /* Exactly one argument is accepted: the file to open. */
+        if (argc != 2) {
+                printf("Usage %s <filename>\n", argv[0]);
+                return 1;
+        }
+
+        /* Only the open itself is exercised; the descriptor is never used
+         * and is left for process exit to release. */
+        if (open(argv[1], O_RDONLY | O_LOV_DELAY_CREATE) < 0) {
+                printf("Error opening %s\n", argv[1]);
+                return 1;
+        }
+
+        return 0;
+}
index 3d5904d..e7671c8 100644 (file)
@@ -12,17 +12,22 @@ char buf[128];
 
 int main(int argc, char **argv)
 {
+       char *fname, *fname2;
         int fd, rc;
 
-        if (argc != 2) {
-                fprintf(stderr, "usage: %s filename\n", argv[0]);
+        if (argc < 2 || argc > 3) {
+                fprintf(stderr, "usage: %s filename [filename2]\n", argv[0]);
                 exit(1);
-        } else {
-                fprintf(stderr, "congratulations - program starting\n");
         }
 
+       fname = argv[1];
+       if (argc == 3)
+               fname2 = argv[2];
+       else
+               fname2 = argv[1];
+
         fprintf(stderr, "opening\n");
-        fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0644);
+        fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
         if (fd == -1) {
                 fprintf(stderr, "open (normal) %s\n", strerror(errno));
                 exit(1);
@@ -35,31 +40,41 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
-        fprintf(stderr, "closing\n");
-        rc = close(fd);
-        if (rc) {
-                fprintf(stderr, "close (normal) %s\n", strerror(errno));
-                exit(1);
-        }
-
-        fprintf(stderr, "opening again\n");
-        fd = open(argv[1], O_RDWR);
-        if (fd == -1) {
-                fprintf(stderr, "open (unlink) %s\n", strerror(errno));
-                exit(1);
-        }
-
-#if 0
-        fprintf(stderr, "unlinking\n");
-        rc = unlink(argv[1]);
-        if (rc) {
-                fprintf(stderr, "unlink %s\n", strerror(errno));
-                exit(1);
-        }
-#else
-        printf("unlink %s and press enter\n", argv[1]);
-        getc(stdin);
-#endif
+       if (argc == 3) {
+               fprintf(stderr, "closing %s\n", fname);
+               rc = close(fd);
+               if (rc) {
+                       fprintf(stderr, "close (normal) %s\n", strerror(errno));
+                       exit(1);
+               }
+
+               fprintf(stderr, "opening %s\n", fname2);
+               fd = open(fname2, O_RDWR);
+               if (fd == -1) {
+                       fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+                       exit(1);
+               }
+
+               fprintf (stderr, "unlinking %s\n", fname2);
+               rc = unlink(fname2);
+               if (rc) {
+                       fprintf(stderr, "unlink %s\n", strerror(errno));
+                       exit(1);
+               }
+
+               if (access(fname2, F_OK) == 0) {
+                       fprintf(stderr, "%s still exists\n", fname2);
+                       exit(1);
+               }
+       } else {
+               printf("unlink %s and press enter\n", fname);
+               getc(stdin);
+       }
+
+       if (access(fname, F_OK) == 0) {
+               fprintf(stderr, "%s still exists\n", fname);
+               exit(1);
+       }
 
         fprintf(stderr, "reading\n");
         rc = read(fd, buf, strlen(T1) + 1);
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
new file mode 100755 (executable)
index 0000000..26bb81f
--- /dev/null
@@ -0,0 +1,124 @@
+#!/bin/sh
+
+set -ex
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+PATH=$PATH:$LUSTRE/utils:$LUSTRE/tests
+
+. $LUSTRE/../ltest/functional/llite/common/common.sh
+
+PDSH='pdsh -S -w'
+
+# XXX I wish all this stuff was in some default-config.sh somewhere
+MDSNODE=${MDSNODE:-dev2}
+OSTNODE=${OSTNODE:-dev3}
+CLIENT=${CLIENTNODE:-dev4}
+NETWORKTYPE=${NETWORKTYPE:-tcp}
+MOUNTPT=${MOUNTPT:-/mnt/lustre}
+CONFIG=recovery-small.xml
+MDSDEV=/tmp/mds
+OSTDEV=/tmp/ost
+MDSSIZE=100000
+OSTSIZE=100000
+
+do_mds() {
+    $PDSH $MDSNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_client() {
+    $PDSH $CLIENT "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_ost() {
+    $PDSH $OSTNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+drop_request() {
+    do_mds "echo 0x121 > /proc/sys/lustre/fail_loc"
+    do_client "$1"
+    do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+}
+
+drop_reply() {
+    do_mds "echo 0x120 > /proc/sys/lustre/fail_loc"
+    do_client "$@"
+    do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+}
+
+make_config() {
+    rm -f $CONFIG
+    for NODE in $CLIENT $MDSNODE $OSTNODE; do
+       lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
+           --nettype $NETWORKTYPE || exit 4
+    done
+    lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
+        --size $MDSSIZE || exit 5
+    lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
+        --size $OSTSIZE || exit 6
+    lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
+        --ost ost1 || exit 7
+}
+
+start_mds() {
+    do_mds "lconf $@ $CONFIG"
+}
+
+shutdown_mds() {
+    do_mds "lconf $@ --cleanup $CONFIG"
+}
+
+start_ost() {
+    do_ost "lconf $@ $CONFIG"
+}
+
+shutdown_ost() {
+    do_ost "lconf $@ --cleanup $CONFIG"
+}
+
+mount_client() {
+    do_client "lconf $@ $CONFIG"
+}
+
+unmount_client() {
+    do_client "lconf $@ --cleanup $CONFIG"
+}
+
+setup() {
+    make_config
+    start_mds --reformat
+    start_ost --reformat
+    # XXX we should write our own upcall, when we move this somewhere better.
+    mount_client --timeout=10 \
+        --recovery_upcall=$PWD/../../ltest/functional/llite/09/client-upcall.sh
+}
+
+cleanup() {
+    unmount_client || true
+    shutdown_mds || true
+    shutdown_ost || true
+}
+
+replay() {
+    if [ $# -gt 1 ]; then
+        do_client "$1"
+        shift
+    fi
+    do_mds "sync"
+    do_mds 'echo -e "device \$mds1\\nprobe\\nnotransno\\nreadonly" | lctl'
+    do_client "$1" &
+    shutdown_mds -f
+    start_mds
+    wait
+    do_client "ls $MOUNTPT" # trigger failover, if we haven't already
+}
+
+if [ ! -z "$ONLY" ]; then
+    eval "$ONLY"
+    exit $?
+fi
+
+setup
+drop_request "mcreate /mnt/lustre/1"
+drop_reply "mcreate /mnt/lustre/2"
+replay "mcreate /mnt/lustre/3"
+cleanup
diff --git a/lustre/tests/rename.pl b/lustre/tests/rename.pl
new file mode 100644 (file)
index 0000000..3ba9368
--- /dev/null
@@ -0,0 +1,78 @@
+#!/usr/bin/perl
+use strict;
+use diagnostics;
+use Getopt::Long;
+
+sub usage () {
+    print "Usage: $0 <mount point prefix> <iterations>\n";
+    print "example: $0 --count=2 /mnt/lustre 50\n";
+    print "         will test in /mnt/lustre1 and /mnt/lustre2\n";
+    print "         $0 --count=0 /mnt/lustre 50\n";
+    print "         will test in /mnt/lustre only\n";
+    exit;
+}
+my ($j, $k, $d, $f1, $f2, $path, $silent);
+my $count = 0;
+my $create = 10;
+
+GetOptions("silent!"=> \$silent,
+           "count=i" => \$count,
+           "create=i" => \$create);
+
+my $mtpt = shift || usage();
+my $i = shift || usage();
+my $total = $i;
+my $files = 6;
+my $dirs = 3;
+my $mcreate = 0; # should we use mcreate or open?
+
+my $which = "";
+if ($count > 0) {
+    $which = int(rand() * $count) + 1;
+}
+
+$k = $dirs;
+if ($create == 0) {
+    $k = 0;
+}
+while ($k--) {
+    $path = "$mtpt$which/$k";
+    my $rc = mkdir $path, 0755;
+    print "mkdir $path failed: $!\n" if !$rc;
+    $j = $files;
+    while ($j--) {
+        `./mcreate $path/$j`;
+    }
+}
+
+while ($i--) {
+    my $which = "";
+    if ($count > 0) {
+        $which = int(rand() * $count) + 1;
+    }
+    $d = int(rand() * $dirs);
+    $f1 = int(rand() * $files);
+    $f2 = int(rand() * $files);
+    print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
+    my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
+    print "[$$] done: $rc\n" if !$silent;
+    if (($total - $i) % 100 == 0) {
+        print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+    }
+}
+
+$k = $dirs;
+if ($create == 0) {
+    $k = 0;
+}
+while ($k--) {
+    $path = "$mtpt$which/$k";
+    $j = $files;
+    while ($j--) {
+        unlink "$path/$j";
+    }
+    my $rc = rmdir $path;
+    print "rmdir $path failed: $!\n" if !$rc;
+}
+
+print "Done.\n";
index cf198ad..4fc00b2 100755 (executable)
@@ -4,7 +4,7 @@
 [ -z "$VERIFY" ] && VERIFY="-+d"
 [ -z "$ODIR" ] && ODIR="-I"
 [ -z "$REC" ] && REC=64
-[ -z "$FILE" ] && FILE=/mnt/lustre/test.$$
+[ -z "$FILE" ] && FILE=/mnt/lustre/iozone.$$
 [ $1 ] && SIZE=$1
 COUNT=0
 rm -f endiozone
index 702bd1f..4d86248 100644 (file)
@@ -1,16 +1,15 @@
 #!/bin/sh
-export PATH=/sbin:/usr/sbin:$PATH
-
 SRCDIR="`dirname $0`/"
-. $SRCDIR/common.sh
+export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
 
+LOOPS=${LOOPS:-1}
 COUNT=${COUNT:-1000000}
 COUNT_10=`expr $COUNT / 10`
 COUNT_100=`expr $COUNT / 100`
 
 ENDRUN=endrun-`hostname`
 
-ECHONAME="`$OBDCTL device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
 
 if [ -z "$ECHONAME" ]; then
        echo "$0: needs an ECHO_CLIENT set up first" 1>&2
@@ -18,7 +17,7 @@ if [ -z "$ECHONAME" ]; then
 fi
 
 cleanup () {
-       $OBDCTL --device \$$ECHONAME destroy $OID
+       lctl --device \$$ECHONAME destroy $OID
 }
        
 runthreads() {
@@ -42,7 +41,7 @@ runthreads() {
                ;;
        esac
 
-       $OBDCTL --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
+       lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
 
        if [ -e $ENDRUN ]; then
                rm $ENDRUN
@@ -51,15 +50,15 @@ runthreads() {
        fi
 }
 
-[ -z "$OID" ] && OID=`$OBDCTL --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'`
+[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID"
 [ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1
 
 # TODO: obdctl needs to check on the progress of each forked thread
 #       (IPC SHM, sockets?) to see if it hangs.
-while date; do
+for i in `seq $LOOPS`; do
        PG=1
-       PGVW=16
-       PGVR=16
+       PGVW=${PGVW:-16}
+       PGVR=${PGVR:-16}
 
        # We use '--threads 1 X' instead of '--device X' so that
        # obdctl can monitor the forked thread for progress (TODO).
index 288f847..6de9a6c 100644 (file)
@@ -1,8 +1,6 @@
 #!/bin/sh
-export PATH=/sbin:/usr/sbin:$PATH
-
 SRCDIR="`dirname $0`/"
-. $SRCDIR/common.sh
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
 
 COUNT=${COUNT:-1000000}
 COUNT_10=`expr $COUNT / 10`
@@ -11,13 +9,17 @@ COUNT_1000=`expr $COUNT / 1000`
 
 ENDRUN=endrun-`hostname`
 
-ECHONAME="`$OBDCTL device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
 
 if [ -z "$ECHONAME" ]; then
        echo "$0: needs an ECHO_CLIENT set up first" 1>&2
        exit 1
 fi
 
+cleanup () {
+       lctl --device \$$ECHONAME destroy $OID
+}
+       
 runthreads() {
        THR=$1
        DO=$2
@@ -29,28 +31,26 @@ runthreads() {
        test_getattr)
                RW=
                ;;
-
        test_brw_write)
                DO=test_brw
                RW=w
                ;;
-
        test_brw_read)
                DO=test_brw
                RW=r
                ;;
        esac
 
-       $OBDCTL --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
+       lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
 
-       if [ -e endrun ]; then
-               rm endrun
-               echo "exiting because endrun file was found"
-               exit 0
+       if [ -e $ENDRUN ]; then
+               rm $ENDRUN
+               echo "exiting because $ENDRUN file was found"
+               cleanup
        fi
 }
 
-[ -z "$OID" ] && OID=`$OBDCTL --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'`
+[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID"
 [ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1
 
 # TODO: obdctl needs to check on the progress of each forked thread
@@ -63,11 +63,11 @@ for CMD in test_getattr test_brw_write test_brw_read; do
                ;;
        test_brw_write)
                PG=1
-               PGV=16
+               PGV=${PGV:-16}
                ;;
        test_brw_read)
                PG=1
-               PGV=16
+               PGV=${PGV:-16}
                ;;
        esac
 
@@ -76,8 +76,7 @@ for CMD in test_getattr test_brw_write test_brw_read; do
        runthreads 1 $CMD 1 1 $PG
        runthreads 1 $CMD 100 1 $PG
 
-       debug_server_off
-       debug_client_off
+       echo 0 > /proc/sys/portals/debug
        runthreads 1 $CMD $COUNT_100 -10 $PG
        [ "$PGV" ] && runthreads 1 $CMD $COUNT_1000 -10 $PGV
 
@@ -97,4 +96,4 @@ for CMD in test_getattr test_brw_write test_brw_read; do
        [ "$PGV" ] && runthreads 100 $CMD $COUNT_1000 -30 $PGV
 done
 
-$OBDCTL --device \$$ECHONAME destroy $OID
+lctl --device \$$ECHONAME destroy $OID
index e068a01..05bf71f 100755 (executable)
@@ -104,6 +104,12 @@ rm $HOSTS || fail "can't remove $HOSTS again" 36
 echo "removing $DST"
 rm -r $V $DST || fail "can't remove $DST" 37
 
+# mkdirmany test (bug 589)
+echo "running mkdirmany $OSCMT/base$$ 100"
+./mkdirmany $OSCMT/base$$ 100 || fail "mkdirmany failed"
+echo "removing mkdirmany directories"
+rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+
 NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
 if [ $NOWUSED -gt $USED ]; then
        echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
index 73117b6..111606a 100644 (file)
@@ -2,18 +2,28 @@
 
 set -e
 
+SRCDIR=`dirname $0`
+PATH=$SRCDIR:$SRCDIR/../utils:$PATH
+
 CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+MCREATE=${MCREATE:-mcreate}
+TOEXCL=${TOEXCL:-toexcl}
+
 MOUNT=${MOUNT:-/mnt/lustre}
+DIR=${DIR:-$MOUNT}
 export NAME=$NAME
 clean() {
         echo -n "cln.."
-        sh llmountcleanup.sh > /dev/null
+        sh llmountcleanup.sh > /dev/null || exit 20
 }
 CLEAN=${CLEAN:-clean}
 start() {
         echo -n "mnt.."
-        sh llrmount.sh > /dev/null
-        echo -n "done"
+        sh llrmount.sh > /dev/null || exit 10
+        echo "done"
 }
 START=${START:-start}
 
@@ -26,270 +36,279 @@ pass() {
     echo PASS
 }
 
-mount | grep $MOUNT || $START
+mount | grep $MOUNT || sh llmount.sh
 
 echo '== touch .../f ; rm .../f ======================== test 0'
-touch $MOUNT/f
-$CHECKSTAT -t file $MOUNT/f || error 
-rm $MOUNT/f
-$CHECKSTAT -a $MOUNT/f || error
+touch $DIR/f
+$CHECKSTAT -t file $DIR/f || error 
+rm $DIR/f
+$CHECKSTAT -a $DIR/f || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d1; mkdir .../d1/d2 ================= test 1'
-mkdir $MOUNT/d1
-mkdir $MOUNT/d1/d2
-$CHECKSTAT -t dir $MOUNT/d1/d2 || error
+mkdir $DIR/d1
+mkdir $DIR/d1/d2
+$CHECKSTAT -t dir $DIR/d1/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b'
-rmdir $MOUNT/d1/d2
-rmdir $MOUNT/d1
-$CHECKSTAT -a $MOUNT/d1 || error
+rmdir $DIR/d1/d2
+rmdir $DIR/d1
+$CHECKSTAT -a $DIR/d1 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d2; touch .../d2/f ================== test 2'
-mkdir $MOUNT/d2
-touch $MOUNT/d2/f
-$CHECKSTAT -t file $MOUNT/d2/f || error
+mkdir $DIR/d2
+touch $DIR/d2/f
+$CHECKSTAT -t file $DIR/d2/f || error
 pass
 $CLEAN
 $START
 
 echo '== rm -r .../d2; touch .../d2/f ================== test 2b'
-rm -r $MOUNT/d2
-$CHECKSTAT -a $MOUNT/d2 || error
+rm -r $DIR/d2
+$CHECKSTAT -a $DIR/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d3 ================================== test 3'
-mkdir $MOUNT/d3
-$CHECKSTAT -t dir $MOUNT/d3 || error
+mkdir $DIR/d3
+$CHECKSTAT -t dir $DIR/d3 || error
 pass
 $CLEAN
 $START
 echo '== touch .../d3/f ================================ test 3b'
-touch $MOUNT/d3/f
-$CHECKSTAT -t file $MOUNT/d3/f || error
+touch $DIR/d3/f
+$CHECKSTAT -t file $DIR/d3/f || error
 pass
 $CLEAN
 $START
 echo '== rm -r .../d3 ================================== test 3c'
-rm -r $MOUNT/d3
-$CHECKSTAT -a $MOUNT/d3 || error
+rm -r $DIR/d3
+$CHECKSTAT -a $DIR/d3 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d4 ================================== test 4'
-mkdir $MOUNT/d4
-$CHECKSTAT -t dir $MOUNT/d4 || error
+mkdir $DIR/d4
+$CHECKSTAT -t dir $DIR/d4 || error
 pass
 $CLEAN
 $START
 echo '== mkdir .../d4/d2 =============================== test 4b'
-mkdir $MOUNT/d4/d2
-$CHECKSTAT -t dir $MOUNT/d4/d2 || error
+mkdir $DIR/d4/d2
+$CHECKSTAT -t dir $DIR/d4/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5'
-mkdir $MOUNT/d5
-mkdir $MOUNT/d5/d2
-chmod 0666 $MOUNT/d5/d2
-$CHECKSTAT -t dir -p 0666 $MOUNT/d5/d2 || error
+mkdir $DIR/d5
+mkdir $DIR/d5/d2
+chmod 0707 $DIR/d5/d2
+$CHECKSTAT -t dir -p 0707 $DIR/d5/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== touch .../f6; chmod .../f6 ==================== test 6'
-touch $MOUNT/f6
-chmod 0666 $MOUNT/f6
-$CHECKSTAT -t file -p 0666 $MOUNT/f6 || error
+touch $DIR/f6
+chmod 0666 $DIR/f6
+$CHECKSTAT -t file -p 0666 $DIR/f6 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7'
-mkdir $MOUNT/d7
-./mcreate $MOUNT/d7/f
-chmod 0666 $MOUNT/d7/f
-$CHECKSTAT -t file -p 0666 $MOUNT/d7/f || error
+mkdir $DIR/d7
+$MCREATE $DIR/d7/f
+chmod 0666 $DIR/d7/f
+$CHECKSTAT -t file -p 0666 $DIR/d7/f || error
+pass
+$CLEAN
+$START
+
+echo '== mkdir .../d7; mcreate .../d7/f2; chmod .../d7/f2 = test 7b'
+$MCREATE $DIR/d7/f2
+echo -n foo > $DIR/d7/f2
+[ "`cat $DIR/d7/f2`" = "foo" ] || error
+$CHECKSTAT -t file -s 3 $DIR/d7/f2 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8'
-mkdir $MOUNT/d8
-touch $MOUNT/d8/f
-chmod 0666 $MOUNT/d8/f
-$CHECKSTAT -t file -p 0666 $MOUNT/d8/f || error
+mkdir $DIR/d8
+touch $DIR/d8/f
+chmod 0666 $DIR/d8/f
+$CHECKSTAT -t file -p 0666 $DIR/d8/f || error
 pass
 $CLEAN
 $START
 
 
 echo '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9'
-mkdir $MOUNT/d9
-mkdir $MOUNT/d9/d2
-mkdir $MOUNT/d9/d2/d3
-$CHECKSTAT -t dir $MOUNT/d9/d2/d3 || error
+mkdir $DIR/d9
+mkdir $DIR/d9/d2
+mkdir $DIR/d9/d2/d3
+$CHECKSTAT -t dir $DIR/d9/d2/d3 || error
 pass
 $CLEAN
 $START
 
 
 echo '== mkdir .../d10 .../d10/d2; touch .../d10/d2/f = test 10'
-mkdir $MOUNT/d10
-mkdir $MOUNT/d10/d2
-touch $MOUNT/d10/d2/f
-$CHECKSTAT -t file $MOUNT/d10/d2/f || error
+mkdir $DIR/d10
+mkdir $DIR/d10/d2
+touch $DIR/d10/d2/f
+$CHECKSTAT -t file $DIR/d10/d2/f || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d11 d11/d2; chmod .../d11/d2 ======= test 11'
-mkdir $MOUNT/d11
-mkdir $MOUNT/d11/d2
-chmod 0666 $MOUNT/d11/d2
-chmod 0555 $MOUNT/d11/d2
-$CHECKSTAT -t dir -p 0555 $MOUNT/d11/d2 || error
+mkdir $DIR/d11
+mkdir $DIR/d11/d2
+chmod 0666 $DIR/d11/d2
+chmod 0705 $DIR/d11/d2
+$CHECKSTAT -t dir -p 0705 $DIR/d11/d2 || error
 pass
 $CLEAN
 $START
 
 echo '== mkdir .../d12; touch .../d12/f; chmod .../d12/f == test 12'
-mkdir $MOUNT/d12
-touch $MOUNT/d12/f
-chmod 0666 $MOUNT/d12/f
-chmod 0555 $MOUNT/d12/f
-$CHECKSTAT -t file -p 0555 $MOUNT/d12/f || error
+mkdir $DIR/d12
+touch $DIR/d12/f
+chmod 0666 $DIR/d12/f
+chmod 0654 $DIR/d12/f
+$CHECKSTAT -t file -p 0654 $DIR/d12/f || error
 pass
 $CLEAN
 $START
 
-echo '== mkdir .../d13; cp /etc/passwd .../d13/f; > .../d13/f == test 13'
-mkdir $MOUNT/d13
-cp /etc/hosts $MOUNT/d13/f
->  $MOUNT/d13/f
-$CHECKSTAT -t file -s 0 $MOUNT/d13/f || error
+echo '== mkdir .../d13; creat .../d13/f;  .../d13/f; > .../d13/f == test 13'
+mkdir $DIR/d13
+dd if=/dev/zero of=$DIR/d13/f count=10
+>  $DIR/d13/f
+$CHECKSTAT -t file -s 0 $DIR/d13/f || error
 pass
 $CLEAN
 $START
 
-
 echo '================================================== test 14'
-mkdir $MOUNT/d14
-touch $MOUNT/d14/f
-rm $MOUNT/d14/f
-$CHECKSTAT -a $MOUNT/d14/f || error
+mkdir $DIR/d14
+touch $DIR/d14/f
+rm $DIR/d14/f
+$CHECKSTAT -a $DIR/d14/f || error
 pass
 $CLEAN
 $START
 
-
 echo '================================================== test 15'
-mkdir $MOUNT/d15
-touch $MOUNT/d15/f
-mv $MOUNT/d15/f $MOUNT/d15/f2
-$CHECKSTAT -t file $MOUNT/d15/f2 || error
+mkdir $DIR/d15
+touch $DIR/d15/f
+mv $DIR/d15/f $DIR/d15/f2
+$CHECKSTAT -t file $DIR/d15/f2 || error
 pass
 $CLEAN
 $START
 
 echo '================================================== test 16'
-mkdir $MOUNT/d16
-touch $MOUNT/d16/f
-rm -rf $MOUNT/d16/f
-$CHECKSTAT -a $MOUNT/d16/f || error
+mkdir $DIR/d16
+touch $DIR/d16/f
+rm -rf $DIR/d16/f
+$CHECKSTAT -a $DIR/d16/f || error
 pass
 $CLEAN
 $START
 
 echo '== symlinks: create, remove (dangling and real) == test 17'
-mkdir $MOUNT/d17
-touch $MOUNT/d17/f
-ln -s $MOUNT/d17/f $MOUNT/d17/l-exist
-ln -s no-such-file $MOUNT/d17/l-dangle
-ls -l $MOUNT/d17
-$CHECKSTAT -l $MOUNT/d17/f $MOUNT/d17/l-exist || error
-$CHECKSTAT -f -t f $MOUNT/d17/l-exist || error
-$CHECKSTAT -l no-such-file $MOUNT/d17/l-dangle || error
-$CHECKSTAT -fa $MOUNT/d17/l-dangle || error
-rm -f $MOUNT/l-dangle
-rm -f $MOUNT/l-exist
-$CHECKSTAT -a $MOUNT/l-dangle || error
-$CHECKSTAT -a $MOUNT/l-exist || error
+mkdir $DIR/d17
+touch $DIR/d17/f
+ln -s $DIR/d17/f $DIR/d17/l-exist
+ln -s no-such-file $DIR/d17/l-dangle
+ls -l $DIR/d17
+$CHECKSTAT -l $DIR/d17/f $DIR/d17/l-exist || error
+$CHECKSTAT -f -t f $DIR/d17/l-exist || error
+$CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error
+$CHECKSTAT -fa $DIR/d17/l-dangle || error
+rm -f $DIR/d17/l-dangle
+rm -f $DIR/d17/l-exist
+$CHECKSTAT -a $DIR/d17/l-dangle || error
+$CHECKSTAT -a $DIR/d17/l-exist || error
 pass
 $CLEAN
 $START
 
 echo "== touch .../f ; ls ... ========================= test 18"
-touch $MOUNT/f
-ls $MOUNT || error
+touch $DIR/f
+ls $DIR || error
 pass
 $CLEAN
 $START
 
 echo "== touch .../f ; ls -l ... ====================== test 19"
-touch $MOUNT/f
-ls -l $MOUNT
-rm $MOUNT/f
-$CHECKSTAT -a $MOUNT/f || error
+touch $DIR/f
+ls -l $DIR
+rm $DIR/f
+$CHECKSTAT -a $DIR/f || error
 pass
 $CLEAN
 $START
 
 echo "== touch .../f ; ls -l ... ====================== test 20"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
 echo "1 done"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
 echo "2 done"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
 echo "3 done"
-$CHECKSTAT -a $MOUNT/f || error
+$CHECKSTAT -a $DIR/f || error
 pass
 $CLEAN
 $START
 
 echo '== write to dangling link ======================== test 21'
-mkdir $MOUNT/d21
-[ -f $MOUNT/d21/dangle ] && rm -f $MOUNT/d21/dangle
-ln -s dangle $MOUNT/d21/link
-echo foo >> $MOUNT/d21/link
-cat $MOUNT/d21/dangle
-$CHECKSTAT -t link $MOUNT/d21/link || error
-$CHECKSTAT -f -t file $MOUNT/d21/link || error
+mkdir $DIR/d21
+[ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle
+ln -s dangle $DIR/d21/link
+echo foo >> $DIR/d21/link
+cat $DIR/d21/dangle
+$CHECKSTAT -t link $DIR/d21/link || error
+$CHECKSTAT -f -t file $DIR/d21/link || error
 pass
 $CLEAN
 $START
 
 echo '== unpack tar archive as non-root user =========== test 22'
-mkdir $MOUNT/d22
-which sudo && chown 4711 $MOUNT/d22
+mkdir $DIR/d22
+which sudo && chown 4711 $DIR/d22
 SUDO=`which sudo 2> /dev/null` && SUDO="$SUDO -u #4711" || SUDO=""
-$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $MOUNT/d22
-ls -lR $MOUNT/d22/etc
-$CHECKSTAT -t dir $MOUNT/d22/etc || error
-[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $MOUNT/d22/etc || error
+echo '**** FIX THIS TEST ****'
+SUDO=""
+$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $DIR/d22
+ls -lR $DIR/d22/etc
+$CHECKSTAT -t dir $DIR/d22/etc || error
+[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $DIR/d22/etc || error
 pass
 $CLEAN
 $START
 
 echo '== O_CREAT|O_EXCL in subdir ====================== test 23'
-mkdir $MOUNT/d23
-./toexcl $MOUNT/d23/f23
-./toexcl -e $MOUNT/d23/f23 || error
+mkdir $DIR/d23
+$TOEXCL $DIR/d23/f23
+$TOEXCL -e $DIR/d23/f23 || error
 pass
 $CLEAN
 $START
@@ -297,189 +316,217 @@ $START
 echo '== rename sanity ================================= test24'
 echo '-- same directory rename'
 echo '-- test 24-R1: touch a ; rename a b'
-mkdir $MOUNT/R1
-touch $MOUNT/R1/f
-mv $MOUNT/R1/f $MOUNT/R1/g
-$CHECKSTAT -t file $MOUNT/R1/g || error
+mkdir $DIR/R1
+touch $DIR/R1/f
+mv $DIR/R1/f $DIR/R1/g
+$CHECKSTAT -t file $DIR/R1/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R2: touch a b ; rename a b;'
-mkdir $MOUNT/R2
-touch $MOUNT/R2/{f,g}
-mv $MOUNT/R2/f $MOUNT/R2/g
-$CHECKSTAT -a $MOUNT/R2/f || error
-$CHECKSTAT -t file $MOUNT/R2/g || error
+mkdir $DIR/R2
+touch $DIR/R2/{f,g}
+mv $DIR/R2/f $DIR/R2/g
+$CHECKSTAT -a $DIR/R2/f || error
+$CHECKSTAT -t file $DIR/R2/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R3: mkdir a  ; rename a b;'
-mkdir $MOUNT/R3
-mkdir $MOUNT/R3/f
-mv $MOUNT/R3/f $MOUNT/R3/g
-$CHECKSTAT -a $MOUNT/R3/f || error
-$CHECKSTAT -t dir $MOUNT/R3/g || error
+mkdir $DIR/R3
+mkdir $DIR/R3/f
+mv $DIR/R3/f $DIR/R3/g
+$CHECKSTAT -a $DIR/R3/f || error
+$CHECKSTAT -t dir $DIR/R3/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R4: mkdir a b ; rename a b;'
-mkdir $MOUNT/R4
-mkdir $MOUNT/R4/{f,g}
-perl -e "rename \"$MOUNT/R4/f\", \"$MOUNT/R4/g\";"
-$CHECKSTAT -a $MOUNT/R4/f || error
-$CHECKSTAT -t dir $MOUNT/R4/g || error
+mkdir $DIR/R4
+mkdir $DIR/R4/{f,g}
+perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";"
+$CHECKSTAT -a $DIR/R4/f || error
+$CHECKSTAT -t dir $DIR/R4/g || error
 pass
 $CLEAN
 $START
 
 echo '-- cross directory renames --' 
 echo '-- test 24-R5: touch a ; rename a b'
-mkdir $MOUNT/R5{a,b}
-touch $MOUNT/R5a/f
-mv $MOUNT/R5a/f $MOUNT/R5b/g
-$CHECKSTAT -a $MOUNT/R5a/f || error
-$CHECKSTAT -t file $MOUNT/R5b/g || error
+mkdir $DIR/R5{a,b}
+touch $DIR/R5a/f
+mv $DIR/R5a/f $DIR/R5b/g
+$CHECKSTAT -a $DIR/R5a/f || error
+$CHECKSTAT -t file $DIR/R5b/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R6: touch a ; rename a b'
-mkdir $MOUNT/R6{a,b}
-touch $MOUNT/R6a/f $MOUNT/R6b/g
-mv $MOUNT/R6a/f $MOUNT/R6b/g
-$CHECKSTAT -a $MOUNT/R6a/f || error
-$CHECKSTAT -t file $MOUNT/R6b/g || error
+mkdir $DIR/R6{a,b}
+touch $DIR/R6a/f $DIR/R6b/g
+mv $DIR/R6a/f $DIR/R6b/g
+$CHECKSTAT -a $DIR/R6a/f || error
+$CHECKSTAT -t file $DIR/R6b/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R7: touch a ; rename a b'
-mkdir $MOUNT/R7{a,b}
-mkdir $MOUNT/R7a/f
-mv $MOUNT/R7a/f $MOUNT/R7b/g
-$CHECKSTAT -a $MOUNT/R7a/f || error
-$CHECKSTAT -t dir $MOUNT/R7b/g || error
+mkdir $DIR/R7{a,b}
+mkdir $DIR/R7a/f
+mv $DIR/R7a/f $DIR/R7b/g
+$CHECKSTAT -a $DIR/R7a/f || error
+$CHECKSTAT -t dir $DIR/R7b/g || error
 pass
 $CLEAN
 $START
 
 echo '-- test 24-R8: touch a ; rename a b'
-mkdir $MOUNT/R8{a,b}
-mkdir $MOUNT/R8a/f $MOUNT/R8b/g
-perl -e "rename \"$MOUNT/R8a/f\", \"$MOUNT/R8b/g\";"
-$CHECKSTAT -a $MOUNT/R8a/f || error
-$CHECKSTAT -t dir $MOUNT/R8b/g || error
+mkdir $DIR/R8{a,b}
+mkdir $DIR/R8a/f $DIR/R8b/g
+perl -e "rename \"$DIR/R8a/f\", \"$DIR/R8b/g\";"
+$CHECKSTAT -a $DIR/R8a/f || error
+$CHECKSTAT -t dir $DIR/R8b/g || error
 pass
 $CLEAN
 $START
 
 echo "-- rename error cases"
 echo "-- test 24-R9 target error: touch f ; mkdir a ; rename f a"
-mkdir $MOUNT/R9
-mkdir $MOUNT/R9/a
-touch $MOUNT/R9/f
-perl -e "rename \"$MOUNT/R9/f\", \"$MOUNT/R9/a\";"
-$CHECKSTAT -t file $MOUNT/R9/f || error
-$CHECKSTAT -t dir  $MOUNT/R9/a || error
-$CHECKSTAT -a file $MOUNT/R9/a/f || error
+mkdir $DIR/R9
+mkdir $DIR/R9/a
+touch $DIR/R9/f
+perl -e "rename \"$DIR/R9/f\", \"$DIR/R9/a\";"
+$CHECKSTAT -t file $DIR/R9/f || error
+$CHECKSTAT -t dir  $DIR/R9/a || error
+$CHECKSTAT -a file $DIR/R9/a/f || error
 pass
 $CLEAN
 $START
 
 echo "--test 24-R10 source does not exist" 
-mkdir $MOUNT/R10
-perl -e "rename \"$MOUNT/R10/f\", \"$MOUNT/R10/g\"" 
-$CHECKSTAT -t dir $MOUNT/R10 || error
-$CHECKSTAT -a $MOUNT/R10/f || error
-$CHECKSTAT -a $MOUNT/R10/g || error
+mkdir $DIR/R10
+perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\"" 
+$CHECKSTAT -t dir $DIR/R10 || error
+$CHECKSTAT -a $DIR/R10/f || error
+$CHECKSTAT -a $DIR/R10/g || error
 pass
 $CLEAN
 $START
 
 echo '== symlink sanity ================================ test25'
 echo "--test 25.1 create file in symlinked directory"
-mkdir $MOUNT/d25
-ln -s d25 $MOUNT/s25
-touch $MOUNT/s25/foo
+mkdir $DIR/d25
+ln -s d25 $DIR/s25
+touch $DIR/s25/foo
 pass
 $CLEAN
 $START
 
 echo "--test 25.2 lookup file in symlinked directory"
-$CHECKSTAT -t file $MOUNT/s25/foo
+$CHECKSTAT -t file $DIR/s25/foo
 pass
 $CLEAN
 $START
 
 echo "--test 26 multiple component symlink"
-mkdir $MOUNT/d26
-mkdir $MOUNT/d26/d26-2
-ln -s d26/d26-2 $MOUNT/s26
-touch $MOUNT/s26/foo
+mkdir $DIR/d26
+mkdir $DIR/d26/d26-2
+ln -s d26/d26-2 $DIR/s26
+touch $DIR/s26/foo
 pass
 $CLEAN
 $START
 
 echo "--test 26.1 multiple component symlink at the end of a lookup"
-ln -s d26/d26-2/foo $MOUNT/s26-2
-touch $MOUNT/s26-2
+ln -s d26/d26-2/foo $DIR/s26-2
+touch $DIR/s26-2
 pass
 $CLEAN
 $START
 
 echo "--test 26.2 a chain of symlinks"
-mkdir $MOUNT/d26.2
-touch $MOUNT/d26.2/foo
-ln -s d26.2 $MOUNT/s26.2-1
-ln -s s26.2-1 $MOUNT/s26.2-2
-ln -s s26.2-2 $MOUNT/s26.2-3
-chmod 0666 $MOUNT/s26.2-3/foo
+mkdir $DIR/d26.2
+touch $DIR/d26.2/foo
+ln -s d26.2 $DIR/s26.2-1
+ln -s s26.2-1 $DIR/s26.2-2
+ln -s s26.2-2 $DIR/s26.2-3
+chmod 0666 $DIR/s26.2-3/foo
 pass
 $CLEAN
 $START
 
-echo '== stripe sanity ================================= test27'
-echo "--test 26.1 create one stripe"
-mkdir $MOUNT/d27
-../utils/lstripe $MOUNT/d27/f0 4096 0 1
-$CHECKSTAT -t file $MOUNT/d27/f0
-echo "--test 26.2 write to one stripe file"
-cp /etc/hosts $MOUNT/d27/f0
+# recursive symlinks (bug 439)
+echo "--test 26.3 create multiple component recursive symlink"
+ln -s d26-3/foo $DIR/d26-3
 pass
 $CLEAN
 $START
 
-echo "--test 26.3 create two stripes"
-../utils/lstripe $MOUNT/d27/f01 4096 0 2
-echo "--test 26.4 write to two stripe file"
-cp /etc/hosts $MOUNT/d27/f01
+echo "--test 26.3 unlink multiple component recursive symlink"
+rm $DIR/d26-3
 pass
 $CLEAN
 $START
 
-echo "--test 26.5 lstripe existing file (should return error)"
-../utils/lstripe $MOUNT/d27/f12 4096 1 2
-! ../utils/lstripe $MOUNT/d27/f12 4096 1 2
+echo '== stripe sanity ================================= test27'
+echo "--test 27.1 create one stripe"
+mkdir $DIR/d27
+$LSTRIPE $DIR/d27/f0 8192 0 1
+$CHECKSTAT -t file $DIR/d27/f0
+echo "--test 27.2 write to one stripe file"
+cp /etc/hosts $DIR/d27/f0
+pass
+
+echo "--test 27.3 create two stripe file f01"
+$LSTRIPE $DIR/d27/f01 8192 0 2
+echo "--test 27.4 write to two stripe file file f01"
+dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
+pass
+
+echo "--test 27.5 create file with default settings"
+$LSTRIPE $DIR/d27/fdef 0 -1 0
+$CHECKSTAT -t file $DIR/d27/fdef
+#dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4
+
+echo "--test 27.6 lstripe existing file (should return error)"
+$LSTRIPE $DIR/d27/f12 8192 1 2
+! $LSTRIPE $DIR/d27/f12 8192 1 2
+$CHECKSTAT -t file $DIR/d27/f12
+#dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+pass
+
+
+echo "--test 27.7 lstripe with bad stripe size (should return error on LOV)"
+$LSTRIPE $DIR/d27/fbad 100 1 2 || /bin/true
+dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
 pass
 $CLEAN
 $START
 
-echo "--test 26.6 lfind "
-../utils/lfind $MOUNT/d27
+echo "--test 27.8 lfind "
+$LFIND $DIR/d27
 pass
 $CLEAN
 $START
 
-echo '== IT_GETATTR regression  ======================== test28'
-mkdir $MOUNT/d28
-touch $MOUNT/d28/foo
-MDCDIR=${MDCDIR:-/proc/lustre/devices/ldlm/MDC_mds1}
+echo '== create/mknod/mkdir with bad file types ======== test28'
+mkdir $DIR/d28
+$CREATETEST $DIR/d28/ct || error
+pass
+
+echo '== IT_GETATTR regression  ======================== test29'
+mkdir $DIR/d29
+touch $DIR/d29/foo
+ls -l $DIR/d29
+MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_MNT_localhost_mds1}
 LOCKCOUNTORIG=`cat $MDCDIR/lock_count`
 LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count`
-ls -l $MOUNT/d28
+ls -l $DIR/d29
 LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count`
 LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count`
 if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ] || [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then
@@ -490,7 +537,7 @@ $CLEAN
 $START
 
 echo '== cleanup ============================================='
-rm -r $MOUNT/[Rdfs][1-9]*
+rm -r $DIR/[Rdfs][1-9]*
 
 echo '======================= finished ======================='
 exit
index 9c50574..8e95654 100644 (file)
 #!/bin/bash
 
-export NAME=$NAME
+set -e
+
+PATH=$PATH:.
+
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
+MOUNT1=${MOUNT1:-/mnt/lustre1}
+MOUNT2=${MOUNT2:-/mnt/lustre2}
+export NAME=${NAME:-mount2}
+
 clean() {
-        echo -n "cleanup..."
-        sh llmount2-hackcleanup.sh > /dev/null
+        echo -n "cln.."
+        sh llmountcleanup.sh > /dev/null
 }
 
-CLEAN=clean
+CLEAN=${CLEAN:-clean}
 start() {
-        echo -n "mounting..."
-        sh llmount2-hack.sh > /dev/null
-        echo -n "mounted"
+        echo -n "mnt.."
+        sh llrmount.sh > /dev/null
+        echo -n "done"
 }
-START=start
+START=${START:-start}
 
 error () { 
-    echo $1
+    echo FAIL
     exit 1
 }
 
-mkdir -p /mnt/lustre2
-mount | grep /mnt/lustre2 || $START
+pass() { 
+    echo PASS
+}
+
+mkdir -p $MOUNT2
+mount | grep $MOUNT1 || sh llmount.sh
 
 echo -n "test 1: check create on 2 mtpt's..."
-touch /mnt/lustre1/f1
-[ -f /mnt/lustre2/f1 ] || error "test 1 failure" 
-echo "pass"
+touch $MOUNT1/f1
+[ -f $MOUNT2/f1 ] || error
+pass
+
+echo "test 2: check attribute updates on 2 mtpt's..."
+chmod 777 $MOUNT2/f1
+$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
+pass
 
-echo -n "test 2: check attribute updates on 2 mtpt's..."
-chmod a+x /mnt/lustre2/f1
-[ -x /mnt/lustre1/f1 ] || error "test 2 failure"
-echo "pass"
+echo "test 2b: check cached attribute updates on 2 mtpt's..."
+touch $MOUNT1/f2b
+ls -l $MOUNT2/f2b
+chmod 777 $MOUNT2/f2b
+$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
+pass
 
-echo -n "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x /mnt/lustre2/f1
+echo "test 2c: check cached attribute updates on 2 mtpt's..."
+touch $MOUNT1/f2c
+ls -l $MOUNT2/f2c
+chmod 777 $MOUNT1/f2c
+$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
+pass
+
+echo "test 3: check after remount attribute updates on 2 mtpt's..."
+chmod a-x $MOUNT2/f1
 $CLEAN
 $START
+$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
+pass
+
+echo "test 4: unlink on one mountpoint removes file on other..."
+rm $MOUNT2/f1
+$CHECKSTAT -a $MOUNT1/f1 || error
+pass
 
-[ ! -x /mnt/lustre1/f1 ] || error "test 3 failure"
-echo "pass"
+echo -n "test 5: symlink on one mtpt, readlink on another..."
+( cd $MOUNT1 ; ln -s this/is/good lnk )
 
-echo -n "test 4: symlink on one mtpt, readlink on another..."
-( cd /mnt/lustre1 ; ln -s this/is/good lnk )
+[ "this/is/good" = "`perl -e 'print readlink($ARGV[0]);' $MOUNT2/lnk`" ] || error
+pass
 
-[ "Xthis/is/good" = X`perl -e 'print readlink("/mnt/lustre2/lnk");'` ] || error  "test 4 fails"
-echo "pass"
+echo -n "test 6: fstat validation on multiple mount points..."
+./multifstat $MOUNT1/f6 $MOUNT2/f6
+pass
 
-echo -n "test 5: fstat validation on multiple mount points..."
-./multifstat /mnt/lustre1/fstatfile /mnt/lustre2/fstatfile || error "test 5 fails"
-echo "pass"
+echo "test 9: remove of open file on other node..."
+./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
+pass
 
-echo -n "test 9: remove of open file on other node..."
-touch /mnt/lustre1/f9
-tail -f /mnt/lustre1/f9 &
-rm /mnt/lustre2/f9
-kill %1
-cat /mnt/lustre1/f9 && error "test 9 fails"
-echo "pass"
+echo -n "test 10: append of file with sub-page size on multiple mounts..."
+MTPT=1
+> $MOUNT2/f10
+for C in a b c d e f g h i j k l; do
+       MOUNT=`eval echo \\$MOUNT$MTPT`
+       echo -n $C >> $MOUNT/f10
+       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+done
+[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
+       
+echo -n "test 11: write of file with sub-page size on multiple mounts..."
+MTPT=1
+OFFSET=0
+> $MOUNT2/f11
+for C in a b c d e f g h i j k l; do
+       MOUNT=`eval echo \\$MOUNT$MTPT`
+       echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
+       [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+       OFFSET=`expr $OFFSET + 1`
+done
+[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
+       
+rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
 
 $CLEAN
 
index 112a796..7a4b95e 100644 (file)
@@ -4,12 +4,14 @@ config=${1-uml.xml}
 LMC=${LMC-../utils/lmc}
 TMP=${TMP:-/tmp}
 
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
 
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
-OSTSIZE=100000
+OSTDEV1=${OSTDEV1:-$TMP/ost1}
+OSTDEV2=${OSTDEV2:-$TMP/ost2}
+OSTSIZE=${OSTSIZE:-100000}
+
+NETTYPE=${NETTYPE:-tcp}
 
 # NOTE - You can't have different MDS/OST nodes and also have clients on the
 #        MDS/OST nodes without using --endlevel and --startlevel during lconf.
@@ -20,9 +22,9 @@ OSTSIZE=100000
 #        of the clients can be started, so plan accordingly.
 
 # Three separate systems
-MDSNODE=uml1
-OSTNODES="uml2 uml2"
-CLIENTS="uml3"
+MDSNODE=${MDSNODE:-uml1}
+OSTNODES=${OSTNODES:-"uml2 uml2"}
+CLIENTS=${CLIENTS:-"uml3"}
 
 # Single system with additional clients
 #MDSNODE=uml1
@@ -41,26 +43,47 @@ CLIENTS="uml3"
 
 rm -f $config
 
+h2tcp () {
+       case $1 in
+       client) echo '\*' ;;
+       *) echo $1 ;;
+       esac
+}
+
+h2elan () {
+       case $1 in
+       client) echo '\*' ;;
+       *) echo $1 | sed "s/[^0-9]*//" ;;
+       esac
+}
+
 # create nodes
-for NODE in $MDSNODE $OSTNODES $CLIENTS; do
-       eval [ \$$NODE ] && continue
-       ${LMC} -m $config --add net --node $NODE --nid $NODE --nettype tcp || exit 1
-       eval "$NODE=done"
+echo -n "adding NET for:"
+for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
+       echo -n " $NODE"
+       ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
 done
 
 # configure mds server
+echo; echo "adding MDS on: $MDSNODE"
 ${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
 
 # configure ost
-${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
+${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 1 --stripe_pattern 0 || exit 20
 COUNT=1
+echo -n "adding OST on:"
 for NODE in $OSTNODES; do
        eval OSTDEV=\$OSTDEV$COUNT
+       echo -n " $NODE"
+       OSTDEV=${OSTDEV:-$OSTDEV1}
         ${LMC} -m $config --add ost --node $NODE --lov lov1 --dev $OSTDEV --size $OSTSIZE || exit 21
        COUNT=`expr $COUNT + 1`
 done
 
 # create client config(s)
+echo; echo -n "adding CLIENT on:"
 for NODE in $CLIENTS; do
+       echo -n " $NODE"
        ${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
 done
+echo
index de7b425..fc0f010 100644 (file)
@@ -12,3 +12,6 @@ lctl
 lfind
 lstripe
 lconf
+obdstat
+obdio
+obdbarrier
index 6a5483d..bfeebd7 100644 (file)
@@ -6,10 +6,12 @@ KFLAGS:=
 CPPFLAGS = $(HAVE_LIBREADLINE)
 obdctl_LDADD := $(LIBREADLINE)
 lctl_LDADD := $(LIBREADLINE) -lptlctl
-sbin_PROGRAMS = lctl lfind lstripe obdctl
-sbin_SCRIPTS = lconf lmc
+sbin_PROGRAMS = lctl lfind lstripe obdctl obdio obdbarrier obdstat
+sbin_SCRIPTS = lconf lmc llanalyze
 obdctl_SOURCES = parser.c obdctl.c obd.c parser.h obdctl.h
 lctl_SOURCES = parser.c obd.c lctl.c parser.h
+obdio_SOURCES = obdio.c obdiolib.c obdiolib.h
+obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h
 lfind_SOURCES = lfind.c
 lstripe_SOURCES = lstripe.c
 lfind_CPPFLAGS = -D_XOPEN_SOURCE=500
index 46549cc..796871d 100755 (executable)
 # Based in part on the XML obdctl modifications done by Brian Behlendorf 
 
 import sys, getopt, types
-import string, os, stat, popen2, socket, time, random, fcntl, FCNTL, select
+import string, os, stat, popen2, socket, time, random, fcntl, select
 import re, exceptions
 import xml.dom.minidom
 
+if sys.version[0] == '1':
+    from FCNTL import F_GETFL, F_SETFL
+else:
+    from fcntl import F_GETFL, F_SETFL
+
 # Global parameters
 TCP_ACCEPTOR = ''
 MAXTCPBUF = 1048576
@@ -72,7 +77,7 @@ config.xml          Lustre configuration in xml format.
                     Levels are aproximatly like:
                             10 - network
                             20 - device, ldlm
-                            30 - obd, mdd
+                            30 - osd, mdd
                             40 - mds, ost
                             50 - mdc, osc
                             60 - lov
@@ -294,8 +299,8 @@ class LCTLInterface:
                 raise CommandError('lctl', "unable to find lctl binary.")
 
     def set_nonblock(self, fd):
-        fl = fcntl.fcntl(fd, FCNTL.F_GETFL)
-        fcntl.fcntl(fd, FCNTL.F_SETFL, fl | os.O_NDELAY)
+        fl = fcntl.fcntl(fd, F_GETFL)
+        fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
 
     def run(self, cmds):
         """
@@ -464,8 +469,8 @@ class LCTLInterface:
         cmds = """
   ignore_errors
   device $%s
-  cleanup
-  detach %s
+  cleanup %s
+  detach
   quit""" % (name, ('', 'force')[config.force()])
         self.run(cmds)
 
@@ -575,23 +580,20 @@ def is_block(path):
 
 # build fs according to type
 # fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+    block_cnt = ''
+    if devsize:
+        # devsize is in 1k, and fs block count is in 4k
+        block_cnt = devsize/4
+
     if(fstype in ('ext3', 'extN')):
-        mkfs = 'mkfs.ext2 -j -b 4096'
+        mkfs = 'mkfs.ext2 -j -b 4096 -F '
     elif (fstype == 'reiserfs'):
-        mkfs = 'mkfs.reiserfs -f'
+        mkfs = 'mkreiserfs -ff'
     else:
         print 'unsupported fs type: ', fstype
-    if not is_block(dev):
-        if(fstype in ('ext3', 'extN')):
-            force = '-F'
-        elif (fstype == 'reiserfs'):
-            force = ''
-        else:
-            print 'unsupported fs type: ', fstype
-    else:
-        force = ''
-    (ret, out) = run (mkfs, force, dev)
+
+    (ret, out) = run (mkfs, dev, block_cnt)
     if ret:
         panic("Unable to build fs:", dev)
     # enable hash tree indexing on fsswe
@@ -676,7 +678,7 @@ def block_dev(dev, size, fstype, format):
     if not is_block(dev):
         dev = init_loop(dev, size, fstype)
     if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
-        mkfs(fstype, dev)
+        mkfs(dev, size, fstype)
 
 #    else:
 #        panic("device:", dev,
@@ -869,14 +871,13 @@ class Network(Module):
             if not self.nid:
                 panic("unable to set nid for", self.net_type, self.nid)
             debug("nid:", self.nid)
-
         self.add_portals_module("linux/oslib", 'portals')
         if node_needs_router():
             self.add_portals_module("linux/router", 'kptlrouter')
         if self.net_type == 'tcp':
             self.add_portals_module("linux/socknal", 'ksocknal')
         if self.net_type == 'toe':
-            self.add_portals_odule("/linux/toenal", 'ktoenal')
+            self.add_portals_module("/linux/toenal", 'ktoenal')
         if self.net_type == 'elan':
             self.add_portals_module("/linux/rqswnal", 'kqswnal')
         if self.net_type == 'gm':
@@ -897,7 +898,7 @@ class Network(Module):
             lctl.add_route(net_type, gw, lo, hi)
             if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
                 srvdb = self.db.nid2server(lo)
-                if not srv:
+                if not srvdb:
                     panic("no server for nid", lo)
                 else:
                     srv = Network(srvdb)
@@ -905,14 +906,15 @@ class Network(Module):
 
             
         lctl.network(self.net_type, self.nid)
-        lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
+        if not is_prepared("RPCDEV_UUID"):
+            lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
 
     def cleanup(self):
         self.info(self.net_type, self.nid, self.port)
         for net_type, gw, lo, hi in self.db.get_route_tbl():
             if self.net_type in ('tcp', 'toe') and hi == '':
                 srvdb = self.db.nid2server(lo)
-                if not srv:
+                if not srvdb:
                     panic("no server for nid", lo)
                 else:
                     srv = Network(srvdb)
@@ -930,9 +932,10 @@ class Network(Module):
                 cleanup_error(e.rc)
               
         try:
-            lctl.cleanup("RPCDEV", "RPCDEV_UUID")
+            if is_prepared("RPCDEV_UUID"):
+                lctl.cleanup("RPCDEV", "RPCDEV_UUID")
         except CommandError, e:
-            print "cleanup failed: ", self.name
+            print "cleanup failed: RPCDEV"
             e.dump()
             cleanup_error(e.rc)
         try:
@@ -959,6 +962,8 @@ class LDLM(Module):
 class LOV(Module):
     def __init__(self,db):
         Module.__init__(self, 'LOV', db)
+        self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('lov', 'lov')
         self.mds_uuid = self.db.get_first_ref('mds')
         mds= self.db.lookup(self.mds_uuid)
         self.mds_name = mds.getName()
@@ -967,66 +972,49 @@ class LOV(Module):
         self.pattern = self.db.get_val_int('stripepattern', 0)
         self.devlist = self.db.get_refs('obd')
         self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('lov', 'lov')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
+        self.osclist = []
         for obd_uuid in self.devlist:
             obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
+            osc = get_osc(obd, self.name)
             if osc:
-                try:
-                    # Ignore connection failures, because the LOV will DTRT with
-                    # an unconnected OSC.
-                    osc.prepare(ignore_connect_failure=1)
-                except CommandError:
-                    print "Error preparing OSC %s (inactive)\n" % osc_uuid
+                self.osclist.append(osc)
             else:
-                panic('osc not found:', osc_uuid)
-        mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+                panic('osc not found:', obd_uuid)
+            
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        for osc in self.osclist:
+            try:
+                # Ignore connection failures, because the LOV will DTRT with
+                # an unconnected OSC.
+                osc.prepare(ignore_connect_failure=1)
+            except CommandError:
+                print "Error preparing OSC %s (inactive)\n" % osc.uuid
+        self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
         self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                   self.stripe_off, self.pattern, self.devlist, self.mds_name)
         lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (mdc_uuid))
+                    setup ="%s" % (self.mdc_uuid))
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup()
-            else:
-                panic('osc not found:', osc_uuid)
-        Module.cleanup(self)
-        cleanup_mdc(self.db, self.mds_uuid)
-
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        for osc in self.osclist:
+            osc.cleanup()
+        cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.load_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.load_module()
+            break
         Module.load_module(self)
 
-
     def cleanup_module(self):
         Module.cleanup_module(self)
-        for obd_uuid in self.devlist:
-            obd = self.db.lookup(obd_uuid)
-            osc = get_osc(obd)
-            if osc:
-                osc.cleanup_module()
-                break
-            else:
-                panic('osc not found:', osc_uuid)
+        for osc in self.osclist:
+            osc.cleanup_module()
+            break
 
 class LOVConfig(Module):
     def __init__(self,db):
@@ -1055,7 +1043,7 @@ class MDSDEV(Module):
         self.size = self.db.get_val_int('devsize', 0)
         self.fstype = self.db.get_val('fstype', '')
         # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
-        self.uuid = self.db.get_first_ref('mds')
+        self.uuid = self.db.get_first_ref('target')
         mds = self.db.lookup(self.uuid)
         self.name = mds.getName()
         self.lovconfig_uuids = mds.get_refs('lovconfig')
@@ -1090,60 +1078,26 @@ class MDSDEV(Module):
                 print "cleanup failed: ", self.name
                 e.dump()
                 cleanup_error(e.rc)
-        if not is_prepared(self.uuid):
-            return
-        Module.cleanup(self)
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
         clean_loop(self.devname)
 
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
-    def __init__(self,db):
-        self.mds_uuid = db.getUUID()
-        self.mds_name = db.getName()
-        self.db = db
-        node_name =  config.select(self.mds_name)
-        if node_name:
-            self.mdd_uuid = self.db.get_mdd(node_name, self.mds_uuid)
-        else:
-            self.mdd_uuid = db.get_first_ref('active')
-        if not self.mdd_uuid:
-            panic("No MDSDEV found for MDS service:", self.mds_name)
-        self.module_name = 'MDC'
-        self.kmodule_list = []
-        self._server = None
-        self._connected = 0
-
-        host = socket.gethostname()
-        self.name = 'MDC_%s' % (self.mds_name)
-        self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
-                                      int(random.random() * 1048576))
-
-        self.lookup_server(self.mdd_uuid)
-        self.add_lustre_module('mdc', 'mdc')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.mds_uuid)
-        srv = self.get_server()
-        lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-        lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
-                        setup ="%s %s" %(self.mds_uuid, srv.uuid))
-            
-class OBD(Module):
+class OSD(Module):
     def __init__(self, db):
-        Module.__init__(self, 'OBD', db)
-        self.obdtype = self.db.get_val('obdtype')
+        Module.__init__(self, 'OSD', db)
+        self.osdtype = self.db.get_val('osdtype')
         self.devname = self.db.get_val('devpath', '')
         self.size = self.db.get_val_int('devsize', 0)
         self.fstype = self.db.get_val('fstype', '')
-        self.active_target = self.db.get_first_ref('active')
+        self.uuid = self.db.get_first_ref('target')
+        ost = self.db.lookup(self.uuid)
+        self.name = ost.getName()
         # FIXME: if fstype not set, then determine based on kernel version
         self.format = self.db.get_val('autoformat', 'yes')
         if self.fstype == 'extN':
             self.add_lustre_module('extN', 'extN') 
-        self.add_lustre_module(self.obdtype, self.obdtype)
+        self.add_lustre_module('ost', 'ost')
+        self.add_lustre_module(self.osdtype, self.osdtype)
         if self.fstype:
             self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
 
@@ -1153,96 +1107,67 @@ class OBD(Module):
     def prepare(self):
         if is_prepared(self.uuid):
             return
-        self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
-        if self.obdtype == 'obdecho':
+        self.info(self.osdtype, self.devname, self.size, self.fstype, self.format)
+        if self.osdtype == 'obdecho':
             blkdev = ''
         else:
             blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
-        lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+        lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
                     setup ="%s %s" %(blkdev, self.fstype))
-    def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
-        Module.cleanup(self)
-        if not self.obdtype == 'obdecho':
-            clean_loop(self.devname)
-
-class COBD(Module):
-    def __init__(self, db):
-        Module.__init__(self, 'COBD', db)
-        self.real_uuid = self.db.get_first_ref('realobd')
-        self.cache_uuid = self.db.get_first_ref('cacheobd')
-        self.add_lustre_module('cobd' , 'cobd')
-
-    # need to check /proc/mounts and /etc/mtab before
-    # formatting anything.
-    # FIXME: check if device is already formatted.
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.real_uuid, self.cache_uuid)
-        lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
-                    setup ="%s %s" %(self.real_uuid, self.cache_uuid))
-
-class OST(Module):
-    def __init__(self,db):
-        Module.__init__(self, 'OST', db)
-        self.obd_uuid = self.db.get_first_ref('obd')
-        self.add_lustre_module('ost', 'ost')
-
-    def prepare(self):
-        if is_prepared(self.uuid):
-            return
-        self.info(self.obd_uuid)
-        lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
-                    setup ="%s" % (self.obd_uuid))
-
+        if not is_prepared('OSS_UUID'):
+            lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+                        setup ="")
 
-# virtual interface for  OSC and LOV
-class VOSC(Module):
-    def __init__(self,db):
-        Module.__init__(self, 'VOSC', db)
-        if db.get_class() == 'lov':
-            self.osc = LOV(db)
-        else:
-            self.osc = get_osc(db)
-    def get_uuid(self):
-        return self.osc.uuid
-    def prepare(self):
-        self.osc.prepare()
     def cleanup(self):
-        self.osc.cleanup()
-    def load_module(self):
-        self.osc.load_module()
-    def cleanup_module(self):
-        self.osc.cleanup_module()
-        
+        if is_prepared('OSS_UUID'):
+            try:
+                lctl.cleanup("OSS", "OSS_UUID")
+            except CommandError, e:
+                print "cleanup failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
+        if not self.osdtype == 'obdecho':
+            clean_loop(self.devname)
 
-class OSC(Module):
-    def __init__(self, db, obd_name, obd_uuid, ost_uuid):
+# Generic client module, used by OSC and MDC
+class Client(Module):
+    def __init__(self, db, module, owner, target_name, target_uuid):
+        self.target_name = target_name
+        self.target_uuid = target_uuid
         self.db = db
-        self.module_name = 'OSC'
-        self.name = 'OSC_%s' % (obd_name)
-        self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
+        node_name =  config.select(target_name)
+        if node_name:
+            self.tgt_dev_uuid = self.db.get_target_device(node_name, target_uuid)
+        else:
+            self.tgt_dev_uuid = db.get_first_ref('active')
+        if not self.tgt_dev_uuid:
+            panic("No target device found for target:", target_name)
         self.kmodule_list = []
         self._server = None
         self._connected = 0
 
-        self.obd_uuid = obd_uuid
-        self.ost_uuid = ost_uuid
-        debug("OSC:", obd_uuid, ost_uuid)
-        self.lookup_server(self.ost_uuid)
-        self.add_lustre_module('osc', 'osc')
+        self.module = module
+        self.module_name = string.upper(module)
+        self.name = '%s_%s_%s' % (self.module_name, owner, target_name)
+        self.uuid = '%05x_%s_%05x' % (int(random.random() * 1048576), self.name,
+                                      int(random.random() * 1048576))
+        self.uuid = self.uuid[0:36]
+        self.lookup_server(self.tgt_dev_uuid)
+        self.add_lustre_module(module, module)
 
     def prepare(self, ignore_connect_failure = 0):
         if is_prepared(self.uuid):
             return
-        self.info(self.obd_uuid, self.ost_uuid)
+        self.info(self.target_uuid)
         srv = self.get_server()
         try:
             if local_net(srv):
+                #debug("LOCAL NET")
                 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
             else:
+                #debug("NOT LOCAL NET")
                 r =  find_route(srv)
                 if r:
                     lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
@@ -1251,16 +1176,15 @@ class OSC(Module):
         except CommandError:
             if (ignore_connect_failure == 0):
                 pass
-            
-        lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
-                    setup ="%s %s" %(self.obd_uuid, srv.uuid))
+        lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+                        setup ="%s %s" %(self.target_uuid, srv.uuid))
 
     def cleanup(self):
         srv = self.get_server()
         if local_net(srv):
             Module.cleanup(self)
         else:
-            self.info(self.obd_uuid, self.ost_uuid)
+            self.info(self.target_uuid)
             r =  find_route(srv)
             if r:
                 try:
@@ -1270,7 +1194,61 @@ class OSC(Module):
                     e.dump()
                     cleanup_error(e.rc)
             Module.cleanup(self)
+
+
+
+class MDC(Client):
+    def __init__(self, db, owner, target_name, target_uuid):
+         Client.__init__(self, db, 'mdc', owner, target_name, target_uuid)
+
+class OSC(Client):
+    def __init__(self, db, owner, target_name, target_uuid):
+         Client.__init__(self, db, 'osc', owner, target_name, target_uuid)
+
             
+class COBD(Module):
+    def __init__(self, db):
+        Module.__init__(self, 'COBD', db)
+        self.real_uuid = self.db.get_first_ref('realobd')
+        self.cache_uuid = self.db.get_first_ref('cacheobd')
+        self.add_lustre_module('cobd' , 'cobd')
+
+    # need to check /proc/mounts and /etc/mtab before
+    # formatting anything.
+    # FIXME: check if device is already formatted.
+    def prepare(self):
+        if is_prepared(self.uuid):
+            return
+        self.info(self.real_uuid, self.cache_uuid)
+        lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
+                    setup ="%s %s" %(self.real_uuid, self.cache_uuid))
+
+
+# virtual interface for  OSC and LOV
+class VOSC(Module):
+    def __init__(self,db, owner):
+        Module.__init__(self, 'VOSC', db)
+        if db.get_class() == 'lov':
+            self.osc = LOV(db)
+        else:
+            self.osc = get_osc(db, owner)
+    def get_uuid(self):
+        return self.osc.uuid
+    def prepare(self):
+        self.osc.prepare()
+    def cleanup(self):
+        self.osc.cleanup()
+    def load_module(self):
+        self.osc.load_module()
+    def cleanup_module(self):
+        self.osc.cleanup_module()
+    def need_mdc(self):
+        return self.db.get_class() != 'lov'
+    def get_mdc_uuid(self):
+        if self.db.get_class() == 'lov':
+            return self.osc.mdc_uuid
+        return ''
+
 
 class ECHO_CLIENT(Module):
     def __init__(self,db):
@@ -1278,7 +1256,7 @@ class ECHO_CLIENT(Module):
         self.add_lustre_module('obdecho', 'obdecho')
         self.obd_uuid = self.db.get_first_ref('obd')
         obd = self.db.lookup(self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.osc = VOSC(obd, self.name)
 
     def prepare(self):
         if is_prepared(self.uuid):
@@ -1290,8 +1268,8 @@ class ECHO_CLIENT(Module):
                     setup = self.osc.get_uuid())
 
     def cleanup(self):
-        if not is_prepared(self.uuid):
-            return
+        if is_prepared(self.uuid):
+            Module.cleanup(self)
         self.osc.cleanup()
 
     def load_module(self):
@@ -1308,18 +1286,22 @@ class Mountpoint(Module):
         self.path = self.db.get_val('path')
         self.mds_uuid = self.db.get_first_ref('mds')
         self.obd_uuid = self.db.get_first_ref('obd')
-        self.add_lustre_module('mdc', 'mdc')
-        self.add_lustre_module('llite', 'llite')
         obd = self.db.lookup(self.obd_uuid)
-        self.osc = VOSC(obd)
+        self.vosc = VOSC(obd, self.name)
+        if self.vosc.need_mdc():
+            self.add_lustre_module('mdc', 'mdc')
+        self.add_lustre_module('llite', 'llite')
 
 
     def prepare(self):
-        self.osc.prepare()
-        mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+        self.vosc.prepare()
+        if self.vosc.need_mdc():
+            mdc_uuid = prepare_mdc(self.db, self.name,  self.mds_uuid)
+        else:
+            mdc_uuid = self.vosc.get_mdc_uuid()
         self.info(self.path, self.mds_uuid, self.obd_uuid)
         cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
-              (self.osc.get_uuid(), mdc_uuid, self.path)
+              (self.vosc.get_uuid(), mdc_uuid, self.path)
         run("mkdir", self.path)
         ret, val = run(cmd)
         if ret:
@@ -1338,27 +1320,21 @@ class Mountpoint(Module):
         if fs_is_mounted(self.path):
             panic("fs is still mounted:", self.path)
 
-        self.osc.cleanup()
-        cleanup_mdc(self.db, self.mds_uuid)
+        self.vosc.cleanup()
+        if self.vosc.need_mdc():
+            cleanup_mdc(self.db, self.name, self.mds_uuid)
 
     def load_module(self):
-        self.osc.load_module()
+        self.vosc.load_module()
         Module.load_module(self)
     def cleanup_module(self):
         Module.cleanup_module(self)
-        self.osc.cleanup_module()
+        self.vosc.cleanup_module()
 
 
 # ============================================================
 # XML processing and query
 
-# OSC is no longer in the xml, so we have to fake it.
-# this is getting ugly and begging for another refactoring
-def get_osc(obd_dom):
-    obd = OBD(obd_dom)
-    osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
-    return osc
-
 class LustreDB:
     def lookup(self, uuid):
         """ lookup returns a new LustreDB instance"""
@@ -1419,10 +1395,10 @@ class LustreDB:
         return ost.lookup(uuid)
 
     def nid2server(self, nid):
-        netlist = self.parent.parent.attrs['network']
+        netlist = self.lookup_class('network')
         for net_db in netlist:
             if net_db.get_val('nid') == nid: 
-                return net
+                return net_db
         return None
     
     # the tag name is the service type
@@ -1438,7 +1414,7 @@ class LustreDB:
             ret = 10
         elif type in ('device', 'ldlm'):
             ret = 20
-        elif type in ('obd', 'mdd', 'cobd'):
+        elif type in ('osd', 'mdd', 'cobd'):
             ret = 30
         elif type in ('mdsdev','ost'):
             ret = 40
@@ -1470,23 +1446,33 @@ class LustreDB:
         list.sort()
         return list
 
-    # Find the mdsdev attached to node_name that points to
-    # mds_uuid
-    # node->profiles->mdsdev_refs->mds
-    def get_mdd(self, node_name, mds_uuid):
+    # Find the target_device for target on a node
+    # node->profiles->device_refs->target
+    def get_target_device(self, node_name, target_uuid):
         node_db = self.lookup_name(node_name)
         if not node_db:
             return None
         prof_list = node_db.get_refs('profile')
         for prof_uuid in prof_list:
             prof_db = node_db.lookup(prof_uuid)
-            mdd_list = prof_db.get_refs('mdsdev')
-            for mdd_uuid in mdd_list:
-                mdd = self.lookup(mdd_uuid)
-                if mdd.get_first_ref('mds') == mds_uuid:
-                    return mdd_uuid
+            ref_list = prof_db.get_all_refs()
+            for ref in ref_list:
+                dev = self.lookup(ref[1])
+                if dev and dev.get_first_ref('target') == target_uuid:
+                    return ref[1]
         return None
-        
+
+    # get all network uuids for this node
+    def get_networks(self):
+        ret = []
+        prof_list = self.get_refs('profile')
+        for prof_uuid in prof_list:
+            prof_db = self.lookup(prof_uuid)
+            net_list = prof_db.get_refs('network')
+            debug("get_networks():", prof_uuid, net_list)
+            for net_uuid in net_list:
+                ret.append(net_uuid)
+        return ret
 
 class LustreDB_XML(LustreDB):
     def __init__(self, dom, root_node):
@@ -1599,24 +1585,24 @@ class LustreDB_XML(LustreDB):
         """ Return the routes as a list of tuples of the form:
         [(type, gw, lo, hi),]"""
         res = []
-        tbl = self.dom_node.getElementsByTagName('route_tbl')
+        tbl = self.dom_node.getElementsByTagName('routetbl')
         for t in tbl:
             routes = t.getElementsByTagName('route')
             for r in routes:
                 lo = self.xmlattr(r, 'lo')
-                hi = self.xmlattr(r, 'hi', '')
+                hi = self.xmlattr(r, 'hi')
                 res.append((type, gw, lo, hi))
         return res
 
     def get_route_tbl(self):
         ret = []
-        tbls = self.dom_node.getElementsByTagName('route_tbl')
+        tbls = self.dom_node.getElementsByTagName('routetbl')
         for tbl in tbls:
             for r in tbl.getElementsByTagName('route'):
                 net_type = self.xmlattr(r, 'type')
                 gw = self.xmlattr(r, 'gw')
                 lo = self.xmlattr(r, 'lo')
-                hi = self.xmlattr(r,'hi', '')
+                hi = self.xmlattr(r, 'hi')
                 ret.append((net_type, gw, lo, hi))
         return ret
 
@@ -1652,7 +1638,7 @@ class LustreDB_LDAP(LustreDB):
             self.l.protocol_version=ldap.VERSION3
             # user and pw only needed if modifying db
             self.l.bind_s("", "", ldap.AUTH_SIMPLE);
-        except ldap.LDAPerror, e:
+        except ldap.LDAPError, e:
             panic(e)
             # FIXME, do something useful here
 
@@ -1749,28 +1735,27 @@ class LustreDB_LDAP(LustreDB):
 # MDC UUID hack - 
 # FIXME: clean this mess up!
 #
-saved_mdc = {}
-def prepare_mdc(db, mds_uuid):
-    global saved_mdc
+# OSC is no longer in the xml, so we have to fake it.
+# this is getting ugly and begging for another refactoring
+def get_osc(ost_db, owner):
+    osc = OSC(ost_db, owner, ost_db.getName(), ost_db.getUUID())
+    return osc
+
+def get_mdc(db, owner, mds_uuid):
     mds_db = db.lookup(mds_uuid);
     if not mds_db:
         panic("no mds:", mds_uuid)
-    if saved_mdc.has_key(mds_uuid):
-        return saved_mdc[mds_uuid]
-    mdc = MDC(mds_db)
+    mdc = MDC(mds_db, owner, mds_db.getName(), mds_uuid)
+    return mdc
+
+def prepare_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
     mdc.prepare()
-    saved_mdc[mds_uuid] = mdc.uuid
     return mdc.uuid
 
-def cleanup_mdc(db, mds_uuid):
-    global saved_mdc
-    mds_db = db.lookup(mds_uuid);
-    if not mds_db:
-        panic("no mds:", mds_uuid)
-    if not saved_mdc.has_key(mds_uuid):
-        mdc = MDC(mds_db)
-        mdc.cleanup()
-        saved_mdc[mds_uuid] = mdc.uuid
+def cleanup_mdc(db, owner, mds_uuid):
+    mdc = get_mdc(db, owner, mds_uuid)
+    mdc.cleanup()
         
 
 ############################################################
@@ -1780,13 +1765,13 @@ routes = []
 local_node = []
 router_flag = 0
 
-def init_node(node_db):
-    global local_node, router_flag
-    netlist = node_db.lookup_class('network')
-    for db in netlist:
-        type = db.get_val('nettype')
-        gw = db.get_val('nid')
-        local_node.append((type, gw))
+def add_local_interfaces(node_db):
+    global local_node
+    debug("add_local")
+    for netuuid in node_db.get_networks():
+        net = node_db.lookup(netuuid)
+        debug("add_local", netuuid)
+        local_node.append((net.get_val('nettype'), net.get_val('nid')))
 
 def node_needs_router():
     return router_flag
@@ -1800,20 +1785,26 @@ def init_route_config(lustre):
     for node_db in list:
         if node_db.get_val_int('router', 0):
             router_flag = 1
+            #debug("init_route_config: found router", node_db.getName())
             for (local_type, local_nid) in local_node:
+                #debug("init_route_config:", local_type, local_nid)
                 gw = None
-                netlist = node_db.lookup_class('network')
-                for db in netlist:
-                    if local_type == db.get_val('type'):
-                        gw = db.get_val('server')
+                for netuuid in node_db.get_networks():
+                    db = node_db.lookup(netuuid)
+                    if local_type == db.get_val('nettype'):
+                        gw = db.get_val('nid')
                         break
+                #debug("init_route_config: gw is", gw)
                 if not gw:
                     continue
-                for db in netlist:
-                    if local_type != db.get_val('type'):
+                for netuuid in node_db.get_networks():
+                    db = node_db.lookup(netuuid)
+                    #debug("init_route_config: tbl: ", db.get_route_tbl())
+                    if local_type != db.get_val('nettype'):
                         for route in db.get_routes(local_type, gw):
                             routes.append(route)
-    
+    #debug("init_route_config routes:", routes)
+
 
 def local_net(net):
     global local_node
@@ -1830,19 +1821,18 @@ def find_route(net):
     to = net.nid
     debug ('looking for route to', to_type,to)
     for r in routes:
+        #debug("find_route: ", r)
         if  r[2] == to:
             return r
     return None
            
-    
 
 ############################################################
 # lconf level logic
 # Start a service.
-def startService(db, module_flag):
+def newService(db):
     type = db.get_class()
     debug('Service:', type, db.getName(), db.getUUID())
-    # there must be a more dynamic way of doing this...
     n = None
     if type == 'ldlm':
         n = LDLM(db)
@@ -1850,39 +1840,19 @@ def startService(db, module_flag):
         n = LOV(db)
     elif type == 'network':
         n = Network(db)
-    elif type == 'obd':
-        n = OBD(db)
+    elif type == 'osd':
+        n = OSD(db)
     elif type == 'cobd':
         n = COBD(db)
-    elif type == 'ost':
-        n = OST(db)
     elif type == 'mdsdev':
         n = MDSDEV(db)
-    elif type == 'osc':
-        n = VOSC(db)
-    elif type == 'mdc':
-        n = MDC(db)
     elif type == 'mountpoint':
         n = Mountpoint(db)
     elif type == 'echoclient':
         n = ECHO_CLIENT(db)
     else:
         panic ("unknown service type:", type)
-
-    if module_flag:
-        if config.nomod():
-            return
-        if config.cleanup():
-            n.cleanup_module()
-        else:
-            n.load_module()
-    else:
-        if config.nosetup():
-            return
-        if config.cleanup():
-            n.cleanup()
-        else:
-            n.prepare()
+    return n
 
 #
 # Prepare the system to run lustre using a particular profile
@@ -1892,15 +1862,35 @@ def startService(db, module_flag):
 #  * make sure partitions are in place and prepared
 #  * initialize devices with lctl
 # Levels is important, and needs to be enforced.
-def startProfile(prof_db, module_flag):
-    if not prof_db:
-        panic("profile:", profile, "not found.")
-    services = prof_db.getServices()
-    if config.cleanup():
-        services.reverse()
+def for_each_profile(db, prof_list, operation):
+    for prof_uuid in prof_list:
+        prof_db = db.lookup(prof_uuid)
+        if not prof_db:
+            panic("profile:", profile, "not found.")
+        services = prof_db.getServices()
+        operation(services)
+        
+def doSetup(services):
     for s in services:
-        startService(s[1], module_flag)
+        n = newService(s[1])
+        n.prepare()
+    
+def doModules(services):
+    for s in services:
+        n = newService(s[1])
+        n.load_module()
 
+def doCleanup(services):
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup()
+
+def doUnloadModules(services):
+    services.reverse()
+    for s in services:
+        n = newService(s[1])
+        n.cleanup_module()
 
 #
 # Load profile for 
@@ -1921,32 +1911,38 @@ def doHost(lustreDB, hosts):
     timeout = node_db.get_val_int('timeout', 0)
 
     if not router_flag:
-        init_node(node_db)
+        add_local_interfaces(node_db)
         init_route_config(lustreDB)
 
     # Two step process: (1) load modules, (2) setup lustre
     # if not cleaning, load modules first.
-    module_flag = not config.cleanup()
     prof_list = node_db.get_refs('profile')
-    for prof_uuid in prof_list:
-        prof_db = node_db.lookup(prof_uuid)
-        startProfile(prof_db, module_flag)
 
-    if not config.cleanup():
+    if config.cleanup():
+        if config.force():
+            # the command line can override this value
+            timeout = 5
+        sys_set_timeout(timeout)
+        sys_set_recovery_upcall(recovery_upcall)
+
+        for_each_profile(node_db, prof_list, doCleanup)
+        for_each_profile(node_db, prof_list, doUnloadModules)
+
+    else:
+        for_each_profile(node_db, prof_list, doModules)
+
         sys_set_debug_path()
         script = config.gdb_script()
         run(lctl.lctl, ' modules >', script)
         if config.gdb():
-            # dump /tmp/ogdb and sleep/pause here
             log ("The GDB module script is in", script)
+            # pause, so user has time to break and
+            # load the script
             time.sleep(5)
         sys_set_timeout(timeout)
         sys_set_recovery_upcall(recovery_upcall)
-            
-    module_flag = not module_flag
-    for prof_uuid in prof_list:
-        prof_db = node_db.lookup(prof_uuid)
-        startProfile(prof_db, module_flag)
+
+        for_each_profile(node_db, prof_list, doSetup)
 
 ############################################################
 # Command line processing
index 2217058..fb81dd3 100644 (file)
@@ -124,7 +124,7 @@ command_t cmdlist[] = {
          "type specific device configuration information\n"
          "usage: setup <args...>"},
         {"cleanup", jt_obd_cleanup, 0, "cleanup previously setup device\n"
-         "usage: cleanup"},
+         "usage: cleanup [force]"},
         {"detach", jt_obd_detach, 0,
          "remove driver (and name and uuid) from current device\n"
          "usage: detach"},
@@ -164,6 +164,15 @@ command_t cmdlist[] = {
         {"test_brw", jt_obd_test_brw, 0,
          "do <num> bulk read/writes (<npages> per I/O, on OST object <objid>)\n"
          "usage: test_brw [t]<num> [write [verbose [npages [[t]objid]]]]"},
+        {"get_stripe", jt_obd_get_stripe, 0,
+         "show stripe info for an echo client object\n"
+         "usage: get_stripe objid\n"},
+        {"set_stripe", jt_obd_set_stripe, 0,
+         "set stripe info for an echo client object\n"
+         "usage: set_stripe objid[=width!count[@offset][:id:id...]\n"},
+        {"unset_stripe", jt_obd_unset_stripe, 0,
+         "unset stripe info for an echo client object\n"
+         "usage: unset_stripe objid\n"},
         {"test_ldlm", jt_obd_test_ldlm, 0,
          "perform lock manager test\n"
          "usage: test_ldlm"},
@@ -180,6 +189,12 @@ command_t cmdlist[] = {
         {"newconn", jt_obd_newconn, 0, "newconn <olduuid> [newuuid]"},
         {"failconn", jt_obd_failconn, 0, "failconn <uuid>"},
         {"lookup", jt_obd_mdc_lookup, 0, "usage: lookup <directory> <file>"},
+        {"notransno", jt_obd_no_transno, 0,
+         "disable sending of committed-transno updates\n"
+         "usage: notransno"},
+        {"readonly", jt_obd_set_readonly, 0,
+         "disable writes to the underlying device\n"
+         "usage: readonly"},
 
         /* Debug commands */
         {"======== debug =========", jt_noop, 0, "debug"},
index 93777d6..1b75135 100644 (file)
@@ -11,7 +11,6 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
-#define        printk printf
 #include <linux/lustre_lib.h>
 #include <linux/lustre_lite.h>
 #include <linux/obd_lov.h>
@@ -34,14 +33,14 @@ char *              shortOpts = "ho:qv";
 char *         usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
 
 int            max_ost_count = MAX_LOV_UUID_COUNT;
-obd_uuid_t *   obduuid;
+struct obd_uuid *      obduuid;
 __u32          obdcount;
 __u32          obdindex;
 char *         buf;
 int            buflen;
 struct obd_ioctl_data data;
 struct lov_desc desc;
-obd_uuid_t *   uuids;
+struct obd_uuid *      uuids;
 int            uuidslen;
 int            cfglen;
 struct lov_mds_md *lmm;
@@ -74,7 +73,7 @@ main (int argc, char **argv) {
                                exit(1);
                        }
 
-                       obduuid = (obd_uuid_t *)optarg;
+                       obduuid = (struct obd_uuid *)optarg;
                        break;
                case 'h':
                        usage(stdout);
@@ -155,7 +154,7 @@ init()
        }
 
        lmm = (struct lov_mds_md *)buf;
-       uuids = (obd_uuid_t *)buf;
+       uuids = (struct obd_uuid *)buf;
 }
 
 void
@@ -261,7 +260,7 @@ processFile(const char *path, const struct stat *sp, int flag, struct FTW *ftwp)
 __u32
 getobdindex(const char *path)
 {
-       obd_uuid_t *uuidp;
+       struct obd_uuid *uuidp;
        int fd;
        int rc;
        int i;
diff --git a/lustre/utils/llparser.pm b/lustre/utils/llparser.pm
new file mode 100644 (file)
index 0000000..5cee31f
--- /dev/null
@@ -0,0 +1,399 @@
+#!/usr/bin/perl
+# Copyright (C) 2002 Cluster File Systems, Inc.
+# Author: Hariharan Thantry <thantry@users.sourceforge.net>
+
+#   This file is part of Lustre, http://www.lustre.org.
+#
+#   Lustre is free software; you can redistribute it and/or
+#   modify it under the terms of version 2 of the GNU General Public
+#   License as published by the Free Software Foundation.
+#
+#   Lustre is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with Lustre; if not, write to the Free Software
+#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+
+package llparser;
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(parse_file print_rpcrelations parse_foptions %ll_subsystems 
+       %subsysnum %trace_masks $e_subsys $e_mask $e_processor $e_time 
+       $e_file $e_line $e_function $e_pid $e_stack $e_fmtstr $e_backref 
+       $e_treeparent $e_numchildren $e_youngestchild $e_next $e_pidhead 
+       $e_rpcsndrcv $e_rpcpid $e_rpcxid $e_rpcnid $e_rpcopc $e_rpcnext 
+       $e_curlineref $SEND $RCV);
+
+# Field indices of a parsed log-line record (an array ref).  Indices 0-9
+# line up with the ten capture groups of $REGEX; indices 10-15 are the
+# link fields (tree parent, child count, youngest child, pid head, next,
+# back reference).
+($e_subsys, 
+ $e_mask, 
+ $e_processor, 
+ $e_time, 
+ $e_file, 
+ $e_line, 
+ $e_function, 
+ $e_pid, 
+ $e_stack, 
+ $e_fmtstr, 
+ $e_treeparent, 
+ $e_numchildren,
+ $e_youngestchild, 
+ $e_pidhead,
+ $e_next, 
+ $e_backref) = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+# Field indices of a 7-element RPC record.
+($e_rpcpid,
+ $e_rpcxid,
+ $e_rpcnid,
+ $e_rpcopc,
+ $e_rpcnext, 
+ $e_rpcsndrcv,
+ $e_curlineref) = (0, 1, 2, 3, 4, 5, 6); 
+
+# Values stored in the $e_rpcsndrcv field of an RPC record.
+$SEND = 0;
+$RCV  = 1;
+
+# Matches one debug-log line.  Capture groups, in order: subsystem, mask,
+# processor, time, file, line, function, pid, stack, message text.
+$REGEX=qr/^\s*(\w+)\s*:\s*(\d+)\s*:\s*(\d+)\s*:\s*(\d+\.(?:\d+))\s*\(\s*([^:]+)\s*:\s*(\d+)\s*:\s*([^()]+)\s*\(\)\s*(?:(?:\d+)\s*\|\s*)?(\d+)\s*\+\s*(\d+)\s*(?:.*)\):(.*)$/;
+
+# Matches the message text of a "Sending"/"Handling" RPC line and captures
+# pid, xid, nid and opc.  An optional 0x prefix on xid and nid is matched
+# but not captured.
+$RPCREGEX = qr/^\s*(?:Sending|Handling)\s*RPC\s*pid:xid:nid:opc\s*(\d+):(?:0x)?(\w+):(?:0x)?(\w+):(\d+)\s*$/;
+# Matches any string containing --server or -s.
+# NOTE(review): the pattern is unanchored, so a file name that merely
+# contains "-s" also matches -- confirm this is intended.
+$FILEOPTIONREGEX = qr/(--server)|(-s)/;
+# Used to tell "Sending" RPC lines from "Handling" ones.
+$SENDING = qr/Sending/;
+
+
+# Needs to match definition in portals/include/linux/kp30.h
+# Maps a two-digit lowercase hex subsystem number (as a string) to the
+# subsystem name.
+%ll_subsystems = ("00" => "UNDEFINED", "01" => "MDC", "02" => "MDS", 
+                 "03" => "OSC",  "04" => "OST",  "05" => "CLASS",
+                 "06" => "OBDFS","07" => "LLITE","08" => "RPC",
+                 "09" => "EXT2OBD","0a" => "PORTALS","0b" => "SOCKNAL",
+                 "0c" => "QSWNAL","0d" => "PINGER","0e" => "FILTER",
+                 "0f" => "TRACE","10" => "ECHO","11" => "LDLM",
+                 "12" => "LOV", "13" => "GMNAL","14" => "PTLROUTER" );
+
+# Subsystem name -> subsystem number.  $subsysnum is a reference to the
+# exported %subsysnum hash, so the $subsysnum->{...} accesses used in this
+# module and %subsysnum as seen by importers refer to the same data.
+$subsysnum = \%subsysnum;
+$subsysnum->{UNDEFINED} = 0;
+$subsysnum->{MDC} = 1;
+$subsysnum->{MDS} = 2;
+$subsysnum->{OSC} = 3;
+$subsysnum->{OST} = 4;
+$subsysnum->{CLASS} = 5;
+$subsysnum->{OBDFS} = 6;
+$subsysnum->{LLITE} = 7;
+$subsysnum->{RPC} = 8;
+$subsysnum->{EXT2OBD} = 9;
+$subsysnum->{PORTALS} = 10;
+$subsysnum->{SOCKNAL} = 11;
+$subsysnum->{QSWNAL} = 12;
+$subsysnum->{PINGER} = 13;
+$subsysnum->{FILTER} = 14;
+$subsysnum->{TRACE} = 15; # obdtrace, not to be confused with D_TRACE
+$subsysnum->{ECHO} = 16;
+$subsysnum->{LDLM} = 17;
+$subsysnum->{LOV} = 18;
+$subsysnum->{GMNAL} = 19;
+$subsysnum->{PTLROUTER} = 20;
+
+# Debug mask name -> bit value.  $tracemasks is a reference to the exported
+# %trace_masks hash, so the $tracemasks->{...} accesses used in this module
+# and %trace_masks as seen by importers refer to the same data.
+$tracemasks = \%trace_masks;
+$tracemasks->{TRACE} = 1 << 0; # ENTRY/EXIT markers
+$tracemasks->{INODE} = 1 << 1;
+$tracemasks->{SUPER} = 1 << 2;
+$tracemasks->{EXT2} = 1 << 3; # anything from ext2_debug
+$tracemasks->{MALLOC} = 1 << 4; # print malloc, free information
+$tracemasks->{CACHE} = 1 << 5; # cache-related items
+$tracemasks->{INFO} = 1 << 6; # general information
+$tracemasks->{IOCTL} = 1 << 7; # ioctl related information
+$tracemasks->{BLOCKS} = 1 << 8; # ext2 block allocation
+$tracemasks->{NET} = 1 << 9; # network communications
+$tracemasks->{WARNING} = 1 << 10;
+$tracemasks->{BUFFS} = 1 << 11;
+$tracemasks->{OTHER} = 1 << 12;
+$tracemasks->{DENTRY} = 1 << 13;
+$tracemasks->{PORTALS} = 1 << 14; # ENTRY/EXIT markers
+$tracemasks->{PAGE} = 1 << 15; # bulk page handling
+$tracemasks->{DLMTRACE} = 1 << 16;
+$tracemasks->{ERROR} = 1 << 17; # CERROR(...) == CDEBUG(D_ERROR, ...)
+$tracemasks->{EMERG} = 1 << 18; # CEMERG(...) == CDEBUG(D_EMERG, ...)
+$tracemasks->{HA} = 1 << 19; # recovery and failover
+# RPC trace lines need their own bit, distinct from HA.
+# NOTE(review): 1 << 20 is taken to be D_RPCTRACE in kp30.h -- confirm
+# against the header in use.
+$tracemasks->{RPCTRACE} = 1 << 20;
+
+# Contains all the file names, the first filename is the 
+# client. After that are all servers.
+my @filearray = ();
+
+
+# Create backlinks between array entries based on the calling sequence
+# For each new PID encountered, the first entry will be present in the 
+# PID hash.
+
+# Link parsed log records into per-PID chains/trees and collect RPC records.
+#
+# Arguments:
+#   $arrayref   - ref to the array of line records (see the $e_* indices)
+#   $pidhashref - ref to a hash keyed by PID; the value is read as the
+#                 previous record for that PID and is replaced by the
+#                 record just processed on every iteration
+#   $stitchref  - ref to a hash that receives one 7-element RPC record for
+#                 each line whose message text matches $RPCREGEX, keyed by
+#                 that line's time field
+#
+# Records whose time field is 0 are skipped.  After the loop, match_rpcs()
+# is run on $stitchref.  Returns $arrayref.
+sub create_links {
+    my $arrayref = shift @_;
+    my $pidhashref = shift @_;
+    my $stitchref = shift @_;
+    my %local_hash;
+    my $hash_lineref;
+    my $tmpfmtref;
+    my $tmpref;
+    my $firstlineaftermarker = 0;
+
+    foreach $lineref (@$arrayref) {
+       next if ($lineref->[$e_time] == 0); # Skip the client marker line
+       my $pidprevious = $pidhashref->{$lineref->[$e_pid]};
+       # Chain this record after the previous one for the same PID if that
+       # one has no successor yet.
+       if ($pidprevious->[$e_next] == 0) {
+           $pidprevious->[$e_next] = $lineref;
+           if (exists $local_hash{$lineref->[$e_pid]} 
+               && $firstlineaftermarker) {
+               $hash_lineref=$local_hash{$lineref->[$e_pid]};
+               $hash_lineref->[$e_next] =$lineref;
+               $firstlineaftermarker = 0;
+           } 
+       } elsif ($local_hash{$lineref->[$e_pid]} == 0) {
+               # True only for the first line, the marker line.
+               $local_hash{$lineref->[$e_pid]}=$lineref;
+               #print "LINE ADDED TO HASH: @$lineref\n";
+               $firstlineaftermarker = 1; 
+       }
+       # Stack grows upward (assumes x86 kernel)
+       # NOTE(review): the loop below compares function names with numeric
+       # ==, while the loop further down uses eq for the same field --
+       # confirm which comparison is intended here.
+       if ($lineref->[$e_stack] < $pidprevious->[$e_stack]) {
+           # lineref is not a child of pidprevious, find its parent
+         LINE: while(($lineref->[$e_stack] < $pidprevious->[$e_stack]) &&
+                     ($lineref->[$e_function] == $pidprevious->[$e_function])
+                     ) {
+                         # This second part of the comparison is a HACK
+                         last LINE if ($pidprevious->[$e_backref] == 0); 
+                         $pidprevious = $pidprevious->[$e_backref];
+         }
+       }
+       if ($lineref->[$e_stack] > $pidprevious->[$e_stack]) {
+           # lineref is child of pidprevious, with the caveat that they must
+            # belong to different functions. This is a HACK 
+           # until CDEBUG is modified
+           while($lineref->[$e_function] eq $pidprevious->[$e_function]) {
+             last if ($pidprevious->[$e_backref] == 0);
+              $pidprevious = $pidprevious->[$e_backref];
+           }   
+
+           $lineref->[$e_backref] = $pidprevious;
+           $pidprevious->[$e_numchildren]++;
+       } else {
+           # lineref is sibling of pidprevious
+           $lineref->[$e_numchildren] = 0;
+           $lineref->[$e_backref] = $pidprevious->[$e_backref];
+           ($lineref->[$e_backref])->[$e_numchildren]++;
+       }
+
+       # Record this line as the latest for its PID, then walk up the
+       # backref chain marking it as the youngest child of each ancestor;
+       # the record the walk ends on becomes this line's pid head.
+       $pidhashref->{$lineref->[$e_pid]} = $lineref;
+       $lineref->[$e_youngestchild] = $lineref;
+       while ($pidprevious->[$e_backref] != 0) {
+           $pidprevious->[$e_youngestchild] = $lineref;
+           $pidprevious = $pidprevious->[$e_backref];
+       }
+       $pidprevious->[$e_youngestchild] = $lineref;
+       $lineref->[$e_pidhead]=$pidprevious;
+       
+        # Stitch together rpc's
+       if($lineref->[$e_fmtstr] =~ $RPCREGEX) {
+           #print "RPC LINE: @$lineref\n";
+           # RPC record: pid, xid, nid, opc, next, send/receive flag, line ref
+           $tmpfmtref = [$1, $2, $3, $4, 0, 0, 0];
+           if ($lineref->[$e_fmtstr] =~ $SENDING) {
+               $tmpfmtref->[$e_rpcsndrcv] = $SEND;
+           } else { $tmpfmtref->[$e_rpcsndrcv] = $RCV; }
+           $tmpfmtref->[$e_curlineref] = $lineref;
+           # Keyed by time field: a later RPC line with the same time value
+           # replaces the earlier entry.
+           $stitchref->{$lineref->[$e_time]} = $tmpfmtref;
+           
+       }
+           
+    }
+match_rpcs($stitchref);
+return $arrayref;      
+}
+
+
+
+
+# Main loop, parses the debug log
+
+# Parse the debug logs named in @$input_files into an array of line records
+# and link them with create_links().
+#
+# Arguments, in order:
+#   $input_files - ref to the list of log file names
+#   $stitch_ref  - hash ref passed on to create_links()
+#   $pid         - if true, lines of the first file whose PID differs from
+#                  it are dropped
+#   $rpctrace    - if true, only lines whose mask equals
+#                  $tracemasks->{RPCTRACE} are kept
+#   $trace       - if true, only lines whose mask equals
+#                  $tracemasks->{TRACE} are kept
+#   $nodlm       - if true, LDLM subsystem lines are dropped
+#   $noclass     - if true, CLASS subsystem lines are dropped
+#   $nonet       - if true, lines of the networking subsystems listed in
+#                  the filter below are dropped
+#
+# Lines that do not match $REGEX are ignored.  The first time a PID is seen
+# a marker record is pushed ahead of its first line; after the first file an
+# all-zero record is pushed.  Dies if a file cannot be opened.
+# Returns the value returned by create_links().
+#
+# NOTE(review): $array_parsed is not declared with my in this sub, so
+# records presumably accumulate across calls -- confirm with callers.
+sub parse_file {
+    my %hasharray;
+    my $input_files = shift;
+    
+    my $stitch_ref = shift;
+    my $pid = shift;
+    my $rpctrace = shift;
+    my $trace = shift;
+    my $nodlm = shift;
+    my $noclass = shift;
+    my $nonet = shift;
+
+    print "$pid, $rpctrace, $nodlm, $noclass, $nonet\n";
+    # Initial (zero) values for the six link fields of every parsed record.
+    $backref = 0;
+    $treeparent = 0;
+    $numchildren = 0;
+    $youngestchild = 0;
+    $next = 0;
+    $pidhead = 0;
+    # Index of the file being read; 0 is the first file.
+    $iter = 0;
+                       
+    foreach $file (@$input_files) {
+       
+       open(FILEHANDLE, $file) or die "Can't open file: $file\n";
+       while(<FILEHANDLE>) {
+           if (/$REGEX/) {
+               @parsed_line=($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, 
+                             $treeparent, $numchildren, $youngestchild, 
+                             $pidhead, $next, $backref);
+               next if (($parsed_line[$e_pid] != $pid) && 
+                        ($pid) && ($iter == 0));
+               next if (($parsed_line[$e_mask] != $tracemasks->{RPCTRACE}) 
+                        && ($rpctrace));
+               next if ($trace && $parsed_line[$e_mask] != 
+                        $tracemasks->{TRACE});
+               next if ($nodlm && hex($parsed_line[$e_subsys]) == 
+                        $subsysnum->{LDLM});
+               next if ($noclass && hex($parsed_line[$e_subsys]) == 
+                        $subsysnum->{CLASS});
+               # NOTE(review): no NET entry appears in the subsystem number
+               # table in this file, so the NET comparison below presumably
+               # compares against an undefined value -- confirm.
+               next if ($nonet && (hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{RPC} ||
+                                   hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{NET} ||        
+                                   hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{PORTALS} ||
+                                   hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{SOCKNAL} ||
+                                   hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{QSWNAL} ||
+                                   hex($parsed_line[$e_subsys]) == 
+                                   $subsysnum->{GMNAL}));      
+               
+               
+               if (!exists($hasharray{$parsed_line[$e_pid]})) {
+                   # Push a marker for the beginning of this PID
+                   my @marker_line;
+                   $marker_line[$e_subsys] = 0;
+                   $marker_line[$e_mask] = 0;
+                   $marker_line[$e_processor] = 0;
+                   $marker_line[$e_time] = $parsed_line[$e_time];
+                   $marker_line[$e_file] = 0;
+                   $marker_line[$e_line] = 0;
+                   $marker_line[$e_function] = 0;
+                   $marker_line[$e_pid] = $parsed_line[$e_pid];
+                   # marker lines are everyone's parent, so stack value zero
+                   $marker_line[$e_stack] = 0; 
+                   $marker_line[$e_fmtstr] = "";
+                   $marker_line[$e_treeparent] = 0;
+                   $marker_line[$e_numchildren] = 0;
+                   $marker_line[$e_youngestchild] = 0;
+                   $marker_line[$e_pidhead] = 0;
+                   $marker_line[$e_next]= \@parsed_line;
+                   $marker_line[$e_backref] = 0;
+                   # The hash keeps a reference to @marker_line itself; the
+                   # array receives a separate copy of it.
+                   $hasharray{$parsed_line[$e_pid]} = \@marker_line;
+                   push @$array_parsed, [ @marker_line ];
+                   
+               }
+               push @$array_parsed, [ @parsed_line ];
+           }
+           
+       }
+       close(FILEHANDLE);
+       if ($iter == 0) {
+           # Insert end of client line marker, an all zero pattern;
+           @marker_line = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+           push @$array_parsed, [ @marker_line ]; 
+           
+       }
+       $iter ++;
+    }
+    
+    $array_parsed=create_links($array_parsed, \%hasharray, $stitch_ref);
+    #print_array($array_parsed);
+    return $array_parsed;
+}
+
+# Debug helper: print every record of the given array ref, one per line,
+# labelling records whose backref field is 0 as marker lines.
+sub print_array {
+    my ($records) = @_;
+    for my $rec (@{$records}) {
+        my $text = ($rec->[$e_backref] == 0)
+            ? "MARKER LINE(addr): $rec contents: [@{$rec}]\n"
+            : "REGULAR LINE (addr) :$rec contents:[@{$rec}]\n";
+        print $text;
+    }
+}
+
+# Walk the RPC hash in sorted key order, loading each record into $tmpref.
+# The per-entry print is commented out, so nothing is printed.
+sub print_rpcrelations {
+    my ($rpchashref) = @_;
+    for $rpckeys (sort keys %{$rpchashref}) {
+        $tmpref = $rpchashref->{$rpckeys};
+        #print "Key: $rpckeys, Contents: @$tmpref\n";
+    }
+}
+# Pair up RPC send and receive records.
+#
+# $rpchashref maps a hash key (the log line's time field) to a 7-element
+# RPC record.  For every record marked $SEND, the $e_rpcnext field is set to
+# the key of a record with the opposite direction and the same pid and xid.
+# If several records qualify, the last one in sorted key order wins.
+# Records marked $RCV are not modified.
+sub match_rpcs {
+    my $rpchashref = shift;
+    foreach $rpckeys (sort keys %$rpchashref) {
+       $tmpref = $rpchashref->{$rpckeys};
+       # Only send records are ever updated, so skip the inner scan for
+       # everything else.
+       next if ($tmpref->[$e_rpcsndrcv] != $SEND);
+       foreach $cmpkeys (sort keys %$rpchashref) {
+           # Hash keys are strings; compare them as strings.
+           next if ($cmpkeys eq $rpckeys);
+           $cmpref = $rpchashref->{$cmpkeys};
+           next if ($tmpref->[$e_rpcsndrcv] == $cmpref->[$e_rpcsndrcv]);
+           next if ($tmpref->[$e_rpcpid] != $cmpref->[$e_rpcpid]);
+           # The xid is captured as hex digits, so it must be compared as a
+           # string: numerically "1a" and "1b" would both evaluate to 1.
+           next if (lc($tmpref->[$e_rpcxid]) ne lc($cmpref->[$e_rpcxid]));
+           $tmpref->[$e_rpcnext] = $cmpkeys;
+       }
+    }
+}
+
+# Return the record that follows $lineref (its $e_next field) if it counts
+# as a child of $rootline, otherwise 0.
+#
+# Arguments:
+#   $rootline - record whose children are being enumerated
+#   $lineref  - record whose successor is examined
+#
+# The successor counts as a child when its stack value is greater than
+# $rootline's, or when it belongs to the same function as $rootline.
+# Returns 0 when there is no successor.
+sub getnextchild {
+    my $rootline = shift;
+    my $lineref = shift;
+    my $tempref = $lineref->[$e_next];
+    if ($tempref == 0)  {
+       return 0;
+    }
+
+    # Function names are strings and must be compared with eq; a numeric
+    # comparison treats any two non-numeric names as equal.
+    if (($tempref->[$e_stack] > $rootline->[$e_stack]) ||
+       ($tempref->[$e_function] eq $rootline->[$e_function])) {
+       # Child
+       return $tempref;
+    }
+    return 0;
+}
+
+
+# Copy the elements of @$inarg that do not match $FILEOPTIONREGEX into the
+# file-level @filearray, starting at slot 0, and return a reference to
+# @filearray.
+sub parse_foptions {
+    my ($inarg) = @_;
+    my $slot = 0;
+    for my $arg (@{$inarg}) {
+        unless ($arg =~ /$FILEOPTIONREGEX/) {
+            $filearray[$slot++] = $arg;
+        }
+    }
+    return \@filearray;
+}
+
+1;
+#$array_parsed=parse_file();
+#print_array($array_parsed);
index 4d40a5b..3de4eb4 100755 (executable)
@@ -65,18 +65,18 @@ Object creation command summary:
 
 -add ost
   --node node_name
-  --obd obd_name 
+  --ost ost_name 
   --lov lov_name 
   --dev path
   --size size
   --fstype extN|ext3
-  --obduuid uuid
+  --ostuuid uuid
   
 --add mtpt  - Mountpoint
   --node node_name
   --path /mnt/point
   --mds mds_name
-  --obd obd_name OR --lov lovname
+  --ost ost_name OR --lov lov_name
 """
     sys.exit(1)
 
@@ -217,18 +217,19 @@ class GenConfig:
         ldlm = self.newService("ldlm", name, uuid)
         return ldlm
 
-    def obd(self, name, uuid, fs, obdtype, devname, format, ost_uuid, dev_size=0):
-        obd = self.newService("obd", name, uuid)
-        obd.setAttribute('obdtype', obdtype)
-        obd.appendChild(self.ref("active", ost_uuid))
+    def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, net_uuid, dev_size=0):
+        osd = self.newService("osd", name, uuid)
+        osd.setAttribute('osdtype', osdtype)
+        osd.appendChild(self.ref("target", ost_uuid))
+        osd.appendChild(self.ref("network", net_uuid))
         if fs:
-            self.addElement(obd, "fstype", fs)
+            self.addElement(osd, "fstype", fs)
         if devname:
-            dev = self.addElement(obd, "devpath", devname)
-            self.addElement(obd, "autoformat", format)
+            dev = self.addElement(osd, "devpath", devname)
+            self.addElement(osd, "autoformat", format)
             if dev_size:
-                self.addElement(obd, "devsize", "%s" % (dev_size))
-        return obd
+                self.addElement(osd, "devsize", "%s" % (dev_size))
+        return osd
 
     def cobd(self, name, uuid, real_uuid, cache_uuid):
         cobd = self.newService("cobd", name, uuid)
@@ -236,18 +237,21 @@ class GenConfig:
         cobd.appendChild(self.ref("cacheobd",cache_uuid))
         return cobd
 
-    def ost(self, name, uuid, obd_uuid, net_uuid):
+    def ost(self, name, uuid, osd_uuid):
         ost = self.newService("ost", name, uuid)
-        ost.appendChild(self.ref("network", net_uuid))
-        ost.appendChild(self.ref("obd", obd_uuid))
+        ost.appendChild(self.ref("active", osd_uuid))
         return ost
 
+    def oss(self, name, uuid):
+        oss = self.newService("oss", name, uuid)
+        return oss
+
     def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern):
         lov = self.newService("lov", name, uuid)
         lov.appendChild(self.ref("mds", mds_uuid))
-        lov.setAttribute("stripesize", stripe_sz)
-        lov.setAttribute("stripecount", stripe_cnt)
-        lov.setAttribute("stripepattern", pattern)
+        lov.setAttribute("stripesize", str(stripe_sz))
+        lov.setAttribute("stripecount", str(stripe_cnt))
+        lov.setAttribute("stripepattern", str(pattern))
         return lov
 
     def lovconfig(self, name, uuid, lov_uuid):
@@ -269,7 +273,7 @@ class GenConfig:
         if dev_size:
                 self.addElement(mdd, "devsize", "%s" % (dev_size))
         mdd.appendChild(self.ref("network", net_uuid))
-        mdd.appendChild(self.ref("mds", mds_uuid))
+        mdd.appendChild(self.ref("target", mds_uuid))
         return mdd
 
     def mountpoint(self, name, uuid, mds_uuid, osc_uuid, path):
@@ -345,6 +349,17 @@ def get_net_uuid(lustre, node_name):
 def lov_add_obd(gen, lov, osc_uuid):
     lov.appendChild(gen.ref("obd", osc_uuid))
                             
+def ref_exists(profile, uuid):
+    elist = profile.childNodes
+    for e in elist:
+        if e.nodeType == e.ELEMENT_NODE:
+            ref = e.getAttribute('uuidref')
+            if ref == uuid:
+                return 1
+    return 0
+        
+# ensure that uuid is not already in the profile
+# return true if uuid is added
 def node_add_profile(gen, node, ref, uuid):
     refname = "%s_ref" % "profile"
     ret = node.getElementsByTagName(refname)
@@ -352,7 +367,12 @@ def node_add_profile(gen, node, ref, uuid):
         error('node has no profile ref:', node)
     prof_uuid = ret[0].getAttribute('uuidref')
     profile = lookup(node.parentNode, prof_uuid)
+    if not profile:
+        error("no profile found:", prof_uuid)
+    if ref_exists(profile, uuid):
+        return 0
     profile.appendChild(gen.ref(ref, uuid))
+    return 1
     
 def get_attr(dom_node, attr, default=""):
     v = dom_node.getAttribute(attr)
@@ -400,7 +420,7 @@ def add_net(gen, lustre, options):
     nid = get_option(options, 'nid')
     net_type = get_option(options, 'nettype')
 
-    if net_type == 'tcp':
+    if net_type in ('tcp', 'toe'):
         port = get_option_int(options, 'port', DEFAULT_PORT)
         tcpbuf = get_option_int(options, 'tcpbuf', 0)
     elif net_type in ('elan', 'gm'):
@@ -476,9 +496,9 @@ def add_mds(gen, lustre, options):
 def add_ost(gen, lustre, options):
     node_name = get_option(options, 'node')
     lovname = get_option(options, 'lov', '')
-    obdtype = get_option(options, 'obdtype', 'obdfilter')
+    osdtype = get_option(options, 'osdtype', 'obdfilter', deprecated_tag="obdtype")
 
-    if obdtype == 'obdecho':
+    if osdtype == 'obdecho':
         fstype = ''
         devname = ''
         size = 0
@@ -488,38 +508,45 @@ def add_ost(gen, lustre, options):
         size = get_option(options, 'size', 0)
         fstype = get_option(options, 'fstype', 'extN')
         
-    obdname = get_option(options, 'obd', 'OBD_'+ node_name)
-    obdname = new_name(obdname)
-    ostname = new_name('OST_'+ obdname)
-    if options.has_key('obduuid'):
-        obd_uuid = options['obduuid']
-        obd = lookup(lustre, obd_uuid)
-        if obd:
-            error("Duplicate OBD UUID:", obd_uuid)
+    ostname = get_option(options, 'ost', '', deprecated_tag='obd')
+    if not ostname:
+        ostname = new_name('OST_'+ node_name)
+
+    osdname = new_name("OSD_" + ostname)
+    osd_uuid = get_option(options, 'osduuid', '', deprecated_tag = 'obduuid')
+    if osd_uuid and lookup(lustre, osd_uuid):
+            error("Duplicate OBD UUID:", osd_uuid)
     else:
-        obd_uuid = new_uuid(obdname)
-    ost_uuid = new_uuid(ostname)
+        osd_uuid = new_uuid(osdname)
+
+    ost_uuid = name2uuid(lustre, ostname, fatal=0)
+    if not ost_uuid:
+        ost_uuid = new_uuid(ostname)
+        ost = gen.ost(ostname, ost_uuid, osd_uuid)
+        lustre.appendChild(ost)
+        if lovname:
+            lov = findByName(lustre, lovname, "lov")
+            if not lov:
+                error('add_ost:', '"'+lovname+'"', "lov element not found.")
+            lov_add_obd(gen, lov, ost_uuid)
 
     net_uuid = get_net_uuid(lustre, node_name)
     if not net_uuid:
-        error("NODE: ", node_name, "not found")
+        error("NODE: No net network interface for", node_name, "found")
     
-    obd = gen.obd(obdname, obd_uuid, fstype, obdtype, devname, get_format_flag(options), ost_uuid,
-                  size)
-    ost = gen.ost(ostname, ost_uuid, obd_uuid, net_uuid)
-    
-    if lovname:
-        lov = findByName(lustre, lovname, "lov")
-        if not lov:
-            error('add_ost:', '"'+lovname+'"', "lov element not found.")
-        lov_add_obd(gen, lov, obd_uuid)
+    osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid,
+                  net_uuid, size)
 
     node = findByName(lustre, node_name, "node")
-    node_add_profile(gen, node, 'obd', obd_uuid)
-    node_add_profile(gen, node, 'ost', ost_uuid)
 
-    lustre.appendChild(obd)
-    lustre.appendChild(ost)
+##     if node_add_profile(gen, node, 'oss', oss_uuid):
+##         ossname = 'OSS'
+##         oss_uuid = new_uuid(ossname)
+##         oss = gen.oss(ossname, oss_uuid)
+##         lustre.appendChild(oss)
+
+    node_add_profile(gen, node, 'osd', osd_uuid)
+    lustre.appendChild(osd)
 
                    
 def add_cobd(gen, lustre, options):
@@ -542,7 +569,7 @@ def add_cobd(gen, lustre, options):
 def add_echo_client(gen, lustre, options):
     """ add an echo client to the profile for this node. """
     node_name = get_option(options, 'node')
-    lov_name = get_option(options, 'obd')
+    lov_name = get_option(options, 'ost')
 
     node = findByName(lustre, node_name, 'node')
 
@@ -552,7 +579,7 @@ def add_echo_client(gen, lustre, options):
 
     lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0)
     if not lov_uuid:
-        lov_uuid = name2uuid(lustre, lov_name, tag='obd', fatal=1)
+        lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1)
 
     echo = gen.echo_client(echoname, echo_uuid, lov_uuid)
     lustre.appendChild(echo)
@@ -567,9 +594,9 @@ def add_lov(gen, lustre, options):
         warning("name:", lov_orig, "already used. using:", name)
 
     mds_name = get_option(options, 'mds')
-    stripe_sz = get_option(options, 'stripe_sz')
-    stripe_cnt = get_option(options, 'stripe_cnt', 0)
-    pattern = get_option(options, 'stripe_pattern', 0)
+    stripe_sz = get_option_int(options, 'stripe_sz')
+    stripe_cnt = get_option_int(options, 'stripe_cnt', 0)
+    pattern = get_option_int(options, 'stripe_pattern', 0)
     uuid = new_uuid(name)
 
     ret = findByName(lustre, name, "lov")
@@ -597,9 +624,9 @@ def add_mtpt(gen, lustre, options):
     mds_name = get_option(options, 'mds')
     lov_name = get_option(options, 'lov', '')
     if lov_name == '':
-        lov_name = get_option(options, 'obd', '')
+        lov_name = get_option(options, 'ost', '', deprecated_tag='obd')
         if lov_name == '':
-            error("--add mtpt requires either --lov lov_name or --obd obd_name")
+            error("--add mtpt requires either --lov lov_name or --ost ost_name")
 
     name = new_name('MNT_'+ node_name)
 
@@ -610,7 +637,7 @@ def add_mtpt(gen, lustre, options):
     mds_uuid = name2uuid(lustre, mds_name, tag='mds')
     lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0)
     if not lov_uuid:
-        lov_uuid = name2uuid(lustre, lov_name, tag='obd', fatal=1)
+        lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1)
 
     uuid = new_uuid(name)
     mtpt = gen.mountpoint(name, uuid, mds_uuid, lov_uuid, path)
@@ -620,6 +647,7 @@ def add_mtpt(gen, lustre, options):
     node_add_profile(gen, node, "mountpoint", uuid)
     lustre.appendChild(mtpt)
 
+# obsolete, leaving behind for reference 
 def add_oscref(gen, lustre, options):
     """ create mtpt on a node """
     node_name = get_option(options, 'node')
@@ -644,30 +672,37 @@ def has_option(options, tag):
         return 1
     return 0
 
-def get_option(options, tag, default = None):
+def get_option(options, tag, default = None, deprecated_tag=None):
     """Look for tag in options hash and return the value if set. If not
     set, then if return default it is set, otherwise exception."""
     if options.has_key(tag):
         return options[tag]
+    elif deprecated_tag and options.has_key(deprecated_tag):
+            warning('--'+deprecated_tag, " is deprecated, please use:", '--'+tag)
+            return options[deprecated_tag]
     elif default != None:
         return default
     else:
-        raise OptionError("--add %s requires --%s value" % (options['add'], tag))
+        raise OptionError("--add %s requires --%s <value>" % (options['add'], tag))
         # this exception should print an error like '--add blah requires --<tag> value'
 
 def get_option_int(options, tag, default = None):
     """Return an integer option.  Raise exception if the value is not an int"""
     val = get_option(options, tag, default)
-    return int(val)
+    try:
+        n = int(val)
+    except ValueError:
+        raise OptionError("--%s <num> (value must be integer)" % (tag))        
+    return n
 
 def parse_cmdline(argv):
     short_opts = "ho:i:m:"
     long_opts = ["add=", "node=", "nettype=", "nid=", "tcpbuf=", "port=",
                  "echo_client=", "stripe_sz=", "stripe_cnt=", "stripe_pattern=",
                  "mds=", "route", "router", "merge=", "format", "reformat", "output=",
-                 "dev=", "size=", "obd=", "obdtype=", "obduuid=", "in=",
-                 "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=",
-                 "oscref", "osc=", "real_obd=", "cache_obd=", "fstype=",
+                 "dev=", "size=", "obd=", "ost=", "obdtype=", "osdtype=", "obduuid=", "in=",
+                 "osduuid=", "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=",
+                 "osc=", "real_obd=", "cache_obd=", "fstype=",
                  "timeout=", "recovery_upcall="]
     opts = []
     args = []
@@ -692,6 +727,8 @@ def parse_cmdline(argv):
             options['mds'] = a
         if o == "--obd":
             options['obd'] = a
+        if o == "--ost":
+            options['ost'] = a
 
         # node options
         if o == "--timeout":
@@ -728,10 +765,14 @@ def parse_cmdline(argv):
             options['osc'] = a
         if o == "--obdtype":
             options['obdtype'] = a
+        if o == "--osdtype":
+            options['osdtype'] = a
         if o == "--fstype":
             options['fstype'] = a
         if o == "--obduuid":
             options['obduuid'] = a
+        if o == "--osduuid":
+            options['osduuid'] = a
 
         # lov options
         if o == "--stripe_sz":
@@ -763,6 +804,7 @@ def parse_cmdline(argv):
         if o == "--format":
             options['format'] = 1
         if o  == "--reformat":
+            warning("the lmc --reformat option is not supported. Use lconf --reformat")
             options['reformat'] = 1
         if o  == "--batch":
             options['batch'] = a
@@ -816,8 +858,6 @@ def add(devtype, gen, lustre, options):
         add_node(gen, lustre, options)
     elif devtype == 'echo_client':
         add_echo_client(gen, lustre, options)
-    elif devtype == 'oscref':
-        add_oscref(gen, lustre, options)
     elif devtype == 'cobd':
         add_cobd(gen, lustre, options)
     else:
index 1aa9d91..39e2bdf 100644 (file)
 
 /******************  Functions ******************/
 
-void usage(char *pgm)
+void usage(char *prog)
 {
-       fprintf(stderr, "usage: %s <filename> <stripe size> <start stripe> <stripe count>\n", pgm);
-
-       fprintf(stderr, "\tstripe size: number of bytes in each stripe\n");
-       fprintf(stderr, "\tstripe start: OST index which holds first stripe\n");
-       fprintf(stderr, "\tstripe count: number of OSTs to stripe over\n");
+       fprintf(stderr, "usage: %s <filename> <stripe size> <stripe start> "
+                       "<stripe count>\n", prog);
+
+       fprintf(stderr,
+               "\tstripe size: number of bytes in each stripe (0 default)\n");
+       fprintf(stderr,
+               "\tstripe start: OST index of first stripe (-1 default)\n");
+       fprintf(stderr,
+               "\tstripe count: number of OSTs to stripe over (0 default)\n");
 }
 
 int create_file(char *name, long stripe_size, int stripe_offset,
@@ -60,21 +64,45 @@ int main(int argc, char *argv[])
        long st_size;
        int  st_offset,
             st_count;
+       char *end;
 
        /*  Check to make sure we have enough parameters  */
        if (argc != 5) {
                usage(argv[0]);
-               return(-1);
+               return 1;
        }
 
        /* Get the stripe size */
-       st_size = atol(argv[2]);
+       st_size = strtoul(argv[2], &end, 0);
+       if (*end != '\0') {
+               fprintf(stderr, "bad stripe size '%s'\n", argv[2]);
+               usage(argv[0]);
+               return 2;
+       }
+
+       /*
+       if (st_size & 4095) {
+               fprintf(stderr, "stripe size must be multiple of page size\n");
+               usage(argv[0]);
+               return 3;
+       }
+       */
 
        /* Get the stripe offset*/
-       st_offset = atoi(argv[3]);
+       st_offset = strtoul(argv[3], &end, 0);
+       if (*end != '\0') {
+               fprintf(stderr, "bad stripe offset '%s'\n", argv[3]);
+               usage(argv[0]);
+               return 4;
+       }
 
        /* Get the stripe count */
-       st_count = atoi(argv[4]);
+       st_count = strtoul(argv[4], &end, 0);
+       if (*end != '\0') {
+               fprintf(stderr, "bad stripe count '%s'\n", argv[4]);
+               usage(argv[0]);
+               return 5;
+       }
 
        /*  Create the file, as specified.  Return and display any errors.  */
        result = create_file(argv[1], st_size, st_offset, st_count);
index 8c329ff..8800b57 100644 (file)
@@ -35,7 +35,6 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <signal.h>
-#define printk printf
 
 #include <linux/lustre_lib.h>
 #include <linux/lustre_idl.h>
@@ -81,8 +80,11 @@ char *buf = rawbuf;
 int max = sizeof(rawbuf);
 
 static int thread;
-static struct lov_stripe_md saved_lsm;
-static char lsm_valid = 0;
+
+union lsm_buffer {
+        char                 space [4096];
+        struct lov_stripe_md lsm;
+} lsm_buffer;
 
 static int getfd(char *func);
 static char *cmdname(char *func);
@@ -190,6 +192,118 @@ static int parse_devname(char *func, char *name)
         return ret;
 }
 
+/* Render @lsm as text: the object id, then, if the stripe count is non-zero,
+ * "=<stripe size>#<stripe count>@<stripe offset>" followed by one
+ * ":<stripe object id>" per stripe.
+ *
+ * Returns a pointer to a static buffer which the next call overwrites.
+ * Aborts the process if the text does not fit in that buffer. */
+static char *
+lsm_string (struct lov_stripe_md *lsm)
+{
+        static char buffer[4096];
+        char       *p = buffer;
+        int         space = sizeof (buffer);
+        int         i;
+        int         nob;
+
+        *p = 0;
+
+        /* snprintf() returns the length it WANTED to write, so every result
+         * is checked before advancing: nob >= space means the output was
+         * truncated and stepping 'p' by it would leave the buffer. */
+        nob = snprintf(p, space, LPX64, lsm->lsm_object_id);
+        if (nob < 0 || nob >= space)
+                goto overflow;
+        p += nob;
+        space -= nob;
+
+        if (lsm->lsm_stripe_count != 0) {
+                nob = snprintf (p, space, "=%u#%u@%d",
+                                lsm->lsm_stripe_size,
+                                lsm->lsm_stripe_count,
+                                lsm->lsm_stripe_offset);
+                if (nob < 0 || nob >= space)
+                        goto overflow;
+                p += nob;
+                space -= nob;
+
+                for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                        nob = snprintf (p, space, ":"LPX64,
+                                        lsm->lsm_oinfo[i].loi_id);
+                        if (nob < 0 || nob >= space)
+                                goto overflow;
+                        p += nob;
+                        space -= nob;
+                }
+        }
+
+        return (buffer);
+
+overflow:
+        fprintf (stderr, "lsm_string() overflowed buffer\n");
+        abort ();
+}
+
+/* Clear every byte of @lsmb, then mark the embedded stripe descriptor with
+ * LOV_MAGIC; all other fields are left at zero. */
+static void
+reset_lsmb (union lsm_buffer *lsmb)
+{
+        struct lov_stripe_md *lsm = &lsmb->lsm;
+
+        memset (lsmb->space, 0, sizeof (lsmb->space));
+        lsm->lsm_magic = LOV_MAGIC;
+}
+
+/* Parse a textual stripe descriptor into @lsmb.  Accepted syntax:
+ *
+ *      object_id[=size#count[@offset][:id]*]
+ *
+ * @lsmb is reset first (zeroed, magic set), so fields that are not present
+ * in @string stay zero.  When stripe object ids are given there must be
+ * exactly 'count' of them.
+ *
+ * Returns 0 on success, -1 if @string is malformed or names more stripes
+ * than fit in @lsmb. */
+static int
+parse_lsm (union lsm_buffer *lsmb, char *string)
+{
+        struct lov_stripe_md *lsm = &lsmb->lsm;
+        char                 *end;
+        size_t                max_stripes;
+        int                   i;
+
+        reset_lsmb (lsmb);
+
+        lsm->lsm_object_id = strtoull (string, &end, 0);
+        if (end == string)
+                return (-1);
+        string = end;
+
+        if (*string == 0)
+                return (0);
+
+        if (*string != '=')
+                return (-1);
+        string++;
+
+        lsm->lsm_stripe_size = strtoul (string, &end, 0);
+        if (end == string)
+                return (-1);
+        string = end;
+
+        if (*string != '#')
+                return (-1);
+        string++;
+
+        lsm->lsm_stripe_count = strtoul (string, &end, 0);
+        if (end == string)
+                return (-1);
+        string = end;
+
+        if (*string == '@') {
+                string++;
+                lsm->lsm_stripe_offset = strtol (string, &end, 0);
+                if (end == string)
+                        return (-1);
+                string = end;
+        }
+
+        if (*string == 0)                       /* don't have to specify obj ids */
+                return (0);
+
+        /* The per-stripe array lives in whatever is left of the buffer after
+         * the fixed part of the descriptor; refuse counts that would make
+         * the loop below write beyond it. */
+        max_stripes = (sizeof (lsmb->space) -
+                       (size_t)((char *)lsm->lsm_oinfo - (char *)lsm)) /
+                      sizeof (lsm->lsm_oinfo[0]);
+        if (lsm->lsm_stripe_count > max_stripes)
+                return (-1);
+
+        for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                if (*string != ':')
+                        return (-1);
+                string++;
+                lsm->lsm_oinfo[i].loi_id = strtoull (string, &end, 0);
+                if (end == string)              /* ':' with no id after it */
+                        return (-1);
+                string = end;
+        }
+
+        if (*string != 0)
+                return (-1);
+
+        return (0);
+}
+
 static char *cmdname(char *func)
 {
         static char buf[512];
@@ -446,7 +560,7 @@ int jt_obd_connect(int argc, char **argv)
 
         do_disconnect(argv[0], 1);
 
-#warning TODO: implement timeout per lctl usage for probe
+        /* XXX TODO: implement timeout per lctl usage for probe */
         if (argc != 1)
                 return CMD_HELP;
 
@@ -591,6 +705,24 @@ int jt_opt_threads(int argc, char **argv)
 int jt_obd_detach(int argc, char **argv)
 {
         struct obd_ioctl_data data;
+        int rc;
+
+        IOCINIT(data);
+
+        if (argc != 1)
+                return CMD_HELP;
+
+        rc = ioctl(fd, OBD_IOC_DETACH, buf);
+        if (rc < 0)
+                fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
+                        strerror(rc = errno));
+
+        return rc;
+}
+
+int jt_obd_cleanup(int argc, char **argv)
+{
+        struct obd_ioctl_data data;
         char force = 'F';
         int rc;
 
@@ -600,12 +732,14 @@ int jt_obd_detach(int argc, char **argv)
                 return CMD_HELP;
 
         if (argc == 2) {
+                if (strcmp(argv[1], "force"))
+                        return CMD_HELP;
                 data.ioc_inllen1 = 1;
                 data.ioc_inlbuf1 = &force;
         }
 
         IOC_PACK(argv[0], data);
-        rc = ioctl(fd, OBD_IOC_DETACH, buf);
+        rc = ioctl(fd, OBD_IOC_CLEANUP, buf);
         if (rc < 0)
                 fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
                         strerror(rc = errno));
@@ -613,7 +747,7 @@ int jt_obd_detach(int argc, char **argv)
         return rc;
 }
 
-int jt_obd_cleanup(int argc, char **argv)
+int jt_obd_no_transno(int argc, char **argv)
 {
         struct obd_ioctl_data data;
         int rc;
@@ -623,7 +757,25 @@ int jt_obd_cleanup(int argc, char **argv)
         if (argc != 1)
                 return CMD_HELP;
 
-        rc = ioctl(fd, OBD_IOC_CLEANUP, &data);
+        rc = ioctl(fd, OBD_IOC_NO_TRANSNO, &data);
+        if (rc < 0)
+                fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
+                        strerror(rc = errno));
+
+        return rc;
+}
+
+int jt_obd_set_readonly(int argc, char **argv)
+{
+        struct obd_ioctl_data data;
+        int rc;
+
+        IOCINIT(data);
+
+        if (argc != 1)
+                return CMD_HELP;
+
+        rc = ioctl(fd, OBD_IOC_SET_READONLY, &data);
         if (rc < 0)
                 fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
                         strerror(rc = errno));
@@ -808,12 +960,146 @@ int jt_obd_setup(int argc, char **argv)
         return rc;
 }
 
-/* The ioctl API has been extended to provide the LOV stripe metadata to the
- * caller when applicable.  This utility, however, only saves the LSM for the
- * latest CREATE.   It only saves the LSM when the ioctl indicates that it
- * is valid by overloading 'ioc_conn2' as a boolean. */
+/* Get echo client's stripe meta-data for the given object.
+ *
+ * argv[1] is the object id (any base strtoull() accepts).  The reply is read
+ * into the global lsm_buffer and printed with lsm_string().
+ *
+ * Returns 0 on success, CMD_HELP on bad usage, or the ioctl's return code.
+ */
+int jt_obd_get_stripe (int argc, char **argv)
+{
+        struct obd_ioctl_data data;
+        __u64 id;
+        int   rc;
+        char *end;
+
+        if (argc != 2)
+                return (CMD_HELP);
+
+        id = strtoull (argv[1], &end, 0);
+        /* end == argv[1] catches an empty argument, which would otherwise
+         * be taken as object id 0 */
+        if (end == argv[1] || *end) {
+                fprintf (stderr, "Error: %s: invalid object id '%s'\n",
+                         cmdname (argv[0]), argv[1]);
+                return (CMD_HELP);
+        }
+
+        memset (&lsm_buffer, 0, sizeof (lsm_buffer));
+
+        IOCINIT (data);
+        data.ioc_obdo1.o_id = id;
+        data.ioc_obdo1.o_mode = S_IFREG | 0644;
+        data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+        data.ioc_pbuf1 = (char *)&lsm_buffer;
+        data.ioc_plen1 = sizeof (lsm_buffer);
+
+        IOC_PACK(argv[0], data);
+        rc = ioctl(fd, ECHO_IOC_GET_STRIPE, buf);
+        IOC_UNPACK(argv[0], data);
+
+        if (rc != 0) {
+                fprintf (stderr, "Error: %s: rc %d(%s)\n",
+                         cmdname (argv[0]), rc, strerror (errno));
+                return (rc);
+        }
+
+        printf ("%s\n", lsm_string (&lsm_buffer.lsm));
+
+        return (rc);
+}
+
+/* Set stripe meta-data for 1 or more objects.  Object must be new to
+ * this echo client instance.
+ *
+ * argv[1] is a stripe descriptor in parse_lsm() syntax; the optional
+ * argv[2] is how many consecutive object ids, starting at the one in
+ * argv[1], receive that descriptor (default 1).
+ *
+ * Returns 0 on success, CMD_HELP on bad usage, or the return code of the
+ * first ioctl that fails.
+ */
+int jt_obd_set_stripe (int argc, char **argv)
+{
+        struct obd_ioctl_data data;
+        char *end;
+        int count = 1;
+        int i;
+        int rc;
+
+        if (argc < 2 || argc > 3)
+                return CMD_HELP;
+
+        rc = parse_lsm (&lsm_buffer, argv[1]);
+        if (rc != 0) {
+                fprintf (stderr, "error: %s: invalid object '%s'\n",
+                         cmdname (argv[0]), argv[1]);
+                return CMD_HELP;
+        }
+
+        if (argc > 2) {
+                count = strtol (argv[2], &end, 0);
+                /* reject an empty count as well as trailing junk, and
+                 * report the argument that was actually wrong */
+                if (end == argv[2] || *end != 0) {
+                        fprintf (stderr, "error: %s: invalid count '%s'\n",
+                                 cmdname (argv[0]), argv[2]);
+                        return CMD_HELP;
+                }
+        }
+
+        for (i = 0; i < count; i++)
+        {
+                IOCINIT (data);
+                data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id + i;
+                data.ioc_obdo1.o_mode = S_IFREG | 0644;
+                data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+                data.ioc_pbuf1 = (char *)&lsm_buffer;
+                data.ioc_plen1 = sizeof (lsm_buffer);
+
+                IOC_PACK (argv[0], data);
+                rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+                IOC_UNPACK (argv[0], data);
+
+                if (rc != 0) {
+                        fprintf (stderr, "Error: %s: rc %d(%s)\n",
+                                 cmdname (argv[0]), rc, strerror (errno));
+                        return (rc);
+                }
+        }
+
+        return (0);
+}
+
+/* Clear stripe meta-data info for an object on this echo-client instance.
+ *
+ * argv[1] is the object id.  The request is an ECHO_IOC_SET_STRIPE with no
+ * stripe buffer attached (NOTE(review): presumably that is how the echo
+ * client recognises "unset" -- confirm against the ioctl handler).
+ *
+ * Returns 0 on success, CMD_HELP on bad usage, or the ioctl's return code.
+ */
+int jt_obd_unset_stripe (int argc, char **argv)
+{
+        struct obd_ioctl_data data;
+        char *end;
+        obd_id id;
+        int rc;
+
+        if (argc != 2)
+                return CMD_HELP;
+
+        id = strtoull (argv[1], &end, 0);
+        /* a valid id consumes the whole, non-empty argument */
+        if (end == argv[1] || *end != 0) {
+                fprintf (stderr, "error: %s: invalid object id '%s'\n",
+                         cmdname (argv[0]), argv[1]);
+                return CMD_HELP;
+        }
+
+        IOCINIT (data);
+        /* act on the object named on the command line, not on whatever id
+         * an earlier command happened to leave in lsm_buffer */
+        data.ioc_obdo1.o_id = id;
+        data.ioc_obdo1.o_mode = S_IFREG | 0644;
+        data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+
+        IOC_PACK (argv[0], data);
+        rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+        IOC_UNPACK (argv[0], data);
+
+        if (rc != 0)
+                fprintf (stderr, "Error: %s: rc %d(%s)\n",
+                         cmdname (argv[0]), rc, strerror (errno));
+
+        return (rc);
+}
+
+/* Create one or more objects; argv[4] may describe stripe meta-data.  If
+ * it is absent, defaults are assumed.  This echo-client instance stashes the
+ * stripe object ids.  Use get_stripe on this node to print the full lsm and
+ * set_stripe on another node to cut/paste between nodes.
+ */
 int jt_obd_create(int argc, char **argv)
 {
+        static __u64 base_id = 1;
+
         struct obd_ioctl_data data;
         struct timeval next_time;
         __u64 count = 1, next_count;
@@ -821,7 +1107,7 @@ int jt_obd_create(int argc, char **argv)
         char *end;
 
         IOCINIT(data);
-        if (argc < 2 || argc > 4)
+        if (argc < 2 || argc > 5)
                 return CMD_HELP;
 
         count = strtoull(argv[1], &end, 0);
@@ -848,26 +1134,36 @@ int jt_obd_create(int argc, char **argv)
                         return CMD_HELP;
         }
 
+        if (argc < 5)
+                reset_lsmb (&lsm_buffer);       /* will set default */
+        else {
+                rc = parse_lsm (&lsm_buffer, argv[4]);
+                if (rc != 0) {
+                        fprintf(stderr, "error: %s: invalid lsm '%s'\n",
+                                cmdname(argv[0]), argv[4]);
+                        return CMD_HELP;
+                }
+                base_id = lsm_buffer.lsm.lsm_object_id;
+        }
+                
         printf("%s: "LPD64" objects\n", cmdname(argv[0]), count);
         gettimeofday(&next_time, NULL);
         next_time.tv_sec -= verbose;
 
         for (i = 1, next_count = verbose; i <= count; i++) {
                 data.ioc_obdo1.o_mode = mode;
-                data.ioc_obdo1.o_id = i;
+                data.ioc_obdo1.o_id = base_id++;
                 data.ioc_obdo1.o_uid = 0;
                 data.ioc_obdo1.o_gid = 0;
                 data.ioc_obdo1.o_valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
-                                OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;;
+                                         OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;
 
-                data.ioc_inllen1 = sizeof(saved_lsm);
-                data.ioc_inlbuf1 = (char *)&saved_lsm;
+                data.ioc_plen1 = sizeof (lsm_buffer);
+                data.ioc_pbuf1 = (char *)&lsm_buffer;
 
                 IOC_PACK(argv[0], data);
                 rc = ioctl(fd, OBD_IOC_CREATE, buf);
                 IOC_UNPACK(argv[0], data);
-                fprintf(stderr, "lsm->lsm_o_id: "LPX64"\n",
-                        saved_lsm.lsm_object_id);
                 SHMEM_BUMP();
                 if (rc < 0) {
                         fprintf(stderr, "error: %s: #%d - %s\n",
@@ -881,8 +1177,6 @@ int jt_obd_create(int argc, char **argv)
                         break;
                 }
 
-                lsm_valid = data.ioc_conn2;
-
                 if (be_verbose(verbose, &next_time, i, &next_count, count))
                         printf("%s: #%d is object id "LPX64"\n",
                                cmdname(argv[0]), i, data.ioc_obdo1.o_id);
@@ -914,11 +1208,6 @@ int jt_obd_setattr(int argc, char **argv)
         }
         data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
 
-        if (lsm_valid == 1) {
-                data.ioc_inllen1 = sizeof(saved_lsm);
-                data.ioc_inlbuf1 = (char *)&saved_lsm;
-        }
-
         IOC_PACK(argv[0], data);
         rc = ioctl(fd, OBD_IOC_SETATTR, buf);
         if (rc < 0)
@@ -973,9 +1262,6 @@ int jt_obd_destroy(int argc, char **argv)
                 data.ioc_obdo1.o_mode = S_IFREG | 0644;
                 data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
 
-                data.ioc_inllen1 = sizeof(saved_lsm);
-                data.ioc_inlbuf1 = (char *)&saved_lsm;
-
                 IOC_PACK(argv[0], data);
                 rc = ioctl(fd, OBD_IOC_DESTROY, buf);
                 IOC_UNPACK(argv[0], data);
@@ -985,7 +1271,6 @@ int jt_obd_destroy(int argc, char **argv)
                                 cmdname(argv[0]), id, strerror(rc = errno));
                         break;
                 }
-                lsm_valid = 0;
 
                 if (be_verbose(verbose, &next_time, i, &next_count, count))
                         printf("%s: #%d is object id "LPX64"\n",
@@ -1016,11 +1301,6 @@ int jt_obd_getattr(int argc, char **argv)
         data.ioc_obdo1.o_valid = 0xffffffff;
         printf("%s: object id "LPX64"\n", cmdname(argv[0]),data.ioc_obdo1.o_id);
 
-        if (lsm_valid == 1) {
-                data.ioc_inllen1 = sizeof(saved_lsm);
-                data.ioc_inlbuf1 = (char *)&saved_lsm;
-        }
-
         IOC_PACK(argv[0], data);
         rc = ioctl(fd, OBD_IOC_GETATTR, buf);
         IOC_UNPACK(argv[0], data);
@@ -1195,11 +1475,6 @@ int jt_obd_test_brw(int argc, char **argv)
         data.ioc_count = len;
         data.ioc_offset = thr_offset * len * count;
 
-        if (lsm_valid == 1) {
-                data.ioc_inllen1 = sizeof(saved_lsm);
-                data.ioc_inlbuf1 = (char *)&saved_lsm;
-        }
-
         gettimeofday(&start, NULL);
         next_time.tv_sec = start.tv_sec - verbose;
         next_time.tv_usec = start.tv_usec;
@@ -1262,7 +1537,7 @@ int jt_obd_lov_setconfig(int argc, char **argv)
 {
         struct obd_ioctl_data data;
         struct lov_desc desc;
-        obd_uuid_t *uuidarray, *ptr;
+        struct obd_uuid *uuidarray, *ptr;
         int rc, i;
         char *end;
 
@@ -1273,13 +1548,13 @@ int jt_obd_lov_setconfig(int argc, char **argv)
 
         if (strlen(argv[1]) > sizeof(desc.ld_uuid) - 1) {
                 fprintf(stderr,
-                        "error: %s: LOV uuid '%s' longer than "LPSZ" characters\n",
+                        "error: %s: LOV uuid '%s' longer than "LPSZ" chars\n",
                         cmdname(argv[0]), argv[1], sizeof(desc.ld_uuid) - 1);
                 return -EINVAL;
         }
 
         memset(&desc, 0, sizeof(desc));
-        strncpy(desc.ld_uuid, argv[1], sizeof(desc.ld_uuid) - 1);
+        obd_str2uuid(&desc.ld_uuid, argv[1]);
         desc.ld_tgt_count = argc - 6;
         desc.ld_default_stripe_count = strtoul(argv[2], &end, 0);
         if (*end) {
@@ -1374,7 +1649,7 @@ int jt_obd_lov_getconfig(int argc, char **argv)
 {
         struct obd_ioctl_data data;
         struct lov_desc desc;
-        obd_uuid_t *uuidarray;
+        struct obd_uuid *uuidarray;
         char *path;
         int rc, tmpfd;
 
@@ -1396,7 +1671,7 @@ int jt_obd_lov_getconfig(int argc, char **argv)
         }
 
         memset(&desc, 0, sizeof(desc));
-        strncpy(desc.ld_uuid, argv[1], sizeof(desc.ld_uuid) - 1);
+        obd_str2uuid(&desc.ld_uuid, argv[1]);
         desc.ld_tgt_count = DEF_UUID_ARRAY_LEN;
 repeat:
         uuidarray = calloc(desc.ld_tgt_count, sizeof(*uuidarray));
@@ -1425,7 +1700,7 @@ repeat:
                 fprintf(stderr, "error: %s: ioctl error: %s\n",
                         cmdname(argv[0]), strerror(rc = errno));
         } else {
-                obd_uuid_t *ptr;
+                struct obd_uuid *ptr;
                 int i;
 
                 if (obd_ioctl_unpack(&data, buf, max)) {
diff --git a/lustre/utils/obdbarrier.c b/lustre/utils/obdbarrier.c
new file mode 100644 (file)
index 0000000..911ab5f
--- /dev/null
@@ -0,0 +1,223 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eeb@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "obdiolib.h"
+
+/* Parse an unsigned decimal number with an optional one-letter scale
+ * suffix: k/K multiplies by 2^10, m/M by 2^20, g/G by 2^30.
+ *
+ * Stores the scaled value in *valp and returns 0; returns -1 (leaving
+ * *valp untouched) when @str does not start with a number. */
+int
+parse_kmg (uint64_t *valp, char *str)
+{
+        uint64_t        number;
+        char            suffix[32];
+        int             shift = 0;
+        int             nfields;
+
+        nfields = sscanf (str, LPU64"%1[gGmMkK]", &number, suffix);
+        if (nfields != 1 && nfields != 2)
+                return (-1);
+
+        if (nfields == 2) {
+                /* the scanset only admits these six letters */
+                if (*suffix == 'g' || *suffix == 'G')
+                        shift = 30;
+                else if (*suffix == 'm' || *suffix == 'M')
+                        shift = 20;
+                else if (*suffix == 'k' || *suffix == 'K')
+                        shift = 10;
+        }
+
+        *valp = number << shift;
+        return (0);
+}
+
+/* Print the one-line usage summary, to stdout when @help is non-zero
+ * (requested with -h) and to stderr otherwise.  @cmdname is argv[0]; only
+ * its final path component is shown. */
+void
+usage (char *cmdname, int help)
+{
+        char *name = strrchr (cmdname, '/');
+
+        if (name == NULL)
+                name = cmdname;
+        else
+                name++;                         /* skip the '/' itself */
+
+        /* options listed here are the ones main()'s getopt string accepts */
+        fprintf (help ? stdout : stderr,
+                 "usage: %s -d device -p npeers [-s] [-i id] [-n reps] oid\n",
+                 name);
+}
+
+/* Return non-zero when @i is a multiple of the largest power of @base that
+ * does not exceed it; every @i below @base qualifies.  With base 10 this
+ * selects 0..9, 10, 20, .. 90, 100, 200, ... */
+int
+exponential_modulus (int i, int base)
+{
+        int   step = 1;
+        int   limit = base;
+
+        /* grow the window until it contains i; 'step' trails one power
+         * of 'base' behind 'limit' */
+        while (i >= limit) {
+                step = limit;
+                limit *= base;
+        }
+
+        return (i % step == 0);
+}
+
+/* obdbarrier entry point.
+ *
+ * Options: -d device, -p npeers, -i barrier id, -n reps (accepts a k/m/g
+ * suffix), -s to set the barrier up instead of waiting on it, -h for help.
+ * The single positional argument is the object id.  A device, a positive
+ * npeers and either -s or -i are required.
+ *
+ * With -s the barrier is set up once and a matching command line is
+ * printed; otherwise the barrier is entered 'reps' times, reporting
+ * progress at exponentially spaced iterations.
+ *
+ * Exit status is 0 on success and 1 on any error. */
+int
+main (int argc, char **argv)
+{
+        uint64_t              bid = (((uint64_t)gethostid()) << 32) | getpid ();
+        int                   set_bid = 0;
+        uint64_t              oid;
+        int                   setup = 0;
+        int                   device = -1;
+        int                   npeers = 0;
+        int                   reps = 1;
+        char                  hostname[128];
+        struct obdio_conn    *conn;
+        struct obdio_barrier *b;
+        char                 *end;
+        uint64_t              val;
+        int                   rc;
+        int                   c;
+
+        setvbuf (stdout, NULL, _IOLBF, 0);
+        memset (hostname, 0, sizeof (hostname));
+        gethostname (hostname, sizeof (hostname));
+        hostname[sizeof(hostname) - 1] = 0;
+
+        while ((c = getopt (argc, argv, "hsi:d:n:p:")) != -1)
+                switch (c) {
+                case 'h':
+                        usage (argv[0], 1);
+                        return (0);
+
+                case 'i':
+                        bid = strtoll (optarg, &end, 0);
+                        if (end == optarg || *end != 0) {
+                                fprintf (stderr, "Can't parse id %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        set_bid = 1;
+                        break;
+
+                case 's':
+                        setup = 1;
+                        break;
+
+                case 'd':
+                        device = strtol (optarg, &end, 0);
+                        if (end == optarg || *end != 0 || device < 0) {
+                                fprintf (stderr, "Can't parse device %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        break;
+
+                case 'n':
+                        if (parse_kmg (&val, optarg) != 0) {
+                                fprintf (stderr, "Can't parse reps %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        reps = (int)val;
+                        /* refuse values that do not survive the narrowing
+                         * to int rather than looping a garbage count */
+                        if (reps < 0 || (uint64_t)reps != val) {
+                                fprintf (stderr, "Can't parse reps %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        break;
+
+                case 'p':
+                        npeers = strtol (optarg, &end, 0);
+                        if (end == optarg || *end != 0 || npeers <= 0) {
+                                fprintf (stderr, "Can't parse npeers %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        break;
+
+                default:
+                        usage (argv[0], 0);
+                        return (1);
+        }
+
+        if ((!setup && !set_bid) ||
+            npeers <= 0 ||
+            device < 0 ||
+            optind == argc) {
+                fprintf (stderr, "%s not specified\n",
+                         (!setup && !set_bid) ? "id" :
+                         npeers <= 0 ? "npeers" :
+                         device < 0 ? "device" : "object id");
+                return (1);
+        }
+
+        oid = strtoull (argv[optind], &end, 0);
+        if (end == argv[optind] || *end != 0) {
+                fprintf (stderr, "Can't parse object id %s\n",
+                         argv[optind]);
+                return (1);
+        }
+
+        conn = obdio_connect (device);
+        if (conn == NULL)
+                return (1);
+
+        b = obdio_new_barrier (oid, bid, npeers);
+        if (b == NULL) {
+                /* don't leave the connection open on this error path */
+                obdio_disconnect (conn);
+                return (1);
+        }
+
+        rc = 0;
+        if (setup) {
+                rc = obdio_setup_barrier (conn, b);
+                if (rc == 0)
+                        printf ("Setup barrier: -d %d -i "LPX64" -p %d -n1 "LPX64"\n",
+                                device, bid, npeers, oid);
+        } else {
+                for (c = 0; c < reps; c++) {
+                        rc = obdio_barrier (conn, b);
+                        if (rc != 0)
+                                break;
+                        if (exponential_modulus (c, 10))
+                                printf ("%s: Barrier %d\n", hostname, c);
+                }
+        }
+
+        free (b);
+
+        obdio_disconnect (conn);
+
+        return (rc == 0 ? 0 : 1);
+}
+
+
index acc5c5f..b8c210c 100644 (file)
@@ -38,6 +38,8 @@ int jt_obd_connect(int argc, char **argv);
 int jt_obd_disconnect(int argc, char **argv);
 int jt_obd_detach(int argc, char **argv);
 int jt_obd_cleanup(int argc, char **argv);
+int jt_obd_no_transno(int argc, char **argv);
+int jt_obd_set_readonly(int argc, char **argv);
 int jt_obd_newdev(int argc, char **argv);
 int jt_obd_list(int argc, char **argv);
 int jt_obd_attach(int argc, char **argv);
@@ -49,6 +51,9 @@ int jt_obd_destroy(int argc, char **argv);
 int jt_obd_getattr(int argc, char **argv);
 int jt_obd_test_getattr(int argc, char **argv);
 int jt_obd_test_brw(int argc, char **argv);
+int jt_obd_get_stripe(int argc, char **argv);
+int jt_obd_set_stripe(int argc, char **argv);
+int jt_obd_unset_stripe(int argc, char **argv);
 int jt_obd_lov_setconfig(int argc, char **argv);
 int jt_obd_lov_getconfig(int argc, char **argv);
 int jt_obd_test_ldlm(int argc, char **argv);
diff --git a/lustre/utils/obdio.c b/lustre/utils/obdio.c
new file mode 100644 (file)
index 0000000..ccee788
--- /dev/null
@@ -0,0 +1,304 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eeb@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "obdiolib.h"
+
+/*
+ * Write/read/verify test on one fixed extent of an object.
+ *
+ * For each of 'reps' iterations the buffer is filled with 16-byte chunks of
+ * [myhid, mypid, iteration, chunk index], written to 'oid' at 'offset',
+ * overwritten in memory with 0xbb, read back from the object and compared
+ * chunk by chunk.  When 'locked' is non-zero the write and read are
+ * bracketed by an LCK_PW enqueue/cancel on the extent.
+ *
+ * Returns 0 on success and non-zero on the first failure.  The object is
+ * always closed before returning; a close failure is reported on stderr
+ * but does not change the return value.
+ */
+int
+obdio_test_fixed_extent (struct obdio_conn *conn,
+                         uint32_t myhid, uint32_t mypid,
+                         int reps, int locked, uint64_t oid,
+                         uint64_t offset, uint32_t size)
+{
+        struct lustre_handle fh;
+        struct lustre_handle lh;
+        void                *space;
+        void                *buffer;
+        uint32_t            *ibuf;
+        /* number of whole 4-word chunks that fit in the buffer */
+        uint32_t             nchunks = size / (4 * sizeof (*ibuf));
+        uint32_t             j;
+        int                  i;
+        int                  rc;
+        int                  rc2;
+
+        rc = obdio_open (conn, oid, &fh);
+        if (rc != 0) {
+                fprintf (stderr, "Failed to open object "LPX64": %s\n",
+                         oid, strerror (errno));
+                return (rc);
+        }
+
+        buffer = obdio_alloc_aligned_buffer (&space, size);
+        if (buffer == NULL) {
+                /* size is unsigned: print it with %u, not %d */
+                fprintf (stderr, "Can't allocate buffer size %u\n", size);
+                rc = -1;
+                goto out_0;
+        }
+
+        for (i = 0; i < reps; i++) {
+                /* stamp every chunk so stale or foreign data is detectable */
+                ibuf = (uint32_t *) buffer;
+                for (j = 0; j < nchunks; j++) {
+                        ibuf[0] = myhid;
+                        ibuf[1] = mypid;
+                        ibuf[2] = (uint32_t)i;
+                        ibuf[3] = j;
+                        ibuf += 4;
+                }
+
+                if (locked) {
+                        rc = obdio_enqueue (conn, oid, LCK_PW, offset, size, &lh);
+                        if (rc != 0) {
+                                fprintf (stderr, "Error on enqueue "LPX64" @ "LPU64" for %u: %s\n",
+                                         oid, offset, size, strerror (errno));
+                                goto out_1;
+                        }
+                }
+
+                rc = obdio_pwrite (conn, oid, buffer, size, offset);
+                if (rc != 0) {
+                        fprintf (stderr, "Error writing "LPX64" @ "LPU64" for %u: %s\n",
+                                 oid, offset, size, strerror (errno));
+                        if (locked)
+                                obdio_cancel (conn, &lh);
+                        rc = -1;
+                        goto out_1;
+                }
+
+                /* scribble over the buffer so the read must refill it */
+                memset (buffer, 0xbb, size);
+
+                rc = obdio_pread (conn, oid, buffer, size, offset);
+                if (rc != 0) {
+                        fprintf (stderr, "Error reading "LPX64" @ "LPU64" for %u: %s\n",
+                                 oid, offset, size, strerror (errno));
+                        if (locked)
+                                obdio_cancel (conn, &lh);
+                        rc = -1;
+                        goto out_1;
+                }
+
+                if (locked) {
+                        rc = obdio_cancel (conn, &lh);
+                        if (rc != 0) {
+                                fprintf (stderr, "Error on cancel "LPX64" @ "LPU64" for %u: %s\n",
+                                         oid, offset, size, strerror (errno));
+                                rc = -1;
+                                goto out_1;
+                        }
+                }
+
+                /* the data was read under the lock; checking it needs none */
+                ibuf = (uint32_t *) buffer;
+                for (j = 0; j < nchunks; j++) {
+                        if (ibuf[0] != myhid ||
+                            ibuf[1] != mypid ||
+                            ibuf[2] != (uint32_t)i ||
+                            ibuf[3] != j) {
+                                fprintf (stderr, "Error checking "LPX64" @ "LPU64" for %u, chunk %u\n",
+                                         oid, offset, size, j);
+                                fprintf (stderr, "Expected [%x,%x,%x,%x], got [%x,%x,%x,%x]\n",
+                                         myhid, mypid, (uint32_t)i, j,
+                                         ibuf[0], ibuf[1], ibuf[2], ibuf[3]);
+                                rc = -1;
+                                goto out_1;
+                        }
+                        ibuf += 4;
+                }
+        }
+ out_1:
+        free (space);
+ out_0:
+        rc2 = obdio_close (conn, oid, &fh);
+        if (rc2 != 0)
+                fprintf (stderr, "Error closing object "LPX64": %s\n",
+                         oid, strerror (errno));
+        return (rc);
+}
+
+/*
+ * Parse a non-negative decimal number with an optional single k/K, m/M or
+ * g/G suffix (multiply by 2^10, 2^20 or 2^30 respectively).
+ *
+ * On success stores the result in *valp and returns 0.  Returns -1, leaving
+ * *valp untouched, if 'str' has no digits, is negative, has anything after
+ * the number/suffix, or if the scaled value does not fit in 64 bits.
+ */
+int
+parse_kmg (uint64_t *valp, char *str)
+{
+        unsigned long long  val;
+        char               *end;
+        int                 shift = 0;
+
+        /* strtoull would quietly negate "-1" into a huge value */
+        if (strchr (str, '-') != NULL)
+                return (-1);
+
+        errno = 0;
+        val = strtoull (str, &end, 10);
+        if (end == str || errno == ERANGE)
+                return (-1);
+
+        switch (*end) {
+        case 'g':
+        case 'G':
+                shift = 30;
+                break;
+
+        case 'm':
+        case 'M':
+                shift = 20;
+                break;
+
+        case 'k':
+        case 'K':
+                shift = 10;
+                break;
+
+        default:
+                break;
+        }
+
+        if (shift != 0)
+                end++;                          /* consume the suffix */
+
+        /* nothing may follow the number or its suffix */
+        if (*end != 0)
+                return (-1);
+
+        /* refuse to shift significant bits off the top */
+        if (shift != 0 && (val >> (64 - shift)) != 0)
+                return (-1);
+
+        *valp = (uint64_t)val << shift;
+        return (0);
+}
+
+/*
+ * Print the one-line usage summary, naming the program by the last path
+ * component of 'cmdname'.  Output goes to stdout when 'help' is non-zero
+ * (the user asked for it) and to stderr otherwise.
+ */
+void
+usage (char *cmdname, int help)
+{
+        char *slash = strrchr (cmdname, '/');
+        /* step past the '/' so it is not printed as part of the name */
+        char *name = (slash != NULL) ? slash + 1 : cmdname;
+
+        fprintf (help ? stdout : stderr,
+                 "usage: %s -d device -s size -o offset [-i id][-n reps][-l] oid\n",
+                 name);
+}
+
+/*
+ * Command-line driver for obdio_test_fixed_extent().
+ *
+ *   -d device   OBD device number (required)
+ *   -s size     extent size, k/m/g suffix allowed (required)
+ *   -o offset   extent offset, k/m/g suffix allowed (default 0)
+ *   -i id       "pid" or "hid.pid" used to stamp the data
+ *   -n reps     number of iterations (default 1)
+ *   -l          take a PW lock around each write/read
+ *   oid         object id (required positional argument)
+ *
+ * Returns 0 on success and 1 on any usage, connection or test failure.
+ */
+int
+main (int argc, char **argv)
+{
+        uint32_t           mypid = getpid ();
+        uint32_t           myhid = gethostid ();
+        uint64_t           oid;
+        uint64_t           base_offset = 0;
+        uint32_t           size = 0;
+        int                set_size = 0;
+        int                device = -1;
+        int                reps = 1;
+        int                locked = 0;
+        char              *end;
+        struct obdio_conn *conn;
+        uint64_t           val;
+        int                v1;
+        int                v2;
+        int                rc;
+        int                c;
+
+        while ((c = getopt (argc, argv, "hi:s:o:d:n:l")) != -1) {
+                switch (c) {
+                case 'h':
+                        usage (argv[0], 1);
+                        return (0);
+
+                case 'i':
+                        switch (sscanf (optarg, "%i.%i", &v1, &v2)) {
+                        case 1:
+                                mypid = v1;
+                                break;
+                        case 2:
+                                myhid = v1;
+                                mypid = v2;
+                                break;
+                        default:
+                                fprintf (stderr, "Can't parse id %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        break;
+
+                case 's':
+                        /* size is 32 bits: reject rather than truncate */
+                        if (parse_kmg (&val, optarg) != 0 ||
+                            (uint64_t)(uint32_t)val != val) {
+                                fprintf (stderr, "Can't parse size %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        size = (uint32_t)val;
+                        set_size++;
+                        break;
+
+                case 'o':
+                        if (parse_kmg (&val, optarg) != 0) {
+                                fprintf (stderr, "Can't parse offset %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        base_offset = val;
+                        break;
+
+                case 'd':
+                        device = strtol (optarg, &end, 0);
+                        if (end == optarg || *end != 0 || device < 0) {
+                                fprintf (stderr, "Can't parse device %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        break;
+
+                case 'n':
+                        /* reps is an int: reject values that don't fit */
+                        if (parse_kmg (&val, optarg) != 0 ||
+                            (int)val < 0 ||
+                            (uint64_t)(int)val != val) {
+                                fprintf (stderr, "Can't parse reps %s\n",
+                                         optarg);
+                                return (1);
+                        }
+                        reps = (int)val;
+                        break;
+
+                case 'l':
+                        locked = 1;
+                        break;
+
+                default:
+                        usage (argv[0], 0);
+                        return (1);
+                }
+        }
+
+        if (!set_size ||
+            device < 0 ||
+            optind == argc) {
+                fprintf (stderr, "No %s specified\n",
+                         !set_size ? "size" :
+                         device < 0 ? "device" : "object id");
+                return (1);
+        }
+
+        oid = strtoull (argv[optind], &end, 0);
+        if (end == argv[optind] || *end != 0) {
+                fprintf (stderr, "Can't parse object id %s\n",
+                         argv[optind]);
+                return (1);
+        }
+
+        conn = obdio_connect (device);
+        if (conn == NULL)
+                return (1);
+
+        rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
+                                      oid, base_offset, size);
+
+        obdio_disconnect (conn);
+
+        return (rc == 0 ? 0 : 1);
+}
+
+
diff --git a/lustre/utils/obdiolib.c b/lustre/utils/obdiolib.c
new file mode 100644 (file)
index 0000000..ef95055
--- /dev/null
@@ -0,0 +1,465 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2003 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eeb@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "obdiolib.h"
+
+/*
+ * Reset the connection's ioctl data block to a clean state: everything
+ * zeroed except the ioctl version, the saved connection address/cookie
+ * and the length of the block itself.
+ */
+void
+obdio_iocinit (struct obdio_conn *conn)
+{
+        struct obd_ioctl_data *data = &conn->oc_data;
+
+        memset (data, 0, sizeof (*data));
+        data->ioc_len = sizeof (*data);
+        data->ioc_version = OBD_IOCTL_VERSION;
+        data->ioc_cookie = conn->oc_conn_cookie;
+        data->ioc_addr = conn->oc_conn_addr;
+}
+
+/*
+ * Issue ioctl 'cmd' on the connection's descriptor using the data already
+ * prepared in conn->oc_data.
+ *
+ * The data is packed into conn->oc_buffer first and unpacked back into
+ * conn->oc_data after a successful ioctl.  A pack or unpack failure is
+ * treated as fatal (message on stderr, then abort).  Returns the ioctl's
+ * own return value; on a non-zero return oc_data is not unpacked.
+ */
+int
+obdio_ioctl (struct obdio_conn *conn, int cmd)
+{
+        char *packed = conn->oc_buffer;
+        int   status;
+        int   unpack_rc;
+
+        status = obd_ioctl_pack (&conn->oc_data, &packed, sizeof (conn->oc_buffer));
+        if (status != 0) {
+                fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n",
+                         status, strerror (errno));
+                abort ();
+        }
+
+        status = ioctl (conn->oc_fd, cmd, packed);
+        if (status == 0) {
+                /* only a successful call has results worth unpacking */
+                unpack_rc = obd_ioctl_unpack (&conn->oc_data, packed, sizeof (conn->oc_buffer));
+                if (unpack_rc != 0) {
+                        fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n",
+                                 unpack_rc, strerror (errno));
+                        abort ();
+                }
+        }
+
+        return (status);
+}
+
+/*
+ * Open /dev/obd, select OBD device number 'device' and connect to it.
+ *
+ * Returns a newly allocated connection (release it with
+ * obdio_disconnect()) holding the address/cookie returned by the connect
+ * ioctl, or NULL after printing a message on stderr.  On failure nothing
+ * is leaked: the descriptor, if it was opened, is closed again.
+ */
+struct obdio_conn *
+obdio_connect (int device)
+{
+        struct obdio_conn  *conn;
+        int                 rc;
+
+        conn = malloc (sizeof (*conn));
+        if (conn == NULL) {
+                fprintf (stderr, "obdio_connect: no memory\n");
+                return (NULL);
+        }
+        memset (conn, 0, sizeof (*conn));
+
+        conn->oc_fd = open ("/dev/obd", O_RDWR);
+        if (conn->oc_fd < 0) {
+                fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n",
+                         strerror (errno));
+                goto failed;
+        }
+
+        obdio_iocinit (conn);
+        conn->oc_data.ioc_dev = device;
+        rc = obdio_ioctl (conn, OBD_IOC_DEVICE);
+        if (rc != 0) {
+                fprintf (stderr, "obdio_connect: Can't set device %d: %s\n",
+                         device, strerror (errno));
+                goto failed;
+        }
+
+        obdio_iocinit (conn);
+        rc = obdio_ioctl (conn, OBD_IOC_CONNECT);
+        if (rc != 0) {
+                fprintf (stderr, "obdio_connect: Can't connect to device %d: %s\n",
+                         device, strerror (errno));
+                goto failed;
+        }
+
+        /* remember the handle so obdio_iocinit() can stamp later ioctls */
+        conn->oc_conn_addr = conn->oc_data.ioc_addr;
+        conn->oc_conn_cookie = conn->oc_data.ioc_cookie;
+        return (conn);
+
+ failed:
+        /* oc_fd is negative only when open() itself failed */
+        if (conn->oc_fd >= 0)
+                close (conn->oc_fd);
+        free (conn);
+        return (NULL);
+}
+
+/*
+ * Tear down a connection made by obdio_connect(): close its descriptor
+ * and free the connection structure.  'conn' must not be used afterwards.
+ */
+void
+obdio_disconnect (struct obdio_conn *conn)
+{
+        int fd = conn->oc_fd;
+
+        /* obdclass will automatically close on last ref */
+        close (fd);
+        free (conn);
+}
+
+/*
+ * Open object 'oid' as a regular file via OBD_IOC_OPEN.
+ *
+ * Returns the ioctl result; on success (0) the handle found in the
+ * returned obdo is copied to *fh, otherwise *fh is left untouched.
+ */
+int
+obdio_open (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh)
+{
+        int    status;
+
+        obdio_iocinit (conn);
+
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+        conn->oc_data.ioc_obdo1.o_id = oid;
+
+        status = obdio_ioctl (conn, OBD_IOC_OPEN);
+        if (status != 0)
+                return (status);
+
+        memcpy (fh, obdo_handle(&conn->oc_data.ioc_obdo1), sizeof (*fh));
+        return (0);
+}
+
+/*
+ * Close object 'oid' via OBD_IOC_CLOSE, passing back the handle '*fh'
+ * obtained from obdio_open().  Returns the ioctl result.
+ */
+int
+obdio_close (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh)
+{
+        int status;
+
+        obdio_iocinit (conn);
+
+        conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+        conn->oc_data.ioc_obdo1.o_id = oid;
+        /* hand the open handle back so the right open is closed */
+        memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), fh, sizeof (*fh));
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
+                                          OBD_MD_FLMODE | OBD_MD_FLHANDLE;
+
+        status = obdio_ioctl (conn, OBD_IOC_CLOSE);
+        return (status);
+}
+
+/*
+ * Read 'count' bytes at 'offset' of object 'oid' into 'buffer' using
+ * OBD_IOC_BRW_READ.  Returns the ioctl result (0 on success).
+ */
+int
+obdio_pread (struct obdio_conn *conn, uint64_t oid,
+             char *buffer, uint32_t count, uint64_t offset)
+{
+        int status;
+
+        obdio_iocinit (conn);
+
+        /* which object */
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+        conn->oc_data.ioc_obdo1.o_id = oid;
+
+        /* which extent, and where the data goes */
+        conn->oc_data.ioc_offset = offset;
+        conn->oc_data.ioc_count = count;
+        conn->oc_data.ioc_plen2 = count;
+        conn->oc_data.ioc_pbuf2 = buffer;
+
+        status = obdio_ioctl (conn, OBD_IOC_BRW_READ);
+        return (status);
+}
+
+/*
+ * Write 'count' bytes from 'buffer' at 'offset' of object 'oid' using
+ * OBD_IOC_BRW_WRITE.  Returns the ioctl result (0 on success).
+ */
+int
+obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
+              char *buffer, uint32_t count, uint64_t offset)
+{
+        int status;
+
+        obdio_iocinit (conn);
+
+        /* which object */
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+        conn->oc_data.ioc_obdo1.o_id = oid;
+
+        /* which extent, and where the data comes from */
+        conn->oc_data.ioc_offset = offset;
+        conn->oc_data.ioc_count = count;
+        conn->oc_data.ioc_plen2 = count;
+        conn->oc_data.ioc_pbuf2 = buffer;
+
+        status = obdio_ioctl (conn, OBD_IOC_BRW_WRITE);
+        return (status);
+}
+
+/*
+ * Request a lock of the given 'mode' on 'count' bytes at 'offset' of
+ * object 'oid' via ECHO_IOC_ENQUEUE.
+ *
+ * Returns the ioctl result; on success (0) the lock handle found in the
+ * returned obdo is copied to *lh, otherwise *lh is left untouched.
+ */
+int
+obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
+               int mode, uint64_t offset, uint32_t count,
+               struct lustre_handle *lh)
+{
+        int   status;
+
+        obdio_iocinit (conn);
+
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+        conn->oc_data.ioc_obdo1.o_id = oid;
+
+        /* the lock mode travels in ioc_conn1 */
+        conn->oc_data.ioc_offset = offset;
+        conn->oc_data.ioc_count = count;
+        conn->oc_data.ioc_conn1 = mode;
+
+        status = obdio_ioctl (conn, ECHO_IOC_ENQUEUE);
+        if (status != 0)
+                return (status);
+
+        memcpy (lh, obdo_handle (&conn->oc_data.ioc_obdo1), sizeof (*lh));
+        return (0);
+}
+
+/*
+ * Release the lock identified by '*lh' (as filled in by obdio_enqueue())
+ * via ECHO_IOC_CANCEL.  Returns the ioctl result.
+ */
+int
+obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh)
+{
+        int status;
+
+        obdio_iocinit (conn);
+
+        /* only the handle is meaningful for a cancel */
+        memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), lh, sizeof (*lh));
+        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLHANDLE;
+
+        status = obdio_ioctl (conn, ECHO_IOC_CANCEL);
+        return (status);
+}
+
+/*
+ * Allocate a buffer of at least 'size' bytes whose start is page aligned.
+ *
+ * The raw malloc() pointer is stored in *spacep (NULL on failure); that is
+ * the pointer the caller must eventually free().  The return value is the
+ * first page boundary at or after it, or NULL if the allocation failed.
+ */
+void *
+obdio_alloc_aligned_buffer (void **spacep, int size)
+{
+        int            pagesize = getpagesize();
+        unsigned long  mask = (unsigned long)(pagesize - 1);
+        unsigned long  aligned;
+        void          *raw;
+
+        /* over-allocate so a page boundary always falls inside the block */
+        raw = malloc (size + pagesize - 1);
+        *spacep = raw;
+        if (raw == NULL)
+                return (NULL);
+
+        aligned = ((unsigned long)raw + mask) & ~mask;
+        return ((void *)aligned);
+}
+
+/*
+ * Allocate a local barrier descriptor for barrier 'id' kept in object
+ * 'oid' and shared by 'npeers' participants.  Ordinal and join count
+ * start at zero.  Returns NULL (after a message on stderr) when out of
+ * memory; the caller releases the descriptor with free().
+ */
+struct obdio_barrier *
+obdio_new_barrier (uint64_t oid, uint64_t id, int npeers)
+{
+        struct obdio_barrier *barrier = malloc (sizeof (*barrier));
+
+        if (barrier != NULL) {
+                barrier->ob_count = 0;
+                barrier->ob_ordinal = 0;
+                barrier->ob_npeers = npeers;
+                barrier->ob_oid = oid;
+                barrier->ob_id = id;
+                return (barrier);
+        }
+
+        fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid);
+        return (NULL);
+}
+
+/*
+ * Initialise the on-disk barrier state: write a zeroed page whose leading
+ * bytes are a copy of '*b' to offset 0 of object b->ob_oid, under a PW
+ * lock on that page.
+ *
+ * 'b' must be freshly created (ordinal and count both zero); anything
+ * else aborts the program.  Returns 0 on success, otherwise the first
+ * error encountered (later cleanup errors do not override it).
+ */
+int
+obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
+{
+        struct lustre_handle    fh;
+        struct lustre_handle    lh;
+        int                     rc;
+        int                     rc2;
+        void                   *space;
+        struct obdio_barrier   *fileb;
+
+       if (b->ob_ordinal != 0 ||
+           b->ob_count != 0) {
+               fprintf (stderr, "obdio_setup_barrier: invalid parameter\n");
+               abort ();
+       }
+       
+        rc = obdio_open (conn, b->ob_oid, &fh);
+        if (rc != 0) {
+                fprintf (stderr, "obdio_setup_barrier "LPX64": Failed to open object: %s\n",
+                         b->ob_oid, strerror (errno));
+                return (rc);
+        }
+        
+        fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
+        if (fileb == NULL) {
+                fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n",
+                        b->ob_oid);
+                rc = -1;
+                goto out_0;
+        }
+        
+        /* the rest of the page beyond the barrier record is zero filled */
+        memset (fileb, 0, getpagesize ());
+       *fileb = *b;
+        
+        rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+        if (rc != 0) {
+                fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n",
+                         b->ob_oid, strerror (errno));
+                goto out_1;
+        }
+        
+        rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+       if (rc != 0)
+               fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n",
+                        b->ob_oid, strerror (errno));
+       
+       /* always drop the lock; only report its error if the write worked */
+       rc2 = obdio_cancel (conn, &lh);
+       if (rc == 0 && rc2 != 0) {
+               fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n",
+                        b->ob_oid, strerror (errno));
+               rc = rc2;
+       }
+ out_1:
+       free (space);
+ out_0:
+       rc2 = obdio_close (conn, b->ob_oid, &fh);
+       if (rc == 0 && rc2 != 0) {
+               fprintf (stderr, "obdio_setup_barrier "LPX64": Error on close: %s\n",
+                        b->ob_oid, strerror (errno));
+               rc = rc2;
+       }
+       
+       return (rc);
+}
+
+/*
+ * Join the barrier described by 'b' and wait until all peers have joined.
+ *
+ * The shared state is the barrier record stored in the first page of
+ * object b->ob_oid.  Joining is done under a PW lock: the record is read,
+ * checked against 'b', its join count is incremented and it is written
+ * back.  The last joiner resets the count and increments the stored
+ * ordinal instead.  The caller's b->ob_ordinal is then incremented and
+ * the function loops -- cancelling its lock, taking a PR lock and
+ * re-reading the record -- until the stored ordinal equals b->ob_ordinal.
+ *
+ * Returns 0 once the barrier has completed, non-zero on an I/O or lock
+ * error or if the record read back is inconsistent with 'b'.  Note that
+ * b->ob_ordinal is only advanced after the join has been written.
+ */
+int
+obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
+{
+        struct lustre_handle   fh;
+       struct lustre_handle   lh;
+       int                    rc;
+       int                    rc2;
+        void                  *space;
+        struct obdio_barrier  *fileb;
+       char                  *mode;
+
+       rc = obdio_open (conn, b->ob_oid, &fh);
+       if (rc != 0) {
+               fprintf (stderr, "obdio_barrier "LPX64": Error on open: %s\n",
+                        b->ob_oid, strerror (errno));
+               return (rc);
+       }
+       
+        fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
+       if (fileb == NULL) {
+               fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n",
+                        b->ob_oid);
+               rc = -1;
+               goto out_0;
+       }
+
+        /* exclusive lock for the read-modify-write that records our join */
+        rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+        if (rc != 0) {
+                fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n",
+                         b->ob_oid, strerror (errno));
+                goto out_1;
+        }
+       
+       /* poison the buffer so a read that returns nothing fails the check */
+       memset (fileb, 0xeb, getpagesize ());
+       rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+       if (rc != 0) {
+               fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n",
+                        b->ob_oid, strerror (errno));
+               goto out_2;
+       }
+       
+       /* before joining, the stored record must match ours exactly */
+       if (fileb->ob_id != b->ob_id ||
+           fileb->ob_oid != b->ob_oid ||
+           fileb->ob_npeers != b->ob_npeers ||
+           fileb->ob_count >= b->ob_npeers ||
+           fileb->ob_ordinal != b->ob_ordinal) {
+               fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id);
+               fprintf (stderr, "  got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                        fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, 
+                        fileb->ob_ordinal, fileb->ob_count);
+               fprintf (stderr, "  expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                        b->ob_id, b->ob_oid, b->ob_npeers, 
+                        b->ob_ordinal, b->ob_count);
+               rc = -1;
+               goto out_2;
+       }
+       
+       fileb->ob_count++;
+       if (fileb->ob_count == fileb->ob_npeers) { /* I'm the last joiner */
+               fileb->ob_count = 0;            /* join count for next barrier */
+               fileb->ob_ordinal++;            /* signal all joined */
+       }
+
+       rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+       if (rc != 0) {
+               fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n",
+                        b->ob_oid, strerror (errno));
+               goto out_2;
+       }
+
+       /* 'mode' only names the lock currently held, for error messages */
+       mode = "PW";
+       b->ob_ordinal++;                        /* now I wait... */
+       while (fileb->ob_ordinal != b->ob_ordinal) {
+
+               /* release the lock held so far, then re-read under PR */
+               rc = obdio_cancel (conn, &lh);
+               if (rc != 0) {
+                       fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n",
+                                b->ob_oid, mode, strerror (errno));
+                       goto out_1;
+               }
+
+               mode = "PR";
+               rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh);
+               if (rc != 0) {
+                       fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n",
+                                b->ob_oid, strerror (errno));
+                       goto out_1;
+               }
+               
+               memset (fileb, 0xeb, getpagesize ());
+               rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+               if (rc != 0) {
+                       fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n",
+                                b->ob_oid, strerror (errno));
+                       goto out_2;
+               }
+               
+               /* while waiting the stored ordinal may be ours or one behind */
+               if (fileb->ob_id != b->ob_id ||
+                   fileb->ob_oid != b->ob_oid ||
+                   fileb->ob_npeers != b->ob_npeers ||
+                   fileb->ob_count >= b->ob_npeers ||
+                   (fileb->ob_ordinal != b->ob_ordinal - 1 &&
+                    fileb->ob_ordinal != b->ob_ordinal)) {
+                       fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id);
+                       fprintf (stderr, "  got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                                fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, 
+                                fileb->ob_ordinal, fileb->ob_count);
+                       fprintf (stderr, "  expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                                b->ob_id, b->ob_oid, b->ob_npeers, 
+                                b->ob_ordinal, b->ob_count);
+                       rc = -1;
+                       goto out_2;
+               }
+       }
+                       
+ /* out_2: a lock is held; out_1: only the buffer; out_0: only the open */
+ out_2:
+       rc2 = obdio_cancel (conn, &lh);
+       if (rc == 0 && rc2 != 0) {
+               fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n",
+                        b->ob_oid, strerror (errno));
+               rc = rc2;
+       }
+ out_1:
+       free (space);
+ out_0:
+       rc2 = obdio_close (conn, b->ob_oid, &fh);
+       if (rc == 0 && rc2 != 0) {
+               fprintf (stderr, "obdio_barrier "LPX64": Error on close: %s\n",
+                        b->ob_oid, strerror (errno));
+               rc = rc2;
+       }
+       
+       return (rc);
+}
+
+               
diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h
new file mode 100644 (file)
index 0000000..9b06941
--- /dev/null
@@ -0,0 +1,70 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2003 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eeb@clusterfs.com> 
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef _OBDIOLIB_H_
+#define _OBDIOLIB_H_
+
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/obd_class.h>
+
+/* State for one connection to an OBD device through /dev/obd. */
+struct obdio_conn {
+        /* descriptor from open("/dev/obd") */
+        int                   oc_fd;
+        /* connection address/cookie saved after OBD_IOC_CONNECT and
+         * stamped into every later ioctl by obdio_iocinit() */
+        uint64_t               oc_conn_addr;
+        uint64_t               oc_conn_cookie;
+        /* unpacked ioctl arguments/results */
+        struct obd_ioctl_data  oc_data;
+        /* scratch space the ioctl data is packed into */
+        char                   oc_buffer[8192];
+};
+
+/*
+ * Barrier record.  One copy lives in each participant's memory and one
+ * at offset 0 of the barrier object.
+ */
+struct obdio_barrier {
+        /* barrier identifier; must match between peers and the object */
+        uint64_t               ob_id;
+       /* id of the object holding the shared record */
+       uint64_t               ob_oid;
+        /* number of participants that must join */
+        uint64_t               ob_npeers;
+        /* incremented each time a barrier completes */
+        uint64_t               ob_ordinal;
+        /* peers that have joined the current barrier so far */
+        uint64_t               ob_count;
+};
+       
+/* Connection setup/teardown; obdio_connect() returns NULL on failure. */
+extern struct obdio_conn * obdio_connect (int device);
+extern void obdio_disconnect (struct obdio_conn *conn);
+/* Object open/close; 'fh' receives (open) or supplies (close) the handle. */
+extern int obdio_open (struct obdio_conn *conn, uint64_t oid, 
+                      struct lustre_handle *fh);
+extern int obdio_close (struct obdio_conn *conn, uint64_t oid, 
+                       struct lustre_handle *fh);
+/* Bulk read/write of 'count' bytes at 'offset'; 0 on success. */
+extern int obdio_pread (struct obdio_conn *conn, uint64_t oid, 
+                       char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid, 
+                        char *buffer, uint32_t count, uint64_t offset);
+/* Extent lock enqueue/cancel; 'lh' carries the lock handle. */
+extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
+                         int mode, uint64_t offset, uint32_t count,
+                         struct lustre_handle *lh);
+extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
+/* Page-aligned allocation; free() the pointer stored in *spacep. */
+extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
+/* Barrier helpers built on the calls above. */
+extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
+extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+
+#endif
diff --git a/lustre/utils/obdstat.c b/lustre/utils/obdstat.c
new file mode 100644 (file)
index 0000000..1e23a31
--- /dev/null
@@ -0,0 +1,197 @@
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+/*
+ * One monitored counter, backed by a file that holds its value as decimal
+ * text.
+ */
+struct one_stat {
+        char       *name;     /* counter file name; borrowed pointer, not copied */
+        int         fd;       /* descriptor of the counter file, opened read-only */
+        long long   current;  /* value parsed by the most recent update */
+        long long   delta;    /* current minus the value from the update before */
+};
+
+/*
+ * The counters being monitored.  Each pointer is filled in by main() via
+ * init_one_stat() and refreshed by do_stat().
+ */
+
+/* Bulk I/O: byte and request counts. */
+struct one_stat *read_bytes;
+struct one_stat *read_reqs;
+struct one_stat *write_bytes;
+struct one_stat *write_reqs;
+
+/* Attribute requests. */
+struct one_stat *getattr_reqs;
+struct one_stat *setattr_reqs;
+
+/* Object creation and removal. */
+struct one_stat *create_reqs;
+struct one_stat *destroy_reqs;
+
+/* Everything else. */
+struct one_stat *statfs_reqs;
+struct one_stat *open_reqs;
+struct one_stat *close_reqs;
+struct one_stat *punch_reqs;
+
+/*
+ * Create the record for the counter file "<basename>/<name>" and open that
+ * file read-only.
+ *
+ * The record starts zero-filled, so 'current' and 'delta' are both 0 until
+ * the first update.  'name' is stored by pointer rather than copied, so the
+ * caller must keep it alive for as long as the record is used.
+ *
+ * Never returns NULL: an allocation or open failure is reported on stderr
+ * and the process is aborted.
+ */
+struct one_stat *
+init_one_stat (char *basename, char *name)
+{
+        char             path[1024];
+        struct one_stat *st;
+
+        /* calloc gives us the zeroed record in one step */
+        st = calloc (1, sizeof (*st));
+        if (st == NULL) {
+                fprintf (stderr, "Can't allocate stat %s: %s\n",
+                         name, strerror (errno));
+                abort ();
+        }
+        st->name = name;
+
+        snprintf (path, sizeof (path), "%s/%s", basename, name);
+
+        st->fd = open (path, O_RDONLY);
+        if (st->fd < 0) {
+                fprintf (stderr, "Can't open stat %s: %s\n",
+                         path, strerror (errno));
+                abort ();
+        }
+
+        return (st);
+}
+
+/*
+ * Re-read one counter from its file and refresh 'current' and 'delta'.
+ *
+ * The file is rewound and read from the start; the leading decimal integer
+ * becomes stat->current and stat->delta is set to the change relative to the
+ * value held before this call.
+ *
+ * A seek, read or parse failure is reported on stderr and the process is
+ * aborted.
+ */
+void
+update_one_stat (struct one_stat *stat)
+{
+        /* static: reused across calls, so this function is not reentrant */
+        static char buffer[1024];
+        long long   prev = stat->current;
+        int         nob;
+
+        if (lseek (stat->fd, 0, SEEK_SET) < 0) {
+                fprintf (stderr, "Can't rewind stat %s: %s\n",
+                         stat->name, strerror (errno));
+                abort ();
+        }
+
+        /* leave room for the terminating NUL added below */
+        nob = read (stat->fd, buffer, sizeof (buffer) - 1);
+        if (nob < 0) {
+                fprintf (stderr, "Can't read stat %s: %s\n",
+                         stat->name, strerror (errno));
+                abort ();
+        }
+
+        buffer[nob] = 0;
+        /* %lld is the standard conversion for long long (%Ld is not) */
+        if (sscanf (buffer, "%lld", &stat->current) != 1) {
+                /* sscanf does not set errno on a mismatch; show the text */
+                fprintf (stderr, "Can't parse stat %s: '%s'\n",
+                         stat->name, buffer);
+                abort ();
+        }
+
+        stat->delta = stat->current - prev;
+}
+
+/*
+ * Current time of day as reported by gettimeofday(), expressed in seconds
+ * with the microseconds folded in as the fractional part.
+ */
+double
+timenow ()
+{
+        struct timeval now;
+        double         fraction;
+
+        gettimeofday (&now, NULL);
+        fraction = now.tv_usec / 1000000.0;
+
+        return (now.tv_sec + fraction);
+}
+
+/* Print " <tag>:<delta>" for a counter, but only if it changed. */
+static void
+print_delta (const char *tag, const struct one_stat *stat)
+{
+        if (stat->delta != 0)
+                printf (" %s:%lld", tag, stat->delta);
+}
+
+/*
+ * 'count' events spread over 't' seconds, as a per-second rate.  Yields 0
+ * when no time has measurably elapsed (or the clock went backwards), so
+ * callers never convert an infinite or negative rate to int.
+ */
+static double
+per_second (long long count, double t)
+{
+        return (t > 0.0 ? count / t : 0.0);
+}
+
+/*
+ * Sample every counter and print one line of output.
+ *
+ * First call: prints the absolute values as
+ *   R bytes/reqs W bytes/reqs attr getattr/setattr open open/close
+ *   create create/destroy stat statfs punch punch
+ *
+ * Later calls: prints read and write request counts since the previous
+ * call together with requests/s and Mb/s, followed by the change in each
+ * of the remaining counters that is non-zero (ga, sa, op, cl, cx, dx, st,
+ * pu).
+ */
+void
+do_stat (void)
+{
+        /* time of the previous sample; 0.0 means "no previous sample" */
+        static double last = 0.0;
+        double        now;
+        double        t;
+
+        now = timenow();
+
+        update_one_stat (read_bytes);
+        update_one_stat (read_reqs);
+        update_one_stat (write_bytes);
+        update_one_stat (write_reqs);
+        update_one_stat (getattr_reqs);
+        update_one_stat (setattr_reqs);
+        update_one_stat (open_reqs);
+        update_one_stat (close_reqs);
+        update_one_stat (create_reqs);
+        update_one_stat (destroy_reqs);
+        update_one_stat (statfs_reqs);
+        update_one_stat (punch_reqs);
+
+        if (last == 0.0) {
+                printf ("R %lld/%lld W %lld/%lld attr %lld/%lld open %lld/%lld create %lld/%lld stat %lld punch %lld\n",
+                        read_bytes->current, read_reqs->current,
+                        write_bytes->current, write_reqs->current,
+                        getattr_reqs->current, setattr_reqs->current,
+                        open_reqs->current, close_reqs->current,
+                        create_reqs->current, destroy_reqs->current,
+                        statfs_reqs->current, punch_reqs->current);
+        } else {
+                t = now - last;
+
+                printf ("R %6lld (%5d %6.2fMb)/s W %6lld (%5d %6.2fMb)/s",
+                        read_reqs->delta,
+                        (int)per_second (read_reqs->delta, t),
+                        per_second (read_bytes->delta, t) / (1<<20),
+                        write_reqs->delta,
+                        (int)per_second (write_reqs->delta, t),
+                        per_second (write_bytes->delta, t) / (1<<20));
+
+                /* getattr is the only secondary counter shown with a rate */
+                if (getattr_reqs->delta != 0)
+                        printf (" ga:%lld,%d/s", getattr_reqs->delta,
+                                (int)per_second (getattr_reqs->delta, t));
+
+                print_delta ("sa", setattr_reqs);
+                print_delta ("op", open_reqs);
+                print_delta ("cl", close_reqs);
+                print_delta ("cx", create_reqs);
+                print_delta ("dx", destroy_reqs);
+                print_delta ("st", statfs_reqs);
+                print_delta ("pu", punch_reqs);
+
+                printf ("\n");
+        }
+
+        /*
+         * Remember when this sample was taken, not when printing finished:
+         * the deltas span sample-to-sample, so the divisor must too.
+         */
+        last = now;
+}
+
+/*
+ * Usage: <program> <obd type> [interval]
+ *
+ * Opens the counter files under /proc/sys/<obd type> and prints one line of
+ * statistics.  With a non-zero interval (in seconds) it then keeps printing
+ * a line every 'interval' seconds until killed; with no interval, or 0, it
+ * exits after the first line.
+ *
+ * Returns 1 on a usage error.  Failures while opening or reading counter
+ * files abort the process inside init_one_stat()/update_one_stat().
+ */
+int main (int argc, char **argv)
+{
+        char         basedir[128];
+        unsigned int interval = 0;
+        int          len;
+
+        if (argc < 2) {
+                fprintf (stderr, "obd type not specified\n");
+                return (1);
+        }
+
+        len = snprintf (basedir, sizeof (basedir), "/proc/sys/%s", argv[1]);
+        if (len < 0 || (size_t)len >= sizeof (basedir)) {
+                /* a truncated path would silently name the wrong directory */
+                fprintf (stderr, "obd type too long: %s\n", argv[1]);
+                return (1);
+        }
+
+        if (argc > 2) {
+                char *end;
+                long  val;
+
+                errno = 0;
+                val = strtol (argv[2], &end, 10);
+                /*
+                 * sleep() takes an unsigned int: reject anything that is
+                 * not a plain non-negative number fitting in that range.
+                 */
+                if (errno != 0 || end == argv[2] || *end != '\0' ||
+                    val < 0 || (unsigned long)val > (unsigned int)-1) {
+                        fprintf (stderr, "invalid interval: %s\n", argv[2]);
+                        return (1);
+                }
+                interval = (unsigned int)val;
+        }
+
+        read_bytes = init_one_stat (basedir, "read_bytes");
+        read_reqs = init_one_stat (basedir, "read_reqs");
+        write_bytes = init_one_stat (basedir, "write_bytes");
+        write_reqs = init_one_stat (basedir, "write_reqs");
+        getattr_reqs = init_one_stat (basedir, "getattr_reqs");
+        setattr_reqs = init_one_stat (basedir, "setattr_reqs");
+        create_reqs = init_one_stat (basedir, "create_reqs");
+        destroy_reqs = init_one_stat (basedir, "destroy_reqs");
+        statfs_reqs = init_one_stat (basedir, "statfs_reqs");
+        open_reqs = init_one_stat (basedir, "open_reqs");
+        close_reqs = init_one_stat (basedir, "close_reqs");
+        punch_reqs = init_one_stat (basedir, "punch_reqs");
+
+        do_stat ();
+
+        if (interval == 0)
+                return (0);
+
+        for (;;) {
+                sleep (interval);
+                do_stat ();
+        }
+}