+TBD
+ * version v0_5_20
+ * bug fixes
+ - Fix ldlm_lock_match on the MDS to avoid matching remote locks (592)
+ - Fix fsfilt_extN_readpage() to read a full page of directory
+ entries, or fake the remainder if PAGE_SIZE != blocksize (500)
+ - Avoid extra mdc_getattr() in ll_intent_lock when possible (534, 604)
+ - Fix imbalanced LOV object allocation and out-of-bound access (469)
+ - Most intent operations were removed, in favour of a new RPC mode
+ that does a single RPC to the server and bypasses most of the VFS
+ - All LDLM resource ID arrays were removed in favour of ldlm_res_id
+ - Aggressively cancel local locks on DLM servers
+ - mds_reint_unlink sends EA to the client if it's the last nlink.
+ client uses that EA to unlink OST objects.
+ - mds_reint_{rename,unlink,link} were rewritten to take ordered locks
+ - recursive symlinks were fixed (439)
+ - fixed NULL deref in DEBUG_REQ
+ - filter_update_lastobjid no longer calls sync, which annoyed extN
+ - fixed multi-client small-writes to a single file problem (445)
+ - fixed mtime updates during file writes (607)
+ - fixed vector writes on obdfilter causing problems when ENOSPC (670)
+ - fixed bug in obd_brw_read/write() (under guise of testing 367)
+ - fixed Linux OST size reporting problem (444, 656)
+ - OST now updates object mtime with writes or setattr (607, 619)
+ - client verifies file size before zeroing page past EOF (445)
+ - OST now writes last allocated objid to disk with allocation (108)
+ - LOV on echo now works (409)
+ * protocol changes
+ - mds_reint_unlink sends a new buffer, with the EA included. this
+ buffer is only valid if body->valid & OBD_MD_FLEASIZE, which is only
+ set if a regular file was being unlinked, and it was the last link
+ - use PtlGet from the target for bulk writes (315)
+ - OST now updates object mtime with writes or setattr (607, 619)
+ - LDLM now has a grant-time callback to revalidate locked items, if
+ necessary (604)
+ - Many MDS operations were reorganized to combat race conditions
+ * other changes
+ - Merge b_intel branch (updated lprocfs code) - now at /proc/fs/lustre
+ - configure check to avoid gcc version 2.96 20000731-2.96-98) (606)
+
2003-01-06 Andreas Dilger <adilger@clusterfs.com>
* version v0_5_19
* bug fixes
rm -f $(top_srcdir)/TAGS
rm -f $(top_srcdir)/tags
find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a
- find $(top_srcdir) -name '*.[hc]' | xargs etags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a
find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a
- find $(top_srcdir) -name '*.[hc]' | xargs ctags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a
AM_CPPFLAGS=-I$(top_builddir)/include
ia64 )
AC_MSG_RESULT($host_cpu)
- KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
+ KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
KCPPFLAGS='-D__KERNEL__ -DMODULE'
MOD_LINK=elf64_ia64
;;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_COBD
#include <linux/obd_class.h>
#include <linux/obd_cache.h>
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
-static int
-cobd_attach (struct obd_device *dev, obd_count len, void *data)
+static int cobd_attach(struct obd_device *dev, obd_count len, void *data)
{
- return (lprocfs_reg_obd (dev, status_var_nm_1, dev));
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
-static int
-cobd_detach (struct obd_device *dev)
+static int cobd_detach(struct obd_device *dev)
{
- return (lprocfs_dereg_obd (dev));
+ return lprocfs_obd_detach(dev);
}
static int
struct cache_obd *cobd = &dev->u.cobd;
struct obd_device *target;
struct obd_device *cache;
+ struct obd_uuid target_uuid;
+ struct obd_uuid cache_uuid;
int rc;
-
+
if (data->ioc_inlbuf1 == NULL ||
data->ioc_inlbuf2 == NULL)
return (-EINVAL);
-
- target = class_uuid2obd (data->ioc_inlbuf1);
- cache = class_uuid2obd (data->ioc_inlbuf2);
+
+ obd_str2uuid(&target_uuid, data->ioc_inlbuf1);
+ target = class_uuid2obd (&target_uuid);
+
+ obd_str2uuid(&cache_uuid, data->ioc_inlbuf2);
+ cache = class_uuid2obd (&cache_uuid);
if (target == NULL ||
cache == NULL)
return (-EINVAL);
-
- /* don't bother checking attached/setup;
- * obd_connect() should, and it can change underneath us */
- rc = obd_connect (&cobd->cobd_target, target, NULL, NULL, NULL);
+ /* don't bother checking attached/setup;
+ * obd_connect() should, and it can change underneath us */
+ rc = obd_connect (&cobd->cobd_target, target, &target_uuid, NULL, NULL);
if (rc != 0)
return (rc);
- rc = obd_connect (&cobd->cobd_cache, cache, NULL, NULL, NULL);
+ rc = obd_connect (&cobd->cobd_cache, cache, &cache_uuid, NULL, NULL);
if (rc != 0)
goto fail_0;
{
struct cache_obd *cobd = &dev->u.cobd;
int rc;
-
+
if (!list_empty (&dev->obd_exports))
return (-EBUSY);
-
+
rc = obd_disconnect (&cobd->cobd_cache);
if (rc != 0)
CERROR ("error %d disconnecting cache\n", rc);
-
+
rc = obd_disconnect (&cobd->cobd_target);
if (rc != 0)
CERROR ("error %d disconnecting target\n", rc);
static int
cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
int rc = class_connect (conn, obd, cluuid);
cobd_disconnect (struct lustre_handle *conn)
{
int rc = class_disconnect (conn);
-
+
CERROR ("rc %d\n", rc);
return (rc);
}
-static int
+static int
cobd_get_info(struct lustre_handle *conn, obd_count keylen,
void *key, obd_count *vallen, void **val)
{
/* intercept cache utilisation info? */
- return (obd_get_info (&cobd->cobd_target,
+ return (obd_get_info (&cobd->cobd_target,
keylen, key, vallen, val));
}
-static int
+static int
cobd_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
{
struct obd_device *obd = class_conn2obd(conn);
return (obd_statfs (&cobd->cobd_target, osfs));
}
-static int
+static int
cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm)
{
return (obd_getattr (&cobd->cobd_target, oa, lsm));
}
-static int
+static int
cobd_open(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
}
cobd = &obd->u.cobd;
- return (obd_open (&cobd->cobd_target, oa, lsm));
+ return (obd_open (&cobd->cobd_target, oa, lsm, oti));
}
-static int
+static int
cobd_close(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
}
cobd = &obd->u.cobd;
- return (obd_close (&cobd->cobd_target, oa, lsm));
+ return (obd_close (&cobd->cobd_target, oa, lsm, oti));
}
-static int
+static int
cobd_preprw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private)
+ struct niobuf_local *res, void **desc_private,
+ struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
if ((cmd & OBD_BRW_WRITE) != 0)
return -EOPNOTSUPP;
-
+
cobd = &obd->u.cobd;
- return (obd_preprw (cmd, &cobd->cobd_target,
- objcount, obj,
- niocount, nb,
- res, desc_private));
+ return (obd_preprw (cmd, &cobd->cobd_target,
+ objcount, obj,
+ niocount, nb,
+ res, desc_private, oti));
}
-static int
+static int
cobd_commitrw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private)
+ void *desc_private, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
if ((cmd & OBD_BRW_WRITE) != 0)
return -EOPNOTSUPP;
-
+
cobd = &obd->u.cobd;
return (obd_commitrw (cmd, &cobd->cobd_target,
objcount, obj,
niocount, local,
- desc_private));
+ desc_private, oti));
}
-static inline int
+static inline int
cobd_brw(int cmd, struct lustre_handle *conn,
struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_brw_set *set)
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
if ((cmd & OBD_BRW_WRITE) != 0)
return -EOPNOTSUPP;
-
+
cobd = &obd->u.cobd;
- return (obd_brw (cmd, &cobd->cobd_target,
- lsm, oa_bufs, pga, set));
+ return (obd_brw (cmd, &cobd->cobd_target,
+ lsm, oa_bufs, pga, set, oti));
}
-static int
+static int
cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
void *karg, void *uarg)
{
o_iocontrol: cobd_iocontrol,
};
-static int __init
-cobd_init (void)
+static int __init cobd_init(void)
{
- int rc;
-
- printk (KERN_INFO "Lustre Caching OBD driver\n");
-
- rc = class_register_type (&cobd_ops, status_class_var,
- OBD_CACHE_DEVICENAME);
- return (rc);
+ struct lprocfs_static_vars lvars;
+ ENTRY;
+
+ printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+
+ lprocfs_init_vars(&lvars);
+ RETURN(class_register_type(&cobd_ops, lvars.module_vars,
+ OBD_CACHE_DEVICENAME));
}
-static void __exit
-cobd_exit (void)
+static void __exit cobd_exit(void)
{
- class_unregister_type (OBD_CACHE_DEVICENAME);
+ class_unregister_type(OBD_CACHE_DEVICENAME);
}
-MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Caching OBD driver");
MODULE_LICENSE("GPL");
module_init(cobd_init);
module_exit(cobd_exit);
-
-
#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
-/*
- * Common STATUS namespace
- */
-
-static int rd_uuid (char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct obd_device* dev = (struct obd_device*)data;
-
- return (snprintf(page, count, "%s\n", dev->obd_uuid));
-}
-
-static int rd_target (char *page, char **start, off_t off, int count,
- int *eof, void *data)
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+/* Common STATUS namespace */
+static int rd_target(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct cache_obd *cobd = &dev->u.cobd;
- struct lustre_handle *conn = &cobd->cobd_target;
+ struct lustre_handle *conn = &dev->u.cobd.cobd_target;
struct obd_export *exp;
int rc;
rc = snprintf (page, count, "not set up\n");
else {
exp = class_conn2export (conn);
- LASSERT (exp != NULL);
- rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid);
+ LASSERT(exp != NULL);
+ rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
}
return (rc);
}
int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct cache_obd *cobd = &dev->u.cobd;
- struct lustre_handle *conn = &cobd->cobd_cache;
+ struct lustre_handle *conn = &dev->u.cobd.cobd_cache;
struct obd_export *exp;
int rc;
else {
exp = class_conn2export (conn);
LASSERT (exp != NULL);
- rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid);
+ rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
}
return (rc);
}
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/target_uuid", rd_target, 0, 0},
- {"status/cache_uuid", rd_cache, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "target_uuid", rd_target, 0, 0 },
+ { "cache_uuid", rd_cache, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
-
- return (snprintf(page, count, "%d\n", class->typ_refcnt));
-}
-
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+#endif /* LPROCFS */
+
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-EXTRA_DIST = lustre2ldif.xsl lustre.dtd lustre.schema slapd-lustre.conf
+EXTRA_DIST = lustre.dtd lustre.schema slapd-lustre.conf lustre2ldif.xsl top.ldif
ldapconfdir = $(sysconfdir)/openldap
ldapschemadir = $(sysconfdir)/openldap/schema
ldapconf_SCRIPTS = slapd-lustre.conf
ldapschema_SCRIPTS = lustre.schema
+pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)'
+pkglib_DATA = top.ldif lustre2ldif.xsl
include $(top_srcdir)/Rules
mdsRef: <value-of select="@uuidref"/>
</template>
+<template match="mdsdev_ref">
+mdsdevRef: <value-of select="@uuidref"/>
+</template>
+
<template match="mountpoint_ref">
mountpointRef: <value-of select="@uuidref"/>
</template>
AC_INIT
AC_CANONICAL_SYSTEM
-# Copyright (C) 2001 Cluster File Systems, Inc.
+# Copyright (C) 2001-2003 Cluster File Systems, Inc.
#
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
# Automake variables. Steal the version number from lustre.spec.in.
AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*version //; p; q; }' scripts/lustre.spec.in]))
#AM_MAINTAINER_MODE
+
AC_PROG_CC
+AC_MSG_CHECKING(for buggy compiler)
+CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
+bad_cc() {
+ echo
+ echo " '$CC_VERSION'"
+ echo " has been known to generate bad code, "
+ echo " please get an updated compiler."
+ AC_MSG_ERROR(sorry)
+}
+case "$CC_VERSION" in
+ # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
+ # without "sub $0xc,%esp" to protect the stack from being
+ # stomped on by interrupts (bug 606)
+ "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
+ bad_cc
+ ;;
+ *)
+ AC_MSG_RESULT(no known problems)
+ ;;
+esac
+
AC_PROG_RANLIB
#
EXTNP = htree-ext3-2.4.18.diff linux-2.4.18ea-0.8.26.diff
EXTNP+= ext3-2.4.18-ino_sb_macro.diff extN-misc-fixup.diff extN-noread.diff
EXTNP+= extN-wantedi.diff
+#EXTNP+= extN-iget-debug.diff
EXTNC = balloc.c bitmap.c dir.c file.c fsync.c ialloc.c inode.c ioctl.c
EXTNC+= namei.c super.c symlink.c
EXTNI = extN_fs.h extN_fs_i.h extN_fs_sb.h extN_jbd.h quotaops.h
list='$(EXTN_EXTRA)'; for f in $$list; do $(RM) $(top_builddir)/$$f; done
if [ -f $(srcdir)/extN.patch-$(RELEASE) ]; then \
echo "applying patch $(srcdir)/extN.patch-$(RELEASE)"; \
- (cd $(top_builddir) && patch -p0) < $(srcdir)/extN.patch-$(RELEASE); \
+ (cd $(top_builddir) && patch -p0) < $(srcdir)/extN.patch-$(RELEASE);\
else \
- echo "If first patch fails, read NOTE in extN/Makefile.am"; \
list='$(EXTNP)'; \
- sed '/i_version/q' $(extN_orig)/namei.c | tail -2 | \
- grep extN_mark_inode_dirty >/dev/null && list="$(EXTN_FIXES) $$list"; \
+ grep -q "err = extN_mark_inode_dirty" $(extN_orig)/namei.c || \
+ list="ext3-use-after-free.diff $$list"; \
+ sed '/i_version/q' $(extN_orig)/namei.c | tail -2 | \
+ grep -q extN_mark_inode_dirty && list="$(EXTN_FIXES) $$list"; \
+ grep -q "if (do_sync_supers)" $(extN_orig)/super.c && \
+ list="ext3-unmount_sync.diff $$list"; \
for p in $$list; do \
echo "applying patch $$p"; \
sed $(SUB) $(srcdir)/$$p | \
- (cd $(top_builddir) && patch -p1) || exit $$?; \
+ (cd $(top_builddir) && patch -p1) || exit $$?; \
done; \
- echo "It is OK if the next patch says it is already applied"; \
+ echo "It is OK if the next patch says it is skipping this patch"; \
echo "applying patch $(srcdir)/extN-2.4.18-exports.diff"; \
(cd $(top_builddir) && \
patch -N -p1) < $(srcdir)/extN-2.4.18-exports.diff; \
--- /dev/null
+From adilger@clusterfs.com Mon Dec 2 10:26:44 2002
+Date: Mon, 2 Dec 2002 10:26:44 -0700
+From: Andreas Dilger <adilger@clusterfs.com>
+To: Lustre LLNL Mailing list <lc-lustre@llnl.gov>,
+ Lustre Development Mailing List <lustre-devel@lists.sourceforge.net>
+Subject: Re: data corrupting bug in 2.4.20 ext3, data=journal
+Message-ID: <20021202102644.H1422@schatzie.adilger.int>
+Mail-Followup-To: Lustre LLNL Mailing list <lc-lustre@llnl.gov>,
+ Lustre Development Mailing List <lustre-devel@lists.sourceforge.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+User-Agent: Mutt/1.2.5.1i
+X-GPG-Key: 1024D/0D35BED6
+X-GPG-Fingerprint: 7A37 5D79 BF1B CECA D44F 8A29 A488 39F5 0D35 BED6
+Status: RO
+Content-Length: 1160
+Lines: 39
+
+Here is the new-improved fix for the ext3 discarding data at umount bug
+discovered late last week. To be used instead of the previous ext3 fix.
+
+Sadly, this is completely unrelated to the problems Mike is having with
+ext3 under UML, since it is an unmount-time problem.
+
+----- Forwarded message from "Stephen C. Tweedie" <sct@redhat.com> -----
+The attached patch seems to fix things for me.
+
+Cheers,
+ Stephen
+
+
+--- linux-2.4-ext3merge/fs/ext3/super.c.=K0027=.orig 2002-12-02 15:35:13.000000000 +0000
++++ linux-2.4-ext3merge/fs/ext3/super.c 2002-12-02 15:35:14.000000000 +0000
+@@ -1640,7 +1640,12 @@
+ sb->s_dirt = 0;
+ target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+
+- if (do_sync_supers) {
++ /*
++ * Tricky --- if we are unmounting, the write really does need
++ * to be synchronous. We can detect that by looking for NULL in
++ * sb->s_root.
++ */
++ if (do_sync_supers || !sb->s_root) {
+ unlock_super(sb);
+ log_wait_commit(EXT3_SB(sb)->s_journal, target);
+ lock_super(sb);
+
+
+----- End forwarded message -----
+
+Cheers, Andreas
+--
+Andreas Dilger
+http://sourceforge.net/projects/ext2resize/
+http://www-mddsp.enel.ucalgary.ca/People/adilger/
+
+
--- /dev/null
+
+
+If ext3_add_nondir() fails it will do an iput() of the inode. But we
+continue to run ext3_mark_inode_dirty() against the potentially-freed
+inode. This oopses when slab poisoning is enabled.
+
+Fix it so that we only run ext3_mark_inode_dirty() if the inode was
+successfully instantiated.
+
+This bug was added in 2.4.20-pre9.
+
+
+ fs/ext3/namei.c | 11 +++++------
+ 1 files changed, 5 insertions(+), 6 deletions(-)
+
+--- 24/fs/ext3/namei.c~ext3-use-after-free Sun Dec 15 11:27:50 2002
++++ 24-akpm/fs/ext3/namei.c Sun Dec 15 11:27:50 2002
+@@ -429,8 +429,11 @@ static int ext3_add_nondir(handle_t *han
+ {
+ int err = ext3_add_entry(handle, dentry, inode);
+ if (!err) {
+- d_instantiate(dentry, inode);
+- return 0;
++ err = ext3_mark_inode_dirty(handle, inode);
++ if (err == 0) {
++ d_instantiate(dentry, inode);
++ return 0;
++ }
+ }
+ ext3_dec_count(handle, inode);
+ iput(inode);
+@@ -465,7 +468,6 @@ static int ext3_create (struct inode * d
+ inode->i_fop = &ext3_file_operations;
+ inode->i_mapping->a_ops = &ext3_aops;
+ err = ext3_add_nondir(handle, dentry, inode);
+- ext3_mark_inode_dirty(handle, inode);
+ }
+ ext3_journal_stop(handle, dir);
+ return err;
+@@ -490,7 +492,6 @@ static int ext3_mknod (struct inode * di
+ if (!IS_ERR(inode)) {
+ init_special_inode(inode, mode, rdev);
+ err = ext3_add_nondir(handle, dentry, inode);
+- ext3_mark_inode_dirty(handle, inode);
+ }
+ ext3_journal_stop(handle, dir);
+ return err;
+@@ -934,7 +935,6 @@ static int ext3_symlink (struct inode *
+ }
+ inode->u.ext3_i.i_disksize = inode->i_size;
+ err = ext3_add_nondir(handle, dentry, inode);
+- ext3_mark_inode_dirty(handle, inode);
+ out_stop:
+ ext3_journal_stop(handle, dir);
+ return err;
+@@ -971,7 +971,6 @@ static int ext3_link (struct dentry * ol
+ atomic_inc(&inode->i_count);
+
+ err = ext3_add_nondir(handle, dentry, inode);
+- ext3_mark_inode_dirty(handle, inode);
+ ext3_journal_stop(handle, dir);
+ return err;
+ }
+
+_
--- /dev/null
+--- linux/fs/ext3/namei.c.orig Thu Jan 30 01:15:13 2003
++++ linux/fs/ext3/namei.c Sat Feb 1 00:33:46 2003
+@@ -710,6 +710,24 @@
+ return ret;
+ }
+
++static int extN_find_inode(struct inode *inode, unsigned long ino,
++ void *opaque)
++{
++ const char *name = NULL;
++ int len = 0;
++
++ if (opaque) {
++ struct dentry *dentry = opaque;
++ name = dentry->d_name.name;
++ len = dentry->d_name.len;
++ }
++ printk(KERN_INFO "finding inode %s:%lu (%p) count %d (%p = %*s)\n",
++ kdevname(inode->i_dev), ino, inode, atomic_read(&inode->i_count),
++ opaque, len, name ? name : "");
++
++ return 1;
++}
++
+ static struct dentry *extN_lookup(struct inode * dir, struct dentry *dentry)
+ {
+ struct inode * inode;
+@@ -724,7 +742,7 @@
+ if (bh) {
+ unsigned long ino = le32_to_cpu(de->inode);
+ brelse (bh);
+- inode = iget(dir->i_sb, ino);
++ inode = iget4(dir->i_sb, ino, extN_find_inode, dentry);
+
+ if (!inode)
+ return ERR_PTR(-EACCES);
+--- linux/fs/ext3/inode.c.orig Thu Jan 30 01:15:13 2003
++++ linux/fs/ext3/inode.c Sat Feb 1 00:34:45 2003
+@@ -166,6 +166,9 @@
+ */
+ void extN_put_inode (struct inode * inode)
+ {
++ printk(KERN_INFO "putting inode %s:%lu (%p) count %d\n",
++ kdevname(inode->i_dev), inode->i_ino, inode,
++ atomic_read(&inode->i_count));
+ extN_discard_prealloc (inode);
+ }
+
goto out_journal;
}
EXTN_SB(sb)->journal_bdev = bdev;
+@@ -1560,6 +1560,7 @@
+ unlock_kernel();
+ return ret;
+ }
++EXPORT_SYMBOL(extN_force_commit); /* here to avoid potential patch collisions */
+
+ /*
+ * Ext3 always journals updates to the superblock itself, so we don't
+ if (err) goto fail;
+
+ if (extN_set_bit(j, bh->b_data)) {
-+ printk(KERN_ERR "goal inode %lu unavailable", goal);
++ printk(KERN_ERR "goal inode %lu unavailable\n", goal);
+ /* Oh well, we tried. */
+ goto repeat;
+ }
static struct buffer_head * ext3_find_entry (struct dentry *dentry,
struct ext3_dir_entry_2 ** res_dir)
{
-@@ -119,10 +564,76 @@
+@@ -119,10 +564,70 @@
int num = 0;
int nblocks, i, err;
struct inode *dir = dentry->d_parent->d_inode;
-+ int namelen;
-+ const u8 *name;
-+ unsigned blocksize;
+ ext3_dirent *de, *top;
*res_dir = NULL;
sb = dir->i_sb;
-+ blocksize = sb->s_blocksize;
-+ namelen = dentry->d_name.len;
-+ name = dentry->d_name.name;
-+ if (namelen > EXT3_NAME_LEN)
++ if (dentry->d_name.len > EXT3_NAME_LEN)
+ return NULL;
+ if (ext3_dx && is_dx(dir)) {
-+ u32 hash = dx_hash (name, namelen);
++ u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
+ struct dx_frame frames[2], *frame;
+ if (!(frame = dx_probe (dir, hash, frames)))
+ return NULL;
+ if (!(bh = ext3_bread (NULL,dir, block, 0, &err)))
+ goto dxfail;
+ de = (ext3_dirent *) bh->b_data;
-+ top = (ext3_dirent *) ((char *) de + blocksize -
++ top = (ext3_dirent *) ((char *) de + sb->s_blocksize -
+ EXT3_DIR_REC_LEN(0));
+ for (; de < top; de = ext3_next_entry(de))
-+ if (ext3_match (namelen, name, de)) {
++ if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) {
+ if (!ext3_check_dir_entry("ext3_find_entry",
+ dir, de, bh,
+ (block<<EXT3_BLOCK_SIZE_BITS(sb))
nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
start = dir->u.ext3_i.i_dir_start_lookup;
if (start >= nblocks)
-@@ -237,6 +748,92 @@
+@@ -237,6 +748,90 @@
de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
+ struct buffer_head **bh,struct dx_frame *frame,
+ u32 hash, int *error)
+{
-+ unsigned blocksize = dir->i_sb->s_blocksize;
-+ unsigned count, continued;
++ unsigned count;
+ struct buffer_head *bh2;
+ u32 newblock;
-+ unsigned MAX_DX_MAP = PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1;
+ u32 hash2;
+ struct dx_map_entry *map;
+ char *data1 = (*bh)->b_data, *data2, *data3;
+
+ data2 = bh2->b_data;
+
-+ map = kmalloc(sizeof(*map) * MAX_DX_MAP, GFP_KERNEL);
++ map = kmalloc(sizeof(*map) * PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1,
++ GFP_KERNEL);
+ if (!map)
+ panic("no memory for do_split\n");
-+ count = dx_make_map ((ext3_dirent *) data1, blocksize, map);
++ count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map);
+ split = count/2; // need to adjust to actual middle
+ dx_sort_map (map, count);
+ hash2 = map[split].hash;
-+ continued = hash2 == map[split - 1].hash;
+ dxtrace(printk("Split block %i at %x, %i/%i\n",
+ dx_get_block(frame->at), hash2, split, count-split));
+
+ de = dx_copy_dirents (data1, data3, map, split);
+ memcpy(data1, data3, (char *) de + de->rec_len - data3);
+ de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
-+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-+ dxtrace(dx_show_leaf ((ext3_dirent *) data1, blocksize, 1));
-+ dxtrace(dx_show_leaf ((ext3_dirent *) data2, blocksize, 1));
++ de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
++ de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
++ dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1));
++ dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1));
+
+ /* Which block gets the new entry? */
+ if (hash >= hash2)
+ swap(*bh, bh2);
+ de = de2;
+ }
-+ dx_insert_block (frame, hash2 + continued, newblock);
++ dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock);
+ ext3_journal_dirty_metadata (handle, bh2);
+ brelse (bh2);
+ ext3_journal_dirty_metadata (handle, frame->bh);
/*
* ext3_add_entry()
*
-@@ -251,6 +844,7 @@
- /*
- * AKPM: the journalling code here looks wrong on the error paths
- */
-+
- static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
+@@ -255,118 +849,278 @@
struct inode *inode)
{
-@@ -258,117 +852,281 @@
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
+ struct inode *dir = dentry->d_parent->d_inode;
+- const char *name = dentry->d_name.name;
+- int namelen = dentry->d_name.len;
unsigned long offset;
- unsigned short rec_len;
struct buffer_head * bh;
+ ext3_dirent *de;
+ struct super_block * sb = dir->i_sb;
int retval;
-+ unsigned short reclen = EXT3_DIR_REC_LEN(namelen);
++ unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len);
- sb = dir->i_sb;
-+ unsigned blocksize = sb->s_blocksize;
+ unsigned nlen, rlen;
+ u32 block, blocks;
+ char *top;
- if (!namelen)
+- if (!namelen)
++ if (!dentry->d_name.len)
return -EINVAL;
- bh = ext3_bread (handle, dir, 0, 0, &retval);
- if (!bh)
+ u32 hash;
+ char *data1;
+
-+ hash = dx_hash(name, namelen);
++ hash = dx_hash(dentry->d_name.name, dentry->d_name.len);
+ /* FIXME: do something if dx_probe() fails here */
+ frame = dx_probe(dir, hash, frames);
+ entries = frame->entries;
+
+ data1 = bh->b_data;
+ de = (ext3_dirent *) data1;
-+ top = data1 + (0? 200: blocksize);
++ top = data1 + (0? 200: sb->s_blocksize);
+ while ((char *) de < top)
+ {
+ /* FIXME: check EEXIST and dir */
+ goto dxfail2;
+ node2 = (struct dx_node *)(bh2->b_data);
+ entries2 = node2->entries;
-+ node2->fake.rec_len = cpu_to_le16(blocksize);
++ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+ node2->fake.inode = 0;
+ BUFFER_TRACE(frame->bh, "get_write_access");
+ ext3_journal_get_write_access(handle, frame->bh);
+ if(!bh)
+ return retval;
+ de = (ext3_dirent *)bh->b_data;
-+ top = bh->b_data + blocksize - reclen;
++ top = bh->b_data + sb->s_blocksize - reclen;
+ while ((char *) de <= top) {
+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
+ bh, offset)) {
+ brelse (bh);
+ return -EIO;
+ }
-+ if (ext3_match (namelen, name, de)) {
++ if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) {
brelse (bh);
return -EEXIST;
- }
- ext3_journal_dirty_metadata(handle, bh);
+ nlen = EXT3_DIR_REC_LEN(de->name_len);
+ rlen = le16_to_cpu(de->rec_len);
-+ if ((de->inode? rlen - nlen: rlen) >= reclen)
++ if ((de->inode ? rlen - nlen: rlen) >= reclen)
+ goto add;
+ de = (ext3_dirent *)((char *)de + rlen);
+ offset += rlen;
+ return retval;
+ de = (ext3_dirent *) bh->b_data;
+ de->inode = 0;
-+ de->rec_len = cpu_to_le16(rlen = blocksize);
++ de->rec_len = cpu_to_le16(rlen = sb->s_blocksize);
+ nlen = 0;
+ goto add;
+
+ ext3_set_de_type(dir->i_sb, de, inode->i_mode);
+ } else
+ de->inode = 0;
-+ de->name_len = namelen;
-+ memcpy (de->name, name, namelen);
++ de->name_len = dentry->d_name.len;
++ memcpy (de->name, dentry->d_name.name, dentry->d_name.len);
+ /*
+ * XXX shouldn't update any times until successful
+ * completion of syscall, but too many callers depend
+
+ /* The 0th block becomes the root, move the dirents out */
+ de = (ext3_dirent *) &root->info;
-+ len = ((char *) root) + blocksize - (char *) de;
++ len = ((char *) root) + sb->s_blocksize - (char *) de;
+ memcpy (data1, de, len);
+ de = (ext3_dirent *) data1;
+ top = data1 + len;
+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top)
+ de = de2;
-+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
++ de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de);
+ /* Initialize the root; the dot dirents already exist */
+ de = (ext3_dirent *) (&root->dotdot);
-+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
++ de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2));
+ memset (&root->info, 0, sizeof(root->info));
+ root->info.info_length = sizeof(root->info);
+ entries = root->entries;
+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
+
+ /* Initialize as for dx_probe */
-+ hash = dx_hash (name, namelen);
++ hash = dx_hash (dentry->d_name.name, dentry->d_name.len);
+ frame = frames;
+ frame->entries = entries;
+ frame->at = entries;
+ return -ENOENT;
}
-+
/*
- * ext3_delete_entry deletes a directory entry by merging it with the
- * previous entry
@@ -451,7 +1212,8 @@
struct inode * inode;
int err;
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
-@@ -465,6 +466,8 @@
- inode->i_fop = &extN_file_operations;
- inode->i_mapping->a_ops = &ext3_aops;
- err = ext3_add_nondir(handle, dentry, inode);
-+ if (err)
-+ ext3_xattr_drop_inode(handle, inode);
- ext3_mark_inode_dirty(handle, inode);
- }
- ext3_journal_stop(handle, dir);
-@@ -490,6 +493,8 @@
- if (!IS_ERR(inode)) {
- init_special_inode(inode, mode, rdev);
- err = ext3_add_nondir(handle, dentry, inode);
-+ if (err)
-+ ext3_xattr_drop_inode(handle, inode);
- ext3_mark_inode_dirty(handle, inode);
+@@ -435,6 +435,7 @@ static int ext3_add_nondir(handle_t *han
+ return 0;
+ }
}
- ext3_journal_stop(handle, dir);
++ ext3_xattr_drop_inode(handle, inode);
+ ext3_dec_count(handle, inode);
+ iput(inode);
+ return err;
@@ -514,7 +519,7 @@
if (IS_SYNC(dir))
handle->h_sync = 1;
ext3_mark_inode_dirty(handle, inode);
err = ext3_add_entry (handle, dentry, inode);
if (err)
-@@ -565,6 +566,7 @@
- return err;
-
- out_no_entry:
-+ ext3_xattr_drop_inode(handle, inode);
- inode->i_nlink = 0;
- ext3_mark_inode_dirty(handle, inode);
- iput (inode);
@@ -917,5 +919,5 @@
goto out_stop;
#ifndef _LPROCFS_SNMP_H
#define _LPROCFS_SNMP_H
-
-#ifndef LPROC_SNMP
-#define LPROC_SNMP
-#endif
-
+#include <linux/autoconf.h>
#include <linux/proc_fs.h>
-typedef enum {
- E_LPROC_OK = 0
-} lproc_error_t;
-
-struct lprocfs_vars{
+#ifndef LPROCFS
+#ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */
+#define LPROCFS
+#endif
+#endif
- char* name;
- read_proc_t* read_fptr;
- write_proc_t* write_fptr;
- void* data;
+struct lprocfs_vars {
+ char *name;
+ read_proc_t *read_fptr;
+ write_proc_t *write_fptr;
+ void *data;
};
-#ifdef LPROC_SNMP
-
-struct proc_dir_entry* lprocfs_mkdir(const char *dname,
- struct proc_dir_entry *parent);
-struct proc_dir_entry* lprocfs_srch(struct proc_dir_entry *head,
- const char *name);
-void lprocfs_remove_all(struct proc_dir_entry *root);
-struct proc_dir_entry* lprocfs_new_dir(struct proc_dir_entry *root,
- const char *string,
- const char *tok);
-int lprocfs_new_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
- const char *tok, void *data);
-
-int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var,
- void *data);
-int lprocfs_reg_obd(struct obd_device *device, struct lprocfs_vars *list,
- void *data);
-int lprocfs_dereg_obd(struct obd_device *device);
-struct proc_dir_entry* lprocfs_reg_mnt(char *mnt_name);
-int lprocfs_dereg_mnt(struct proc_dir_entry *root);
-
-int lprocfs_reg_class(struct obd_type *type, struct lprocfs_vars *list,
- void *data);
-int lprocfs_dereg_class(struct obd_type *class);
-int lprocfs_reg_main(void);
-int lprocfs_dereg_main(void);
-int lprocfs_ll_rd(char *page, char **start, off_t off, int count, int *eof,
- void *data);
-#else
-
-
-static inline int lprocfs_add_vars(struct proc_dir_entry *root,
- struct lprocfs_vars *var, void *data)
-{
- return 0;
-}
-
-static inline int lprocfs_reg_obd(struct obd_device* device,
- struct lprocfs_vars* list, void* data)
-{
- return 0;
-}
-
-static inline int lprocfs_dereg_obd(struct obd_device* device)
-{
- return 0;
-}
-
-static inline struct proc_dir_entry* lprocfs_reg_mnt(char *name)
-{
- return NULL;
-}
-
-static inline int lprocfs_dereg_mnt(struct proc_dir_entry* root)
-{
- return 0;
-}
-
-static inline int lprocfs_reg_class(struct obd_type* type,
- struct lprocfs_vars* list, void* data)
-{
- return 0;
-}
-
-static inline int lprocfs_dereg_class(struct obd_type* class)
-{
- return 0;
-}
+struct lprocfs_static_vars {
+ struct lprocfs_vars *module_vars;
+ struct lprocfs_vars *obd_vars;
+};
-static inline int lprocfs_reg_main(void)
-{
- return 0;
+/* class_obd.c */
+extern struct proc_dir_entry *proc_lustre_root;
+
+extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+extern void lprocfs_init_multi_vars(unsigned int idx,
+ struct lprocfs_static_vars *var);
+
+#define LPROCFS_INIT_MULTI_VARS(array, size) \
+void lprocfs_init_multi_vars(unsigned int idx, \
+ struct lprocfs_static_vars *x) \
+{ \
+ struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \
+ LASSERT(glob != 0); \
+ LASSERT(idx < (unsigned int)(size)); \
+ x->module_vars = glob[idx].module_vars; \
+ x->obd_vars = glob[idx].obd_vars; \
+} \
+
+#define LPROCFS_INIT_VARS(vclass, vinstance) \
+void lprocfs_init_vars(struct lprocfs_static_vars *x) \
+{ \
+ x->module_vars = vclass; \
+ x->obd_vars = vinstance; \
+} \
+
+#ifdef LPROCFS
+/* lprocfs_status.c */
+extern int lprocfs_add_vars(struct proc_dir_entry *root,
+ struct lprocfs_vars *var,
+ void *data);
+
+extern struct proc_dir_entry *lprocfs_register(const char *name,
+ struct proc_dir_entry *parent,
+ struct lprocfs_vars *list,
+ void *data);
+
+extern void lprocfs_remove(struct proc_dir_entry *root);
+
+struct obd_device;
+extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list);
+extern int lprocfs_obd_detach(struct obd_device *dev);
+
+/* Generic callbacks */
+
+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_rd_name(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+
+/* Statfs helpers */
+struct statfs;
+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs);
+
+#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \
+int fct_name(char *page, char **start, off_t off, \
+ int count, int *eof, void *data) \
+{ \
+ struct statfs sfs; \
+ int rc = get_statfs_fct((struct obd_device*)data, &sfs); \
+ return (rc==0 \
+ ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
+ : rc); \
}
-static inline int lprocfs_dereg_main(void)
-{
- return 0;
-}
+#else
-static inline int lprocfs_ll_rd(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- return 0;
-}
-#endif /* LPROC_SNMP */
+static inline struct proc_dir_entry *
+lprocfs_register(const char *name, struct proc_dir_entry *parent,
+ struct lprocfs_vars *list, void *data) { return NULL; }
+static inline int lprocfs_add_vars(struct proc_dir_entry *root,
+ struct lprocfs_vars *var,
+ void *data) { return 0; }
+static inline void lprocfs_remove(struct proc_dir_entry *root) {};
+struct obd_device;
+static inline int lprocfs_obd_attach(struct obd_device *dev,
+ struct lprocfs_vars *list) { return 0; }
+static inline int lprocfs_obd_detach(struct obd_device *dev) { return 0; }
+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+ int count, int *eof, void *data) { return 0; }
+
+/* Statfs helpers */
+struct statfs;
+static inline
+int lprocfs_rd_blksize(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+static inline
+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+ int count, int *eof, struct statfs *sfs) { return 0; }
+
+#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \
+int fct_name(char *page, char **start, off_t off, \
+ int count, int *eof, void *data) { *eof = 1; return 0; }
+
+#endif /* LPROCFS */
#endif /* LPROCFS_SNMP_H */
ELDLM_LOCK_CHANGED = 300,
ELDLM_LOCK_ABORTED = 301,
+ ELDLM_LOCK_REPLACED = 302,
ELDLM_NAMESPACE_EXISTS = 400,
ELDLM_BAD_NAMESPACE = 401
#define LDLM_FL_NO_CALLBACK (1 << 11) /* see ldlm_cli_cancel_unused */
#define LDLM_FL_HAS_INTENT (1 << 12) /* lock request has intent */
#define LDLM_FL_CANCELING (1 << 13) /* lock cancel has already been sent */
+#define LDLM_FL_LOCAL (1 << 14) // a local lock (ie, no srv/cli split)
/* The blocking callback is overloaded to perform two functions. These flags
* indicate which operation should be performed. */
typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
struct ldlm_lock_desc *new, void *data,
- __u32 data_len, int flag);
-
-typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags);
+ int flag);
+typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags, void *data);
+typedef int (*ldlm_granted_callback)(struct ldlm_lock *,
+ struct lustre_msg *, int offset);
struct ldlm_lock {
struct portals_handle l_handle; // must be first in the structure
ldlm_completion_callback l_completion_ast;
ldlm_blocking_callback l_blocking_ast;
+ ldlm_granted_callback l_granted_cb;
struct obd_export *l_export;
struct lustre_handle *l_connh;
__u32 l_flags;
struct lustre_handle l_remote_handle;
void *l_data;
- __u32 l_data_len;
+ void *l_cp_data;
struct ldlm_extent l_extent;
__u32 l_version[RES_VERSION_SIZE];
};
typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
-typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock *,
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
void *req_cookie, ldlm_mode_t mode, int flags,
void *data);
ldlm_mode_t lr_most_restr;
__u32 lr_type; /* LDLM_PLAIN or LDLM_EXTENT */
struct ldlm_resource *lr_root;
- __u64 lr_name[RES_NAME_SIZE];
+ struct ldlm_res_id lr_name;
__u32 lr_version[RES_VERSION_SIZE];
atomic_t lr_refcount;
+
+ /* lr_tmp holds a list head temporarily, during the building of a work
+ * queue. see ldlm_add_ast_work_item and ldlm_run_ast_work */
void *lr_tmp;
};
struct obd_import led_import;
};
-static inline struct ldlm_extent *ldlm_res2extent(struct ldlm_resource *res)
-{
- return (struct ldlm_extent *)(res->lr_name);
-}
-
extern struct obd_ops ldlm_obd_ops;
extern char *ldlm_lockname[];
extern char *ldlm_typename[];
extern char *ldlm_it2str(int it);
-#define LDLM_DEBUG(lock, format, a...) \
+#define __LDLM_DEBUG(level, lock, format, a...) \
do { \
if (lock->l_resource == NULL) { \
- CDEBUG(D_DLMTRACE, "### " format \
+ CDEBUG(level, "### " format \
" ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
"res: \?\? rrc=\?\? type: \?\?\? remote: "LPX64")\n" \
, ## a, lock, lock->l_handle.h_cookie, \
break; \
} \
if (lock->l_resource->lr_type == LDLM_EXTENT) { \
- CDEBUG(D_DLMTRACE, "### " format \
+ CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
"] remote: "LPX64"\n" , ## a, \
lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name[0], \
- lock->l_resource->lr_name[1], \
+ lock->l_resource->lr_name.name[0], \
+ lock->l_resource->lr_name.name[1], \
atomic_read(&lock->l_resource->lr_refcount), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_extent.start, lock->l_extent.end, \
break; \
} \
{ \
- CDEBUG(D_DLMTRACE, "### " format \
+ CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64" rrc: %d type: %s remote: "LPX64 \
"\n" , ## a, lock->l_resource->lr_namespace->ns_name, \
lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name[0], \
- lock->l_resource->lr_name[1], \
+ lock->l_resource->lr_name.name[0], \
+ lock->l_resource->lr_name.name[1], \
atomic_read(&lock->l_resource->lr_refcount), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_remote_handle.cookie); \
} \
} while (0)
+#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, format, a)
+#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, a)
+
#define LDLM_DEBUG_NOLOCK(format, a...) \
CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
/* ldlm_extent.c */
int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
-int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock *, void *,
+int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock **, void *,
ldlm_mode_t, int flags, void *);
/* ldlm_lockd.c */
-int ldlm_handle_enqueue(struct ptlrpc_request *req);
+int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+ void *data, int flag);
+int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback,
+ ldlm_blocking_callback);
int ldlm_handle_convert(struct ptlrpc_request *req);
int ldlm_handle_cancel(struct ptlrpc_request *req);
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh);
struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags);
void ldlm_cancel_callback(struct ldlm_lock *);
-int ldlm_lock_set_data(struct lustre_handle *, void *data, int datalen);
+int ldlm_lock_set_data(struct lustre_handle *, void *data, void *cp_data);
void ldlm_lock_remove_from_lru(struct ldlm_lock *);
static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
#define LDLM_LOCK_PUT(lock) \
do { \
- /*LDLM_DEBUG(lock, "put");*/ \
+ /*LDLM_DEBUG((lock), "put");*/ \
ldlm_lock_put(lock); \
} while (0)
#define LDLM_LOCK_GET(lock) \
({ \
ldlm_lock_get(lock); \
- /*LDLM_DEBUG(lock, "get");*/ \
+ /*LDLM_DEBUG((lock), "get");*/ \
lock; \
})
void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode);
void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_grant_lock(struct ldlm_lock *lock);
-int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
- void *cookie, int cookielen, ldlm_mode_t mode,
- struct lustre_handle *lockh);
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen);
+int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *,
+ __u32 type, void *cookie, int cookielen, ldlm_mode_t mode,
+ struct lustre_handle *);
struct ldlm_lock *
ldlm_lock_create(struct ldlm_namespace *ns,
- struct lustre_handle *parent_lock_handle,
- __u64 *res_id, __u32 type, ldlm_mode_t mode, void *data,
- __u32 data_len);
-ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock *,
+ struct lustre_handle *parent_lock_handle, struct ldlm_res_id,
+ __u32 type, ldlm_mode_t mode, void *data, void *cp_data);
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **,
void *cookie, int cookie_len, int *flags,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking);
/* resource.c - internal */
struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
struct ldlm_resource *parent,
- __u64 *name, __u32 type, int create);
+ struct ldlm_res_id, __u32 type,
+ int create);
struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
int ldlm_resource_putref(struct ldlm_resource *res);
void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
void ldlm_namespace_dump(struct ldlm_namespace *);
void ldlm_resource_dump(struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
- __u64 new_resid[3]);
+ struct ldlm_res_id);
/* ldlm_request.c */
int ldlm_expired_completion_wait(void *data);
-int ldlm_completion_ast(struct ldlm_lock *lock, int flags);
+int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data);
int ldlm_cli_enqueue(struct lustre_handle *conn,
struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct lustre_handle *parent_lock_handle,
- __u64 *res_id,
+ struct ldlm_res_id,
__u32 type,
void *cookie, int cookielen,
ldlm_mode_t mode,
ldlm_completion_callback completion,
ldlm_blocking_callback callback,
void *data,
- __u32 data_len,
+ void *cp_data,
struct lustre_handle *lockh);
int ldlm_match_or_enqueue(struct lustre_handle *connh,
struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct lustre_handle *parent_lock_handle,
- __u64 *res_id,
+ struct ldlm_res_id,
__u32 type,
void *cookie, int cookielen,
ldlm_mode_t mode,
ldlm_completion_callback completion,
ldlm_blocking_callback callback,
void *data,
- __u32 data_len,
+ void *cp_data,
struct lustre_handle *lockh);
int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
void *data, __u32 data_len);
int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags);
int ldlm_cli_cancel(struct lustre_handle *lockh);
-int ldlm_cli_cancel_unused(struct ldlm_namespace *, __u64 *, int flags);
+int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
+ int flags);
int ldlm_cancel_lru(struct ldlm_namespace *ns);
/* mds/handler.c */
/* This has to be here because recurisve inclusion sucks. */
int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, __u32 data_len, int flag);
+ void *data, int flag);
#endif /* __KERNEL__ */
};
struct ost_export_data {
- __u8 oed_uuid[37]; /* client UUID */
+ struct obd_uuid oed_uuid; /* client UUID */
+};
+
+struct ec_export_data { /* echo client */
+ struct list_head eced_open_head;
+ struct list_head eced_locks;
};
struct obd_export {
__u64 exp_cookie;
+ struct obd_uuid exp_client_uuid;
struct list_head exp_obd_chain;
struct list_head exp_conn_chain;
struct obd_device *exp_obd;
struct filter_export_data eu_filter_data;
struct lov_export_data eu_lov_data;
struct ost_export_data eu_ost_data;
+ struct ec_export_data eu_ec_data;
} u;
};
#define exp_lov_data u.eu_lov_data
#define exp_filter_data u.eu_filter_data
#define exp_ost_data u.eu_ost_data
+#define exp_ec_data u.eu_ec_data
extern struct obd_export *class_conn2export(struct lustre_handle *conn);
extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
void *handle, fsfilt_cb_t cb_func);
int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
+ int (* fs_sync)(struct super_block *sb);
};
extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
return obd->obd_fsops->fs_statfs(fs, osfs);
}
+static inline int fsfilt_sync(struct obd_device *obd, struct super_block *fs)
+{
+ return obd->obd_fsops->fs_sync(fs);
+}
+
#endif /* __KERNEL__ */
#endif
/*
* GENERAL STUFF
*/
-typedef __u8 obd_uuid_t[37];
+struct obd_uuid {
+ __u8 uuid[37];
+};
+
+static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp)
+{
+ strncpy(uuid->uuid, tmp, sizeof(uuid->uuid));
+ uuid->uuid[sizeof(uuid->uuid) - 1] = '\0';
+}
/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
* FOO_REPLY_PORTAL is for incoming replies on the FOO
#define PTLBD_REQUEST_PORTAL 19
#define PTLBD_REPLY_PORTAL 20
#define PTLBD_BULK_PORTAL 21
+#define MDS_GETATTR_PORTAL 22
#define SVC_KILLED 1
#define SVC_EVENT 2
#define MSG_LAST_REPLAY 1
#define MSG_RESENT 2
-/* XXX horrible interim hack -- see bug 578 */
-#define MSG_REPLAY_IN_PROGRESS 4
-
static inline int lustre_msg_get_flags(struct lustre_msg *msg)
{
return (msg->flags & MSG_GEN_FLAG_MASK);
return (msg->flags >> MSG_OP_FLAG_SHIFT);
}
+static inline void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+{
+ msg->flags |= ((flags & MSG_GEN_FLAG_MASK) << MSG_OP_FLAG_SHIFT);
+}
+
static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
{
msg->flags &= ~MSG_OP_FLAG_MASK;
- msg->flags |= ((flags & MSG_GEN_FLAG_MASK) << MSG_OP_FLAG_SHIFT);
+ lustre_msg_add_op_flags(msg, flags);
}
-#define CONNMGR_REPLY 0
-#define CONNMGR_CONNECT 1
+/*
+ * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
+ */
+
+#define MSG_CONNECT_RECOVERING 0x1
+#define MSG_CONNECT_RECONNECT 0x2
+#define MSG_CONNECT_REPLAYABLE 0x4
/*
* OST requests: OBDO & OBD request records
__u32 flags;
};
-#define CONNMGR_REPLY 0
-#define CONNMGR_CONNECT 1
-
-struct connmgr_body {
- __u64 conn;
- __u64 conn_token;
- __u32 generation;
- obd_uuid_t conn_uuid;
-};
-
/* request structure for OST's */
#define OST_REQ_HAS_OA1 0x1
*/
/* opcodes */
-#define MDS_GETATTR 1
-#define MDS_OPEN 2
-#define MDS_CLOSE 3
-#define MDS_REINT 4
-#define MDS_READPAGE 6
-#define MDS_CONNECT 7
-#define MDS_DISCONNECT 8
-#define MDS_GETSTATUS 9
-#define MDS_STATFS 10
-#define MDS_GETLOVINFO 11
-#define MDS_GETATTR_NAME 12
+#define MDS_GETATTR 1
+#define MDS_GETATTR_NAME 2
+#define MDS_CLOSE 3
+#define MDS_REINT 4
+#define MDS_READPAGE 6
+#define MDS_CONNECT 7
+#define MDS_DISCONNECT 8
+#define MDS_GETSTATUS 9
+#define MDS_STATFS 10
+#define MDS_GETLOVINFO 11
#define REINT_SETATTR 1
#define REINT_CREATE 2
#define REINT_LINK 3
#define REINT_UNLINK 4
#define REINT_RENAME 5
-#define REINT_MAX 5
+#define REINT_OPEN 6
+#define REINT_MAX 6
+
+#define IT_INTENT_EXEC 1
+#define IT_OPEN_LOOKUP (1 << 1)
+#define IT_OPEN_NEG (1 << 2)
+#define IT_OPEN_POS (1 << 3)
+#define IT_OPEN_CREATE (1 << 4)
+#define IT_OPEN_OPEN (1 << 5)
+
+#define IT_UNLINK (1<<8)
#define REINT_OPCODE_MASK 0xff /* opcodes must fit into this mask */
#define REINT_REPLAYING 0x1000 /* masked into the opcode to indicate replay */
struct ll_fid fid2;
struct lustre_handle handle;
__u64 size;
+ __u64 blocks; /* XID, in the case of MDS_READPAGE */
__u32 ino; /* make this a __u64 */
__u32 valid;
__u32 fsuid;
__u32 rdev;
__u32 nlink;
__u32 generation;
+ __u32 suppgid;
};
/* This is probably redundant with OBD_MD_FLEASIZE, but we need an audit */
__u64 sa_atime;
__u64 sa_mtime;
__u64 sa_ctime;
+ __u32 sa_suppgid;
};
struct mds_rec_create {
__u32 cr_fsuid;
__u32 cr_fsgid;
__u32 cr_cap;
- __u32 cr_reserved;
+ __u32 cr_flags; /* for use with open */
__u32 cr_mode;
struct ll_fid cr_fid;
struct ll_fid cr_replayfid;
__u32 cr_gid;
__u64 cr_time;
__u64 cr_rdev;
+ __u32 cr_suppgid;
};
struct mds_rec_link {
__u32 lk_fsuid;
__u32 lk_fsgid;
__u32 lk_cap;
+ __u32 lk_suppgid;
struct ll_fid lk_fid1;
struct ll_fid lk_fid2;
};
__u32 ul_cap;
__u32 ul_reserved;
__u32 ul_mode;
+ __u32 ul_suppgid;
struct ll_fid ul_fid1;
struct ll_fid ul_fid2;
};
__u64 ld_default_stripe_size; /* in bytes */
__u64 ld_default_stripe_offset; /* in bytes */
__u32 ld_pattern; /* RAID 0,1 etc */
- obd_uuid_t ld_uuid;
+ struct obd_uuid ld_uuid;
};
/*
#define RES_NAME_SIZE 3
#define RES_VERSION_SIZE 4
+struct ldlm_res_id {
+ __u64 name[RES_NAME_SIZE];
+};
+
/* lock types */
typedef enum {
LCK_EX = 1,
* below, we're probably fine. */
struct ldlm_resource_desc {
__u32 lr_type;
- __u64 lr_name[RES_NAME_SIZE];
+ struct ldlm_res_id lr_name;
__u32 lr_version[RES_VERSION_SIZE];
};
struct ldlm_reply {
__u32 lock_flags;
__u32 lock_mode;
- __u64 lock_resource_name[RES_NAME_SIZE];
+ struct ldlm_res_id lock_resource_name;
struct lustre_handle lock_handle;
struct ldlm_extent lock_extent; /* XXX make this policy 1 &2 */
__u64 lock_policy_res1;
typedef int (*import_recover_t)(struct obd_import *imp, int phase);
#include <linux/lustre_idl.h>
+
struct obd_import {
import_recover_t imp_recover;
struct ptlrpc_connection *imp_connection;
int imp_flags;
int imp_level;
__u64 imp_last_xid;
+ __u64 imp_last_bulk_xid;
__u64 imp_max_transno;
- __u64 imp_peer_last_xid;
__u64 imp_peer_committed_transno;
- /* Protects flags, level, *_xid, *_list */
+ /* Protects flags, level, last_xid, *_list */
spinlock_t imp_lock;
};
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * Basic Lustre library routines.
+ * Basic Lustre library routines.
*
*/
int target_handle_connect(struct ptlrpc_request *req);
int target_handle_disconnect(struct ptlrpc_request *req);
int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
- char *cluuid);
+ struct obd_uuid *cluuid);
int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover);
int client_obd_disconnect(struct lustre_handle *conn);
int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
int client_obd_cleanup(struct obd_device * obddev);
-struct client_obd *client_conn2cli(struct lustre_handle *conn);
-struct obd_device *client_tgtuuid2obd(char *tgtuuid);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
int target_revoke_connection(struct recovd_data *rd, int phase);
+int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+
/* l_lock.c */
-struct lustre_lock {
+struct lustre_lock {
int l_depth;
struct task_struct *l_owner;
struct semaphore l_sem;
/* FIXME: This needs to validate pointers and cookies */
static inline void *lustre_handle2object(struct lustre_handle *handle)
{
- if (handle)
+ if (handle)
return (void *)(unsigned long)(handle->addr);
- return NULL;
+ return NULL;
}
static inline void ldlm_object2handle(void *object, struct lustre_handle *handle)
printk("OBD ioctl: inlbuf3 not 0 terminated\n");
return 1;
}
-#endif
+#endif
return 0;
}
#define OBD_IOC_RECOVD_FAILCONN _IOWR('f', 136, long)
#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 )
+#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long)
+#define OBD_IOC_SET_READONLY _IOW ('f', 141, long)
#define OBD_GET_VERSION _IOWR ('f', 144, long)
+#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long)
+#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long)
+#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long)
+#define ECHO_IOC_CANCEL _IOWR('f', 203, long)
+
+
/*
* l_wait_event is a flexible sleeping function, permitting simple caller
* configuration of interrupt and timeout sensitivity along with actions to
* be performed in the event of either exception.
*
* Common usage looks like this:
- *
+ *
* struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
* intr_handler, callback_data);
* rc = l_wait_event(waitq, condition, &lwi);
#endif
};
-
+/* interpet return codes from intent lookup */
+#define LL_LOOKUP_POSITIVE 1
+#define LL_LOOKUP_NEGATIVE 2
#define LL_SUPER_MAGIC 0x0BD00BD0
#define LL_SBI_NOLCK 0x1
struct ll_sb_info {
- obd_uuid_t ll_sb_uuid;
+ struct obd_uuid ll_sb_uuid;
struct lustre_handle ll_mdc_conn;
struct lustre_handle ll_osc_conn;
struct proc_dir_entry* ll_proc_root;
return ll_s2sbi(inode->i_sb);
}
+static inline void d_unhash_aliases(struct inode *inode)
+{
+ struct dentry *dentry = NULL;
+ struct list_head *tmp;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ ENTRY;
+
+ CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+ inode->i_ino, inode->i_generation);
+
+ spin_lock(&dcache_lock);
+ list_for_each(tmp, &inode->i_dentry) {
+ dentry = list_entry(tmp, struct dentry, d_alias);
+
+ list_del_init(&dentry->d_hash);
+ dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+ list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
+ }
+
+ spin_unlock(&dcache_lock);
+ EXIT;
+}
// FIXME: replace the name of this with LL_I to conform to kernel stuff
// static inline struct ll_inode_info *LL_I(struct inode *inode)
/* dcache.c */
void ll_intent_release(struct dentry *, struct lookup_intent *);
-int ll_set_dd(struct dentry *de);
/****
/* file.c */
extern struct file_operations ll_file_operations;
extern struct inode_operations ll_file_inode_operations;
+extern struct inode_operations ll_special_inode_operations;
struct ldlm_lock;
-int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data,
- __u32 data_len, int flag);
+int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag);
int ll_size_lock(struct inode *, struct lov_stripe_md *, obd_off start,
int mode, struct lustre_handle *);
int ll_size_unlock(struct inode *, struct lov_stripe_md *, int mode,
struct lustre_handle *);
-int ll_file_size(struct inode *inode, struct lov_stripe_md *md);
+int ll_file_size(struct inode *inode, struct lov_stripe_md *md,
+ struct lustre_handle *);
int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid,
gid_t gid, struct lov_stripe_md **lsmp);
void ll_truncate(struct inode *inode);
/* super.c */
-void ll_update_inode(struct inode *, struct mds_body *);
+void ll_update_inode(struct inode *, struct mds_body *, struct lov_mds_md *);
/* symlink.c */
extern struct inode_operations ll_fast_symlink_inode_operations;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc. <info@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#ifdef __KERNEL__
#include <linux/fs.h>
+#include <linux/kp30.h>
#include <linux/lustre_idl.h>
struct ldlm_lock_desc;
struct obd_export;
struct ptlrpc_request;
struct obd_device;
+struct ll_file_data;
#define LUSTRE_MDS_NAME "mds"
#define LUSTRE_MDT_NAME "mdt"
#define LUSTRE_MDC_NAME "mdc"
+struct mdc_rpc_lock {
+ struct semaphore rpcl_sem;
+ struct lookup_intent *rpcl_it;
+};
+extern struct mdc_rpc_lock mdc_rpc_lock;
+
+static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
+{
+ sema_init(&lck->rpcl_sem, 1);
+ lck->rpcl_it = NULL;
+}
+
+static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
+ struct lookup_intent *it)
+{
+ down(&lck->rpcl_sem);
+ if (it) {
+ lck->rpcl_it = it;
+ it->it_iattr = (void *)1;
+ }
+}
+
+static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
+ struct lookup_intent *it)
+{
+ if (it == NULL) {
+ LASSERT(it == lck->rpcl_it);
+ up(&lck->rpcl_sem);
+ return;
+ }
+ if (it && it->it_iattr) {
+ it->it_iattr = NULL;
+ LASSERT(it == lck->rpcl_it);
+ lck->rpcl_it = NULL;
+ up(&lck->rpcl_sem);
+ }
+}
+struct mdc_unlink_data {
+ struct inode *unl_dir;
+ struct inode *unl_de;
+ int unl_mode;
+ const char *unl_name;
+ int unl_len;
+};
+
struct mds_update_record {
__u32 ur_fsuid;
__u32 ur_fsgid;
__u32 ur_uid;
__u32 ur_gid;
__u64 ur_time;
+ __u32 ur_flags;
+ __u32 ur_suppgid;
};
#define MDS_LR_CLIENT 8192
#define MDS_CLIENT_SLOTS 17
#define MDS_MOUNT_RECOV 2
+#define MDS_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
/* Data stored per server at the head of the last_rcvd file. In le32 order. */
struct mds_server_data {
spinlock_t med_open_lock;
struct mds_client_data *med_mcd;
int med_off;
+ __u64 med_last_xid;
+ struct lustre_msg *med_last_reply;
+ int med_last_replen;
};
/* file data for open files on MDS */
struct mds_file_data {
struct list_head mfd_list;
- struct lustre_handle mfd_clienthandle;
__u64 mfd_servercookie;
struct file *mfd_file;
};
/* mds/mds_reint.c */
int mds_reint_rec(struct mds_update_record *r, int offset,
- struct ptlrpc_request *req);
+ struct ptlrpc_request *req, struct lustre_handle *);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+ struct ptlrpc_request *req, struct lustre_handle *);
/* lib/mds_updates.c */
void mds_unpack_body(struct mds_body *b);
int mds_update_unpack(struct ptlrpc_request *, int offset,
struct mds_update_record *);
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset,
- obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int offset,
+void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, obd_id ino,
+ int type, __u64 xid);
+void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int fl,
struct inode *inode, const char *name, int namelen);
-void mds_setattr_pack(struct ptlrpc_request *, int offset, struct inode *,
- struct iattr *, const char *name, int namelen);
+void mds_setattr_pack(struct ptlrpc_request *, struct inode *,
+ struct iattr *, void *ea, int ealen);
void mds_create_pack(struct ptlrpc_request *, int offset, struct inode *dir,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
const char *name, int namelen, const void *data,
int datalen);
+void mds_open_pack(struct ptlrpc_request *, int offset, struct inode *dir,
+ __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
+ __u32 flags, const char *name, int namelen,
+ const void *data, int datalen);
void mds_unlink_pack(struct ptlrpc_request *, int offset, struct inode *inode,
struct inode *child, __u32 mode, const char *name,
int namelen);
struct lustre_handle *lockh);
struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
struct vfsmount **mnt);
-int mds_reint(struct ptlrpc_request *req, int offset);
-int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
+int mds_reint(struct ptlrpc_request *req, int offset, struct lustre_handle *);
+int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
int offset, struct mds_body *body, struct inode *inode);
/* mds/mds_fs.c */
char *filename, int namelen, unsigned long valid,
unsigned int ea_size, struct ptlrpc_request **request);
int mdc_setattr(struct lustre_handle *conn,
- struct inode *, struct iattr *iattr, struct ptlrpc_request **);
+ struct inode *, struct iattr *iattr,
+ void *ea, int ealen, struct ptlrpc_request **);
int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
struct ptlrpc_request **);
+void mdc_set_open_replay_data(struct ll_file_data *fd);
int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
struct lustre_handle *fh, struct ptlrpc_request **req);
int mdc_readpage(struct lustre_handle *conn, obd_id ino,
__u32 mode, const char *name, int namelen,
struct ptlrpc_request **);
int mdc_link(struct lustre_handle *conn,
- struct dentry *src, struct inode *dir, const char *name,
+ struct inode *src, struct inode *dir, const char *name,
int namelen, struct ptlrpc_request **);
int mdc_rename(struct lustre_handle *conn,
struct inode *src, struct inode *tgt, const char *old,
int oldlen, const char *new, int newlen,
struct ptlrpc_request **);
-int mdc_create_client(obd_uuid_t uuid, struct ptlrpc_client *cl);
+int mdc_create_client(struct obd_uuid uuid, struct ptlrpc_client *cl);
+void mdc_lock_set_inode(struct lustre_handle *lock, struct inode *inode);
/* Store the generation of a newly-created inode in |req| for replay. */
void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
*/
#define LDLM_NUM_THREADS 4
-#define LDLM_NEVENTS 1024
-#define LDLM_NBUFS 100
+#define LDLM_NEVENT_MAX 8192UL
+#define LDLM_NEVENTS min(num_physpages / 64, LDLM_NEVENT_MAX)
+#define LDLM_NBUF_MAX 256UL
+#define LDLM_NBUFS min(LDLM_NEVENTS / 16, LDLM_NBUF_MAX)
#define LDLM_BUFSIZE (8 * 1024)
#define LDLM_MAXREQSIZE 1024
#define MDT_NUM_THREADS 8
-#define MDS_NEVENTS 1024
-#define MDS_NBUFS 100
+#define MDS_NEVENT_MAX 8192UL
+#define MDS_NEVENTS min(num_physpages / 64, MDS_NEVENT_MAX)
+#define MDS_NBUF_MAX 512UL
+#define MDS_NBUFS min(MDS_NEVENTS / 16, MDS_NBUF_MAX)
#define MDS_BUFSIZE (8 * 1024)
-#define MDS_MAXREQSIZE 1024
+/* Assume file name length = FNAME_MAX = 256 (true for extN).
+ * path name length = PATH_MAX = 4096
+ * LOV MD size max = EA_MAX = 4000
+ * symlink: FNAME_MAX + PATH_MAX <- largest
+ * link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create)
+ * rename: FNAME_MAX + FNAME_MAX
+ * open: FNAME_MAX + EA_MAX
+ *
+ * MDS_MAXREQSIZE ~= 4736 bytes =
+ * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX
+ *
+ * Realistic size is about 512 bytes (20 character name + 128 char symlink),
+ * except in the open case where there are a large number of OSTs in a LOV.
+ */
+#define MDS_MAXREQSIZE (5 * 1024)
#define OST_NUM_THREADS 6
-#define OST_NEVENTS min(num_physpages / 16, 32768UL)
-#define OST_NBUFS min(OST_NEVENTS / 128, 1280UL)
-#define OST_BUFSIZE ((OST_NEVENTS > 4096UL ? 32 : 8) * 1024)
-#define OST_MAXREQSIZE (8 * 1024)
+#define OST_NEVENT_MAX 32768UL
+#define OST_NEVENTS min(num_physpages / 16, OST_NEVENT_MAX)
+#define OST_NBUF_MAX 1280UL
+#define OST_NBUFS min(OST_NEVENTS / 64, OST_NBUF_MAX)
+#define OST_BUFSIZE (8 * 1024)
+/* OST_MAXREQSIZE ~= 1896 bytes =
+ * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
+ *
+ * single object with 16 pages is 576 bytes
+ */
+#define OST_MAXREQSIZE (2 * 1024)
#define PTLBD_NUM_THREADS 4
#define PTLBD_NEVENTS 1024
struct ptlrpc_connection {
struct list_head c_link;
struct lustre_peer c_peer;
- __u8 c_local_uuid[37]; /* XXX do we need this? */
- __u8 c_remote_uuid[37];
+ struct obd_uuid c_local_uuid; /* XXX do we need this? */
+ struct obd_uuid c_remote_uuid;
__u32 c_generation; /* changes upon new connection */
__u32 c_epoch; /* changes when peer changes */
struct ptlrpc_service *rq_svc;
void (*rq_replay_cb)(struct ptlrpc_request *);
+ void *rq_replay_data;
};
#define DEBUG_REQ(level, req, fmt, args...) \
do { \
CDEBUG(level, \
"@@@ " fmt " req@%p x"LPD64"/t"LPD64" o%d->%s:%d lens %d/%d ref %d fl " \
- "%x\n" , ## args, req, req->rq_xid, req->rq_reqmsg->transno, \
+ "%x/%x/%x rc %x\n" , ## args, req, req->rq_xid, \
+ req->rq_reqmsg ? req->rq_reqmsg->transno : -1, \
req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \
- req->rq_connection ? (char *)req->rq_connection->c_remote_uuid : "<?>", \
+ req->rq_connection ? \
+ (char *)req->rq_connection->c_remote_uuid.uuid : "<?>", \
(req->rq_import && req->rq_import->imp_client) ? \
req->rq_import->imp_client->cli_request_portal : -1, \
req->rq_reqlen, req->rq_replen, \
- atomic_read (&req->rq_refcount), req->rq_flags); \
+ atomic_read (&req->rq_refcount), req->rq_flags, \
+ req->rq_reqmsg ? req->rq_reqmsg->flags : 0, \
+ req->rq_repmsg ? req->rq_repmsg->flags : 0, \
+ req->rq_status); \
} while (0)
struct ptlrpc_bulk_page {
typedef int (*svc_handler_t)(struct ptlrpc_request *req);
/* rpc/connection.c */
-void ptlrpc_readdress_connection(struct ptlrpc_connection *, obd_uuid_t uuid);
+void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *uuid);
struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
- obd_uuid_t uuid);
+ struct obd_uuid *uuid);
int ptlrpc_put_connection(struct ptlrpc_connection *c);
struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
void ptlrpc_init_connection(void);
/* rpc/niobuf.c */
int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *bulk);
int ptlrpc_check_bulk_received(struct ptlrpc_bulk_desc *bulk);
-int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *);
-int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *);
+int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *);
+int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *);
+int ptlrpc_register_bulk_put(struct ptlrpc_bulk_desc *);
+int ptlrpc_register_bulk_get(struct ptlrpc_bulk_desc *);
int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk);
struct obd_brw_set *obd_brw_set_new(void);
void obd_brw_set_add(struct obd_brw_set *, struct ptlrpc_bulk_desc *);
void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
struct ptlrpc_client *);
void ptlrpc_cleanup_client(struct obd_import *imp);
-__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req);
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid);
+struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
int ll_brw_sync_wait(struct obd_brw_set *, int phase);
void ptlrpc_continue_req(struct ptlrpc_request *req);
int ptlrpc_replay_req(struct ptlrpc_request *req);
void ptlrpc_restart_req(struct ptlrpc_request *req);
-void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import);
struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
int count, int *lengths, char **bufs);
void ptlrpc_free_req(struct ptlrpc_request *request);
void ptlrpc_req_finished(struct ptlrpc_request *request);
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *);
void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc);
void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *page);
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+ struct obd_import *imp);
/* rpc/service.c */
struct ptlrpc_service *
ptlrpc_init_svc(__u32 nevents, __u32 nbufs, __u32 bufsize, __u32 max_req_size,
int req_portal, int rep_portal,
- obd_uuid_t uuid, svc_handler_t, char *name);
+ struct obd_uuid *uuid, svc_handler_t, char *name);
void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
char *name);
};
struct lov_stripe_md {
- __u64 lsm_object_id; /* lov object id */
+ __u64 lsm_object_id; /* lov object id */
__u32 lsm_magic;
- __u32 lsm_stripe_size; /* size of the stripe */
- int lsm_stripe_offset; /* offset of first stripe in lmd_objects */
- int lsm_stripe_count; /* how many objects are being striped on */
+ __u32 lsm_stripe_size; /* size of the stripe */
+ unsigned lsm_stripe_offset; /* offset of first stripe in lmd_objects */
+ unsigned lsm_stripe_count; /* how many objects are being striped on */
struct lov_oinfo lsm_oinfo[0];
};
__u32 ouc_fsuid;
__u32 ouc_fsgid;
__u32 ouc_cap;
+ __u32 ouc_suppgid;
};
#define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA
#define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
#endif
+struct ost_server_data;
+
struct filter_obd {
char *fo_fstype;
struct super_block *fo_sb;
struct dentry *fo_dentry_O;
struct dentry *fo_dentry_O_mode[16];
spinlock_t fo_objidlock; /* protects fo_lastobjid increment */
- __u64 fo_lastobjid;
- __u64 fo_last_committed;
+ struct semaphore fo_transno_sem;
+ struct file *fo_rcvd_filp;
+ struct filter_server_data *fo_fsd;
+
+ __u64 fo_next_recovery_transno;
+ int fo_recoverable_clients;
struct file_operations *fo_fop;
struct inode_operations *fo_iop;
struct address_space_operations *fo_aops;
struct obd_import cl_import;
struct semaphore cl_sem;
int cl_conn_count;
- obd_uuid_t cl_target_uuid; /* XXX -> lustre_name */
+ struct obd_uuid cl_target_uuid; /* XXX -> lustre_name */
/* max_mds_easize is purely a performance thing so we don't have to
* call obd_size_wiremd() all the time. */
int cl_max_mds_easize;
struct mds_obd {
struct ptlrpc_service *mds_service;
+ struct ptlrpc_service *mds_getattr_service;
struct super_block *mds_sb;
struct vfsmount *mds_vfsmnt;
int mds_max_mdsize;
struct file *mds_rcvd_filp;
struct semaphore mds_transno_sem;
- __u64 mds_last_committed;
__u64 mds_last_rcvd;
__u64 mds_mount_count;
struct ll_fid mds_rootfid;
struct list_head mds_delayed_reply_queue;
spinlock_t mds_processing_task_lock;
pid_t mds_processing_task;
-
+ struct timer_list mds_recovery_timer;
+
int mds_has_lov_desc;
struct lov_desc mds_lov_desc;
};
struct ptlbd_obd {
/* server's */
struct ptlrpc_service *ptlbd_service;
+ struct file *filp;
/* client's */
struct ptlrpc_client bd_client;
struct obd_import bd_import;
struct ost_obd {
struct ptlrpc_service *ost_service;
- struct lustre_handle ost_conn; /* the local connection to the OBD */
};
struct echo_client_obd {
- struct lustre_handle conn; /* the local connection to osc/lov */
+ struct lustre_handle ec_conn; /* the local connection to osc/lov */
+ spinlock_t ec_lock;
+ struct list_head ec_objects;
+ int ec_lsmsize;
+ int ec_nstripes;
+ __u64 ec_unique;
};
struct cache_obd {
};
struct lov_tgt_desc {
- obd_uuid_t uuid;
+ struct obd_uuid uuid;
struct lustre_handle conn;
int active; /* is this target available for requests, etc */
};
struct dentry *dentry;
};
+struct obd_trans_info {
+ __u64 oti_transno;
+};
+
#define N_LOCAL_TEMP_PAGE 0x00000001
/* corresponds to one of the obd's */
/* common and UUID name of this device */
char *obd_name;
- obd_uuid_t obd_uuid;
+ struct obd_uuid obd_uuid;
int obd_minor;
int obd_flags;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
spinlock_t obd_dev_lock;
+ __u64 obd_last_committed;
struct fsfilt_operations *obd_fsops;
union {
struct ext2_obd ext2;
int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
int (*o_cleanup)(struct obd_device *dev);
int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover);
int (*o_disconnect)(struct lustre_handle *conn);
int (*o_preallocate)(struct lustre_handle *, obd_count *req,
obd_id *ids);
int (*o_create)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md **ea);
+ struct lov_stripe_md **ea, struct obd_trans_info *oti);
int (*o_destroy)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea);
+ struct lov_stripe_md *ea, struct obd_trans_info *oti);
int (*o_setattr)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea);
+ struct lov_stripe_md *ea, struct obd_trans_info *oti);
int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea);
int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea);
+ struct lov_stripe_md *ea, struct obd_trans_info *oti);
int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea);
+ struct lov_stripe_md *ea, struct obd_trans_info *oti);
int (*o_brw)(int rw, struct lustre_handle *conn,
struct lov_stripe_md *ea, obd_count oa_bufs,
- struct brw_page *pgarr, struct obd_brw_set *);
+ struct brw_page *pgarr, struct obd_brw_set *,
+ struct obd_trans_info *oti);
int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
struct lov_stripe_md *ea, obd_size count,
- obd_off offset);
+ obd_off offset, struct obd_trans_info *oti);
int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
obd_size count, obd_off offset);
int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
struct obdo *src, obd_size count, obd_off offset);
int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
struct lustre_handle *srconn, struct obdo *src,
- obd_size count, obd_off offset);
+ obd_size count, obd_off offset, struct obd_trans_info *);
int (*o_iterate)(struct lustre_handle *conn,
int (*)(obd_id, obd_gr, void *),
obd_id *startid, obd_gr group, void *data);
int (*o_preprw)(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, void **desc_private);
+ struct niobuf_local *local, void **desc_private,
+ struct obd_trans_info *oti);
int (*o_commitrw)(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private);
+ void *desc_private, struct obd_trans_info *oti);
int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
struct lustre_handle *parent_lock,
__u32 type, void *cookie, int cookielen, __u32 mode,
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/lprocfs_status.h>
#endif
-
/* OBD Device Declarations */
#define MAX_OBD_DEVICES 128
extern struct obd_device obd_dev[MAX_OBD_DEVICES];
-#define OBD_ATTACHED 0x1
-#define OBD_SET_UP 0x2
+#define OBD_ATTACHED 0x01
+#define OBD_SET_UP 0x02
+#define OBD_RECOVERING 0x04
+#define OBD_ABORT_RECOVERY 0x08
+#define OBD_REPLAYABLE 0x10
+#define OBD_NO_TRANSNO 0x20 /* XXX needs better name */
/* OBD Operations Declarations */
\
exp = class_conn2export(conn); \
if (!(exp)) { \
- CERROR("No export\n"); \
+ CERROR("No export for conn "LPX64":"LPX64"\n", \
+ conn->addr, conn->cookie); \
RETURN(-EINVAL); \
} \
\
}
static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, create);
- rc = OBP(exp->exp_obd, create)(conn, obdo, ea);
+ rc = OBP(exp->exp_obd, create)(conn, obdo, ea, oti);
RETURN(rc);
}
static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, destroy);
- rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea);
+ rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea, oti);
RETURN(rc);
}
}
static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, close);
- rc = OBP(exp->exp_obd, close)(conn, obdo, ea);
+ rc = OBP(exp->exp_obd, close)(conn, obdo, ea, oti);
RETURN(rc);
}
static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, open);
- rc = OBP(exp->exp_obd, open)(conn, obdo, ea);
+ rc = OBP(exp->exp_obd, open)(conn, obdo, ea, oti);
RETURN(rc);
}
static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, setattr);
- rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea);
+ rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea, oti);
RETURN(rc);
}
static inline int obd_connect(struct lustre_handle *conn,
- struct obd_device *obd, obd_uuid_t cluuid,
+ struct obd_device *obd, struct obd_uuid *cluuid,
struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
}
static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea,
- obd_size start, obd_size end)
+ struct lov_stripe_md *ea, obd_size start,
+ obd_size end, struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, punch);
- rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end);
+ rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end, oti);
RETURN(rc);
}
static inline int obd_brw(int cmd, struct lustre_handle *conn,
struct lov_stripe_md *ea, obd_count oa_bufs,
- struct brw_page *pg, struct obd_brw_set *set)
+ struct brw_page *pg, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
LBUG();
}
- rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, set);
+ rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, set, oti);
RETURN(rc);
}
static inline int obd_preprw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, void **desc_private)
+ struct niobuf_local *local, void **desc_private,
+ struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_OP(exp->exp_obd, preprw);
rc = OBP(exp->exp_obd, preprw)(cmd, conn, objcount, obj, niocount,
- remote, local, desc_private);
+ remote, local, desc_private, oti);
RETURN(rc);
}
static inline int obd_commitrw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private)
+ void *desc_private, struct obd_trans_info *oti)
{
struct obd_export *exp;
int rc;
OBD_CHECK_OP(exp->exp_obd, commitrw);
rc = OBP(exp->exp_obd, commitrw)(cmd, conn, objcount, obj, niocount,
- local, desc_private);
+ local, desc_private, oti);
RETURN(rc);
}
static inline void obd_handle2oa(struct obdo *oa, struct lustre_handle *handle)
{
- if (handle->addr) {
+ if (handle && handle->addr) {
struct lustre_handle *oa_handle = obdo_handle(oa);
memcpy(oa_handle, handle, sizeof(*handle));
oa->o_valid |= OBD_MD_FLHANDLE;
dst->i_atime = src->o_atime;
if (valid & OBD_MD_FLMTIME)
dst->i_mtime = src->o_mtime;
- if (valid & OBD_MD_FLCTIME)
+ if (valid & OBD_MD_FLCTIME && src->o_ctime > dst->i_ctime)
dst->i_ctime = src->o_ctime;
if (valid & OBD_MD_FLSIZE)
dst->i_size = src->o_size;
/* I'm as embarrassed about this as you are.
*
* <shaver> // XXX do not look into _superhack with remaining eye
- * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
+ * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp,
+ int dying_import);
-int class_register_type(struct obd_ops *ops, struct lprocfs_vars* vars,
+int class_register_type(struct obd_ops *ops, struct lprocfs_vars* vars,
char *nm);
int class_unregister_type(char *nm);
int class_name2dev(char *name);
-int class_uuid2dev(char *uuid);
-struct obd_device *class_uuid2obd(char *uuid);
+int class_uuid2dev(struct obd_uuid *uuid);
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
struct obd_export *class_new_export(struct obd_device *obddev);
struct obd_type *class_get_type(char *name);
void class_put_type(struct obd_type *type);
void class_destroy_export(struct obd_export *exp);
int class_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid);
+ struct obd_uuid *cluuid);
int class_disconnect(struct lustre_handle *conn);
void class_disconnect_all(struct obd_device *obddev);
void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src);
void obd_statfs_unpack(struct obd_statfs *tgt, struct obd_statfs *src);
+
+struct obd_class_user_state {
+ struct obd_device *ocus_current_obd;
+ struct list_head ocus_conns;
+};
+
+struct obd_class_user_conn {
+ struct list_head ocuc_chain;
+ struct lustre_handle ocuc_conn;
+};
+
#endif
/* sysctl.c */
/* uuid.c */
typedef __u8 class_uuid_t[16];
-//int class_uuid_parse(obd_uuid_t in, class_uuid_t out);
-void class_uuid_unparse(class_uuid_t in, obd_uuid_t out);
-#endif /* __LINUX_CLASS_OBD_H */
+//int class_uuid_parse(struct obd_uuid in, class_uuid_t out);
+void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+#endif /* __LINUX_OBD_CLASS_H */
#define OBD_ECHO_DEVICENAME "obdecho"
#define OBD_ECHO_CLIENT_DEVICENAME "echo_client"
+struct ec_object
+{
+ struct list_head eco_obj_chain;
+ struct obd_device *eco_device;
+ int eco_refcount;
+ int eco_deleted;
+ obd_id eco_id;
+ struct lov_stripe_md *eco_lsm;
+};
+
+struct ec_open_object
+{
+ struct list_head ecoo_exp_chain;
+ struct ec_object *ecoo_object;
+ struct obdo ecoo_oa;
+ __u64 ecoo_cookie;
+};
+
+struct ec_lock
+{
+ struct list_head ecl_exp_chain;
+ struct lustre_handle ecl_handle;
+ struct ldlm_extent ecl_extent;
+ __u32 ecl_mode;
+ struct ec_object *ecl_object;
+ __u64 ecl_cookie;
+};
+
#endif
#define OBD_FILTER_DEVICENAME "obdfilter"
#endif
+#define FILTER_LR_SERVER_SIZE 512
+
+#define FILTER_LR_CLIENT_START 8192
+#define FILTER_LR_CLIENT_SIZE 128
+
+#define FILTER_MOUNT_RECOV 2
+#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
+
+/* Data stored per server at the head of the last_rcvd file. In le32 order. */
+struct filter_server_data {
+ __u8 fsd_uuid[37]; /* server UUID */
+ __u8 fsd_uuid_padding[3]; /* unused */
+ __u64 fsd_last_objid; /* last completed transaction ID */
+ __u64 fsd_last_rcvd; /* last completed transaction ID */
+ __u64 fsd_mount_count; /* FILTER incarnation number */
+ __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 64]; /* */
+};
+
+/* Data stored per client in the last_rcvd file. In le32 order. */
+struct filter_client_data {
+ __u8 fcd_uuid[37]; /* client UUID */
+ __u8 fcd_uuid_padding[3]; /* unused */
+ __u64 fcd_last_rcvd; /* last completed transaction ID */
+ __u64 fcd_mount_count; /* FILTER incarnation number */
+ __u64 fcd_last_xid; /* client RPC xid for the last transaction */
+ __u8 fcd_padding[FILTER_LR_CLIENT_SIZE - 64];
+};
+
/* In-memory access to client data from OST struct */
struct filter_export_data {
struct list_head fed_open_head; /* files to close on disconnect */
spinlock_t fed_lock; /* protects fed_open_head */
+ struct filter_client_data *fed_fcd;
+ int fed_lr_off;
};
/* file data for open files on OST */
#define FILTER_FLAG_DESTROY 0x0001 /* destroy dentry on last file close */
+
#endif
extern unsigned long obd_fail_loc;
extern unsigned long obd_timeout;
extern char obd_recovery_upcall[128];
+extern unsigned long obd_sync_filter;
#define OBD_FAIL_MDS 0x100
#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101
#define OBD_FAIL_MDS_STATFS_PACK 0x11d
#define OBD_FAIL_MDS_STATFS_NET 0x11e
#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
+#define OBD_FAIL_MDS_ALL_REPLY_NET 0x120
+#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x121
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
#define OBD_FAIL_MDS_ALL_NET 0x01000000
#define OBD_FAIL_OST_ALL_NET 0x02000000
-#define OBD_FAIL_CHECK(id) ((obd_fail_loc & OBD_FAIL_MASK_LOC) == (id) && \
- ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!=\
+#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \
+ ((id) & OBD_FAIL_MASK_LOC)) && \
+ ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
(OBD_FAILED | OBD_FAIL_ONCE)))
#define OBD_FAIL_RETURN(id, ret) \
+EXPORT_SYMBOL(dev_clear_rdonly);
--- linux-2.4.18-17.8.0/drivers/block/loop.c~dev_read_only 2002-12-06 14:52:29.000000000 -0800
+++ linux-2.4.18-17.8.0-zab/drivers/block/loop.c 2002-12-06 14:52:29.000000000 -0800
-@@ -491,6 +491,11 @@ static int loop_make_request(request_que
+@@ -491,6 +491,9 @@ static int loop_make_request(request_que
spin_unlock_irq(&lo->lo_lock);
if (rw == WRITE) {
-+#ifdef CONFIG_DEV_RDONLY
+ if (dev_check_rdonly(rbh->b_rdev))
+ goto err;
-+#endif
+
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
goto err;
} else if (rw == READA) {
--- linux-2.4.18-17.8.0/drivers/ide/ide-disk.c~dev_read_only 2002-12-06 14:52:29.000000000 -0800
+++ linux-2.4.18-17.8.0-zab/drivers/ide/ide-disk.c 2002-12-06 14:52:29.000000000 -0800
-@@ -557,6 +557,12 @@ static ide_startstop_t lba_48_rw_disk (i
+@@ -557,6 +557,10 @@ static ide_startstop_t lba_48_rw_disk (i
*/
static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
{
-+#ifdef CONFIG_DEV_RDONLY
+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
-+#endif
if (IDE_CONTROL_REG)
OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
--- /dev/null
+ drivers/block/blkpg.c | 38 ++++++++++++++++++++++++++++++++++++++
+ drivers/block/loop.c | 5 +++++
+ drivers/ide/ide-disk.c | 6 ++++++
+ 3 files changed, 49 insertions(+)
+
+--- linux-2.4.19-hp2_pnnl2/drivers/block/blkpg.c~dev_read_only_hp Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/block/blkpg.c Sun Jan 19 18:52:28 2003
+@@ -310,6 +310,42 @@ int blk_ioctl(kdev_t dev, unsigned int c
+
+ EXPORT_SYMBOL(blk_ioctl);
+
++
++#define NUM_DEV_NO_WRITE 16
++static int dev_no_write[NUM_DEV_NO_WRITE];
++
++/*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently). This is for filesystem crash/recovery testing.
++ */
++void dev_set_rdonly(kdev_t dev, int no_write)
++{
++ if (dev) {
++ printk(KERN_WARNING "Turning device %s read-only\n",
++ bdevname(dev));
++ dev_no_write[no_write] = 0xdead0000 + dev;
++ }
++}
++
++int dev_check_rdonly(kdev_t dev) {
++ int i;
++
++ for (i = 0; i < NUM_DEV_NO_WRITE; i++) {
++ if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 &&
++ dev == (dev_no_write[i] & 0xffff))
++ return 1;
++ }
++ return 0;
++}
++
++void dev_clear_rdonly(int no_write) {
++ dev_no_write[no_write] = 0;
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++
+ /**
+ * get_last_sector()
+ *
+--- linux-2.4.19-hp2_pnnl2/drivers/block/loop.c~dev_read_only_hp Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/block/loop.c Sun Jan 19 18:51:12 2003
+@@ -474,6 +474,9 @@ static int loop_make_request(request_que
+ spin_unlock_irq(&lo->lo_lock);
+
+ if (rw == WRITE) {
++ if (dev_check_rdonly(rbh->b_rdev))
++ goto err;
++
+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+ goto err;
+ } else if (rw == READA) {
+--- linux-2.4.19-hp2_pnnl2/drivers/ide/ide-disk.c~dev_read_only_hp Sun Jan 19 18:51:12 2003
++++ linux-2.4.19-hp2_pnnl2-root/drivers/ide/ide-disk.c Sun Jan 19 18:51:12 2003
+@@ -551,6 +551,10 @@ static ide_startstop_t lba_48_rw_disk (i
+ */
+ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+ {
++ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++ ide_end_request(1, HWGROUP(drive));
++ return ide_stopped;
++ }
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+
+_
- 0 files changed
+ fs/ext3/Makefile | 2 ++
+ fs/ext3/super.c | 2 +-
+ include/linux/fs.h | 1 +
+ kernel/ksyms.c | 5 +++++
+ 4 files changed, 9 insertions(+), 1 deletion(-)
---- linux-2.4.18-17.8.0/fs/ext3/Makefile~exports 2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/ext3/Makefile 2002-12-06 14:52:29.000000000 -0800
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/Makefile~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/Makefile Sun Jan 19 18:52:38 2003
@@ -9,6 +9,8 @@
O_TARGET := ext3.o
obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o
obj-m := $(O_TARGET)
---- linux-2.4.18-17.8.0/fs/ext3/super.c~exports 2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/ext3/super.c 2002-12-06 14:52:29.000000000 -0800
-@@ -1746,7 +1746,7 @@ static void __exit exit_ext3_fs(void)
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/super.c~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/super.c Sun Jan 19 18:52:38 2003
+@@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux-2.4.18-17.8.0/include/linux/fs.h~exports 2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/fs.h 2002-12-06 14:52:29.000000000 -0800
-@@ -1046,6 +1046,7 @@ extern int unregister_filesystem(struct
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h Sun Jan 19 18:52:38 2003
+@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct
extern struct vfsmount *kern_mount(struct file_system_type *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
extern void umount_tree(struct vfsmount *);
#define kern_umount mntput
---- linux-2.4.18-17.8.0/kernel/ksyms.c~exports 2002-12-06 14:52:29.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/kernel/ksyms.c 2002-12-06 14:52:29.000000000 -0800
-@@ -306,6 +306,11 @@ EXPORT_SYMBOL_GPL(buffermem_pages);
- EXPORT_SYMBOL_GPL(nr_free_pages);
- EXPORT_SYMBOL_GPL(page_cache_size);
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 18:52:38 2003
+@@ -308,6 +308,11 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+ EXPORT_SYMBOL(dcache_readdir);
+ EXPORT_SYMBOL(dcache_dir_ops);
+/* lustre */
+EXPORT_SYMBOL(panic_notifier_list);
--- /dev/null
+
+
+
+ fs/ext3/Makefile | 2 ++
+ fs/ext3/super.c | 2 +-
+ include/linux/fs.h | 1 +
+ kernel/ksyms.c | 4 ++++
+ 4 files changed, 9 insertions(+), 1 deletion(-)
+
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/Makefile~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/Makefile Sun Jan 19 18:52:38 2003
+@@ -9,6 +9,8 @@
+
+ O_TARGET := ext3.o
+
++export-objs := super.o
++
+ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ ioctl.o namei.o super.o symlink.o
+ obj-m := $(O_TARGET)
+--- linux-2.4.19-hp2_pnnl2/fs/ext3/super.c~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/ext3/super.c Sun Jan 19 18:52:38 2003
+@@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
+ unregister_filesystem(&ext3_fs_type);
+ }
+
+-EXPORT_NO_SYMBOLS;
++EXPORT_SYMBOL(ext3_bread);
+
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h Sun Jan 19 18:52:38 2003
+@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct
+ extern struct vfsmount *kern_mount(struct file_system_type *);
+ extern int may_umount(struct vfsmount *);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
++struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
+ extern void umount_tree(struct vfsmount *);
+
+ #define kern_umount mntput
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~exports Sun Jan 19 18:52:38 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 18:52:38 2003
+@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync);
+ EXPORT_SYMBOL(dcache_readdir);
+ EXPORT_SYMBOL(dcache_dir_ops);
+
++/* lustre */
++EXPORT_SYMBOL(pagecache_lock_cacheline);
++EXPORT_SYMBOL(do_kern_mount);
++
+ /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
+ EXPORT_SYMBOL(default_llseek);
+ EXPORT_SYMBOL(dentry_open);
+
+_
--- /dev/null
+--- lum/fs/inode.c Sat Oct 19 11:42:42 2002
++++ linux-2.4.18-uml35-ext3online/fs/inode.c Mon Oct 14 00:41:20 2002
+@@ -606,7 +553,8 @@ static void dispose_list(struct list_hea
+ /*
+ * Invalidate all inodes for a device.
+ */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct super_block * sb,
++ struct list_head * dispose, int show)
+ {
+ struct list_head *next;
+ int busy = 0, count = 0;
+@@ -631,6 +579,11 @@ static int invalidate_list(struct list_h
+ count++;
+ continue;
+ }
++ if (show)
++ printk(KERN_ERR
++ "inode busy: dev %s:%lu (%p) mode %o count %u\n",
++ kdevname(sb->s_dev), inode->i_ino, inode,
++ inode->i_mode, atomic_read(&inode->i_count));
+ busy = 1;
+ }
+ /* only unused inodes may be cached with i_count zero */
+@@ -649,22 +601,23 @@ static int invalidate_list(struct list_h
+ /**
+ * invalidate_inodes - discard the inodes on a device
+ * @sb: superblock
++ * @show: whether we should display any busy inodes found
+ *
+ * Discard all of the inodes for a given superblock. If the discard
+ * fails because there are busy inodes then a non zero value is returned.
+ * If the discard is successful all the inodes have been discarded.
+ */
+
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int show)
+ {
+ int busy;
+ LIST_HEAD(throw_away);
+
+ spin_lock(&inode_lock);
+- busy = invalidate_list(&inode_in_use, sb, &throw_away);
+- busy |= invalidate_list(&inode_unused, sb, &throw_away);
+- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
++ busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
++ busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
++ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
++ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
+ spin_unlock(&inode_lock);
+
+ dispose_list(&throw_away);
+@@ -690,7 +643,7 @@ int invalidate_device(kdev_t dev, int do
+ * hold).
+ */
+ shrink_dcache_sb(sb);
+- res = invalidate_inodes(sb);
++ res = invalidate_inodes(sb, 0);
+ drop_super(sb);
+ }
+ invalidate_buffers(dev);
+--- lum/fs/super.c.orig Sat Oct 19 11:42:42 2002
++++ lum/fs/super.c Wed Oct 30 17:16:55 2002
+@@ -936,7 +936,7 @@
+ lock_super(sb);
+ lock_kernel();
+ sb->s_flags &= ~MS_ACTIVE;
+- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */
++ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */
+ if (sop) {
+ if (sop->write_super && sb->s_dirt)
+ sop->write_super(sb);
+@@ -945,7 +945,7 @@
+ }
+
+ /* Forget any remaining inodes */
+- if (invalidate_inodes(sb)) {
++ if (invalidate_inodes(sb, 1)) {
+ printk(KERN_ERR "VFS: Busy inodes after unmount. "
+ "Self-destruct in 5 seconds. Have a nice day...\n");
+ }
+--- lum/include/linux/fs.h Wed Oct 30 17:10:42 2002
++++ lum/include/linux/fs.h.orig Tue Oct 22 23:15:00 2002
+@@ -1261,7 +1261,7 @@
+ extern void set_buffer_flushtime(struct buffer_head *);
+ extern void balance_dirty(void);
+ extern int check_disk_change(kdev_t);
+-extern int invalidate_inodes(struct super_block *);
++extern int invalidate_inodes(struct super_block *, int);
+ extern int invalidate_device(kdev_t, int);
+ extern void invalidate_inode_pages(struct inode *);
+ extern void invalidate_inode_pages2(struct address_space *);
+--- lum/fs/smbfs/inode.c.orig Mon Feb 25 12:38:09 2002
++++ lum/fs/smbfs/inode.c Thu Feb 6 21:34:26 2003
+@@ -166,7 +166,7 @@
+ {
+ VERBOSE("\n");
+ shrink_dcache_sb(SB_of(server));
+- invalidate_inodes(SB_of(server));
++ invalidate_inodes(SB_of(server), 0);
+ }
+
+ /*
--- /dev/null
+--- linux-chaos/fs/inode.c.b_io_export Wed Jan 29 16:56:15 2003
++++ linux-chaos/fs/inode.c Wed Jan 29 16:56:27 2003
+@@ -66,7 +66,8 @@
+ * NOTE! You also have to own the lock if you change
+ * the i_state of an inode while it is in use..
+ */
+-static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(inode_lock);
+
+ /*
+ * Statistics gathering..
+--- linux-chaos/fs/Makefile.b_io_export Wed Jan 29 16:56:45 2003
++++ linux-chaos/fs/Makefile Wed Jan 29 16:56:53 2003
+@@ -7,7 +7,7 @@
+
+ O_TARGET := fs.o
+
+-export-objs := filesystems.o open.o dcache.o buffer.o
++export-objs := filesystems.o open.o dcache.o buffer.o inode.o
+ mod-subdirs := nls
+
+ obj-y := open.o read_write.o devices.o file_table.o buffer.o \
+--- linux-chaos/mm/filemap.c.b_io_export Wed Jan 29 16:50:39 2003
++++ linux-chaos/mm/filemap.c Wed Jan 29 16:51:11 2003
+@@ -65,6 +65,7 @@
+ * pagecache_lock
+ */
+ spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++EXPORT_SYMBOL(pagemap_lru_lock_cacheline);
+
+ #define CLUSTER_PAGES (1 << page_cluster)
+ #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
+--- linux-chaos/mm/vmscan.c.b_io_export Wed Jan 29 16:51:58 2003
++++ linux-chaos/mm/vmscan.c Wed Jan 29 16:55:16 2003
+@@ -839,6 +839,7 @@
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kswapd_done, &wait);
+ }
++EXPORT_SYMBOL(wakeup_kswapd);
+
+ static void wakeup_memwaiters(void)
+ {
+--- linux-chaos/mm/Makefile.b_io_export Wed Jan 29 16:52:46 2003
++++ linux-chaos/mm/Makefile Wed Jan 29 16:54:23 2003
+@@ -9,7 +9,7 @@
+
+ O_TARGET := mm.o
+
+-export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.c
+
+ obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+--- linux-chaos/mm/page_alloc.c.b_io_export Wed Jan 29 17:00:32 2003
++++ linux-chaos/mm/page_alloc.c Wed Jan 29 17:01:31 2003
+@@ -31,6 +31,7 @@
+ int nr_inactive_dirty_pages;
+ int nr_inactive_clean_pages;
+ pg_data_t *pgdat_list;
++EXPORT_SYMBOL(pgdat_list);
+
+ /*
+ * The zone_table array is used to look up the address of the
--- /dev/null
+
+
+
+ fs/jbd/commit.c | 27 +++++++++++++++++++++---
+ fs/jbd/journal.c | 1
+ fs/jbd/transaction.c | 56 ++++++++++++++++++++++++++++++++++++++++-----------
+ include/linux/jbd.h | 20 ++++++++++++++++++
+ 4 files changed, 90 insertions(+), 14 deletions(-)
+
+--- linux-2.4.19/fs/jbd/commit.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/commit.c Sun Jan 19 19:46:42 2003
+@@ -475,7 +475,7 @@ start_journal_io:
+ transaction's t_log_list queue, and metadata buffers are on
+ the t_iobuf_list queue.
+
+- Wait for the transactions in reverse order. That way we are
++ Wait for the buffers in reverse order. That way we are
+ less likely to be woken up until all IOs have completed, and
+ so we incur less scheduling load.
+ */
+@@ -566,8 +566,10 @@ start_journal_io:
+
+ jbd_debug(3, "JBD: commit phase 6\n");
+
+- if (is_journal_aborted(journal))
++ if (is_journal_aborted(journal)) {
++ unlock_journal(journal);
+ goto skip_commit;
++ }
+
+ /* Done it all: now write the commit record. We should have
+ * cleaned up our previous buffers by now, so if we are in abort
+@@ -577,6 +579,7 @@ start_journal_io:
+ descriptor = journal_get_descriptor_buffer(journal);
+ if (!descriptor) {
+ __journal_abort_hard(journal);
++ unlock_journal(journal);
+ goto skip_commit;
+ }
+
+@@ -600,7 +603,6 @@ start_journal_io:
+ put_bh(bh); /* One for getblk() */
+ journal_unlock_journal_head(descriptor);
+ }
+- lock_journal(journal);
+
+ /* End of a transaction! Finally, we can do checkpoint
+ processing: any buffers committed as a result of this
+@@ -609,6 +611,25 @@ start_journal_io:
+
+ skip_commit:
+
++ /* Call any callbacks that had been registered for handles in this
++ * transaction. It is up to the callback to free any allocated
++ * memory.
++ */
++ if (!list_empty(&commit_transaction->t_jcb)) {
++ struct list_head *p, *n;
++ int error = is_journal_aborted(journal);
++
++ list_for_each_safe(p, n, &commit_transaction->t_jcb) {
++ struct journal_callback *jcb;
++
++ jcb = list_entry(p, struct journal_callback, jcb_list);
++ list_del(p);
++ jcb->jcb_func(jcb, error);
++ }
++ }
++
++ lock_journal(journal);
++
+ jbd_debug(3, "JBD: commit phase 7\n");
+
+ J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+--- linux-2.4.19/fs/jbd/journal.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/journal.c Sun Jan 19 19:46:42 2003
+@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
+ #endif
+ EXPORT_SYMBOL(journal_flush);
+ EXPORT_SYMBOL(journal_revoke);
++EXPORT_SYMBOL(journal_callback_set);
+
+ EXPORT_SYMBOL(journal_init_dev);
+ EXPORT_SYMBOL(journal_init_inode);
+--- linux-2.4.19/fs/jbd/transaction.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/jbd/transaction.c Sun Jan 19 19:46:42 2003
+@@ -57,6 +57,7 @@ static transaction_t * get_transaction (
+ transaction->t_state = T_RUNNING;
+ transaction->t_tid = journal->j_transaction_sequence++;
+ transaction->t_expires = jiffies + journal->j_commit_interval;
++ INIT_LIST_HEAD(&transaction->t_jcb);
+
+ /* Set up the commit timer for the new transaction. */
+ J_ASSERT (!journal->j_commit_timer_active);
+@@ -201,6 +202,20 @@ repeat_locked:
+ return 0;
+ }
+
++/* Allocate a new handle. This should probably be in a slab... */
++static handle_t *new_handle(int nblocks)
++{
++ handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++ if (!handle)
++ return NULL;
++ memset(handle, 0, sizeof (handle_t));
++ handle->h_buffer_credits = nblocks;
++ handle->h_ref = 1;
++ INIT_LIST_HEAD(&handle->h_jcb);
++
++ return handle;
++}
++
+ /*
+ * Obtain a new handle.
+ *
+@@ -227,14 +242,11 @@ handle_t *journal_start(journal_t *journ
+ handle->h_ref++;
+ return handle;
+ }
+-
+- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++
++ handle = new_handle(nblocks);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+- memset (handle, 0, sizeof (handle_t));
+
+- handle->h_buffer_credits = nblocks;
+- handle->h_ref = 1;
+ current->journal_info = handle;
+
+ err = start_this_handle(journal, handle);
+@@ -333,14 +345,11 @@ handle_t *journal_try_start(journal_t *j
+
+ if (is_journal_aborted(journal))
+ return ERR_PTR(-EIO);
+-
+- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
++
++ handle = new_handle(nblocks);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+- memset (handle, 0, sizeof (handle_t));
+
+- handle->h_buffer_credits = nblocks;
+- handle->h_ref = 1;
+ current->journal_info = handle;
+
+ err = try_start_this_handle(journal, handle);
+@@ -1328,6 +1337,28 @@ out:
+ #endif
+
+ /*
++ * Register a callback function for this handle. The function will be
++ * called when the transaction that this handle is part of has been
++ * committed to disk with the original callback data struct and the
++ * error status of the journal as parameters. There is no guarantee of
++ * ordering between handles within a single transaction, nor between
++ * callbacks registered on the same handle.
++ *
++ * The caller is responsible for allocating the journal_callback struct.
++ * This is to allow the caller to add as much extra data to the callback
++ * as needed, but reduce the overhead of multiple allocations. The caller
++ * allocated struct must start with a struct journal_callback at offset 0,
++ * and has the caller-specific data afterwards.
++ */
++void journal_callback_set(handle_t *handle,
++ void (*func)(struct journal_callback *jcb, int error),
++ struct journal_callback *jcb)
++{
++ list_add(&jcb->jcb_list, &handle->h_jcb);
++ jcb->jcb_func = func;
++}
++
++/*
+ * All done for a particular handle.
+ *
+ * There is not much action needed here. We just return any remaining
+@@ -1393,7 +1424,10 @@ int journal_stop(handle_t *handle)
+ wake_up(&journal->j_wait_transaction_locked);
+ }
+
+- /*
++ /* Move callbacks from the handle to the transaction. */
++ list_splice(&handle->h_jcb, &transaction->t_jcb);
++
++ /*
+ * If the handle is marked SYNC, we need to set another commit
+ * going! We also want to force a commit if the current
+ * transaction is occupying too much of the log, or if the
+--- linux-2.4.19/include/linux/jbd.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/jbd.h Sun Jan 19 19:46:42 2003
+@@ -249,6 +249,13 @@ static inline struct journal_head *bh2jh
+ return bh->b_private;
+ }
+
++#define HAVE_JOURNAL_CALLBACK_STATUS
++struct journal_callback {
++ struct list_head jcb_list;
++ void (*jcb_func)(struct journal_callback *jcb, int error);
++ /* user data goes here */
++};
++
+ struct jbd_revoke_table_s;
+
+ /* The handle_t type represents a single atomic update being performed
+@@ -279,6 +286,12 @@ struct handle_s
+ operations */
+ int h_err;
+
++ /* List of application registered callbacks for this handle.
++ * The function(s) will be called after the transaction that
++ * this handle is part of has been committed to disk.
++ */
++ struct list_head h_jcb;
++
+ /* Flags */
+ unsigned int h_sync: 1; /* sync-on-close */
+ unsigned int h_jdata: 1; /* force data journaling */
+@@ -398,6 +411,10 @@ struct transaction_s
+
+ /* How many handles used this transaction? */
+ int t_handle_count;
++
++ /* List of registered callback functions for this transaction.
++ * Called when the transaction is committed. */
++ struct list_head t_jcb;
+ };
+
+
+@@ -646,6 +663,9 @@ extern int journal_flushpage(journal_t
+ extern int journal_try_to_free_buffers(journal_t *, struct page *, int);
+ extern int journal_stop(handle_t *);
+ extern int journal_flush (journal_t *);
++extern void journal_callback_set(handle_t *handle,
++ void (*fn)(struct journal_callback *,int),
++ struct journal_callback *jcb);
+
+ extern void journal_lock_updates (journal_t *);
+ extern void journal_unlock_updates (journal_t *);
--- /dev/null
+ arch/ia64/mm/init.c | 6 +++++
+ include/linux/slab.h | 1
+ kernel/ksyms.c | 1
+ mm/slab.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 61 insertions(+)
+
+--- linux-2.4.19-hp2_pnnl2/arch/ia64/mm/init.c~kmem_cache_validate_hp Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/arch/ia64/mm/init.c Sun Jan 19 18:59:24 2003
+@@ -44,6 +44,12 @@ unsigned long vmalloc_end = VMALLOC_END_
+ static struct page *vmem_map;
+ static unsigned long num_dma_physpages;
+
++struct page *check_get_page(unsigned long kaddr)
++{
++#warning FIXME: Lustre team, is this solid?
++ return virt_to_page(kaddr);
++}
++
+ int
+ do_check_pgt_cache (int low, int high)
+ {
+--- linux-2.4.19-hp2_pnnl2/include/linux/slab.h~kmem_cache_validate_hp Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/slab.h Sun Jan 19 19:01:07 2003
+@@ -56,6 +56,7 @@ extern kmem_cache_t *kmem_cache_create(c
+ extern int kmem_cache_destroy(kmem_cache_t *);
+ extern int kmem_cache_shrink(kmem_cache_t *);
+ extern void *kmem_cache_alloc(kmem_cache_t *, int);
++extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp);
+ extern void kmem_cache_free(kmem_cache_t *, void *);
+ extern unsigned int kmem_cache_size(kmem_cache_t *);
+
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~kmem_cache_validate_hp Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 19:00:32 2003
+@@ -118,6 +118,7 @@ EXPORT_SYMBOL(kmem_find_general_cachep);
+ EXPORT_SYMBOL(kmem_cache_create);
+ EXPORT_SYMBOL(kmem_cache_destroy);
+ EXPORT_SYMBOL(kmem_cache_shrink);
++EXPORT_SYMBOL(kmem_cache_validate);
+ EXPORT_SYMBOL(kmem_cache_alloc);
+ EXPORT_SYMBOL(kmem_cache_free);
+ EXPORT_SYMBOL(kmem_cache_size);
+--- linux-2.4.19-hp2_pnnl2/mm/slab.c~kmem_cache_validate_hp Sun Jan 19 18:59:23 2003
++++ linux-2.4.19-hp2_pnnl2-root/mm/slab.c Sun Jan 19 18:59:24 2003
+@@ -1207,6 +1207,59 @@ failed:
+ * Called with the cache-lock held.
+ */
+
++extern struct page *check_get_page(unsigned long kaddr);
++struct page *page_mem_map(struct page *page);
++static int kmem_check_cache_obj (kmem_cache_t * cachep,
++ slab_t *slabp, void * objp)
++{
++ int i;
++ unsigned int objnr;
++
++#if DEBUG
++ if (cachep->flags & SLAB_RED_ZONE) {
++ objp -= BYTES_PER_WORD;
++ if ( *(unsigned long *)objp != RED_MAGIC2)
++ /* Either write before start, or a double free. */
++ return 0;
++ if (*(unsigned long *)(objp+cachep->objsize -
++ BYTES_PER_WORD) != RED_MAGIC2)
++ /* Either write past end, or a double free. */
++ return 0;
++ }
++#endif
++
++ objnr = (objp-slabp->s_mem)/cachep->objsize;
++ if (objnr >= cachep->num)
++ return 0;
++ if (objp != slabp->s_mem + objnr*cachep->objsize)
++ return 0;
++
++ /* Check slab's freelist to see if this obj is there. */
++ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
++ if (i == objnr)
++ return 0;
++ }
++ return 1;
++}
++
++
++int kmem_cache_validate(kmem_cache_t *cachep, void *objp)
++{
++ struct page *page = check_get_page((unsigned long)objp);
++
++ if (!VALID_PAGE(page))
++ return 0;
++
++ if (!PageSlab(page))
++ return 0;
++
++ /* XXX check for freed slab objects ? */
++ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp))
++ return 0;
++
++ return (cachep == GET_PAGE_CACHE(page));
++}
++
+ #if DEBUG
+ static int kmem_extra_free_checks (kmem_cache_t * cachep,
+ slab_t *slabp, void * objp)
+
+_
- 0 files changed
+ include/linux/lustre_version.h | 1 +
+ 1 files changed, 1 insertion(+)
---- /dev/null 2002-08-30 16:31:37.000000000 -0700
-+++ linux-2.4.18-17.8.0-zab/include/linux/lustre_version.h 2002-12-06 14:52:30.000000000 -0800
+--- /dev/null Fri Aug 30 17:31:37 2002
++++ linux-2.4.18-18.8.0-l7-root/include/linux/lustre_version.h Mon Jan 20 12:24:45 2003
@@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 5
++#define LUSTRE_KERNEL_VERSION 10
_
- arch/i386/mm/init.c | 6 +
- arch/ia64/mm/init.c | 6 +
- drivers/block/blkpg.c | 35 ++++++
+
+
+
+ arch/i386/mm/init.c | 6
+ arch/ia64/mm/init.c | 6
+ drivers/block/blkpg.c | 35 ++++
drivers/block/loop.c | 5
- drivers/ide/ide-disk.c | 6 +
+ drivers/ide/ide-disk.c | 6
fs/dcache.c | 1
fs/ext3/Makefile | 2
fs/ext3/super.c | 2
- fs/jbd/commit.c | 27 ++++-
- fs/jbd/journal.c | 1
- fs/jbd/transaction.c | 56 ++++++++--
- fs/namei.c | 215 ++++++++++++++++++++++++++++++++---------
+ fs/namei.c | 296 ++++++++++++++++++++++++++++++++++-------
fs/nfsd/vfs.c | 2
- fs/open.c | 63 +++++++++---
- fs/stat.c | 30 ++++-
+ fs/open.c | 63 ++++++--
+ fs/stat.c | 30 +++-
include/linux/blkdev.h | 4
- include/linux/dcache.h | 31 +++++
- include/linux/fs.h | 14 ++
- include/linux/jbd.h | 20 +++
+ include/linux/dcache.h | 31 ++++
+ include/linux/fs.h | 23 +++
include/linux/lustre_version.h | 1
include/linux/slab.h | 1
- kernel/ksyms.c | 7 +
- mm/slab.c | 53 ++++++++++
- 23 files changed, 502 insertions(+), 86 deletions(-)
+ kernel/ksyms.c | 7
+ mm/slab.c | 53 +++++++
+ 19 files changed, 501 insertions(+), 73 deletions(-)
--- /dev/null Fri Aug 30 17:31:37 2002
-+++ linux-2.4.19-root/include/linux/lustre_version.h Sun Dec 15 16:58:43 2002
++++ linux-2.4.19-root/include/linux/lustre_version.h Sun Jan 19 19:54:00 2003
@@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 5
---- linux-2.4.19/arch/ia64/mm/init.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/arch/ia64/mm/init.c Sun Dec 15 16:58:43 2002
++#define LUSTRE_KERNEL_VERSION 7
+--- linux-2.4.19/arch/ia64/mm/init.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/arch/ia64/mm/init.c Sun Jan 19 19:46:42 2003
@@ -37,6 +37,12 @@ unsigned long MAX_DMA_ADDRESS = PAGE_OFF
static unsigned long totalram_pages;
int
do_check_pgt_cache (int low, int high)
{
---- linux-2.4.19/arch/i386/mm/init.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/arch/i386/mm/init.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/arch/i386/mm/init.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/arch/i386/mm/init.c Sun Jan 19 19:46:42 2003
@@ -43,6 +43,12 @@ unsigned long highstart_pfn, highend_pfn
static unsigned long totalram_pages;
static unsigned long totalhigh_pages;
int do_check_pgt_cache(int low, int high)
{
int freed = 0;
---- linux-2.4.19/drivers/block/blkpg.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/block/blkpg.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/block/blkpg.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/block/blkpg.c Sun Jan 19 19:46:42 2003
@@ -296,3 +296,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
}
+EXPORT_SYMBOL(dev_set_rdonly);
+EXPORT_SYMBOL(dev_check_rdonly);
+EXPORT_SYMBOL(dev_clear_rdonly);
---- linux-2.4.19/drivers/block/loop.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/block/loop.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/block/loop.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/block/loop.c Sun Jan 19 19:46:42 2003
@@ -474,6 +474,11 @@ static int loop_make_request(request_que
spin_unlock_irq(&lo->lo_lock);
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
goto err;
} else if (rw == READA) {
---- linux-2.4.19/drivers/ide/ide-disk.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/drivers/ide/ide-disk.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/drivers/ide/ide-disk.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/drivers/ide/ide-disk.c Sun Jan 19 19:46:42 2003
@@ -551,6 +551,12 @@ static ide_startstop_t lba_48_rw_disk (i
*/
static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
if (IDE_CONTROL_REG)
OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
---- linux-2.4.19/fs/ext3/Makefile~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/ext3/Makefile Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/ext3/Makefile~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/ext3/Makefile Sun Jan 19 19:46:42 2003
@@ -9,6 +9,8 @@
O_TARGET := ext3.o
obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o
obj-m := $(O_TARGET)
---- linux-2.4.19/fs/ext3/super.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/ext3/super.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/ext3/super.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/ext3/super.c Sun Jan 19 19:46:42 2003
@@ -1744,7 +1744,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
---- linux-2.4.19/fs/jbd/commit.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/commit.c Sun Dec 15 16:58:43 2002
-@@ -475,7 +475,7 @@ start_journal_io:
- transaction's t_log_list queue, and metadata buffers are on
- the t_iobuf_list queue.
-
-- Wait for the transactions in reverse order. That way we are
-+ Wait for the buffers in reverse order. That way we are
- less likely to be woken up until all IOs have completed, and
- so we incur less scheduling load.
- */
-@@ -566,8 +566,10 @@ start_journal_io:
-
- jbd_debug(3, "JBD: commit phase 6\n");
-
-- if (is_journal_aborted(journal))
-+ if (is_journal_aborted(journal)) {
-+ unlock_journal(journal);
- goto skip_commit;
-+ }
-
- /* Done it all: now write the commit record. We should have
- * cleaned up our previous buffers by now, so if we are in abort
-@@ -577,6 +579,7 @@ start_journal_io:
- descriptor = journal_get_descriptor_buffer(journal);
- if (!descriptor) {
- __journal_abort_hard(journal);
-+ unlock_journal(journal);
- goto skip_commit;
- }
-
-@@ -600,7 +603,6 @@ start_journal_io:
- put_bh(bh); /* One for getblk() */
- journal_unlock_journal_head(descriptor);
- }
-- lock_journal(journal);
-
- /* End of a transaction! Finally, we can do checkpoint
- processing: any buffers committed as a result of this
-@@ -609,6 +611,25 @@ start_journal_io:
-
- skip_commit:
-
-+ /* Call any callbacks that had been registered for handles in this
-+ * transaction. It is up to the callback to free any allocated
-+ * memory.
-+ */
-+ if (!list_empty(&commit_transaction->t_jcb)) {
-+ struct list_head *p, *n;
-+ int error = is_journal_aborted(journal);
-+
-+ list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+ struct journal_callback *jcb;
-+
-+ jcb = list_entry(p, struct journal_callback, jcb_list);
-+ list_del(p);
-+ jcb->jcb_func(jcb, error);
-+ }
-+ }
-+
-+ lock_journal(journal);
-+
- jbd_debug(3, "JBD: commit phase 7\n");
-
- J_ASSERT(commit_transaction->t_sync_datalist == NULL);
---- linux-2.4.19/fs/jbd/journal.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/journal.c Sun Dec 15 16:58:43 2002
-@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
- #endif
- EXPORT_SYMBOL(journal_flush);
- EXPORT_SYMBOL(journal_revoke);
-+EXPORT_SYMBOL(journal_callback_set);
-
- EXPORT_SYMBOL(journal_init_dev);
- EXPORT_SYMBOL(journal_init_inode);
---- linux-2.4.19/fs/jbd/transaction.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/jbd/transaction.c Sun Dec 15 16:58:43 2002
-@@ -57,6 +57,7 @@ static transaction_t * get_transaction (
- transaction->t_state = T_RUNNING;
- transaction->t_tid = journal->j_transaction_sequence++;
- transaction->t_expires = jiffies + journal->j_commit_interval;
-+ INIT_LIST_HEAD(&transaction->t_jcb);
-
- /* Set up the commit timer for the new transaction. */
- J_ASSERT (!journal->j_commit_timer_active);
-@@ -201,6 +202,20 @@ repeat_locked:
- return 0;
- }
-
-+/* Allocate a new handle. This should probably be in a slab... */
-+static handle_t *new_handle(int nblocks)
-+{
-+ handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+ if (!handle)
-+ return NULL;
-+ memset(handle, 0, sizeof (handle_t));
-+ handle->h_buffer_credits = nblocks;
-+ handle->h_ref = 1;
-+ INIT_LIST_HEAD(&handle->h_jcb);
-+
-+ return handle;
-+}
-+
- /*
- * Obtain a new handle.
- *
-@@ -227,14 +242,11 @@ handle_t *journal_start(journal_t *journ
- handle->h_ref++;
- return handle;
- }
--
-- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+ handle = new_handle(nblocks);
- if (!handle)
- return ERR_PTR(-ENOMEM);
-- memset (handle, 0, sizeof (handle_t));
-
-- handle->h_buffer_credits = nblocks;
-- handle->h_ref = 1;
- current->journal_info = handle;
-
- err = start_this_handle(journal, handle);
-@@ -333,14 +345,11 @@ handle_t *journal_try_start(journal_t *j
-
- if (is_journal_aborted(journal))
- return ERR_PTR(-EIO);
--
-- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+ handle = new_handle(nblocks);
- if (!handle)
- return ERR_PTR(-ENOMEM);
-- memset (handle, 0, sizeof (handle_t));
-
-- handle->h_buffer_credits = nblocks;
-- handle->h_ref = 1;
- current->journal_info = handle;
-
- err = try_start_this_handle(journal, handle);
-@@ -1328,6 +1337,28 @@ out:
- #endif
-
- /*
-+ * Register a callback function for this handle. The function will be
-+ * called when the transaction that this handle is part of has been
-+ * committed to disk with the original callback data struct and the
-+ * error status of the journal as parameters. There is no guarantee of
-+ * ordering between handles within a single transaction, nor between
-+ * callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations. The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * and has the caller-specific data afterwards.
-+ */
-+void journal_callback_set(handle_t *handle,
-+ void (*func)(struct journal_callback *jcb, int error),
-+ struct journal_callback *jcb)
-+{
-+ list_add(&jcb->jcb_list, &handle->h_jcb);
-+ jcb->jcb_func = func;
-+}
-+
-+/*
- * All done for a particular handle.
- *
- * There is not much action needed here. We just return any remaining
-@@ -1393,7 +1424,10 @@ int journal_stop(handle_t *handle)
- wake_up(&journal->j_wait_transaction_locked);
- }
-
-- /*
-+ /* Move callbacks from the handle to the transaction. */
-+ list_splice(&handle->h_jcb, &transaction->t_jcb);
-+
-+ /*
- * If the handle is marked SYNC, we need to set another commit
- * going! We also want to force a commit if the current
- * transaction is occupying too much of the log, or if the
---- linux-2.4.19/include/linux/blkdev.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/blkdev.h Sun Dec 15 17:02:24 2002
+--- linux-2.4.19/include/linux/blkdev.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/blkdev.h Sun Jan 19 21:05:55 2003
@@ -240,4 +240,8 @@ static inline unsigned int block_size(kd
return retval;
}
+int dev_check_rdonly(kdev_t);
+void dev_clear_rdonly(int);
#endif
---- linux-2.4.19/include/linux/slab.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/slab.h Sun Dec 15 17:02:12 2002
+--- linux-2.4.19/include/linux/slab.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/slab.h Sun Jan 19 21:05:52 2003
@@ -57,6 +57,7 @@ extern int kmem_cache_destroy(kmem_cache
extern int kmem_cache_shrink(kmem_cache_t *);
extern void *kmem_cache_alloc(kmem_cache_t *, int);
extern void *kmalloc(size_t, int);
extern void kfree(const void *);
---- linux-2.4.19/include/linux/jbd.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/jbd.h Sun Dec 15 16:58:43 2002
-@@ -249,6 +249,13 @@ static inline struct journal_head *bh2jh
- return bh->b_private;
- }
-
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+struct journal_callback {
-+ struct list_head jcb_list;
-+ void (*jcb_func)(struct journal_callback *jcb, int error);
-+ /* user data goes here */
-+};
-+
- struct jbd_revoke_table_s;
-
- /* The handle_t type represents a single atomic update being performed
-@@ -279,6 +286,12 @@ struct handle_s
- operations */
- int h_err;
-
-+ /* List of application registered callbacks for this handle.
-+ * The function(s) will be called after the transaction that
-+ * this handle is part of has been committed to disk.
-+ */
-+ struct list_head h_jcb;
-+
- /* Flags */
- unsigned int h_sync: 1; /* sync-on-close */
- unsigned int h_jdata: 1; /* force data journaling */
-@@ -398,6 +411,10 @@ struct transaction_s
-
- /* How many handles used this transaction? */
- int t_handle_count;
-+
-+ /* List of registered callback functions for this transaction.
-+ * Called when the transaction is committed. */
-+ struct list_head t_jcb;
- };
-
-
-@@ -646,6 +663,9 @@ extern int journal_flushpage(journal_t
- extern int journal_try_to_free_buffers(journal_t *, struct page *, int);
- extern int journal_stop(handle_t *);
- extern int journal_flush (journal_t *);
-+extern void journal_callback_set(handle_t *handle,
-+ void (*fn)(struct journal_callback *,int),
-+ struct journal_callback *jcb);
-
- extern void journal_lock_updates (journal_t *);
- extern void journal_unlock_updates (journal_t *);
---- linux-2.4.19/kernel/ksyms.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/kernel/ksyms.c Sun Dec 15 17:03:55 2002
+--- linux-2.4.19/kernel/ksyms.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/kernel/ksyms.c Sun Jan 19 19:46:42 2003
@@ -264,6 +264,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
/* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
EXPORT_SYMBOL(default_llseek);
EXPORT_SYMBOL(dentry_open);
---- linux-2.4.19/include/linux/dcache.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/dcache.h Sun Dec 15 17:02:11 2002
+--- linux-2.4.19/include/linux/dcache.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/dcache.h Sun Jan 19 19:46:42 2003
@@ -6,6 +6,34 @@
#include <asm/atomic.h>
#include <linux/mount.h>
};
/* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.19/include/linux/fs.h~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/include/linux/fs.h Sun Dec 15 17:02:11 2002
+--- linux-2.4.19/include/linux/fs.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/include/linux/fs.h Sun Jan 19 21:05:40 2003
@@ -541,6 +541,7 @@ struct file {
/* needed for tty driver, and maybe others */
/*
* File types
-@@ -853,6 +856,7 @@ struct file_operations {
+@@ -853,16 +856,28 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link2) (struct inode *,struct inode *, const char *, int);
int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink2) (struct inode *, char *, int);
int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -863,6 +867,8 @@ struct inode_operations {
++ int (*symlink2) (struct inode *,const char *, int, const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir2) (struct inode *,char *, int,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir2) (struct inode *, char *, int);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
++ int (*mknod2) (struct inode *,char *, int,int,int);
+ int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
++ int (*rename2) (struct inode *, struct inode *,
++ char *oldname, int oldlen,
++ char *newname, int newlen);
int (*readlink) (struct dentry *, char *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
+ int (*follow_link2) (struct dentry *, struct nameidata *,
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
-@@ -999,6 +1005,7 @@ extern int unregister_filesystem(struct
+@@ -999,6 +1014,7 @@ extern int unregister_filesystem(struct
extern struct vfsmount *kern_mount(struct file_system_type *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
-+struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data);
++struct vfsmount *do_kern_mount(const char *fstype, int flags, char *name, void *data);
extern void umount_tree(struct vfsmount *);
#define kern_umount mntput
-@@ -1329,6 +1336,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1329,6 +1345,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-@@ -1339,6 +1347,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1339,6 +1356,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void iput(struct inode *);
extern void force_delete(struct inode *);
-@@ -1448,6 +1458,8 @@ extern struct file_operations generic_ro
+@@ -1448,6 +1467,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.19/fs/dcache.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/dcache.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/dcache.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/dcache.c Sun Jan 19 19:46:42 2003
@@ -616,6 +616,7 @@ struct dentry * d_alloc(struct dentry *
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.19/fs/nfsd/vfs.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/nfsd/vfs.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/nfsd/vfs.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/nfsd/vfs.c Sun Jan 19 19:46:42 2003
@@ -1295,7 +1295,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
nfsd_sync_dir(fdentry);
---- linux-2.4.19/fs/namei.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/namei.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/namei.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/namei.c Sun Jan 19 19:46:42 2003
@@ -94,6 +94,12 @@
* XEmacs seems to be relying on it...
*/
break;
}
goto return_base;
-@@ -651,10 +683,21 @@ return_err:
+@@ -646,15 +678,28 @@ out_dput:
+ dput(dentry);
+ break;
+ }
++ if (err)
++ intent_release(nd->dentry, it);
+ path_release(nd);
+ return_err:
return err;
}
}
/* SMP-safe */
-@@ -757,7 +800,8 @@ int path_init(const char *name, unsigned
+@@ -757,7 +802,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -780,13 +824,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -780,13 +826,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -798,6 +845,12 @@ out:
+@@ -798,6 +847,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -819,7 +872,7 @@ struct dentry * lookup_one_len(const cha
+@@ -819,7 +874,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -851,6 +904,23 @@ int __user_walk(const char *name, unsign
+@@ -851,6 +906,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -987,7 +1057,8 @@ exit_lock:
+@@ -987,7 +1059,8 @@ exit_lock:
* for symlinks (where the permissions are checked later).
* SMP-safe
*/
{
int acc_mode, error = 0;
struct inode *inode;
-@@ -1002,7 +1073,7 @@ int open_namei(const char * pathname, in
+@@ -1002,7 +1075,7 @@ int open_namei(const char * pathname, in
*/
if (!(flag & O_CREAT)) {
if (path_init(pathname, lookup_flags(flag), nd))
if (error)
return error;
dentry = nd->dentry;
-@@ -1012,6 +1083,10 @@ int open_namei(const char * pathname, in
+@@ -1012,6 +1085,10 @@ int open_namei(const char * pathname, in
/*
* Create - we need to know the parent.
*/
if (path_init(pathname, LOOKUP_PARENT, nd))
error = path_walk(pathname, nd);
if (error)
-@@ -1028,7 +1103,7 @@ int open_namei(const char * pathname, in
+@@ -1028,7 +1105,7 @@ int open_namei(const char * pathname, in
dir = nd->dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1037,6 +1112,7 @@ do_last:
+@@ -1037,6 +1114,7 @@ do_last:
goto exit;
}
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
error = vfs_create(dir->d_inode, dentry,
-@@ -1070,7 +1146,8 @@ do_last:
+@@ -1070,7 +1148,8 @@ do_last:
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
goto do_link;
dput(nd->dentry);
-@@ -1156,8 +1233,10 @@ ok:
+@@ -1156,8 +1235,10 @@ ok:
return 0;
exit_dput:
path_release(nd);
return error;
-@@ -1176,7 +1255,12 @@ do_link:
+@@ -1176,7 +1257,12 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
dput(dentry);
if (error)
return error;
-@@ -1198,13 +1282,20 @@ do_link:
+@@ -1198,13 +1284,20 @@ do_link:
}
dir = nd->dentry;
down(&dir->d_inode->i_sem);
{
struct dentry *dentry;
-@@ -1212,7 +1303,7 @@ static struct dentry *lookup_create(stru
+@@ -1212,7 +1305,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1258,6 +1349,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1258,6 +1351,7 @@ asmlinkage long sys_mknod(const char * f
char * tmp;
struct dentry * dentry;
struct nameidata nd;
if (S_ISDIR(mode))
return -EPERM;
-@@ -1269,7 +1361,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1269,7 +1363,19 @@ asmlinkage long sys_mknod(const char * f
error = path_walk(tmp, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
++
++ if (nd.dentry->d_inode->i_op->mknod2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode, dev);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
+ dentry = lookup_create(&nd, 0, &it);
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1287,6 +1379,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1287,9 +1393,11 @@ asmlinkage long sys_mknod(const char * f
default:
error = -EINVAL;
}
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1327,6 +1420,7 @@ asmlinkage long sys_mkdir(const char * p
++ out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1327,6 +1435,7 @@ asmlinkage long sys_mkdir(const char * p
{
int error = 0;
char * tmp;
tmp = getname(pathname);
error = PTR_ERR(tmp);
-@@ -1338,11 +1432,12 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1338,14 +1447,26 @@ asmlinkage long sys_mkdir(const char * p
error = path_walk(tmp, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 1);
++ if (nd.dentry->d_inode->i_op->mkdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ dentry = lookup_create(&nd, 1, &it);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1426,6 +1521,7 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1426,6 +1547,7 @@ asmlinkage long sys_rmdir(const char * p
char * name;
struct dentry *dentry;
struct nameidata nd;
name = getname(pathname);
if(IS_ERR(name))
-@@ -1448,10 +1544,11 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1447,11 +1569,21 @@ asmlinkage long sys_rmdir(const char * p
+ error = -EBUSY;
goto exit1;
}
++ if (nd.dentry->d_inode->i_op->rmdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->rmdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1495,6 +1592,7 @@ asmlinkage long sys_unlink(const char *
+@@ -1495,6 +1627,7 @@ asmlinkage long sys_unlink(const char *
char * name;
struct dentry *dentry;
struct nameidata nd;
name = getname(pathname);
if(IS_ERR(name))
-@@ -1508,7 +1606,7 @@ asmlinkage long sys_unlink(const char *
+@@ -1507,8 +1640,17 @@ asmlinkage long sys_unlink(const char *
+ error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1516,6 +1614,7 @@ asmlinkage long sys_unlink(const char *
+@@ -1516,6 +1658,7 @@ asmlinkage long sys_unlink(const char *
goto slashes;
error = vfs_unlink(nd.dentry->d_inode, dentry);
exit2:
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1562,6 +1661,7 @@ asmlinkage long sys_symlink(const char *
+@@ -1562,6 +1705,7 @@ asmlinkage long sys_symlink(const char *
int error = 0;
char * from;
char * to;
from = getname(oldname);
if(IS_ERR(from))
-@@ -1576,10 +1676,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1576,15 +1720,28 @@ asmlinkage long sys_symlink(const char *
error = path_walk(to, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->symlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ from);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ it.it_data = from;
+ dentry = lookup_create(&nd, 0, &it);
error = PTR_ERR(dentry);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1645,6 +1747,7 @@ asmlinkage long sys_link(const char * ol
++ out2:
+ path_release(&nd);
+-out:
++ out:
+ putname(to);
+ }
+ putname(from);
+@@ -1645,6 +1802,7 @@ asmlinkage long sys_link(const char * ol
int error;
char * from;
char * to;
from = getname(oldname);
if(IS_ERR(from))
-@@ -1657,7 +1760,7 @@ asmlinkage long sys_link(const char * ol
+@@ -1657,7 +1815,7 @@ asmlinkage long sys_link(const char * ol
error = 0;
if (path_init(from, LOOKUP_POSITIVE, &old_nd))
if (error)
goto exit;
if (path_init(to, LOOKUP_PARENT, &nd))
-@@ -1667,10 +1770,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1667,10 +1825,22 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
- new_dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->link2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link2(old_nd.dentry->d_inode,
++ nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
+ it.it_op = IT_LINK2;
+ new_dentry = lookup_create(&nd, 0, &it);
error = PTR_ERR(new_dentry);
dput(new_dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1713,7 +1818,8 @@ exit:
+@@ -1713,7 +1883,8 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
struct inode *target;
-@@ -1771,6 +1877,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1771,6 +1942,7 @@ int vfs_rename_dir(struct inode *old_dir
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (target) {
if (!error)
target->i_flags |= S_DEAD;
-@@ -1792,7 +1899,8 @@ out_unlock:
+@@ -1792,7 +1964,8 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
-@@ -1823,6 +1931,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1823,6 +1996,7 @@ int vfs_rename_other(struct inode *old_d
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
double_up(&old_dir->i_zombie, &new_dir->i_zombie);
if (error)
return error;
-@@ -1834,13 +1943,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1834,13 +2008,14 @@ int vfs_rename_other(struct inode *old_d
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!error) {
if (old_dir == new_dir)
inode_dir_notify(old_dir, DN_RENAME);
-@@ -1857,6 +1967,7 @@ static inline int do_rename(const char *
+@@ -1857,6 +2032,7 @@ static inline int do_rename(const char *
int error = 0;
struct dentry * old_dir, * new_dir;
struct dentry * old_dentry, *new_dentry;
struct nameidata oldnd, newnd;
if (path_init(oldname, LOOKUP_PARENT, &oldnd))
-@@ -1885,7 +1996,7 @@ static inline int do_rename(const char *
-
+@@ -1883,9 +2059,23 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename2) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1901,18 +2012,21 @@ static inline int do_rename(const char *
+@@ -1901,18 +2091,21 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
dput(old_dentry);
exit3:
double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1961,7 +2075,8 @@ out:
+@@ -1961,7 +2154,8 @@ out:
}
static inline int
{
int res = 0;
char *name;
-@@ -1974,7 +2089,7 @@ __vfs_follow_link(struct nameidata *nd,
+@@ -1974,7 +2168,7 @@ __vfs_follow_link(struct nameidata *nd,
/* weird __emul_prefix() stuff did it */
goto out;
}
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -1996,7 +2111,13 @@ fail:
+@@ -1996,7 +2190,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
}
/* get the link contents into pagecache */
-@@ -2038,7 +2159,7 @@ int page_follow_link(struct dentry *dent
+@@ -2038,7 +2238,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.19/fs/open.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/open.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/open.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/open.c Sun Jan 19 19:46:42 2003
@@ -19,6 +19,9 @@
#include <asm/uaccess.h>
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.19/fs/stat.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/fs/stat.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/fs/stat.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/fs/stat.c Sun Jan 19 19:46:42 2003
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
path_release(&nd);
}
return error;
---- linux-2.4.19/mm/slab.c~vanilla-2.4.19 Sun Dec 15 16:58:43 2002
-+++ linux-2.4.19-root/mm/slab.c Sun Dec 15 16:58:43 2002
+--- linux-2.4.19/mm/slab.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003
++++ linux-2.4.19-root/mm/slab.c Sun Jan 19 19:46:42 2003
@@ -1207,6 +1207,59 @@ failed:
* Called with the cache-lock held.
*/
- fs/dcache.c | 3
- fs/namei.c | 228 ++++++++++++++++++++++++++++++++++++++-----------
+ fs/dcache.c | 8 +
+ fs/namei.c | 288 ++++++++++++++++++++++++++++++++++++++++---------
fs/nfsd/vfs.c | 2
- fs/open.c | 53 +++++++++--
+ fs/open.c | 53 +++++++--
fs/stat.c | 9 +
- include/linux/dcache.h | 31 ++++++
- include/linux/fs.h | 13 ++
+ include/linux/dcache.h | 25 ++++
+ include/linux/fs.h | 22 +++
kernel/ksyms.c | 1
- 8 files changed, 278 insertions(+), 62 deletions(-)
+ 8 files changed, 345 insertions(+), 63 deletions(-)
---- linux-2.4.18-18.8.0-l4/fs/dcache.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/dcache.c Sat Dec 14 06:31:22 2002
-@@ -150,6 +150,8 @@ repeat:
- unhash_it:
- list_del_init(&dentry->d_hash);
-
+--- linux-2.4.18-49chaos-lustre9/fs/dcache.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/dcache.c Wed Jan 29 12:43:32 2003
+@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
+
++ /* network invalidation by Lustre */
++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++ spin_unlock(&dcache_lock);
++ return 0;
++ }
+
- kill_it: {
- struct dentry *parent;
- list_del(&dentry->d_child);
-@@ -645,6 +647,7 @@ struct dentry * d_alloc(struct dentry *
+ /*
+ * Check whether to do a partial shrink_dcache
+ * to get rid of unused child entries.
+@@ -645,6 +652,7 @@ struct dentry * d_alloc(struct dentry *
dentry->d_fsdata = NULL;
dentry->d_extra_attributes = NULL;
dentry->d_mounted = 0;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.18-18.8.0-l4/fs/namei.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/namei.c Sat Dec 14 06:37:21 2002
-@@ -1,3 +1,6 @@
-+
-+
-+
- /*
- * linux/fs/namei.c
- *
-@@ -94,6 +97,14 @@
+--- linux-2.4.18-49chaos-lustre9/fs/namei.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/namei.c Wed Feb 5 16:23:06 2003
+@@ -94,6 +94,13 @@
* XEmacs seems to be relying on it...
*/
+
+}
+
-+
/* In order to reduce some races, while at the same time doing additional
* checking and hopefully speeding things up, we copy filenames to the
* kernel data space before using them..
-@@ -260,10 +271,19 @@ void path_release(struct nameidata *nd)
+@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd)
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
-@@ -281,7 +301,8 @@ static struct dentry * cached_lookup(str
+@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str
* make sure that nobody added the entry to the dcache in the meantime..
* SMP-safe
*/
{
struct dentry * result;
struct inode *dir = parent->d_inode;
-@@ -300,6 +321,9 @@ static struct dentry * real_lookup(struc
+
++again:
++
+ down(&dir->i_sem);
+ /*
+ * First re-do the cached lookup just in case it was created
+@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc
result = ERR_PTR(-ENOMEM);
if (dentry) {
lock_kernel();
result = dir->i_op->lookup(dir, dentry);
unlock_kernel();
if (result)
-@@ -321,6 +345,12 @@ static struct dentry * real_lookup(struc
+@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc
dput(result);
result = ERR_PTR(-ENOENT);
}
+ if (!result->d_op->d_revalidate2(result, flags, it) &&
+ !d_invalidate(result)) {
+ dput(result);
-+ result = ERR_PTR(-ENOENT);
++ goto again;
+ }
}
return result;
}
-@@ -334,7 +364,8 @@ int max_recursive_link = 5;
+@@ -334,7 +362,8 @@ int max_recursive_link = 5;
* Without that kind of total limit, nasty chains of consecutive
* symlinks can cause almost arbitrarily long lookups.
*/
-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
-+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
-+ struct lookup_intent *it)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++ struct lookup_intent *it)
{
int err;
if (current->link_count >= max_recursive_link)
-@@ -348,10 +379,14 @@ static inline int do_follow_link(struct
+@@ -348,10 +377,14 @@ static inline int do_follow_link(struct
current->link_count++;
current->total_link_count++;
UPDATE_ATIME(dentry->d_inode);
- err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ if (dentry->d_inode->i_op->follow_link2)
-+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++ else
++ err = dentry->d_inode->i_op->follow_link(dentry, nd);
current->link_count--;
return err;
loop:
-+ intent_release(dentry, it);
++ intent_release(dentry, it);
path_release(nd);
return -ELOOP;
}
-@@ -449,7 +484,8 @@ static inline void follow_dotdot(struct
+@@ -449,7 +482,8 @@ static inline void follow_dotdot(struct
*
* We expect 'base' to be positive and a directory.
*/
{
struct dentry *dentry;
struct inode *inode;
-@@ -526,12 +562,12 @@ int link_path_walk(const char * name, st
+@@ -526,12 +560,12 @@ int link_path_walk(const char * name, st
break;
}
/* This does the actual lookups.. */
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -548,8 +584,8 @@ int link_path_walk(const char * name, st
+@@ -548,8 +582,8 @@ int link_path_walk(const char * name, st
if (!inode->i_op)
goto out_dput;
dput(dentry);
if (err)
goto return_err;
-@@ -565,7 +601,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +599,7 @@ int link_path_walk(const char * name, st
nd->dentry = dentry;
}
err = -ENOTDIR;
break;
continue;
/* here ends the main loop */
-@@ -592,12 +628,12 @@ last_component:
+@@ -592,12 +626,12 @@ last_component:
if (err < 0)
break;
}
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -606,8 +642,10 @@ last_component:
+@@ -606,8 +640,9 @@ last_component:
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
- && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
-+ && inode && inode->i_op &&
-+ (inode->i_op->follow_link ||
-+ inode->i_op->follow_link2)) {
++ && inode && inode->i_op &&
++ (inode->i_op->follow_link || inode->i_op->follow_link2)) {
+ err = do_follow_link(dentry, nd, it);
dput(dentry);
if (err)
goto return_err;
-@@ -621,7 +659,8 @@ last_component:
+@@ -621,7 +656,8 @@ last_component:
goto no_inode;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
- if (!inode->i_op || !inode->i_op->lookup)
-+ if (!inode->i_op || (!inode->i_op->lookup &&
-+ !inode->i_op->lookup2))
++ if (!inode->i_op ||
++ (!inode->i_op->lookup && !inode->i_op->lookup2))
break;
}
goto return_base;
-@@ -663,10 +702,21 @@ return_err:
+@@ -658,15 +694,28 @@ out_dput:
+ dput(dentry);
+ break;
+ }
++ if (err)
++ intent_release(nd->dentry, it);
+ path_release(nd);
+ return_err:
return err;
}
}
/* SMP-safe */
-@@ -751,6 +801,17 @@ walk_init_root(const char *name, struct
+@@ -751,6 +800,17 @@ walk_init_root(const char *name, struct
}
/* SMP-safe */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
int error = 0;
-@@ -779,7 +840,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +839,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -802,13 +864,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -802,13 +863,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -820,6 +885,12 @@ out:
+@@ -820,6 +884,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -841,7 +912,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +911,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -872,6 +943,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +942,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -1045,14 +1133,17 @@ int may_open(struct nameidata *nd, int a
+@@ -1045,14 +1132,17 @@ int may_open(struct nameidata *nd, int a
return get_lease(inode, flag);
}
+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+ int flags, struct lookup_intent *it);
++ int flags, struct lookup_intent *it);
+
struct file *filp_open(const char * pathname, int open_flags, int mode)
{
struct dentry *dir;
int flag = open_flags;
struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_OPEN };
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags };
int count = 0;
if ((flag+1) & O_ACCMODE)
-@@ -1066,7 +1157,7 @@ struct file *filp_open(const char * path
+@@ -1066,7 +1156,7 @@ struct file *filp_open(const char * path
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
if (error)
return ERR_PTR(error);
dentry = nd.dentry;
-@@ -1076,6 +1167,8 @@ struct file *filp_open(const char * path
+@@ -1076,6 +1166,8 @@ struct file *filp_open(const char * path
/*
* Create - we need to know the parent.
*/
-+ it.it_mode = mode;
-+ it.it_op |= IT_CREAT;
++ it.it_mode = mode;
++ it.it_op |= IT_CREAT;
error = path_lookup(pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
-@@ -1091,7 +1184,7 @@ struct file *filp_open(const char * path
+@@ -1091,7 +1183,7 @@ struct file *filp_open(const char * path
dir = nd.dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1100,6 +1193,7 @@ do_last:
+@@ -1100,6 +1192,7 @@ do_last:
goto exit;
}
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
error = vfs_create(dir->d_inode, dentry,
-@@ -1134,7 +1228,8 @@ do_last:
+@@ -1134,7 +1227,8 @@ do_last:
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+ dentry->d_inode->i_op->follow_link2))
++ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
++ dentry->d_inode->i_op->follow_link2))
goto do_link;
dput(nd.dentry);
-@@ -1149,11 +1244,13 @@ ok:
+@@ -1149,11 +1243,13 @@ ok:
if (!S_ISREG(nd.dentry->d_inode->i_mode))
open_flags &= ~O_TRUNC;
- return dentry_open(nd.dentry, nd.mnt, open_flags);
-+ return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
++ return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
exit_dput:
-+ intent_release(dentry, &it);
++ intent_release(dentry, &it);
dput(dentry);
exit:
-+ intent_release(nd.dentry, &it);
++ intent_release(nd.dentry, &it);
path_release(&nd);
return ERR_PTR(error);
-@@ -1172,7 +1269,12 @@ do_link:
+@@ -1172,7 +1268,12 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
- error = dentry->d_inode->i_op->follow_link(dentry, &nd);
-+ if (dentry->d_inode->i_op->follow_link2)
-+ error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
-+ else
-+ error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
++ else
++ error = dentry->d_inode->i_op->follow_link(dentry, &nd);
+ if (error)
+ intent_release(dentry, &it);
dput(dentry);
if (error)
return error;
-@@ -1194,13 +1296,15 @@ do_link:
+@@ -1194,13 +1295,15 @@ do_link:
}
dir = nd.dentry;
down(&dir->d_inode->i_sem);
{
struct dentry *dentry;
-@@ -1208,7 +1312,7 @@ static struct dentry *lookup_create(stru
+@@ -1208,7 +1311,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1254,6 +1358,7 @@ asmlinkage long sys_mknod(const char * f
- char * tmp;
- struct dentry * dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode };
-
- if (S_ISDIR(mode))
- return -EPERM;
-@@ -1264,7 +1369,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1264,7 +1367,19 @@ asmlinkage long sys_mknod(const char * f
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ dentry = lookup_create(&nd, 0, &it);
++
++ if (nd.dentry->d_inode->i_op->mknod2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode, dev);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
++ dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1282,6 +1387,7 @@ asmlinkage long sys_mknod(const char * f
- default:
- error = -EINVAL;
- }
-+ intent_release(dentry, &it);
+@@ -1285,6 +1400,7 @@ asmlinkage long sys_mknod(const char * f
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1322,6 +1428,7 @@ asmlinkage long sys_mkdir(const char * p
- {
- int error = 0;
- char * tmp;
-+ struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode };
-
- tmp = getname(pathname);
- error = PTR_ERR(tmp);
-@@ -1332,11 +1439,12 @@ asmlinkage long sys_mkdir(const char * p
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1332,7 +1448,17 @@ asmlinkage long sys_mkdir(const char * p
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 1);
-+ dentry = lookup_create(&nd, 1, &it);
++ if (nd.dentry->d_inode->i_op->mkdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 1, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_mkdir(nd.dentry->d_inode, dentry,
- mode & ~current->fs->umask);
-+ intent_release(dentry, &it);
+@@ -1340,6 +1466,7 @@ asmlinkage long sys_mkdir(const char * p
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1420,6 +1528,7 @@ asmlinkage long sys_rmdir(const char * p
- char * name;
- struct dentry *dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_RMDIR };
-
- name = getname(pathname);
- if(IS_ERR(name))
-@@ -1441,10 +1550,11 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1440,8 +1567,17 @@ asmlinkage long sys_rmdir(const char * p
+ error = -EBUSY;
goto exit1;
}
++ if (nd.dentry->d_inode->i_op->rmdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->rmdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
-+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-+ intent_release(dentry, &it);
- dput(dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1488,6 +1598,7 @@ asmlinkage long sys_unlink(const char *
- char * name;
- struct dentry *dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_UNLINK };
-
- name = getname(pathname);
- if(IS_ERR(name))
-@@ -1500,7 +1611,7 @@ asmlinkage long sys_unlink(const char *
+@@ -1499,8 +1635,17 @@ asmlinkage long sys_unlink(const char *
+ error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
-+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1508,6 +1619,7 @@ asmlinkage long sys_unlink(const char *
- goto slashes;
- error = vfs_unlink(nd.dentry->d_inode, dentry);
- exit2:
-+ intent_release(dentry, &it);
- dput(dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1554,6 +1666,7 @@ asmlinkage long sys_symlink(const char *
- int error = 0;
- char * from;
- char * to;
-+ struct lookup_intent it = { .it_op = IT_SYMLINK };
-
- from = getname(oldname);
- if(IS_ERR(from))
-@@ -1567,10 +1680,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1567,15 +1712,26 @@ asmlinkage long sys_symlink(const char *
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ it.it_data = from;
-+ dentry = lookup_create(&nd, 0, &it);
++ if (nd.dentry->d_inode->i_op->symlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ from);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-+ intent_release(dentry, &it);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1635,6 +1750,7 @@ asmlinkage long sys_link(const char * ol
- {
- int error;
- char * to;
-+ struct lookup_intent it = { .it_op = IT_LINK };
-
- to = getname(newname);
- error = PTR_ERR(to);
-@@ -1642,7 +1758,7 @@ asmlinkage long sys_link(const char * ol
++ out2:
+ path_release(&nd);
+-out:
++ out:
+ putname(to);
+ }
+ putname(from);
+@@ -1642,7 +1798,7 @@ asmlinkage long sys_link(const char * ol
struct dentry *new_dentry;
struct nameidata nd, old_nd;
- error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
-+ error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, &it);
++ error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, NULL);
if (error)
goto exit;
error = path_lookup(to, LOOKUP_PARENT, &nd);
-@@ -1651,10 +1767,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1651,7 +1807,17 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
- new_dentry = lookup_create(&nd, 0);
-+ it.it_op = IT_LINK2;
-+ new_dentry = lookup_create(&nd, 0, &it);
++ if (nd.dentry->d_inode->i_op->link2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link2(old_nd.dentry->d_inode,
++ nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
++ new_dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-+ intent_release(new_dentry, &it);
- dput(new_dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1695,7 +1813,8 @@ exit:
+@@ -1695,7 +1861,8 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
struct inode *target;
-@@ -1753,6 +1872,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1753,6 +1920,7 @@ int vfs_rename_dir(struct inode *old_dir
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (target) {
if (!error)
target->i_flags |= S_DEAD;
-@@ -1774,7 +1894,8 @@ out_unlock:
+@@ -1774,7 +1942,8 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
-@@ -1805,6 +1926,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1805,6 +1974,7 @@ int vfs_rename_other(struct inode *old_d
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
double_up(&old_dir->i_zombie, &new_dir->i_zombie);
if (error)
return error;
-@@ -1816,13 +1938,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1816,13 +1986,14 @@ int vfs_rename_other(struct inode *old_d
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!error) {
if (old_dir == new_dir)
inode_dir_notify(old_dir, DN_RENAME);
-@@ -1839,6 +1962,7 @@ static inline int do_rename(const char *
- int error = 0;
- struct dentry * old_dir, * new_dir;
- struct dentry * old_dentry, *new_dentry;
-+ struct lookup_intent it = { .it_op = IT_RENAME };
- struct nameidata oldnd, newnd;
-
- error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
-@@ -1864,7 +1988,7 @@ static inline int do_rename(const char *
-
+@@ -1862,9 +2033,23 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename2) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
-+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, &it);
++ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1880,18 +2004,21 @@ static inline int do_rename(const char *
+@@ -1880,14 +2065,14 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
- new_dentry = lookup_hash(&newnd.last, new_dir);
-+ it.it_op = IT_RENAME2;
-+ new_dentry = lookup_hash_it(&newnd.last, new_dir, &it);
++ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
-+ new_dir->d_inode, new_dentry, &it);
++ new_dir->d_inode, new_dentry, NULL);
unlock_kernel();
-+ intent_release(new_dentry, &it);
dput(new_dentry);
- exit4:
-+ intent_release(old_dentry, &it);
- dput(old_dentry);
- exit3:
- double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1940,7 +2067,8 @@ out:
+@@ -1940,7 +2125,8 @@ out:
}
static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
-+__vfs_follow_link(struct nameidata *nd, const char *link,
-+ struct lookup_intent *it)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
{
int res = 0;
char *name;
-@@ -1953,7 +2081,7 @@ __vfs_follow_link(struct nameidata *nd,
+@@ -1953,7 +2139,7 @@ __vfs_follow_link(struct nameidata *nd,
/* weird __emul_prefix() stuff did it */
goto out;
}
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -1975,7 +2103,13 @@ fail:
+@@ -1975,7 +2161,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
+ return __vfs_follow_link(nd, link, NULL);
+}
+
-+int vfs_follow_link_it(struct nameidata *nd, const char *link,
-+ struct lookup_intent *it)
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
+{
+ return __vfs_follow_link(nd, link, it);
}
/* get the link contents into pagecache */
-@@ -2017,7 +2151,7 @@ int page_follow_link(struct dentry *dent
+@@ -2017,7 +2209,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.18-18.8.0-l4/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/nfsd/vfs.c Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/nfsd/vfs.c Wed Jan 29 12:43:32 2003
@@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
unlock_kernel();
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
---- linux-2.4.18-18.8.0-l4/fs/open.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/open.c Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/open.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/open.c Wed Jan 29 12:43:32 2003
@@ -19,6 +19,9 @@
#include <asm/uaccess.h>
struct nameidata nd;
struct inode * inode;
int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
++ struct lookup_intent it = { .it_op = IT_TRUNC };
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.18-18.8.0-l4/fs/stat.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/fs/stat.c Sat Dec 14 06:31:22 2002
+--- linux-2.4.18-49chaos-lustre9/fs/stat.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/fs/stat.c Wed Jan 29 12:43:32 2003
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
path_release(&nd);
}
return error;
---- linux-2.4.18-18.8.0-l4/include/linux/dcache.h~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/include/linux/dcache.h Sat Dec 14 06:31:22 2002
-@@ -6,6 +6,34 @@
+--- linux-2.4.18-49chaos-lustre9/include/linux/dcache.h~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/include/linux/dcache.h Wed Jan 29 12:43:32 2003
+@@ -6,6 +6,27 @@
#include <asm/atomic.h>
#include <linux/mount.h>
-+#define IT_OPEN (1)
-+#define IT_CREAT (1<<1)
-+#define IT_MKDIR (1<<2)
-+#define IT_LINK (1<<3)
-+#define IT_LINK2 (1<<4)
-+#define IT_SYMLINK (1<<5)
-+#define IT_UNLINK (1<<6)
-+#define IT_RMDIR (1<<7)
-+#define IT_RENAME (1<<8)
-+#define IT_RENAME2 (1<<9)
-+#define IT_READDIR (1<<10)
-+#define IT_GETATTR (1<<11)
-+#define IT_SETATTR (1<<12)
-+#define IT_READLINK (1<<13)
-+#define IT_MKNOD (1<<14)
-+#define IT_LOOKUP (1<<15)
++#define IT_OPEN (1)
++#define IT_CREAT (1<<1)
++#define IT_READDIR (1<<2)
++#define IT_GETATTR (1<<3)
++#define IT_SETATTR (1<<4)
++#define IT_TRUNC (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP (1<<7)
+
+struct lookup_intent {
+ int it_op;
+ int it_mode;
++ int it_flags;
+ int it_disposition;
+ int it_status;
+ struct iattr *it_iattr;
/*
* linux/include/linux/dcache.h
*
-@@ -78,6 +106,7 @@ struct dentry {
+@@ -78,6 +99,7 @@ struct dentry {
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
void * d_extra_attributes; /* TUX-specific data */
-@@ -91,6 +120,8 @@ struct dentry_operations {
+@@ -91,6 +113,8 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
};
/* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.18-18.8.0-l4/include/linux/fs.h~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/include/linux/fs.h Sat Dec 14 06:33:11 2002
+@@ -124,6 +148,7 @@ d_iput: no no yes
+ * s_nfsd_free_path semaphore will be down
+ */
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */
+
+ extern spinlock_t dcache_lock;
+
+--- linux-2.4.18-49chaos-lustre9/include/linux/fs.h~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/include/linux/fs.h Wed Jan 29 12:43:32 2003
@@ -576,6 +576,7 @@ struct file {
/* needed for tty driver, and maybe others */
/*
* File types
-@@ -897,6 +900,7 @@ struct file_operations {
+@@ -897,16 +900,28 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link2) (struct inode *,struct inode *, const char *, int);
int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink2) (struct inode *, const char *, int);
int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -907,6 +911,8 @@ struct inode_operations {
++ int (*symlink2) (struct inode *, const char *, int, const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir2) (struct inode *, const char *, int,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir2) (struct inode *, const char *, int);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
++ int (*mknod2) (struct inode *, const char *, int,int,int);
+ int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
++ int (*rename2) (struct inode *, struct inode *,
++ const char *oldname, int oldlen,
++ const char *newname, int newlen);
int (*readlink) (struct dentry *, char *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
-+ int (*follow_link2) (struct dentry *, struct nameidata *,
-+ struct lookup_intent *it);
++ int (*follow_link2) (struct dentry *, struct nameidata *,
++ struct lookup_intent *it);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
-@@ -1381,6 +1387,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1383,6 +1398,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1392,6 +1399,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1394,6 +1410,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void inode_init_once(struct inode *);
extern void iput(struct inode *);
-@@ -1492,6 +1501,8 @@ extern struct file_operations generic_ro
+@@ -1494,6 +1512,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
-+extern int vfs_follow_link_it(struct nameidata *, const char *,
-+ struct lookup_intent *it);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++ struct lookup_intent *it);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.18-18.8.0-l4/kernel/ksyms.c~vfs_intent-2.4.18-18 Sat Dec 14 06:31:22 2002
-+++ linux-2.4.18-18.8.0-l4-root/kernel/ksyms.c Sat Dec 14 06:31:22 2002
-@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
+--- linux-2.4.18-49chaos-lustre9/kernel/ksyms.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
++++ linux-2.4.18-49chaos-lustre9-root/kernel/ksyms.c Wed Jan 29 12:43:32 2003
+@@ -294,6 +294,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_follow_link);
+ fs/dcache.c | 8 +
+ fs/namei.c | 287 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/nfsd/vfs.c | 2
+ fs/open.c | 53 +++++++--
+ fs/stat.c | 9 +
+ include/linux/dcache.h | 25 ++++
+ include/linux/fs.h | 22 +++
+ kernel/ksyms.c | 1
+ 8 files changed, 344 insertions(+), 63 deletions(-)
-
-
- 0 files changed
-
---- linux-2.4.18-17.8.0/fs/dcache.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/dcache.c 2002-12-06 14:52:31.000000000 -0800
-@@ -150,6 +150,8 @@ repeat:
- unhash_it:
- list_del_init(&dentry->d_hash);
-
+--- linux-2.4.18-18.8.0-l7/fs/dcache.c~vfs_intent-2.4.18-18 Mon Jan 20 08:28:00 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/dcache.c Mon Jan 20 08:54:54 2003
+@@ -186,6 +188,13 @@ int d_invalidate(struct dentry * dentry)
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
+
++ /* network invalidation by Lustre */
++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++ spin_unlock(&dcache_lock);
++ return 0;
++ }
+
- kill_it: {
- struct dentry *parent;
- list_del(&dentry->d_child);
-@@ -645,6 +647,7 @@ struct dentry * d_alloc(struct dentry *
+ /*
+ * Check whether to do a partial shrink_dcache
+ * to get rid of unused child entries.
+@@ -645,6 +654,7 @@ struct dentry * d_alloc(struct dentry *
dentry->d_fsdata = NULL;
dentry->d_extra_attributes = NULL;
dentry->d_mounted = 0;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.18-17.8.0/fs/namei.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/namei.c 2002-12-06 14:52:31.000000000 -0800
-@@ -1,3 +1,6 @@
-+
-+
-+
- /*
- * linux/fs/namei.c
- *
-@@ -94,6 +97,14 @@
+--- linux-2.4.18-18.8.0-l7/fs/namei.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/namei.c Wed Jan 22 22:53:28 2003
+@@ -94,6 +97,13 @@
* XEmacs seems to be relying on it...
*/
+
+}
+
-+
/* In order to reduce some races, while at the same time doing additional
* checking and hopefully speeding things up, we copy filenames to the
* kernel data space before using them..
*/
-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
-+ struct lookup_intent *it)
++ struct lookup_intent *it)
{
int err;
if (current->link_count >= max_recursive_link)
current->total_link_count++;
UPDATE_ATIME(dentry->d_inode);
- err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ if (dentry->d_inode->i_op->follow_link2)
-+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++ else
++ err = dentry->d_inode->i_op->follow_link(dentry, nd);
current->link_count--;
return err;
loop:
-+ intent_release(dentry, it);
++ intent_release(dentry, it);
path_release(nd);
return -ELOOP;
}
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -606,8 +642,10 @@ last_component:
+@@ -606,8 +642,9 @@ last_component:
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
- && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
+ && inode && inode->i_op &&
-+ (inode->i_op->follow_link ||
-+ inode->i_op->follow_link2)) {
++ (inode->i_op->follow_link || inode->i_op->follow_link2)) {
+ err = do_follow_link(dentry, nd, it);
dput(dentry);
if (err)
break;
}
goto return_base;
-@@ -663,10 +702,21 @@ return_err:
+@@ -658,15 +697,28 @@ out_dput:
+ dput(dentry);
+ break;
+ }
++ if (err)
++ intent_release(nd->dentry, it);
+ path_release(nd);
+ return_err:
return err;
}
}
/* SMP-safe */
-@@ -751,6 +801,17 @@ walk_init_root(const char *name, struct
+@@ -751,6 +803,17 @@ walk_init_root(const char *name, struct
}
/* SMP-safe */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
int error = 0;
-@@ -779,7 +840,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +842,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -802,13 +864,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -802,13 +866,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -820,6 +885,12 @@ out:
+@@ -820,6 +887,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -841,7 +912,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +914,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -872,6 +943,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +945,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -1010,7 +1098,8 @@ exit_lock:
- * for symlinks (where the permissions are checked later).
- * SMP-safe
- */
--int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
-+int open_namei_it(const char *pathname, int flag, int mode,
-+ struct nameidata *nd, struct lookup_intent *it)
+@@ -1045,14 +1135,17 @@ int may_open(struct nameidata *nd, int a
+ return get_lease(inode, flag);
+ }
+
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it);
++
+ struct file *filp_open(const char * pathname, int open_flags, int mode)
{
int acc_mode, error = 0;
- struct inode *inode;
-@@ -1024,7 +1113,7 @@ int open_namei(const char * pathname, in
+- struct inode *inode;
+ struct dentry *dentry;
+ struct dentry *dir;
+ int flag = open_flags;
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags };
+ int count = 0;
+
+ if ((flag+1) & O_ACCMODE)
+@@ -1066,7 +1159,7 @@ struct file *filp_open(const char * path
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
-- error = path_lookup(pathname, lookup_flags(flag), nd);
-+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it);
+- error = path_lookup(pathname, lookup_flags(flag), &nd);
++ error = path_lookup_it(pathname, lookup_flags(flag), &nd, &it);
if (error)
- return error;
- dentry = nd->dentry;
-@@ -1034,6 +1123,10 @@ int open_namei(const char * pathname, in
+ return ERR_PTR(error);
+ dentry = nd.dentry;
+@@ -1076,6 +1169,8 @@ struct file *filp_open(const char * path
/*
* Create - we need to know the parent.
*/
-+ if (it) {
-+ it->it_mode = mode;
-+ it->it_op |= IT_CREAT;
-+ }
- error = path_lookup(pathname, LOOKUP_PARENT, nd);
++ it.it_mode = mode;
++ it.it_op |= IT_CREAT;
+ error = path_lookup(pathname, LOOKUP_PARENT, &nd);
if (error)
- return error;
-@@ -1049,7 +1142,7 @@ int open_namei(const char * pathname, in
+ return ERR_PTR(error);
+@@ -1091,7 +1186,7 @@ struct file *filp_open(const char * path
- dir = nd->dentry;
+ dir = nd.dentry;
down(&dir->d_inode->i_sem);
-- dentry = lookup_hash(&nd->last, nd->dentry);
-+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
do_last:
error = PTR_ERR(dentry);
-@@ -1058,6 +1151,7 @@ do_last:
+@@ -1100,6 +1195,7 @@ do_last:
goto exit;
}
-+ it->it_mode = mode;
++ it.it_mode = mode;
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
error = vfs_create(dir->d_inode, dentry,
-@@ -1091,7 +1185,8 @@ do_last:
+@@ -1134,7 +1230,8 @@ do_last:
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+ dentry->d_inode->i_op->follow_link2))
++ dentry->d_inode->i_op->follow_link2))
goto do_link;
- dput(nd->dentry);
-@@ -1177,8 +1272,10 @@ ok:
- return 0;
+ dput(nd.dentry);
+@@ -1149,11 +1246,13 @@ ok:
+ if (!S_ISREG(nd.dentry->d_inode->i_mode))
+ open_flags &= ~O_TRUNC;
+
+- return dentry_open(nd.dentry, nd.mnt, open_flags);
++ return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
exit_dput:
-+ intent_release(dentry, it);
++ intent_release(dentry, &it);
dput(dentry);
exit:
-+ intent_release(nd->dentry, it);
- path_release(nd);
- return error;
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ return ERR_PTR(error);
-@@ -1197,7 +1294,12 @@ do_link:
+@@ -1172,7 +1271,12 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
-- error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ if (dentry->d_inode->i_op->follow_link2)
-+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ error = dentry->d_inode->i_op->follow_link(dentry, nd);
+- error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it);
++ else
++ error = dentry->d_inode->i_op->follow_link(dentry, &nd);
+ if (error)
-+ intent_release(dentry, it);
++ intent_release(dentry, &it);
dput(dentry);
if (error)
return error;
-@@ -1219,13 +1321,20 @@ do_link:
+@@ -1194,13 +1298,15 @@ do_link:
}
- dir = nd->dentry;
+ dir = nd.dentry;
down(&dir->d_inode->i_sem);
-- dentry = lookup_hash(&nd->last, nd->dentry);
-+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
- putname(nd->last.name);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ putname(nd.last.name);
goto do_last;
}
-+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
-+{
-+ return open_namei_it(pathname, flag, mode, nd, NULL);
-+}
-+
+
/* SMP-safe */
-static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
{
struct dentry *dentry;
-@@ -1233,7 +1342,7 @@ static struct dentry *lookup_create(stru
+@@ -1208,7 +1314,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1279,6 +1388,7 @@ asmlinkage long sys_mknod(const char * f
- char * tmp;
- struct dentry * dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode };
-
- if (S_ISDIR(mode))
- return -EPERM;
-@@ -1289,7 +1399,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1264,7 +1370,19 @@ asmlinkage long sys_mknod(const char * f
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ dentry = lookup_create(&nd, 0, &it);
++
++ if (nd.dentry->d_inode->i_op->mknod2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode, dev);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
++ dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1307,6 +1417,7 @@ asmlinkage long sys_mknod(const char * f
- default:
- error = -EINVAL;
- }
-+ intent_release(dentry, &it);
+@@ -1285,6 +1403,7 @@ asmlinkage long sys_mknod(const char * f
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1347,6 +1458,7 @@ asmlinkage long sys_mkdir(const char * p
- {
- int error = 0;
- char * tmp;
-+ struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode };
-
- tmp = getname(pathname);
- error = PTR_ERR(tmp);
-@@ -1357,11 +1469,12 @@ asmlinkage long sys_mkdir(const char * p
++ out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1332,7 +1451,17 @@ asmlinkage long sys_mkdir(const char * p
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 1);
-+ dentry = lookup_create(&nd, 1, &it);
++ if (nd.dentry->d_inode->i_op->mkdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 1, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_mkdir(nd.dentry->d_inode, dentry,
- mode & ~current->fs->umask);
-+ intent_release(dentry, &it);
+@@ -1340,6 +1469,7 @@ asmlinkage long sys_mkdir(const char * p
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1445,6 +1558,7 @@ asmlinkage long sys_rmdir(const char * p
- char * name;
- struct dentry *dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_RMDIR };
-
- name = getname(pathname);
- if(IS_ERR(name))
-@@ -1466,10 +1580,11 @@ asmlinkage long sys_rmdir(const char * p
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1440,8 +1570,17 @@ asmlinkage long sys_rmdir(const char * p
+ error = -EBUSY;
goto exit1;
}
++ if (nd.dentry->d_inode->i_op->rmdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->rmdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
-+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-+ intent_release(dentry, &it);
- dput(dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1513,6 +1628,7 @@ asmlinkage long sys_unlink(const char *
- char * name;
- struct dentry *dentry;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_UNLINK };
-
- name = getname(pathname);
- if(IS_ERR(name))
-@@ -1525,7 +1641,7 @@ asmlinkage long sys_unlink(const char *
+@@ -1499,8 +1638,17 @@ asmlinkage long sys_unlink(const char *
+ error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
-+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1533,6 +1649,7 @@ asmlinkage long sys_unlink(const char *
- goto slashes;
- error = vfs_unlink(nd.dentry->d_inode, dentry);
- exit2:
-+ intent_release(dentry, &it);
- dput(dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1579,6 +1696,7 @@ asmlinkage long sys_symlink(const char *
- int error = 0;
- char * from;
- char * to;
-+ struct lookup_intent it = { .it_op = IT_SYMLINK };
-
- from = getname(oldname);
- if(IS_ERR(from))
-@@ -1592,10 +1710,12 @@ asmlinkage long sys_symlink(const char *
+@@ -1567,15 +1715,26 @@ asmlinkage long sys_symlink(const char *
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ it.it_data = from;
-+ dentry = lookup_create(&nd, 0, &it);
++ if (nd.dentry->d_inode->i_op->symlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ from);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-+ intent_release(dentry, &it);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
-@@ -1660,6 +1780,7 @@ asmlinkage long sys_link(const char * ol
- {
- int error;
- char * to;
-+ struct lookup_intent it = { .it_op = IT_LINK };
-
- to = getname(newname);
- error = PTR_ERR(to);
-@@ -1667,7 +1788,7 @@ asmlinkage long sys_link(const char * ol
++ out2:
+ path_release(&nd);
+-out:
++ out:
+ putname(to);
+ }
+ putname(from);
+@@ -1642,7 +1801,7 @@ asmlinkage long sys_link(const char * ol
struct dentry *new_dentry;
struct nameidata nd, old_nd;
- error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
-+ error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, &it);
++ error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, NULL);
if (error)
goto exit;
error = path_lookup(to, LOOKUP_PARENT, &nd);
-@@ -1676,10 +1797,12 @@ asmlinkage long sys_link(const char * ol
+@@ -1651,7 +1810,17 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
- new_dentry = lookup_create(&nd, 0);
-+ it.it_op = IT_LINK2;
-+ new_dentry = lookup_create(&nd, 0, &it);
++ if (nd.dentry->d_inode->i_op->link2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link2(old_nd.dentry->d_inode,
++ nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
++ new_dentry = lookup_create(&nd, 0, NULL);
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-+ intent_release(new_dentry, &it);
- dput(new_dentry);
- }
- up(&nd.dentry->d_inode->i_sem);
-@@ -1720,7 +1843,8 @@ exit:
+@@ -1695,7 +1864,8 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
struct inode *target;
-@@ -1778,6 +1902,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1753,6 +1923,7 @@ int vfs_rename_dir(struct inode *old_dir
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (target) {
if (!error)
target->i_flags |= S_DEAD;
-@@ -1799,7 +1924,8 @@ out_unlock:
+@@ -1774,7 +1945,8 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
-@@ -1830,6 +1956,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1805,6 +1977,7 @@ int vfs_rename_other(struct inode *old_d
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
double_up(&old_dir->i_zombie, &new_dir->i_zombie);
if (error)
return error;
-@@ -1841,13 +1968,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1816,13 +1989,14 @@ int vfs_rename_other(struct inode *old_d
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!error) {
if (old_dir == new_dir)
inode_dir_notify(old_dir, DN_RENAME);
-@@ -1864,6 +1992,7 @@ static inline int do_rename(const char *
- int error = 0;
- struct dentry * old_dir, * new_dir;
- struct dentry * old_dentry, *new_dentry;
-+ struct lookup_intent it = { .it_op = IT_RENAME };
- struct nameidata oldnd, newnd;
-
- error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
-@@ -1889,7 +2018,7 @@ static inline int do_rename(const char *
-
+@@ -1862,9 +2036,23 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename2) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
-+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, &it);
++ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1905,18 +2034,21 @@ static inline int do_rename(const char *
+@@ -1880,14 +2068,14 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
- new_dentry = lookup_hash(&newnd.last, new_dir);
-+ it.it_op = IT_RENAME2;
-+ new_dentry = lookup_hash_it(&newnd.last, new_dir, &it);
++ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
-+ new_dir->d_inode, new_dentry, &it);
++ new_dir->d_inode, new_dentry, NULL);
unlock_kernel();
-+ intent_release(new_dentry, &it);
dput(new_dentry);
- exit4:
-+ intent_release(old_dentry, &it);
- dput(old_dentry);
- exit3:
- double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
-@@ -1965,7 +2097,8 @@ out:
+@@ -1940,7 +2127,8 @@ out:
}
static inline int
-__vfs_follow_link(struct nameidata *nd, const char *link)
+__vfs_follow_link(struct nameidata *nd, const char *link,
-+ struct lookup_intent *it)
++ struct lookup_intent *it)
{
int res = 0;
char *name;
-@@ -1978,7 +2111,7 @@ __vfs_follow_link(struct nameidata *nd,
+@@ -1953,7 +2141,7 @@ __vfs_follow_link(struct nameidata *nd,
/* weird __emul_prefix() stuff did it */
goto out;
}
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -2000,7 +2133,13 @@ fail:
+@@ -1975,7 +2163,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
+}
+
+int vfs_follow_link_it(struct nameidata *nd, const char *link,
-+ struct lookup_intent *it)
++ struct lookup_intent *it)
+{
+ return __vfs_follow_link(nd, link, it);
}
/* get the link contents into pagecache */
-@@ -2042,7 +2181,7 @@ int page_follow_link(struct dentry *dent
+@@ -2017,7 +2211,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.18-17.8.0/fs/nfsd/vfs.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/nfsd/vfs.c 2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/nfsd/vfs.c Mon Jan 20 12:25:10 2003
@@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
unlock_kernel();
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
---- linux-2.4.18-17.8.0/fs/open.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/open.c 2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/open.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/open.c Wed Jan 22 10:39:31 2003
@@ -19,6 +19,9 @@
#include <asm/uaccess.h>
struct nameidata nd;
struct inode * inode;
int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
++ struct lookup_intent it = { .it_op = IT_TRUNC };
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
path_release(&nd);
}
return error;
-@@ -638,10 +661,16 @@ asmlinkage long sys_fchown(unsigned int
- * for the internal routines (ie open_namei()/follow_link() etc). 00 is
- * used by symlinks.
- */
-+extern int open_namei_it(const char *filename, int namei_flags, int mode,
-+ struct nameidata *nd, struct lookup_intent *it);
-+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+ int flags, struct lookup_intent *it);
-+
- struct file *filp_open(const char * filename, int flags, int mode)
- {
- int namei_flags, error;
- struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_OPEN };
-
- namei_flags = flags;
- if ((namei_flags+1) & O_ACCMODE)
-@@ -649,18 +678,19 @@ struct file *filp_open(const char * file
- if (namei_flags & O_TRUNC)
- namei_flags |= 2;
-
-- error = open_namei(filename, namei_flags, mode, &nd);
-- if (!error)
-- return dentry_open(nd.dentry, nd.mnt, flags);
-+ error = open_namei_it(filename, namei_flags, mode, &nd, &it);
-+ if (error)
-+ return ERR_PTR(error);
-
-- return ERR_PTR(error);
-+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
- }
-
- extern ssize_t do_readahead(struct file *file, unsigned long index, unsigned long nr);
+@@ -628,7 +651,8 @@ extern ssize_t do_readahead(struct file
/* for files over a certains size it doesn't pay to do readahead on open */
#define READAHEAD_CUTOFF 48000
{
struct file * f;
struct inode *inode;
-@@ -711,6 +741,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,6 +717,7 @@ struct file *dentry_open(struct dentry *
do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT);
return f;
cleanup_all:
-@@ -725,11 +756,17 @@ cleanup_all:
+@@ -707,11 +732,17 @@ cleanup_all:
cleanup_file:
put_filp(f);
cleanup_dentry:
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.18-17.8.0/fs/stat.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/fs/stat.c 2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/fs/stat.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/fs/stat.c Mon Jan 20 12:25:10 2003
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
path_release(&nd);
}
return error;
---- linux-2.4.18-17.8.0/include/linux/dcache.h~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/dcache.h 2002-12-06 14:52:31.000000000 -0800
-@@ -6,6 +6,34 @@
+--- linux-2.4.18-18.8.0-l7/include/linux/dcache.h~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/include/linux/dcache.h Wed Jan 22 19:38:12 2003
+@@ -6,6 +6,27 @@
#include <asm/atomic.h>
#include <linux/mount.h>
-+#define IT_OPEN (1)
-+#define IT_CREAT (1<<1)
-+#define IT_MKDIR (1<<2)
-+#define IT_LINK (1<<3)
-+#define IT_LINK2 (1<<4)
-+#define IT_SYMLINK (1<<5)
-+#define IT_UNLINK (1<<6)
-+#define IT_RMDIR (1<<7)
-+#define IT_RENAME (1<<8)
-+#define IT_RENAME2 (1<<9)
-+#define IT_READDIR (1<<10)
-+#define IT_GETATTR (1<<11)
-+#define IT_SETATTR (1<<12)
-+#define IT_READLINK (1<<13)
-+#define IT_MKNOD (1<<14)
-+#define IT_LOOKUP (1<<15)
++#define IT_OPEN (1)
++#define IT_CREAT (1<<1)
++#define IT_READDIR (1<<2)
++#define IT_GETATTR (1<<3)
++#define IT_SETATTR (1<<4)
++#define IT_TRUNC (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP (1<<7)
+
+struct lookup_intent {
+ int it_op;
+ int it_mode;
++ int it_flags;
+ int it_disposition;
+ int it_status;
+ struct iattr *it_iattr;
/*
* linux/include/linux/dcache.h
*
-@@ -78,6 +106,7 @@ struct dentry {
+@@ -78,6 +99,7 @@ struct dentry {
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
void * d_extra_attributes; /* TUX-specific data */
-@@ -91,6 +120,8 @@ struct dentry_operations {
+@@ -91,6 +113,8 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
};
/* the dentry parameter passed to d_hash and d_compare is the parent
---- linux-2.4.18-17.8.0/include/linux/fs.h~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/include/linux/fs.h 2002-12-06 14:52:31.000000000 -0800
+@@ -124,6 +148,7 @@ d_iput: no no yes
+ * s_nfsd_free_path semaphore will be down
+ */
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */
+
+ extern spinlock_t dcache_lock;
+
+--- linux-2.4.18-18.8.0-l7/include/linux/fs.h~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/include/linux/fs.h Wed Jan 22 22:46:13 2003
@@ -576,6 +576,7 @@ struct file {
/* needed for tty driver, and maybe others */
/*
* File types
-@@ -897,6 +900,7 @@ struct file_operations {
+@@ -897,16 +900,28 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link2) (struct inode *,struct inode *, const char *, int);
int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink2) (struct inode *, const char *, int);
int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -907,6 +911,8 @@ struct inode_operations {
++ int (*symlink2) (struct inode *, const char *, int, const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir2) (struct inode *, const char *, int,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir2) (struct inode *, const char *, int);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
++ int (*mknod2) (struct inode *, const char *, int,int,int);
+ int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
++ int (*rename2) (struct inode *, struct inode *,
++ const char *oldname, int oldlen,
++ const char *newname, int newlen);
int (*readlink) (struct dentry *, char *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
+ int (*follow_link2) (struct dentry *, struct nameidata *,
-+ struct lookup_intent *it);
++ struct lookup_intent *it);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
-@@ -1381,6 +1387,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1381,6 +1396,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1392,6 +1399,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1392,6 +1408,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void inode_init_once(struct inode *);
extern void iput(struct inode *);
-@@ -1492,6 +1501,8 @@ extern struct file_operations generic_ro
+@@ -1492,6 +1510,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
+extern int vfs_follow_link_it(struct nameidata *, const char *,
-+ struct lookup_intent *it);
++ struct lookup_intent *it);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.18-17.8.0/kernel/ksyms.c~vfs_intent 2002-12-06 14:52:31.000000000 -0800
-+++ linux-2.4.18-17.8.0-zab/kernel/ksyms.c 2002-12-06 14:52:31.000000000 -0800
+--- linux-2.4.18-18.8.0-l7/kernel/ksyms.c~vfs_intent-2.4.18-18 Mon Jan 20 12:25:10 2003
++++ linux-2.4.18-18.8.0-l7-root/kernel/ksyms.c Mon Jan 20 12:25:10 2003
@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
--- /dev/null
+ fs/dcache.c | 3
+ fs/namei.c | 306 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/nfsd/vfs.c | 2
+ fs/open.c | 63 +++++++---
+ fs/stat.c | 29 +++-
+ include/linux/dcache.h | 31 ++++
+ include/linux/fs.h | 22 +++
+ kernel/ksyms.c | 1
+ 8 files changed, 384 insertions(+), 73 deletions(-)
+
+--- linux-2.4.19-hp2_pnnl2/fs/dcache.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/dcache.c Sun Jan 19 19:04:47 2003
+@@ -186,6 +188,13 @@ int d_invalidate(struct dentry * dentry)
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
++
++ /* network invalidation by Lustre */
++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++ spin_unlock(&dcache_lock);
++ return 0;
++ }
++
+ /*
+ * Check whether to do a partial shrink_dcache
+ * to get rid of unused child entries.
+@@ -616,6 +618,7 @@ struct dentry * d_alloc(struct dentry *
+ dentry->d_op = NULL;
+ dentry->d_fsdata = NULL;
+ dentry->d_mounted = 0;
++ dentry->d_it = NULL;
+ INIT_LIST_HEAD(&dentry->d_hash);
+ INIT_LIST_HEAD(&dentry->d_lru);
+ INIT_LIST_HEAD(&dentry->d_subdirs);
+--- linux-2.4.19-hp2_pnnl2/fs/namei.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/namei.c Sun Jan 19 19:35:55 2003
+@@ -94,6 +97,13 @@
+ * XEmacs seems to be relying on it...
+ */
+
++void intent_release(struct dentry *de, struct lookup_intent *it)
++{
++ if (it && de->d_op && de->d_op->d_intent_release)
++ de->d_op->d_intent_release(de, it);
++
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+@@ -260,10 +271,19 @@ void path_release(struct nameidata *nd)
+ * Internal lookup() using the new generic dcache.
+ * SMP-safe
+ */
+-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
++ int flags, struct lookup_intent *it)
+ {
+ struct dentry * dentry = d_lookup(parent, name);
+
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++ if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++ !d_invalidate(dentry)) {
++ dput(dentry);
++ dentry = NULL;
++ }
++ return dentry;
++ } else
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+ dput(dentry);
+@@ -281,11 +301,14 @@ static struct dentry * cached_lookup(str
+ * make sure that nobody added the entry to the dcache in the meantime..
+ * SMP-safe
+ */
+-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
++ int flags, struct lookup_intent *it)
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
+
++again:
++
+ down(&dir->i_sem);
+ /*
+ * First re-do the cached lookup just in case it was created
+@@ -300,6 +321,9 @@ static struct dentry * real_lookup(struc
+ result = ERR_PTR(-ENOMEM);
+ if (dentry) {
+ lock_kernel();
++ if (dir->i_op->lookup2)
++ result = dir->i_op->lookup2(dir, dentry, it);
++ else
+ result = dir->i_op->lookup(dir, dentry);
+ unlock_kernel();
+ if (result)
+@@ -321,6 +345,12 @@ static struct dentry * real_lookup(struc
+ dput(result);
+ result = ERR_PTR(-ENOENT);
+ }
++ } else if (result->d_op && result->d_op->d_revalidate2) {
++ if (!result->d_op->d_revalidate2(result, flags, it) &&
++ !d_invalidate(result)) {
++ dput(result);
++ goto again;
++ }
+ }
+ return result;
+ }
+@@ -332,7 +362,8 @@ static struct dentry * real_lookup(struc
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++ struct lookup_intent *it)
+ {
+ int err;
+ if (current->link_count >= 5)
+@@ -346,10 +377,14 @@ static inline int do_follow_link(struct
+ current->link_count++;
+ current->total_link_count++;
+ UPDATE_ATIME(dentry->d_inode);
+- err = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++ else
++ err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ current->link_count--;
+ return err;
+ loop:
++ intent_release(dentry, it);
+ path_release(nd);
+ return -ELOOP;
+ }
+@@ -447,7 +482,8 @@ static inline void follow_dotdot(struct
+ *
+ * We expect 'base' to be positive and a directory.
+ */
+-int link_path_walk(const char * name, struct nameidata *nd)
++int link_path_walk_it(const char *name, struct nameidata *nd,
++ struct lookup_intent *it)
+ {
+ struct dentry *dentry;
+ struct inode *inode;
+@@ -520,9 +556,9 @@ int link_path_walk(const char * name, st
+ break;
+ }
+ /* This does the actual lookups.. */
+- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ if (!dentry) {
+- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ break;
+@@ -539,8 +575,8 @@ int link_path_walk(const char * name, st
+ if (!inode->i_op)
+ goto out_dput;
+
+- if (inode->i_op->follow_link) {
+- err = do_follow_link(dentry, nd);
++ if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++ err = do_follow_link(dentry, nd, NULL);
+ dput(dentry);
+ if (err)
+ goto return_err;
+@@ -556,7 +592,7 @@ int link_path_walk(const char * name, st
+ nd->dentry = dentry;
+ }
+ err = -ENOTDIR;
+- if (!inode->i_op->lookup)
++ if (!inode->i_op->lookup && !inode->i_op->lookup2)
+ break;
+ continue;
+ /* here ends the main loop */
+@@ -583,9 +619,9 @@ last_component:
+ if (err < 0)
+ break;
+ }
+- dentry = cached_lookup(nd->dentry, &this, 0);
++ dentry = cached_lookup(nd->dentry, &this, 0, it);
+ if (!dentry) {
+- dentry = real_lookup(nd->dentry, &this, 0);
++ dentry = real_lookup(nd->dentry, &this, 0, it);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ break;
+@@ -594,8 +630,9 @@ last_component:
+ ;
+ inode = dentry->d_inode;
+ if ((lookup_flags & LOOKUP_FOLLOW)
+- && inode && inode->i_op && inode->i_op->follow_link) {
+- err = do_follow_link(dentry, nd);
++ && inode && inode->i_op &&
++ (inode->i_op->follow_link || inode->i_op->follow_link2)) {
++ err = do_follow_link(dentry, nd, it);
+ dput(dentry);
+ if (err)
+ goto return_err;
+@@ -609,7 +647,8 @@ last_component:
+ goto no_inode;
+ if (lookup_flags & LOOKUP_DIRECTORY) {
+ err = -ENOTDIR;
+- if (!inode->i_op || !inode->i_op->lookup)
++ if (!inode->i_op ||
++ (!inode->i_op->lookup && !inode->i_op->lookup2))
+ break;
+ }
+ goto return_base;
+@@ -646,15 +685,28 @@ out_dput:
+ dput(dentry);
+ break;
+ }
++ if (err)
++ intent_release(nd->dentry, it);
+ path_release(nd);
+ return_err:
+ return err;
+ }
+
++int link_path_walk(const char * name, struct nameidata *nd)
++{
++ return link_path_walk_it(name, nd, NULL);
++}
++
++int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
++{
++ current->total_link_count = 0;
++ return link_path_walk_it(name, nd, it);
++}
++
+ int path_walk(const char * name, struct nameidata *nd)
+ {
+ current->total_link_count = 0;
+- return link_path_walk(name, nd);
++ return link_path_walk_it(name, nd, NULL);
+ }
+
+ /* SMP-safe */
+@@ -757,7 +809,8 @@ int path_init(const char *name, unsigned
+ * needs parent already locked. Doesn't follow mounts.
+ * SMP-safe.
+ */
+-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
++ struct lookup_intent *it)
+ {
+ struct dentry * dentry;
+ struct inode *inode;
+@@ -780,13 +833,16 @@ struct dentry * lookup_hash(struct qstr
+ goto out;
+ }
+
+- dentry = cached_lookup(base, name, 0);
++ dentry = cached_lookup(base, name, 0, it);
+ if (!dentry) {
+ struct dentry *new = d_alloc(base, name);
+ dentry = ERR_PTR(-ENOMEM);
+ if (!new)
+ goto out;
+ lock_kernel();
++ if (inode->i_op->lookup2)
++ dentry = inode->i_op->lookup2(inode, new, it);
++ else
+ dentry = inode->i_op->lookup(inode, new);
+ unlock_kernel();
+ if (!dentry)
+@@ -798,6 +854,12 @@ out:
+ return dentry;
+ }
+
++struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++{
++ return lookup_hash_it(name, base, NULL);
++}
++
++
+ /* SMP-safe */
+ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+ {
+@@ -819,7 +881,7 @@ struct dentry * lookup_one_len(const cha
+ }
+ this.hash = end_name_hash(hash);
+
+- return lookup_hash(&this, base);
++ return lookup_hash_it(&this, base, NULL);
+ access:
+ return ERR_PTR(-EACCES);
+ }
+@@ -851,6 +913,23 @@ int __user_walk(const char *name, unsign
+ return err;
+ }
+
++int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
++ struct lookup_intent *it)
++{
++ char *tmp;
++ int err;
++
++ tmp = getname(name);
++ err = PTR_ERR(tmp);
++ if (!IS_ERR(tmp)) {
++ err = 0;
++ if (path_init(tmp, flags, nd))
++ err = path_walk_it(tmp, nd, it);
++ putname(tmp);
++ }
++ return err;
++}
++
+ /*
+ * It's inline, so penalty for filesystems that don't use sticky bit is
+ * minimal.
+@@ -987,7 +1066,8 @@ exit_lock:
+ * for symlinks (where the permissions are checked later).
+ * SMP-safe
+ */
+-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
++int open_namei_it(const char *pathname, int flag, int mode,
++ struct nameidata *nd, struct lookup_intent *it)
+ {
+ int acc_mode, error = 0;
+ struct inode *inode;
+@@ -1002,7 +1082,7 @@ int open_namei(const char * pathname, in
+ */
+ if (!(flag & O_CREAT)) {
+ if (path_init(pathname, lookup_flags(flag), nd))
+- error = path_walk(pathname, nd);
++ error = path_walk_it(pathname, nd, it);
+ if (error)
+ return error;
+ dentry = nd->dentry;
+@@ -1012,6 +1092,10 @@ int open_namei(const char * pathname, in
+ /*
+ * Create - we need to know the parent.
+ */
++ if (it) {
++ it->it_mode = mode;
++ it->it_op |= IT_CREAT;
++ }
+ if (path_init(pathname, LOOKUP_PARENT, nd))
+ error = path_walk(pathname, nd);
+ if (error)
+@@ -1028,7 +1112,7 @@ int open_namei(const char * pathname, in
+
+ dir = nd->dentry;
+ down(&dir->d_inode->i_sem);
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+
+ do_last:
+ error = PTR_ERR(dentry);
+@@ -1037,6 +1121,7 @@ do_last:
+ goto exit;
+ }
+
++ it->it_mode = mode;
+ /* Negative dentry, just create the file */
+ if (!dentry->d_inode) {
+ if (!IS_POSIXACL(dir->d_inode))
+@@ -1071,7 +1156,8 @@ do_last:
+ error = -ENOENT;
+ if (!dentry->d_inode)
+ goto exit_dput;
+- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
++ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
++ dentry->d_inode->i_op->follow_link2))
+ goto do_link;
+
+ dput(nd->dentry);
+@@ -1157,8 +1243,10 @@ ok:
+ return 0;
+
+ exit_dput:
++ intent_release(dentry, it);
+ dput(dentry);
+ exit:
++ intent_release(nd->dentry, it);
+ path_release(nd);
+ return error;
+
+@@ -1177,7 +1265,12 @@ do_link:
+ * are done. Procfs-like symlinks just set LAST_BIND.
+ */
+ UPDATE_ATIME(dentry->d_inode);
+- error = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (dentry->d_inode->i_op->follow_link2)
++ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++ else
++ error = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (error)
++ intent_release(dentry, it);
+ dput(dentry);
+ if (error)
+ return error;
+@@ -1199,13 +1292,20 @@ do_link:
+ }
+ dir = nd->dentry;
+ down(&dir->d_inode->i_sem);
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ putname(nd->last.name);
+ goto do_last;
+ }
+
++int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
++{
++ return open_namei_it(pathname, flag, mode, nd, NULL);
++}
++
++
+ /* SMP-safe */
+-static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
++static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
++ struct lookup_intent *it)
+ {
+ struct dentry *dentry;
+
+@@ -1213,7 +1313,7 @@ static struct dentry *lookup_create(stru
+ dentry = ERR_PTR(-EEXIST);
+ if (nd->last_type != LAST_NORM)
+ goto fail;
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ if (IS_ERR(dentry))
+ goto fail;
+ if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
+@@ -1270,7 +1371,19 @@ asmlinkage long sys_mknod(const char * f
+ error = path_walk(tmp, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 0);
++
++ if (nd.dentry->d_inode->i_op->mknod2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode, dev);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
++ dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(dentry);
+
+ if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1289,6 +1402,7 @@ asmlinkage long sys_mknod(const char * f
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1340,15 +1456,25 @@ asmlinkage long sys_mkdir(const char * p
+ error = path_walk(tmp, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 1);
++ if (nd.dentry->d_inode->i_op->mkdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ mode);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 1, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+- if (!IS_POSIXACL(nd.dentry->d_inode))
+- mode &= ~current->fs->umask;
+- error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
++ error = vfs_mkdir(nd.dentry->d_inode, dentry,
++ mode & ~current->fs->umask);
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1450,8 +1578,17 @@ asmlinkage long sys_rmdir(const char * p
+ error = -EBUSY;
+ goto exit1;
+ }
++ if (nd.dentry->d_inode->i_op->rmdir2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->rmdir2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+@@ -1510,8 +1649,17 @@ asmlinkage long sys_unlink(const char *
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ /* Why not before? Because we want correct error value */
+@@ -1579,15 +1729,26 @@ asmlinkage long sys_symlink(const char *
+ error = path_walk(to, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->symlink2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink2(nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len,
++ from);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++ out2:
+ path_release(&nd);
+-out:
++ out:
+ putname(to);
+ }
+ putname(from);
+@@ -1660,7 +1824,7 @@ asmlinkage long sys_link(const char * ol
+
+ error = 0;
+ if (path_init(from, LOOKUP_POSITIVE, &old_nd))
+- error = path_walk(from, &old_nd);
++ error = path_walk_it(from, &old_nd, NULL);
+ if (error)
+ goto exit;
+ if (path_init(to, LOOKUP_PARENT, &nd))
+@@ -1670,7 +1834,17 @@ asmlinkage long sys_link(const char * ol
+ error = -EXDEV;
+ if (old_nd.mnt != nd.mnt)
+ goto out_release;
+- new_dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->link2) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link2(old_nd.dentry->d_inode,
++ nd.dentry->d_inode,
++ nd.last.name,
++ nd.last.len);
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
++ new_dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(new_dentry);
+ if (!IS_ERR(new_dentry)) {
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+@@ -1716,7 +1892,8 @@ exit:
+ * locking].
+ */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry,
++ struct lookup_intent *it)
+ {
+ int error;
+ struct inode *target;
+@@ -1753,6 +1923,7 @@ int vfs_rename_dir(struct inode *old_dir
+ error = -EBUSY;
+ else
+ error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
++ intent_release(new_dentry, it);
+ if (target) {
+ if (!error)
+ target->i_flags |= S_DEAD;
+@@ -1795,7 +1973,8 @@ out_unlock:
+ }
+
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry,
++ struct lookup_intent *it)
+ {
+ int error;
+
+@@ -1826,6 +2005,7 @@ int vfs_rename_other(struct inode *old_d
+ error = -EBUSY;
+ else
+ error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
++ intent_release(new_dentry, it);
+ double_up(&old_dir->i_zombie, &new_dir->i_zombie);
+ if (error)
+ return error;
+@@ -1837,13 +2017,14 @@ int vfs_rename_other(struct inode *old_d
+ }
+
+ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry,
++ struct lookup_intent *it)
+ {
+ int error;
+ if (S_ISDIR(old_dentry->d_inode->i_mode))
+- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
++ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
+ else
+- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
++ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
+ if (!error) {
+ if (old_dir == new_dir)
+ inode_dir_notify(old_dir, DN_RENAME);
+@@ -1886,9 +2068,23 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename2) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system want to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
+ double_lock(new_dir, old_dir);
+
+- old_dentry = lookup_hash(&oldnd.last, old_dir);
++ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
+ error = PTR_ERR(old_dentry);
+ if (IS_ERR(old_dentry))
+ goto exit3;
+@@ -1904,14 +2100,14 @@ static inline int do_rename(const char *
+ if (newnd.last.name[newnd.last.len])
+ goto exit4;
+ }
+- new_dentry = lookup_hash(&newnd.last, new_dir);
++ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
+ error = PTR_ERR(new_dentry);
+ if (IS_ERR(new_dentry))
+ goto exit4;
+
+ lock_kernel();
+ error = vfs_rename(old_dir->d_inode, old_dentry,
+- new_dir->d_inode, new_dentry);
++ new_dir->d_inode, new_dentry, NULL);
+ unlock_kernel();
+
+ dput(new_dentry);
+@@ -1964,7 +2163,8 @@ out:
+ }
+
+ static inline int
+-__vfs_follow_link(struct nameidata *nd, const char *link)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
+ {
+ int res = 0;
+ char *name;
+@@ -1977,7 +2177,7 @@ __vfs_follow_link(struct nameidata *nd,
+ /* weird __emul_prefix() stuff did it */
+ goto out;
+ }
+- res = link_path_walk(link, nd);
++ res = link_path_walk_it(link, nd, it);
+ out:
+ if (current->link_count || res || nd->last_type!=LAST_NORM)
+ return res;
+@@ -1999,7 +2199,13 @@ fail:
+
+ int vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+- return __vfs_follow_link(nd, link);
++ return __vfs_follow_link(nd, link, NULL);
++}
++
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
++{
++ return __vfs_follow_link(nd, link, it);
+ }
+
+ /* get the link contents into pagecache */
+@@ -2041,7 +2247,7 @@ int page_follow_link(struct dentry *dent
+ {
+ struct page *page = NULL;
+ char *s = page_getlink(dentry, &page);
+- int res = __vfs_follow_link(nd, s);
++ int res = __vfs_follow_link(nd, s, NULL);
+ if (page) {
+ kunmap(page);
+ page_cache_release(page);
+--- linux-2.4.19-hp2_pnnl2/fs/nfsd/vfs.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/nfsd/vfs.c Sun Jan 19 19:37:57 2003
+@@ -1295,7 +1295,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ err = nfserr_perm;
+ } else
+ #endif
+- err = vfs_rename(fdir, odentry, tdir, ndentry);
++ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
+ if (!err && EX_ISSYNC(tfhp->fh_export)) {
+ nfsd_sync_dir(tdentry);
+ nfsd_sync_dir(fdentry);
+--- linux-2.4.19-hp2_pnnl2/fs/open.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/open.c Sun Jan 19 19:41:00 2003
+@@ -19,6 +19,9 @@
+ #include <asm/uaccess.h>
+
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
++extern int path_walk_it(const char *name, struct nameidata *nd,
++ struct lookup_intent *it);
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
+
+ int vfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+@@ -118,12 +121,13 @@ static inline long do_sys_truncate(const
+ struct nameidata nd;
+ struct inode * inode;
+ int error;
++ struct lookup_intent it = { .it_op = IT_TRUNC };
+
+ error = -EINVAL;
+ if (length < 0) /* sorry, but loff_t says... */
+ goto out;
+
+- error = user_path_walk(path, &nd);
++ error = user_path_walk_it(path, &nd, &it);
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+@@ -168,6 +172,7 @@ static inline long do_sys_truncate(const
+ put_write_access(inode);
+
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -259,8 +264,9 @@ asmlinkage long sys_utime(char * filenam
+ struct nameidata nd;
+ struct inode * inode;
+ struct iattr newattrs;
++ struct lookup_intent it = { .it_op = IT_SETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+@@ -286,6 +292,7 @@ asmlinkage long sys_utime(char * filenam
+ }
+ error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -303,8 +310,9 @@ asmlinkage long sys_utimes(char * filena
+ struct nameidata nd;
+ struct inode * inode;
+ struct iattr newattrs;
++ struct lookup_intent it = { .it_op = IT_SETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+
+ if (error)
+ goto out;
+@@ -331,6 +339,7 @@ asmlinkage long sys_utimes(char * filena
+ }
+ error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -347,6 +356,7 @@ asmlinkage long sys_access(const char *
+ int old_fsuid, old_fsgid;
+ kernel_cap_t old_cap;
+ int res;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+@@ -364,13 +374,14 @@ asmlinkage long sys_access(const char *
+ else
+ current->cap_effective = current->cap_permitted;
+
+- res = user_path_walk(filename, &nd);
++ res = user_path_walk_it(filename, &nd, &it);
+ if (!res) {
+ res = permission(nd.dentry->d_inode, mode);
+ /* SuS v2 requires we report a read only fs too */
+ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+ && !special_file(nd.dentry->d_inode->i_mode))
+ res = -EROFS;
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+
+@@ -386,6 +397,7 @@ asmlinkage long sys_chdir(const char * f
+ int error;
+ struct nameidata nd;
+ char *name;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ name = getname(filename);
+ error = PTR_ERR(name);
+@@ -394,7 +406,7 @@ asmlinkage long sys_chdir(const char * f
+
+ error = 0;
+ if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd))
+- error = path_walk(name, &nd);
++ error = path_walk_it(name, &nd, &it);
+ putname(name);
+ if (error)
+ goto out;
+@@ -406,6 +418,7 @@ asmlinkage long sys_chdir(const char * f
+ set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -446,6 +459,7 @@ asmlinkage long sys_chroot(const char *
+ int error;
+ struct nameidata nd;
+ char *name;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ name = getname(filename);
+ error = PTR_ERR(name);
+@@ -454,7 +468,7 @@ asmlinkage long sys_chroot(const char *
+
+ path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+- error = path_walk(name, &nd);
++ error = path_walk_it(name, &nd, &it);
+ putname(name);
+ if (error)
+ goto out;
+@@ -471,6 +485,7 @@ asmlinkage long sys_chroot(const char *
+ set_fs_altroot();
+ error = 0;
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -515,8 +530,9 @@ asmlinkage long sys_chmod(const char * f
+ struct inode * inode;
+ int error;
+ struct iattr newattrs;
++ struct lookup_intent it = { .it_op = IT_SETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+@@ -536,6 +552,7 @@ asmlinkage long sys_chmod(const char * f
+ error = notify_change(nd.dentry, &newattrs);
+
+ dput_and_out:
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -605,10 +622,12 @@ asmlinkage long sys_chown(const char * f
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_SETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+ error = chown_common(nd.dentry, user, group);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -618,10 +637,12 @@ asmlinkage long sys_lchown(const char *
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_SETATTR };
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+ error = chown_common(nd.dentry, user, group);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -655,10 +676,16 @@ asmlinkage long sys_fchown(unsigned int
+ * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+ * used by symlinks.
+ */
++extern int open_namei_it(const char *filename, int namei_flags, int mode,
++ struct nameidata *nd, struct lookup_intent *it);
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it);
++
+ struct file *filp_open(const char * filename, int flags, int mode)
+ {
+ int namei_flags, error;
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
+
+ namei_flags = flags;
+ if ((namei_flags+1) & O_ACCMODE)
+@@ -666,14 +693,15 @@ struct file *filp_open(const char * file
+ if (namei_flags & O_TRUNC)
+ namei_flags |= 2;
+
+- error = open_namei(filename, namei_flags, mode, &nd);
+- if (!error)
+- return dentry_open(nd.dentry, nd.mnt, flags);
++ error = open_namei_it(filename, namei_flags, mode, &nd, &it);
++ if (error)
++ return ERR_PTR(error);
+
+- return ERR_PTR(error);
++ return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
+ }
+
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it)
+ {
+ struct file * f;
+ struct inode *inode;
+@@ -716,6 +744,7 @@ struct file *dentry_open(struct dentry *
+ }
+ f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
++ intent_release(dentry, it);
+ return f;
+
+ cleanup_all:
+@@ -730,11 +759,17 @@ cleanup_all:
+ cleanup_file:
+ put_filp(f);
+ cleanup_dentry:
++ intent_release(dentry, it);
+ dput(dentry);
+ mntput(mnt);
+ return ERR_PTR(error);
+ }
+
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++ return dentry_open_it(dentry, mnt, flags, NULL);
++}
++
+ /*
+ * Find an empty file descriptor entry, and mark it busy.
+ */
+--- linux-2.4.19-hp2_pnnl2/fs/stat.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/fs/stat.c Sun Jan 19 19:44:51 2003
+@@ -13,6 +13,7 @@
+
+ #include <asm/uaccess.h>
+
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
+ /*
+ * Revalidate the inode. This is required for proper NFS attribute caching.
+ */
+@@ -135,13 +136,15 @@ static int cp_new_stat(struct inode * in
+ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_old_stat(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -151,13 +154,15 @@ asmlinkage long sys_stat(char * filename
+ asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_new_stat(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -172,13 +177,15 @@ asmlinkage long sys_newstat(char * filen
+ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_old_stat(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -189,13 +196,15 @@ asmlinkage long sys_lstat(char * filenam
+ asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_new_stat(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -247,11 +256,12 @@ asmlinkage long sys_readlink(const char
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_READLINK };
+
+ if (bufsiz <= 0)
+ return -EINVAL;
+
+- error = user_path_walk_link(path, &nd);
++ error = user_path_walk_link_it(path, &nd, &it);
+ if (!error) {
+ struct inode * inode = nd.dentry->d_inode;
+
+@@ -261,6 +271,7 @@ asmlinkage long sys_readlink(const char
+ UPDATE_ATIME(inode);
+ error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+ }
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -333,12 +344,14 @@ asmlinkage long sys_stat64(char * filena
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+@@ -348,12 +361,14 @@ asmlinkage long sys_lstat64(char * filen
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+ error = do_revalidate(nd.dentry);
+ if (!error)
+ error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++ intent_release(nd.dentry, &it);
+ path_release(&nd);
+ }
+ return error;
+--- linux-2.4.19-hp2_pnnl2/include/linux/dcache.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/dcache.h Sun Jan 19 19:04:48 2003
+@@ -6,6 +6,27 @@
+ #include <asm/atomic.h>
+ #include <linux/mount.h>
+
++#define IT_OPEN (1)
++#define IT_CREAT (1<<1)
++#define IT_READDIR (1<<2)
++#define IT_GETATTR (1<<3)
++#define IT_SETATTR (1<<4)
++#define IT_TRUNC (1<<5)
++#define IT_READLINK (1<<6)
++#define IT_LOOKUP (1<<7)
++
++struct lookup_intent {
++ int it_op;
++ int it_mode;
++ int it_flags;
++ int it_disposition;
++ int it_status;
++ struct iattr *it_iattr;
++ __u64 it_lock_handle[2];
++ int it_lock_mode;
++ void *it_data;
++};
++
+ /*
+ * linux/include/linux/dcache.h
+ *
+@@ -78,6 +106,7 @@ struct dentry {
+ unsigned long d_time; /* used by d_revalidate */
+ struct dentry_operations *d_op;
+ struct super_block * d_sb; /* The root of the dentry tree */
++ struct lookup_intent *d_it;
+ unsigned long d_vfs_flags;
+ void * d_fsdata; /* fs-specific data */
+ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+@@ -90,6 +119,8 @@ struct dentry_operations {
+ int (*d_delete)(struct dentry *);
+ void (*d_release)(struct dentry *);
+ void (*d_iput)(struct dentry *, struct inode *);
++ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
++ void (*d_intent_release)(struct dentry *, struct lookup_intent *);
+ };
+
+ /* the dentry parameter passed to d_hash and d_compare is the parent
+@@ -124,6 +148,7 @@ d_iput: no no yes
+ * s_nfsd_free_path semaphore will be down
+ */
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */
+
+ extern spinlock_t dcache_lock;
+
+--- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h Sun Jan 19 19:04:48 2003
+@@ -575,6 +575,7 @@ struct file {
+
+ /* needed for tty driver, and maybe others */
+ void *private_data;
++ struct lookup_intent *f_intent;
+
+ /* preallocated helper kiobuf to speedup O_DIRECT */
+ struct kiobuf *f_iobuf;
+@@ -815,7 +816,9 @@ extern int vfs_symlink(struct inode *, s
+ extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+ extern int vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_unlink(struct inode *, struct dentry *);
+-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry,
++ struct lookup_intent *it);
+
+ /*
+ * File types
+@@ -876,16 +879,28 @@ struct file_operations {
+ struct inode_operations {
+ int (*create) (struct inode *,struct dentry *,int);
+ struct dentry * (*lookup) (struct inode *,struct dentry *);
++ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link2) (struct inode *,struct inode *, const char *, int);
+ int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink2) (struct inode *, const char *, int);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
++ int (*symlink2) (struct inode *, const char *, int, const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir2) (struct inode *, const char *, int,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir2) (struct inode *, const char *, int);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
++ int (*mknod2) (struct inode *, const char *, int,int,int);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
++ int (*rename2) (struct inode *, struct inode *,
++ const char *oldname, int oldlen,
++ const char *newname, int newlen);
+ int (*readlink) (struct dentry *, char *,int);
+ int (*follow_link) (struct dentry *, struct nameidata *);
++ int (*follow_link2) (struct dentry *, struct nameidata *,
++ struct lookup_intent *it);
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int);
+ int (*revalidate) (struct dentry *);
+@@ -1354,6 +1369,7 @@ typedef int (*read_actor_t)(read_descrip
+ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+
+ extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
+ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
+ extern int FASTCALL(path_walk(const char *, struct nameidata *));
+ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
+@@ -1364,6 +1380,8 @@ extern struct dentry * lookup_one_len(co
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
++#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
+
+ extern void inode_init_once(struct inode *);
+ extern void iput(struct inode *);
+@@ -1499,6 +1517,8 @@ extern struct file_operations generic_ro
+
+ extern int vfs_readlink(struct dentry *, char *, int, const char *);
+ extern int vfs_follow_link(struct nameidata *, const char *);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++ struct lookup_intent *it);
+ extern int page_readlink(struct dentry *, char *, int);
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern struct inode_operations page_symlink_inode_operations;
+--- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 19:04:48 2003
+@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
+
+_
--- /dev/null
+drivers/block/blkpg.c
+drivers/block/loop.c
+drivers/ide/ide-disk.c
--- /dev/null
+fs/ext3/Makefile
+fs/ext3/super.c
+include/linux/fs.h
+kernel/ksyms.c
--- /dev/null
+fs/inode.c
+fs/block_dev.c
+fs/devfs/base.c
+fs/super.c
+include/linux/fs.h
--- /dev/null
+fs/inode.c
+fs/Makefile
+mm/filemap.c
+mm/vmscan.c
+mm/Makefile
+mm/page_alloc.c
--- /dev/null
+fs/jbd/commit.c
+fs/jbd/journal.c
+fs/jbd/transaction.c
+include/linux/jbd.h
arch/ia64/mm/init.c
include/linux/slab.h
kernel/ksyms.c
-kernel/ksyms.c.validate
mm/slab.c
--- /dev/null
+arch/i386/mm/init.c
+arch/ia64/mm/init.c
+include/linux/slab.h
+kernel/ksyms.c
+mm/slab.c
drivers/ide/ide-disk.c
fs/ext3/Makefile
fs/ext3/super.c
-fs/jbd/commit.c
-fs/jbd/journal.c
-fs/jbd/transaction.c
include/linux/blkdev.h
include/linux/slab.h
-include/linux/jbd.h
kernel/ksyms.c
include/linux/dcache.h
include/linux/fs.h
--- /dev/null
+fs/dcache.c
+fs/namei.c
+fs/nfsd/vfs.c
+fs/open.c
+fs/stat.c
+include/linux/dcache.h
+include/linux/fs.h
+kernel/ksyms.c
exports.patch
kmem_cache_validate.patch
lustre_version.patch
-vfs_intent.patch
+vfs_intent-2.4.18-18.patch
+invalidate_show.patch
+iod-rmap-exports.patch
-patch-2.4.18-hp1_pnnl18.2.8qsnet
+dev_read_only_hp.patch
+exports_hp.patch
+kmem_cache_validate_hp.patch
+jbd-transno-cb.patch
+lustre_version.patch
+vfs_intent_hp.patch
+invalidate_show.patch
uml_no_panic.patch
vfs_intent-2.4.18-18.patch
uml_compile_fixes.patch
+invalidate_show.patch
+iod-rmap-exports.patch
uml_no_panic.patch
vfs_intent.patch
uml_compile_fixes.patch
+invalidate_show.patch
vanilla-2.4.18
+invalidate_show.patch
-vanilla-2.4.19
+vanilla-2.4.19.patch
+jbd-transno-cb.patch
+invalidate_show.patch
DESC
-(undescribed patch)
+Required kernel function exports for Lustre.
EDESC
--- /dev/null
+DESC
+Required kernel function exports for Lustre.
+EDESC
--- /dev/null
+DESC
+Prints which inodes are busy at filesystem unmount time.
+EDESC
-series/rh-8.0
- redhat 2.4.18-14
- redhat 2.4.18-17
+series/chaos
+ chaos-39
series/rh-2.4.18-18
redhat 2.4.18-18
-series/hp-pnnl ** NOTE: equivalent to vanilla-2.4.18
- linux-2.4.18-hp1_pnnl18
- linux-2.4.18-hp1_pnnl19
-series/vanilla-2.4.18 ** Not officially supported
- linux-2.4.18
+series/hp-pnnl ** Note: functionally equivalent to 2.4.19
+ linux-2.4.18-hp2_pnnl2
series/vanilla-2.4.19 ** Not officially supported
linux-2.4.19
modulefs_DATA = ldlm.o
EXTRA_PROGRAMS = ldlm
-ldlm_SOURCES = l_lock.c ldlm_lock.c ldlm_resource.c ldlm_test.c ldlm_lockd.c \
+ldlm_SOURCES = l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lockd.c \
ldlm_extent.c ldlm_request.c
include $(top_srcdir)/Rules
}
/* apply the internal policy by walking all the lists */
-int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- void *req_cookie,
- ldlm_mode_t mode, int flags, void *data)
+int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
+ void *req_cookie, ldlm_mode_t mode, int flags,
+ void *data)
{
+ struct ldlm_lock *lock = *lockp;
struct ldlm_resource *res = lock->l_resource;
struct ldlm_extent *req_ex = req_cookie;
struct ldlm_extent new_ex;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
*
return "creat";
case (IT_OPEN | IT_CREAT):
return "open|creat";
- case IT_MKDIR:
- return "mkdir";
- case IT_LINK:
- return "link";
- case IT_LINK2:
- return "link2";
- case IT_SYMLINK:
- return "symlink";
- case IT_UNLINK:
- return "unlink";
- case IT_RMDIR:
- return "rmdir";
- case IT_RENAME:
- return "rename";
- case IT_RENAME2:
- return "rename2";
case IT_READDIR:
return "readdir";
case IT_GETATTR:
return "getattr";
+ case IT_TRUNC:
+ return "truncate";
case IT_SETATTR:
return "setattr";
- case IT_READLINK:
- return "readlink";
- case IT_MKNOD:
- return "mknod";
case IT_LOOKUP:
return "lookup";
+ case IT_UNLINK:
+ return "unlink";
default:
CERROR("Unknown intent %d\n", it);
return "UNKNOWN";
static ldlm_res_policy ldlm_intent_policy_func;
-static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock **lock,
void *req_cookie, ldlm_mode_t mode, int flags,
void *data)
{
EXIT;
}
-/* Only called with strict == 0 by recovery, to mark in-use locks as
- * should-be-destroyed */
+/* This used to have a 'strict' flact, which recovery would use to mark an
+ * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I
+ * shall explain why it's gone: with the new hash table scheme, once you call
+ * ldlm_lock_destroy, you can never drop your final references on this lock.
+ * Because it's not in the hash table anymore. -phil */
void ldlm_lock_destroy(struct ldlm_lock *lock)
{
ENTRY;
l_lock(&lock->l_resource->lr_namespace->ns_lock);
if (!list_empty(&lock->l_children)) {
- LDLM_DEBUG(lock, "still has children (%p)!",
+ LDLM_ERROR(lock, "still has children (%p)!",
lock->l_children.next);
ldlm_lock_dump(D_ERROR, lock);
LBUG();
}
if (lock->l_readers || lock->l_writers) {
- LDLM_DEBUG(lock, "lock still has references");
- ldlm_lock_dump(D_OTHER, lock);
+ LDLM_ERROR(lock, "lock still has references");
+ ldlm_lock_dump(D_ERROR, lock);
+ LBUG();
}
if (!list_empty(&lock->l_res_link)) {
/* this is called by portals_handle2object with the handle lock taken */
static void lock_handle_addref(void *lock)
{
- ldlm_lock_get(lock);
+ LDLM_LOCK_GET((struct ldlm_lock *)lock);
}
/*
}
int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- __u64 new_resid[3])
+ struct ldlm_res_id new_resid)
{
struct ldlm_resource *oldres = lock->l_resource;
ENTRY;
l_lock(&ns->ns_lock);
- if (memcmp(new_resid, lock->l_resource->lr_name,
+ if (memcmp(&new_resid, &lock->l_resource->lr_name,
sizeof(lock->l_resource->lr_name)) == 0) {
/* Nothing to do */
l_unlock(&ns->ns_lock);
RETURN(0);
}
- LASSERT(new_resid[0] != 0);
+ LASSERT(new_resid.name[0] != 0);
/* This function assumes that the lock isn't on any lists */
LASSERT(list_empty(&lock->l_res_link));
void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
{
- //lockh->addr = (__u64)(unsigned long)lock;
memset(&lockh->addr, 0x69, sizeof(lockh->addr));
lockh->cookie = lock->l_handle.h_cookie;
}
-/* if flags: atomically get the lock and set the flags.
+/* if flags: atomically get the lock and set the flags.
* Return NULL if flag already set
*/
/* It's unlikely but possible that someone marked the lock as
* destroyed after we did handle2object on it */
if (lock->l_destroyed) {
- CERROR("lock already destroyed: lock %p\n", lock);
+ CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
LDLM_LOCK_PUT(lock);
GOTO(out, retval);
}
}
static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
- struct ldlm_lock *new)
+ struct ldlm_lock *new,
+ void *data, int datalen)
{
struct ldlm_ast_work *w;
ENTRY;
GOTO(out, 0);
}
+ w->w_data = data;
+ w->w_datalen = datalen;
if (new) {
lock->l_flags |= LDLM_FL_AST_SENT;
w->w_blocking = 1;
w->w_lock = LDLM_LOCK_GET(lock);
list_add(&w->w_list, lock->l_resource->lr_tmp);
- out:
+ EXIT;
+ out:
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
return;
}
/* Args: unlocked lock */
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
- __u64 *res_id, int flags);
+ struct ldlm_res_id, int flags);
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
{
- struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
struct ldlm_namespace *ns;
ENTRY;
- if (lock == NULL)
- LBUG();
-
LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
ns = lock->l_resource->lr_namespace;
- l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ l_lock(&ns->ns_lock);
if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) {
LASSERT(lock->l_readers > 0);
lock->l_readers--;
lock->l_writers--;
}
- /* If we received a blocked AST and this was the last reference,
- * run the callback. */
+ if (lock->l_flags & LDLM_FL_LOCAL &&
+ !lock->l_readers && !lock->l_writers) {
+ /* If this is a local lock on a server namespace and this was
+ * the last reference, cancel the lock. */
+ CDEBUG(D_INFO, "forcing cancel of local lock\n");
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ }
+
if (!lock->l_readers && !lock->l_writers &&
(lock->l_flags & LDLM_FL_CBPENDING)) {
- if (!lock->l_resource->lr_namespace->ns_client &&
- lock->l_export)
+ /* If we received a blocked AST and this was the last reference,
+ * run the callback. */
+ if (!ns->ns_client && lock->l_export)
CERROR("FL_CBPENDING set on non-local lock--just a "
"warning\n");
LDLM_DEBUG(lock, "final decref done on cbpending lock");
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
/* FIXME: need a real 'desc' here */
lock->l_blocking_ast(lock, NULL, lock->l_data,
- lock->l_data_len, LDLM_CB_BLOCKING);
+ LDLM_CB_BLOCKING);
} else if (ns->ns_client && !lock->l_readers && !lock->l_writers) {
+ /* If this is a client-side namespace and this was the last
+ * reference, put it on the LRU. */
LASSERT(list_empty(&lock->l_lru));
LASSERT(ns->ns_nr_unused >= 0);
list_add_tail(&lock->l_lru, &ns->ns_unused_list);
ns->ns_nr_unused++;
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
ldlm_cancel_lru(ns);
} else {
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
}
LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */
- LDLM_LOCK_PUT(lock); /* matches the handle2lock above */
EXIT;
}
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+ LASSERT(lock != NULL);
+ ldlm_lock_decref_internal(lock, mode);
+ LDLM_LOCK_PUT(lock);
+}
+
+/* This will drop a lock reference and mark it for destruction, but will not
+ * necessarily cancel the lock before returning. */
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+ ENTRY;
+
+ LASSERT(lock != NULL);
+
+ LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_lock_decref_internal(lock, mode);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ LDLM_LOCK_PUT(lock);
+}
+
static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs,
struct list_head *queue)
{
if (send_cbs && child->l_blocking_ast != NULL) {
CDEBUG(D_OTHER, "lock %p incompatible; sending "
"blocking AST.\n", child);
- ldlm_add_ast_work_item(child, lock);
+ ldlm_add_ast_work_item(child, lock, NULL, 0);
}
}
}
/* NOTE: called by
- - ldlm_handle_enqueuque - resource
-*/
-void ldlm_grant_lock(struct ldlm_lock *lock)
+ * - ldlm_lock_enqueue
+ * - ldlm_reprocess_queue
+ * - ldlm_lock_convert
+ */
+void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen)
{
struct ldlm_resource *res = lock->l_resource;
ENTRY;
if (lock->l_granted_mode < res->lr_most_restr)
res->lr_most_restr = lock->l_granted_mode;
- if (lock->l_completion_ast) {
- ldlm_add_ast_work_item(lock, NULL);
- }
+ if (lock->l_completion_ast != NULL)
+ ldlm_add_ast_work_item(lock, NULL, data, datalen);
+
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
EXIT;
}
-/* returns a referenced lock or NULL */
+/* returns a referenced lock or NULL. See the flag descriptions below, in the
+ * comment above ldlm_lock_match */
static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
struct ldlm_extent *extent,
- struct ldlm_lock *old_lock)
+ struct ldlm_lock *old_lock, int flags)
{
struct ldlm_lock *lock;
struct list_head *tmp;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
if (lock == old_lock)
- continue;
+ break;
if (lock->l_flags & LDLM_FL_CBPENDING)
continue;
if (lock->l_destroyed)
continue;
+ if ((flags & LDLM_FL_LOCAL_ONLY) &&
+ !(lock->l_flags & LDLM_FL_LOCAL))
+ continue;
+
ldlm_lock_addref_internal(lock, mode);
return lock;
}
*
* Otherwise, all of the fields must be filled in, to match against.
*
+ * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
+ * server (ie, connh is NULL)
+ * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
+ * list will be considered
+ *
* Returns 1 if it finds an already-existing lock that is compatible; in this
* case, lockh is filled in with a addref()ed lock
*/
-int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
- void *cookie, int cookielen, ldlm_mode_t mode,
- struct lustre_handle *lockh)
+int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
+ struct ldlm_res_id *res_id, __u32 type, void *cookie,
+ int cookielen, ldlm_mode_t mode,struct lustre_handle *lockh)
{
struct ldlm_resource *res;
struct ldlm_lock *lock, *old_lock = NULL;
LASSERT(old_lock);
ns = old_lock->l_resource->lr_namespace;
- res_id = old_lock->l_resource->lr_name;
+ res_id = &old_lock->l_resource->lr_name;
type = old_lock->l_resource->lr_type;
mode = old_lock->l_req_mode;
}
- res = ldlm_resource_get(ns, NULL, res_id, type, 0);
+ res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
if (res == NULL) {
LASSERT(old_lock == NULL);
RETURN(0);
l_lock(&ns->ns_lock);
- if ((lock = search_queue(&res->lr_granted, mode, cookie, old_lock)))
+ lock = search_queue(&res->lr_granted, mode, cookie, old_lock, flags);
+ if (lock != NULL)
GOTO(out, rc = 1);
- if ((lock = search_queue(&res->lr_converting, mode, cookie, old_lock)))
+ if (flags & LDLM_FL_BLOCK_GRANTED)
+ GOTO(out, rc = 0);
+ lock = search_queue(&res->lr_converting, mode, cookie, old_lock, flags);
+ if (lock != NULL)
GOTO(out, rc = 1);
- if ((lock = search_queue(&res->lr_waiting, mode, cookie, old_lock)))
+ lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, flags);
+ if (lock != NULL)
GOTO(out, rc = 1);
EXIT;
if (lock) {
ldlm_lock2handle(lock, lockh);
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
+ lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
}
if (rc)
LDLM_DEBUG(lock, "matched");
/* Returns a referenced lock */
struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
struct lustre_handle *parent_lock_handle,
- __u64 * res_id, __u32 type,
- ldlm_mode_t mode, void *data, __u32 data_len)
+ struct ldlm_res_id res_id, __u32 type,
+ ldlm_mode_t mode, void *data, void *cp_data)
{
struct ldlm_resource *res, *parent_res = NULL;
struct ldlm_lock *lock, *parent_lock = NULL;
+ ENTRY;
if (parent_lock_handle) {
parent_lock = ldlm_handle2lock(parent_lock_handle);
lock->l_req_mode = mode;
lock->l_data = data;
- lock->l_data_len = data_len;
+ lock->l_cp_data = cp_data;
- return lock;
+ RETURN(lock);
}
ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
+ struct ldlm_lock **lockp,
void *cookie, int cookie_len,
int *flags,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking)
{
struct ldlm_resource *res;
+ struct ldlm_lock *lock = *lockp;
int local;
ldlm_res_policy policy;
ENTRY;
if (!local && !(*flags & LDLM_FL_REPLAY) &&
(policy = ldlm_res_policy_table[res->lr_type])) {
int rc;
- rc = policy(ns, lock, cookie, lock->l_req_mode, *flags, NULL);
-
+ rc = policy(ns, lockp, cookie, lock->l_req_mode, *flags, NULL);
if (rc == ELDLM_LOCK_CHANGED) {
res = lock->l_resource;
*flags |= LDLM_FL_LOCK_CHANGED;
+ } else if (rc == ELDLM_LOCK_REPLACED) {
+ /* The lock that was returned has already been granted,
+ * and placed into lockp. Destroy the old one and our
+ * work here is done. */
+ ldlm_lock_destroy(lock);
+ LDLM_LOCK_PUT(lock);
+ *flags |= LDLM_FL_LOCK_CHANGED;
+ RETURN(0);
} else if (rc == ELDLM_LOCK_ABORTED) {
ldlm_lock_destroy(lock);
RETURN(rc);
/* The server returned a blocked lock, but it was granted before
* we got a chance to actually enqueue it. We don't need to do
* anything else. */
- *flags &= ~(LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
+ *flags &= ~(LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
GOTO(out, ELDLM_OK);
}
ldlm_resource_unlink_lock(lock);
if (local) {
if (*flags & LDLM_FL_BLOCK_CONV)
- ldlm_resource_add_lock(res, res->lr_converting.prev,
- lock);
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
- ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
else
- ldlm_grant_lock(lock);
+ ldlm_grant_lock(lock, NULL, 0);
GOTO(out, ELDLM_OK);
} else if (*flags & LDLM_FL_REPLAY) {
if (*flags & LDLM_FL_BLOCK_CONV) {
- ldlm_resource_add_lock(res, res->lr_converting.prev,
- lock);
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
GOTO(out, ELDLM_OK);
} else if (*flags & LDLM_FL_BLOCK_WAIT) {
- ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
GOTO(out, ELDLM_OK);
} else if (*flags & LDLM_FL_BLOCK_GRANTED) {
- ldlm_grant_lock(lock);
+ ldlm_grant_lock(lock, NULL, 0);
GOTO(out, ELDLM_OK);
}
/* If no flags, fall through to normal enqueue path. */
/* FIXME: We may want to optimize by checking lr_most_restr */
if (!list_empty(&res->lr_converting)) {
- ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
*flags |= LDLM_FL_BLOCK_CONV;
GOTO(out, ELDLM_OK);
}
if (!list_empty(&res->lr_waiting)) {
- ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
*flags |= LDLM_FL_BLOCK_WAIT;
GOTO(out, ELDLM_OK);
}
if (!ldlm_lock_compat(lock, 0)) {
- ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
*flags |= LDLM_FL_BLOCK_GRANTED;
GOTO(out, ELDLM_OK);
}
- ldlm_grant_lock(lock);
+ if (lock->l_granted_cb != NULL && lock->l_data != NULL) {
+ /* We just -know- */
+ struct ptlrpc_request *req = lock->l_data;
+ lock->l_granted_cb(lock, req->rq_repmsg, 0);
+ }
+ ldlm_grant_lock(lock, NULL, 0);
EXIT;
out:
l_unlock(&ns->ns_lock);
RETURN(1);
list_del_init(&pending->l_res_link);
- ldlm_grant_lock(pending);
+ ldlm_grant_lock(pending, NULL, 0);
}
RETURN(0);
if (w->w_blocking)
rc = w->w_lock->l_blocking_ast
(w->w_lock, &w->w_desc, w->w_data,
- w->w_datalen, LDLM_CB_BLOCKING);
+ LDLM_CB_BLOCKING);
else
- rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
+ rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
+ w->w_data);
if (rc == -ERESTART)
retval = rc;
else if (rc)
(void)ldlm_namespace_foreach_res(ns, reprocess_one_queue, NULL);
}
-/* Must be called with resource->lr_lock not taken. */
void ldlm_reprocess_all(struct ldlm_resource *res)
{
struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
lock->l_flags |= LDLM_FL_CANCEL;
if (lock->l_blocking_ast)
lock->l_blocking_ast(lock, NULL, lock->l_data,
- lock->l_data_len,
LDLM_CB_CANCELING);
else
LDLM_DEBUG(lock, "no blocking ast");
struct ldlm_namespace *ns;
ENTRY;
+ ldlm_del_waiting_lock(lock);
+
res = lock->l_resource;
ns = res->lr_namespace;
ldlm_cancel_callback(lock);
- ldlm_del_waiting_lock(lock);
ldlm_resource_unlink_lock(lock);
ldlm_lock_destroy(lock);
l_unlock(&ns->ns_lock);
EXIT;
}
-int ldlm_lock_set_data(struct lustre_handle *lockh, void *data, int datalen)
+int ldlm_lock_set_data(struct lustre_handle *lockh, void *data, void *cp_data)
{
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
ENTRY;
RETURN(-EINVAL);
lock->l_data = data;
- lock->l_data_len = datalen;
+ lock->l_cp_data = cp_data;
LDLM_LOCK_PUT(lock);
RETURN(0);
}
+/* This function is only called from one thread (per export); no locking around
+ * the list ops needed */
void ldlm_cancel_locks_for_export(struct obd_export *exp)
{
- struct list_head *iter, *n; /* MUST BE CALLED "n"! */
+ struct list_head *iter, *n;
list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) {
struct ldlm_lock *lock;
int granted = 0;
ENTRY;
+ LBUG();
+
res = lock->l_resource;
ns = res->lr_namespace;
/* If this is a local resource, put it on the appropriate list. */
if (res->lr_namespace->ns_client) {
- if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED))
- ldlm_resource_add_lock(res, res->lr_converting.prev,
- lock);
- else {
+ if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
+ } else {
/* This should never happen, because of the way the
* server handles conversions. */
LBUG();
res->lr_tmp = &rpc_list;
- ldlm_grant_lock(lock);
+ ldlm_grant_lock(lock, NULL, 0);
res->lr_tmp = NULL;
granted = 1;
/* FIXME: completion handling not with ns_lock held ! */
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, 0);
+ lock->l_completion_ast(lock, 0, NULL);
}
} else {
/* FIXME: We should try the conversion right away and possibly
* return success without the need for an extra AST */
- ldlm_resource_add_lock(res, res->lr_converting.prev, lock);
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
*flags |= LDLM_FL_BLOCK_CONV;
}
{
char ver[128];
- if (!(portal_debug & level))
+ if (!((portal_debug | D_ERROR) & level))
return;
if (RES_VERSION_SIZE != 4)
lock->l_version[0], lock->l_version[1],
lock->l_version[2], lock->l_version[3]);
- CDEBUG(level, " -- Lock dump: %p (%s)\n", lock, ver);
+ CDEBUG(level, " -- Lock dump: %p (%s) (rc: %d)\n", lock, ver,
+ atomic_read(&lock->l_refc));
if (lock->l_export && lock->l_export->exp_connection)
CDEBUG(level, " Node: NID %x (rhandle: "LPX64")\n",
lock->l_export->exp_connection->c_peer.peer_nid,
CDEBUG(level, " Node: local\n");
CDEBUG(level, " Parent: %p\n", lock->l_parent);
CDEBUG(level, " Resource: %p ("LPD64")\n", lock->l_resource,
- lock->l_resource->lr_name[0]);
+ lock->l_resource->lr_name.name[0]);
CDEBUG(level, " Requested mode: %d, granted mode: %d\n",
(int)lock->l_req_mode, (int)lock->l_granted_mode);
CDEBUG(level, " Readers: %u ; Writers; %u\n",
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
*
return ((timeout / HZ) + 1) * HZ;
}
+/* XXX should this be per-ldlm? */
static struct list_head waiting_locks_list;
static spinlock_t waiting_locks_spinlock;
static struct timer_list waiting_locks_timer;
RETURN(1);
}
-static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, __u32 data_len, int flag)
+int ldlm_server_blocking_ast(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *desc,
+ void *data, int flag)
{
struct ldlm_request *body;
struct ptlrpc_request *req;
LASSERT(lock);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ /* XXX This is necessary because, with the lock re-tasking, we actually
+ * _can_ get called in here twice. (bug 830) */
+ if (!list_empty(&lock->l_pending_chain)) {
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ RETURN(0);
+ }
+
if (lock->l_destroyed) {
/* What's the point? */
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
req->rq_level = LUSTRE_CONN_RECOVD;
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
+ ldlm_del_waiting_lock(lock);
ldlm_expired_completion_wait(lock);
} else if (rc) {
CERROR("client returned %d from blocking AST for lock %p\n",
RETURN(rc);
}
-static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags)
+int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
{
struct ldlm_request *body;
struct ptlrpc_request *req;
req->rq_level = LUSTRE_CONN_RECOVD;
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
+ ldlm_del_waiting_lock(lock);
ldlm_expired_completion_wait(lock);
} else if (rc) {
CERROR("client returned %d from completion AST for lock %p\n",
RETURN(rc);
}
-int ldlm_handle_enqueue(struct ptlrpc_request *req)
+int ldlm_handle_enqueue(struct ptlrpc_request *req,
+ ldlm_completion_callback completion_callback,
+ ldlm_blocking_callback blocking_callback)
{
struct obd_device *obddev = req->rq_export->exp_obd;
struct ldlm_reply *dlm_rep;
}
}
- /* XXX notice that this lock has no callback data: of course the
- export would be exactly what we may want to use here... */
+ /* The lock's callback data might be set in the policy function */
lock = ldlm_lock_create(obddev->obd_namespace,
&dlm_req->lock_handle2,
dlm_req->lock_desc.l_resource.lr_name,
&lock->l_export->exp_ldlm_data.led_held_locks);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- err = ldlm_lock_enqueue(obddev->obd_namespace, lock, cookie, cookielen,
- &flags, ldlm_server_completion_ast,
- ldlm_server_blocking_ast);
- if (err != ELDLM_OK)
+ err = ldlm_lock_enqueue(obddev->obd_namespace, &lock, cookie, cookielen,
+ &flags, completion_callback, blocking_callback);
+ if (err)
GOTO(out, err);
dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
memcpy(&dlm_rep->lock_extent, &lock->l_extent,
sizeof(lock->l_extent));
if (dlm_rep->lock_flags & LDLM_FL_LOCK_CHANGED) {
- memcpy(dlm_rep->lock_resource_name, lock->l_resource->lr_name,
+ memcpy(&dlm_rep->lock_resource_name, &lock->l_resource->lr_name,
sizeof(dlm_rep->lock_resource_name));
dlm_rep->lock_mode = lock->l_req_mode;
}
"(err=%d)", err);
req->rq_status = err;
+ /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
+ * ldlm_reprocess_all. If this moves, revisit that code. -phil */
if (lock) {
if (!err)
ldlm_reprocess_all(lock->l_resource);
lock = ldlm_handle2lock(&dlm_req->lock_handle1);
if (!lock) {
- LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock (lock "
- "%p)", (void *)(unsigned long)
- dlm_req->lock_handle1.addr);
+ CERROR("received cancel for unknown lock cookie "LPX64"\n",
+ dlm_req->lock_handle1.cookie);
+ LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock "
+ "(cookie "LPU64")",
+ dlm_req->lock_handle1.cookie);
req->rq_status = ESTALE;
} else {
LDLM_DEBUG(lock, "server-side cancel handler START");
"callback (%p)", lock->l_blocking_ast);
if (lock->l_blocking_ast != NULL) {
lock->l_blocking_ast(lock, &dlm_req->lock_desc,
- lock->l_data, lock->l_data_len,
- LDLM_CB_BLOCKING);
+ lock->l_data, LDLM_CB_BLOCKING);
}
} else
LDLM_DEBUG(lock, "Lock still has references, will be"
memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
sizeof(lock->l_extent));
ldlm_resource_unlink_lock(lock);
- if (memcmp(dlm_req->lock_desc.l_resource.lr_name,
- lock->l_resource->lr_name,
- sizeof(__u64) * RES_NAME_SIZE) != 0) {
+ if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
+ &lock->l_resource->lr_name,
+ sizeof(lock->l_resource->lr_name)) != 0) {
ldlm_lock_change_resource(ns, lock,
dlm_req->lock_desc.l_resource.lr_name);
LDLM_DEBUG(lock, "completion AST, new resource");
}
lock->l_resource->lr_tmp = &ast_list;
- ldlm_grant_lock(lock);
+ ldlm_grant_lock(lock, req, sizeof(*req));
lock->l_resource->lr_tmp = NULL;
l_unlock(&ns->ns_lock);
LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
{
struct obd_device *obddev = class_conn2obd(conn);
struct ptlrpc_connection *connection;
+ struct obd_uuid uuid = { "ldlm" };
int err = 0;
ENTRY;
OBD_ALLOC(obddev->u.ldlm.ldlm_client,
sizeof(*obddev->u.ldlm.ldlm_client));
- connection = ptlrpc_uuid_to_connection("ldlm");
+ connection = ptlrpc_uuid_to_connection(&uuid);
if (!connection)
CERROR("No LDLM UUID found: assuming ldlm is local.\n");
switch (cmd) {
case IOC_LDLM_TEST:
- err = ldlm_test(obddev, conn);
- CERROR("-- done err %d\n", err);
+ //err = ldlm_test(obddev, conn);
+ err = 0;
+ CERROR("-- NO TESTS WERE RUN done err %d\n", err);
GOTO(out, err);
case IOC_LDLM_DUMP:
ldlm_dump_all_namespaces();
static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct ldlm_obd *ldlm = &obddev->u.ldlm;
+ struct obd_uuid uuid = {"self"};
int rc, i;
ENTRY;
ldlm->ldlm_cb_service =
ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL,
- LDLM_CB_REPLY_PORTAL, "self",
+ LDLM_CB_REPLY_PORTAL, &uuid,
ldlm_callback_handler, "ldlm_cbd");
if (!ldlm->ldlm_cb_service) {
ldlm->ldlm_cancel_service =
ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL,
- LDLM_CANCEL_REPLY_PORTAL, "self",
+ LDLM_CANCEL_REPLY_PORTAL, &uuid,
ldlm_cancel_handler, "ldlm_canceld");
if (!ldlm->ldlm_cancel_service) {
}
static int ldlm_connect(struct lustre_handle *conn, struct obd_device *src,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
return class_connect(conn, src, cluuid);
CERROR("couldn't free ldlm lock slab\n");
}
-EXPORT_SYMBOL(ldlm_completion_ast);
-EXPORT_SYMBOL(ldlm_handle_enqueue);
-EXPORT_SYMBOL(ldlm_handle_cancel);
-EXPORT_SYMBOL(ldlm_handle_convert);
+/* ldlm_lock.c */
+EXPORT_SYMBOL(ldlm_lock2desc);
EXPORT_SYMBOL(ldlm_register_intent);
EXPORT_SYMBOL(ldlm_unregister_intent);
EXPORT_SYMBOL(ldlm_lockname);
EXPORT_SYMBOL(ldlm_typename);
-EXPORT_SYMBOL(__ldlm_handle2lock);
EXPORT_SYMBOL(ldlm_lock2handle);
+EXPORT_SYMBOL(__ldlm_handle2lock);
EXPORT_SYMBOL(ldlm_lock_put);
EXPORT_SYMBOL(ldlm_lock_match);
+EXPORT_SYMBOL(ldlm_lock_cancel);
EXPORT_SYMBOL(ldlm_lock_addref);
EXPORT_SYMBOL(ldlm_lock_decref);
+EXPORT_SYMBOL(ldlm_lock_decref_and_cancel);
EXPORT_SYMBOL(ldlm_lock_change_resource);
EXPORT_SYMBOL(ldlm_lock_set_data);
+EXPORT_SYMBOL(ldlm_it2str);
+EXPORT_SYMBOL(ldlm_lock_dump);
+EXPORT_SYMBOL(ldlm_lock_dump_handle);
+EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
+EXPORT_SYMBOL(ldlm_reprocess_all_ns);
+
+/* ldlm_request.c */
+EXPORT_SYMBOL(ldlm_completion_ast);
+EXPORT_SYMBOL(ldlm_expired_completion_wait);
EXPORT_SYMBOL(ldlm_cli_convert);
EXPORT_SYMBOL(ldlm_cli_enqueue);
EXPORT_SYMBOL(ldlm_cli_cancel);
EXPORT_SYMBOL(ldlm_cli_cancel_unused);
EXPORT_SYMBOL(ldlm_match_or_enqueue);
-EXPORT_SYMBOL(ldlm_it2str);
+EXPORT_SYMBOL(ldlm_replay_locks);
+EXPORT_SYMBOL(ldlm_resource_foreach);
+EXPORT_SYMBOL(ldlm_namespace_foreach);
+EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+
+/* ldlm_lockd.c */
+EXPORT_SYMBOL(ldlm_server_blocking_ast);
+EXPORT_SYMBOL(ldlm_server_completion_ast);
+EXPORT_SYMBOL(ldlm_handle_enqueue);
+EXPORT_SYMBOL(ldlm_handle_cancel);
+EXPORT_SYMBOL(ldlm_handle_convert);
+EXPORT_SYMBOL(ldlm_del_waiting_lock);
+
+#if 0
+/* ldlm_test.c */
EXPORT_SYMBOL(ldlm_test);
EXPORT_SYMBOL(ldlm_regression_start);
EXPORT_SYMBOL(ldlm_regression_stop);
-EXPORT_SYMBOL(ldlm_lock_dump);
-EXPORT_SYMBOL(ldlm_lock_dump_handle);
+#endif
+
+/* ldlm_resource.c */
EXPORT_SYMBOL(ldlm_namespace_new);
EXPORT_SYMBOL(ldlm_namespace_cleanup);
EXPORT_SYMBOL(ldlm_namespace_free);
EXPORT_SYMBOL(ldlm_namespace_dump);
-EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
-EXPORT_SYMBOL(ldlm_replay_locks);
-EXPORT_SYMBOL(ldlm_resource_foreach);
-EXPORT_SYMBOL(ldlm_reprocess_all_ns);
-EXPORT_SYMBOL(ldlm_namespace_foreach);
-EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+
+/* l_lock.c */
EXPORT_SYMBOL(l_lock);
EXPORT_SYMBOL(l_unlock);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
else {
LDLM_DEBUG(lock, "timed out waiting for completion");
CERROR("lock %p timed out from %s\n", lock,
- conn->c_remote_uuid);
+ conn->c_remote_uuid.uuid);
ldlm_lock_dump(D_ERROR, lock);
class_signal_connection_failure(conn);
}
RETURN(0);
}
-int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
+int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data)
{
struct l_wait_info lwi =
LWI_TIMEOUT_INTR(obd_timeout * HZ, ldlm_expired_completion_wait,
static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
struct lustre_handle *parent_lockh,
- __u64 *res_id,
+ struct ldlm_res_id res_id,
__u32 type,
void *cookie, int cookielen,
ldlm_mode_t mode,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking,
void *data,
- __u32 data_len,
+ void *cp_data,
struct lustre_handle *lockh)
{
struct ldlm_lock *lock;
LBUG();
}
- lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode, data,
- data_len);
+ lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode,
+ data, cp_data);
if (!lock)
GOTO(out_nolock, err = -ENOMEM);
LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created");
ldlm_lock_addref_internal(lock, mode);
ldlm_lock2handle(lock, lockh);
- lock->l_connh = NULL;
+ lock->l_flags |= LDLM_FL_LOCAL;
- err = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags, completion,
+ err = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags, completion,
blocking);
if (err != ELDLM_OK)
GOTO(out, err);
if (type == LDLM_EXTENT)
memcpy(cookie, &lock->l_extent, sizeof(lock->l_extent));
if ((*flags) & LDLM_FL_LOCK_CHANGED)
- memcpy(res_id, lock->l_resource->lr_name, sizeof(*res_id));
+ memcpy(&res_id, &lock->l_resource->lr_name, sizeof(res_id));
LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)",
lock);
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, *flags);
+ lock->l_completion_ast(lock, *flags, NULL);
LDLM_DEBUG(lock, "client-side local enqueue END");
EXIT;
struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct lustre_handle *parent_lock_handle,
- __u64 *res_id,
+ struct ldlm_res_id res_id,
__u32 type,
void *cookie, int cookielen,
ldlm_mode_t mode,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking,
void *data,
- __u32 data_len,
+ void *cp_data,
struct lustre_handle *lockh)
{
struct ldlm_lock *lock;
is_replay = *flags & LDLM_FL_REPLAY;
LASSERT(connh != NULL || !is_replay);
- if (connh == NULL)
- return ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id,
- type, cookie, cookielen, mode,
- flags, completion, blocking, data,
- data_len, lockh);
+ if (connh == NULL) {
+ rc = ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id,
+ type, cookie, cookielen, mode,
+ flags, completion, blocking, data,
+ cp_data, lockh);
+ RETURN(rc);
+ }
/* If we're replaying this lock, just check some invariants.
* If we're creating a new lock, get everything all setup nice. */
LASSERT(connh == lock->l_connh);
} else {
lock = ldlm_lock_create(ns, parent_lock_handle, res_id, type,
- mode, data, data_len);
+ mode, data, cp_data);
if (lock == NULL)
GOTO(out_nolock, rc = -ENOMEM);
+ /* ugh. I set this early (instead of waiting for _enqueue)
+ * because the completion AST might arrive early, and we need
+ * (in just this one case) to run the completion_cb even if it
+ * arrives before the reply. */
+ lock->l_completion_ast = completion;
LDLM_DEBUG(lock, "client-side enqueue START");
/* for the local lock, add the reference */
ldlm_lock_addref_internal(lock, mode);
LASSERT(!is_replay);
LDLM_DEBUG(lock, "client-side enqueue END (%s)",
rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED");
+ /* Set a flag to prevent us from sending a CANCEL (bug 407) */
+ l_lock(&ns->ns_lock);
+ lock->l_flags |= LDLM_FL_CANCELING;
+ l_unlock(&ns->ns_lock);
+
ldlm_lock_decref(lockh, mode);
- /* FIXME: if we've already received a completion AST, this will
- * LBUG! */
ldlm_lock_destroy(lock);
GOTO(out_req, rc);
}
lock->l_req_mode = newmode;
}
- if (reply->lock_resource_name[0] !=
- lock->l_resource->lr_name[0]) {
+ if (reply->lock_resource_name.name[0] !=
+ lock->l_resource->lr_name.name[0]) {
CDEBUG(D_INFO, "remote intent success, locking %ld "
"instead of %ld\n",
- (long)reply->lock_resource_name[0],
- (long)lock->l_resource->lr_name[0]);
+ (long)reply->lock_resource_name.name[0],
+ (long)lock->l_resource->lr_name.name[0]);
ldlm_lock_change_resource(ns, lock,
reply->lock_resource_name);
}
if (!is_replay) {
- rc = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags,
+ l_lock(&ns->ns_lock);
+ lock->l_completion_ast = NULL;
+ rc = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags,
completion, blocking);
+ l_unlock(&ns->ns_lock);
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, *flags);
+ lock->l_completion_ast(lock, *flags, NULL);
}
LDLM_DEBUG(lock, "client-side enqueue END");
struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct lustre_handle *parent_lock_handle,
- __u64 *res_id,
+ struct ldlm_res_id res_id,
__u32 type,
void *cookie, int cookielen,
ldlm_mode_t mode,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking,
void *data,
- __u32 data_len,
+ void *cp_data,
struct lustre_handle *lockh)
{
int rc;
ENTRY;
- rc = ldlm_lock_match(ns, res_id, type, cookie, cookielen, mode, lockh);
+ if (connh == NULL) {
+ /* Just to make sure that I understand things --phil */
+ LASSERT(*flags & LDLM_FL_LOCAL_ONLY);
+ }
+
+ LDLM_DEBUG_NOLOCK("resource "LPU64"/"LPU64, res_id.name[0],
+ res_id.name[1]);
+ rc = ldlm_lock_match(ns, *flags, &res_id, type, cookie, cookielen, mode,
+ lockh);
if (rc == 0) {
- rc = ldlm_cli_enqueue(connh, req, ns,
- parent_lock_handle, res_id, type, cookie,
- cookielen, mode, flags, completion,
- blocking, data, data_len, lockh);
+ rc = ldlm_cli_enqueue(connh, req, ns, parent_lock_handle,
+ res_id, type, cookie, cookielen, mode,
+ flags, completion, blocking, data,
+ cp_data, lockh);
if (rc != ELDLM_OK)
CERROR("ldlm_cli_enqueue: err: %d\n", rc);
RETURN(rc);
- } else
- RETURN(0);
+ }
+ RETURN(0);
}
int ldlm_cli_replay_enqueue(struct ldlm_lock *lock)
{
struct lustre_handle lockh;
+ struct ldlm_res_id junk;
int flags = LDLM_FL_REPLAY;
ldlm_lock2handle(lock, &lockh);
- return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, NULL,
+ return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, junk,
lock->l_resource->lr_type, NULL, 0, -1, &flags,
NULL, NULL, NULL, 0, &lockh);
}
/* Go to sleep until the lock is granted. */
/* FIXME: or cancelled. */
if (lock->l_completion_ast)
- lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC);
+ lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
EXIT;
out:
LDLM_LOCK_PUT(lock);
RETURN(0);
if (lock->l_connh) {
+ int local_only;
+
LDLM_DEBUG(lock, "client-side cancel");
/* Set this flag to prevent others from getting new references*/
l_lock(&lock->l_resource->lr_namespace->ns_lock);
lock->l_flags |= LDLM_FL_CBPENDING;
ldlm_cancel_callback(lock);
+ local_only = (lock->l_flags & LDLM_FL_LOCAL_ONLY);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ if (local_only) {
+ CDEBUG(D_INFO, "not sending request (at caller's "
+ "instruction\n");
+ goto local_cancel;
+ }
+
req = ptlrpc_prep_req(class_conn2cliimp(lock->l_connh),
LDLM_CANCEL, 1, &size, NULL);
if (!req)
rc = ptlrpc_queue_wait(req);
ptlrpc_req_finished(req);
+ if (rc == ESTALE) {
+ CERROR("client/server out of sync\n");
+ LBUG();
+ }
if (rc != ELDLM_OK)
- GOTO(out, rc);
-
+ CERROR("Got rc %d from cancel RPC: canceling "
+ "anyway\n", rc);
+ local_cancel:
ldlm_lock_cancel(lock);
} else {
LDLM_DEBUG(lock, "client-side local cancel");
LDLM_DEBUG(lock, "client-side local cancel handler END");
}
- lock->l_flags |= LDLM_FL_CANCELING;
-
EXIT;
out:
LDLM_LOCK_PUT(lock);
}
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
- __u64 *res_id, int flags)
+ struct ldlm_res_id res_id, int flags)
{
struct ldlm_resource *res;
struct list_head *tmp, *next, list = LIST_HEAD_INIT(list);
res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
if (res == NULL) {
/* This is not a problem. */
- CDEBUG(D_INFO, "No resource "LPU64"\n", res_id[0]);
+ CDEBUG(D_INFO, "No resource "LPU64"\n", res_id.name[0]);
RETURN(0);
}
*
* If 'local_only' is true, throw the locks away without trying to notify the
* server. */
-int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, __u64 *res_id,
- int flags)
+int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
+ struct ldlm_res_id *res_id, int flags)
{
int i;
ENTRY;
RETURN(ELDLM_OK);
if (res_id)
- RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, flags));
+ RETURN(ldlm_cli_cancel_unused_resource(ns, *res_id, flags));
l_lock(&ns->ns_lock);
for (i = 0; i < RES_HASH_SIZE; i++) {
if (rc)
CERROR("cancel_unused_res ("LPU64"): %d\n",
- res->lr_name[0], rc);
+ res->lr_name.name[0], rc);
ldlm_resource_putref(res);
}
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Peter Braam <braam@clusterfs.com>
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of Lustre, http://www.lustre.org.
*
- * by Cluster File Systems, Inc.
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_LDLM
int ldlm_proc_setup(struct obd_device *obd)
{
+ int rc;
ENTRY;
LASSERT(ldlm_ns_proc_dir == NULL);
- ldlm_ns_proc_dir = obd->obd_type->typ_procroot;
+ rc = lprocfs_obd_attach(obd, 0);
+ if (rc) {
+ CERROR("LProcFS failed in ldlm-init\n");
+ RETURN(rc);
+ }
+ ldlm_ns_proc_dir = obd->obd_proc_entry;
RETURN(0);
}
void ldlm_proc_cleanup(struct obd_device *obd)
{
- ldlm_ns_proc_dir = NULL;
+ if (ldlm_ns_proc_dir) {
+ lprocfs_obd_detach(obd);
+ ldlm_ns_proc_dir = NULL;
+ }
}
static int lprocfs_uint_rd(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
unsigned int *temp = (unsigned int *)data;
- int len;
- len = snprintf(page, count, "%u\n", *temp);
- return len;
+ return snprintf(page, count, "%u\n", *temp);
}
-#define MAX_STRING_SIZE 100
+#define MAX_STRING_SIZE 128
void ldlm_proc_namespace(struct ldlm_namespace *ns)
{
struct lprocfs_vars lock_vars[2];
- char lock_names[MAX_STRING_SIZE + 1];
+ char lock_name[MAX_STRING_SIZE + 1];
+
+ lock_name[MAX_STRING_SIZE] = '\0';
memset(lock_vars, 0, sizeof(lock_vars));
- snprintf(lock_names, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
- lock_names[MAX_STRING_SIZE] = '\0';
- lock_vars[0].name = lock_names;
- lock_vars[0].read_fptr = lprocfs_ll_rd;
- lock_vars[0].write_fptr = NULL;
+ lock_vars[0].read_fptr = lprocfs_rd_u64;
+
+ lock_vars[0].name = lock_name;
+
+ snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
+
lock_vars[0].data = &ns->ns_resources;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
- memset(lock_vars, 0, sizeof(lock_vars));
- snprintf(lock_names, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
- lock_names[MAX_STRING_SIZE] = '\0';
- lock_vars[0].name = lock_names;
- lock_vars[0].read_fptr = lprocfs_ll_rd;
- lock_vars[0].write_fptr = NULL;
+ snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
lock_vars[0].data = &ns->ns_locks;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
- memset(lock_vars, 0, sizeof(lock_vars));
- snprintf(lock_names, MAX_STRING_SIZE, "%s/lock_unused_count",
+ snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count",
ns->ns_name);
- lock_names[MAX_STRING_SIZE] = '\0';
- lock_vars[0].name = lock_names;
- lock_vars[0].read_fptr = lprocfs_uint_rd;
- lock_vars[0].write_fptr = NULL;
lock_vars[0].data = &ns->ns_nr_unused;
+ lock_vars[0].read_fptr = lprocfs_uint_rd;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
}
#undef MAX_STRING_SIZE
/* If 'local_only' is true, don't try to tell the server, just cleanup.
* This is currently only used for recovery, and we make certain assumptions
- * as a result--notably, that we shouldn't cancel locks with refs. -phil */
+ * as a result--notably, that we shouldn't cancel locks with refs. -phil
+ *
+ * Called with the ns_lock held. */
static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
int local_only)
{
* will go away ... */
lock->l_flags |= LDLM_FL_CBPENDING;
/* ... without sending a CANCEL message. */
- lock->l_flags |= LDLM_FL_CANCELING;
+ lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+ /* ... and without calling the cancellation callback */
+ lock->l_flags |= LDLM_FL_CANCEL;
LDLM_LOCK_PUT(lock);
continue;
}
ldlm_lock_cancel(lock);
} else {
LDLM_DEBUG(lock, "Freeing a lock still held by a "
- "client node.\n");
+ "client node");
ldlm_resource_unlink_lock(lock);
ldlm_lock_destroy(lock);
RETURN(0);
}
-static __u32 ldlm_hash_fn(struct ldlm_resource *parent, __u64 *name)
+static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name)
{
__u32 hash = 0;
int i;
for (i = 0; i < RES_NAME_SIZE; i++)
- hash += name[i];
+ hash += name.name[i];
hash += (__u32)((unsigned long)parent >> 4);
/* Args: locked namespace
* Returns: newly-allocated, referenced, unlocked resource */
-static struct ldlm_resource *ldlm_resource_add(struct ldlm_namespace *ns,
- struct ldlm_resource *parent,
- __u64 *name, __u32 type)
+static struct ldlm_resource *
+ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+ struct ldlm_res_id name, __u32 type)
{
struct list_head *bucket;
struct ldlm_resource *res;
spin_unlock(&ns->ns_counter_lock);
l_lock(&ns->ns_lock);
- memcpy(res->lr_name, name, sizeof(res->lr_name));
+ memcpy(&res->lr_name, &name, sizeof(res->lr_name));
res->lr_namespace = ns;
ns->ns_refcount++;
/* Args: unlocked namespace
* Locks: takes and releases ns->ns_lock and res->lr_lock
* Returns: referenced, unlocked ldlm_resource or NULL */
-struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
- struct ldlm_resource *parent,
- __u64 *name, __u32 type, int create)
+struct ldlm_resource *
+ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+ struct ldlm_res_id name, __u32 type, int create)
{
struct list_head *bucket, *tmp;
struct ldlm_resource *res = NULL;
list_for_each(tmp, bucket) {
res = list_entry(tmp, struct ldlm_resource, lr_hash);
- if (memcmp(res->lr_name, name, sizeof(res->lr_name)) == 0) {
+ if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0) {
ldlm_resource_getref(res);
l_unlock(&ns->ns_lock);
RETURN(res);
l_lock(&res->lr_namespace->ns_lock);
ldlm_resource_dump(res);
- CDEBUG(D_OTHER, "About to grant this lock:\n");
+ CDEBUG(D_OTHER, "About to add this lock:\n");
ldlm_lock_dump(D_OTHER, lock);
+ if (lock->l_destroyed) {
+ CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+ return;
+ }
+
LASSERT(list_empty(&lock->l_res_link));
- list_add(&lock->l_res_link, head);
+ list_add_tail(&lock->l_res_link, head);
l_unlock(&res->lr_namespace->ns_lock);
}
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
{
desc->lr_type = res->lr_type;
- memcpy(desc->lr_name, res->lr_name, sizeof(desc->lr_name));
+ memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
memcpy(desc->lr_version, res->lr_version, sizeof(desc->lr_version));
}
LBUG();
snprintf(name, sizeof(name), "%Lx %Lx %Lx",
- (unsigned long long)res->lr_name[0],
- (unsigned long long)res->lr_name[1],
- (unsigned long long)res->lr_name[2]);
+ (unsigned long long)res->lr_name.name[0],
+ (unsigned long long)res->lr_name.name[1],
+ (unsigned long long)res->lr_name.name[2]);
CDEBUG(D_OTHER, "--- Resource: %p (%s) (rc: %d)\n", res, name,
atomic_read(&res->lr_refcount));
*/
static int ldlm_test_blocking_ast(struct ldlm_lock *lock,
struct ldlm_lock_desc *new,
- void *data, __u32 data_len, int flag)
+ void *data, int flag)
{
int rc;
struct lustre_handle lockh;
/* blocking ast for basic tests. noop */
static int ldlm_blocking_ast(struct ldlm_lock *lock,
struct ldlm_lock_desc *new,
- void *data, __u32 data_len, int flag)
+ void *data, int flag)
{
ENTRY;
CERROR("ldlm_blocking_ast: lock=%p, new=%p, flag=%d\n", lock, new,
/* Completion ast for regression test.
* Does not sleep when blocked.
*/
-static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags)
+static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags, void *data)
{
struct ldlm_test_lock *lock_info;
ENTRY;
{
struct ldlm_namespace *ns;
struct ldlm_resource *res;
- __u64 res_id[RES_NAME_SIZE] = {1, 2, 3};
+ struct ldlm_res_id res_id = { .name = {1, 2, 3} };
ldlm_error_t err;
struct ldlm_lock *lock1, *lock;
int flags;
struct ldlm_namespace *ns;
struct ldlm_resource *res;
struct ldlm_lock *lock, *lock1, *lock2;
- __u64 res_id[RES_NAME_SIZE] = {0, 0, 0};
+ struct ldlm_res_id res_id = { .name = {0} };
struct ldlm_extent ext1 = {4, 6}, ext2 = {6, 9}, ext3 = {10, 11};
ldlm_error_t err;
int flags;
static int ldlm_test_network(struct obd_device *obddev,
struct lustre_handle *connh)
{
-
- __u64 res_id[RES_NAME_SIZE] = {1, 2, 3};
+ struct ldlm_res_id res_id = { .name = {1, 2, 3} };
struct ldlm_extent ext = {4, 6};
struct lustre_handle lockh1;
struct ldlm_lock *lock;
static int ldlm_do_enqueue(struct ldlm_test_thread *thread)
{
struct lustre_handle lockh;
- __u64 res_id[3] = {0};
+ struct ldlm_res_id res_id = { .name = {0} };
__u32 lock_mode;
struct ldlm_extent ext;
unsigned char random;
/* Pick a random resource from 1 to num_resources */
get_random_bytes(&random, sizeof(random));
- res_id[0] = random % num_resources;
+ res_id.name[0] = random % num_resources;
/* Pick a random lock mode */
get_random_bytes(&random, sizeof(random));
(num_extents - (int)ext.start) + ext.start;
LDLM_DEBUG_NOLOCK("about to enqueue with resource "LPX64", mode %d,"
- " extent "LPX64" -> "LPX64, res_id[0], lock_mode,
+ " extent "LPX64" -> "LPX64, res_id.name[0], lock_mode,
ext.start, ext.end);
rc = ldlm_match_or_enqueue(®ress_connh, NULL,
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
* Author: Peter J. Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
* Author: Mike Shaver <shaver@clusterfs.com>
return &export->exp_obd->u.cli;
}
-struct obd_device *client_tgtuuid2obd(char *tgtuuid)
+struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid)
{
int i;
if ((strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0) ||
(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0)) {
struct client_obd *cli = &obd->u.cli;
- if (strncmp(tgtuuid, cli->cl_target_uuid,
- sizeof(cli->cl_target_uuid)) == 0)
+ if (strncmp(tgtuuid->uuid, cli->cl_target_uuid.uuid,
+ sizeof(cli->cl_target_uuid.uuid)) == 0)
return obd;
}
}
char *name;
struct client_obd *cli = &obddev->u.cli;
struct obd_import *imp = &cli->cl_import;
- obd_uuid_t server_uuid;
+ struct obd_uuid server_uuid;
ENTRY;
if (obddev->obd_type->typ_ops->o_brw) {
sema_init(&cli->cl_sem, 1);
cli->cl_conn_count = 0;
- memcpy(cli->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
- memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
+ memcpy(cli->cl_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1);
+ memcpy(server_uuid.uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
sizeof(server_uuid)));
- imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+ imp->imp_connection = ptlrpc_uuid_to_connection(&server_uuid);
if (!imp->imp_connection)
RETURN(-ENOENT);
}
int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct client_obd *cli = &obd->u.cli;
struct ptlrpc_request *request;
int rc, size[] = {sizeof(cli->cl_target_uuid),
sizeof(obd->obd_uuid) };
- char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid};
+ char *tmp[] = {cli->cl_target_uuid.uuid, obd->obd_uuid.uuid};
int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
struct ptlrpc_connection *c;
struct obd_import *imp = &cli->cl_import;
+ int msg_flags;
ENTRY;
down(&cli->cl_sem);
INIT_LIST_HEAD(&imp->imp_chain);
imp->imp_last_xid = 0;
imp->imp_max_transno = 0;
- imp->imp_peer_last_xid = 0;
imp->imp_peer_committed_transno = 0;
request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 2, size, tmp);
if (rc)
GOTO(out_req, rc);
- if (rq_opc == MDS_CONNECT)
+ msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+ if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
imp->imp_flags |= IMP_REPLAYABLE;
+ CDEBUG(D_HA, "connected to replayable target: %s\n", cli->cl_target_uuid.uuid);
+ }
imp->imp_level = LUSTRE_CONN_FULL;
imp->imp_handle.addr = request->rq_repmsg->addr;
imp->imp_handle.cookie = request->rq_repmsg->cookie;
if (cli->cl_conn_count)
GOTO(out_no_disconnect, rc = 0);
- ldlm_namespace_free(obd->obd_namespace);
- obd->obd_namespace = NULL;
- request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 0, NULL,
- NULL);
+ if (obd->obd_namespace != NULL) {
+ ldlm_cli_cancel_unused(obd->obd_namespace, NULL, 0);
+ ldlm_namespace_free(obd->obd_namespace);
+ obd->obd_namespace = NULL;
+ }
+ request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 0, NULL, NULL);
if (!request)
GOTO(out_req, rc = -ENOMEM);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ * Lustre Lite Update Records
*
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of Lustre, http://www.lustre.org.
*
- * Lustre Lite Update Records
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/config.h>
#include <linux/errno.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/locks.h> // for wait_on_buffer
-#else
+#else
#include <linux/buffer_head.h> // for wait_on_buffer
#endif
#include <linux/unistd.h>
void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
{
b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLUID | OBD_MD_FLGID |
- OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
+ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
+ OBD_MD_FLNLINK | OBD_MD_FLGENER;
b->ino = HTON__u32(inode->i_ino);
b->atime = HTON__u32(inode->i_atime);
b->mtime = HTON__u32(inode->i_mtime);
b->ctime = HTON__u32(inode->i_ctime);
b->mode = HTON__u32(inode->i_mode);
b->size = HTON__u64(inode->i_size);
+ b->blocks = HTON__u64(inode->i_blocks);
b->uid = HTON__u32(inode->i_uid);
b->gid = HTON__u32(inode->i_gid);
b->flags = HTON__u32(inode->i_flags);
b->rdev = HTON__u32(b->rdev);
b->nlink = HTON__u32(inode->i_nlink);
b->generation = HTON__u32(inode->i_generation);
+ b->suppgid = HTON__u32(-1);
}
b->rdev = HTON__u32(b->rdev);
b->nlink = HTON__u32(b->nlink);
b->generation = HTON__u32(b->generation);
+ b->suppgid = HTON__u32(b->suppgid);
}
-void mds_getattr_pack(struct ptlrpc_request *req, int offset,
- struct inode *inode,
- const char *name, int namelen)
+void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+ int flags,
+ struct inode *inode, const char *name, int namelen)
{
struct mds_body *b;
b = lustre_msg_buf(req->rq_reqmsg, offset);
b->fsuid = HTON__u32(current->fsuid);
b->fsgid = HTON__u32(current->fsgid);
b->capability = HTON__u32(current->cap_effective);
+ b->valid = HTON__u32(valid);
+ b->flags = HTON__u32(flags);
+ if (in_group_p(inode->i_gid))
+ b->suppgid = HTON__u32(inode->i_gid);
+ else
+ b->suppgid = HTON__u32(-1);
ll_inode2fid(&b->fid1, inode);
if (name) {
}
void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset,
- obd_id ino, int type)
+ obd_id ino, int type, __u64 xid)
{
struct mds_body *b;
b->fid1.id = HTON__u64(ino);
b->fid1.f_type = HTON__u32(type);
b->size = HTON__u64(offset);
+ b->suppgid = HTON__u32(-1);
+ b->blocks = HTON__u64(xid);
}
char *tmp;
rec = lustre_msg_buf(req->rq_reqmsg, offset);
- /* XXX do something about time, uid, gid */
rec->cr_opcode = HTON__u32(REINT_CREATE);
rec->cr_fsuid = HTON__u32(current->fsuid);
rec->cr_fsgid = HTON__u32(current->fsgid);
LOGL0(data, datalen, tmp);
}
}
+/* packing of MDS records */
+void mds_open_pack(struct ptlrpc_request *req, int offset, struct inode *dir,
+ __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
+ __u32 flags,
+ const char *name, int namelen,
+ const void *data, int datalen)
+{
+ struct mds_rec_create *rec;
+ char *tmp;
+ rec = lustre_msg_buf(req->rq_reqmsg, offset);
+
+ /* XXX do something about time, uid, gid */
+ rec->cr_opcode = HTON__u32(REINT_OPEN);
+ rec->cr_fsuid = HTON__u32(current->fsuid);
+ rec->cr_fsgid = HTON__u32(current->fsgid);
+ rec->cr_cap = HTON__u32(current->cap_effective);
+ ll_inode2fid(&rec->cr_fid, dir);
+ memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
+ rec->cr_mode = HTON__u32(mode);
+ rec->cr_flags = HTON__u32(flags);
+ rec->cr_rdev = HTON__u64(rdev);
+ rec->cr_uid = HTON__u32(uid);
+ rec->cr_gid = HTON__u32(gid);
+ rec->cr_time = HTON__u64(time);
+ if (in_group_p(dir->i_gid))
+ rec->cr_suppgid = HTON__u32(dir->i_gid);
+ else
+ rec->cr_suppgid = HTON__u32(-1);
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1);
+ LOGL0(name, namelen, tmp);
+
+ if (data) {
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2);
+ LOGL0(data, datalen, tmp);
+ }
+}
-void mds_setattr_pack(struct ptlrpc_request *req, int offset,
+void mds_setattr_pack(struct ptlrpc_request *req,
struct inode *inode, struct iattr *iattr,
- const char *name, int namelen)
+ void *ea, int ealen)
{
- struct mds_rec_setattr *rec;
- rec = lustre_msg_buf(req->rq_reqmsg, offset);
+ struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0);
rec->sa_opcode = HTON__u32(REINT_SETATTR);
rec->sa_fsuid = HTON__u32(current->fsuid);
rec->sa_fsgid = HTON__u32(current->fsgid);
rec->sa_cap = HTON__u32(current->cap_effective);
ll_inode2fid(&rec->sa_fid, inode);
- rec->sa_valid = HTON__u32(iattr->ia_valid);
- rec->sa_mode = HTON__u32(iattr->ia_mode);
- rec->sa_uid = HTON__u32(iattr->ia_uid);
- rec->sa_gid = HTON__u32(iattr->ia_gid);
- rec->sa_size = HTON__u64(iattr->ia_size);
- rec->sa_atime = HTON__u64(iattr->ia_atime);
- rec->sa_mtime = HTON__u64(iattr->ia_mtime);
- rec->sa_ctime = HTON__u64(iattr->ia_ctime);
- rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags);
-
- if (namelen) {
- char *tmp;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1);
- LOGL0(name, namelen, tmp);
+
+ if (iattr) {
+ rec->sa_valid = HTON__u32(iattr->ia_valid);
+ rec->sa_mode = HTON__u32(iattr->ia_mode);
+ rec->sa_uid = HTON__u32(iattr->ia_uid);
+ rec->sa_gid = HTON__u32(iattr->ia_gid);
+ rec->sa_size = HTON__u64(iattr->ia_size);
+ rec->sa_atime = HTON__u64(iattr->ia_atime);
+ rec->sa_mtime = HTON__u64(iattr->ia_mtime);
+ rec->sa_ctime = HTON__u64(iattr->ia_ctime);
+ rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags);
+
+ if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
+ rec->sa_suppgid = HTON__u32(iattr->ia_gid);
+ else if ((iattr->ia_valid & ATTR_MODE) &&
+ in_group_p(inode->i_gid))
+ rec->sa_suppgid = HTON__u32(inode->i_gid);
+ else
+ rec->sa_suppgid = HTON__u32(-1);
}
+
+ if (ealen)
+ memcpy(lustre_msg_buf(req->rq_reqmsg, 1), ea, ealen);
}
void mds_unlink_pack(struct ptlrpc_request *req, int offset,
rec->ul_fsgid = HTON__u32(current->fsgid);
rec->ul_cap = HTON__u32(current->cap_effective);
rec->ul_mode = HTON__u32(mode);
+ if (in_group_p(inode->i_gid))
+ rec->ul_suppgid = HTON__u32(inode->i_gid);
+ else
+ rec->ul_suppgid = HTON__u32(-1);
ll_inode2fid(&rec->ul_fid1, inode);
if (child)
ll_inode2fid(&rec->ul_fid2, child);
rec->lk_fsuid = HTON__u32(current->fsuid);
rec->lk_fsgid = HTON__u32(current->fsgid);
rec->lk_cap = HTON__u32(current->cap_effective);
+ if (in_group_p(dir->i_gid))
+ rec->lk_suppgid = HTON__u32(dir->i_gid);
+ else
+ rec->lk_suppgid = HTON__u32(-1);
ll_inode2fid(&rec->lk_fid1, inode);
ll_inode2fid(&rec->lk_fid2, dir);
mds_unpack_fid(&b->fid1);
mds_unpack_fid(&b->fid2);
b->size = NTOH__u64(b->size);
+ b->blocks = NTOH__u64(b->blocks);
b->valid = NTOH__u32(b->valid);
b->fsuid = NTOH__u32(b->fsuid);
b->fsgid = NTOH__u32(b->fsgid);
b->rdev = NTOH__u32(b->rdev);
b->nlink = NTOH__u32(b->nlink);
b->generation = NTOH__u32(b->generation);
+ b->suppgid = NTOH__u32(b->suppgid);
}
static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
r->ur_fsuid = NTOH__u32(rec->sa_fsuid);
r->ur_fsgid = NTOH__u32(rec->sa_fsgid);
r->ur_cap = NTOH__u32(rec->sa_cap);
+ r->ur_suppgid = NTOH__u32(rec->sa_suppgid);
r->ur_fid1 = &rec->sa_fid;
attr->ia_valid = NTOH__u32(rec->sa_valid);
attr->ia_mode = NTOH__u32(rec->sa_mode);
if (req->rq_reqmsg->bufcount == offset + 2) {
r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
- } else
+ } else {
r->ur_namelen = 0;
+ }
RETURN(0);
}
r->ur_uid = NTOH__u32(rec->cr_uid);
r->ur_gid = NTOH__u32(rec->cr_gid);
r->ur_time = NTOH__u64(rec->cr_time);
+ r->ur_flags = NTOH__u32(rec->cr_flags);
+ r->ur_suppgid = NTOH__u32(rec->cr_suppgid);
r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
r->ur_fsuid = NTOH__u32(rec->lk_fsuid);
r->ur_fsgid = NTOH__u32(rec->lk_fsgid);
r->ur_cap = NTOH__u32(rec->lk_cap);
+ r->ur_suppgid = NTOH__u32(rec->lk_suppgid);
r->ur_fid1 = &rec->lk_fid1;
r->ur_fid2 = &rec->lk_fid2;
r->ur_fsgid = NTOH__u32(rec->ul_fsgid);
r->ur_cap = NTOH__u32(rec->ul_cap);
r->ur_mode = NTOH__u32(rec->ul_mode);
+ r->ur_suppgid = NTOH__u32(rec->ul_suppgid);
r->ur_fid1 = &rec->ul_fid1;
r->ur_fid2 = &rec->ul_fid2;
[REINT_LINK] mds_link_unpack,
[REINT_UNLINK] mds_unlink_unpack,
[REINT_RENAME] mds_rename_unpack,
+ [REINT_OPEN] mds_create_unpack,
};
int mds_update_unpack(struct ptlrpc_request *req, int offset,
realop = rec->ur_opcode = NTOH__u32(*opcode);
realop &= REINT_OPCODE_MASK;
- if (realop < 0 || realop > REINT_MAX)
+ if (realop < 0 || realop > REINT_MAX) {
+ LBUG();
RETURN(-EFAULT);
+ }
rc = mds_unpackers[realop](req, offset, rec);
RETURN(rc);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lib/simple.c
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Aurhot: Andreas Dilger <adilger@clusterfs.com>
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * This file is part of Lustre, http://www.lustre.org.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * by Peter Braam <braam@clusterfs.com>
- * and Andreas Dilger <adilger@clusterfs.com>
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define EXPORT_SYMTAB
current->fsuid = uc->ouc_fsuid;
current->fsgid = uc->ouc_fsgid;
current->cap_effective = uc->ouc_cap;
+ if (uc->ouc_suppgid != -1)
+ current->groups[current->ngroups++] = uc->ouc_suppgid;
}
set_fs(new_ctx->fs);
set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
current->fsuid = saved->fsuid;
current->fsgid = saved->fsgid;
current->cap_effective = saved->cap;
+
+ if (uc->ouc_suppgid != -1)
+ current->ngroups--;
}
/*
ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
- down(&dir->d_inode->i_sem);
dchild = lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
GOTO(out_up, dchild);
if (err)
GOTO(out_err, err);
- up(&dir->d_inode->i_sem);
RETURN(dchild);
out_err:
dput(dchild);
dchild = ERR_PTR(err);
out_up:
- up(&dir->d_inode->i_sem);
return dchild;
}
ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
- down(&dir->d_inode->i_sem);
dchild = lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
GOTO(out_up, dchild);
if (err)
GOTO(out_err, err);
- up(&dir->d_inode->i_sem);
RETURN(dchild);
out_err:
dput(dchild);
dchild = ERR_PTR(err);
out_up:
- up(&dir->d_inode->i_sem);
return dchild;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
* Author: Peter J. Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
* Author: Mike Shaver <shaver@clusterfs.com>
#include <linux/lustre_dlm.h>
int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
- char *cluuid)
+ struct obd_uuid *cluuid)
{
if (exp->exp_connection) {
struct lustre_handle *hdl;
hdl = &exp->exp_ldlm_data.led_import.imp_handle;
/* Might be a re-connect after a partition. */
if (!memcmp(conn, hdl, sizeof *conn)) {
- CERROR("%s reconnecting\n", cluuid);
+ CERROR("%s reconnecting\n", cluuid->uuid);
conn->addr = (__u64) (unsigned long)exp;
conn->cookie = exp->exp_cookie;
RETURN(EALREADY);
} else {
CERROR("%s reconnecting from %s, "
"handle mismatch (ours "LPX64"/"LPX64", "
- "theirs "LPX64"/"LPX64")\n", cluuid,
- exp->exp_connection->c_remote_uuid, hdl->addr,
+ "theirs "LPX64"/"LPX64")\n", cluuid->uuid,
+ exp->exp_connection->c_remote_uuid.uuid,
+ hdl->addr,
hdl->cookie, conn->addr, conn->cookie);
/* XXX disconnect them here? */
memset(conn, 0, sizeof *conn);
conn->addr = (__u64) (unsigned long)exp;
conn->cookie = exp->exp_cookie;
- CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", cluuid, exp);
+ CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", cluuid->uuid, exp);
CDEBUG(D_IOCTL,"connect: addr %Lx cookie %Lx\n",
(long long)conn->addr, (long long)conn->cookie);
RETURN(0);
int target_handle_connect(struct ptlrpc_request *req)
{
struct obd_device *target;
- struct obd_export *export;
+ struct obd_export *export = NULL;
struct obd_import *dlmimp;
struct lustre_handle conn;
- char *tgtuuid, *cluuid;
+ struct obd_uuid tgtuuid;
+ struct obd_uuid cluuid;
+ struct list_head *p;
int rc, i;
ENTRY;
- tgtuuid = lustre_msg_buf(req->rq_reqmsg, 0);
if (req->rq_reqmsg->buflens[0] > 37) {
CERROR("bad target UUID for connect\n");
GOTO(out, rc = -EINVAL);
}
+ obd_str2uuid(&tgtuuid, lustre_msg_buf(req->rq_reqmsg, 0));
- cluuid = lustre_msg_buf(req->rq_reqmsg, 1);
if (req->rq_reqmsg->buflens[1] > 37) {
CERROR("bad client UUID for connect\n");
GOTO(out, rc = -EINVAL);
}
+ obd_str2uuid(&cluuid, lustre_msg_buf(req->rq_reqmsg, 1));
- i = class_uuid2dev(tgtuuid);
+ i = class_uuid2dev(&tgtuuid);
if (i == -1) {
- CERROR("UUID '%s' not found for connect\n", tgtuuid);
+ CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
GOTO(out, rc = -ENODEV);
}
conn.addr = req->rq_reqmsg->addr;
conn.cookie = req->rq_reqmsg->cookie;
- rc = obd_connect(&conn, target, cluuid, ptlrpc_recovd,
- target_revoke_connection);
- /* EALREADY indicates a reconnection, send the reply normally. */
- if (rc && rc != EALREADY)
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc)
GOTO(out, rc);
+ /* lctl gets a backstage, all-access pass. */
+ if (!strcmp(cluuid.uuid, "OBD_CLASS_UUID"))
+ goto dont_check_exports;
+
+ spin_lock(&target->obd_dev_lock);
+ list_for_each(p, &target->obd_exports) {
+ export = list_entry(p, struct obd_export, exp_obd_chain);
+ if (!memcmp(&cluuid, &export->exp_client_uuid,
+ sizeof(export->exp_client_uuid))) {
+ spin_unlock(&target->obd_dev_lock);
+ LASSERT(export->exp_obd == target);
+
+ rc = target_handle_reconnect(&conn, export, &cluuid);
+ break;
+ }
+ export = NULL;
+ }
+ /* If we found an export, we already unlocked. */
+ if (!export)
+ spin_unlock(&target->obd_dev_lock);
+
+ /* Tell the client if we're in recovery. */
+ if (target->obd_flags & OBD_RECOVERING)
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+
+ /* Tell the client if we support replayable requests */
+ if (target->obd_flags & OBD_REPLAYABLE)
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE);
+
+ if (!export) {
+ if (target->obd_flags & OBD_RECOVERING) {
+ CERROR("denying connection for new client %s: "
+ "in recovery\n", cluuid.uuid);
+ rc = -EBUSY;
+ } else {
+ dont_check_exports:
+ rc = obd_connect(&conn, target, &cluuid, ptlrpc_recovd,
+ target_revoke_connection);
+ }
+ }
+
+ if (rc == EALREADY) {
+ /* We indicate the reconnection in a flag, not an error code. */
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
+ rc = 0;
+ } else if (rc) {
+ GOTO(out, rc);
+ }
+
/* If all else goes well, this is our RPC return code. */
req->rq_status = rc;
- rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc)
- GOTO(out, rc);
req->rq_repmsg->addr = conn.addr;
req->rq_repmsg->cookie = conn.cookie;
LASSERT(export);
req->rq_export = export;
- export->exp_connection = ptlrpc_get_connection(&req->rq_peer, cluuid);
+ export->exp_connection = ptlrpc_get_connection(&req->rq_peer, &cluuid);
if (req->rq_connection != NULL)
ptlrpc_put_connection(req->rq_connection);
req->rq_connection = ptlrpc_connection_addref(export->exp_connection);
RETURN(rc);
req->rq_status = obd_disconnect(conn);
-
+ req->rq_export = NULL;
RETURN(0);
}
int target_revoke_connection(struct recovd_data *rd, int phase)
{
struct ptlrpc_connection *conn = class_rd2conn(rd);
-
+
LASSERT(conn);
ENTRY;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/lustre_idl.h>
#include <linux/lustre_dlm.h>
-extern struct address_space_operations ll_aops;
-
+/* should NOT be called with the dcache lock, see fs/dcache.c */
void ll_release(struct dentry *de)
{
ENTRY;
-
OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
EXIT;
}
-extern void d_delete_aliases(struct inode *);
+void ll_set_dd(struct dentry *de)
+{
+ ENTRY;
+ LASSERT(de != NULL);
+
+ lock_kernel();
+
+ if (de->d_fsdata == NULL) {
+ OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
+ sema_init(&ll_d2d(de)->lld_it_sem, 1);
+ }
+
+ unlock_kernel();
+
+ EXIT;
+}
+
void ll_intent_release(struct dentry *de, struct lookup_intent *it)
{
struct lustre_handle *handle;
ENTRY;
LASSERT(ll_d2d(de) != NULL);
+ mdc_put_rpc_lock(&mdc_rpc_lock, it);
if (it->it_lock_mode) {
handle = (struct lustre_handle *)it->it_lock_handle;
- if (it->it_op == IT_SETATTR) {
- int rc;
- ldlm_lock_decref(handle, it->it_lock_mode);
- rc = ldlm_cli_cancel(handle);
- if (rc < 0)
- CERROR("ldlm_cli_cancel: %d\n", rc);
- } else
+ if (it->it_op == IT_SETATTR)
+ ldlm_lock_decref_and_cancel(handle, it->it_lock_mode);
+ else
ldlm_lock_decref(handle, it->it_lock_mode);
- /* intent_release may be called multiple times, and we don't
- * want to double-decref this lock (see bug 494) */
+ /* intent_release may be called multiple times, from
+ this thread and we don't want to double-decref this
+ lock (see bug 494) */
it->it_lock_mode = 0;
}
if (de->d_it == it)
LL_GET_INTENT(de, it);
+ else
+ CERROR("STRANGE intent release: %p %p\n", de->d_it, it);
EXIT;
}
extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
static int revalidate2_finish(int flag, struct ptlrpc_request *request,
- struct dentry **de,
- struct lookup_intent *it,
- int offset, obd_id ino)
+ struct dentry **de, struct lookup_intent *it,
+ int offset, obd_id ino)
{
- ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
- (*de)->d_inode, sizeof(*((*de)->d_inode)));
+ struct mds_body *body;
+ struct lov_mds_md *lmm = NULL;
+ int rc = 0;
+ ENTRY;
+
+ if (!(flag & LL_LOOKUP_NEGATIVE)) {
+ body = lustre_msg_buf(request->rq_repmsg, offset);
+ if (body->valid & OBD_MD_FLEASIZE)
+ lmm = lustre_msg_buf(request->rq_repmsg, offset + 1);
+ ll_update_inode((*de)->d_inode, body, lmm);
+ mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+ (*de)->d_inode);
+ } else
+ rc = -ENOENT;
+
ptlrpc_req_finished(request);
- return 0;
+ RETURN(rc);
}
int ll_have_md_lock(struct dentry *de)
{
struct ll_sb_info *sbi = ll_s2sbi(de->d_sb);
struct lustre_handle lockh;
- __u64 res_id[RES_NAME_SIZE] = {0};
+ struct ldlm_res_id res_id = { .name = {0} };
struct obd_device *obddev;
ENTRY;
RETURN(0);
obddev = class_conn2obd(&sbi->ll_mdc_conn);
- res_id[0] = de->d_inode->i_ino;
- res_id[1] = de->d_inode->i_generation;
+ res_id.name[0] = de->d_inode->i_ino;
+ res_id.name[1] = de->d_inode->i_generation;
- CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id[0]);
+ CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
- if (ldlm_lock_match(obddev->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, LCK_PR, &lockh)) {
+ if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
+ &res_id, LDLM_PLAIN, NULL, 0, LCK_PR, &lockh)) {
ldlm_lock_decref(&lockh, LCK_PR);
RETURN(1);
}
- if (ldlm_lock_match(obddev->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, LCK_PW, &lockh)) {
+ if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
+ &res_id, LDLM_PLAIN, NULL, 0, LCK_PW, &lockh)) {
ldlm_lock_decref(&lockh, LCK_PW);
RETURN(1);
}
RETURN(0);
}
+ if (it && it->it_op == IT_TRUNC)
+ it->it_op = IT_SETATTR;
+
+ if (it == NULL || it->it_op == IT_GETATTR) {
+ /* We could just return 1 immediately, but since we should only
+ * be called in revalidate2 if we already have a lock, let's
+ * verify that. */
+ struct inode *inode = de->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn);
+ struct ldlm_res_id res_id =
+ { .name = {inode->i_ino, (__u64)inode->i_generation} };
+ struct lustre_handle lockh;
+ rc = ldlm_lock_match(obddev->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED, &res_id,
+ LDLM_PLAIN, NULL, 0, LCK_PR, &lockh);
+ if (rc) {
+ de->d_flags &= ~DCACHE_LUSTRE_INVALID;
+ if (it && it->it_op == IT_GETATTR) {
+ memcpy(it->it_lock_handle, &lockh,
+ sizeof(lockh));
+ it->it_lock_mode = LCK_PR;
+ LL_SAVE_INTENT(de, it);
+ } else {
+ ldlm_lock_decref(&lockh, LCK_PR);
+ }
+ RETURN(1);
+ }
+ rc = ldlm_lock_match(obddev->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED, &res_id,
+ LDLM_PLAIN, NULL, 0, LCK_PW, &lockh);
+ if (rc) {
+ de->d_flags &= ~DCACHE_LUSTRE_INVALID;
+ if (it && it->it_op == IT_GETATTR) {
+ memcpy(it->it_lock_handle, &lockh,
+ sizeof(lockh));
+ it->it_lock_mode = LCK_PW;
+ LL_SAVE_INTENT(de, it);
+ } else {
+ ldlm_lock_decref(&lockh, LCK_PW);
+ }
+ RETURN(1);
+ }
+ if (S_ISDIR(de->d_inode->i_mode))
+ ll_invalidate_inode_pages(de->d_inode);
+ d_unhash_aliases(de->d_inode);
+ RETURN(0);
+ }
+
rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
- if (rc < 0) {
- /* Something bad happened; overwrite it_status? */
- CERROR("ll_intent_lock: %d\n", rc);
+ if (rc == -ESTALE)
+ RETURN(0);
+ if (rc < 0 && it->it_status) {
+ CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
+ it->it_status);
+ RETURN(0);
}
/* unfortunately ll_intent_lock may cause a callback and revoke our
dentry */
RETURN(1);
}
-int ll_set_dd(struct dentry *de)
-{
- ENTRY;
- LASSERT(de != NULL);
-
- lock_kernel();
-
- if (de->d_fsdata != NULL) {
- CERROR("dentry %p already has d_fsdata set\n", de);
- } else {
- OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
- sema_init(&ll_d2d(de)->lld_it_sem, 1);
- }
-
- unlock_kernel();
-
- RETURN(0);
-}
-
struct dentry_operations ll_d_ops = {
.d_revalidate2 = ll_revalidate2,
.d_intent_release = ll_intent_release,
* and moved here. AV
*
* Adapted for Lustre Light
- * Copyright (C) 2002, Cluster File Systems, Inc.
+ * Copyright (C) 2002-2003, Cluster File Systems, Inc.
*
*/
ENTRY;
if ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index){
+ /* XXX why do we need this exactly, and why do we think that
+ * an all-zero directory page is useful?
+ */
+ CERROR("memsetting dir page %lu to zero (size %lld)\n",
+ page->index, inode->i_size);
memset(kmap(page), 0, PAGE_CACHE_SIZE);
kunmap(page);
GOTO(readpage_out, rc);
request = (struct ptlrpc_request *)it.it_data;
if (request)
ptlrpc_req_finished(request);
- if (rc != ELDLM_OK) {
+ if (rc < 0) {
CERROR("lock enqueue: err: %d\n", rc);
unlock_page(page);
RETURN(rc);
SetPageUptodate(page);
unlock_page(page);
- rc = ll_unlock(LCK_PR, &lockh);
+ ll_unlock(LCK_PR, &lockh);
+ mdc_put_rpc_lock(&mdc_rpc_lock, &it);
if (rc != ELDLM_OK)
CERROR("ll_unlock: err: %d\n", rc);
return rc;
limit = dir->i_size & ~PAGE_CACHE_MASK;
if (limit & (chunk_size - 1)) {
CERROR("limit %d dir size %lld index %ld\n",
- limit, dir->i_size, page->index);
+ limit, dir->i_size, page->index);
goto Ebadsize;
}
for (offs = limit; offs<PAGE_CACHE_SIZE; offs += chunk_size) {
// error = "inode out of bounds";
bad_entry:
CERROR("ext2_check_page: bad entry in directory #%lu: %s - "
- "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d",
+ dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs,
(unsigned long) le32_to_cpu(p->inode),
rec_len, p->name_len);
goto fail;
LBUG();
}
-static struct page * ll_get_page(struct inode *dir, unsigned long n)
+static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
{
struct address_space *mapping = dir->i_mapping;
struct page *page = read_cache_page(mapping, n,
char *kaddr, *limit;
ext2_dirent *de;
struct page *page;
-
- page = ll_get_page(inode, n);
+
+ CDEBUG(D_EXT2, "reading %lu of dir %lu page %lu, size %llu\n",
+ PAGE_CACHE_SIZE, inode->i_ino, n, inode->i_size);
+ page = ll_get_dir_page(inode, n);
/* size might have been updated by mdc_readpage */
npages = dir_pages(inode);
offset = (char *)de - kaddr;
over = filldir(dirent, de->name, de->name_len,
- (n<<PAGE_CACHE_SHIFT) | offset,
- le32_to_cpu(de->inode), d_type);
+ (n<<PAGE_CACHE_SHIFT) | offset,
+ le32_to_cpu(de->inode), d_type);
if (over) {
ext2_put_page(page);
GOTO(done,0);
n = start;
do {
char *kaddr;
- page = ll_get_page(dir, n);
+ page = ll_get_dir_page(dir, n);
if (!IS_ERR(page)) {
kaddr = page_address(page);
de = (ext2_dirent *) kaddr;
struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
{
- struct page *page = ll_get_page(dir, 0);
+ struct page *page = ll_get_dir_page(dir, 0);
ext2_dirent *de = NULL;
if (!IS_ERR(page)) {
/* We take care of directory expansion in the same loop */
for (n = 0; n <= npages; n++) {
- page = ll_get_page(dir, n);
+ page = ll_get_dir_page(dir, n);
err = PTR_ERR(page);
if (IS_ERR(page))
goto out;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ll_get_page(inode, i);
+ page = ll_get_dir_page(inode, i);
if (IS_ERR(page))
continue;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * linux/fs/ext2/file.c
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of Lustre, http://www.lustre.org.
*
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * from
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- * linux/fs/minix/file.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext2 fs regular file handling primitives
- *
- * 64-bit file support on 64-bit platforms by Jakub Jelinek
- * (jj@sunsite.ms.mff.cuni.cz)
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_LLITE
int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
extern int ll_setattr(struct dentry *de, struct iattr *attr);
-static int ll_mdc_open(struct lustre_handle *mdc_conn, struct inode *inode,
- struct file *file, struct lov_mds_md *lmm, int lmm_size)
-{
- struct ptlrpc_request *req = NULL;
- struct ll_file_data *fd;
- int rc;
- ENTRY;
-
- LASSERT(!file->private_data);
-
- fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
- if (!fd)
- RETURN(-ENOMEM);
-
- memset(fd, 0, sizeof(*fd));
- fd->fd_mdshandle.addr = (__u64)(unsigned long)file;
- get_random_bytes(&fd->fd_mdshandle.cookie,
- sizeof(fd->fd_mdshandle.cookie));
-
- rc = mdc_open(mdc_conn, inode->i_ino, S_IFREG | inode->i_mode,
- file->f_flags, lmm, lmm_size, &fd->fd_mdshandle, &req);
-
- /* This is the "reply" refcount. */
- ptlrpc_req_finished(req);
-
- if (rc)
- GOTO(out_fd, rc);
-
- fd->fd_req = req;
- file->private_data = fd;
-
- if (!fd->fd_mdshandle.addr ||
- fd->fd_mdshandle.addr == (__u64)(unsigned long)file) {
- CERROR("hmm, mdc_open didn't assign fd_mdshandle?\n");
- /* XXX handle this how, abort or is it non-fatal? */
- }
-
- file->f_flags &= ~O_LOV_DELAY_CREATE;
- RETURN(0);
-
-out_fd:
- fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(ll_file_data_slab, fd);
-
- return -abs(rc);
-}
-
static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
struct file *file)
{
struct ll_file_data *fd = file->private_data;
struct ptlrpc_request *req = NULL;
unsigned long flags;
- struct obd_import *imp = fd->fd_req->rq_import;
+ struct obd_import *imp;
int rc;
+ ENTRY;
/* Complete the open request and remove it from replay list */
- DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, inode->i_ino,
inode->i_mode, &fd->fd_mdshandle, &req);
-
if (rc)
CERROR("inode %lu close failed: rc = %d\n", inode->i_ino, rc);
- ptlrpc_req_finished(req);
+ imp = fd->fd_req->rq_import;
+ LASSERT(imp != NULL);
spin_lock_irqsave(&imp->imp_lock, flags);
+
+ DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
+
+ /* We held on to the request for replay until we saw a close for that
+ * file. Now that we've closed it, it gets replayed on the basis of
+ * its transno only. */
+ fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
+
if (fd->fd_req->rq_transno) {
- /* This caused an EA to be written, need to replay as a normal
- * transaction now. Our reference is now effectively owned
- * by the imp_replay_list, and we'll be committed just like
- * other transno-having requests now.
- */
- fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
+ /* This open created a file, so it needs replay as a
+ * normal transaction now. Our reference to it now
+ * effectively owned by the imp_replay_list, and it'll
+ * be committed just like other transno-having
+ * requests from here on out. */
+
+ /* We now retain this close request, so that it is
+ * replayed if the open is replayed. We duplicate the
+ * transno, so that we get freed at the right time,
+ * and rely on the difference in xid to keep
+ * everything ordered correctly.
+ *
+ * But! If this close was already given a transno
+ * (because it caused real unlinking of an
+ * open-unlinked file, f.e.), then we'll be ordered on
+ * the basis of that and we don't need to do anything
+ * magical here. */
+ if (!req->rq_transno) {
+ req->rq_transno = fd->fd_req->rq_transno;
+ ptlrpc_retain_replayable_request(req, imp);
+ }
spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+ /* Should we free_committed now? we always free before
+ * replay, so it's probably a wash. We could check to
+ * see if the fd_req should already be committed, in
+ * which case we can avoid the whole retain_replayable
+ * dance. */
} else {
/* No transno means that we can just drop our ref. */
spin_unlock_irqrestore(&imp->imp_lock, flags);
ptlrpc_req_finished(fd->fd_req);
}
+
+ /* Do this after the fd_req->rq_transno check, because we don't want
+ * to bounce off zero references. */
+ ptlrpc_req_finished(req);
fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
file->private_data = NULL;
kmem_cache_free(ll_file_data_slab, fd);
- return -abs(rc);
+ RETURN(-abs(rc));
+}
+
+/* While this returns an error code, fput() the caller does not, so we need
+ * to make every effort to clean up all of our state here. Also, applications
+ * rarely check close errors and even if an error is returned they will not
+ * re-try the close call.
+ */
+static int ll_file_release(struct inode *inode, struct file *file)
+{
+ struct ll_file_data *fd;
+ struct obdo oa;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lov_stripe_md *lsm = lli->lli_smd;
+ int rc = 0, rc2;
+
+ ENTRY;
+
+ fd = (struct ll_file_data *)file->private_data;
+ if (!fd) /* no process opened the file after an mcreate */
+ RETURN(rc = 0);
+
+ if (lsm != NULL) {
+ memset(&oa, 0, sizeof(oa));
+ oa.o_id = lsm->lsm_object_id;
+ oa.o_mode = S_IFREG;
+ oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
+ obd_handle2oa(&oa, &fd->fd_osthandle);
+ rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+ if (rc)
+ CERROR("inode %lu object close failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+ rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+ if (rc2 && !rc)
+ rc = rc2;
+
+ if (atomic_dec_and_test(&lli->lli_open_count)) {
+ CDEBUG(D_INFO, "last close, cancelling unused locks\n");
+ rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
+ if (rc2 && !rc) {
+ rc = rc2;
+ CERROR("obd_cancel_unused: %d\n", rc);
+ }
+ } else
+ CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
+
+ RETURN(rc);
+}
+
+static int ll_local_open(struct file *file, struct lookup_intent *it)
+{
+ struct ptlrpc_request *req = it->it_data;
+ struct ll_file_data *fd;
+ struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+ ENTRY;
+
+ LASSERT(!file->private_data);
+
+ fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
+ /* We can't handle this well without reorganizing ll_file_open and
+ * ll_mdc_close, so don't even try right now. */
+ LASSERT(fd != NULL);
+
+ memset(fd, 0, sizeof(*fd));
+
+ memcpy(&fd->fd_mdshandle, &body->handle, sizeof(body->handle));
+ fd->fd_req = it->it_data;
+ file->private_data = fd;
+
+ RETURN(0);
}
static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
struct file *file, struct lov_stripe_md *lsm)
{
- struct ll_file_data *fd;
+ struct ll_file_data *fd = file->private_data;
struct obdo *oa;
int rc;
ENTRY;
oa->o_id = lsm->lsm_object_id;
oa->o_mode = S_IFREG;
oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS;
- rc = obd_open(conn, oa, lsm);
+ OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+ rc = obd_open(conn, oa, lsm, NULL);
if (rc)
GOTO(out, rc);
- obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ file->f_flags &= ~O_LOV_DELAY_CREATE;
+ obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
- fd = file->private_data;
obd_oa2handle(&fd->fd_osthandle, oa);
atomic_inc(&ll_i2info(inode)->lli_open_count);
* the mdc open was successful (hence stored stripe MD on MDS), otherwise
* other nodes could try to create different objects for the same file.
*/
-static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
- struct file *file, struct lov_stripe_md *lsm)
+static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
+ struct file *file, struct lov_stripe_md *lsm)
{
+ struct ptlrpc_request *req = NULL;
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_mds_md *lmm = NULL;
int lmm_size = 0;
oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
OBD_MD_FLUID | OBD_MD_FLGID;
- rc = obd_create(conn, oa, &lsm);
+ rc = obd_create(conn, oa, &lsm, NULL);
if (rc) {
CERROR("error creating objects for inode %lu: rc = %d\n",
inode->i_ino, rc);
+ if (rc > 0) {
+ CERROR("obd_create returned invalid rc %d\n", rc);
+ rc = -EIO;
+ }
GOTO(out_oa, rc);
}
lmm_size = rc;
- rc = ll_mdc_open(&ll_i2sbi(inode)->ll_mdc_conn,inode,file,lmm,lmm_size);
+ /* Save the stripe MD with this file on the MDS */
+ rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, inode, NULL,
+ lmm, lmm_size, &req);
+ ptlrpc_req_finished(req);
obd_free_wiremd(conn, &lmm);
* MDS, we need to destroy the objects now or they will be leaked.
*/
if (rc) {
- CERROR("error MDS opening %lu with delayed create: rc %d\n",
+ CERROR("error: storing stripe MD for %lu: rc %d\n",
inode->i_ino, rc);
GOTO(out_destroy, rc);
}
obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
oa->o_id = lsm->lsm_object_id;
oa->o_valid |= OBD_MD_FLID;
- err = obd_destroy(conn, oa, lsm);
+ err = obd_destroy(conn, oa, lsm, NULL);
obd_free_memmd(conn, &lsm);
if (err)
CERROR("error uncreating inode %lu objects: rc %d\n",
* before returning in the O_LOV_DELAY_CREATE case and dropping it here
* or in ll_file_release(), but I'm not sure that is desirable/necessary.
*/
+extern int ll_it_open_error(int phase, struct lookup_intent *it);
+
static int ll_file_open(struct inode *inode, struct file *file)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_inode_info *lli = ll_i2info(inode);
struct lustre_handle *conn = ll_i2obdconn(inode);
+ struct lookup_intent *it;
struct lov_stripe_md *lsm;
int rc = 0;
ENTRY;
+ LL_GET_INTENT(file->f_dentry, it);
+ rc = ll_it_open_error(IT_OPEN_OPEN, it);
+ if (rc)
+ RETURN(rc);
+
+ rc = ll_local_open(file, it);
+ if (rc)
+ LBUG();
+
+ mdc_set_open_replay_data((struct ll_file_data *)file->private_data);
+
lsm = lli->lli_smd;
if (lsm == NULL) {
if (file->f_flags & O_LOV_DELAY_CREATE) {
CDEBUG(D_INODE, "delaying object creation\n");
RETURN(0);
}
-
down(&lli->lli_open_sem);
if (!lli->lli_smd) {
- rc = ll_create_open_obj(conn, inode, file, NULL);
+ rc = ll_create_obj(conn, inode, file, NULL);
up(&lli->lli_open_sem);
+ if (rc)
+ GOTO(out_close, rc);
} else {
- CERROR("stripe already set on ino %lu\n", inode->i_ino);
+ CERROR("warning: stripe already set on ino %lu\n",
+ inode->i_ino);
up(&lli->lli_open_sem);
- rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file,NULL,0);
}
lsm = lli->lli_smd;
- } else
- rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file, NULL, 0);
-
- if (rc)
- RETURN(rc);
+ }
rc = ll_osc_open(conn, inode, file, lsm);
if (rc)
GOTO(out_close, rc);
RETURN(0);
-out_close:
+
+ out_close:
ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
return rc;
}
RETURN(rc);
}
-int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm)
+/* This function is solely "sampling" the file size, and does not explicit
+ * locking on the size itself (see ll_size_lock() and ll_size_unlock()).
+ *
+ * XXX We need to optimize away the obd_getattr for decent performance here,
+ * by checking if we already have the size lock and considering our size
+ * authoritative in that case. In order to do that either the act of
+ * getting the size lock includes retrieving the file size, or the client
+ * keeps an atomic flag in the inode which indicates whether the size
+ * has been updated (see bug 280).
+ */
+int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm,
+ struct lustre_handle *handle)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
- //struct lustre_handle lockh = { 0, 0 };
struct obdo oa;
- //int err;
int rc;
ENTRY;
LASSERT(lsm);
LASSERT(sbi);
- /* XXX do not yet need size lock - OST size always correct (sync write)
- rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: %d\n", rc);
- RETURN(rc);
- }
- */
-
memset(&oa, 0, sizeof oa);
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLSIZE|OBD_MD_FLBLOCKS;
+ oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+ OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+ obd_handle2oa(&oa, handle);
rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
if (!rc) {
- obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
- CDEBUG(D_INODE, LPX64" size %Lu/%Lu\n",
+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lu\n",
lsm->lsm_object_id, inode->i_size, inode->i_size);
}
- /* XXX do not need size lock, because OST size always correct (sync write)
- err = ll_size_unlock(inode, lsm, LCK_PR, &lockh);
- if (err != ELDLM_OK) {
- CERROR("lock cancel: %d\n", err);
- if (!rc)
- rc = err;
- }
- */
- RETURN(rc);
-}
-
-/* While this returns an error code, fput() the caller does not, so we need
- * to make every effort to clean up all of our state here. Also, applications
- * rarely check close errors and even if an error is returned they will not
- * re-try the close call.
- */
-static int ll_file_release(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd;
- struct obdo oa;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = lli->lli_smd;
- int rc, rc2;
-
- ENTRY;
-
- fd = (struct ll_file_data *)file->private_data;
- if (!fd) /* no process opened the file after an mcreate */
- RETURN(rc = 0);
-
- memset(&oa, 0, sizeof(oa));
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
- obd_handle2oa(&oa, &fd->fd_osthandle);
- rc = obd_close(&sbi->ll_osc_conn, &oa, lsm);
- if (rc)
- CERROR("inode %lu object close failed: rc = %d\n",
- inode->i_ino, rc);
-
- rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
- if (rc2 && !rc)
- rc = rc2;
-
- if (atomic_dec_and_test(&lli->lli_open_count)) {
- CDEBUG(D_INFO, "last close, cancelling unused locks\n");
- rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
- if (rc2 && !rc) {
- rc = rc2;
- CERROR("obd_cancel_unused: %d\n", rc);
- }
- } else
- CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
RETURN(rc);
}
static void ll_update_atime(struct inode *inode)
{
+#ifdef USE_ATIME
struct iattr attr;
attr.ia_atime = CURRENT_TIME;
/* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
ll_inode_setattr(inode, &attr, 0);
+#else
+ /* update atime, but don't explicitly write it out just this change */
+ inode->i_atime = CURRENT_TIME;
+#endif
}
int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
- void *data, __u32 data_len, int flag)
+ void *data, int flag)
{
struct inode *inode = data;
struct lustre_handle lockh = { 0, 0 };
int rc;
ENTRY;
- if (data_len != sizeof(struct inode))
- LBUG();
-
if (inode == NULL)
LBUG();
static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
loff_t *ppos)
{
- struct ll_file_data *fd = (struct ll_file_data *)filp->private_data;
+ struct ll_file_data *fd = filp->private_data;
struct inode *inode = filp->f_dentry->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct lustre_handle lockh = { 0, 0 };
ssize_t retval;
ENTRY;
- /* If we don't refresh the file size, generic_file_read may not even
- * call us */
- retval = ll_file_size(inode, lsm);
- if (retval < 0) {
- CERROR("ll_file_size: "LPSZ"\n", retval);
- RETURN(retval);
- }
-
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
!(sbi->ll_flags & LL_SBI_NOLCK)) {
struct ldlm_extent extent;
}
}
+ /* If we don't refresh the file size, generic_file_read may not even
+ * call us */
+ retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ if (retval < 0) {
+ CERROR("ll_file_size: "LPSZ"\n", retval);
+ RETURN(retval);
+ }
+
CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
inode->i_ino, count, *ppos);
retval = generic_file_read(filp, buf, count, ppos);
static ssize_t
ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
- struct ll_file_data *fd = (struct ll_file_data *)file->private_data;
+ struct ll_file_data *fd = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct lustre_handle lockh = { 0, 0 }, eof_lockh = { 0, 0 };
ENTRY;
if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
- struct obdo *oa;
-
- oa = obdo_alloc();
- if (!oa)
- RETURN(-ENOMEM);
-
err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockh);
- if (err) {
- obdo_free(oa);
+ if (err)
RETURN(err);
- }
- oa->o_id = lsm->lsm_object_id;
- oa->o_mode = inode->i_mode;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS;
- obd_handle2oa(oa, &fd->fd_osthandle);
- retval = obd_getattr(&sbi->ll_osc_conn, oa, lsm);
- if (retval) {
- obdo_free(oa);
+ /* Get size here so we know extent to enqueue write lock on. */
+ retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ if (retval)
GOTO(out_eof, retval);
- }
- *ppos = oa->o_size;
- obdo_to_inode(inode, oa, oa->o_valid);
- obdo_free(oa);
+ *ppos = inode->i_size;
}
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
retval = generic_file_write(file, buf, count, ppos);
- if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
- sbi->ll_flags & LL_SBI_NOLCK) {
+ if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
+ !(sbi->ll_flags & LL_SBI_NOLCK)) {
err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PW, &lockh);
- if (err != ELDLM_OK) {
+ if (err != ELDLM_OK)
CERROR("lock cancel: err: %d\n", err);
- GOTO(out_eof, retval = err);
- }
}
EXIT;
out_eof:
if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
err = ll_size_unlock(inode, lsm, LCK_PW, &eof_lockh);
- if (err && !retval)
- retval = err;
+ if (err)
+ CERROR("ll_size_unlock: %d\n", err);
}
return retval;
unsigned long arg)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct lustre_handle *conn;
+ struct lustre_handle *conn = ll_i2obdconn(inode);
struct lov_stripe_md *lsm;
int rc;
ENTRY;
CERROR("stripe already set for ino %lu\n", inode->i_ino);
/* If we haven't already done the open, do so now */
if (file->f_flags & O_LOV_DELAY_CREATE) {
- int rc2 = ll_file_open(inode, file);
+ int rc2 = ll_osc_open(conn, inode, file, lsm);
if (rc2)
RETURN(rc2);
}
RETURN(-EALREADY);
}
- conn = ll_i2obdconn(inode);
-
rc = obd_iocontrol(LL_IOC_LOV_SETSTRIPE, conn, 0, &lsm, (void *)arg);
- if (!rc)
- rc = ll_create_open_obj(conn, inode, file, lsm);
+ if (rc) {
+ up(&lli->lli_open_sem);
+ RETURN(rc);
+ }
+ rc = ll_create_obj(conn, inode, file, lsm);
up(&lli->lli_open_sem);
if (rc) {
int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg)
{
- struct ll_file_data *fd = (struct ll_file_data *)file->private_data;
+ struct ll_file_data *fd = file->private_data;
struct lustre_handle *conn;
int flags;
switch(cmd) {
+ case TCGETS:
+ return -ENOTTY;
case LL_IOC_GETFLAGS:
/* Get the current value of the file flags */
return put_user(fd->fd_flags, (int *)arg);
switch (origin) {
case 2: {
struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = file->private_data;
- retval = ll_file_size(inode, lli->lli_smd);
+ retval = ll_file_size(inode, lli->lli_smd, &fd->fd_osthandle);
if (retval)
RETURN(retval);
return 0;
}
-static int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct lov_stripe_md *lsm;
RETURN(0);
}
- if (!ll_have_md_lock(dentry)) {
+ /* this is very tricky. it is unsafe to call ll_have_md_lock
+ when we have a referenced lock: because it may cause an RPC
+ below when the lock is marked CB_PENDING. That RPC may not
+ go out because someone else may be in another RPC waiting for
+ that lock*/
+ if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
+ !ll_have_md_lock(dentry)) {
struct ptlrpc_request *req = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
struct mds_body *body;
unsigned long valid = 0;
- int datalen = 0;
- int rc;
+ int datalen = 0, rc;
+ /* Why don't we update all valid MDS fields here, if we're
+ * doing an RPC anyways? -phil */
if (S_ISREG(inode->i_mode)) {
datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
valid |= OBD_MD_FLEASIZE;
}
body = lustre_msg_buf(req->rq_repmsg, 0);
- ll_update_inode(inode, body);
+ if (body->valid & OBD_MD_FLEASIZE)
+ ll_update_inode(inode, body,
+ lustre_msg_buf(req->rq_repmsg, 1));
+ else
+ ll_update_inode(inode, body, NULL);
ptlrpc_req_finished(req);
}
if (!lsm) /* object not yet allocated, don't validate size */
RETURN(0);
- RETURN(ll_file_size(inode, lsm));
+ /* XXX this should probably become an unconditional obd_getattr()
+ * so that we update the blocks count and mtime from the OST too.
+ */
+ RETURN(ll_file_size(inode, lsm, NULL));
}
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
revalidate: ll_inode_revalidate,
#endif
};
+
+struct inode_operations ll_special_inode_operations = {
+ setattr: ll_setattr,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ getattr: ll_getattr,
+#else
+ revalidate: ll_inode_revalidate,
+#endif
+};
#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
+/* /proc/lustre/llite mount point registration */
-int rd_path(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+#ifndef LPROCFS
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc)
{
return 0;
}
+#else
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct super_block *sb = (struct super_block*)data;
-
- len += snprintf(page, count, "%s\n", sb->s_type->name);
- return len;
-}
-
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct super_block *sb = (struct super_block*)data;
- struct statfs mystats;
-
- (sb->s_op->statfs)(sb, &mystats);
- len += snprintf(page, count, "%lu\n", mystats.f_bsize);
- return len;
-
-}
+long long mnt_instance;
-int rd_kbytestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+static inline int lprocfs_llite_statfs(void *data, struct statfs *sfs)
{
- int len = 0;
struct super_block *sb = (struct super_block*)data;
- struct statfs mystats;
- __u32 blk_size;
- __u64 result;
-
- (sb->s_op->statfs)(sb, &mystats);
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_blocks;
-
- while(blk_size >>= 1)
- result <<= 1;
-
- len += snprintf(page, count, LPU64"\n", result);
- return len;
+ return (sb->s_op->statfs)(sb, sfs);
}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_llite_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_llite_statfs);
-int rd_kbytesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+int rd_path(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
- int len = 0;
- struct super_block *sb = (struct super_block*)data;
- struct statfs mystats;
- __u32 blk_size;
- __u64 result;
-
- (sb->s_op->statfs)(sb, &mystats);
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_bfree;
-
- while(blk_size >>= 1)
- result <<= 1;
-
- len += snprintf(page, count, LPU64"\n", result);
- return len;
+ return 0;
}
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
- int len = 0;
struct super_block *sb = (struct super_block*)data;
- struct statfs mystats;
- (sb->s_op->statfs)(sb, &mystats);
- len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
- return len;
+ *eof = 1;
+ return snprintf(page, count, "%s\n", sb->s_type->name);
}
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
- int len = 0;
- struct super_block *sb = (struct super_block*)data;
- struct statfs mystats;
+ struct super_block *sb = (struct super_block *)data;
- (sb->s_op->statfs)(sb, &mystats);
- len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_ffree));
- return len;
+ *eof = 1;
+ return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
}
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct super_block *sb = (struct super_block*)data;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- len += snprintf(page, count, "%s\n", sbi->ll_sb_uuid);
-
- return len;
-
-}
-int rd_dev_name(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_name);
- return len;
-}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", rd_sb_uuid, 0, 0 },
+ { "mntpt_path", rd_path, 0, 0 },
+ { "fstype", rd_fstype, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "kbytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { 0 }
+};
-int rd_dev_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+#define MAX_STRING_SIZE 128
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc)
{
- int len = 0;
- struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_uuid);
- return len;
-}
+ struct lprocfs_vars lvars[2];
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_device *obd;
+ char name[MAX_STRING_SIZE + 1];
+ struct obd_uuid uuid;
+ int err;
+ ENTRY;
+ memset(lvars, 0, sizeof(lvars));
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/mntpt_path", rd_path, 0, 0},
- {"status/fstype", rd_fstype, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytestotal",rd_kbytestotal, 0, 0},
- {"status/kbytesfree", rd_kbytesfree, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {0}
-};
+ name[MAX_STRING_SIZE] = '\0';
+ lvars[0].name = name;
-/*
- * Proc registration function for Lustre
- * file system
- */
+ /* Mount info */
+ snprintf(name, MAX_STRING_SIZE, "fs%llu", mnt_instance);
+ mnt_instance++;
+ sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
+ if (IS_ERR(sbi->ll_proc_root))
+ RETURN(err = PTR_ERR(sbi->ll_proc_root));
-#define MAX_STRING_SIZE 100
-void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
-{
- char mnt_name[MAX_STRING_SIZE+1];
- char uuid_name[MAX_STRING_SIZE+1];
- struct lprocfs_vars d_vars[3];
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device* obd;
- int err;
+ /* Static configuration info */
+ err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
+ if (err)
+ RETURN(err);
- /* Register this mount instance with LProcFS */
- snprintf(mnt_name, MAX_STRING_SIZE, "mount_%s", sbi->ll_sb_uuid);
- mnt_name[MAX_STRING_SIZE] = '\0';
- sbi->ll_proc_root = lprocfs_reg_mnt(mnt_name);
- if (sbi->ll_proc_root == NULL) {
- CDEBUG(D_OTHER, "Could not register FS");
- return;
- }
- /* Add the static configuration info */
- err = lprocfs_add_vars(sbi->ll_proc_root,status_var_nm_1, sb);
- if (err) {
- CDEBUG(D_OTHER, "Unable to add procfs variables\n");
- return;
- }
- /* MDC */
- obd = class_uuid2obd(mdc);
- snprintf(mnt_name, MAX_STRING_SIZE, "status/%s/common_name",
- obd->obd_type->typ_name);
- mnt_name[MAX_STRING_SIZE] = '\0';
- memset(d_vars, 0, sizeof(d_vars));
- d_vars[0].read_fptr = rd_dev_name;
- d_vars[0].write_fptr = NULL;
- d_vars[0].name = mnt_name;
- snprintf(uuid_name, MAX_STRING_SIZE, "status/%s/uuid",
+ /* MDC info */
+ strncpy(uuid.uuid, mdc, sizeof(uuid.uuid));
+ obd = class_uuid2obd(&uuid);
+ snprintf(name, MAX_STRING_SIZE, "%s/common_name",
obd->obd_type->typ_name);
- uuid_name[MAX_STRING_SIZE] = '\0';
- d_vars[1].read_fptr = rd_dev_uuid;
- d_vars[1].write_fptr = NULL;
- d_vars[1].name = uuid_name;
-
- err = lprocfs_add_vars(sbi->ll_proc_root, d_vars, obd);
- if (err) {
- CDEBUG(D_OTHER, "Unable to add fs proc dynamic variables\n");
- return;
- }
- /* OSC or LOV*/
- obd = class_uuid2obd(osc);
-
- /* Reuse mnt_name */
- snprintf(mnt_name, MAX_STRING_SIZE,
- "status/%s/common_name", obd->obd_type->typ_name);
- mnt_name[MAX_STRING_SIZE] = '\0';
- memset(d_vars, 0, sizeof(d_vars));
- d_vars[0].read_fptr = rd_dev_name;
- d_vars[0].write_fptr = NULL;
- d_vars[0].name = mnt_name;
-
- snprintf(uuid_name, MAX_STRING_SIZE, "status/%s/uuid",
+ lvars[0].read_fptr = lprocfs_rd_name;
+ err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+ if (err)
+ RETURN(err);
+
+ snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
+ lvars[0].read_fptr = lprocfs_rd_uuid;
+ err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+ if (err < 0)
+ RETURN(err);
+
+ /* OSC */
+ strncpy(uuid.uuid, osc, sizeof(uuid.uuid));
+ obd = class_uuid2obd(&uuid);
+
+ snprintf(name, MAX_STRING_SIZE, "%s/common_name",
obd->obd_type->typ_name);
- uuid_name[MAX_STRING_SIZE] = '\0';
- d_vars[1].read_fptr = rd_dev_uuid;
- d_vars[1].write_fptr = NULL;
- d_vars[1].name = uuid_name;
-
- err = lprocfs_add_vars(sbi->ll_proc_root, d_vars, obd);
- if (err) {
- CDEBUG(D_OTHER, "Unable to add fs proc dynamic variables\n");
- return;
- }
+ lvars[0].read_fptr = lprocfs_rd_name;
+ err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+ if (err)
+ RETURN(err);
+
+ snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
+ lvars[0].read_fptr = lprocfs_rd_uuid;
+ err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
+
+ RETURN(err);
}
+
#undef MAX_STRING_SIZE
+#endif /* LPROCFS */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
+ * This file is part of Lustre, http://www.lustre.org.
*
- * from
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * linux/fs/ext2/namei.c
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * derived in small part from linux/fs/ext2/namei.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* David S. Miller (davem@caip.rutgers.edu), 1995
* Directory entry file type support and forward compatibility hooks
* for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
- *
- * Changes for use in OBDFS
- * Copyright (c) 1999, Seagate Technology Inc.
- * Copyright (C) 2001, Cluster File Systems, Inc.
- * Rewritten based on recent ext2 page cache use.
- *
*/
#include <linux/fs.h>
#include <linux/lustre_lite.h>
#include <linux/lustre_dlm.h>
-extern struct address_space_operations ll_aops;
+/* from dcache.c */
+extern void ll_set_dd(struct dentry *de);
/* from super.c */
extern void ll_change_inode(struct inode *inode);
return 0;
/* Apply the attributes in 'opaque' to this inode */
- ll_update_inode(inode, body);
+ ll_update_inode(inode, body, lic->lic_lmm);
return 1;
}
static int ll_intent_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
- if ((it->it_op & (IT_CREAT | IT_MKDIR | IT_SETATTR | IT_MKNOD))) {
+ if (it->it_op & (IT_CREAT | IT_SETATTR))
return LCK_PW;
- } else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_UNLINK |
- IT_RMDIR | IT_RENAME | IT_RENAME2 | IT_READLINK|
- IT_LINK | IT_LINK2 | IT_LOOKUP | IT_SYMLINK)) {
+ else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
return LCK_PR;
- }
LBUG();
RETURN(-EINVAL);
}
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
+int ll_it_open_error(int phase, struct lookup_intent *it)
+{
+ if (it->it_disposition & IT_OPEN_OPEN) {
+ if (phase == IT_OPEN_OPEN)
+ return it->it_status;
+ else
+ return 0;
+ }
+
+ if (it->it_disposition & IT_OPEN_CREATE) {
+ if (phase == IT_OPEN_CREATE)
+ return it->it_status;
+ else
+ return 0;
+ }
+
+ if (it->it_disposition & IT_OPEN_LOOKUP) {
+ if (phase == IT_OPEN_LOOKUP)
+ return it->it_status;
+ else
+ return 0;
+ }
+ LBUG();
+ return 0;
+}
+
+#define IT_ENQ_COMPLETE (1<<16)
int ll_intent_lock(struct inode *parent, struct dentry **de,
- struct lookup_intent *it,
- intent_finish_cb intent_finish)
+ struct lookup_intent *it, intent_finish_cb intent_finish)
{
struct dentry *dentry = *de;
struct ll_sb_info *sbi = ll_i2sbi(parent);
struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
struct ptlrpc_request *request = NULL;
char *data = NULL;
- int rc, lock_mode, datalen = 0, offset, flag = LL_LOOKUP_POSITIVE;
+ int rc = 0, datalen = 0, offset, flag = 0;
obd_id ino = 0;
-
ENTRY;
if (it == NULL)
if (dentry->d_name.len > EXT2_NAME_LEN)
RETURN(-ENAMETOOLONG);
- lock_mode = ll_intent_to_lock_mode(it);
- if (it->it_op & IT_SYMLINK) {
- data = it->it_data;
- datalen = strlen(data) + 1;
- it->it_data = NULL;
+ if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+ rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
+ ll_intent_to_lock_mode(it), parent, dentry,
+ &lockh, data, datalen, parent,
+ sizeof(*parent));
+ if (rc < 0)
+ RETURN(rc);
+ memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
}
- rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it, lock_mode, parent,
- dentry, &lockh, data, datalen, parent,sizeof(*parent));
- if (rc < 0)
- RETURN(rc);
- memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
-
request = (struct ptlrpc_request *)it->it_data;
- /* it_disposition == 1 indicates that the server performed the
+
+ /* non-zero it_disposition indicates that the server performed the
* intent on our behalf. */
if (it->it_disposition) {
struct mds_body *mds_body;
int mode;
- obd_flag valid;
/* This long block is all about fixing up the local
* state so that it is correct as of the moment
ino = mds_body->fid1.id;
mode = mds_body->mode;
- if (it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK | IT_MKNOD)) {
+ /*We were called from revalidate2: did we find the same inode?*/
+ if ((*de)->d_inode &&
+ (ino != (*de)->d_inode->i_ino ||
+ mds_body->fid1.generation != (*de)->d_inode->i_generation)) {
+ it->it_disposition |= IT_ENQ_COMPLETE;
+ RETURN(-ESTALE);
+ }
+
+ /* If we're doing an IT_OPEN which did not result in an actual
+ * successful open, then we need to remove the bit which saves
+ * this request for unconditional replay. */
+ if (it->it_op & IT_OPEN &&
+ (!(it->it_disposition & IT_OPEN_OPEN) ||
+ it->it_status != 0))
+ request->rq_flags &= ~PTL_RPC_FL_REPLAY;
+
+ if (it->it_op & IT_CREAT) {
mdc_store_inode_generation(request, 2, 1);
- /* For create ops, we want the lookup to be negative,
- * unless the create failed in a way that indicates
- * that the file is already there */
- if (it->it_status == 0)
- atomic_inc(&request->rq_refcount);
- if (it->it_status != -EEXIST)
- GOTO(out, flag = LL_LOOKUP_NEGATIVE);
- /*
- * Fall through to update attibutes: it may already
- * have appeared in the namespace of another client
- */
+ /* The server will return to us, in it_disposition, an
+ * indication of exactly what it_status refers to.
+ *
+ * If IT_OPEN_OPEN is set, then it_status refers to the
+ * open() call, otherwise if IT_OPEN_CREATE is set, then
+ * it status is the creation failure mode. In either
+ * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
+ * indicating whether the child lookup was successful.
+ *
+ * Else, if IT_OPEN_LOOKUP then it_status is the rc
+ * of the child lookup.
+ *
+ * Finally, if none of the bits are set, then the
+ * failure occurred while looking up the parent. */
+ rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
+ if (rc)
+ GOTO(drop_req, rc);
+
+ if (it->it_disposition & IT_OPEN_CREATE)
+ ptlrpc_request_addref(request);
+
+ if (it->it_disposition & IT_OPEN_NEG)
+ flag = LL_LOOKUP_NEGATIVE;
+ else
+ flag = LL_LOOKUP_POSITIVE;
+ } else if (it->it_op == IT_OPEN) {
+ LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
+
+ rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
+ if (rc)
+ GOTO(drop_req, rc);
+
+ if (it->it_disposition & IT_OPEN_OPEN)
+ ptlrpc_request_addref(request);
+
+ if (it->it_disposition & IT_OPEN_NEG)
+ flag = LL_LOOKUP_NEGATIVE;
+ else
+ flag = LL_LOOKUP_POSITIVE;
} else if (it->it_op & (IT_GETATTR | IT_SETATTR | IT_LOOKUP |
IT_READLINK)) {
/* For check ops, we want the lookup to succeed */
it->it_data = NULL;
if (it->it_status)
- GOTO(out, flag = LL_LOOKUP_NEGATIVE);
- /* Fall through to update attibutes. */
- } else if (it->it_op & (IT_RENAME | IT_LINK)) {
- /* For rename, we want the source lookup to succeed */
- if (it->it_status) {
- it->it_data = NULL;
- GOTO(drop_req, rc = it->it_status);
- }
- /* Fall through to update attibutes. */
- } else if (it->it_op & (IT_UNLINK | IT_RMDIR)) {
- /* For remove ops, we want the lookup to succeed unless
- * the file truly doesn't exist */
- it->it_data = NULL;
- if (it->it_status == -ENOENT)
- GOTO(out, flag = LL_LOOKUP_NEGATIVE);
- /* No point in updating attributes that we're about to
- * unlink. -phil */
- GOTO(out, flag = LL_LOOKUP_POSITIVE);
- } else if (it->it_op == IT_OPEN) {
- it->it_data = NULL;
- if (it->it_status && it->it_status != -EEXIST)
- GOTO(out, flag = LL_LOOKUP_NEGATIVE);
- /* Fall through to update attibutes. */
- } else if (it->it_op & (IT_RENAME2 | IT_LINK2)) {
- it->it_data = NULL;
- /* This means the target lookup is negative */
- if (mds_body->valid == 0)
- GOTO(out, flag = LL_LOOKUP_NEGATIVE);
- /* XXX bug 289: should we maybe fall through here? -p */
- GOTO(out, flag = LL_LOOKUP_POSITIVE);
- }
-
- /* Do a getattr now that we have the lock, and fetch the
- * up-to-date stripe MD at the same time.
- */
- valid = OBD_MD_FLNOTOBD;
- if (it->it_op == IT_READLINK) {
- datalen = mds_body->size;
- valid |= OBD_MD_LINKNAME;
- } else if (S_ISREG(mode)) {
- datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
- valid |= OBD_MD_FLEASIZE;
- }
- ptlrpc_req_finished(request);
- request = NULL;
- rc = mdc_getattr(&sbi->ll_mdc_conn, ino, mode,
- valid, datalen, &request);
- if (rc) {
- CERROR("failure %d inode "LPX64"\n", rc, ino);
- GOTO(drop_req, rc = -abs(rc));
- }
- offset = 0;
+ flag = LL_LOOKUP_NEGATIVE;
+ else
+ flag = LL_LOOKUP_POSITIVE;
+ } else
+ LBUG();
} else {
obd_flag valid;
int mode;
if (S_ISREG(mode)) {
datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL),
valid |= OBD_MD_FLEASIZE;
+ } else {
+ valid |= OBD_MD_FLBLOCKS;
}
rc = mdc_getattr(&sbi->ll_mdc_conn, ino, mode, valid,
}
}
- out:
if (intent_finish != NULL) {
rc = intent_finish(flag, request, de, it, offset, ino);
dentry = *de; /* intent_finish may change *de */
ptlrpc_req_finished(request);
}
- if (it->it_disposition && it->it_op & (IT_RENAME | IT_LINK))
- it->it_data = dentry;
-
- /* this places the intent in the dentry so that the vfs_xxx
- * operation can lay its hands on it; but that is not
- * always needed...
- */
- if ( // it->it_status == 0 &&
- it->it_op != IT_RENAME &&
- it->it_op != IT_LINK &&
- it->it_op != IT_SETATTR &&
- it->it_op != IT_GETATTR &&
- it->it_op != IT_READDIR &&
- it->it_op != IT_LOOKUP) {
+ /* This places the intent in the dentry so that the vfs_xxx
+ * operation can lay its hands on it; but that is not always
+ * needed... (we need to save it in the GETATTR case for the
+ * benefit of ll_inode_revalidate -phil) */
+ if (it->it_op & (IT_OPEN | IT_GETATTR))
LL_SAVE_INTENT(dentry, it);
- } else {
+ else
CDEBUG(D_DENTRY,
"D_IT dentry %p fsdata %p intent: %s status %d\n",
dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
it->it_status);
- }
- if (rc < 0 || it->it_op == IT_LOOKUP)
+ if (it->it_op == IT_LOOKUP)
ll_intent_release(dentry, it);
RETURN(rc);
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
/* We are called here with 'de' already on the aliases list. */
- if (dentry == de) {
+ if (dentry == de) {
CERROR("whoops\n");
continue;
}
d_rehash(dentry);
atomic_inc(&dentry->d_count);
iput(inode);
+ dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
return dentry;
}
struct inode *inode = NULL;
struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lmm = NULL};
- if (flag == LL_LOOKUP_POSITIVE) {
+ if (!(flag & LL_LOOKUP_NEGATIVE)) {
ENTRY;
lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset);
/* We asked for a lock on the directory, and may have been
* granted a lock on the inode. Just in case, fixup the data
* pointer. */
- ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
- inode, sizeof(*inode));
-
- EXIT;
+ mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+ inode);
} else {
ENTRY;
}
ptlrpc_req_finished(request);
dentry->d_op = &ll_d_ops;
- if (ll_d2d(dentry) == NULL) {
- ll_set_dd(dentry);
- }
+ ll_set_dd(dentry);
if (dentry == saved)
d_add(dentry, inode);
int rc;
ENTRY;
+ if (it && it->it_op == IT_TRUNC)
+ it->it_op = IT_SETATTR;
+
rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
if (rc < 0) {
- CERROR("ll_intent_lock: %d\n", rc);
+ CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
RETURN(ERR_PTR(rc));
}
RETURN(dentry);
}
+/* We depend on "mode" being set with the proper file type/umask by now */
static struct inode *ll_create_node(struct inode *dir, const char *name,
int namelen, const void *data, int datalen,
int mode, __u64 extra,
ENTRY;
if (it && it->it_disposition) {
- int rc = it->it_status;
- if (rc) {
- CERROR("error creating MDS inode for %*s: rc = %d\n",
- namelen, name, rc);
- RETURN(ERR_PTR(rc));
- }
ll_invalidate_inode_pages(dir);
request = it->it_data;
body = lustre_msg_buf(request->rq_repmsg, 1);
/* We asked for a lock on the directory, but were
* granted a lock on the inode. Since we finally have
* an inode pointer, stuff it in the lock. */
- ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
- inode, sizeof(*inode));
+ mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+ inode);
}
EXIT;
{
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dir);
+ struct mds_body *body;
+ struct lov_stripe_md *lsm = NULL;
+ struct lustre_handle lockh;
+ struct lookup_intent it = { .it_op = IT_UNLINK };
+ struct obdo *oa;
int err;
-
- ENTRY;
-
- err = mdc_unlink(&sbi->ll_mdc_conn, dir, child, mode, name, len,
- &request);
- ptlrpc_req_finished(request);
-
- RETURN(err);
-}
-
-int ll_mdc_link(struct dentry *src, struct inode *dir,
- const char *name, int len)
-{
- struct ptlrpc_request *request = NULL;
- int err;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
-
+ struct mdc_unlink_data data;
ENTRY;
- err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request);
- ptlrpc_req_finished(request);
-
- RETURN(err);
-}
-
-int ll_mdc_rename(struct inode *src, struct inode *tgt,
- struct dentry *old, struct dentry *new)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(src);
- int err;
-
- ENTRY;
+ data.unl_dir = dir;
+ data.unl_de = child;
+ data.unl_mode = mode;
+ data.unl_name = name;
+ data.unl_len = len;
+
+ err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX, dir,
+ NULL, &lockh, NULL, 0, &data, sizeof(data));
+ mdc_put_rpc_lock(&mdc_rpc_lock, &it);
+ request = (struct ptlrpc_request *)it.it_data;
+ if (err < 0)
+ GOTO(out, err);
+ if (it.it_status)
+ GOTO(out, err = it.it_status);
+ err = 0;
+
+ body = lustre_msg_buf(request->rq_repmsg, 1);
+ LASSERT(body != NULL);
+ if (!(body->valid & OBD_MD_FLEASIZE))
+ GOTO(out, 0);
+
+ /* The MDS sent back the EA because we unlinked the last reference
+ * to this file. Use this EA to unlink the objects on the OST */
+ err = obd_unpackmd(ll_i2obdconn(dir), &lsm,
+ lustre_msg_buf(request->rq_repmsg, 2));
+ if (err < 0)
+ CERROR("obd_unpackmd: %d\n", err);
+
+ oa = obdo_alloc();
+ if (oa == NULL)
+ GOTO(out_unlock, err = -ENOMEM);
+
+ oa->o_id = lsm->lsm_object_id;
+ oa->o_mode = body->mode & S_IFMT;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
+
+ err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+ obdo_free(oa);
+ if (err)
+ CERROR("obd destroy objid 0x"LPX64" error %d\n",
+ lsm->lsm_object_id, err);
- err = mdc_rename(&sbi->ll_mdc_conn, src, tgt,
- old->d_name.name, old->d_name.len,
- new->d_name.name, new->d_name.len, &request);
+ obd_free_memmd(ll_i2obdconn(dir), &lsm);
+ out_unlock:
+ ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
+ out:
ptlrpc_req_finished(request);
-
- RETURN(err);
+ return err;
}
/*
int rc = 0;
ENTRY;
- LL_GET_INTENT(dentry, it);
+ it = dentry->d_it;
+
+ rc = ll_it_open_error(IT_OPEN_CREATE, it);
+ if (rc) {
+ LL_GET_INTENT(dentry, it);
+ ptlrpc_req_finished(it->it_data);
+ RETURN(rc);
+ }
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
NULL, 0, mode, 0, it);
- if (IS_ERR(inode))
+ if (IS_ERR(inode)) {
+ LL_GET_INTENT(dentry, it);
RETURN(PTR_ERR(inode));
+ }
+ /* no directory data updates when intents rule */
if (it && it->it_disposition) {
d_instantiate(dentry, inode);
- } else {
- /* no directory data updates when intents rule */
- rc = ext2_add_nondir(dentry, inode);
+ RETURN(0);
}
+ rc = ext2_add_nondir(dentry, inode);
RETURN(rc);
}
+static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
+ int rdev)
+{
+ struct ptlrpc_request *request = NULL;
+ time_t time = CURRENT_TIME;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int err = -EMLINK;
+ ENTRY;
+
+ if (dir->i_nlink >= EXT2_LINK_MAX)
+ RETURN(err);
+
+ mode &= ~current->fs->umask;
+
+ switch (mode & S_IFMT) {
+ case 0: case S_IFREG:
+ mode |= S_IFREG; /* for mode = 0 case, fallthrough */
+ case S_IFCHR: case S_IFBLK:
+ case S_IFIFO: case S_IFSOCK:
+ err = mdc_create(&sbi->ll_mdc_conn, dir, name, len, NULL, 0,
+ mode, current->fsuid, current->fsgid, time,
+ rdev, &request);
+ ptlrpc_req_finished(request);
+ break;
+ case S_IFDIR:
+ err = -EPERM;
+ break;
+ default:
+ err = -EINVAL;
+ }
+ RETURN(err);
+}
+
static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
int rdev)
{
LL_GET_INTENT(dentry, it);
+ if ((mode & S_IFMT) == 0)
+ mode |= S_IFREG;
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
NULL, 0, mode, rdev, it);
return rc;
}
+static int ll_symlink2(struct inode *dir, const char *name, int len,
+ const char *tgt)
+{
+ struct ptlrpc_request *request = NULL;
+ time_t time = CURRENT_TIME;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int err = -EMLINK;
+ ENTRY;
+
+ if (dir->i_nlink >= EXT2_LINK_MAX)
+ RETURN(err);
+
+ err = mdc_create(&sbi->ll_mdc_conn, dir, name, len,
+ tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
+ current->fsuid, current->fsgid, time, 0, &request);
+ ptlrpc_req_finished(request);
+ RETURN(err);
+}
+
static int ll_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
RETURN(err);
}
+static int ll_link2(struct inode *src, struct inode *dir,
+ const char *name, int len)
+{
+ struct ptlrpc_request *request = NULL;
+ int err;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+
+ ENTRY;
+
+ err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request);
+ ptlrpc_req_finished(request);
+
+ RETURN(err);
+}
+
static int ll_link(struct dentry *old_dentry, struct inode * dir,
struct dentry *dentry)
{
if (inode->i_nlink >= EXT2_LINK_MAX)
return -EMLINK;
- rc = ll_mdc_link(old_dentry, dir,
- dentry->d_name.name, dentry->d_name.len);
+ rc = ll_link2(old_dentry->d_inode, dir,
+ dentry->d_name.name, dentry->d_name.len);
if (rc)
RETURN(rc);
return ext2_add_nondir(dentry, inode);
}
+static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+{
+ struct ptlrpc_request *request = NULL;
+ time_t time = CURRENT_TIME;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int err = -EMLINK;
+ ENTRY;
+
+ if (dir->i_nlink >= EXT2_LINK_MAX)
+ RETURN(err);
+
+ mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
+ err = mdc_create(&sbi->ll_mdc_conn, dir, name, len, NULL, 0,
+ mode, current->fsuid, current->fsgid,
+ time, 0, &request);
+ ptlrpc_req_finished(request);
+ RETURN(err);
+}
+
+
static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
struct lookup_intent *it;
goto out;
}
+static int ll_rmdir2(struct inode *dir, const char *name, int len)
+{
+ int rc;
+ ENTRY;
+
+ rc = ll_mdc_unlink(dir, NULL, S_IFDIR, name, len);
+ RETURN(rc);
+}
+
+static int ll_unlink2(struct inode *dir, const char *name, int len)
+{
+ int rc;
+ ENTRY;
+
+ rc = ll_mdc_unlink(dir, NULL, S_IFREG, name, len);
+ RETURN(rc);
+}
+
static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
struct lookup_intent *it, __u32 mode)
{
struct ext2_dir_entry_2 * de;
struct page * page;
int rc = 0;
+ ENTRY;
if (it && it->it_disposition) {
rc = it->it_status;
ll_invalidate_inode_pages(dir);
inode->i_ctime = dir->i_ctime;
+ EXIT;
out_dec:
ext2_dec_count(inode);
out:
static int ll_unlink(struct inode *dir, struct dentry *dentry)
{
struct lookup_intent * it;
+ ENTRY;
LL_GET_INTENT(dentry, it);
- return ll_common_unlink(dir, dentry, it, S_IFREG);
+ RETURN(ll_common_unlink(dir, dentry, it, S_IFREG));
}
static int ll_rmdir(struct inode *dir, struct dentry *dentry)
RETURN(rc);
}
+static int ll_rename2(struct inode *src, struct inode *tgt,
+ const char *oldname, int oldlen,
+ const char *newname, int newlen)
+{
+ struct ptlrpc_request *request = NULL;
+ struct ll_sb_info *sbi = ll_i2sbi(src);
+ int err;
+ ENTRY;
+
+ err = mdc_rename(&sbi->ll_mdc_conn, src, tgt,
+ oldname, oldlen, newname, newlen, &request);
+ ptlrpc_req_finished(request);
+
+ RETURN(err);
+}
+
+
+
static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
struct inode * new_dir, struct dentry * new_dentry)
{
GOTO(out, err = it->it_status);
}
- err = ll_mdc_rename(old_dir, new_dir, old_dentry, new_dentry);
+ err = ll_rename2(old_dir, new_dir,
+ old_dentry->d_name.name, old_dentry->d_name.len,
+ new_dentry->d_name.name, new_dentry->d_name.len);
if (err)
goto out;
return err;
}
+extern int ll_inode_revalidate(struct dentry *dentry);
struct inode_operations ll_dir_inode_operations = {
- create: ll_create,
- lookup2: ll_lookup2,
- link: ll_link,
- unlink: ll_unlink,
- symlink: ll_symlink,
- mkdir: ll_mkdir,
- rmdir: ll_rmdir,
- mknod: ll_mknod,
- rename: ll_rename,
- setattr: ll_setattr
+ create: ll_create,
+ lookup2: ll_lookup2,
+ link: ll_link,
+ link2: ll_link2,
+ unlink: ll_unlink,
+ unlink2: ll_unlink2,
+ symlink: ll_symlink,
+ symlink2: ll_symlink2,
+ mkdir: ll_mkdir,
+ mkdir2: ll_mkdir2,
+ rmdir: ll_rmdir,
+ rmdir2: ll_rmdir2,
+ mknod: ll_mknod,
+ mknod2: ll_mknod2,
+ rename: ll_rename,
+ rename2: ll_rename2,
+ setattr: ll_setattr,
+ revalidate: ll_inode_revalidate,
};
*
* Lustre Lite I/O Page Cache
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
pg.flag = create ? OBD_BRW_CREATE : 0;
set->brw_callback = ll_brw_sync_wait;
- rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, set);
+ rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, set, NULL);
if (rc) {
if (rc != -EIO)
CERROR("error from obd_brw: rc = %d\n", rc);
/* truncate == punch from new size to absolute end of file */
err = obd_punch(ll_i2obdconn(inode), &oa, lsm, inode->i_size,
- OBD_OBJECT_EOF);
+ OBD_OBJECT_EOF, NULL);
if (err)
CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
else
if (from == 0 && to == PAGE_SIZE)
RETURN(0);
- /* We are writing to a new page, no need to read old data */
+ /* If are writing to a new page, no need to read old data. If we
+ * haven't already gotten the file size in ll_file_write() since
+ * we got our extent lock, we need to verify it here before we
+ * overwrite some other node's write (bug 445).
+ */
if (inode->i_size <= offset) {
- memset(addr, 0, PAGE_SIZE);
- GOTO(prepare_done, rc=0);
+ if (!S_ISBLK(inode->i_mode) && !(file->f_flags & O_APPEND)) {
+ struct ll_file_data *fd = file->private_data;
+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+
+ rc = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ if (rc)
+ GOTO(prepare_done, rc);
+ }
+ if (inode->i_size <= offset) {
+ memset(addr, 0, PAGE_SIZE);
+ GOTO(prepare_done, rc=0);
+ }
}
rc = ll_brw(OBD_BRW_READ, inode, page, 0);
prepare_done:
if (!rc)
SetPageUptodate(page);
-
+ else
+ kunmap (page);
+
return rc;
}
pg.off, pg.count);
set->brw_callback = ll_brw_sync_wait;
- rc = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md, 1, &pg, set);
+ rc = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md, 1, &pg, set, NULL);
if (rc)
CERROR("error from obd_brw: rc = %d\n", rc);
else {
set->brw_callback = ll_brw_sync_wait;
rc = obd_brw(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
- ll_i2obdconn(inode), lsm, bufs_per_obdo, pga, set);
+ ll_i2obdconn(inode), lsm, bufs_per_obdo, pga, set, NULL);
if (rc)
CERROR("error from obd_brw: rc = %d\n", rc);
else {
*
* Lustre Light Super operations
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_LLITE
extern struct address_space_operations ll_dir_aops;
struct super_operations ll_super_operations;
+/* /proc/lustre/llite root that tracks llite mount points */
+struct proc_dir_entry *proc_lustre_fs_root;
+/* lproc_llite.c */
+extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb,
+ char *osc, char *mdc);
+
extern int ll_recover(struct recovd_data *, int);
extern int ll_commitcbd_setup(struct ll_sb_info *);
extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-extern void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
static char *ll_read_opt(const char *opt, char *data)
{
char *value;
struct ptlrpc_connection *mdc_conn;
struct ll_read_inode2_cookie lic;
class_uuid_t uuid;
+ struct obd_uuid param_uuid;
ENTRY;
INIT_LIST_HEAD(&sbi->ll_conn_chain);
INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
generate_random_uuid(uuid);
- class_uuid_unparse(uuid, sbi->ll_sb_uuid);
+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
sb->u.generic_sbp = sbi;
GOTO(out_free, sb = NULL);
}
- obd = class_uuid2obd(mdc);
+ strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
+ obd = class_uuid2obd(¶m_uuid);
if (!obd) {
CERROR("MDC %s: not setup or attached\n", mdc);
GOTO(out_free, sb = NULL);
}
- err = obd_connect(&sbi->ll_mdc_conn, obd, sbi->ll_sb_uuid,
+ err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid,
ptlrpc_recovd, ll_recover);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", mdc, err);
mdc_conn = sbi2mdc(sbi)->cl_import.imp_connection;
list_add(&mdc_conn->c_sb_chain, &sbi->ll_conn_chain);
- obd = class_uuid2obd(osc);
+ strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid));
+ obd = class_uuid2obd(¶m_uuid);
if (!obd) {
CERROR("OSC %s: not setup or attached\n", osc);
GOTO(out_mdc, sb = NULL);
}
- err = obd_connect(&sbi->ll_osc_conn, obd, sbi->ll_sb_uuid,
+ err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid,
ptlrpc_recovd, ll_recover);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", osc, err);
ptlrpc_req_finished(request);
request = NULL;
- ll_proc_namespace(sb, osc, mdc);
+
+ if (proc_lustre_fs_root) {
+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
+ osc, mdc);
+ if (err < 0)
+ CERROR("could not register mount in /proc/lustre");
+ }
out_dev:
if (mdc)
*/
mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
- lprocfs_dereg_mnt(sbi->ll_proc_root);
- sbi->ll_proc_root = NULL;
+ if (sbi->ll_proc_root) {
+ lprocfs_remove(sbi->ll_proc_root);
+ sbi->ll_proc_root = NULL;
+ }
obd_disconnect(&sbi->ll_mdc_conn);
obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
if (lli->lli_symlink_name) {
- OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
+ OBD_FREE(lli->lli_symlink_name,
+ strlen(lli->lli_symlink_name) + 1);
lli->lli_symlink_name = NULL;
}
EXIT;
}
+#if 0
static void ll_delete_inode(struct inode *inode)
{
ENTRY;
oa->o_id = lsm->lsm_object_id;
obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
- err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
+ err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
obdo_free(oa);
if (err)
- CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
- lsm->lsm_object_id, err);
+ CDEBUG(D_INODE,
+ "inode %lu obd_destroy objid "LPX64" error %d\n",
+ inode->i_ino, lsm->lsm_object_id, err);
}
out:
clear_inode(inode);
EXIT;
}
+#endif
/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
+static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
{
unsigned int ia_valid = attr->ia_valid;
int error = 0;
*/
attr->ia_valid &= ~ATTR_SIZE;
if (attr->ia_valid) {
- err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
+ err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, NULL, 0,
+ &request);
if (err)
- CERROR("mdc_setattr fails (%d)\n", err);
+ CERROR("mdc_setattr fails: err = %d\n", err);
ptlrpc_req_finished(request);
+ if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ struct obdo oa;
+ int err2;
+
+ CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
+ inode->i_ino, attr->ia_mtime);
+ oa.o_id = lsm->lsm_object_id;
+ oa.o_mode = S_IFREG;
+ oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
+ oa.o_mtime = attr->ia_mtime;
+ err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
+ if (err2) {
+ CERROR("obd_setattr fails: rc=%d\n", err);
+ if (!err)
+ err = err2;
+ }
+ }
}
RETURN(err);
RETURN(rc);
}
-void ll_update_inode(struct inode *inode, struct mds_body *body)
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+ struct lov_mds_md *lmm)
{
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ if (lmm != NULL)
+ obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lmm);
+
if (body->valid & OBD_MD_FLID)
inode->i_ino = body->ino;
if (body->valid & OBD_MD_FLATIME)
inode->i_rdev = body->rdev;
if (body->valid & OBD_MD_FLSIZE)
inode->i_size = body->size;
+ if (body->valid & OBD_MD_FLBLOCKS)
+ inode->i_blocks = body->blocks;
}
static void ll_read_inode2(struct inode *inode, void *opaque)
sema_init(&lli->lli_open_sem, 1);
atomic_set(&lli->lli_open_count, 0);
- /* core attributes first */
- ll_update_inode(inode, body);
-
LASSERT(!lli->lli_smd);
- if (lic && lic->lic_lmm)
- obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
+
+ /* core attributes first */
+ ll_update_inode(inode, body, lic ? lic->lic_lmm : NULL);
/* Get the authoritative file size */
if (lli->lli_smd && (inode->i_mode & S_IFREG)) {
int rc;
LASSERT(lli->lli_smd->lsm_object_id != 0);
- rc = ll_file_size(inode, lli->lli_smd);
+ rc = ll_file_size(inode, lli->lli_smd, NULL);
if (rc) {
CERROR("ll_file_size: %d\n", rc);
ll_clear_inode(inode);
inode->i_op = &ll_fast_symlink_inode_operations;
EXIT;
} else {
+ inode->i_op = &ll_special_inode_operations;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
EXIT;
}
list_entry(tmp, struct ptlrpc_request, rq_list);
CERROR("invalidating req xid "LPU64" op %d to %s:%d\n",
req->rq_xid, req->rq_reqmsg->opc,
- req->rq_connection->c_remote_uuid,
+ req->rq_connection->c_remote_uuid.uuid,
req->rq_import->imp_client->cli_request_portal);
req->rq_flags |= PTL_RPC_FL_ERR;
wake_up(&req->rq_wait_for_rep);
{
read_inode2: ll_read_inode2,
clear_inode: ll_clear_inode,
- delete_inode: ll_delete_inode,
+ // delete_inode: ll_delete_inode,
put_super: ll_put_super,
statfs: ll_statfs,
umount_begin: ll_umount_begin
static int __init init_lustre_lite(void)
{
- printk(KERN_INFO "Lustre Lite 0.5.14, info@clusterfs.com\n");
+ printk(KERN_INFO "Lustre Lite Client File System; "
+ "info@clusterfs.com\n");
ll_file_data_slab = kmem_cache_create("ll_file_data",
sizeof(struct ll_file_data), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (ll_file_data_slab == NULL)
return -ENOMEM;
+
+ proc_lustre_fs_root = proc_lustre_root ? proc_mkdir("llite", proc_lustre_root) : NULL;
+
return register_filesystem(&lustre_lite_fs_type);
}
{
unregister_filesystem(&lustre_lite_fs_type);
kmem_cache_destroy(ll_file_data_slab);
+
+ if (proc_lustre_fs_root) {
+ lprocfs_remove(proc_lustre_fs_root);
+ proc_lustre_fs_root = NULL;
+ }
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
+MODULE_DESCRIPTION("Lustre Lite Client File System");
MODULE_LICENSE("GPL");
module_init(init_lustre_lite);
*
* Lustre Light Super operations
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
- * Copryright (C) 2002 Cluster File Systems, Inc.
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_LLITE
extern struct address_space_operations ll_dir_aops;
struct super_operations ll_super_operations;
+/* /proc/lustre/llite root that tracks llite mount points */
+struct proc_dir_entry *proc_lustre_fs_root;
+/* lproc_llite.c */
+extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb,
+ char *osc, char *mdc);
+
extern int ll_init_inodecache(void);
extern void ll_destroy_inodecache(void);
extern int ll_recover(struct recovd_data *, int);
extern int ll_commitcbd_cleanup(struct ll_sb_info *);
int ll_read_inode2(struct inode *inode, void *opaque);
-extern void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
+extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
static char *ll_read_opt(const char *opt, char *data)
{
ptlrpc_req_finished(request);
request = NULL;
- ll_proc_namespace(sb, osc, mdc)
+
+ if (proc_lustre_fs_root) {
+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
+ osc, mdc);
+ if (err < 0)
+ CERROR("could not register mount in /proc/lustre");
+ }
+
out_dev:
if (mdc)
OBD_FREE(mdc, strlen(mdc) + 1);
*/
mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
- lprocfs_dereg_mnt(sbi->ll_proc_root);
+ if (sbi->ll_proc_root) {
+ lprocfs_remove(sbi->ll_proc_root);
sbi->ll_proc_root = NULL;
+ }
obd_disconnect(&sbi->ll_mdc_conn);
OBD_FREE(sbi, sizeof(*sbi));
if (attr->ia_valid) {
err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
if (err)
- CERROR("mdc_setattr fails (%d)\n", err);
+ CERROR("mdc_setattr fails: err = %d\n", err);
ptlrpc_req_finished(request);
+ if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ struct obdo oa;
+ int err;
+
+ CDEBUG(D_ERROR, "setting mtime on OST\n");
+ oa.o_id = lsm->lsm_object_id;
+ oa.o_mode = S_IFREG;
+ oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
+ oa.o_mtime = attr->ia_mtime;
+ err = obd_setattr(&sbi->ll_osc_conn, &oa, lsm);
+ if (err) {
+ CERROR("obd_setattr fails: rc=%d\n", err);
+ if (!rc)
+ rc = err;
+ }
+ }
}
RETURN(err);
RETURN(rc);
}
-void ll_update_inode(struct inode *inode, struct mds_body *body)
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+ struct lov_mds_md *lmm)
{
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ if (lmm != NULL)
+ obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lmm);
+
if (body->valid & OBD_MD_FLID)
inode->i_ino = body->ino;
if (body->valid & OBD_MD_FLATIME)
inode->i_rdev = to_kdev_t(body->rdev);
if (body->valid & OBD_MD_FLSIZE)
inode->i_size = body->size;
+ if (body->valid & OBD_MD_FLBLOCKS)
+ inode->i_blocks = body->blocks;
}
int ll_read_inode2(struct inode *inode, void *opaque)
sema_init(&lli->lli_open_sem, 1);
- /* core attributes first */
- ll_update_inode(inode, body);
-
LASSERT(!lli->lli_smd);
- if (lic && lic->lic_lmm)
- obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
+
+ /* core attributes first */
+ ll_update_inode(inode, body, lic ? lic->lic_lmm : NULL);
/* Get the authoritative file size */
if (lli->lli_smd && S_ISREG(inode->i_mode)) {
- rc = ll_file_size(inode, lli->lli_smd);
+ rc = ll_file_size(inode, lli->lli_smd, NULL);
if (rc) {
CERROR("ll_file_size: %d\n", rc);
ll_clear_inode(inode);
static int __init init_lustre_lite(void)
{
int rc;
- printk(KERN_INFO "Lustre Lite 0.5.14, info@clusterfs.com\n");
+ printk(KERN_INFO "Lustre Lite Client File System; "
+ "info@clusterfs.com\n");
rc = ll_init_inodecache();
if (rc)
return -ENOMEM;
ll_destroy_inodecache();
return -ENOMEM;
}
+
+ proc_lustre_fs_root = proc_lustre_root ?
+ proc_mkdir("llite", proc_lustre_root) : NULL;
+
return register_filesystem(&lustre_lite_fs_type);
}
unregister_filesystem(&lustre_lite_fs_type);
ll_destroy_inodecache();
kmem_cache_destroy(ll_file_data_slab);
+ if (proc_lustre_fs_root) {
+ lprocfs_remove(proc_lustre_fs_root);
+ proc_lustre_fs_root = NULL;
+ }
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
+MODULE_DESCRIPTION("Lustre Lite Client File System");
MODULE_LICENSE("GPL");
module_init(init_lustre_lite);
}
down(&lli->lli_open_sem);
-
rc = ll_readlink_internal(inode, &request, &symname);
+ up(&lli->lli_open_sem);
if (rc)
GOTO(out, rc);
rc = vfs_follow_link_it(nd, symname, it);
out:
- up(&lli->lli_open_sem);
ptlrpc_req_finished(request);
RETURN(rc);
}
+extern int ll_inode_revalidate(struct dentry *dentry);
extern int ll_setattr(struct dentry *de, struct iattr *attr);
struct inode_operations ll_fast_symlink_inode_operations = {
readlink: ll_readlink,
setattr: ll_setattr,
- follow_link2: ll_follow_link
+ follow_link2: ll_follow_link,
+ revalidate: ll_inode_revalidate
};
MODULE = lov
modulefs_DATA = lov.o
EXTRA_PROGRAMS = lov
-LINX=
+LINX=client.c
lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c $(LINX)
+client.c:
+ test -e client.c || ln -sf $(top_srcdir)/lib/client.c
+
include $(top_srcdir)/Rules
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lov/lov.c
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@off.net>
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
* Peter Braam <braam@clusterfs.com>
- * Mike Shaver <shaver@off.net>
+ * Mike Shaver <shaver@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define EXPORT_SYMTAB
#include <asm/div64.h>
#include <linux/lprocfs_status.h>
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
static kmem_cache_t *lov_file_cache;
/* obd methods */
int lov_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int lov_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct ptlrpc_request *req = NULL;
struct lov_desc *desc = &lov->desc;
struct obd_export *exp;
struct lustre_handle mdc_conn;
- obd_uuid_t *uuidarray;
+ struct obd_uuid lov_mds_uuid = {"LOV_MDS_UUID"};
+ struct obd_uuid uuid;
+ char *tmp;
int rc, rc2, i;
ENTRY;
INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
/* retrieve LOV metadata from MDS */
- rc = obd_connect(&mdc_conn, lov->mdcobd, NULL, recovd, recover);
+ rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid, recovd,recover);
if (rc) {
CERROR("cannot connect to mdc: rc = %d\n", rc);
GOTO(out_conn, rc);
memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
lov_unpackdesc(desc);
- if (req->rq_repmsg->buflens[1] < sizeof(*uuidarray)*desc->ld_tgt_count){
+ if (req->rq_repmsg->buflens[1] < sizeof(uuid.uuid)*desc->ld_tgt_count){
CERROR("LOV desc: invalid uuid array returned\n");
GOTO(out_conn, rc = -EINVAL);
}
- if (memcmp(obd->obd_uuid, desc->ld_uuid, sizeof(desc->ld_uuid))) {
+ if (memcmp(obd->obd_uuid.uuid, desc->ld_uuid.uuid,
+ sizeof(desc->ld_uuid.uuid))) {
CERROR("LOV desc: uuid %s not on mds device (%s)\n",
- obd->obd_uuid, desc->ld_uuid);
+ obd->obd_uuid.uuid, desc->ld_uuid.uuid);
GOTO(out_conn, rc = -EINVAL);
}
GOTO(out_conn, rc = -ENOMEM);
}
- uuidarray = lustre_msg_buf(req->rq_repmsg, 1);
- for (i = 0; i < desc->ld_tgt_count; i++)
- memcpy(lov->tgts[i].uuid, uuidarray[i], sizeof(*uuidarray));
-
+ tmp = lustre_msg_buf(req->rq_repmsg, 1);
for (i = 0; i < desc->ld_tgt_count; i++) {
- struct obd_device *tgt = client_tgtuuid2obd(uuidarray[i]);
+ struct obd_device *tgt;
+ struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
+
+ strncpy(uuid.uuid, tmp, sizeof(uuid.uuid));
+ memcpy(&lov->tgts[i].uuid, &uuid, sizeof(uuid));
+ tgt = client_tgtuuid2obd(&uuid);
+ tmp += sizeof(uuid.uuid);
if (!tgt) {
- CERROR("Target %s not attached\n", uuidarray[i]);
+ CERROR("Target %s not attached\n", uuid.uuid);
GOTO(out_disc, rc = -EINVAL);
}
if (!(tgt->obd_flags & OBD_SET_UP)) {
- CERROR("Target %s not set up\n", uuidarray[i]);
+ CERROR("Target %s not set up\n", uuid.uuid);
GOTO(out_disc, rc = -EINVAL);
}
- rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
+ rc = obd_connect(&lov->tgts[i].conn, tgt, &lov_osc_uuid, recovd,
recover);
if (rc) {
- CERROR("Target %s connect error %d\n", uuidarray[i],
+ CERROR("Target %s connect error %d\n", uuid.uuid,
rc);
GOTO(out_disc, rc);
}
-
+
rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
sizeof(struct obd_device *), obd, NULL);
if (rc) {
CERROR("Target %s REGISTER_LOV error %d\n",
- uuidarray[i], rc);
+ uuid.uuid, rc);
GOTO(out_disc, rc);
}
while (i-- > 0) {
desc->ld_active_tgt_count--;
lov->tgts[i].active = 0;
+ memcpy(&uuid, &lov->tgts[i].uuid, sizeof(uuid));
rc2 = obd_disconnect(&lov->tgts[i].conn);
if (rc2)
- CERROR("LOV Target %s disconnect error: rc = %d\n",
- uuidarray[i], rc2);
+ CERROR("error: LOV target %s disconnect on OST idx %d: "
+ "rc = %d\n", uuid.uuid, i, rc2);
}
OBD_FREE(lov->tgts, lov->bufsize);
out_conn:
if (rc) {
if (lov->tgts[i].active) {
CERROR("Target %s disconnect error %d\n",
- lov->tgts[i].uuid, rc);
+ lov->tgts[i].uuid.uuid, rc);
}
rc = 0;
}
* -EBADF : The UUID is found, but the OBD is the wrong type (!)
* -EALREADY: The OSC is already marked (in)active
*/
-static int lov_set_osc_active(struct lov_obd *lov, obd_uuid_t uuid,
+static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid,
int activate)
{
struct obd_device *obd;
ENTRY;
CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n",
- lov, uuid, activate);
+ lov, uuid->uuid, activate);
spin_lock(&lov->lov_lock);
for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
- i, tgt->uuid, tgt->conn.addr);
- if (strncmp(uuid, tgt->uuid, sizeof(tgt->uuid)) == 0)
+ i, tgt->uuid.uuid, tgt->conn.addr);
+ if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof(uuid->uuid)) == 0)
break;
}
}
CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LOV idx %d\n",
- obd->obd_name, obd->obd_uuid, obd->obd_minor, obd,
+ obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
obd->obd_type->typ_name, i);
if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
LBUG();
{
struct obd_ioctl_data *data = buf;
struct lov_obd *lov = &obd->u.lov;
+ struct obd_uuid uuid;
int rc = 0;
ENTRY;
}
spin_lock_init(&lov->lov_lock);
- lov->mdcobd = class_uuid2obd(data->ioc_inlbuf1);
+ obd_str2uuid(&uuid, data->ioc_inlbuf1);
+ lov->mdcobd = class_uuid2obd(&uuid);
if (!lov->mdcobd) {
- CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid,
+ CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid.uuid,
data->ioc_inlbuf1);
rc = -EINVAL;
}
/* the LOV expects oa->o_id to be set to the LOV object id */
static int lov_create(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
int stripe_offset = mult % ost_count;
- int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
+ int sub_offset = (mult / ost_count);
- ost_idx = stripe_offset + sub_offset;
+ ost_idx = (stripe_offset + sub_offset) % ost_count;
} else
ost_idx = lsm->lsm_stripe_offset;
/* create data objects with "parent" OA */
memcpy(tmp, oa, sizeof(*tmp));
/* XXX: LOV STACKING: use real "obj_mdp" sub-data */
- err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
+ err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
if (err) {
if (lov->tgts[ost_idx].active) {
CERROR("error creating objid "LPX64" sub-object"
- "on OST idx %d: rc = %d\n",
- oa->o_id, ost_idx, err);
+ " on OST idx %d/%d: rc = %d\n", oa->o_id,
+ ost_idx, lsm->lsm_stripe_count, err);
+ if (err > 0) {
+ CERROR("obd_create returned invalid "
+ "err %d\n", err);
+ err = -EIO;
+ }
if (!rc)
rc = err;
}
return rc;
out_cleanup:
- while (i-- > 0) {
+ while (obj_alloc-- > 0) {
int err;
--loi;
/* destroy already created objects here */
memcpy(tmp, oa, sizeof(*tmp));
tmp->o_id = loi->loi_id;
- err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+ err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL, NULL);
if (err)
CERROR("Failed to uncreate objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
}
static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obdo tmp;
struct obd_export *export = class_conn2export(conn);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
- NULL);
+ NULL, NULL);
if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error destroying objid "LPX64" subobj "
+ CERROR("error: destroying objid "LPX64" subobj "
LPX64" on OST idx %d\n: rc = %d",
oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
if (!rc)
}
static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
- struct lov_stripe_md *lsm, int stripeno, int *new)
+ struct lov_stripe_md *lsm, int stripeno, int *set)
{
- if (*new) {
- obdo_cpy_md(tgt, src, valid);
- if (valid & OBD_MD_FLSIZE)
- tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
- *new = 0;
- } else {
+ if (*set) {
if (valid & OBD_MD_FLSIZE) {
/* this handles sparse files properly */
obd_size lov_size;
tgt->o_ctime = src->o_ctime;
if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
tgt->o_mtime = src->o_mtime;
+ } else {
+ obdo_cpy_md(tgt, src, valid);
+ if (valid & OBD_MD_FLSIZE)
+ tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
+ *set = 1;
}
}
struct lov_oinfo *loi;
struct lov_file_handles *lfh = NULL;
int i;
- int new = 1;
+ int set = 0;
ENTRY;
if (!lsm) {
err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL);
if (err) {
if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error getattr objid "LPX64" subobj "
+ CERROR("error: getattr objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
oa->o_id, loi->loi_id, loi->loi_ost_idx,
err);
RETURN(err);
}
} else {
- lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &new);
+ lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &set);
}
}
- RETURN(0);
+ RETURN(set ? 0 : -EIO);
}
static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obdo *tmp;
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
struct lov_oinfo *loi;
struct lov_file_handles *lfh = NULL;
- int rc = 0, i;
+ int rc = 0, i, set = 0;
ENTRY;
- /* Note that this code is currently unused, hence LBUG(), just
- * to know when/if it is ever revived that it needs cleanups.
- */
- LBUG();
-
if (!lsm) {
CERROR("LOV requires striping ea\n");
RETURN(-EINVAL);
/* size changes should go through punch and not setattr */
LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
+ /* for now, we only expect mtime updates here */
+ LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
+
tmp = obdo_alloc();
if (!tmp)
RETURN(-ENOMEM);
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
int err;
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
obdo_cpy_md(tmp, oa, oa->o_valid);
if (lfh)
memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ sizeof(lfh->lfh_handles[i]));
else
tmp->o_valid &= ~OBD_MD_FLHANDLE;
tmp->o_id = loi->loi_id;
- err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+ err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+ NULL, NULL);
if (err) {
- CERROR("Error setattr objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
- if (!rc)
- rc = err;
- }
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("error: setattr objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ oa->o_id, loi->loi_id, loi->loi_ost_idx,
+ err);
+ if (!rc)
+ rc = err;
+ }
+ } else
+ set = 1;
}
obdo_free(tmp);
+ if (!set && !rc)
+ rc = -EIO;
RETURN(rc);
}
static int lov_open(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
struct obd_export *export = class_conn2export(conn);
struct lov_oinfo *loi;
struct lov_file_handles *lfh = NULL;
struct lustre_handle *handle;
- int new = 1;
+ int set = 0;
int rc = 0, i;
ENTRY;
oa->o_size = 0;
oa->o_blocks = 0;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
-
if (lov->tgts[loi->loi_ost_idx].active == 0) {
CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
continue;
memcpy(tmp, oa, sizeof(*tmp));
tmp->o_id = loi->loi_id;
- rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
+ rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+ NULL, NULL);
if (rc) {
if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error open objid "LPX64" subobj "LPX64
+ CERROR("error: open objid "LPX64" subobj "LPX64
" on OST idx %d: rc = %d\n",
oa->o_id, lsm->lsm_oinfo[i].loi_id,
loi->loi_ost_idx, rc);
continue;
}
- lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &new);
+ lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
if (tmp->o_valid & OBD_MD_FLHANDLE)
memcpy(&lfh->lfh_handles[i], obdo_handle(tmp),
}
handle = obdo_handle(oa);
-
+
lfh->lfh_count = lsm->lsm_stripe_count;
get_random_bytes(&lfh->lfh_cookie, sizeof(lfh->lfh_cookie));
-
+
handle->addr = (__u64)(unsigned long)lfh;
handle->cookie = lfh->lfh_cookie;
oa->o_valid |= OBD_MD_FLHANDLE;
list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
spin_unlock(&export->exp_lov_data.led_lock);
+ if (!set && !rc)
+ rc = -EIO;
out_tmp:
obdo_free(tmp);
RETURN(rc);
memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
sizeof(lfh->lfh_handles[i]));
- err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL);
- if (err) {
- CERROR("Error closing objid "LPX64" subobj "LPX64
- " on OST idx %d after open error: rc = %d\n",
+ err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+ NULL, NULL);
+ if (err && lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("error: closing objid "LPX64" subobj "LPX64
+ " on OST idx %d after open error: rc=%d\n",
oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
}
}
-
+
OBD_FREE(lfh->lfh_handles,
lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
out_lfh:
}
static int lov_close(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
struct obdo tmp;
struct obd_export *export = class_conn2export(conn);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
- err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL);
+ err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
+ NULL, NULL);
if (err) {
- CERROR("Error close objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("error: close objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n", oa->o_id,
+ loi->loi_id, loi->loi_ost_idx, err);
+ }
if (!rc)
rc = err;
}
* that the punch will affect. */
static int lov_punch(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm,
- obd_off start, obd_off end)
+ obd_off start, obd_off end, struct obd_trans_info *oti)
{
struct obdo tmp;
struct obd_export *export = class_conn2export(conn);
tmp.o_valid &= ~OBD_MD_FLHANDLE;
err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL,
- starti, endi);
+ starti, endi, NULL);
if (err) {
- CERROR("Error punch objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("error: punch objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n", oa->o_id,
+ loi->loi_id, loi->loi_ost_idx, err);
+ }
if (!rc)
rc = err;
}
static inline int lov_brw(int cmd, struct lustre_handle *conn,
struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_brw_set *set)
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
struct {
int bufct;
if (si->bufct) {
LASSERT(shift < oa_bufs);
rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
- &si->lsm, si->bufct, &ioarr[shift], set);
+ &si->lsm, si->bufct, &ioarr[shift],
+ set, oti);
if (rc)
GOTO(out_ioarr, rc);
}
if (rc)
memset(lov_lockhp, 0, sizeof(*lov_lockhp));
if (rc && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error enqueue objid "LPX64" subobj "LPX64
+ CERROR("error: enqueue objid "LPX64" subobj "LPX64
" on OST idx %d: rc = %d\n", lsm->lsm_object_id,
loi->loi_id, loi->loi_ost_idx, rc);
goto out_locks;
submd.lsm_stripe_count = 0;
err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
mode, lov_lockhp);
- if (err) {
- CERROR("Error cancelling objid "LPX64
- " on OST idx %d after enqueue error: rc = %d\n",
+ if (err && lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("error: cancelling objid "LPX64" on OST "
+ "idx %d after enqueue error: rc = %d\n",
loi->loi_id, loi->loi_ost_idx, err);
}
}
mode, lov_lockhp);
if (err) {
if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error cancel objid "LPX64" subobj "
+ CERROR("error: cancel objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
lsm->lsm_object_id,
loi->loi_id, loi->loi_ost_idx, err);
err = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn,
&submd, flags);
if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error cancel unused objid "LPX64" subobj "LPX64
+ CERROR("error: cancel unused objid "LPX64" subobj "LPX64
" on OST idx %d: rc = %d\n", lsm->lsm_object_id,
loi->loi_id, loi->loi_ost_idx, err);
if (!rc)
err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
if (err) {
- CERROR("Error statfs OSC %s i %d: err = %d\n",
- lov->tgts[i].uuid, i, err);
- if (!rc)
- rc = err;
- continue; /* XXX or break? - probably OK to continue */
+ if (lov->tgts[i].active) {
+ CERROR("error: statfs OSC %s on OST idx %d: "
+ "err = %d\n",
+ lov->tgts[i].uuid.uuid, i, err);
+ if (!rc)
+ rc = err;
+ }
+ continue;
}
if (!set) {
memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
*/
}
}
+ if (!set && !rc)
+ rc = -EIO;
RETURN(rc);
}
struct obd_device *obddev = class_conn2obd(conn);
struct lov_obd *lov = &obddev->u.lov;
int i, count = lov->desc.ld_tgt_count;
+ struct obd_uuid *uuidp;
int rc;
ENTRY;
switch (cmd) {
case IOC_LOV_SET_OSC_ACTIVE: {
struct obd_ioctl_data *data = karg;
- rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
+ uuidp = (struct obd_uuid *)data->ioc_inlbuf1;
+ rc = lov_set_osc_active(lov, uuidp, data->ioc_offset);
break;
}
case OBD_IOC_LOV_GET_CONFIG: {
struct obd_ioctl_data *data = karg;
struct lov_tgt_desc *tgtdesc;
struct lov_desc *desc;
- obd_uuid_t *uuidp;
char *buf = NULL;
buf = NULL;
RETURN(-EINVAL);
}
- if (sizeof(*uuidp) * count > data->ioc_inllen2) {
+ if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) {
OBD_FREE(buf, len);
RETURN(-EINVAL);
}
desc = (struct lov_desc *)data->ioc_inlbuf1;
- uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
memcpy(desc, &(lov->desc), sizeof(*desc));
+ uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
tgtdesc = lov->tgts;
for (i = 0; i < count; i++, uuidp++, tgtdesc++)
- memcpy(uuidp, tgtdesc->uuid, sizeof(*uuidp));
+ obd_str2uuid(uuidp, tgtdesc->uuid.uuid);
rc = copy_to_user((void *)uarg, buf, len);
if (rc)
case LL_IOC_LOV_GETSTRIPE:
rc = lov_getstripe(conn, karg, uarg);
break;
- default:
+ default: {
+ int set = 0;
if (count == 0)
RETURN(-ENOTTY);
rc = 0;
err = obd_iocontrol(cmd, &lov->tgts[i].conn,
len, karg, uarg);
- if (err && !rc)
- rc = err;
+ if (err) {
+ if (lov->tgts[i].active) {
+ CERROR("error: iocontrol OSC %s on OST"
+ "idx %d: err = %d\n",
+ lov->tgts[i].uuid.uuid, i, err);
+ if (!rc)
+ rc = err;
+ }
+ } else
+ set = 1;
}
+ if (!set && !rc)
+ rc = -EIO;
+ }
}
RETURN(rc);
o_iocontrol: lov_iocontrol
};
-
-#define LOV_VERSION "v0.1"
-
static int __init lov_init(void)
{
+ struct lprocfs_static_vars lvars;
int rc;
- printk(KERN_INFO "Lustre Logical Object Volume driver " LOV_VERSION
- ", info@clusterfs.com\n");
+
+ printk(KERN_INFO "Lustre Logical Object Volume driver; "
+ "info@clusterfs.com\n");
lov_file_cache = kmem_cache_create("ll_lov_file_data",
sizeof(struct lov_file_handles),
0, 0, NULL, NULL);
if (!lov_file_cache)
RETURN(-ENOMEM);
- rc = class_register_type(&lov_obd_ops, status_class_var,
+ lprocfs_init_vars(&lvars);
+ rc = class_register_type(&lov_obd_ops, lvars.module_vars,
OBD_LOV_DEVICENAME);
RETURN(rc);
}
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver " LOV_VERSION);
+MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver");
MODULE_LICENSE("GPL");
module_init(lov_init);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc. <adilger@clusterfs.com>
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Andreas Dilger <adilger@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
RETURN(-EINVAL);
}
if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count) {
- CERROR("stripe count %d more than OST count %d\n",
- (int)lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
+ CERROR("stripe count %u more than OST count %d\n",
+ lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
}
- if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count) {
- CERROR("stripe offset %d more than max OST index %d\n",
- (int)lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
+ if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count &&
+ lmm.lmm_stripe_offset != 0xffffffff) {
+ CERROR("stripe offset %u more than max OST index %d\n",
+ lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
}
if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
lmm.lmm_stripe_size, PAGE_SIZE);
RETURN(-EINVAL);
}
- if (lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
+ if ((__u64)lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
CERROR("stripe width %ux%u > %lu on 32-bit system\n",
lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
RETURN(-EINVAL);
RETURN(-ENOMEM);
lsm->lsm_magic = LOV_MAGIC;
- /* This is all validated in lov_create() */
lsm->lsm_stripe_count = stripe_count;
lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
lsm->lsm_stripe_size = lmm.lmm_stripe_size;
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
+#include <linux/obd_class.h>
-/*
- * Common STATUS namespace
- */
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+#else
-int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* dev = (struct obd_device*)data;
- return snprintf(page, count, "%s\n", dev->obd_uuid);
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device *dev = (struct obd_device*)data;
+ struct obd_device *dev = (struct obd_device *)data;
struct lov_desc *desc = &dev->u.lov.desc;
+ *eof = 1;
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
}
int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
- struct lov_obd* lov = &dev->u.lov;
+ struct obd_device *dev = (struct obd_device *)data;
+ struct lov_desc *desc = &dev->u.lov.desc;
- return snprintf(page, count, LPU64"\n",
- lov->desc.ld_default_stripe_offset);
+ *eof = 1;
+ return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
}
int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- struct lov_obd* lov = &dev->u.lov;
+ struct lov_desc *desc = &dev->u.lov.desc;
- return snprintf(page, count, "%u\n", lov->desc.ld_pattern);
+ *eof = 1;
+ return snprintf(page, count, "%u\n", desc->ld_pattern);
}
int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
- struct lov_obd* lov = &dev->u.lov;
+ struct obd_device *dev = (struct obd_device *)data;
+ struct lov_desc *desc = &dev->u.lov.desc;
- return snprintf(page, count, "%u\n", lov->desc.ld_default_stripe_count);
+ *eof = 1;
+ return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
}
int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct lov_obd *lov = &dev->u.lov;
+ struct lov_desc *desc = &dev->u.lov.desc;
- return snprintf(page, count, "%u\n", lov->desc.ld_tgt_count);
+ *eof = 1;
+ return snprintf(page, count, "%u\n", desc->ld_tgt_count);
}
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- struct lov_obd* lov = &dev->u.lov;
-
- return snprintf(page, count, "%u\n", lov->desc.ld_active_tgt_count);
-}
-
-int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-
-int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-
-int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
+ struct lov_desc *desc = &dev->u.lov.desc;
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
+ *eof = 1;
+ return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
}
int rd_target(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
- int len = 0, i = 0;
- struct lov_obd* lov = &dev->u.lov;
- struct lov_tgt_desc* tgts = lov->tgts;
- while (i < lov->desc.ld_tgt_count) {
- len += snprintf(&page[len], count - len, "%d: %s %sACTIVE\n",
- i, tgts->uuid, tgts->active ? "" : "IN");
- i++;
- tgts++;
+ struct obd_device *dev = (struct obd_device*) data;
+ int len = 0, i;
+ struct lov_obd *lov = &dev->u.lov;
+ struct lov_tgt_desc *tgts = lov->tgts;
+
+ for (i = 0; i < lov->desc.ld_tgt_count; i++, tgts++) {
+ int cur;
+ cur = snprintf(&page[len], count, "%d: %s %sACTIVE\n",
+ i, tgts->uuid.uuid, tgts->active ? "" : "IN");
+ len += cur;
+ count -= cur;
}
+ *eof = 1;
return len;
}
-int rd_mdc(char* page, char **start, off_t off, int count, int *eof, void *data)
+int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
- struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid);
- return len;
-}
+ struct obd_device *dev = (struct obd_device*) data;
+ struct lov_obd *lov = &dev->u.lov;
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/stripesize",rd_stripesize, 0, 0},
- {"status/stripeoffset",rd_stripeoffset, 0, 0},
- {"status/stripecount",rd_stripecount, 0, 0},
- {"status/stripetype", rd_stripetype, 0, 0},
- {"status/numobd",rd_numobd, 0, 0},
- {"status/activeobd", rd_activeobd, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {"status/blocksize", rd_blksize, 0, 0},
- {"status/kbytestotal", rd_kbtotal, 0, 0},
- {"status/kbytesfree", rd_kbfree, 0, 0},
- {"status/target_obd", rd_target, 0, 0},
- {"status/target_mdc", rd_mdc, 0, 0},
- {0}
+ *eof = 1;
+ return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
+}
+
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "stripesize", rd_stripesize, 0, 0 },
+ { "stripeoffset", rd_stripeoffset, 0, 0 },
+ { "stripecount", rd_stripecount, 0, 0 },
+ { "stripetype", rd_stripetype, 0, 0 },
+ { "numobd", rd_numobd, 0, 0 },
+ { "activeobd", rd_activeobd, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "kbytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "target_obd", rd_target, 0, 0 },
+ { "target_mdc", rd_mdc, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
-
- return snprintf(page, count, "%d\n", class->typ_refcnt);
-}
-
-struct lprocfs_vars status_class_var[]={
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
-
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- int len = 0;
- len += snprintf(page, count, "%s\n",temp->obd_uuid);
- return len;
-
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-int rd_conn_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct client_obd* cli = &temp->u.cli;
- struct obd_import* imp = &cli->cl_import;
- int len = 0;
-
- len += snprintf(page, count, "%s\n",imp->imp_connection->c_remote_uuid);
- return len;
-}
-
-int rd_server_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct client_obd* cli = &temp->u.cli;
- int len = 0;
-
- len += snprintf(page, count, "%s\n",cli->cl_target_uuid);
- return len;
-}
-
-int rd_server_name(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytestotal",rd_kbtotal, 0, 0},
- {"status/kbytesfree", rd_kbfree, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {"status/mds_server_uuid", rd_server_uuid, 0, 0},
- {"status/mds_conn_uuid", rd_conn_uuid, 0, 0},
- {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
+
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "kbytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
+ { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+
+#endif /* LPROCFS */
+
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.sf.net/projects/lustre/
*
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
*/
#define EXPORT_SYMTAB
#include <linux/obd_class.h>
#include <linux/lustre_mds.h>
+extern struct semaphore mdc_sem;
+
static int mdc_reint(struct ptlrpc_request *request, int level)
{
int rc;
+ __u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0);
+
request->rq_level = level;
+ if (!(*opcodeptr == REINT_SETATTR))
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+
rc = ptlrpc_queue_wait(request);
+ if (!(*opcodeptr == REINT_SETATTR))
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (rc) {
- CERROR("error in handling %d\n", rc);
+ CDEBUG(D_INFO, "error in handling %d\n", rc);
} else {
/* For future resend/replays. */
- u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0);
*opcodeptr |= REINT_REPLAYING;
}
return rc;
}
-int mdc_setattr(struct lustre_handle *conn,
- struct inode *inode, struct iattr *iattr,
+int mdc_setattr(struct lustre_handle *conn, struct inode *inode,
+ struct iattr *iattr, void *ea, int ealen,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mds_rec_setattr *rec;
- int rc, size = sizeof(*rec);
+ int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 1, &size,
- NULL);
+ if (ealen > 0)
+ bufcount = 2;
+
+ req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
+ size, NULL);
if (!req)
RETURN(-ENOMEM);
- mds_setattr_pack(req, 0, inode, iattr, NULL, 0);
+ /* XXX FIXME bug 249 */
+ req->rq_request_portal = MDS_GETATTR_PORTAL;
+
+ mds_setattr_pack(req, inode, iattr, ea, ealen);
- size = sizeof(struct mds_body);
- req->rq_replen = lustre_msg_size(1, &size);
+ size[0] = sizeof(struct mds_body);
+ req->rq_replen = lustre_msg_size(1, size);
rc = mdc_reint(req, LUSTRE_CONN_FULL);
*request = req;
- if (rc == -ERESTARTSYS )
+ if (rc == -ERESTARTSYS)
rc = 0;
RETURN(rc);
goto resend;
}
- mdc_store_inode_generation(req, 0, 0);
+ if (!rc)
+ mdc_store_inode_generation(req, 0, 0);
*request = req;
RETURN(rc);
struct inode *child, __u32 mode, const char *name, int namelen,
struct ptlrpc_request **request)
{
- struct ptlrpc_request *req;
+ struct obd_device *obddev = class_conn2obd(conn);
+ struct ptlrpc_request *req = *request;
int rc, size[2] = {sizeof(struct mds_rec_unlink), namelen + 1};
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL);
+ LASSERT(req == NULL);
+
+ req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
+ NULL);
if (!req)
RETURN(-ENOMEM);
-
- mds_unlink_pack(req, 0, dir, child, mode, name, namelen);
+ *request = req;
size[0] = sizeof(struct mds_body);
- req->rq_replen = lustre_msg_size(1, size);
+ size[1] = obddev->u.cli.cl_max_mds_easize;
+ req->rq_replen = lustre_msg_size(2, size);
+
+ mds_unlink_pack(req, 0, dir, child, mode, name, namelen);
rc = mdc_reint(req, LUSTRE_CONN_FULL);
- *request = req;
if (rc == -ERESTARTSYS)
rc = 0;
-
RETURN(rc);
}
int mdc_link(struct lustre_handle *conn,
- struct dentry *src, struct inode *dir, const char *name,
+ struct inode *src, struct inode *dir, const char *name,
int namelen, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
int rc, size[2] = {sizeof(struct mds_rec_link), namelen + 1};
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL);
+ req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
+ NULL);
if (!req)
RETURN(-ENOMEM);
- mds_link_pack(req, 0, src->d_inode, dir, name, namelen);
+ mds_link_pack(req, 0, src, dir, name, namelen);
size[0] = sizeof(struct mds_body);
req->rq_replen = lustre_msg_size(1, size);
rc = mdc_reint(req, LUSTRE_CONN_FULL);
*request = req;
- if (rc == -ERESTARTSYS )
+ if (rc == -ERESTARTSYS)
rc = 0;
RETURN(rc);
newlen + 1};
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size, NULL);
+ req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
+ NULL);
if (!req)
RETURN(-ENOMEM);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.sf.net/projects/lustre/
*
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
*/
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_MDC
#include <linux/module.h>
+#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/lustre_mds.h>
#include <linux/lustre_lite.h>
#define REQUEST_MINOR 244
extern int mds_queue_req(struct ptlrpc_request *);
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
+struct mdc_rpc_lock mdc_rpc_lock;
+EXPORT_SYMBOL(mdc_rpc_lock);
/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
body = lustre_msg_buf(req->rq_reqmsg, 0);
req->rq_level = level;
req->rq_replen = lustre_msg_size(1, &size);
-
+
mds_pack_req_body(req);
req->rq_reqmsg->flags |= msg_flags;
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (!rc) {
body = lustre_msg_buf(req->rq_repmsg, 0);
size[0] = 512;
size[1] = 8192;
req->rq_replen = lustre_msg_size(2, size);
-
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
out:
RETURN(rc);
if (!req)
GOTO(out, rc = -ENOMEM);
+ /* XXX FIXME bug 249 */
+ req->rq_request_portal = MDS_GETATTR_PORTAL;
+
body = lustre_msg_buf(req->rq_reqmsg, 0);
ll_ino2fid(&body->fid1, ino, 0, type);
body->valid = valid;
req->rq_replen = lustre_msg_size(bufcount, size);
mds_pack_req_body(req);
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
-
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (!rc) {
body = lustre_msg_buf(req->rq_repmsg, 0);
mds_unpack_body(body);
CDEBUG(D_NET, "mode: %o\n", body->mode);
}
- EXIT;
+ GOTO(out, rc);
out:
*request = req;
return rc;
req->rq_replen = lustre_msg_size(bufcount, size);
mds_pack_req_body(req);
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
-
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (!rc) {
body = lustre_msg_buf(req->rq_repmsg, 0);
mds_unpack_body(body);
return rc;
}
-void d_delete_aliases(struct inode *inode)
+/* This should be called with both the request and the reply still packed. */
+void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
+ int repoff)
{
- struct dentry *dentry = NULL;
- struct list_head *tmp;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- ENTRY;
-
- spin_lock(&dcache_lock);
- list_for_each(tmp, &inode->i_dentry) {
- dentry = list_entry(tmp, struct dentry, d_alias);
-
- list_del_init(&dentry->d_hash);
- list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
- }
+ struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
+ struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
- spin_unlock(&dcache_lock);
- EXIT;
+ memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
+ DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
+ rec->cr_replayfid.generation, rec->cr_replayfid.id);
}
static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, __u32 data_len, int flag)
+ void *data, int flag)
{
int rc;
struct lustre_handle lockh;
ENTRY;
+
switch (flag) {
case LDLM_CB_BLOCKING:
ldlm_lock2handle(lock, &lockh);
break;
case LDLM_CB_CANCELING: {
/* Invalidate all dentries associated with this inode */
- struct inode *inode;
+ struct inode *inode = lock->l_data;
LASSERT(data != NULL);
- LASSERT(data_len == sizeof(*inode));
/* XXX what tells us that 'data' is a valid inode at all?
* we should probably validate the lock handle first?
*/
- inode = igrab(data);
+
+ inode = igrab(inode);
if (inode == NULL) /* inode->i_state & I_FREEING */
break;
}
if (inode != inode->i_sb->s_root->d_inode)
- d_delete_aliases(inode);
+ d_unhash_aliases(inode);
iput(inode);
break;
RETURN(0);
}
-/* This should be called with both the request and the reply still packed. */
-void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
- int repoff)
-{
- struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
- struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
-
- memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
- DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
- rec->cr_replayfid.generation, rec->cr_replayfid.id);
-}
-
/* We always reserve enough space in the reply packet for a stripe MD, because
* we don't know in advance the file type.
*
{
struct ptlrpc_request *req;
struct obd_device *obddev = class_conn2obd(conn);
- __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
+ struct ldlm_res_id res_id =
+ { .name = {dir->i_ino, dir->i_generation} };
int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
int rc, flags = LDLM_FL_HAS_INTENT;
int repsize[3] = {sizeof(struct ldlm_reply),
sizeof(struct mds_body),
obddev->u.cli.cl_max_mds_easize};
+ struct mdc_unlink_data *d = data;
struct ldlm_reply *dlm_rep;
struct ldlm_intent *lit;
struct ldlm_request *lockreq;
LDLM_DEBUG_NOLOCK("mdsintent %s parent dir %lu",
ldlm_it2str(it->it_op), dir->i_ino);
- if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
- switch (it->it_op) {
- case IT_MKDIR:
- it->it_mode |= S_IFDIR;
- break;
- case (IT_CREAT|IT_OPEN):
- case IT_CREAT:
- it->it_mode |= S_IFREG;
- break;
- case IT_SYMLINK:
- it->it_mode |= S_IFLNK;
- break;
- }
+ if (it->it_op & IT_OPEN) {
+ it->it_mode |= S_IFREG;
it->it_mode &= ~current->fs->umask;
size[2] = sizeof(struct mds_rec_create);
size[3] = de->d_name.len + 1;
- size[4] = tgtlen + 1;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
- size, NULL);
- if (!req)
- RETURN(-ENOMEM);
-
- /* pack the intent */
- lit = lustre_msg_buf(req->rq_reqmsg, 1);
- lit->opc = NTOH__u64((__u64)it->it_op);
-
- /* pack the intended request */
- mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
- current->fsgid, CURRENT_TIME, de->d_name.name,
- de->d_name.len, tgt, tgtlen);
- req->rq_replen = lustre_msg_size(3, repsize);
- } else if (it->it_op == IT_RENAME2) {
- struct dentry *old_de = it->it_data;
-
- size[2] = sizeof(struct mds_rec_rename);
- size[3] = old_de->d_name.len + 1;
- size[4] = de->d_name.len + 1;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
- size, NULL);
- if (!req)
- RETURN(-ENOMEM);
-
- /* pack the intent */
- lit = lustre_msg_buf(req->rq_reqmsg, 1);
- lit->opc = NTOH__u64((__u64)it->it_op);
-
- /* pack the intended request */
- mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
- old_de->d_name.name, old_de->d_name.len,
- de->d_name.name, de->d_name.len);
- req->rq_replen = lustre_msg_size(3, repsize);
- } else if (it->it_op == IT_LINK2) {
- struct dentry *old_de = it->it_data;
-
- size[2] = sizeof(struct mds_rec_link);
- size[3] = de->d_name.len + 1;
req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
size, NULL);
if (!req)
RETURN(-ENOMEM);
+ req->rq_flags |= PTL_RPC_FL_REPLAY;
+
/* pack the intent */
lit = lustre_msg_buf(req->rq_reqmsg, 1);
lit->opc = NTOH__u64((__u64)it->it_op);
/* pack the intended request */
- mds_link_pack(req, 2, old_de->d_inode, dir,
- de->d_name.name, de->d_name.len);
+ mds_open_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
+ current->fsgid, CURRENT_TIME, it->it_flags,
+ de->d_name.name, de->d_name.len, tgt, tgtlen);
req->rq_replen = lustre_msg_size(3, repsize);
- } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
+ } else if (it->it_op & IT_UNLINK) {
size[2] = sizeof(struct mds_rec_unlink);
- size[3] = de->d_name.len + 1;
+ size[3] = d->unl_len + 1;
req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
size, NULL);
if (!req)
lit->opc = NTOH__u64((__u64)it->it_op);
/* pack the intended request */
- mds_unlink_pack(req, 2, dir, NULL,
- it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
- de->d_name.name, de->d_name.len);
-
+ mds_unlink_pack(req, 2, d->unl_dir,
+ d->unl_de, d->unl_mode,
+ d->unl_name, d->unl_len);
req->rq_replen = lustre_msg_size(3, repsize);
- } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
- IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
+ } else if (it->it_op & (IT_GETATTR| IT_SETATTR | IT_LOOKUP)) {
+ int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
size[2] = sizeof(struct mds_body);
size[3] = de->d_name.len + 1;
lit->opc = NTOH__u64((__u64)it->it_op);
/* pack the intended request */
- mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
-
+ mds_getattr_pack(req, valid, 2, it->it_flags, dir,
+ de->d_name.name, de->d_name.len);
/* get ready for the reply */
req->rq_replen = lustre_msg_size(3, repsize);
} else if (it->it_op == IT_READDIR) {
/* get ready for the reply */
req->rq_replen = lustre_msg_size(1, repsize);
- } else {
+ } else {
LBUG();
RETURN(-EINVAL);
}
+ mdc_get_rpc_lock(&mdc_rpc_lock, it);
rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
lock_type, NULL, 0, lock_mode, &flags,
- ldlm_completion_ast, mdc_blocking_ast, data,
- datalen, lockh);
-
- if (it->it_op != IT_READDIR) {
- /* XXX This should become a lustre_msg flag, but for now... */
- __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
- *opp |= REINT_REPLAYING;
+ ldlm_completion_ast, mdc_blocking_ast, dir, NULL,
+ lockh);
+
+ /* If we successfully created, mark the request so that replay will
+ * do the right thing */
+ if (req->rq_transno) {
+ struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 2);
+ rec->cr_opcode |= REINT_REPLAYING;
}
-
- if (rc == -ENOENT) {
- /* This can go when we're sure that this can never happen */
- LBUG();
+ /* Similarly, if we're going to replay this request, we don't want to
+ * actually get a lock, just perform the intent. */
+ if (req->rq_transno || (req->rq_flags & PTL_RPC_FL_REPLAY)) {
+ lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
+ lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
}
+
+ dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
+
+ /* This can go when we're sure that this can never happen */
+ LASSERT(rc != -ENOENT);
if (rc == ELDLM_LOCK_ABORTED) {
lock_mode = 0;
memset(lockh, 0, sizeof(*lockh));
- /* rc = 0 */
} else if (rc != 0) {
CERROR("ldlm_cli_enqueue: %d\n", rc);
RETURN(rc);
- } else {
- /* The server almost certainly gave us a lock other than the one
- * that we asked for. If we already have a matching lock, then
- * cancel this one--we don't need two. */
+ } else { /* rc = 0 */
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
struct lustre_handle lockh2;
LASSERT(lock);
+ /* If the server gave us back a different lock mode, we should
+ * fix up our variables. */
+ if (lock->l_req_mode != lock_mode) {
+ ldlm_lock_addref(lockh, lock->l_req_mode);
+ ldlm_lock_decref(lockh, lock_mode);
+ lock_mode = lock->l_req_mode;
+ }
+
+ /* The server almost certainly gave us a lock other than the
+ * one that we asked for. If we already have a matching lock,
+ * then cancel this one--we don't need two. */
LDLM_DEBUG(lock, "matching against this");
memcpy(&lockh2, lockh, sizeof(lockh2));
- if (ldlm_lock_match(NULL, NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
- &lockh2)) {
- /* We already have a lock; cancel the old one */
- ldlm_lock_decref(lockh, lock_mode);
- /* FIXME: bug 563 */
- //ldlm_cli_cancel(lockh);
+ if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
+ LDLM_PLAIN, NULL, 0, LCK_NL, &lockh2)) {
+ /* We already have a lock; cancel the new one */
+ ldlm_lock_decref_and_cancel(lockh, lock_mode);
memcpy(lockh, &lockh2, sizeof(lockh2));
}
LDLM_LOCK_PUT(lock);
}
- /* On replay, we don't want the lock granted. */
- lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
- lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
-
- dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
it->it_disposition = (int) dlm_rep->lock_policy_res1;
it->it_status = (int) dlm_rep->lock_policy_res2;
it->it_lock_mode = lock_mode;
it->it_data = req;
- RETURN(0);
+ RETURN(rc);
+}
+
+void mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
+{
+ struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+ ENTRY;
+
+ LASSERT(lock != NULL);
+ lock->l_data = inode;
+ LDLM_LOCK_PUT(lock);
+ EXIT;
}
int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
int flags)
{
- __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
+ struct ldlm_res_id res_id =
+ { .name = {inode->i_ino, inode->i_generation} };
struct obd_device *obddev = class_conn2obd(conn);
ENTRY;
- RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
+ RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags));
}
-struct replay_open_data {
- struct lustre_handle *fh;
-};
-
static void mdc_replay_open(struct ptlrpc_request *req)
{
- int offset;
- struct replay_open_data *saved;
+ struct lustre_handle old, *file_fh = req->rq_replay_data;
+ struct list_head *tmp;
struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
- if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
- offset = 2;
- else
- offset = 1;
-
- saved = lustre_msg_buf(req->rq_reqmsg, offset);
mds_unpack_body(body);
+ memcpy(&old, file_fh, sizeof(old));
CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
- saved->fh->addr, saved->fh->cookie,
- body->handle.addr, body->handle.cookie);
- memcpy(saved->fh, &body->handle, sizeof(body->handle));
+ file_fh->addr, file_fh->cookie, body->handle.addr,
+ body->handle.cookie);
+ memcpy(file_fh, &body->handle, sizeof(body->handle));
+
+ /* A few frames up, ptlrpc_replay holds the lock, so this is safe. */
+ list_for_each(tmp, &req->rq_import->imp_sending_list) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ if (req->rq_reqmsg->opc != MDS_CLOSE)
+ continue;
+ body = lustre_msg_buf(req->rq_reqmsg, 0);
+ if (memcmp(&body->handle, &old, sizeof(old)))
+ continue;
+
+ DEBUG_REQ(D_HA, req, "updating close body with new fh");
+ memcpy(&body->handle, file_fh, sizeof(*file_fh));
+ }
}
-/* If lmm is non-NULL and lmm_size is non-zero, the stripe MD is stored on
- * the MDS. Otherwise, we have already read a copy from the MDS (probably
- * during mdc_enqueue() and we do not need to send it to the MDS again.
- *
- * In the future (when we support the non-intent case) we need to be able
- * to read the stripe MD from the MDS here (need to fix mds_open() too).
- */
-int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
- struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
- struct ptlrpc_request **request)
+void mdc_set_open_replay_data(struct ll_file_data *fd)
{
- struct mds_body *body;
- struct replay_open_data *replay_data;
- int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
- struct ptlrpc_request *req;
- ENTRY;
-
- if (lmm_size) {
- bufcount = 3;
- size[2] = size[1]; /* shuffle the replay data along */
- size[1] = lmm_size;
- }
-
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
- NULL);
- if (!req)
- GOTO(out, rc = -ENOMEM);
-
- req->rq_flags |= PTL_RPC_FL_REPLAY;
- body = lustre_msg_buf(req->rq_reqmsg, 0);
-
- ll_ino2fid(&body->fid1, ino, 0, type);
- body->flags = HTON__u32(flags);
- memcpy(&body->handle, fh, sizeof(body->handle));
-
- if (lmm_size) {
- body->flags |= HTON__u32(OBD_MD_FLEASIZE);
- if (lmm) {
- CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
- lmm_size, ino);
- lustre_msg_set_op_flags(req->rq_reqmsg,MDS_OPEN_HAS_EA);
- memcpy(lustre_msg_buf(req->rq_reqmsg,1), lmm, lmm_size);
- }
- }
-
- req->rq_replen = lustre_msg_size(1, size);
-
- rc = ptlrpc_queue_wait(req);
- if (!rc) {
- body = lustre_msg_buf(req->rq_repmsg, 0);
- mds_unpack_body(body);
- memcpy(fh, &body->handle, sizeof(*fh));
-
- /* If open is replayed, we need to fix up the fh. */
- req->rq_replay_cb = mdc_replay_open;
- replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
- replay_data->fh = fh;
- }
-
- EXIT;
- out:
- *request = req;
- return rc;
+ fd->fd_req->rq_replay_cb = mdc_replay_open;
+ fd->fd_req->rq_replay_data = &fd->fd_mdshandle;
}
int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
char *addr, struct ptlrpc_request **request)
{
- struct ptlrpc_connection *connection =
+ struct obd_import *imp = class_conn2cliimp(conn);
+ struct ptlrpc_connection *connection =
client_conn2cli(conn)->cl_import.imp_connection;
struct ptlrpc_request *req = NULL;
struct ptlrpc_bulk_desc *desc = NULL;
struct ptlrpc_bulk_page *bulk = NULL;
struct mds_body *body;
+ unsigned long flags;
int rc, size = sizeof(*body);
ENTRY;
if (desc == NULL)
GOTO(out, rc = -ENOMEM);
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
- NULL);
+ req = ptlrpc_prep_req(imp, MDS_READPAGE, 1, &size, NULL);
if (!req)
GOTO(out2, rc = -ENOMEM);
bulk = ptlrpc_prep_bulk_page(desc);
- bulk->bp_buflen = PAGE_SIZE;
+ if (bulk == NULL)
+ GOTO(out2, rc = -ENOMEM);
+
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ bulk->bp_xid = ++imp->imp_last_bulk_xid;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ bulk->bp_buflen = PAGE_CACHE_SIZE;
bulk->bp_buf = addr;
- bulk->bp_xid = req->rq_xid;
+
desc->bd_ptl_ev_hdlr = NULL;
desc->bd_portal = MDS_BULK_PORTAL;
- rc = ptlrpc_register_bulk(desc);
+ rc = ptlrpc_register_bulk_put(desc);
if (rc) {
CERROR("couldn't setup bulk sink: error %d.\n", rc);
GOTO(out2, rc);
}
- mds_readdir_pack(req, offset, ino, type);
+ mds_readdir_pack(req, offset, ino, type, bulk->bp_xid);
req->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(req);
req->rq_replen = lustre_msg_size(1, &size);
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (rc)
GOTO(out, rc);
static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
static int mdc_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
/* Send a mostly-dummy GETSTATUS request and indicate that we're done replay. */
static int signal_completed_replay(struct obd_import *imp)
{
struct ll_fid fid;
-
+
return send_getstatus(imp, &fid, LUSTRE_CONN_RECOVD, MSG_LAST_REPLAY);
}
int rc;
unsigned long flags;
struct ptlrpc_request *req;
+ struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
ENTRY;
switch(phase) {
case PTLRPC_RECOVD_PHASE_PREPARE:
- ldlm_cli_cancel_unused(imp->imp_obd->obd_namespace,
- NULL, LDLM_FL_LOCAL_ONLY);
+ ldlm_cli_cancel_unused(ns, NULL, LDLM_FL_LOCAL_ONLY);
RETURN(0);
+
+ case PTLRPC_RECOVD_PHASE_NOTCONN:
+ ldlm_namespace_cleanup(ns, 1);
+ ptlrpc_abort_inflight(imp, 0);
+ /* FALL THROUGH */
case PTLRPC_RECOVD_PHASE_RECOVER:
reconnect:
rc = ptlrpc_reconnect_import(imp, MDS_CONNECT, &req);
- /* We were still connected, just go about our business. */
- if (rc == EALREADY)
- GOTO(skip_replay, rc);
+ flags = req->rq_repmsg
+ ? lustre_msg_get_op_flags(req->rq_repmsg)
+ : 0;
+
+ if (rc == -EBUSY && (flags & MSG_CONNECT_RECOVERING))
+ CERROR("reconnect denied by recovery; should retry\n");
if (rc) {
- ptlrpc_req_finished(req);
- RETURN(rc);
- }
-
- /* We can't replay, which might be a problem. */
- if (!(lustre_msg_get_flags(req->rq_repmsg) &
- MSG_REPLAY_IN_PROGRESS)) {
if (phase != PTLRPC_RECOVD_PHASE_NOTCONN) {
- CERROR("can't replay, invalidating\n");
- ldlm_namespace_cleanup(imp->imp_obd->obd_namespace,
- 1);
- ptlrpc_abort_inflight(imp);
+ CERROR("can't reconnect, invalidating\n");
+ ldlm_namespace_cleanup(ns, 1);
+ ptlrpc_abort_inflight(imp, 0);
}
- goto skip_replay;
- }
-
- rc = ptlrpc_replay(imp);
- if (rc)
- RETURN(rc);
-
- rc = ldlm_replay_locks(imp);
- if (rc)
+ ptlrpc_req_finished(req);
RETURN(rc);
+ }
- rc = signal_completed_replay(imp);
- if (rc)
- RETURN(rc);
+ if (flags & MSG_CONNECT_RECOVERING) {
+ /* Replay if they want it. */
+ DEBUG_REQ(D_HA, req, "MDS wants replay");
+ rc = ptlrpc_replay(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+
+ rc = ldlm_replay_locks(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+
+ rc = signal_completed_replay(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+ } else if (flags & MSG_CONNECT_RECONNECT) {
+ DEBUG_REQ(D_HA, req, "reconnecting to MDS\n");
+ /* Nothing else to do here. */
+ } else {
+ DEBUG_REQ(D_HA, req, "evicted: invalidating\n");
+ /* Otherwise, clean everything up. */
+ ldlm_namespace_cleanup(ns, 1);
+ ptlrpc_abort_inflight(imp, 0);
+ }
- skip_replay:
ptlrpc_req_finished(req);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
rc = ptlrpc_resend(imp);
if (rc)
- RETURN(rc);
+ GOTO(check_rc, rc);
RETURN(0);
-
- case PTLRPC_RECOVD_PHASE_NOTCONN:
- ldlm_namespace_cleanup(imp->imp_obd->obd_namespace, 1);
- ptlrpc_abort_inflight(imp);
- goto reconnect;
+ check_rc:
+ /* If we get disconnected in the middle, recovery has probably
+ * failed. Reconnect and find out.
+ */
+ if (rc == -ENOTCONN)
+ goto reconnect;
+ RETURN(rc);
default:
RETURN(-EINVAL);
}
static int mdc_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct obd_import *imp = &obd->u.cli.cl_import;
static int __init ptlrpc_request_init(void)
{
- return class_register_type(&mdc_obd_ops, status_class_var,
+ struct lprocfs_static_vars lvars;
+ mdc_init_rpc_lock(&mdc_rpc_lock);
+ lprocfs_init_vars(&lvars);
+ return class_register_type(&mdc_obd_ops, lvars.module_vars,
LUSTRE_MDC_NAME);
}
class_unregister_type(LUSTRE_MDC_NAME);
}
-MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Metadata Client");
MODULE_LICENSE("GPL");
-EXPORT_SYMBOL(d_delete_aliases);
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getlovinfo);
EXPORT_SYMBOL(mdc_enqueue);
EXPORT_SYMBOL(mdc_readpage);
EXPORT_SYMBOL(mdc_setattr);
EXPORT_SYMBOL(mdc_close);
-EXPORT_SYMBOL(mdc_open);
+EXPORT_SYMBOL(mdc_lock_set_inode);
+EXPORT_SYMBOL(mdc_set_open_replay_data);
EXPORT_SYMBOL(mdc_store_inode_generation);
modulefs_DATA = mds.o
EXTRA_PROGRAMS = mds
-LINX= mds_updates.c simple.c target.c
+LINX= mds_updates.c mds_open.c simple.c target.c
mds_updates.c:
test -e mds_updates.c || ln -sf $(top_srcdir)/lib/mds_updates.c
* lustre/mds/handler.c
* Lustre Metadata Server (mds) request handler
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Andreas Dilger <adilger@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
#include <linux/locks.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
#endif
#include <linux/obd_lov.h>
#include <linux/lustre_mds.h>
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
-static kmem_cache_t *mds_file_cache;
+kmem_cache_t *mds_file_cache;
extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
- obd_uuid_t *uuidarray);
+ struct obd_uuid *uuidarray);
extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc);
extern void mds_start_transno(struct mds_obd *mds);
extern int mds_finish_transno(struct mds_obd *mds, void *handle,
struct ptlrpc_request *req, int rc);
static int mds_cleanup(struct obd_device * obddev);
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
{
return &req->rq_export->exp_obd->u.mds;
/* Assumes caller has already pushed into the kernel filesystem context */
static int mds_sendpage(struct ptlrpc_request *req, struct file *file,
- __u64 offset)
+ __u64 offset, __u64 xid)
{
struct ptlrpc_bulk_desc *desc;
struct ptlrpc_bulk_page *bulk;
if (bulk == NULL)
GOTO(cleanup_bulk, rc = -ENOMEM);
- OBD_ALLOC(buf, PAGE_SIZE);
+ OBD_ALLOC(buf, PAGE_CACHE_SIZE);
if (buf == NULL)
GOTO(cleanup_bulk, rc = -ENOMEM);
- rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf, PAGE_SIZE,
- (loff_t *)&offset);
+ CDEBUG(D_EXT2, "reading %lu@"LPU64" from dir %lu (size %llu)\n",
+ PAGE_CACHE_SIZE, offset, file->f_dentry->d_inode->i_ino,
+ file->f_dentry->d_inode->i_size);
+ rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf,
+ PAGE_CACHE_SIZE, (loff_t *)&offset);
- if (rc != PAGE_SIZE)
+ if (rc != PAGE_CACHE_SIZE)
GOTO(cleanup_buf, rc = -EIO);
- bulk->bp_xid = req->rq_xid;
+ bulk->bp_xid = xid;
bulk->bp_buf = buf;
- bulk->bp_buflen = PAGE_SIZE;
+ bulk->bp_buflen = PAGE_CACHE_SIZE;
desc->bd_ptl_ev_hdlr = NULL;
desc->bd_portal = MDS_BULK_PORTAL;
- rc = ptlrpc_send_bulk(desc);
+ rc = ptlrpc_bulk_put(desc);
if (rc)
GOTO(cleanup_buf, rc);
return rc;
}
-/*
- * Look up a named entry in a directory, and get an LDLM lock on it.
- * 'dir' is a inode for which an LDLM lock has already been taken.
- *
- * If we do not need an exclusive or write lock on this entry (e.g.
- * a read lock for attribute lookup only) then we do not hold the
- * directory semaphore on return. It is up to the caller to know what
- * type of lock it is getting, and clean up appropriately.
- */
-struct dentry *mds_name2locked_dentry(struct obd_device *obd,
- struct dentry *dir, struct vfsmount **mnt,
- char *name, int namelen, int lock_mode,
- struct lustre_handle *lockh,
- int dir_lock_mode)
-{
- struct dentry *dchild;
- int flags = 0, rc;
- __u64 res_id[3] = {0};
- ENTRY;
-
- down(&dir->d_inode->i_sem);
- dchild = lookup_one_len(name, dir, namelen);
- if (IS_ERR(dchild)) {
- CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- up(&dir->d_inode->i_sem);
- LBUG();
- RETURN(dchild);
- }
- if (dir_lock_mode != LCK_EX && dir_lock_mode != LCK_PW) {
- up(&dir->d_inode->i_sem);
- ldlm_lock_decref(lockh, dir_lock_mode);
- }
-
- if (lock_mode == 0 || !dchild->d_inode)
- RETURN(dchild);
-
- res_id[0] = dchild->d_inode->i_ino;
- res_id[1] = dchild->d_inode->i_generation;
- rc = ldlm_match_or_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, lockh);
- if (rc != ELDLM_OK) {
- l_dput(dchild);
- up(&dir->d_inode->i_sem);
- RETURN(ERR_PTR(-ENOLCK)); /* XXX translate ldlm code */
- }
-
- RETURN(dchild);
-}
-
+/* only valid locked dentries or errors should be returned */
struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid,
struct vfsmount **mnt, int lock_mode,
struct lustre_handle *lockh)
{
struct mds_obd *mds = &obd->u.mds;
struct dentry *de = mds_fid2dentry(mds, fid, mnt), *retval = de;
+ struct ldlm_res_id res_id = { .name = {0} };
int flags = 0, rc;
- __u64 res_id[3] = {0};
ENTRY;
if (IS_ERR(de))
RETURN(de);
- res_id[0] = de->d_inode->i_ino;
- res_id[1] = de->d_inode->i_generation;
- rc = ldlm_match_or_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, lockh);
+ res_id.name[0] = de->d_inode->i_ino;
+ res_id.name[1] = de->d_inode->i_generation;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ res_id, LDLM_PLAIN, NULL, 0, lock_mode,
+ &flags, ldlm_completion_ast,
+ mds_blocking_ast, NULL, NULL, lockh);
if (rc != ELDLM_OK) {
l_dput(de);
retval = ERR_PTR(-ENOLCK); /* XXX translate ldlm code */
#define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED
#endif
+
+
/* Look up an entry by inode number. */
+/* this function ONLY returns valid dget'd dentries with an initialized inode
+ or errors */
struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
struct vfsmount **mnt)
{
return result;
}
+static void mds_abort_recovery(void *data);
+
/* Establish a connection to the MDS.
*
* This will set up an export structure for the client to hold state data
* on the server, etc.
*/
static int mds_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct obd_export *exp;
struct mds_export_data *med;
struct mds_client_data *mcd;
- struct list_head *p;
+ struct mds_obd *mds = &obd->u.mds;
int rc;
ENTRY;
if (!conn || !obd || !cluuid)
RETURN(-EINVAL);
- /* lctl gets a backstage, all-access pass. */
- if (!strcmp(cluuid, "OBD_CLASS_UUID"))
- goto dont_check_exports;
-
- spin_lock(&obd->obd_dev_lock);
- list_for_each(p, &obd->obd_exports) {
- exp = list_entry(p, struct obd_export, exp_obd_chain);
- mcd = exp->exp_mds_data.med_mcd;
- if (!mcd) {
- CERROR("FYI: NULL mcd - simultaneous connects\n");
- continue;
- }
- if (!memcmp(cluuid, mcd->mcd_uuid, sizeof mcd->mcd_uuid)) {
- spin_unlock(&obd->obd_dev_lock);
- LASSERT(exp->exp_obd == obd);
-
- RETURN(target_handle_reconnect(conn, exp, cluuid));
- }
- }
- spin_unlock(&obd->obd_dev_lock);
-
- if (obd->u.mds.mds_recoverable_clients != 0) {
- CERROR("denying connection for new client %s: in recovery\n",
- cluuid);
- RETURN(-EBUSY);
- }
+ /* Check for aborted recovery. */
+ spin_lock_bh(&mds->mds_processing_task_lock);
+ if (obd->obd_flags & OBD_ABORT_RECOVERY)
+ mds_abort_recovery(mds);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
- dont_check_exports:
/* XXX There is a small race between checking the list and adding a
* new connection for the same UUID, but the real threat (list
* corruption when multiple different clients connect) is solved.
inline int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
{
struct file *file = mfd->mfd_file;
+ int rc;
+ struct dentry *de = NULL;
LASSERT(file->private_data == mfd);
+ LASSERT(mfd->mfd_servercookie != DEAD_HANDLE_MAGIC);
+
list_del(&mfd->mfd_list);
mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
kmem_cache_free(mds_file_cache, mfd);
- return filp_close(file, 0);
+ if (file->f_dentry->d_parent)
+ de = dget(file->f_dentry->d_parent);
+ rc = filp_close(file, 0);
+ if (de)
+ l_dput(de);
+ RETURN(rc);
}
static int mds_disconnect(struct lustre_handle *conn)
memcpy(desc, &mds->mds_lov_desc, sizeof *desc);
lov_packdesc(desc);
tgt_count = le32_to_cpu(desc->ld_tgt_count);
- if (tgt_count * sizeof(obd_uuid_t) > streq->repbuf) {
+ if (tgt_count * sizeof(struct obd_uuid) > streq->repbuf) {
CERROR("too many targets, enlarge client buffers\n");
req->rq_status = -ENOSPC;
RETURN(0);
}
int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, __u32 data_len, int flag)
+ void *data, int flag)
{
int do_ast;
ENTRY;
/* XXX layering violation! -phil */
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ /* Get this: if mds_blocking_ast is racing with ldlm_intent_policy,
+ * such that mds_blocking_ast is called just before l_i_p takes the
+ * ns_lock, then by the time we get the lock, we might not be the
+ * correct blocking function anymore. So check, and return early, if
+ * so. */
+ if (lock->l_blocking_ast != mds_blocking_ast) {
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ RETURN(0);
+ }
+
lock->l_flags |= LDLM_FL_CBPENDING;
do_ast = (!lock->l_readers && !lock->l_writers);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
rc = ldlm_cli_cancel(&lockh);
if (rc < 0)
CERROR("ldlm_cli_cancel: %d\n", rc);
- } else
- LDLM_DEBUG(lock, "Lock still has references, will be"
+ } else {
+ LDLM_DEBUG(lock, "Lock still has references, will be "
"cancelled later");
+ }
RETURN(0);
}
-int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req,
+int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg,
int offset, struct mds_body *body, struct inode *inode)
{
+ struct mds_obd *mds = &obd->u.mds;
struct lov_mds_md *lmm;
- int lmm_size = req->rq_repmsg->buflens[offset];
+ int lmm_size = msg->buflens[offset];
int rc;
+ ENTRY;
if (lmm_size == 0) {
- CDEBUG(D_INFO, "no space reserved for inode %lu MD\n", inode->i_ino);
+ CDEBUG(D_INFO, "no space reserved for inode %lu MD\n",
+ inode->i_ino);
RETURN(0);
}
- lmm = lustre_msg_buf(req->rq_repmsg, offset);
+ lmm = lustre_msg_buf(msg, offset);
/* I don't really like this, but it is a sanity check on the client
* MD request. However, if the client doesn't know how much space
* discarded right after unpacking, and the LOV can figure out the
* size itself from the ost count.
*/
- if ((rc = fsfilt_get_md(req->rq_export->exp_obd, inode,
- lmm, lmm_size)) < 0) {
- CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino,rc);
+ if ((rc = fsfilt_get_md(obd, inode, lmm, lmm_size)) < 0) {
+ CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n",
+ inode->i_ino, rc);
} else if (rc > 0) {
body->valid |= OBD_MD_FLEASIZE;
rc = 0;
return rc;
}
-static int mds_getattr_internal(struct mds_obd *mds, struct dentry *dentry,
+static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
struct ptlrpc_request *req,
struct mds_body *reqbody, int reply_off)
{
mds_pack_inode2body(body, inode);
if (S_ISREG(inode->i_mode) && reqbody->valid & OBD_MD_FLEASIZE) {
- rc = mds_pack_md(mds, req, reply_off + 1, body, inode);
+ rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
+ body, inode);
} else if (S_ISLNK(inode->i_mode) && reqbody->valid & OBD_MD_LINKNAME) {
char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1);
int len = req->rq_repmsg->buflens[reply_off + 1];
return(rc);
}
-static int mds_getattr_name(int offset, struct ptlrpc_request *req)
+static int mds_getattr_name(int offset, struct ptlrpc_request *req,
+ struct lustre_handle *child_lockh)
{
+ struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
+ int lock_mode;
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct obd_run_ctxt saved;
struct mds_body *body;
struct dentry *de = NULL, *dchild = NULL;
struct inode *dir;
- struct lustre_handle lockh;
- char *name;
- int namelen, flags = 0, lock_mode, rc = 0;
struct obd_ucred uc;
- __u64 res_id[3] = {0, 0, 0};
+ struct ldlm_res_id child_res_id = { .name = {0} };
+ struct lustre_handle parent_lockh;
+ int namelen, flags = 0, rc = 0;
+ char *name;
ENTRY;
LASSERT(!strcmp(obd->obd_type->typ_name, "mds"));
uc.ouc_fsuid = body->fsuid;
uc.ouc_fsgid = body->fsgid;
uc.ouc_cap = body->capability;
+ uc.ouc_suppgid = body->suppgid;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
- de = mds_fid2dentry(mds, &body->fid1, NULL);
- if (IS_ERR(de)) {
+ /* Step 1: Lookup/lock parent */
+ de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
+ &parent_lockh);
+ if (IS_ERR(de))
GOTO(out_pre_de, rc = PTR_ERR(de));
- }
-
dir = de->d_inode;
- CDEBUG(D_INODE, "parent ino %lu, name %*s\n", dir->i_ino,namelen,name);
+ LASSERT(dir);
- lock_mode = LCK_PR;
- res_id[0] = dir->i_ino;
- res_id[1] = dir->i_generation;
-
- rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, lock_mode, &lockh);
- if (rc == 0) {
- LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &lockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: err: %d\n", rc);
- GOTO(out_create_de, rc = -EIO);
- }
- }
- ldlm_lock_dump_handle(D_OTHER, &lockh);
+ CDEBUG(D_INODE, "parent ino %lu, name %*s\n", dir->i_ino,namelen,name);
- down(&dir->i_sem);
+ /* Step 2: Lookup child */
dchild = lookup_one_len(name, de, namelen - 1);
- up(&dir->i_sem);
if (IS_ERR(dchild)) {
CDEBUG(D_INODE, "child lookup error %ld\n", PTR_ERR(dchild));
- GOTO(out_create_dchild, rc = PTR_ERR(dchild));
+ GOTO(out_step_1, rc = PTR_ERR(dchild));
} else if (dchild->d_inode == NULL) {
- GOTO(out_create_dchild, rc = -ENOENT);
+ GOTO(out_step_2, rc = -ENOENT);
+ }
+
+ /* Step 3: Lock child */
+ if (it->opc == IT_SETATTR)
+ lock_mode = LCK_PW;
+ else
+ lock_mode = LCK_PR;
+ child_res_id.name[0] = dchild->d_inode->i_ino;
+ child_res_id.name[1] = dchild->d_inode->i_generation;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ child_res_id, LDLM_PLAIN, NULL, 0, lock_mode,
+ &flags, ldlm_completion_ast, mds_blocking_ast,
+ NULL, NULL, child_lockh);
+ if (rc != ELDLM_OK) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ GOTO(out_step_2, rc = -EIO);
}
if (req->rq_repmsg == NULL)
mds_getattr_pack_msg(req, dchild->d_inode, offset);
- rc = mds_getattr_internal(mds, dchild, req, body, offset);
-
- EXIT;
-out_create_dchild:
+ rc = mds_getattr_internal(obd, dchild, req, body, offset);
+ if (rc)
+ GOTO(out_step_3, rc);
+ GOTO(out_step_2, rc); /* returns the lock to the client */
+ out_step_3:
+ ldlm_lock_decref(child_lockh, LCK_PR);
+ out_step_2:
l_dput(dchild);
- ldlm_lock_decref(&lockh, lock_mode);
-out_create_de:
+ out_step_1:
+ ldlm_lock_decref(&parent_lockh, LCK_PR);
l_dput(de);
-out_pre_de:
+ out_pre_de:
req->rq_status = rc;
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
return rc;
static int mds_getattr(int offset, struct ptlrpc_request *req)
{
struct mds_obd *mds = mds_req2mds(req);
+ struct obd_device *obd = req->rq_export->exp_obd;
struct obd_run_ctxt saved;
struct dentry *de;
struct mds_body *body;
rc = mds_getattr_pack_msg(req, de->d_inode, offset);
- req->rq_status = mds_getattr_internal(mds, de, req, body, 0);
+ req->rq_status = mds_getattr_internal(obd, de, req, body, 0);
l_dput(de);
- EXIT;
+ GOTO(out_pop, rc);
out_pop:
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
return rc;
RETURN(mfd);
}
+#if 0
+
static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req,
int offset, struct mds_body *body, struct inode *inode)
{
RETURN(rc);
}
-static int mds_open(struct ptlrpc_request *req)
-{
- struct mds_obd *mds = mds_req2mds(req);
- struct mds_body *body;
- struct mds_export_data *med;
- struct mds_file_data *mfd;
- struct dentry *de;
- struct file *file;
- struct vfsmount *mnt;
- __u32 flags;
- struct list_head *tmp;
- int rc, size = sizeof(*body);
- ENTRY;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
- CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
- req->rq_status = -ENOMEM;
- RETURN(-ENOMEM);
- }
-
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc) {
- CERROR("mds: pack error: rc = %d\n", rc);
- req->rq_status = rc;
- RETURN(rc);
- }
-
- body = lustre_msg_buf(req->rq_reqmsg, 0);
-
- /* was this animal open already and the client lost the reply? */
- /* XXX need some way to detect a reopen, to avoid locked list walks */
- med = &req->rq_export->exp_mds_data;
- spin_lock(&med->med_open_lock);
- list_for_each(tmp, &med->med_open_head) {
- mfd = list_entry(tmp, typeof(*mfd), mfd_list);
- if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
- sizeof(mfd->mfd_clienthandle)) &&
- body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
- de = mfd->mfd_file->f_dentry;
- spin_unlock(&med->med_open_lock);
- CERROR("Re opening "LPD64"\n", body->fid1.id);
- GOTO(out_pack, rc = 0);
- }
- }
- spin_unlock(&med->med_open_lock);
-
- mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
- if (!mfd) {
- CERROR("mds: out of memory\n");
- req->rq_status = -ENOMEM;
- RETURN(0);
- }
-
- de = mds_fid2dentry(mds, &body->fid1, &mnt);
- if (IS_ERR(de))
- GOTO(out_free, rc = PTR_ERR(de));
-
- /* check if this inode has seen a delayed object creation */
- if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA) {
- rc = mds_store_md(mds, req, 1, body, de->d_inode);
- if (rc) {
- l_dput(de);
- mntput(mnt);
- GOTO(out_free, rc);
- }
- }
-
- flags = body->flags;
- /* dentry_open does a dput(de) and mntput(mnt) on error */
- file = dentry_open(de, mnt, flags & ~O_DIRECT);
- if (IS_ERR(file)) {
- rc = PTR_ERR(file);
- GOTO(out_free, 0);
- }
-
- file->private_data = mfd;
- mfd->mfd_file = file;
- memcpy(&mfd->mfd_clienthandle, &body->handle, sizeof(body->handle));
- get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
- spin_lock(&med->med_open_lock);
- list_add(&mfd->mfd_list, &med->med_open_head);
- spin_unlock(&med->med_open_lock);
-
-out_pack:
- body = lustre_msg_buf(req->rq_repmsg, 0);
- mds_pack_inode2fid(&body->fid1, de->d_inode);
- mds_pack_inode2body(body, de->d_inode);
- body->handle.addr = (__u64)(unsigned long)mfd;
- body->handle.cookie = mfd->mfd_servercookie;
- CDEBUG(D_INODE, "llite file "LPX64": addr %p, cookie "LPX64"\n",
- mfd->mfd_clienthandle.addr, mfd, mfd->mfd_servercookie);
- RETURN(0);
-
-out_free:
- mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(mds_file_cache, mfd);
- req->rq_status = rc;
- RETURN(0);
-}
+#endif
static int mds_close(struct ptlrpc_request *req)
{
body = lustre_msg_buf(req->rq_reqmsg, 0);
mfd = mds_handle2mfd(&body->handle);
- if (!mfd) {
+ if (mfd == NULL) {
DEBUG_REQ(D_ERROR, req, "no handle for file close "LPD64
": addr "LPX64", cookie "LPX64"\n",
body->fid1.id, body->handle.addr,
/* to make this asynchronous make sure that the handling function
doesn't send a reply when this function completes. Instead a
callback function would send the reply */
- rc = mds_sendpage(req, file, body->size);
+ /* body->blocks is actually the xid -phil */
+ rc = mds_sendpage(req, file, body->size, body->blocks);
filp_close(file, 0);
out_pop:
RETURN(0);
}
-int mds_reint(struct ptlrpc_request *req, int offset)
+int mds_reint(struct ptlrpc_request *req, int offset,
+ struct lustre_handle *lockh)
{
+ struct mds_update_record *rec; /* 116 bytes on the stack? no sir! */
int rc;
- struct mds_update_record rec;
- rc = mds_update_unpack(req, offset, &rec);
+ OBD_ALLOC(rec, sizeof(*rec));
+ if (rec == NULL)
+ RETURN(-ENOMEM);
+
+ rc = mds_update_unpack(req, offset, rec);
if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNPACK)) {
CERROR("invalid record\n");
- req->rq_status = -EINVAL;
- RETURN(0);
+ GOTO(out, req->rq_status = -EINVAL);
}
/* rc will be used to interrupt a for loop over multiple records */
- rc = mds_reint_rec(&rec, offset, req);
+ rc = mds_reint_rec(rec, offset, req, lockh);
+ out:
+ OBD_FREE(rec, sizeof(*rec));
return rc;
}
/* forward declaration */
int mds_handle(struct ptlrpc_request *req);
+static void abort_delayed_replies(struct mds_obd *mds)
+{
+ struct ptlrpc_request *req;
+ struct list_head *tmp, *n;
+ list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ DEBUG_REQ(D_ERROR, req, "aborted:");
+ req->rq_status = -ENOTCONN;
+ req->rq_type = PTL_RPC_MSG_ERR;
+ ptlrpc_reply(req->rq_svc, req);
+ list_del(&req->rq_list);
+ OBD_FREE(req, sizeof *req);
+ }
+}
+
+static void mds_abort_recovery(void *data)
+{
+ struct mds_obd *mds = data;
+ struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
+ CERROR("disconnecting clients and aborting recovery\n");
+ mds->mds_recoverable_clients = 0;
+ obd->obd_flags &= ~(OBD_RECOVERING | OBD_ABORT_RECOVERY);
+ abort_delayed_replies(mds);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
+ class_disconnect_all(obd);
+ spin_lock_bh(&mds->mds_processing_task_lock);
+}
+
+static void mds_recovery_expired(unsigned long castmeharder)
+{
+ struct mds_obd *mds = (struct mds_obd *)castmeharder;
+ struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
+ CERROR("recovery timed out, aborting\n");
+ spin_lock_bh(&mds->mds_processing_task_lock);
+ obd->obd_flags |= OBD_ABORT_RECOVERY;
+ wake_up(&mds->mds_next_transno_waitq);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
+}
+
+static void reset_recovery_timer(struct mds_obd *mds)
+{
+ CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
+ MDS_RECOVERY_TIMEOUT / HZ);
+ mod_timer(&mds->mds_recovery_timer, jiffies + MDS_RECOVERY_TIMEOUT);
+}
+
+static void start_recovery_timer(struct mds_obd *mds)
+{
+ mds->mds_recovery_timer.function = mds_recovery_expired;
+ mds->mds_recovery_timer.data = (unsigned long)mds;
+ init_timer(&mds->mds_recovery_timer);
+ reset_recovery_timer(mds);
+}
+
+static void cancel_recovery_timer(struct mds_obd *mds)
+{
+ del_timer(&mds->mds_recovery_timer);
+}
+
static int check_for_next_transno(struct mds_obd *mds)
{
struct ptlrpc_request *req;
+ struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
req = list_entry(mds->mds_recovery_queue.next,
struct ptlrpc_request, rq_list);
LASSERT(req->rq_reqmsg->transno >= mds->mds_next_recovery_transno);
- return req->rq_reqmsg->transno == mds->mds_next_recovery_transno;
+
+ return req->rq_reqmsg->transno == mds->mds_next_recovery_transno ||
+ (obd->obd_flags & OBD_RECOVERING) == 0;
}
static void process_recovery_queue(struct mds_obd *mds)
{
struct ptlrpc_request *req;
+ struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
+ int aborted = 0;
ENTRY;
for (;;) {
- spin_lock(&mds->mds_processing_task_lock);
+ spin_lock_bh(&mds->mds_processing_task_lock);
LASSERT(mds->mds_processing_task == current->pid);
req = list_entry(mds->mds_recovery_queue.next,
struct ptlrpc_request, rq_list);
if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) {
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
LPD64")\n",
mds->mds_next_recovery_transno,
req->rq_reqmsg->transno);
wait_event(mds->mds_next_transno_waitq,
check_for_next_transno(mds));
+ spin_lock_bh(&mds->mds_processing_task_lock);
+ if (obd->obd_flags & OBD_ABORT_RECOVERY) {
+ mds_abort_recovery(mds);
+ aborted = 1;
+ }
+ spin_unlock_bh(&mds->mds_processing_task_lock);
+ if (aborted)
+ return;
continue;
}
list_del_init(&req->rq_list);
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
DEBUG_REQ(D_ERROR, req, "processing: ");
(void)mds_handle(req);
+ reset_recovery_timer(mds);
mds_fsync_super(mds->mds_sb);
OBD_FREE(req, sizeof *req);
- spin_lock(&mds->mds_processing_task_lock);
+ spin_lock_bh(&mds->mds_processing_task_lock);
mds->mds_next_recovery_transno++;
if (list_empty(&mds->mds_recovery_queue)) {
mds->mds_processing_task = 0;
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
break;
}
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
}
EXIT;
}
if (!transno) {
INIT_LIST_HEAD(&req->rq_list);
- DEBUG_REQ(D_ERROR, req, "not queueing");
+ DEBUG_REQ(D_HA, req, "not queueing");
return 1;
}
- spin_lock(&mds->mds_processing_task_lock);
+ spin_lock_bh(&mds->mds_processing_task_lock);
if (mds->mds_processing_task == current->pid) {
/* Processing the queue right now, don't re-add. */
LASSERT(list_empty(&req->rq_list));
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
return 1;
}
*/
if (transno == mds->mds_next_recovery_transno)
wake_up(&mds->mds_next_transno_waitq);
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
return 0;
}
* now, so we'll do the honours.
*/
mds->mds_processing_task = current->pid;
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
process_recovery_queue(mds);
return 0;
struct mds_obd *mds, int *process)
{
switch (req->rq_reqmsg->opc) {
- case MDS_CONNECT:
+ case MDS_CONNECT: /* This will never get here, but for completeness. */
case MDS_DISCONNECT:
*process = 1;
RETURN(0);
- case MDS_OPEN:
+ case MDS_CLOSE:
case MDS_GETSTATUS: /* used in unmounting */
case MDS_REINT:
case LDLM_ENQUEUE:
static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
{
struct mds_obd *mds = mds_req2mds(req);
+ struct obd_device *mds_obd = list_entry(mds, struct obd_device, u.mds);
struct ptlrpc_request *saved_req;
- spin_lock(&mds->mds_processing_task_lock);
+ spin_lock_bh(&mds->mds_processing_task_lock);
if (rc) {
/* Just like ptlrpc_error, but without the sending. */
lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
CDEBUG(D_ERROR,
"all clients recovered, sending delayed replies\n");
+ mds_obd->obd_flags &= ~OBD_RECOVERING;
list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
DEBUG_REQ(D_ERROR, req, "delayed:");
list_del(&req->rq_list);
OBD_FREE(req, sizeof *req);
}
+ cancel_recovery_timer(mds);
} else {
CERROR("%d recoverable clients remain\n",
mds->mds_recoverable_clients);
}
- spin_unlock(&mds->mds_processing_task_lock);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
return 1;
}
[REINT_CREATE] "create",
[REINT_LINK] "link",
[REINT_UNLINK] "unlink",
- [REINT_RENAME] "rename"
+ [REINT_RENAME] "rename",
+ [REINT_OPEN] "open",
};
int mds_handle(struct ptlrpc_request *req)
{
- int rc;
- int should_process;
+ int should_process, rc;
struct mds_obd *mds = NULL; /* quell gcc overwarning */
+ struct obd_device *mds_obd = NULL;
ENTRY;
rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
GOTO(out, rc);
}
+ OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
+
LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME));
if (req->rq_reqmsg->opc != MDS_CONNECT) {
+ struct mds_export_data *med;
if (req->rq_export == NULL) {
req->rq_status = -ENOTCONN;
GOTO(out, rc = -ENOTCONN);
}
- mds = mds_req2mds(req);
- if (mds->mds_recoverable_clients != 0) {
+ med = &req->rq_export->exp_mds_data;
+ mds_obd = req->rq_export->exp_obd;
+ mds = &mds_obd->u.mds;
+ spin_lock_bh(&mds->mds_processing_task_lock);
+ if (mds_obd->obd_flags & OBD_ABORT_RECOVERY)
+ mds_abort_recovery(mds);
+ spin_unlock_bh(&mds->mds_processing_task_lock);
+
+ if (mds_obd->obd_flags & OBD_RECOVERING) {
rc = filter_recovery_request(req, mds, &should_process);
if (rc || !should_process)
RETURN(rc);
+ } else if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+ if (req->rq_xid == med->med_last_xid) {
+ DEBUG_REQ(D_HA, req, "resending reply");
+ OBD_ALLOC(req->rq_repmsg, med->med_last_replen);
+ req->rq_replen = med->med_last_replen;
+ memcpy(req->rq_repmsg, med->med_last_reply,
+ req->rq_replen);
+ ptlrpc_reply(req->rq_svc, req);
+ return 0;
+ }
+ DEBUG_REQ(D_HA, req, "no reply for resend, continuing");
}
+
}
switch (req->rq_reqmsg->opc) {
mds = mds_req2mds(req);
mds_fsync_super(mds->mds_sb);
}
-
- /* Let the client know if it can replay. */
- if (mds->mds_recoverable_clients) {
- lustre_msg_add_flags(req->rq_repmsg,
- MSG_REPLAY_IN_PROGRESS);
- }
break;
case MDS_DISCONNECT:
rc = mds_getattr(0, req);
break;
- case MDS_GETATTR_NAME:
+ case MDS_GETATTR_NAME: {
+ struct lustre_handle lockh;
DEBUG_REQ(D_INODE, req, "getattr_name");
OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0);
- rc = mds_getattr_name(0, req);
+ rc = mds_getattr_name(0, req, &lockh);
+ if (rc == 0)
+ ldlm_lock_decref(&lockh, LCK_PR);
break;
-
+ }
case MDS_STATFS:
DEBUG_REQ(D_INODE, req, "statfs");
OBD_FAIL_RETURN(OBD_FAIL_MDS_STATFS_NET, 0);
break;
case MDS_REINT: {
- int size = sizeof(struct mds_body);
- int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0),
- realopc = opc & REINT_OPCODE_MASK;
+ int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0);
+ int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+ int bufcount;
DEBUG_REQ(D_INODE, req, "reint (%s%s)",
- reint_names[realopc],
+ reint_names[opc & REINT_OPCODE_MASK],
opc & REINT_REPLAYING ? "|REPLAYING" : "");
OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
- &req->rq_repmsg);
- if (rc) {
- req->rq_status = rc;
+ if (opc == REINT_UNLINK)
+ bufcount = 2;
+ else
+ bufcount = 1;
+
+ rc = lustre_pack_msg(bufcount, size, NULL,
+ &req->rq_replen, &req->rq_repmsg);
+ if (rc)
break;
- }
- rc = mds_reint(req, 0);
- OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0);
- break;
- }
- case MDS_OPEN:
- DEBUG_REQ(D_INODE, req, "open");
- OBD_FAIL_RETURN(OBD_FAIL_MDS_OPEN_NET, 0);
- rc = mds_open(req);
+ rc = mds_reint(req, 0, NULL);
+ OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0);
break;
+ }
case MDS_CLOSE:
DEBUG_REQ(D_INODE, req, "close");
case LDLM_ENQUEUE:
DEBUG_REQ(D_INODE, req, "enqueue");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
- rc = ldlm_handle_enqueue(req);
+ rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
+ ldlm_server_blocking_ast);
break;
case LDLM_CONVERT:
DEBUG_REQ(D_INODE, req, "convert");
/* If we're DISCONNECTing, the mds_export_data is already freed */
if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) {
struct mds_export_data *med = &req->rq_export->exp_mds_data;
-
+ struct obd_device *obd = list_entry(mds, struct obd_device,
+ u.mds);
req->rq_repmsg->last_xid =
HTON__u64(le64_to_cpu(med->med_mcd->mcd_last_xid));
- req->rq_repmsg->last_committed =
- HTON__u64(mds->mds_last_committed);
+ if ((obd->obd_flags & OBD_NO_TRANSNO) == 0) {
+ req->rq_repmsg->last_committed =
+ HTON__u64(obd->obd_last_committed);
+ } else {
+ DEBUG_REQ(D_IOCTL, req,
+ "not sending last_committed update");
+ }
CDEBUG(D_INFO, "last_transno %Lu, last_committed %Lu, xid %d\n",
(unsigned long long)mds->mds_last_rcvd,
- (unsigned long long)mds->mds_last_committed,
+ (unsigned long long)obd->obd_last_committed,
cpu_to_le32(req->rq_xid));
}
out:
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
- struct mds_obd *mds = mds_req2mds(req);
- LASSERT(mds->mds_recoverable_clients);
- DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
- return mds_queue_final_reply(req, rc);
- }
-
- /* XXX bug 578 */
- /* MDS_CONNECT / EALREADY (note: not -EALREADY!) isn't an error */
- if (rc && (req->rq_reqmsg->opc != MDS_CONNECT ||
- rc != EALREADY)) {
- DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
- ptlrpc_error(req->rq_svc, req);
+ if (mds_obd && (mds_obd->obd_flags & OBD_RECOVERING)) {
+ DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
+ return mds_queue_final_reply(req, rc);
+ }
+ /* Lost a race with recovery; let the error path DTRT. */
+ rc = req->rq_status = -ENOTCONN;
+ }
+
+ if (req->rq_export && mds_obd &&
+ (mds_obd->obd_flags & OBD_RECOVERING) == 0) {
+ struct mds_export_data *med = &req->rq_export->exp_mds_data;
+ if (med->med_last_reply)
+ OBD_FREE(med->med_last_reply, med->med_last_replen);
+ OBD_ALLOC(med->med_last_reply, req->rq_replen);
+ med->med_last_replen = req->rq_replen;
+ med->med_last_xid = req->rq_xid;
+ memcpy(med->med_last_reply, req->rq_repmsg, req->rq_replen);
+ /* XXX serialize */
+ }
+
+ if (!OBD_FAIL_CHECK(OBD_FAIL_MDS_ALL_REPLY_NET | OBD_FAIL_ONCE)) {
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
+ ptlrpc_error(req->rq_svc, req);
+ } else {
+ DEBUG_REQ(D_NET, req, "sending reply");
+ ptlrpc_reply(req->rq_svc, req);
+ }
} else {
- DEBUG_REQ(D_NET, req, "sending reply");
- ptlrpc_reply(req->rq_svc, req);
+ obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
+ DEBUG_REQ(D_ERROR, req, "dropping reply");
+ if (req->rq_repmsg)
+ OBD_FREE(req->rq_repmsg, req->rq_replen);
}
+
return 0;
}
GOTO(err_put, rc);
}
+ if (obddev->obd_flags & OBD_RECOVERING)
+ start_recovery_timer(mds);
+
obddev->obd_namespace =
ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
if (obddev->obd_namespace == NULL) {
RETURN(0);
}
-static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
- void *req_cookie, ldlm_mode_t mode, int flags,
- void *data)
+static int ldlm_intent_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock **lockp, void *req_cookie,
+ ldlm_mode_t mode, int flags, void *data)
{
struct ptlrpc_request *req = req_cookie;
+ struct ldlm_lock *lock = *lockp;
int rc = 0;
ENTRY;
/* an intent needs to be considered */
struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
- struct mds_body *mds_rep;
+ struct mds_body *mds_body;
struct ldlm_reply *rep;
- __u64 new_resid[3] = {0, 0, 0}, old_res;
- int rc, size[3] = {sizeof(struct ldlm_reply),
+ struct lustre_handle lockh;
+ struct ldlm_lock *new_lock;
+ int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
sizeof(struct mds_body),
mds->mds_max_mdsize};
LDLM_DEBUG(lock, "intent policy, opc: %s",
ldlm_it2str(it->opc));
- rc = lustre_pack_msg(3, size, NULL, &req->rq_replen,
+ rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
&req->rq_repmsg);
if (rc) {
rc = req->rq_status = -ENOMEM;
}
rep = lustre_msg_buf(req->rq_repmsg, 0);
- rep->lock_policy_res1 = 1;
+ rep->lock_policy_res1 = IT_INTENT_EXEC;
/* execute policy */
switch ((long)it->opc) {
+ case IT_OPEN:
case IT_CREAT|IT_OPEN:
- rc = mds_reint(req, 2);
- if (rc || (req->rq_status != 0 &&
- req->rq_status != -EEXIST)) {
- rep->lock_policy_res2 = req->rq_status;
+ rc = mds_reint(req, offset, &lockh);
+ /* We return a dentry to the client if IT_OPEN_POS is
+ * set, or if we make it to the OPEN portion of the
+ * programme (which implies that we created) */
+ if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
+ rep->lock_policy_res1 & IT_OPEN_OPEN)) {
+ rep->lock_policy_res2 = rc;
RETURN(ELDLM_LOCK_ABORTED);
}
break;
- case IT_CREAT:
- case IT_MKDIR:
- case IT_MKNOD:
- case IT_RENAME2:
- case IT_LINK2:
- case IT_RMDIR:
- case IT_SYMLINK:
case IT_UNLINK:
- rc = mds_reint(req, 2);
- if (rc || (req->rq_status != 0 &&
- req->rq_status != -EISDIR &&
- req->rq_status != -ENOTDIR)) {
+ rc = mds_reint(req, offset, &lockh);
+ /* Don't return a lock if the unlink failed, or if we're
+ * not sending back an EA */
+ if (rc) {
+ rep->lock_policy_res2 = rc;
+ RETURN(ELDLM_LOCK_ABORTED);
+ }
+ if (req->rq_status != 0) {
rep->lock_policy_res2 = req->rq_status;
RETURN(ELDLM_LOCK_ABORTED);
}
+ mds_body = lustre_msg_buf(req->rq_repmsg, 1);
+ if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
+ rep->lock_policy_res2 = rc;
+ RETURN(ELDLM_LOCK_ABORTED);
+ }
break;
case IT_GETATTR:
case IT_LOOKUP:
- case IT_OPEN:
case IT_READDIR:
- case IT_READLINK:
- case IT_RENAME:
- case IT_LINK:
case IT_SETATTR:
- rc = mds_getattr_name(2, req);
+ rc = mds_getattr_name(offset, req, &lockh);
/* FIXME: we need to sit down and decide on who should
* set req->rq_status, who should return negative and
* positive return values, and what they all mean. */
- if (rc || req->rq_status != 0) {
+ if (rc) {
+ rep->lock_policy_res2 = rc;
+ RETURN(ELDLM_LOCK_ABORTED);
+ }
+ if (req->rq_status != 0) {
rep->lock_policy_res2 = req->rq_status;
RETURN(ELDLM_LOCK_ABORTED);
}
break;
- case IT_READDIR|IT_OPEN:
- LBUG();
- break;
default:
CERROR("Unhandled intent "LPD64"\n", it->opc);
LBUG();
}
- /* We don't bother returning a lock to the client for a file
- * or directory we are removing.
- *
- * As for link and rename, there is no reason for the client
- * to get a lock on the target at this point. If they are
- * going to modify the file/directory later they will get a
- * lock at that time.
- */
- if (it->opc & (IT_UNLINK | IT_RMDIR | IT_LINK | IT_LINK2 |
- IT_RENAME | IT_RENAME2))
- RETURN(ELDLM_LOCK_ABORTED);
-
- rep->lock_policy_res2 = req->rq_status;
- mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
-
- /* If the client is about to open a file that doesn't have an
- * MD stripe record, it's going to need a write lock.
- */
- if (it->opc & IT_OPEN && !(mds_rep->valid & OBD_MD_FLEASIZE)) {
- LDLM_DEBUG(lock, "open with no EA; returning PW lock");
- lock->l_req_mode = LCK_PW;
- }
-
if (flags & LDLM_FL_INTENT_ONLY) {
LDLM_DEBUG(lock, "INTENT_ONLY, aborting lock");
RETURN(ELDLM_LOCK_ABORTED);
}
- /* Give the client a lock on the child object, instead of the
- * parent that it requested. */
- new_resid[0] = NTOH__u32(mds_rep->ino);
- new_resid[1] = NTOH__u32(mds_rep->generation);
- if (new_resid[0] == 0)
- LBUG();
- old_res = lock->l_resource->lr_name[0];
- ldlm_lock_change_resource(ns, lock, new_resid);
- if (lock->l_resource == NULL) {
- LBUG();
- RETURN(-ENOMEM);
- }
- LDLM_DEBUG(lock, "intent policy, old res %ld",
- (long)old_res);
- RETURN(ELDLM_LOCK_CHANGED);
+ /* By this point, whatever function we called above must have
+ * filled in 'lockh' or returned an error. We want to give the
+ * new lock to the client instead of whatever lock it was about
+ * to get. */
+ new_lock = ldlm_handle2lock(&lockh);
+ LASSERT(new_lock != NULL);
+ mds_body = lustre_msg_buf(req->rq_repmsg, 1);
+ *lockp = new_lock;
+
+ /* Fixup the lock to be given to the client */
+ l_lock(&new_lock->l_resource->lr_namespace->ns_lock);
+ LASSERT(new_lock->l_readers + new_lock->l_writers == 1);
+ new_lock->l_readers = 0;
+ new_lock->l_writers = 0;
+
+ new_lock->l_export = req->rq_export;
+ list_add(&new_lock->l_export_chain,
+ &new_lock->l_export->exp_ldlm_data.led_held_locks);
+
+ /* We don't need to worry about completion_ast (which isn't set
+ * in 'lock' yet anyways), because this lock is already
+ * granted. */
+ new_lock->l_blocking_ast = lock->l_blocking_ast;
+
+ memcpy(&new_lock->l_remote_handle, &lock->l_remote_handle,
+ sizeof(lock->l_remote_handle));
+
+ new_lock->l_flags &= ~(LDLM_FL_LOCAL | LDLM_FL_AST_SENT |
+ LDLM_FL_CBPENDING);
+
+ LDLM_LOCK_PUT(new_lock);
+ l_unlock(&new_lock->l_resource->lr_namespace->ns_lock);
+
+ rep->lock_policy_res2 = req->rq_status;
+
+ RETURN(ELDLM_LOCK_REPLACED);
} else {
int size = sizeof(struct ldlm_reply);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
int mds_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_multi_vars(0, &lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int mds_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
+}
+
+int mdt_attach(struct obd_device *dev, obd_count len, void *data)
+{
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_multi_vars(1, &lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
+}
+
+int mdt_detach(struct obd_device *dev)
+{
+ return lprocfs_obd_detach(dev);
}
static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf)
{
- int i;
- // struct obd_ioctl_data* data = buf;
struct mds_obd *mds = &obddev->u.mds;
- int rc = 0;
+ struct obd_uuid uuid = { "self" };
+ int i, rc = 0;
ENTRY;
mds->mds_service = ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
- "self", mds_handle, "mds");
+ &uuid, mds_handle, "mds");
if (!mds->mds_service) {
CERROR("failed to start service\n");
RETURN(rc = -ENOMEM);
}
}
+ mds->mds_getattr_service =
+ ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
+ MDS_BUFSIZE, MDS_MAXREQSIZE,
+ MDS_GETATTR_PORTAL, MDC_REPLY_PORTAL,
+ &uuid, mds_handle, "mds");
+ if (!mds->mds_getattr_service) {
+ CERROR("failed to start getattr service\n");
+ GOTO(err_thread, rc = -ENOMEM);
+ }
+
+ for (i = 0; i < MDT_NUM_THREADS; i++) {
+ char name[32];
+ sprintf(name, "ll_mdt_attr_%02d", i);
+ rc = ptlrpc_start_thread(obddev, mds->mds_getattr_service,
+ name);
+ if (rc) {
+ CERROR("cannot start MDT getattr thread #%d: rc %d\n",
+ i, rc);
+ GOTO(err_thread2, rc);
+ }
+ }
+
RETURN(0);
+err_thread2:
+ ptlrpc_stop_all_threads(mds->mds_getattr_service);
+ ptlrpc_unregister_service(mds->mds_getattr_service);
err_thread:
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
- RETURN(rc);
+ return rc;
}
struct mds_obd *mds = &obddev->u.mds;
ENTRY;
+ ptlrpc_stop_all_threads(mds->mds_getattr_service);
+ ptlrpc_unregister_service(mds->mds_getattr_service);
+
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
static struct obd_ops mdt_obd_ops = {
o_owner: THIS_MODULE,
+ o_attach: mdt_attach,
+ o_detach: mdt_detach,
o_setup: mdt_setup,
o_cleanup: mdt_cleanup,
};
static int __init mds_init(void)
{
+ struct lprocfs_static_vars lvars;
mds_file_cache = kmem_cache_create("ll_mds_file_data",
sizeof(struct mds_file_data),
0, 0, NULL, NULL);
if (mds_file_cache == NULL)
return -ENOMEM;
- class_register_type(&mds_obd_ops, status_class_var, LUSTRE_MDS_NAME);
- class_register_type(&mdt_obd_ops, 0, LUSTRE_MDT_NAME);
+ lprocfs_init_multi_vars(0, &lvars);
+ class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME);
+ lprocfs_init_multi_vars(1, &lvars);
+ class_register_type(&mdt_obd_ops, lvars.module_vars, LUSTRE_MDT_NAME);
ldlm_register_intent(ldlm_intent_policy);
return 0;
CERROR("couldn't free MDS file cache\n");
}
-MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Metadata Server (MDS) v0.01");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Metadata Server (MDS)");
MODULE_LICENSE("GPL");
module_init(mds_init);
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
-int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- return snprintf(page, count, "%s\n", temp->obd_uuid);
-}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_mds_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_mds_module_vars[] = { {0} };
+struct lprocfs_vars lprocfs_mdt_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} };
-int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct mds_obd *mds = &temp->u.mds;
- struct statfs mystats;
- int rc;
+#else
- rc = vfs_statfs(mds->mds_sb, &mystats);
- if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
- return 0;
- }
- return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_bsize));
-}
-
-int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static inline
+int lprocfs_mds_statfs(void *data, struct statfs *sfs)
{
- struct obd_device* temp = (struct obd_device*)data;
- struct mds_obd *mds = &temp->u.mds;
- struct statfs mystats;
- int rc;
- __u32 blk_size;
- __u64 result;
-
- rc = vfs_statfs(mds->mds_sb, &mystats);
- if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
- return 0;
- }
-
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_blocks;
- while(blk_size >>= 1)
- result <<= 1;
-
- return snprintf(page, count, LPU64"\n", result);
+ struct obd_device* dev = (struct obd_device*) data;
+ struct mds_obd *mds = &dev->u.mds;
+ return vfs_statfs(mds->mds_sb, sfs);
}
-int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct mds_obd *mds = &temp->u.mds;
- struct statfs mystats;
- int rc;
- __u32 blk_size;
- __u64 result;
-
- rc = vfs_statfs(mds->mds_sb, &mystats);
- if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
- return 0;
- }
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_blocks;
- while (blk_size >>= 1)
- result <<= 1;
-
- return snprintf(page, count, LPU64"\n", result);
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_mds_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_mds_statfs);
int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
void *data)
return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
}
-int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct mds_obd *mds = &temp->u.mds;
- struct statfs mystats;
- int rc;
- rc = vfs_statfs(mds->mds_sb, &mystats);
- if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
- return 0;
- }
- return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
-}
-
-int rd_filesfree(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct mds_obd *mds = &temp->u.mds;
- struct statfs mystats;
- int rc, len = 0;
-
- rc = vfs_statfs(mds->mds_sb, &mystats);
- if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
- return 0;
- }
+struct lprocfs_vars lprocfs_mds_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "bytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "fstype", rd_fstype, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { 0 }
+};
- len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_ffree));
- return len;
-}
+struct lprocfs_vars lprocfs_mds_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
+};
-int rd_filegroups(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-struct lprocfs_vars status_var_nm_1[]={
- {"status/uuid", rd_uuid, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytestotal",rd_kbtotal, 0, 0},
- {"status/kbytesfree", rd_kbfree, 0, 0},
- {"status/fstype", rd_fstype, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type *class = (struct obd_type*)data;
+struct lprocfs_vars lprocfs_mdt_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
+};
- return snprintf(page, count, "%d\n", class->typ_refcnt);
-}
+#endif
+struct lprocfs_static_vars lprocfs_array_vars[] = { {lprocfs_mds_module_vars,
+ lprocfs_mds_obd_vars},
+ {lprocfs_mdt_module_vars,
+ lprocfs_mdt_obd_vars}};
-struct lprocfs_vars status_class_var[]={
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
-};
+LPROCFS_INIT_MULTI_VARS(lprocfs_array_vars,
+ (sizeof(lprocfs_array_vars)/
+ sizeof(struct lprocfs_static_vars)))
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * linux/mds/mds_fs.c
- *
+ * mds/mds_fs.c
* Lustre Metadata Server (MDS) filesystem interface code
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * by Andreas Dilger <adilger@clusterfs.com>
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define EXPORT_SYMTAB
push_ctxt(&saved, &mds->mds_ctxt, NULL);
written = lustre_fwrite(mds->mds_rcvd_filp,
- (char *)med->med_mcd,
- sizeof(*med->med_mcd), &off);
+ (char *)med->med_mcd,
+ sizeof(*med->med_mcd), &off);
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
if (written != sizeof(*med->med_mcd)) {
med->med_mcd->mcd_uuid, med->med_off);
}
+ if (med->med_last_reply) {
+ OBD_FREE(med->med_last_reply, med->med_last_replen);
+ med->med_last_reply = NULL;
+ }
OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
return 0;
}
CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
- last_rcvd_size, (last_rcvd_size - sizeof *msd) / sizeof *mcd);
+ last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
/*
* When we do a clean MDS shutdown, we save the last_rcvd into
break;
}
+ memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
+ sizeof exp->exp_client_uuid.uuid);
med = &exp->exp_mds_data;
med->med_mcd = mcd;
mds_client_add(mds, med, cl_off);
mds->mds_last_rcvd = last_rcvd;
}
- mds->mds_last_committed = mds->mds_last_rcvd;
+ obddev->obd_last_committed = mds->mds_last_rcvd;
if (mds->mds_recoverable_clients) {
CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
mds->mds_recoverable_clients, mds->mds_last_rcvd);
- mds->mds_next_recovery_transno = mds->mds_last_committed + 1;
+ mds->mds_next_recovery_transno = obddev->obd_last_committed + 1;
+ obddev->obd_flags |= OBD_RECOVERING;
}
if (mcd)
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* linux/mds/mds_lov.c
- *
* Lustre Metadata Server (mds) handling of striped file data
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * by Peter Braam <braam@clusterfs.com> &
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define EXPORT_SYMTAB
}
int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc,
- obd_uuid_t *uuidarray)
+ struct obd_uuid *uuidarray)
{
struct mds_obd *mds = &obd->u.mds;
struct obd_run_ctxt saved;
#warning FIXME: if there is an existing LOVTGTS, verify existing UUIDs same
rc = 0;
for (i = 0; i < tgt_count ; i++) {
- rc = lustre_fwrite(f, uuidarray[i],
+ rc = lustre_fwrite(f, uuidarray[i].uuid,
sizeof(uuidarray[i]), &f->f_pos);
if (rc != sizeof(uuidarray[i])) {
CERROR("cannot write LOV UUID %s (%d)\n",
- uuidarray[i], i);
+ uuidarray[i].uuid, i);
if (rc >= 0)
rc = -EIO;
break;
return rc;
}
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,obd_uuid_t *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
{
struct obd_run_ctxt saved;
struct file *f;
struct obd_device *obd = class_conn2obd(conn);
struct obd_ioctl_data *data = karg;
struct lov_desc *desc;
- obd_uuid_t *uuidarray;
+ struct obd_uuid *uuidarray;
int count;
int rc;
-
switch (cmd) {
case OBD_IOC_LOV_SET_CONFIG:
desc = (struct lov_desc *)data->ioc_inlbuf1;
}
count = desc->ld_tgt_count;
- uuidarray = (obd_uuid_t *)data->ioc_inlbuf2;
+ uuidarray = (struct obd_uuid *)data->ioc_inlbuf2;
if (sizeof(*uuidarray) * count != data->ioc_inllen2) {
CERROR("UUID array size wrong\n");
RETURN(-EINVAL);
}
count = desc->ld_tgt_count;
- uuidarray = (obd_uuid_t *)data->ioc_inlbuf2;
+ uuidarray = (struct obd_uuid *)data->ioc_inlbuf2;
if (sizeof(*uuidarray) * count != data->ioc_inllen2) {
CERROR("UUID array size wrong\n");
RETURN(-EINVAL);
rc = mds_get_lovtgts(&obd->u.mds, desc->ld_tgt_count, uuidarray);
RETURN(rc);
+
+ case OBD_IOC_SET_READONLY:
+ CERROR("setting device %s read-only\n",
+ ll_bdevname(obd->u.mds.mds_sb->s_dev));
+ dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
+ RETURN(0);
+
default:
RETURN(-EINVAL);
}
-
RETURN(0);
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mds/handler.c
+ * Lustre Metadata Server (mds) request handler
+ *
+ * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/module.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_dlm.h>
+#include <linux/init.h>
+#include <linux/obd_class.h>
+#include <linux/random.h>
+#include <linux/locks.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#endif
+#include <linux/obd_lov.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+
+extern kmem_cache_t *mds_file_cache;
+extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+extern void mds_start_transno(struct mds_obd *mds);
+extern int mds_finish_transno(struct mds_obd *mds, void *handle,
+ struct ptlrpc_request *req, int rc);
+extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
+ struct ldlm_res_id *p1_res_id,
+ struct ldlm_res_id *p2_res_id,
+ struct ldlm_res_id *c1_res_id,
+ struct ldlm_res_id *c2_res_id,
+ struct lustre_handle *p1_lockh,
+ struct lustre_handle *p2_lockh,
+ struct lustre_handle *c1_lockh,
+ struct lustre_handle *c2_lockh);
+
+int mds_open(struct mds_update_record *rec, int offset,
+ struct ptlrpc_request *req, struct lustre_handle *child_lockh)
+{
+ struct mds_obd *mds = mds_req2mds(req);
+ struct obd_device *obd = req->rq_export->exp_obd;
+ struct ldlm_reply *rep = lustre_msg_buf(req->rq_repmsg, 0);
+ struct file *file;
+ struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+ struct dentry *dchild, *parent;
+ struct mds_export_data *med;
+ struct mds_file_data *mfd = NULL;
+ struct ldlm_res_id child_res_id = { .name = {0} };
+ struct lustre_handle parent_lockh;
+ int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
+ ENTRY;
+
+#warning replay of open needs to be redone
+ /* was this animal open already and the client lost the reply? */
+ /* XXX need some way to detect a reopen, to avoid locked list walks */
+ med = &req->rq_export->exp_mds_data;
+#if 0
+ spin_lock(&med->med_open_lock);
+ list_for_each(tmp, &med->med_open_head) {
+ mfd = list_entry(tmp, typeof(*mfd), mfd_list);
+ if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
+ sizeof(mfd->mfd_clienthandle)) &&
+ body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
+ dchild = mfd->mfd_file->f_dentry;
+ spin_unlock(&med->med_open_lock);
+ CERROR("Re opening "LPD64"\n", body->fid1.id);
+ GOTO(out_pack, rc = 0);
+ }
+ }
+ spin_unlock(&med->med_open_lock);
+#endif
+ rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
+ CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
+ req->rq_status = -ENOMEM;
+ RETURN(-ENOMEM);
+ }
+
+ /* Step 1: Find and lock the parent */
+ parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
+ parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
+ &parent_lockh);
+ if (IS_ERR(parent)) {
+ rc = PTR_ERR(parent);
+ CERROR("parent lookup error %d\n", rc);
+ LBUG();
+ RETURN(rc);
+ }
+ LASSERT(parent->d_inode);
+
+ /* Step 2: Lookup the child */
+ dchild = lookup_one_len(lustre_msg_buf(req->rq_reqmsg, 3),
+ parent, req->rq_reqmsg->buflens[3] - 1);
+ if (IS_ERR(dchild))
+ GOTO(out_step_2, rc = PTR_ERR(dchild));
+
+ if (dchild->d_inode)
+ rep->lock_policy_res1 |= IT_OPEN_POS;
+ else
+ rep->lock_policy_res1 |= IT_OPEN_NEG;
+
+ /* Step 3: If the child was negative, and we're supposed to,
+ * create it. */
+ if ((rec->ur_flags & O_CREAT) && !dchild->d_inode) {
+ int err;
+ void *handle;
+ mds_start_transno(mds);
+ rep->lock_policy_res1 |= IT_OPEN_CREATE;
+ handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+ if (IS_ERR(handle)) {
+ rc = PTR_ERR(handle);
+ mds_finish_transno(mds, handle, req, rc);
+ GOTO(out_step_3, rc);
+ }
+ rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
+ rc = mds_finish_transno(mds, handle, req, rc);
+ err = fsfilt_commit(obd, parent->d_inode, handle);
+ if (rc || err) {
+ CERROR("error on commit: err = %d\n", err);
+ if (!rc)
+ rc = err;
+ GOTO(out_step_3, rc);
+ }
+ created = 1;
+ child_mode = LCK_PW;
+ } else if (!dchild->d_inode) {
+ /* It's negative and we weren't supposed to create it */
+ GOTO(out_step_3, rc = -ENOENT);
+ }
+
+ /* Step 4: It's positive, so lock the child */
+ child_res_id.name[0] = dchild->d_inode->i_ino;
+ child_res_id.name[1] = dchild->d_inode->i_generation;
+ reacquire:
+ lock_flags = 0;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
+ &lock_flags, ldlm_completion_ast,
+ mds_blocking_ast, NULL, NULL, child_lockh);
+ if (rc != ELDLM_OK) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ GOTO(out_step_3, rc = -EIO);
+ }
+
+ mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+ mds_pack_inode2body(body, dchild->d_inode);
+ if (S_ISREG(dchild->d_inode->i_mode)) {
+ rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
+ if (rc)
+ GOTO(out_step_4, rc);
+ } else {
+ /* If this isn't a regular file, we can't open it. */
+ GOTO(out_step_3, rc = 0); /* returns the lock to the client */
+ }
+
+ if (!created && (rec->ur_flags & O_CREAT) && (rec->ur_flags & O_EXCL)) {
+ /* File already exists, we didn't just create it, and we
+ * were passed O_EXCL; err-or. */
+ GOTO(out_step_3, rc = -EEXIST); // returns a lock to the client
+ }
+
+ /* If we're opening a file without an EA, the client needs a write
+ * lock. */
+ if (child_mode != LCK_PW && S_ISREG(dchild->d_inode->i_mode) &&
+ !(body->valid & OBD_MD_FLEASIZE)) {
+ ldlm_lock_decref(child_lockh, child_mode);
+ child_mode = LCK_PW;
+ goto reacquire;
+ }
+
+ /* Step 5: Open it */
+ rep->lock_policy_res1 |= IT_OPEN_OPEN;
+ mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
+ if (!mfd) {
+ CERROR("mds: out of memory\n");
+ GOTO(out_step_4, req->rq_status = -ENOMEM);
+ }
+
+ /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+ mntget(mds->mds_vfsmnt);
+ file = dentry_open(dchild,mds->mds_vfsmnt,
+ rec->ur_flags & ~(O_DIRECT | O_TRUNC));
+ if (IS_ERR(file))
+ GOTO(out_step_5, rc = PTR_ERR(file));
+
+ file->private_data = mfd;
+ mfd->mfd_file = file;
+ get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
+ spin_lock(&med->med_open_lock);
+ list_add(&mfd->mfd_list, &med->med_open_head);
+ spin_unlock(&med->med_open_lock);
+
+ body->handle.addr = (__u64)(unsigned long)mfd;
+ body->handle.cookie = mfd->mfd_servercookie;
+ CDEBUG(D_INODE, "file %p: mfd %p, cookie "LPX64"\n",
+ mfd->mfd_file, mfd, mfd->mfd_servercookie);
+ GOTO(out_step_2, rc = 0); /* returns a lock to the client */
+
+ out_step_5:
+ if (mfd != NULL) {
+ kmem_cache_free(mds_file_cache, mfd);
+ mfd = NULL;
+ }
+ out_step_4:
+ ldlm_lock_decref(child_lockh, child_mode);
+ out_step_3:
+ l_dput(dchild);
+ out_step_2:
+ l_dput(parent);
+ ldlm_lock_decref(&parent_lockh, parent_mode);
+ RETURN(rc);
+}
* linux/mds/mds_reint.c
* Lustre Metadata Server (mds) reintegration routines
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
*/
#define EXPORT_SYMTAB
static void mds_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd, int error)
{
- struct mds_obd *mds = &obd->u.mds;
-
CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n",
last_rcvd, error);
- if (!error && last_rcvd > mds->mds_last_committed)
- mds->mds_last_committed = last_rcvd;
+ if (!error && last_rcvd > obd->obd_last_committed)
+ obd->obd_last_committed = last_rcvd;
}
void mds_start_transno(struct mds_obd *mds)
return rc;
}
-/* In the write-back case, the client holds a lock on a subtree.
- * In the intent case, the client holds a lock on the child inode.
- * In the pathname case, the client (may) hold a lock on the child inode. */
+/* In the write-back case, the client holds a lock on a subtree (not supported).
+ * In the intent case, the client holds a lock on the child inode. */
static int mds_reint_setattr(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req,
+ struct lustre_handle *lh)
{
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct dentry *de;
struct inode *inode;
void *handle;
- struct lustre_handle child_lockh;
int rc = 0, err;
- if (req->rq_reqmsg->bufcount > offset + 1) {
- struct dentry *dir;
- struct lustre_handle dir_lockh;
- char *name;
- int namelen;
-
- /* a name was supplied by the client; fid1 is the directory */
- dir = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PR,
- &dir_lockh);
- if (IS_ERR(dir)) {
- LBUG();
- GOTO(out_setattr, rc = PTR_ERR(dir));
- }
-
- name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
- namelen = req->rq_reqmsg->buflens[offset + 1] - 1;
- de = mds_name2locked_dentry(obd, dir, NULL, name, namelen,
- 0, &child_lockh, LCK_PR);
- l_dput(dir);
- if (IS_ERR(de)) {
- LBUG();
- GOTO(out_setattr_de, rc = PTR_ERR(de));
- }
- } else {
- de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
- if (!de || IS_ERR(de)) {
- GOTO(out_setattr_de, rc = PTR_ERR(de));
- }
- }
+ de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+ if (IS_ERR(de))
+ GOTO(out_setattr, rc = PTR_ERR(de));
inode = de->d_inode;
+
+ LASSERT(inode);
CDEBUG(D_INODE, "ino %lu\n", inode->i_ino);
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
}
rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr);
-
- if (offset) {
- body = lustre_msg_buf(req->rq_repmsg, 1);
- mds_pack_inode2fid(&body->fid1, inode);
- mds_pack_inode2body(body, inode);
+ if (rc == 0 && S_ISREG(inode->i_mode) &&
+ req->rq_reqmsg->bufcount > 1) {
+ rc = fsfilt_set_md(obd, inode, handle,
+ lustre_msg_buf(req->rq_reqmsg, 1),
+ req->rq_reqmsg->buflens[1]);
}
- rc = mds_finish_transno(mds, handle, req, rc);
+ body = lustre_msg_buf(req->rq_repmsg, 0);
+ mds_pack_inode2fid(&body->fid1, inode);
+ mds_pack_inode2body(body, inode);
+ rc = mds_finish_transno(mds, handle, req, rc);
err = fsfilt_commit(obd, de->d_inode, handle);
if (err) {
CERROR("error on commit: err = %d\n", err);
}
static int mds_reint_create(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req,
+ struct lustre_handle *lh)
{
struct dentry *de = NULL;
struct mds_obd *mds = mds_req2mds(req);
struct inode *dir;
void *handle;
struct lustre_handle lockh;
- int rc = 0, err, lock_mode, type = rec->ur_mode & S_IFMT;
+ int rc = 0, err, type = rec->ur_mode & S_IFMT;
ENTRY;
- /* requests were at offset 2, replies go back at 1 */
- if (offset)
- offset = 1;
-
+ LASSERT(offset == 0);
LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds"));
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_CW : LCK_PW;
-
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE))
GOTO(out_create, rc = -ESTALE);
- de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, lock_mode, &lockh);
+ de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, &lockh);
if (IS_ERR(de)) {
rc = PTR_ERR(de);
CERROR("parent lookup error %d\n", rc);
GOTO(out_create, rc);
}
dir = de->d_inode;
- CDEBUG(D_INODE, "parent ino %lu name %s mode %o\n",
+ LASSERT(dir);
+ CDEBUG(D_INODE, "parent ino %lu creating name %s mode %o\n",
dir->i_ino, rec->ur_name, rec->ur_mode);
ldlm_lock_dump_handle(D_OTHER, &lockh);
- down(&dir->i_sem);
dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
if (IS_ERR(dchild)) {
- CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- LBUG();
- GOTO(out_create_de, rc = -ESTALE);
- }
-
- if (dchild->d_inode) {
- struct mds_body *body;
- struct inode *inode = dchild->d_inode;
-
- CDEBUG(D_INODE, "child exists (dir %lu, name %s, ino %lu)\n",
- dir->i_ino, rec->ur_name, dchild->d_inode->i_ino);
-
- /* XXX check that mode is correct? */
-
- body = lustre_msg_buf(req->rq_repmsg, offset);
- mds_pack_inode2fid(&body->fid1, inode);
- mds_pack_inode2body(body, inode);
- if (S_ISREG(inode->i_mode))
- mds_pack_md(mds, req, offset + 1, body, inode);
-
- /* This isn't an error for RECREATE. */
- if (rec->ur_opcode & REINT_REPLAYING) {
- CDEBUG(D_INODE, "EEXIST suppressed for REPLAYING\n");
- rc = 0;
- } else {
- rc = -EEXIST;
- }
- GOTO(out_create_dchild, rc);
+ rc = PTR_ERR(dchild);
+ CERROR("child lookup error %d\n", rc);
+ GOTO(out_create_de, rc);
}
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE,
}
out_create_dchild:
l_dput(dchild);
- ldlm_lock_decref(&lockh, lock_mode);
out_create_de:
- up(&dir->i_sem);
+ ldlm_lock_decref(&lockh, LCK_PW);
l_dput(de);
out_create:
req->rq_status = rc;
goto out_create_commit;
}
+/* This function doesn't use ldlm_match_or_enqueue because we're always called
+ * with EX or PW locks, and the MDS is no longer allowed to match write locks,
+ * because they take the place of local semaphores.
+ *
+ * Two locks are taken in numerical order */
+int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
+ struct ldlm_res_id *p1_res_id,
+ struct ldlm_res_id *p2_res_id,
+ struct lustre_handle *p1_lockh,
+ struct lustre_handle *p2_lockh)
+{
+ struct ldlm_res_id res_id[2];
+ struct lustre_handle *handles[2] = {p1_lockh, p2_lockh};
+ int rc, flags;
+ ENTRY;
+
+ LASSERT(p1_res_id != NULL && p2_res_id != NULL);
+
+ CDEBUG(D_INFO, "locks before: "LPU64"/"LPU64"\n",
+ p1_res_id[0].name[0], p2_res_id[0].name[0]);
+
+ if (p1_res_id->name[0] < p2_res_id->name[0]) {
+ handles[0] = p1_lockh;
+ handles[1] = p2_lockh;
+ res_id[0] = *p1_res_id;
+ res_id[1] = *p2_res_id;
+ } else {
+ handles[1] = p1_lockh;
+ handles[0] = p2_lockh;
+ res_id[1] = *p1_res_id;
+ res_id[0] = *p2_res_id;
+ }
+
+ CDEBUG(D_INFO, "lock order: "LPU64"/"LPU64"\n",
+ p1_res_id[0].name[0], p2_res_id[0].name[0]);
+
+ flags = LDLM_FL_LOCAL_ONLY;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, res_id[0],
+ LDLM_PLAIN, NULL, 0, lock_mode, &flags,
+ ldlm_completion_ast, mds_blocking_ast, NULL,
+ NULL, handles[0]);
+ if (rc != ELDLM_OK)
+ RETURN(-EIO);
+ ldlm_lock_dump_handle(D_OTHER, handles[0]);
+
+ if (memcmp(&res_id[0], &res_id[1], sizeof(res_id[0])) == 0) {
+ memcpy(handles[1], handles[0], sizeof(*(handles[1])));
+ ldlm_lock_addref(handles[1], lock_mode);
+ } else {
+ flags = LDLM_FL_LOCAL_ONLY;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ res_id[1], LDLM_PLAIN, NULL, 0, lock_mode,
+ &flags, ldlm_completion_ast,
+ mds_blocking_ast, NULL, 0, handles[1]);
+ if (rc != ELDLM_OK) {
+ ldlm_lock_decref(handles[0], lock_mode);
+ RETURN(-EIO);
+ }
+ }
+ ldlm_lock_dump_handle(D_OTHER, handles[1]);
+
+ RETURN(0);
+}
+
static int mds_reint_unlink(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req,
+ struct lustre_handle *child_lockh)
{
- struct dentry *de = NULL;
+ struct dentry *dir_de = NULL;
struct dentry *dchild = NULL;
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct mds_body *body = NULL;
+ struct inode *dir_inode, *child_inode;
+ struct lustre_handle *handle, parent_lockh;
+ struct ldlm_res_id child_res_id = { .name = {0} };
char *name;
- struct inode *dir, *inode;
- struct lustre_handle lockh, child_lockh;
- void *handle;
- int namelen, lock_mode, err, rc = 0;
+ int namelen, err, rc = 0, flags = 0, return_lock = 0;
ENTRY;
- /* a name was supplied by the client; fid1 is the directory */
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
- de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, lock_mode, &lockh);
- if (IS_ERR(de)) {
- LBUG();
- RETURN(PTR_ERR(de));
- }
-
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
- GOTO(out_unlink, rc = -ENOENT);
+ GOTO(out, rc = -ENOENT);
+
+ /* Step 1: Lookup the parent by FID */
+ dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+ &parent_lockh);
+ if (IS_ERR(dir_de))
+ GOTO(out, rc = PTR_ERR(dir_de));
+ dir_inode = dir_de->d_inode;
+ LASSERT(dir_inode);
+ /* Step 2: Lookup the child */
name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
namelen = req->rq_reqmsg->buflens[offset + 1] - 1;
-#warning "FIXME: if mds_name2locked_dentry decrefs this lock, we must not"
- memcpy(&child_lockh, &lockh, sizeof(child_lockh));
- dchild = mds_name2locked_dentry(obd, de, NULL, name, namelen,
- LCK_EX, &child_lockh, lock_mode);
-
- if (IS_ERR(dchild)) {
- LBUG();
- GOTO(out_unlink, rc = PTR_ERR(dchild));
- }
-
- dir = de->d_inode;
- inode = dchild->d_inode;
- DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu\n", dir->i_ino,
- inode ? inode->i_ino : 0);
- if (!inode) {
+ dchild = lookup_one_len(name, dir_de, namelen);
+ if (IS_ERR(dchild))
+ GOTO(out_step_2a, rc = PTR_ERR(dchild));
+ child_inode = dchild->d_inode;
+ if (child_inode == NULL) {
if (rec->ur_opcode & REINT_REPLAYING) {
CDEBUG(D_INODE,
"child missing (%lu/%s); OK for REPLAYING\n",
- dir->i_ino, rec->ur_name);
+ dir_inode->i_ino, rec->ur_name);
rc = 0;
} else {
CDEBUG(D_INODE,
"child doesn't exist (dir %lu, name %s)\n",
- dir->i_ino, rec->ur_name);
+ dir_inode->i_ino, rec->ur_name);
rc = -ENOENT;
}
- /* going to out_unlink_cancel causes an LBUG, don't know why */
- GOTO(out_unlink_dchild, rc);
+ GOTO(out_step_2b, rc);
}
- if (offset) {
- /* XXX offset? */
- offset = 1;
+ DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
+ dir_inode->i_ino, child_inode->i_ino);
- body = lustre_msg_buf(req->rq_repmsg, offset);
- mds_pack_inode2fid(&body->fid1, inode);
- mds_pack_inode2body(body, inode);
- }
+ /* Step 3: Get lock a lock on the child */
+ child_res_id.name[0] = child_inode->i_ino;
+ child_res_id.name[1] = child_inode->i_generation;
+
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
+ &flags, ldlm_completion_ast, mds_blocking_ast,
+ NULL, NULL, child_lockh);
+ if (rc != ELDLM_OK)
+ GOTO(out_step_2b, rc);
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
- to_kdev_t(dir->i_sb->s_dev));
+ to_kdev_t(dir_inode->i_sb->s_dev));
+
+ /* Slightly magical; see ldlm_intent_policy */
+ if (offset)
+ offset = 1;
+
+ body = lustre_msg_buf(req->rq_repmsg, offset);
+ /* Step 4: Do the unlink: client decides between rmdir/unlink!
+ * (bug 72) */
mds_start_transno(mds);
- switch (rec->ur_mode /* & S_IFMT ? */) {
+ switch (rec->ur_mode & S_IFMT) {
case S_IFDIR:
- handle = fsfilt_start(obd, dir, FSFILT_OP_RMDIR);
+ handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
if (IS_ERR(handle))
- GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
- rc = vfs_rmdir(dir, dchild);
+ GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+ rc = vfs_rmdir(dir_inode, dchild);
break;
case S_IFREG:
- /* get OBD EA data first so client can also destroy object */
- if ((inode->i_mode & S_IFMT) == S_IFREG && offset)
- mds_pack_md(mds, req, offset + 1, body, inode);
+ /* If this is the last reference to this inode, get the OBD EA
+ * data first so the client can destroy OST objects */
+ if ((child_inode->i_mode & S_IFMT) == S_IFREG &&
+ child_inode->i_nlink == 1) {
+ mds_pack_inode2fid(&body->fid1, child_inode);
+ mds_pack_inode2body(body, child_inode);
+ mds_pack_md(obd, req->rq_repmsg, offset + 1,
+ body, child_inode);
+ if (body->valid & OBD_MD_FLEASIZE)
+ return_lock = 1;
+ }
/* no break */
case S_IFLNK:
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- handle = fsfilt_start(obd, dir, FSFILT_OP_UNLINK);
+ handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
if (IS_ERR(handle))
- GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle));
- rc = vfs_unlink(dir, dchild);
+ GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+ rc = vfs_unlink(dir_inode, dchild);
break;
default:
CERROR("bad file type %o unlinking %s\n", rec->ur_mode, name);
handle = NULL;
LBUG();
- GOTO(out_unlink_cancel_transno, rc = -EINVAL);
+ GOTO(out_cancel_transno, rc = -EINVAL);
}
rc = mds_finish_transno(mds, handle, req, rc);
- err = fsfilt_commit(obd, dir, handle);
+ err = fsfilt_commit(obd, dir_inode, handle);
+ if (rc != 0 || err != 0) {
+ /* Don't unlink the OST objects if the MDS unlink failed */
+ body->valid = 0;
+ }
if (err) {
CERROR("error on commit: err = %d\n", err);
if (!rc)
rc = err;
}
- EXIT;
-
-out_unlink_cancel:
- ldlm_lock_decref(&child_lockh, LCK_EX);
- err = ldlm_cli_cancel(&child_lockh);
- if (err < 0) {
- CERROR("failed to cancel child inode lock: err = %d\n", err);
- if (!rc)
- rc = -ENOLCK; /*XXX translate LDLM lock error */
- }
-out_unlink_dchild:
+ GOTO(out_step_4, rc);
+ out_step_4:
+ if (rc != 0 || return_lock == 0)
+ ldlm_lock_decref(child_lockh, LCK_EX);
+ out_step_2b:
l_dput(dchild);
- up(&dir->i_sem);
-out_unlink:
- ldlm_lock_decref(&lockh, lock_mode);
- l_dput(de);
+ out_step_2a:
+ ldlm_lock_decref(&parent_lockh, LCK_EX);
+ l_dput(dir_de);
+ out:
req->rq_status = rc;
return 0;
-out_unlink_cancel_transno:
+ out_cancel_transno:
rc = mds_finish_transno(mds, handle, req, rc);
- goto out_unlink_cancel;
+ goto out_step_4;
}
static int mds_reint_link(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req, struct lustre_handle *lh)
{
struct obd_device *obd = req->rq_export->exp_obd;
struct dentry *de_src = NULL;
struct dentry *de_tgt_dir = NULL;
struct dentry *dchild = NULL;
struct mds_obd *mds = mds_req2mds(req);
- struct lustre_handle *handle, tgtlockh, srclockh;
- int lock_mode;
- __u64 res_id[3] = { 0 };
- int flags = 0;
- int rc = 0, err;
-
+ struct lustre_handle *handle, tgt_dir_lockh, src_lockh;
+ struct ldlm_res_id src_res_id = { .name = {0} };
+ struct ldlm_res_id tgt_dir_res_id = { .name = {0} };
+ int lock_mode, rc = 0, err;
ENTRY;
- de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL);
- if (IS_ERR(de_src) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) {
- GOTO(out_link, rc = -ESTALE);
- }
- /* plan to change the link count on this inode: write lock */
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
- res_id[0] = de_src->d_inode->i_ino;
- res_id[1] = de_src->d_inode->i_generation;
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK))
+ GOTO(out, rc = -ENOENT);
- rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, lock_mode, &srclockh);
- if (rc == 0) {
- LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &srclockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: err: %d\n", rc);
- GOTO(out_link_src_put, rc = -EIO);
- }
- } else {
- ldlm_lock_dump_handle(D_OTHER, &srclockh);
- }
+ /* Step 1: Lookup the source inode and target directory by FID */
+ de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+ if (IS_ERR(de_src))
+ GOTO(out, rc = PTR_ERR(de_src));
de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
- if (IS_ERR(de_tgt_dir)) {
- GOTO(out_link_src, rc = -ESTALE);
- }
-
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
- res_id[0] = de_tgt_dir->d_inode->i_ino;
- res_id[1] = de_tgt_dir->d_inode->i_generation;
-
- rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, lock_mode, &tgtlockh);
- if (rc == 0) {
- LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &tgtlockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: err: %d\n", rc);
- GOTO(out_link_tgt_dir_put, rc = -EIO);
- }
- } else {
- ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
- }
-
- down(&de_tgt_dir->d_inode->i_sem);
+ if (IS_ERR(de_tgt_dir))
+ GOTO(out_de_src, rc = PTR_ERR(de_tgt_dir));
+
+ CDEBUG(D_INODE, "linking %*s/%s to inode %lu\n",
+ de_tgt_dir->d_name.len, de_tgt_dir->d_name.name, rec->ur_name,
+ de_src->d_inode->i_ino);
+
+ /* Step 2: Take the two locks */
+ lock_mode = LCK_EX;
+ src_res_id.name[0] = de_src->d_inode->i_ino;
+ src_res_id.name[1] = de_src->d_inode->i_generation;
+ tgt_dir_res_id.name[0] = de_tgt_dir->d_inode->i_ino;
+ tgt_dir_res_id.name[1] = de_tgt_dir->d_inode->i_generation;
+
+ rc = enqueue_ordered_locks(LCK_EX, obd, &src_res_id, &tgt_dir_res_id,
+ &src_lockh, &tgt_dir_lockh);
+ if (rc != ELDLM_OK)
+ GOTO(out_tgt_dir, rc = -EIO);
+
+ /* Step 3: Lookup the child */
dchild = lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen - 1);
if (IS_ERR(dchild)) {
CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- GOTO(out_link_tgt_dir, rc = -ESTALE);
+ GOTO(out_drop_locks, rc = PTR_ERR(dchild));
}
if (dchild->d_inode) {
- struct inode *inode = dchild->d_inode;
- /* in intent case ship back attributes to client */
- if (offset) {
- struct mds_body *body =
- lustre_msg_buf(req->rq_repmsg, 1);
-
- mds_pack_inode2fid(&body->fid1, inode);
- mds_pack_inode2body(body, inode);
- if (S_ISREG(inode->i_mode))
- mds_pack_md(mds, req, 2, body, inode);
- }
if (rec->ur_opcode & REINT_REPLAYING) {
/* XXX verify that the link is to the the right file? */
- rc = 0;
CDEBUG(D_INODE,
"child exists (dir %lu, name %s) (REPLAYING)\n",
de_tgt_dir->d_inode->i_ino, rec->ur_name);
+ rc = 0;
} else {
- rc = -EEXIST;
- CERROR("child exists (dir %lu, name %s)\n",
+ CDEBUG(D_INODE, "child exists (dir %lu, name %s)\n",
de_tgt_dir->d_inode->i_ino, rec->ur_name);
+ rc = -EEXIST;
}
- GOTO(out_link_dchild, rc);
+ GOTO(out_drop_child, rc);
}
+ /* Step 4: Do it. */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
to_kdev_t(de_src->d_inode->i_sb->s_dev));
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
mds_finish_transno(mds, handle, req, rc);
- GOTO(out_link_dchild, rc);
+ GOTO(out_drop_child, rc);
}
rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
if (!rc)
rc = err;
}
+
EXIT;
-out_link_dchild:
+out_drop_child:
l_dput(dchild);
-out_link_tgt_dir:
- ldlm_lock_decref(&tgtlockh, lock_mode);
-out_link_tgt_dir_put:
- up(&de_tgt_dir->d_inode->i_sem);
+out_drop_locks:
+ ldlm_lock_decref(&src_lockh, lock_mode);
+ ldlm_lock_decref(&tgt_dir_lockh, lock_mode);
+out_tgt_dir:
l_dput(de_tgt_dir);
-out_link_src:
- ldlm_lock_decref(&srclockh, lock_mode);
-out_link_src_put:
+out_de_src:
l_dput(de_src);
-out_link:
+out:
req->rq_status = rc;
return 0;
}
static int mds_reint_rename(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req,
+ struct lustre_handle *lockh)
{
struct obd_device *obd = req->rq_export->exp_obd;
struct dentry *de_srcdir = NULL;
struct dentry *de_old = NULL;
struct dentry *de_new = NULL;
struct mds_obd *mds = mds_req2mds(req);
- struct lustre_handle tgtlockh, srclockh, oldhandle;
- int flags = 0, lock_mode, rc = 0, err;
+ struct lustre_handle dlm_handles[4];
+ struct ldlm_res_id p1_res_id = { .name = {0} };
+ struct ldlm_res_id p2_res_id = { .name = {0} };
+ struct ldlm_res_id c1_res_id = { .name = {0} };
+ struct ldlm_res_id c2_res_id = { .name = {0} };
+ int rc = 0, err, lock_count = 3, flags = LDLM_FL_LOCAL_ONLY;
void *handle;
- __u64 res_id[3] = { 0 };
ENTRY;
de_srcdir = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de_srcdir))
- GOTO(out_rename, rc = -ESTALE);
-
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
- res_id[0] = de_srcdir->d_inode->i_ino;
- res_id[1] = de_srcdir->d_inode->i_generation;
-
- rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, lock_mode, &srclockh);
- if (rc == 0) {
- LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &srclockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: err: %d\n", rc);
- GOTO(out_rename_srcput, rc = -EIO);
- }
- } else {
- ldlm_lock_dump_handle(D_OTHER, &srclockh);
- }
-
+ GOTO(out, rc = PTR_ERR(de_srcdir));
de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
if (IS_ERR(de_tgtdir))
- GOTO(out_rename_srcdir, rc = -ESTALE);
+ GOTO(out_put_srcdir, rc = PTR_ERR(de_tgtdir));
+
+ /* The idea here is that we need to get four locks in the end:
+ * one on each parent directory, one on each child. We need to take
+ * these locks in some kind of order (to avoid deadlocks), and the order
+ * I selected is "increasing resource number" order. We need to take
+ * the locks on the parent directories, however, before we can lookup
+ * the children. Thus the following plan:
+ *
+ * 1. Take locks on the parent(s), in order
+ * 2. Lookup the children
+ * 3. Take locks on the children, in order
+ * 4. Execute the rename
+ */
- lock_mode = (req->rq_reqmsg->opc == MDS_REINT) ? LCK_PW : LCK_PW;
- res_id[0] = de_tgtdir->d_inode->i_ino;
- res_id[1] = de_tgtdir->d_inode->i_generation;
+ /* Step 1: Take locks on the parent(s), in order */
+ p1_res_id.name[0] = de_srcdir->d_inode->i_ino;
+ p1_res_id.name[1] = de_srcdir->d_inode->i_generation;
- rc = ldlm_lock_match(obd->obd_namespace, res_id, LDLM_PLAIN,
- NULL, 0, lock_mode, &tgtlockh);
- if (rc == 0) {
- flags = 0;
- LDLM_DEBUG_NOLOCK("enqueue res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, lock_mode,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &tgtlockh);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: err: %d\n", rc);
- GOTO(out_rename_tgtput, rc = -EIO);
- }
- } else {
- ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
- }
+ p2_res_id.name[0] = de_tgtdir->d_inode->i_ino;
+ p2_res_id.name[1] = de_tgtdir->d_inode->i_generation;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- double_lock(de_tgtdir, de_srcdir);
-#endif
+ rc = enqueue_ordered_locks(LCK_EX, obd, &p1_res_id, &p2_res_id,
+ &(dlm_handles[0]), &(dlm_handles[1]));
+ if (rc != ELDLM_OK)
+ GOTO(out_put_tgtdir, rc);
+
+ /* Step 2: Lookup the children */
de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1);
if (IS_ERR(de_old)) {
CERROR("old child lookup error (%*s): %ld\n",
rec->ur_namelen - 1, rec->ur_name, PTR_ERR(de_old));
- GOTO(out_rename_tgtdir, rc = -ENOENT);
+ GOTO(out_step_2a, rc = PTR_ERR(de_old));
}
+ if (de_old->d_inode == NULL)
+ GOTO(out_step_2b, rc = -ENOENT);
+
de_new = lookup_one_len(rec->ur_tgt, de_tgtdir, rec->ur_tgtlen - 1);
if (IS_ERR(de_new)) {
CERROR("new child lookup error (%*s): %ld\n",
rec->ur_tgtlen - 1, rec->ur_tgt, PTR_ERR(de_new));
- GOTO(out_rename_deold, rc = -ENOENT);
+ GOTO(out_step_2b, rc = PTR_ERR(de_new));
}
- /* in intent case ship back attributes to client */
- if (offset) {
- struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
- struct inode *inode = de_new->d_inode;
-
- if (!inode) {
- body->valid = 0;
- } else {
- mds_pack_inode2fid(&body->fid1, inode);
- mds_pack_inode2body(body, inode);
- if (S_ISREG(inode->i_mode))
- mds_pack_md(mds, req, 2, body, inode);
- }
+ /* Step 3: Take locks on the children */
+ c1_res_id.name[0] = de_old->d_inode->i_ino;
+ c1_res_id.name[1] = de_old->d_inode->i_generation;
+ if (de_new->d_inode == NULL) {
+ flags = LDLM_FL_LOCAL_ONLY;
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ c1_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
+ &flags, ldlm_completion_ast,
+ mds_blocking_ast, NULL, NULL,
+ &(dlm_handles[2]));
+ lock_count = 3;
+ } else {
+ c2_res_id.name[0] = de_new->d_inode->i_ino;
+ c2_res_id.name[1] = de_new->d_inode->i_generation;
+ rc = enqueue_ordered_locks(LCK_EX, obd, &c1_res_id, &c2_res_id,
+ &(dlm_handles[2]),
+ &(dlm_handles[3]));
+ lock_count = 4;
}
+ if (rc != ELDLM_OK)
+ GOTO(out_step_3, rc);
+ /* Step 4: Execute the rename */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
mds_finish_transno(mds, handle, req, rc);
- GOTO(out_rename_denew, rc);
+ GOTO(out_step_4, rc);
}
lock_kernel();
if (!rc)
rc = err;
}
- EXIT;
-out_rename_denew:
+ EXIT;
+ out_step_4:
+ ldlm_lock_decref(&(dlm_handles[2]), LCK_EX);
+ if (lock_count == 4)
+ ldlm_lock_decref(&(dlm_handles[3]), LCK_EX);
+ out_step_3:
l_dput(de_new);
-out_rename_deold:
- if (!rc) {
- res_id[0] = de_old->d_inode->i_ino;
- res_id[1] = de_old->d_inode->i_generation;
- flags = 0;
- /* Take an exclusive lock on the resource that we're
- * about to free, to force everyone to drop their
- * locks. */
- LDLM_DEBUG_NOLOCK("getting EX lock res "LPU64, res_id[0]);
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
- &flags, ldlm_completion_ast,
- mds_blocking_ast, NULL, 0, &oldhandle);
- if (rc)
- CERROR("failed to get child inode lock (child ino "
- LPD64" dir ino %lu)\n",
- res_id[0], de_old->d_inode->i_ino);
- }
-
+ out_step_2b:
l_dput(de_old);
-
- if (!rc) {
- ldlm_lock_decref(&oldhandle, LCK_EX);
- rc = ldlm_cli_cancel(&oldhandle);
- if (rc < 0)
- CERROR("failed to cancel child inode lock ino "
- LPD64": %d\n", res_id[0], rc);
- }
-out_rename_tgtdir:
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- double_up(&de_srcdir->d_inode->i_sem, &de_tgtdir->d_inode->i_sem);
-#endif
- ldlm_lock_decref(&tgtlockh, lock_mode);
-out_rename_tgtput:
+ out_step_2a:
+ ldlm_lock_decref(&(dlm_handles[0]), LCK_EX);
+ ldlm_lock_decref(&(dlm_handles[1]), LCK_EX);
+ out_put_tgtdir:
l_dput(de_tgtdir);
-out_rename_srcdir:
- ldlm_lock_decref(&srclockh, lock_mode);
-out_rename_srcput:
+ out_put_srcdir:
l_dput(de_srcdir);
-out_rename:
+ out:
req->rq_status = rc;
return 0;
}
-typedef int (*mds_reinter) (struct mds_update_record *, int offset,
- struct ptlrpc_request *);
+typedef int (*mds_reinter)(struct mds_update_record *, int offset,
+ struct ptlrpc_request *, struct lustre_handle *);
static mds_reinter reinters[REINT_MAX + 1] = {
[REINT_SETATTR] mds_reint_setattr,
[REINT_UNLINK] mds_reint_unlink,
[REINT_LINK] mds_reint_link,
[REINT_RENAME] mds_reint_rename,
+ [REINT_OPEN] mds_open
};
int mds_reint_rec(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req, struct lustre_handle *lockh)
{
struct mds_obd *mds = mds_req2mds(req);
struct obd_run_ctxt saved;
struct obd_ucred uc;
- int realop = rec->ur_opcode & REINT_OPCODE_MASK;
- int rc;
+ int realop = rec->ur_opcode & REINT_OPCODE_MASK, rc;
+ ENTRY;
if (realop < 1 || realop > REINT_MAX) {
CERROR("opcode %d not valid (%sREPLAYING)\n", realop,
uc.ouc_fsuid = rec->ur_fsuid;
uc.ouc_fsgid = rec->ur_fsgid;
uc.ouc_cap = rec->ur_cap;
+ uc.ouc_suppgid = rec->ur_suppgid;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
- rc = reinters[realop] (rec, offset, req);
+ rc = reinters[realop] (rec, offset, req, lockh);
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
- return rc;
+ RETURN(rc);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Object Devices Class Driver
+ *
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* These are the only exported functions, they provide some generic
* infrastructure for managing object devices
- *
- * Object Devices Class Driver
*/
#define EXPORT_SYMTAB
atomic_t obd_memory;
int obd_memmax;
+/* Root for /proc/lustre */
+struct proc_dir_entry *proc_lustre_root = NULL;
+
/* The following are visible and mutable through /proc/sys/lustre/. */
unsigned long obd_fail_loc;
unsigned long obd_timeout = 100;
char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
+unsigned long obd_sync_filter; /* = 0, don't sync by default */
/* opening /dev/obd */
static int obd_class_open(struct inode * inode, struct file * file)
{
+ struct obd_class_user_state *ocus;
ENTRY;
- file->private_data = NULL;
+ OBD_ALLOC (ocus, sizeof (*ocus));
+ if (ocus == NULL)
+ return (-ENOMEM);
+
+ INIT_LIST_HEAD (&ocus->ocus_conns);
+ ocus->ocus_current_obd = NULL;
+ file->private_data = ocus;
+
MOD_INC_USE_COUNT;
RETURN(0);
}
+static int
+obd_class_add_user_conn (struct obd_class_user_state *ocus,
+ struct lustre_handle *conn)
+{
+ struct obd_class_user_conn *c;
+
+ /* NB holding obd_conf_sem */
+
+ OBD_ALLOC (c, sizeof (*c));
+ if (ocus == NULL)
+ return (-ENOMEM);
+
+ c->ocuc_conn = *conn;
+ list_add (&c->ocuc_chain, &ocus->ocus_conns);
+ return (0);
+}
+
+static void
+obd_class_remove_user_conn (struct obd_class_user_state *ocus,
+ struct lustre_handle *conn)
+{
+ struct list_head *e;
+ struct obd_class_user_conn *c;
+
+ /* NB holding obd_conf_sem or last reference */
+
+ list_for_each (e, &ocus->ocus_conns) {
+ c = list_entry (e, struct obd_class_user_conn, ocuc_chain);
+ if (!memcmp (conn, &c->ocuc_conn, sizeof (*conn))) {
+ list_del (&c->ocuc_chain);
+ OBD_FREE (c, sizeof (*c));
+ return;
+ }
+ }
+}
+
/* closing /dev/obd */
static int obd_class_release(struct inode * inode, struct file * file)
{
+ struct obd_class_user_state *ocus = file->private_data;
+ struct obd_class_user_conn *c;
ENTRY;
- // XXX drop lsm, connections here
- if (file->private_data)
- file->private_data = NULL;
+ while (!list_empty (&ocus->ocus_conns)) {
+ c = list_entry (ocus->ocus_conns.next,
+ struct obd_class_user_conn, ocuc_chain);
+ list_del (&c->ocuc_chain);
+
+ CDEBUG (D_IOCTL, "Auto-disconnect %p\n", &c->ocuc_conn);
+
+ down (&obd_conf_sem);
+ obd_disconnect (&c->ocuc_conn);
+ up (&obd_conf_sem);
+
+ OBD_FREE (c, sizeof (*c));
+ }
+
+ OBD_FREE (ocus, sizeof (*ocus));
+
MOD_DEC_USE_COUNT;
RETURN(0);
}
{
char *buf = NULL;
struct obd_ioctl_data *data;
- struct obd_device *obd = filp->private_data;
+ struct obd_class_user_state *ocus = filp->private_data;
+ struct obd_device *obd = ocus->ocus_current_obd;
struct lustre_handle conn;
int err = 0, len = 0, serialised = 0;
ENTRY;
case OBD_IOC_BRW_WRITE:
case OBD_IOC_BRW_READ:
case OBD_IOC_GETATTR:
+ case ECHO_IOC_ENQUEUE:
+ case ECHO_IOC_CANCEL:
break;
default:
down(&obd_conf_sem);
}
CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev);
- filp->private_data = &obd_dev[data->ioc_dev];
+ ocus->ocus_current_obd = &obd_dev[data->ioc_dev];
GOTO(out, err=0);
}
status = "-";
l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
i, status, obd->obd_type->typ_name,
- obd->obd_name, obd->obd_uuid, obd->obd_type->typ_refcnt);
+ obd->obd_name, obd->obd_uuid.uuid, obd->obd_type->typ_refcnt);
buf2 +=l;
remains -=l;
if (remains <= 0) {
* currently selected device.
*/
int dev;
+ struct obd_uuid uuid;
if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
CERROR("No UUID passed!\n");
}
CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
- dev = class_uuid2dev(data->ioc_inlbuf1);
+ obd_str2uuid(&uuid, data->ioc_inlbuf1);
+ dev = class_uuid2dev(&uuid);
data->ioc_dev = dev;
if (dev == -1) {
CDEBUG(D_IOCTL, "No device for name %s!\n",
int dev = -1;
int i;
- filp->private_data = NULL;
+ ocus->ocus_current_obd = NULL;
for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
struct obd_device *obd = &obd_dev[i];
if (!obd->obd_type) {
- filp->private_data = obd;
+ ocus->ocus_current_obd = obd;
dev = i;
break;
}
OBD_ALLOC(obd->obd_name, len);
if (!obd->obd_name) {
class_put_type(obd->obd_type);
+ obd->obd_type = NULL;
GOTO(out, err = -ENOMEM);
}
memcpy(obd->obd_name, data->ioc_inlbuf2, len);
OBD_FREE(obd->obd_name,
strlen(obd->obd_name) + 1);
class_put_type(obd->obd_type);
+ obd->obd_type = NULL;
GOTO(out, err=-EINVAL);
}
- memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
+ memcpy(obd->obd_uuid.uuid, data->ioc_inlbuf3, len);
}
/* do the attach */
if (OBP(obd, attach))
CERROR("OBD device %d not attached\n", obd->obd_minor);
GOTO(out, err=-ENODEV);
}
- if (!list_empty(&obd->obd_exports)) {
- if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
- CERROR("OBD device %d (%p) has exports\n",
- obd->obd_minor, obd);
- GOTO(out, err=-EBUSY);
- }
- forcibly_detach_exports(obd);
- }
if (OBP(obd, detach))
err = OBP(obd,detach)(obd);
CERROR("Device %d not attached\n", obd->obd_minor);
GOTO(out, err=-ENODEV);
}
-
- if ( OBT(obd) && OBP(obd, cleanup) )
+ if (!list_empty(&obd->obd_exports)) {
+ if (!data->ioc_inlbuf1 || data->ioc_inlbuf1[0] != 'F') {
+ CERROR("OBD device %d (%p) has exports\n",
+ obd->obd_minor, obd);
+ GOTO(out, err = -EBUSY);
+ }
+ forcibly_detach_exports(obd);
+ }
+ if (OBT(obd) && OBP(obd, cleanup))
err = obd_cleanup(obd);
if (!err) {
obd->obd_flags &= ~OBD_SET_UP;
obd->obd_type->typ_refcnt--;
}
- GOTO(out, err);
+ GOTO(out, err);
}
case OBD_IOC_CONNECT: {
- char * cluuid = "OBD_CLASS_UUID";
+ struct obd_uuid cluuid = { "OBD_CLASS_UUID" };
obd_data2conn(&conn, data);
- err = obd_connect(&conn, obd, cluuid, NULL, NULL);
+ err = obd_connect(&conn, obd, &cluuid, NULL, NULL);
CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.addr);
obd_conn2data(data, &conn);
if (err)
GOTO(out, err);
+ err = obd_class_add_user_conn (ocus, &conn);
+ if (err != 0) {
+ obd_disconnect (&conn);
+ GOTO (out, err);
+ }
+
err = copy_to_user((void *)arg, data, sizeof(*data));
- if (err)
- err = -EFAULT;
- // XXX save connection data into file handle
+ if (err != 0) {
+ obd_class_remove_user_conn (ocus, &conn);
+ obd_disconnect (&conn);
+ GOTO (out, err=-EFAULT);
+ }
GOTO(out, err);
}
case OBD_IOC_DISCONNECT: {
obd_data2conn(&conn, data);
+ obd_class_remove_user_conn (ocus, &conn);
err = obd_disconnect(&conn);
GOTO(out, err);
}
+ case OBD_IOC_NO_TRANSNO: {
+ if (!(obd->obd_flags & OBD_ATTACHED)) {
+ CERROR("Device %d not attached\n", obd->obd_minor);
+ GOTO(out, err=-ENODEV);
+ }
+ CDEBUG(D_IOCTL,
+ "disabling committed-transno notifications on %d\n",
+ obd->obd_minor);
+ obd->obd_flags |= OBD_NO_TRANSNO;
+ GOTO(out, err = 0);
+ }
+
default:
obd_data2conn(&conn, data);
EXPORT_SYMBOL(obd_fail_loc);
EXPORT_SYMBOL(obd_timeout);
EXPORT_SYMBOL(obd_recovery_upcall);
+EXPORT_SYMBOL(obd_sync_filter);
EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
+EXPORT_SYMBOL(ptlrpc_abort_inflight_superhack);
+EXPORT_SYMBOL(proc_lustre_root);
EXPORT_SYMBOL(class_register_type);
EXPORT_SYMBOL(class_unregister_type);
obd_sysctl_init();
- err = lprocfs_reg_main();
-
+#ifdef LPROCFS
+ proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
+ if (!proc_lustre_root)
+ printk(KERN_ERR "error registering /proc/fs/lustre\n");
+#else
+ proc_lustre_root = NULL;
+#endif
return 0;
}
static void __exit cleanup_obdclass(void)
{
- int i, err;
+ int i;
ENTRY;
misc_deregister(&obd_psdev);
obd_cleanup_caches();
obd_sysctl_clean();
- err = lprocfs_dereg_main();
+ if (proc_lustre_root) {
+ lprocfs_remove(proc_lustre_root);
+ proc_lustre_root = NULL;
+ }
CERROR("obd mem max: %d leaked: %d\n", obd_memmax,
atomic_read(&obd_memory));
/* Check that we're building against the appropriate version of the Lustre
* kernel patch */
#include <linux/lustre_version.h>
-#if (LUSTRE_KERNEL_VERSION != 5)
-# error Cannot continue: Your Lustre kernel patch is out of date
+#define LUSTRE_SOURCE_VERSION 10
+#if (LUSTRE_KERNEL_VERSION < LUSTRE_SOURCE_VERSION)
+# error Cannot continue: Your Lustre kernel patch is older than the sources
+#elif (LUSTRE_KERNEL_VERSION > LUSTRE_SOURCE_VERSION)
+# error Cannot continue: Your Lustre sources are older than the kernel patch
#endif
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#error "FIXME: this needs to be updated to match fsfilt_extN.c"
+
#define DEBUG_SUBSYSTEM S_FILTER
#include <linux/fs.h>
return rc;
}
+static int fsfilt_ext3_sync(struct super_block *sb)
+{
+ return ext3_force_commit(sb);
+}
+
static struct fsfilt_operations fsfilt_ext3_ops = {
fs_type: "ext3",
fs_owner: THIS_MODULE,
fs_journal_data: fsfilt_ext3_journal_data,
fs_set_last_rcvd: fsfilt_ext3_set_last_rcvd,
fs_statfs: fsfilt_ext3_statfs,
+ fs_sync: fsfilt_ext3_sync,
};
static int __init fsfilt_ext3_init(void)
* lustre/lib/fsfilt_extN.c
* Lustre filesystem abstraction routines
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
* Author: Andreas Dilger <adilger@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
* objcount inode blocks
* 1 superblock
* 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
+ *
+ * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
*/
static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
{
ngdblocks = EXTN_SB(sb)->s_gdb_count;
needed += nbitmaps + ngdblocks;
+
+ /* last_rcvd update */
+ needed += EXTN_DATA_TRANS_BLOCKS;
#ifdef CONFIG_QUOTA
/* We assume that there will be 1 bit set in s_dquot.flags for each
}
static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count,
- loff_t *offset)
+ loff_t *off)
{
struct inode *inode = file->f_dentry->d_inode;
int rc = 0;
if (S_ISREG(inode->i_mode))
- rc = file->f_op->read(file, buf, count, offset);
+ rc = file->f_op->read(file, buf, count, off);
else {
- struct buffer_head *bh;
-
- /* FIXME: this assumes the blocksize == count, but the calling
- * function will detect this as an error for now */
- bh = extN_bread(NULL, inode,
- *offset >> inode->i_sb->s_blocksize_bits,
- 0, &rc);
-
- if (bh) {
- memcpy(buf, bh->b_data, inode->i_blksize);
- brelse(bh);
- rc = inode->i_blksize;
+ const int blkbits = inode->i_sb->s_blocksize_bits;
+ const int blksize = inode->i_sb->s_blocksize;
+
+ CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n",
+ count, inode->i_ino, *off);
+ while (count > 0) {
+ struct buffer_head *bh;
+
+ bh = NULL;
+ if (*off < inode->i_size) {
+ int err = 0;
+
+ bh = extN_bread(NULL, inode, *off >> blkbits,
+ 0, &err);
+
+ CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off);
+
+ if (bh) {
+ memcpy(buf, bh->b_data, blksize);
+ brelse(bh);
+ } else if (err) {
+ /* XXX in theory we should just fake
+ * this buffer and continue like ext3,
+ * especially if this is a partial read
+ */
+ CERROR("error read dir %lu+%llu: %d\n",
+ inode->i_ino, *off, err);
+ RETURN(err);
+ }
+ }
+ if (!bh) {
+ struct extN_dir_entry_2 *fake = (void *)buf;
+
+ CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off);
+ memset(fake, 0, sizeof(*fake));
+ fake->rec_len = cpu_to_le32(blksize);
+ }
+ count -= blksize;
+ buf += blksize;
+ *off += blksize;
+ rc += blksize;
}
}
static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
void *handle, fsfilt_cb_t cb_func)
{
-#ifdef HAVE_JOURNAL_CALLBACK_STATUS
struct fsfilt_cb_data *fcb;
fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS);
journal_callback_set(handle, fsfilt_extN_cb_func,
(struct journal_callback *)fcb);
unlock_kernel();
-#else
-#warning "no journal callback kernel patch, faking it..."
- static long next = 0;
-
- if (time_after(jiffies, next)) {
- CERROR("no journal callback kernel patch, faking it...\n");
- next = jiffies + 300 * HZ;
- }
-
- cb_func(obd, last_rcvd, 0);
-#endif
return 0;
}
return rc;
}
+static int fsfilt_extN_sync(struct super_block *sb)
+{
+ return extN_force_commit(sb);
+}
+
static struct fsfilt_operations fsfilt_extN_ops = {
fs_type: "extN",
fs_owner: THIS_MODULE,
fs_journal_data: fsfilt_extN_journal_data,
fs_set_last_rcvd: fsfilt_extN_set_last_rcvd,
fs_statfs: fsfilt_extN_statfs,
+ fs_sync: fsfilt_extN_sync,
};
static int __init fsfilt_extN_init(void)
return rc;
}
+static int fsfilt_reiserfs_sync(struct super_block *sb)
+{
+ CERROR("not implemented yet\n");
+ return -ENOSYS;
+}
+
static struct fsfilt_operations fsfilt_reiserfs_ops = {
fs_type: "reiserfs",
fs_owner: THIS_MODULE,
fs_journal_data: fsfilt_reiserfs_journal_data,
fs_set_last_rcvd: fsfilt_reiserfs_set_last_rcvd,
fs_statfs: fsfilt_reiserfs_statfs,
+ fs_sync: fsfilt_reiserfs_sync,
};
static int __init fsfilt_reiserfs_init(void)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
kmem_cache_t *export_cachep = NULL;
int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp,
+ int dying_import);
/*
* support functions: we could use inter-module communication, but this
char *name)
{
struct obd_type *type;
- int rc;
+ int rc = 0;
ENTRY;
LASSERT(strnlen(name, 1024) < 1024); /* sanity check */
strcpy(type->typ_name, name);
list_add(&type->typ_chain, &obd_types);
- rc = lprocfs_reg_class(type, vars, type);
- if (rc != 0) {
+ type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
+ vars, type);
+ if (IS_ERR(type->typ_procroot)) {
+ rc = PTR_ERR(type->typ_procroot);
+ type->typ_procroot = NULL;
list_del(&type->typ_chain);
- GOTO(failed, rc);
+ GOTO (failed, rc);
}
RETURN (0);
OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
RETURN(-EBUSY);
}
- if(type->typ_procroot)
- lprocfs_dereg_class(type);
+
+ if (type->typ_procroot) {
+ lprocfs_remove(type->typ_procroot);
+ type->typ_procroot = NULL;
+ }
list_del(&type->typ_chain);
OBD_FREE(type->typ_name, strlen(name) + 1);
return res;
}
-int class_uuid2dev(char *uuid)
+int class_uuid2dev(struct obd_uuid *uuid)
{
int res = -1;
int i;
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
- if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0) {
+ if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0) {
res = i;
return res;
}
}
-struct obd_device *class_uuid2obd(char *uuid)
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
{
int i;
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
- if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0)
+ if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0)
return obd;
}
ptlrpc_put_connection_superhack(exp->exp_connection);
}
+ /* Abort any inflight DLM requests and NULL out their (about to be
+ * freed) import. */
+ if (exp->exp_ldlm_data.led_import.imp_obd)
+ ptlrpc_abort_inflight_superhack(&exp->exp_ldlm_data.led_import,
+ 1);
+
exp->exp_cookie = DEAD_HANDLE_MAGIC;
kmem_cache_free(export_cachep, exp);
/* a connection defines an export context in which preallocation can
be managed. */
int class_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid)
+ struct obd_uuid *cluuid)
{
struct obd_export * export;
if (conn == NULL) {
return -EINVAL;
}
+ if (cluuid == NULL) {
+ LBUG();
+ return -EINVAL;
+ }
+
export = class_new_export(obd);
if (!export)
return -ENOMEM;
conn->addr = (__u64) (unsigned long)export;
conn->cookie = export->exp_cookie;
+ memcpy(&export->exp_client_uuid, cluuid, sizeof(export->exp_client_uuid));
CDEBUG(D_IOCTL, "connect: addr %Lx cookie %Lx\n",
(long long)conn->addr, (long long)conn->cookie);
CERROR("force disconnecting %s:%s export %p\n",
export->exp_obd->obd_type->typ_name,
export->exp_connection ?
- (char *)export->exp_connection->c_remote_uuid :
+ (char *)export->exp_connection->c_remote_uuid.uuid :
"<unconnected>", export);
rc = obd_disconnect(&conn);
if (rc < 0) {
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Hariharan Thantry <thantry@users.sourceforge.net>
*
* This file is part of Lustre, http://www.lustre.org.
*
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Author: Hariharan Thantry thantry@users.sourceforge.net
*/
+
#define EXPORT_SYMTAB
#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
-#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/types.h>
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
-#ifdef LPROC_SNMP
-
-#define DEFAULT_MODE 0444
-/*
- * Tokenizer array. Change this array to include special
- * characters for string tokenizing
- */
-const char tok[] = {'/', '\0'};
-
-/*
- * Externs
- */
-extern struct proc_dir_entry proc_root; /* Defined in proc/root.c */
+#ifdef LPROCFS
-/*
- * Globals
- */
-struct proc_dir_entry *proc_lustre_root;
-struct proc_dir_entry *proc_lustre_dev_root;
-struct proc_dir_entry *proc_lustre_fs_root;
-
-struct proc_dir_entry* lprocfs_mkdir(const char* dname,
- struct proc_dir_entry *parent)
-{
- struct proc_dir_entry *child_dir_entry;
- child_dir_entry = proc_mkdir(dname, parent);
- if (!child_dir_entry)
- CERROR("lustre: failed to create /proc entry %s\n", dname);
- return child_dir_entry;
-}
-
-struct proc_dir_entry* lprocfs_srch(struct proc_dir_entry* head,
- const char* name)
+struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
+ const char *name)
{
struct proc_dir_entry* temp;
+
if (!head)
return NULL;
+
temp = head->subdir;
while (temp != NULL) {
if (!strcmp(temp->name, name))
return temp;
+
temp = temp->next;
}
return NULL;
}
-void lprocfs_remove_all(struct proc_dir_entry* root)
+/* lprocfs API calls */
+
+int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
+ void *data)
+{
+ if ((root == NULL) || (list == NULL))
+ return -EINVAL;
+
+ while (list->name) {
+ struct proc_dir_entry *cur_root, *proc;
+ char *pathcopy, *cur, *next;
+ int pathsize = strlen(list->name)+1;
+
+ proc = NULL;
+ cur_root = root;
+
+ /* need copy of path for strsep */
+ OBD_ALLOC(pathcopy, pathsize);
+ if (!pathcopy)
+ return -ENOMEM;
+
+ next = pathcopy;
+ strcpy(pathcopy, list->name);
+
+ while (cur_root && (cur = strsep(&next, "/"))) {
+ if (*cur =='\0') /* skip double/trailing "/" */
+ continue;
+
+ proc = lprocfs_srch(cur_root, cur);
+ CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
+ cur_root->name, cur, next,
+ (proc ? "exists" : "new"));
+ if (next)
+ cur_root = (proc ? proc :
+ proc_mkdir(cur, cur_root));
+ else if (!proc)
+ proc = create_proc_entry(cur, 0444, cur_root);
+ }
+
+ OBD_FREE(pathcopy, pathsize);
+
+ if ((cur_root==NULL) || (proc==NULL)) {
+ CERROR("LprocFS: No memory to create /proc entry %s",
+ list->name);
+ return -ENOMEM;
+ }
+
+ proc->read_proc = list->read_fptr;
+ proc->write_proc = list->write_fptr;
+ proc->data = (list->data ? list->data : data);
+ list++;
+ }
+ return 0;
+}
+
+void lprocfs_remove(struct proc_dir_entry* root)
{
struct proc_dir_entry *temp = root;
struct proc_dir_entry *rm_entry;
}
}
-#define MAX_STRING_SIZE 100
-struct proc_dir_entry* lprocfs_new_dir(struct proc_dir_entry* root,
- const char* string, const char* tok)
+struct proc_dir_entry *lprocfs_register(const char *name,
+ struct proc_dir_entry *parent,
+ struct lprocfs_vars *list, void *data)
{
- struct proc_dir_entry* new_root;
- struct proc_dir_entry* temp_entry;
- char temp_string[MAX_STRING_SIZE+1];
- char* my_str;
- char* mover_str;
-
- strncpy(temp_string, string, MAX_STRING_SIZE);
- temp_string[MAX_STRING_SIZE] = '\0';
-
- new_root = root;
- mover_str = temp_string;
- while ((my_str = strsep(&mover_str, tok))) {
- if (!*my_str)
- continue;
- CDEBUG(D_OTHER, "SEARCH= %s\t, ROOT=%s\n", my_str,
- new_root->name);
- temp_entry = lprocfs_srch(new_root, my_str);
- if (temp_entry == NULL) {
- CDEBUG(D_OTHER, "Adding: %s\n", my_str);
- temp_entry = lprocfs_mkdir(my_str, new_root);
- if (temp_entry == NULL) {
- CDEBUG(D_OTHER,
- "! Did not create new dir %s !!\n",
- my_str);
- return temp_entry;
- }
+ struct proc_dir_entry *newchild;
+
+ newchild = lprocfs_srch(parent, name);
+ if (newchild) {
+ CERROR(" Lproc: Attempting to register %s more than once \n",
+ name);
+ return NULL;
+ }
+
+ newchild = proc_mkdir(name, parent);
+ if (newchild && list) {
+ int rc = lprocfs_add_vars(newchild, list, data);
+ if (rc) {
+ lprocfs_remove(newchild);
+ return ERR_PTR(rc);
}
- new_root = temp_entry;
}
- return new_root;
+ return newchild;
}
-int lprocfs_new_vars(struct proc_dir_entry* root, struct lprocfs_vars* list,
- const char* tok, void* data)
-{
- struct proc_dir_entry *temp_root;
- struct proc_dir_entry *new_leaf;
- struct proc_dir_entry *new_parent;
- char temp_string[MAX_STRING_SIZE+1];
-
- if (list == NULL)
- return 0;
+/* Generic callbacks */
- while (list->name) {
- temp_root = lprocfs_new_dir(root, list->name, tok);
- if (temp_root == NULL) {
- CDEBUG(D_OTHER, "!LProcFS: Mods: No root!");
- return -ENOMEM;
- }
+int lprocfs_rd_u64(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ *eof = 1;
+ return snprintf(page, count, LPU64"\n", *(__u64 *)data);
+}
- /* Convert the last element into a leaf-node */
- strncpy(temp_string, temp_root->name, MAX_STRING_SIZE);
- temp_string[MAX_STRING_SIZE] = '\0';
- new_parent = temp_root->parent;
- remove_proc_entry(temp_root->name, new_parent);
- new_leaf = create_proc_entry(temp_string, DEFAULT_MODE,
- new_parent);
- if (new_leaf == NULL) {
- CERROR("LprocFS: No memory to create /proc entry %s",
- temp_string);
- return -ENOMEM;
- }
- new_leaf->read_proc = list->read_fptr;
- new_leaf->write_proc = list->write_fptr;
- if (data)
- new_leaf->data=data;
- else
- new_leaf->data=list->data;
- list++;
- }
- return 0;
+int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct obd_device* dev = (struct obd_device*)data;
+ *eof = 1;
+ return snprintf(page, count, "%s\n", dev->obd_uuid.uuid);
}
-#undef MAX_STRING_SIZE
-/*
- * API implementations
- */
-int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var,
- void *data)
+
+int lprocfs_rd_name(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- return lprocfs_new_vars(root, var, tok, data);
+ struct obd_device* dev = (struct obd_device *)data;
+
+ *eof = 1;
+ return snprintf(page, count, "%s\n", dev->obd_name);
}
-int lprocfs_reg_obd(struct obd_device *device, struct lprocfs_vars *list,
- void *data)
+int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
{
- struct proc_dir_entry* this_dev_root;
- int retval;
+ *eof = 1;
- if (lprocfs_srch(device->obd_type->typ_procroot, device->obd_name)) {
- CDEBUG(D_OTHER, "Device with name [%s] exists!",
- device->obd_name);
- return 0;
- }
+ return snprintf(page, count, "%lu\n", sfs->f_bsize);
+}
- /* Obtain this device root */
- this_dev_root = lprocfs_mkdir(device->obd_name,
- device->obd_type->typ_procroot);
+int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
+{
+ __u32 blk_size = sfs->f_bsize >> 10;
+ __u64 result = sfs->f_blocks;
- device->obd_proc_entry = this_dev_root;
- retval = lprocfs_add_vars(this_dev_root, list, data);
+ while (blk_size >>= 1)
+ result <<= 1;
- return retval;
+ *eof = 1;
+ return snprintf(page, count, LPU64"\n", result);
}
-int lprocfs_dereg_obd(struct obd_device* device)
+int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
{
- CDEBUG(D_OTHER, "LPROCFS removing device = %s\n", device->obd_name);
+ __u32 blk_size = sfs->f_bsize >> 10;
+ __u64 result = sfs->f_bfree;
- if (device == NULL) {
- CDEBUG(D_OTHER, "! LProcfs: Null pointer !\n");
- return 0;
- }
- if (device->obd_proc_entry == NULL) {
- CDEBUG(D_OTHER, "! Proc entry non-existent !");
- return 0;
- }
- lprocfs_remove_all(device->obd_proc_entry);
- device->obd_proc_entry = NULL;
- if (device->counters)
- OBD_FREE(device->counters, device->cntr_mem_size);
+ while (blk_size >>= 1)
+ result <<= 1;
- return 0;
+ *eof = 1;
+ return snprintf(page, count, LPU64"\n", result);
}
-struct proc_dir_entry* lprocfs_reg_mnt(char* mnt_name)
+int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
{
- if (lprocfs_srch(proc_lustre_fs_root, mnt_name)) {
- CDEBUG(D_OTHER, "Mount with same name exists!");
- return 0;
- }
- return lprocfs_mkdir(mnt_name, proc_lustre_fs_root);
+ *eof = 1;
+ return snprintf(page, count, "%ld\n", sfs->f_files);
}
-int lprocfs_dereg_mnt(struct proc_dir_entry* root)
+int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
{
- if (root == NULL) {
- CDEBUG(D_OTHER, "Non-existent root!");
- return 0;
- }
- lprocfs_remove_all(root);
- return 0;
+ *eof = 1;
+ return snprintf(page, count, "%ld\n", sfs->f_ffree);
}
-int lprocfs_reg_class(struct obd_type* type, struct lprocfs_vars* list,
- void* data)
+int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
+ int *eof, struct statfs *sfs)
{
- struct proc_dir_entry* root;
- int retval;
- root = lprocfs_mkdir(type->typ_name, proc_lustre_dev_root);
- lprocfs_add_vars(root, list, data);
- type->typ_procroot = root;
- retval = lprocfs_add_vars(root, list, data);
- return retval;
+ *eof = 1;
+ return snprintf(page, count, "unimplemented\n");
}
-int lprocfs_dereg_class(struct obd_type* class)
+int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- if (class == NULL) {
- CDEBUG(D_OTHER, "Non-existent class");
- return 0;
- }
- lprocfs_remove_all(class->typ_procroot);
- class->typ_procroot = NULL;
- CDEBUG(D_OTHER, "LPROCFS removed = %s\n", class->typ_name);
- return 0;
-
+ struct obd_device* obd = (struct obd_device*)data;
+ struct client_obd* cli = &obd->u.cli;
+ return snprintf(page, count, "%s\n", cli->cl_target_uuid.uuid);
}
-int lprocfs_reg_main()
+int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- proc_lustre_root = lprocfs_mkdir("lustre", &proc_root);
- if (proc_lustre_root == NULL) {
- CERROR(" !! Cannot create /proc/lustre !! \n");
- return -EINVAL;
- }
+ struct obd_device *obd = (struct obd_device*)data;
+ struct ptlrpc_connection *conn = obd->u.cli.cl_import.imp_connection;
- proc_lustre_dev_root = lprocfs_mkdir("devices", proc_lustre_root);
- if (proc_lustre_dev_root == NULL) {
- CERROR(" !! Cannot create /proc/lustre/devices !! \n");
- return -EINVAL;
- }
- proc_lustre_fs_root = lprocfs_mkdir("mnt_pnt", proc_lustre_root);
+ *eof = 1;
+ return snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
+}
- if (proc_lustre_fs_root == NULL) {
- CERROR(" !! Cannot create /proc/lustre/mnt_pnt !! \n");
- return -EINVAL;
- }
+int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct obd_type* class = (struct obd_type*) data;
- return 0;
+ *eof = 1;
+ return snprintf(page, count, "%d\n", class->typ_refcnt);
}
-int lprocfs_dereg_main()
+int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list)
{
- lprocfs_remove_all(proc_lustre_root);
- proc_lustre_root = NULL;
- proc_lustre_dev_root = NULL;
- proc_lustre_fs_root = NULL;
- return 0;
+ int rc = 0;
+ dev->obd_proc_entry = lprocfs_register(dev->obd_name,
+ dev->obd_type->typ_procroot,
+ list, dev);
+ if (IS_ERR(dev->obd_proc_entry)) {
+ rc = PTR_ERR(dev->obd_proc_entry);
+ dev->obd_proc_entry = NULL;
+ }
+ return rc;
}
-
-/*
- * Needs to go...
- */
-int lprocfs_ll_rd(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+int lprocfs_obd_detach(struct obd_device *dev)
{
- __u64 *temp = (__u64 *)data;
- int len;
- len = snprintf(page, count, LPU64"\n", *temp);
- return len;
+ if (dev && dev->obd_proc_entry) {
+ lprocfs_remove(dev->obd_proc_entry);
+ dev->obd_proc_entry = NULL;
+ }
+ return 0;
}
-#endif /* LPROC_SNMP */
+#endif /* LPROCFS*/
-EXPORT_SYMBOL(lprocfs_reg_obd);
-EXPORT_SYMBOL(lprocfs_dereg_obd);
-EXPORT_SYMBOL(lprocfs_reg_main);
-EXPORT_SYMBOL(lprocfs_dereg_main);
-EXPORT_SYMBOL(lprocfs_reg_mnt);
-EXPORT_SYMBOL(lprocfs_dereg_mnt);
+EXPORT_SYMBOL(lprocfs_register);
+EXPORT_SYMBOL(lprocfs_remove);
EXPORT_SYMBOL(lprocfs_add_vars);
-EXPORT_SYMBOL(lprocfs_reg_class);
-EXPORT_SYMBOL(lprocfs_dereg_class);
-EXPORT_SYMBOL(lprocfs_ll_rd);
-
-
+EXPORT_SYMBOL(lprocfs_obd_attach);
+EXPORT_SYMBOL(lprocfs_obd_detach);
+
+EXPORT_SYMBOL(lprocfs_rd_u64);
+EXPORT_SYMBOL(lprocfs_rd_uuid);
+EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_server_uuid);
+EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
+EXPORT_SYMBOL(lprocfs_rd_numrefs);
+
+EXPORT_SYMBOL(lprocfs_rd_blksize);
+EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
+EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
+EXPORT_SYMBOL(lprocfs_rd_filestotal);
+EXPORT_SYMBOL(lprocfs_rd_filesfree);
+EXPORT_SYMBOL(lprocfs_rd_filegroups);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc. <adilger@clusterfs.com>
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Andreas Dilger <adilger@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#define EXPORT_SYMTAB
#include <linux/lustre_net.h>
#include <linux/obd_support.h>
+#include <linux/obd_class.h>
void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src)
{
sfs->f_namelen = osfs->os_namelen;
}
+int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
+{
+ struct lustre_handle conn;
+ struct obd_export *export, *my_export = NULL;
+ struct obd_statfs osfs = { 0 };
+ int rc;
+ ENTRY;
+
+ if (list_empty(&obd->obd_exports)) {
+ export = my_export = class_new_export(obd);
+ if (export == NULL)
+ RETURN(-ENOMEM);
+ } else
+ export = list_entry(obd->obd_exports.next, typeof(*export),
+ exp_obd_chain);
+ conn.addr = (unsigned long)export;
+ conn.cookie = export->exp_cookie;
+
+ rc = obd_statfs(&conn, &osfs);
+ if (!rc)
+ statfs_unpack(sfs, &osfs);
+
+ if (my_export)
+ class_destroy_export(my_export);
+ RETURN(rc);
+}
+
EXPORT_SYMBOL(obd_statfs_pack);
EXPORT_SYMBOL(obd_statfs_unpack);
EXPORT_SYMBOL(statfs_pack);
EXPORT_SYMBOL(statfs_unpack);
+EXPORT_SYMBOL(obd_self_statfs);
#define OBD_TIMEOUT 6 /* RPC timeout before recovery/intr */
/* XXX move to /proc/sys/lustre/recovery? */
#define OBD_UPCALL 7 /* path to recovery upcall */
+/* XXX temporary, as we play with sync osts.. */
+#define OBD_SYNCFILTER 8
#define OBD_VARS_SLOT 2
/* XXX need to lock so we avoid update races with the recovery upcall! */
{OBD_UPCALL, "recovery_upcall", obd_recovery_upcall, 128, 0644, NULL,
&proc_dostring, &sysctl_string },
+ {OBD_SYNCFILTER, "filter_sync_on_commit", &obd_sync_filter, sizeof(int),
+ 0644, NULL, &proc_dointvec},
{ 0 }
};
memcpy(out+10, uu->node, 6);
}
-int class_uuid_parse(obd_uuid_t in, class_uuid_t uu)
+int class_uuid_parse(struct obd_uuid in, class_uuid_t uu)
{
struct uuid uuid;
int i;
}
#endif
-void class_uuid_unparse(class_uuid_t uu, obd_uuid_t out)
+void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out)
{
struct uuid uuid;
uuid_unpack(uu, &uuid);
- sprintf(out,
+ sprintf(out->uuid,
"%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Andreas Dilger <adilger@clusterfs.com>
*
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#define OBDECHO_VERSION "1.0"
-
#define EXPORT_SYMTAB
#include <linux/version.h>
#include <linux/lustre_dlm.h>
#include <linux/lprocfs_status.h>
-static atomic_t echo_page_rws;
-static atomic_t echo_getattrs;
-
-#define ECHO_PROC_STAT "sys/obdecho"
-#define ECHO_INIT_OBJID 0x1000000000000000ULL
+#define ECHO_INIT_OBJID 0x1000000000000000ULL
+#define ECHO_HANDLE_MAGIC 0xabcd0123fedc9876ULL
+
+#define ECHO_OBJECT0_NPAGES 16
+static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES];
+
+/* should be generic per-obd stats... */
+struct xprocfs_io_stat {
+ __u64 st_read_bytes;
+ __u64 st_read_reqs;
+ __u64 st_write_bytes;
+ __u64 st_write_reqs;
+ __u64 st_getattr_reqs;
+ __u64 st_setattr_reqs;
+ __u64 st_create_reqs;
+ __u64 st_destroy_reqs;
+ __u64 st_statfs_reqs;
+ __u64 st_open_reqs;
+ __u64 st_close_reqs;
+ __u64 st_punch_reqs;
+};
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
+static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
+static struct proc_dir_entry *xprocfs_dir;
+
+#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count) \
+do { \
+ xprocfs_iostats[smp_processor_id()].field += (count); \
+} while (0)
+
+#define DECLARE_XPROCFS_SUM_STAT(field) \
+static long long \
+xprocfs_sum_##field (void) \
+{ \
+ long long stat = 0; \
+ int i; \
+ \
+ for (i = 0; i < smp_num_cpus; i++) \
+ stat += xprocfs_iostats[i].field; \
+ return (stat); \
+}
-int echo_proc_read(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+
+static int
+xprocfs_rd_stat (char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- long long attrs = atomic_read(&echo_getattrs);
- long long pages = atomic_read(&echo_page_rws);
- int len;
-
+ long long (*fn)(void) = (long long(*)(void))data;
+ int len;
+
*eof = 1;
if (off != 0)
return (0);
- len = sprintf(page, "%Ld %Ld\n", attrs, pages);
-
+ len = snprintf (page, count, "%Ld\n", fn());
*start = page;
return (len);
}
+
-int echo_proc_write(struct file *file, const char *ubuffer,
- unsigned long count, void *data)
+static void
+xprocfs_add_stat(char *name, long long (*fn)(void))
{
- /* Ignore what we've been asked to write, and just zero the counters */
- atomic_set (&echo_page_rws, 0);
- atomic_set (&echo_getattrs, 0);
+ struct proc_dir_entry *entry;
+
+ entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
+ if (entry == NULL) {
+ CERROR ("Can't add procfs stat %s\n", name);
+ return;
+ }
- return (count);
+ entry->data = fn;
+ entry->read_proc = xprocfs_rd_stat;
+ entry->write_proc = NULL;
}
-void echo_proc_init(void)
+static void
+xprocfs_init (char *name)
{
- struct proc_dir_entry *entry;
+ char dirname[64];
+
+ snprintf (dirname, sizeof (dirname), "sys/%s", name);
- entry = create_proc_entry(ECHO_PROC_STAT, S_IFREG|S_IRUGO|S_IWUSR,NULL);
-
- if (entry == NULL) {
- CERROR("couldn't create proc entry %s\n", ECHO_PROC_STAT);
+ xprocfs_dir = proc_mkdir (dirname, NULL);
+ if (xprocfs_dir == NULL) {
+ CERROR ("Can't make dir\n");
return;
}
- entry->data = NULL;
- entry->read_proc = echo_proc_read;
- entry->write_proc = echo_proc_write;
+ xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes);
+ xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs);
+ xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes);
+ xprocfs_add_stat ("write_reqs", xprocfs_sum_st_write_reqs);
+ xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
+ xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
+ xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs);
+ xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
+ xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs);
+ xprocfs_add_stat ("open_reqs", xprocfs_sum_st_open_reqs);
+ xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs);
+ xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs);
}
-void echo_proc_fini(void)
+void xprocfs_fini (void)
{
- remove_proc_entry(ECHO_PROC_STAT, 0);
+ if (xprocfs_dir == NULL)
+ return;
+
+ remove_proc_entry ("read_bytes", xprocfs_dir);
+ remove_proc_entry ("read_reqs", xprocfs_dir);
+ remove_proc_entry ("write_bytes", xprocfs_dir);
+ remove_proc_entry ("write_reqs", xprocfs_dir);
+ remove_proc_entry ("getattr_reqs", xprocfs_dir);
+ remove_proc_entry ("setattr_reqs", xprocfs_dir);
+ remove_proc_entry ("create_reqs", xprocfs_dir);
+ remove_proc_entry ("destroy_reqs", xprocfs_dir);
+ remove_proc_entry ("statfs_reqs", xprocfs_dir);
+ remove_proc_entry ("open_reqs", xprocfs_dir);
+ remove_proc_entry ("close_reqs", xprocfs_dir);
+ remove_proc_entry ("punch_reqs", xprocfs_dir);
+
+ remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
+ xprocfs_dir = NULL;
}
static int echo_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
return class_connect(conn, obd, cluuid);
}
+static int echo_disconnect(struct lustre_handle *conn)
+{
+ struct obd_export *exp = class_conn2export(conn);
+
+ LASSERT (exp != NULL);
+
+ ldlm_cancel_locks_for_export (exp);
+ return (class_disconnect (conn));
+}
+
static __u64 echo_next_id(struct obd_device *obddev)
{
obd_id id;
}
int echo_create(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
return -EINVAL;
}
if (!(oa->o_mode && S_IFMT)) {
- CERROR("filter obd: no type!\n");
+ CERROR("echo obd: no type!\n");
return -ENOENT;
}
}
int echo_destroy(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(-EINVAL);
}
static int echo_open(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
+ struct lustre_handle *fh = obdo_handle (oa);
+ struct obd_device *obd = class_conn2obd (conn);
+
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
+
+ if (!obd) {
+ CERROR ("invalid client "LPX64"\n", conn->addr);
+ return (-EINVAL);
+ }
+
+ if (!(oa->o_valid & OBD_MD_FLID)) {
+ CERROR ("obdo missing FLID valid flag: %08x\n", oa->o_valid);
+ return (-EINVAL);
+ }
+
+ fh->addr = oa->o_id;
+ fh->cookie = ECHO_HANDLE_MAGIC;
+
+ oa->o_valid |= OBD_MD_FLHANDLE;
return 0;
}
static int echo_close(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
+ struct lustre_handle *fh = obdo_handle (oa);
+ struct obd_device *obd = class_conn2obd(conn);
+
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
+
+ if (!obd) {
+ CERROR("invalid client "LPX64"\n", conn->addr);
+ return (-EINVAL);
+ }
+
+ if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
+ CERROR("obdo missing FLHANDLE valid flag: %08x\n", oa->o_valid);
+ return (-EINVAL);
+ }
+
+ if (fh->cookie != ECHO_HANDLE_MAGIC) {
+ CERROR ("invalid file handle on close: "LPX64"\n", fh->cookie);
+ return (-EINVAL);
+ }
+
return 0;
}
struct obd_device *obd = class_conn2obd(conn);
obd_id id = oa->o_id;
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(-EINVAL);
RETURN(-EINVAL);
}
- memcpy(oa, &obd->u.echo.oa, sizeof(*oa));
+ obdo_cpy_md(oa, &obd->u.echo.oa, oa->o_valid);
oa->o_id = id;
- oa->o_valid |= OBD_MD_FLID;
-
- atomic_inc(&echo_getattrs);
return 0;
}
static int echo_setattr(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(-EINVAL);
int echo_preprw(int cmd, struct lustre_handle *conn, int objcount,
struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private)
+ struct niobuf_local *res, void **desc_private, struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
int rc = 0;
int i;
-
ENTRY;
+ if ((cmd & OBD_BRW_WRITE) != 0)
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
+ else
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
+
obd = class_conn2obd(conn);
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
for (i = 0; i < objcount; i++, obj++) {
int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
- int verify = obj->ioo_id != 0;
+ int isobj0 = obj->ioo_id == 0;
+ int verify = !isobj0;
int j;
for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++, r++) {
- r->page = alloc_pages(gfp_mask, 0);
- if (!r->page) {
- CERROR("can't get page %d/%d for id "LPU64"\n",
- j, obj->ioo_bufcnt, obj->ioo_id);
- GOTO(preprw_cleanup, rc = -ENOMEM);
+
+ if (isobj0 &&
+ (nb->offset >> PAGE_SHIFT) < ECHO_OBJECT0_NPAGES) {
+ r->page = echo_object0_pages[nb->offset >> PAGE_SHIFT];
+ /* Take extra ref so __free_pages() can be called OK */
+ get_page (r->page);
+ } else {
+ r->page = alloc_pages(gfp_mask, 0);
+ if (r->page == NULL) {
+ CERROR("can't get page %d/%d for id "LPU64"\n",
+ j, obj->ioo_bufcnt, obj->ioo_id);
+ GOTO(preprw_cleanup, rc = -ENOMEM);
+ }
}
+
atomic_inc(&obd->u.echo.eo_prep);
r->offset = nb->offset;
CDEBUG(D_PAGE, "$$$$ get page %p, addr %p@"LPU64"\n",
r->page, r->addr, r->offset);
- if (verify && cmd == OBD_BRW_READ)
- page_debug_setup(r->addr, r->len, r->offset,
- obj->ioo_id);
- else if (verify)
- page_debug_setup(r->addr, r->len,
- 0xecc0ecc0ecc0ecc0,
- 0xecc0ecc0ecc0ecc0);
+ if (cmd == OBD_BRW_READ) {
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes, r->len);
+ if (verify)
+ page_debug_setup(r->addr, r->len, r->offset,
+ obj->ioo_id);
+ } else {
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes, r->len);
+ if (verify)
+ page_debug_setup(r->addr, r->len,
+ 0xecc0ecc0ecc0ecc0,
+ 0xecc0ecc0ecc0ecc0);
+ }
}
}
CDEBUG(D_PAGE, "%d pages allocated after prep\n",
CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount);
while (r-- > res) {
kunmap(r->page);
+ /* NB if this is an 'object0' page, __free_pages will just
+ * lose the extra ref gained above */
__free_pages(r->page, 0);
atomic_dec(&obd->u.echo.eo_prep);
}
int echo_commitrw(int cmd, struct lustre_handle *conn, int objcount,
struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
- void *desc_private)
+ void *desc_private, struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
int rc = 0;
+ int vrc = 0;
int i;
ENTRY;
GOTO(commitrw_cleanup, rc = -EFAULT);
}
- atomic_inc(&echo_page_rws);
-
CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
r->page, addr, r->offset);
- if (verify)
- page_debug_check("echo", addr, r->len,
- r->offset, obj->ioo_id);
-
+ if (verify) {
+ vrc = page_debug_check("echo", addr, r->len,
+ r->offset, obj->ioo_id);
+ /* check all the pages always */
+ if (vrc != 0 && rc == 0)
+ rc = vrc;
+ }
+
kunmap(page);
+ /* NB see comment above regarding object0 pages */
obd_kmap_put(1);
__free_pages(page, 0);
atomic_dec(&obd->u.echo.eo_prep);
}
CDEBUG(D_PAGE, "%d pages remain after commit\n",
atomic_read(&obd->u.echo.eo_prep));
- RETURN(0);
+ RETURN(rc);
commitrw_cleanup:
CERROR("cleaning up %ld pages (%d obdos)\n",
kunmap(page);
obd_kmap_put(1);
+ /* NB see comment above regarding object0 pages */
__free_pages(page, 0);
atomic_dec(&obd->u.echo.eo_prep);
}
{
ENTRY;
+ spin_lock_init(&obddev->u.echo.eo_lock);
+ obddev->u.echo.eo_lastino = ECHO_INIT_OBJID;
+
obddev->obd_namespace =
ldlm_namespace_new("echo-tgt", LDLM_NAMESPACE_SERVER);
if (obddev->obd_namespace == NULL) {
LBUG();
RETURN(-ENOMEM);
}
- spin_lock_init(&obddev->u.echo.eo_lock);
- obddev->u.echo.eo_lastino = ECHO_INIT_OBJID;
+ ptlrpc_init_client (LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+ "echo_ldlm_cb_client", &obddev->obd_ldlm_client);
RETURN(0);
}
int echo_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int echo_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
static struct obd_ops echo_obd_ops = {
o_attach: echo_attach,
o_detach: echo_detach,
o_connect: echo_connect,
- o_disconnect: class_disconnect,
+ o_disconnect: echo_disconnect,
o_create: echo_create,
o_destroy: echo_destroy,
o_open: echo_open,
extern int echo_client_init(void);
extern void echo_client_cleanup(void);
+static void
+echo_object0_pages_fini (void)
+{
+ int i;
+
+ for (i = 0; i < ECHO_OBJECT0_NPAGES; i++)
+ if (echo_object0_pages[i] != NULL) {
+ __free_pages (echo_object0_pages[i], 0);
+ echo_object0_pages[i] = NULL;
+ }
+}
+
+static int
+echo_object0_pages_init (void)
+{
+ struct page *pg;
+ int i;
+
+ for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) {
+ int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ? GFP_KERNEL : GFP_HIGHUSER;
+
+ pg = alloc_pages (gfp_mask, 0);
+ if (pg == NULL) {
+ echo_object0_pages_fini ();
+ return (-ENOMEM);
+ }
+
+ memset (kmap (pg), 0, PAGE_SIZE);
+ kunmap (pg);
+
+ echo_object0_pages[i] = pg;
+ }
+
+ return (0);
+}
+
static int __init obdecho_init(void)
{
+ struct lprocfs_static_vars lvars;
int rc;
- printk(KERN_INFO "Echo OBD driver " OBDECHO_VERSION
- " info@clusterfs.com\n");
+ printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
+
+ lprocfs_init_vars(&lvars);
- echo_proc_init();
- rc = class_register_type(&echo_obd_ops, status_class_var,
+ xprocfs_init ("echo");
+
+ rc = echo_object0_pages_init ();
+ if (rc != 0)
+ goto failed_0;
+
+ rc = class_register_type(&echo_obd_ops, lvars.module_vars,
OBD_ECHO_DEVICENAME);
- if (rc)
- RETURN(rc);
+ if (rc != 0)
+ goto failed_1;
rc = echo_client_init();
- if (rc)
- class_unregister_type(OBD_ECHO_DEVICENAME);
+ if (rc == 0)
+ RETURN (0);
+ class_unregister_type(OBD_ECHO_DEVICENAME);
+ failed_1:
+ echo_object0_pages_fini ();
+ failed_0:
+ xprocfs_fini ();
+
RETURN(rc);
}
static void __exit obdecho_exit(void)
{
- echo_proc_fini();
echo_client_cleanup();
class_unregister_type(OBD_ECHO_DEVICENAME);
+ echo_object0_pages_fini ();
+ xprocfs_fini ();
}
-MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Testing Echo OBD driver " OBDECHO_VERSION);
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
MODULE_LICENSE("GPL");
module_init(obdecho_init);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/version.h>
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/iobuf.h>
+#include <asm/div64.h>
#define DEBUG_SUBSYSTEM S_ECHO
#include <linux/obd_echo.h>
#include <linux/lustre_debug.h>
#include <linux/lprocfs_status.h>
+#include <linux/lustre_lite.h> /* for LL_IOC_LOV_SETSTRIPE */
-static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn, int len,
- void *karg, void *uarg)
+#if 0
+static void
+echo_printk_object (char *msg, struct ec_object *eco)
+{
+ struct lov_stripe_md *lsm = eco->eco_lsm;
+ int i;
+
+ printk (KERN_INFO "%s: object %p: "LPX64", refs %d%s: "LPX64
+ "=%u!%u@%d\n", msg, eco, eco->eco_id, eco->eco_refcount,
+ eco->eco_deleted ? "(deleted) " : "",
+ lsm->lsm_object_id, lsm->lsm_stripe_size,
+ lsm->lsm_stripe_count, lsm->lsm_stripe_offset);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++)
+ printk (KERN_INFO " [%2u]"LPX64"\n",
+ lsm->lsm_oinfo[i].loi_ost_idx,
+ lsm->lsm_oinfo[i].loi_id);
+}
+#endif
+
+static struct ec_object *
+echo_find_object_locked (struct obd_device *obd, obd_id id)
{
- struct obd_device *obd = class_conn2obd(obdconn);
struct echo_client_obd *ec = &obd->u.echo_client;
- struct obd_ioctl_data *data = karg;
- int rw = OBD_BRW_READ, rc = 0;
- struct lov_stripe_md *lsm = NULL;
+ struct ec_object *eco = NULL;
+ struct list_head *el;
+
+ list_for_each (el, &ec->ec_objects) {
+ eco = list_entry (el, struct ec_object, eco_obj_chain);
+
+ if (eco->eco_id == id)
+ return (eco);
+ }
+ return (NULL);
+}
+
+static int
+echo_copyout_lsm (struct lov_stripe_md *lsm, void *ulsm, int ulsm_nob)
+{
+ int nob;
+
+ nob = offsetof (struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
+ if (nob > ulsm_nob)
+ return (-EINVAL);
+
+ if (copy_to_user (ulsm, lsm, nob))
+ return (-EFAULT);
+
+ return (0);
+}
+
+static int
+echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm,
+ void *ulsm, int ulsm_nob)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ int nob;
+
+ if (ulsm_nob < sizeof (*lsm))
+ return (-EINVAL);
+
+ if (copy_from_user (lsm, ulsm, sizeof (*lsm)))
+ return (-EFAULT);
+
+ nob = lsm->lsm_stripe_count * sizeof (lsm->lsm_oinfo[0]);
+
+ if (ulsm_nob < nob ||
+ lsm->lsm_stripe_count > ec->ec_nstripes ||
+ lsm->lsm_magic != LOV_MAGIC ||
+ (lsm->lsm_stripe_offset != 0 &&
+ lsm->lsm_stripe_offset != 0xffffffff &&
+ lsm->lsm_stripe_offset >= ec->ec_nstripes) ||
+ (lsm->lsm_stripe_size & (PAGE_SIZE - 1)) != 0 ||
+ ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
+ return (-EINVAL);
+
+ LASSERT (ec->ec_lsmsize >= sizeof (*lsm) + nob);
+
+ if (copy_from_user(lsm->lsm_oinfo,
+ ((struct lov_stripe_md *)ulsm)->lsm_oinfo, nob))
+ return (-EFAULT);
+
+ return (0);
+}
+
+static struct ec_object *
+echo_allocate_object (struct obd_device *obd)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct ec_object *eco;
+
+ OBD_ALLOC (eco, sizeof (*eco));
+ if (eco == NULL)
+ return (NULL);
+
+ OBD_ALLOC (eco->eco_lsm, ec->ec_lsmsize);
+ if (eco->eco_lsm == NULL) {
+ OBD_FREE (eco, sizeof (*eco));
+ return (NULL);
+ }
+
+ eco->eco_device = obd;
+ eco->eco_deleted = 0;
+ eco->eco_refcount = 0;
+ eco->eco_lsm->lsm_magic = LOV_MAGIC;
+ /* leave stripe count 0 by default */
+
+ return (eco);
+}
+
+static void
+echo_free_object (struct ec_object *eco)
+{
+ struct obd_device *obd = eco->eco_device;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+
+ LASSERT (eco->eco_refcount == 0);
+ OBD_FREE (eco->eco_lsm, ec->ec_lsmsize);
+ OBD_FREE (eco, sizeof (*eco));
+}
+
+static int
+echo_create_object (struct obd_device *obd, int on_target, struct obdo *oa,
+ void *ulsm, int ulsm_nob)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct ec_object *eco2;
+ struct ec_object *eco;
+ struct lov_stripe_md *lsm;
+ int rc;
+ int i;
+
+ if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
+ (on_target || /* set_stripe */
+ ec->ec_nstripes != 0)) { /* LOV */
+ CERROR ("No valid oid\n");
+ return (-EINVAL);
+ }
+
+ eco = echo_allocate_object (obd);
+ if (eco == NULL)
+ return (-ENOMEM);
+
+ lsm = eco->eco_lsm;
+
+ if (ulsm != NULL) {
+ rc = echo_copyin_lsm (obd, lsm, ulsm, ulsm_nob);
+ if (rc != 0)
+ goto failed;
+ }
+
+ /* setup object ID here for !on_target and LOV hint */
+ if ((oa->o_valid & OBD_MD_FLID) != 0)
+ eco->eco_id = lsm->lsm_object_id = oa->o_id;
+
+ /* defaults -> actual values */
+ if (lsm->lsm_stripe_offset == 0xffffffff)
+ lsm->lsm_stripe_offset = 0;
+
+ if (lsm->lsm_stripe_count == 0)
+ lsm->lsm_stripe_count = ec->ec_nstripes;
+
+ if (lsm->lsm_stripe_size == 0)
+ lsm->lsm_stripe_size = PAGE_SIZE;
+
+ /* setup stripes: indices + default ids if required */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (lsm->lsm_oinfo[i].loi_id == 0)
+ lsm->lsm_oinfo[i].loi_id = lsm->lsm_object_id;
+
+ lsm->lsm_oinfo[i].loi_ost_idx =
+ (lsm->lsm_stripe_offset + i) % ec->ec_nstripes;
+ }
+
+ if (on_target) {
+ rc = obd_create (&ec->ec_conn, oa, &lsm, NULL);
+ if (rc != 0)
+ goto failed;
+
+ /* See what object ID we were given */
+ LASSERT ((oa->o_valid & OBD_MD_FLID) != 0);
+ eco->eco_id = lsm->lsm_object_id = oa->o_id;
+ }
+
+ spin_lock (&ec->ec_lock);
+
+ eco2 = echo_find_object_locked (obd, oa->o_id);
+ if (eco2 != NULL) { /* conflict */
+ spin_unlock (&ec->ec_lock);
+
+ CERROR ("Can't create object id "LPX64": id already exists%s\n",
+ oa->o_id, on_target ? " (undoing create)" : "");
+
+ if (on_target)
+ obd_destroy (&ec->ec_conn, oa, lsm, NULL);
+
+ rc = -EEXIST;
+ goto failed;
+ }
+
+ list_add (&eco->eco_obj_chain, &ec->ec_objects);
+ spin_unlock (&ec->ec_lock);
+ CDEBUG (D_INFO,
+ "created %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+ eco, eco->eco_id,
+ eco->eco_lsm->lsm_stripe_size,
+ eco->eco_lsm->lsm_stripe_count,
+ eco->eco_lsm->lsm_stripe_offset,
+ eco->eco_refcount, eco->eco_deleted);
+ return (0);
+
+ failed:
+ echo_free_object (eco);
+ return (rc);
+}
+
+static int
+echo_get_object (struct ec_object **ecop, struct obd_device *obd, struct obdo *oa)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct ec_object *eco;
+ struct ec_object *eco2;
+ int rc;
+
+ if ((oa->o_valid & OBD_MD_FLID) == 0)
+ {
+ CERROR ("No valid oid\n");
+ return (-EINVAL);
+ }
+
+ spin_lock (&ec->ec_lock);
+ eco = echo_find_object_locked (obd, oa->o_id);
+ if (eco != NULL) {
+ if (eco->eco_deleted) /* being deleted */
+ return (-EAGAIN); /* (see comment in cleanup) */
+
+ eco->eco_refcount++;
+ spin_unlock (&ec->ec_lock);
+ *ecop = eco;
+ CDEBUG (D_INFO,
+ "found %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+ eco, eco->eco_id,
+ eco->eco_lsm->lsm_stripe_size,
+ eco->eco_lsm->lsm_stripe_count,
+ eco->eco_lsm->lsm_stripe_offset,
+ eco->eco_refcount, eco->eco_deleted);
+ return (0);
+ }
+ spin_unlock (&ec->ec_lock);
+
+ if (ec->ec_nstripes != 0) /* striping required */
+ return (-ENOENT);
+
+ eco = echo_allocate_object (obd);
+ if (eco == NULL)
+ return (-ENOMEM);
+
+ eco->eco_id = eco->eco_lsm->lsm_object_id = oa->o_id;
+
+ spin_lock (&ec->ec_lock);
+
+ eco2 = echo_find_object_locked (obd, oa->o_id);
+ if (eco2 == NULL) { /* didn't race */
+ list_add (&eco->eco_obj_chain, &ec->ec_objects);
+ spin_unlock (&ec->ec_lock);
+ eco->eco_refcount = 1;
+ *ecop = eco;
+ CDEBUG (D_INFO,
+ "created %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+ eco, eco->eco_id,
+ eco->eco_lsm->lsm_stripe_size,
+ eco->eco_lsm->lsm_stripe_count,
+ eco->eco_lsm->lsm_stripe_offset,
+ eco->eco_refcount, eco->eco_deleted);
+ return (0);
+ }
+
+ if (eco2->eco_deleted)
+ rc = -EAGAIN; /* lose race */
+ else {
+ eco2->eco_refcount++; /* take existing */
+ *ecop = eco2;
+ rc = 0;
+ LASSERT (eco2->eco_id == eco2->eco_lsm->lsm_object_id);
+ CDEBUG (D_INFO,
+ "found(2) %p: "LPX64"=%u#%u&%d refs %d del %d\n",
+ eco2, eco2->eco_id,
+ eco2->eco_lsm->lsm_stripe_size,
+ eco2->eco_lsm->lsm_stripe_count,
+ eco2->eco_lsm->lsm_stripe_offset,
+ eco2->eco_refcount, eco2->eco_deleted);
+ }
+
+ spin_unlock (&ec->ec_lock);
+
+ echo_free_object (eco);
+ return (rc);
+}
+
+static void
+echo_put_object (struct ec_object *eco)
+{
+ struct obd_device *obd = eco->eco_device;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+
+ /* Release caller's ref on the object.
+ * delete => mark for deletion when last ref goes
+ */
+
+ spin_lock (&ec->ec_lock);
+
+ eco->eco_refcount--;
+ LASSERT (eco->eco_refcount >= 0);
+
+ if (eco->eco_refcount != 0 ||
+ !eco->eco_deleted) {
+ spin_unlock (&ec->ec_lock);
+ return;
+ }
+
+ spin_unlock (&ec->ec_lock);
+
+ /* NB leave obj in the object list. We must prevent anyone from
+ * attempting to enqueue on this object number until we can be
+ * sure there will be no more lock callbacks.
+ */
+ obd_cancel_unused (&ec->ec_conn, eco->eco_lsm, 0);
+
+ /* now we can let it go */
+ spin_lock (&ec->ec_lock);
+ list_del (&eco->eco_obj_chain);
+ spin_unlock (&ec->ec_lock);
+
+ LASSERT (eco->eco_refcount == 0);
+
+ echo_free_object (eco);
+}
+
+static void
+echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
+{
+ unsigned long stripe_count;
+ unsigned long stripe_size;
+ unsigned long width;
+ unsigned long woffset;
+ int stripe_index;
+ obd_off offset;
+
+ if (lsm->lsm_stripe_count <= 1)
+ return;
+
+ offset = *offp;
+ stripe_size = lsm->lsm_stripe_size;
+ stripe_count = lsm->lsm_stripe_count;
+
+ /* width = # bytes in all stripes */
+ width = stripe_size * stripe_count;
+
+ /* woffset = offset within a width; offset = whole number of widths */
+ woffset = do_div (offset, width);
+
+ stripe_index = woffset / stripe_size;
+
+ *idp = lsm->lsm_oinfo[stripe_index].loi_id;
+ *offp = offset * stripe_size + woffset % stripe_size;
+}
+
+static int
+echo_client_kbrw (struct obd_device *obd, int rw,
+ struct obdo *oa, struct lov_stripe_md *lsm,
+ obd_off offset, obd_size count)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct obd_brw_set *set;
+ obd_count npages;
+ struct brw_page *pga;
+ struct brw_page *pgp;
+ obd_off off;
+ int i;
+ int rc;
+ int verify;
+ int gfp_mask;
+
+ /* oa_id == 0 => speed test (no verification) else...
+ * oa & 1 => use HIGHMEM
+ */
+ verify = (oa->o_id != 0);
+ gfp_mask = ((oa->o_id & 1) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
+
+ LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
+
+ if (count <= 0 ||
+ (count & (PAGE_SIZE - 1)) != 0 ||
+ (lsm != NULL &&
+ lsm->lsm_object_id != oa->o_id))
+ return (-EINVAL);
+
+ set = obd_brw_set_new();
+ if (set == NULL)
+ return (-ENOMEM);
+
+ /* XXX think again with misaligned I/O */
+ npages = count >> PAGE_SHIFT;
+
+ rc = -ENOMEM;
+ OBD_ALLOC(pga, npages * sizeof(*pga));
+ if (pga == NULL)
+ goto out_0;
+
+ for (i = 0, pgp = pga, off = offset;
+ i < npages;
+ i++, pgp++, off += PAGE_SIZE) {
+
+ LASSERT (pgp->pg == NULL); /* for cleanup */
+
+ rc = -ENOMEM;
+ pgp->pg = alloc_pages (gfp_mask, 0);
+ if (pgp->pg == NULL)
+ goto out_1;
+
+ pgp->count = PAGE_SIZE;
+ pgp->off = off;
+ pgp->flag = 0;
+
+ if (verify) {
+ void *addr = kmap(pgp->pg);
+ obd_off stripe_off = off;
+ obd_id stripe_id = oa->o_id;
+
+ if (rw == OBD_BRW_WRITE) {
+ echo_get_stripe_off_id(lsm, &stripe_off,
+ &stripe_id);
+ page_debug_setup(addr, pgp->count,
+ stripe_off, stripe_id);
+ } else {
+ page_debug_setup(addr, pgp->count,
+ 0xdeadbeef00c0ffee,
+ 0xdeadbeef00c0ffee);
+ }
+ kunmap(pgp->pg);
+ }
+ }
+
+ set->brw_callback = ll_brw_sync_wait;
+ rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, set, NULL);
+ if (rc == 0)
+ rc = ll_brw_sync_wait(set, CB_PHASE_START);
+
+ out_1:
+ if (rc != 0)
+ verify = 0;
+
+ for (i = 0, pgp = pga; i < npages; i++, pgp++) {
+ if (pgp->pg == NULL)
+ continue;
+
+ if (verify) {
+ void *addr = kmap(pgp->pg);
+ obd_off stripe_off = pgp->off;
+ obd_id stripe_id = oa->o_id;
+ int vrc;
+
+ echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
+ vrc = page_debug_check("test_brw", addr, pgp->count,
+ stripe_off, stripe_id);
+ if (vrc != 0 && rc == 0)
+ rc = vrc;
+
+ kunmap(pgp->pg);
+ }
+ __free_pages(pgp->pg, 0);
+ }
+ OBD_FREE(pga, npages * sizeof(*pga));
+ out_0:
+ obd_brw_set_free(set);
+ return (rc);
+}
+
+static int
+echo_client_ubrw (struct obd_device *obd, int rw,
+ struct obdo *oa, struct lov_stripe_md *lsm,
+ obd_off offset, obd_size count, char *buffer)
+{
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct obd_brw_set *set;
+ obd_count npages;
+ struct brw_page *pga;
+ struct brw_page *pgp;
+ obd_off off;
+ struct kiobuf *kiobuf;
+ int i;
+ int rc;
+
+ LASSERT (rw == OBD_BRW_WRITE ||
+ rw == OBD_BRW_READ);
+
+ /* NB: for now, only whole pages, page aligned */
+
+ if (count <= 0 ||
+ ((long)buffer & (PAGE_SIZE - 1)) != 0 ||
+ (count & (PAGE_SIZE - 1)) != 0 ||
+ (lsm != NULL && lsm->lsm_object_id != oa->o_id))
+ return (-EINVAL);
+
+ set = obd_brw_set_new();
+ if (set == NULL)
+ return (-ENOMEM);
+
+ /* XXX think again with misaligned I/O */
+ npages = count >> PAGE_SHIFT;
+
+ rc = -ENOMEM;
+ OBD_ALLOC(pga, npages * sizeof(*pga));
+ if (pga == NULL)
+ goto out_0;
+
+ rc = alloc_kiovec (1, &kiobuf);
+ if (rc != 0)
+ goto out_1;
+
+ rc = map_user_kiobuf ((rw == OBD_BRW_READ) ? READ : WRITE,
+ kiobuf, (unsigned long)buffer, count);
+ if (rc != 0)
+ goto out_2;
+
+ LASSERT (kiobuf->offset == 0);
+ LASSERT (kiobuf->nr_pages == npages);
+
+ for (i = 0, off = offset, pgp = pga;
+ i < npages;
+ i++, off += PAGE_SIZE, pgp++) {
+ pgp->off = off;
+ pgp->pg = kiobuf->maplist[i];
+ pgp->count = PAGE_SIZE;
+ pgp->flag = 0;
+ }
+
+ set->brw_callback = ll_brw_sync_wait;
+ rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, set, NULL);
+
+ if (rc == 0)
+ rc = ll_brw_sync_wait(set, CB_PHASE_START);
+
+ // if (rw == OBD_BRW_READ)
+ // mark_dirty_kiobuf (kiobuf, count);
+
+ unmap_kiobuf (kiobuf);
+ out_2:
+ free_kiovec (1, &kiobuf);
+ out_1:
+ OBD_FREE(pga, npages * sizeof(*pga));
+ out_0:
+ obd_brw_set_free(set);
+ return (rc);
+}
+
+static int
+echo_open (struct obd_export *exp, struct obdo *oa)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct lustre_handle *ufh = obdo_handle (oa);
+ struct ec_open_object *ecoo;
+ struct ec_object *eco;
+ int rc;
+
+ rc = echo_get_object (&eco, obd, oa);
+ if (rc != 0)
+ return (rc);
+
+ rc = -ENOMEM;
+ OBD_ALLOC (ecoo, sizeof (*ecoo));
+ if (ecoo == NULL)
+ goto failed_0;
+
+ rc = obd_open (&ec->ec_conn, oa, eco->eco_lsm, NULL);
+ if (rc != 0)
+ goto failed_1;
+
+ memcpy (&ecoo->ecoo_oa, oa, sizeof (*oa));
+ ecoo->ecoo_object = eco;
+ /* ecoo takes ref from echo_get_object() above */
+
+ spin_lock (&ec->ec_lock);
+
+ list_add (&ecoo->ecoo_exp_chain,
+ &exp->exp_ec_data.eced_open_head);
+
+ ufh->addr = (__u64)((long) ecoo);
+ ufh->cookie = ecoo->ecoo_cookie = ec->ec_unique++;
+
+ spin_unlock (&ec->ec_lock);
+ return (0);
+
+ failed_1:
+ OBD_FREE (ecoo, sizeof (*ecoo));
+ failed_0:
+ echo_put_object (eco);
+ return (rc);
+}
+
+static int
+echo_close (struct obd_export *exp, struct obdo *oa)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct lustre_handle *ufh = obdo_handle (oa);
+ struct ec_open_object *ecoo = NULL;
+ int found = 0;
+ struct list_head *el;
+ int rc;
+
+ if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
+ return (-EINVAL);
+
+ spin_lock (&ec->ec_lock);
+
+ list_for_each (el, &exp->exp_ec_data.eced_open_head) {
+ ecoo = list_entry (el, struct ec_open_object, ecoo_exp_chain);
+ if ((__u64)((long)ecoo) == ufh->addr) {
+ found = (ecoo->ecoo_cookie == ufh->cookie);
+ if (found)
+ list_del (&ecoo->ecoo_exp_chain);
+ break;
+ }
+ }
+
+ spin_unlock (&ec->ec_lock);
+
+ if (!found)
+ return (-EINVAL);
+
+ rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa,
+ ecoo->ecoo_object->eco_lsm, NULL);
+
+ echo_put_object (ecoo->ecoo_object);
+ OBD_FREE (ecoo, sizeof (*ecoo));
+
+ return (rc);
+}
+
+static int
+echo_ldlm_callback (struct ldlm_lock *lock, struct ldlm_lock_desc *new,
+ void *data, int flag)
+{
+ struct ec_object *eco = (struct ec_object *)data;
+ struct echo_client_obd *ec = &(eco->eco_device->u.echo_client);
+ struct lustre_handle lockh;
+ struct list_head *el;
+ int found = 0;
+ int rc;
+
+ ldlm_lock2handle (lock, &lockh);
+
+ /* #ifdef this out if we're not feeling paranoid */
+ spin_lock (&ec->ec_lock);
+ list_for_each (el, &ec->ec_objects) {
+ found = (eco == list_entry (el, struct ec_object, eco_obj_chain));
+ if (found)
+ break;
+ }
+ spin_unlock (&ec->ec_lock);
+ LASSERT (found);
+
+ switch (flag) {
+ case LDLM_CB_BLOCKING:
+ CDEBUG (D_INFO, "blocking callback on "LPX64", handle "LPX64"."LPX64"\n",
+ eco->eco_id, lockh.addr, lockh.cookie);
+ rc = ldlm_cli_cancel (&lockh);
+ if (rc != ELDLM_OK)
+ CERROR ("ldlm_cli_cancel failed: %d\n", rc);
+ break;
+
+ case LDLM_CB_CANCELING:
+ CDEBUG (D_INFO, "canceling callback on "LPX64", handle "LPX64"."LPX64"\n",
+ eco->eco_id, lockh.addr, lockh.cookie);
+ break;
+
+ default:
+ LBUG ();
+ }
+
+ return (0);
+}
+
+static int
+echo_enqueue (struct obd_export *exp, struct obdo *oa,
+ int mode, obd_off offset, obd_size nob)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct lustre_handle *ulh = obdo_handle (oa);
+ struct ec_object *eco;
+ struct ec_lock *ecl;
+ int flags;
+ int rc;
+
+ if (!(mode == LCK_PR || mode == LCK_PW))
+ return (-EINVAL);
+
+ if ((offset & (PAGE_SIZE - 1)) != 0 ||
+ (nob & (PAGE_SIZE - 1)) != 0)
+ return (-EINVAL);
+
+ rc = echo_get_object (&eco, obd, oa);
+ if (rc != 0)
+ return (rc);
+
+ rc = -ENOMEM;
+ OBD_ALLOC (ecl, sizeof (*ecl));
+ if (ecl == NULL)
+ goto failed_0;
+
+ ecl->ecl_mode = mode;
+ ecl->ecl_object = eco;
+ ecl->ecl_extent.start = offset;
+ ecl->ecl_extent.end = (nob == 0) ? ((obd_off)-1) : (offset + nob - 1);
+
+ flags = 0;
+ rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL,
+ LDLM_EXTENT, &ecl->ecl_extent, sizeof (ecl->ecl_extent),
+ mode, &flags, echo_ldlm_callback, eco, sizeof (*eco),
+ &ecl->ecl_handle);
+ if (rc != 0)
+ goto failed_1;
+
+ CDEBUG (D_INFO, "enqueue handle "LPX64"."LPX64"\n",
+ ecl->ecl_handle.addr, ecl->ecl_handle.cookie);
+
+ /* NB ecl takes object ref from echo_get_object() above */
+
+ spin_lock (&ec->ec_lock);
+
+ list_add (&ecl->ecl_exp_chain, &exp->exp_ec_data.eced_locks);
+
+ ulh->addr = (__u64)((long)ecl);
+ ulh->cookie = ecl->ecl_cookie = ec->ec_unique++;
+
+ spin_unlock (&ec->ec_lock);
+
+ oa->o_valid |= OBD_MD_FLHANDLE;
+ return (0);
+
+ failed_1:
+ OBD_FREE (ecl, sizeof (*ecl));
+ failed_0:
+ echo_put_object (eco);
+ return (rc);
+}
+
+static int
+echo_cancel (struct obd_export *exp, struct obdo *oa)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct echo_client_obd *ec = &obd->u.echo_client;
+ struct lustre_handle *ulh = obdo_handle (oa);
+ struct ec_lock *ecl = NULL;
+ int found = 0;
+ struct list_head *el;
+ int rc;
+
+ if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
+ return (-EINVAL);
+
+ spin_lock (&ec->ec_lock);
+
+ list_for_each (el, &exp->exp_ec_data.eced_locks) {
+ ecl = list_entry (el, struct ec_lock, ecl_exp_chain);
+
+ if ((__u64)((long)ecl) == ulh->addr) {
+ found = (ecl->ecl_cookie == ulh->cookie);
+ if (found)
+ list_del (&ecl->ecl_exp_chain);
+ break;
+ }
+ }
+
+ spin_unlock (&ec->ec_lock);
+
+ if (!found)
+ return (-ENOENT);
+
+ rc = obd_cancel (&ec->ec_conn,
+ ecl->ecl_object->eco_lsm,
+ ecl->ecl_mode,
+ &ecl->ecl_handle);
+
+ echo_put_object (ecl->ecl_object);
+ OBD_FREE (ecl, sizeof (*ecl));
+
+ return (rc);
+}
+
+static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn,
+ int len, void *karg, void *uarg)
+{
+ struct obd_export *exp = class_conn2export (obdconn);
+ struct obd_device *obd;
+ struct echo_client_obd *ec;
+ struct ec_object *eco;
+ struct obd_ioctl_data *data = karg;
+ int rw = OBD_BRW_READ;
+ int rc = 0;
ENTRY;
- if (obd == NULL) {
+ if (exp == NULL) {
CERROR("ioctl: No device\n");
GOTO(out, rc = -EINVAL);
}
- if (data->ioc_inllen1 == sizeof(*lsm)) {
- lsm = (struct lov_stripe_md *)data->ioc_inlbuf1;
- } else if (data->ioc_inllen1 != 0) {
- CERROR("nonzero ioc_inllen1 != sizeof(struct lov_stripe_md)\n");
- GOTO(out, rc = -EINVAL);
- }
+ obd = exp->exp_obd;
+ ec = &obd->u.echo_client;
switch (cmd) {
- case OBD_IOC_CREATE: {
- struct lov_stripe_md *tmp_lsm = NULL;
- rc = obd_create(&ec->conn, &data->ioc_obdo1, &tmp_lsm);
- if (lsm && tmp_lsm ) {
- memcpy(lsm, tmp_lsm, sizeof(*tmp_lsm));
- data->ioc_conn2 = 1;
- }
+ case OBD_IOC_CREATE: /* may create echo object */
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_create_object (obd, 1, &data->ioc_obdo1,
+ data->ioc_pbuf1, data->ioc_plen1);
+ GOTO(out, rc);
+ case OBD_IOC_DESTROY:
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ rc = obd_destroy(&ec->ec_conn, &data->ioc_obdo1,
+ eco->eco_lsm, NULL);
+ if (rc == 0)
+ eco->eco_deleted = 1;
+ echo_put_object(eco);
+ }
GOTO(out, rc);
- }
case OBD_IOC_GETATTR:
- rc = obd_getattr(&ec->conn, &data->ioc_obdo1, lsm);
+ rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ rc = obd_getattr(&ec->ec_conn, &data->ioc_obdo1,
+ eco->eco_lsm);
+ echo_put_object(eco);
+ }
GOTO(out, rc);
case OBD_IOC_SETATTR:
- rc = obd_setattr(&ec->conn, &data->ioc_obdo1, lsm);
- GOTO(out, rc);
-
- case OBD_IOC_DESTROY:
- rc = obd_destroy(&ec->conn, &data->ioc_obdo1, lsm);
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ rc = obd_setattr(&ec->ec_conn, &data->ioc_obdo1,
+ eco->eco_lsm, NULL);
+ echo_put_object(eco);
+ }
GOTO(out, rc);
case OBD_IOC_OPEN:
- rc = obd_open(&ec->conn, &data->ioc_obdo1, lsm);
+ rc = echo_open (exp, &data->ioc_obdo1);
GOTO(out, rc);
case OBD_IOC_CLOSE:
- rc = obd_close(&ec->conn, &data->ioc_obdo1, lsm);
+ rc = echo_close (exp, &data->ioc_obdo1);
GOTO(out, rc);
case OBD_IOC_BRW_WRITE:
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
rw = OBD_BRW_WRITE;
- case OBD_IOC_BRW_READ: {
- struct lov_stripe_md tmp_lsm;
- struct obd_brw_set *set;
- obd_count pages = 0;
- struct brw_page *pga, *pgp;
- __u64 off, id = data->ioc_obdo1.o_id;
- int gfp_mask = (id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
- int j, verify = (id != 0);
-
- if (lsm && lsm->lsm_object_id != id) {
- CERROR("LSM object ID ("LPU64") != id ("LPU64")\n",
- lsm->lsm_object_id, id);
- GOTO(out, rc = -EINVAL);
- }
-
- if (!lsm) {
- memset(&tmp_lsm, 0, sizeof(tmp_lsm));
- lsm = &tmp_lsm;
- lsm->lsm_object_id = id;
+ /* fall through */
+ case OBD_IOC_BRW_READ:
+ rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ if (data->ioc_pbuf2 == NULL) // NULL user data pointer
+ rc = echo_client_kbrw(obd, rw, &data->ioc_obdo1,
+ eco->eco_lsm,
+ data->ioc_offset,
+ data->ioc_count);
+ else
+ rc = echo_client_ubrw(obd, rw, &data->ioc_obdo1,
+ eco->eco_lsm,
+ data->ioc_offset,
+ data->ioc_count,
+ data->ioc_pbuf2);
+ echo_put_object(eco);
}
+ GOTO(out, rc);
- if (data->ioc_count < 0) {
- CERROR("invalid buffer size: "LPD64"\n",
- data->ioc_count);
- GOTO(out, rc = -EINVAL);
+ case ECHO_IOC_GET_STRIPE:
+ rc = echo_get_object(&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ rc = echo_copyout_lsm(eco->eco_lsm, data->ioc_pbuf1,
+ data->ioc_plen1);
+ echo_put_object(eco);
}
+ GOTO(out, rc);
- set = obd_brw_set_new();
- if (set == NULL)
- GOTO(out, rc = -ENOMEM);
-
- pages = data->ioc_count / PAGE_SIZE;
- off = data->ioc_offset;
-
- CDEBUG(D_INODE, "BRW %s with %d pages @ "LPX64"\n",
- rw == OBD_BRW_READ ? "read" : "write", pages, off);
- OBD_ALLOC(pga, pages * sizeof(*pga));
- if (!pga) {
- CERROR("no memory for %d BRW per-page data\n", pages);
- GOTO(brw_free, rc = -ENOMEM);
- }
+ case ECHO_IOC_SET_STRIPE:
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
- for (j = 0, pgp = pga; j < pages; j++, off += PAGE_SIZE, pgp++){
- pgp->pg = alloc_pages(gfp_mask, 0);
- if (!pgp->pg) {
- CERROR("no memory for brw pages\n");
- GOTO(brw_cleanup, rc = -ENOMEM);
- }
- pgp->count = PAGE_SIZE;
- pgp->off = off;
- pgp->flag = 0;
-
- if (verify) {
- void *addr = kmap(pgp->pg);
-
- if (rw == OBD_BRW_WRITE)
- page_debug_setup(addr, pgp->count,
- pgp->off, id);
- else
- page_debug_setup(addr, pgp->count,
- 0xdeadbeef00c0ffee,
- 0xdeadbeef00c0ffee);
- kunmap(pgp->pg);
+ if (data->ioc_pbuf1 == NULL) { /* unset */
+ rc = echo_get_object(&eco, obd, &data->ioc_obdo1);
+ if (rc == 0) {
+ eco->eco_deleted = 1;
+ echo_put_object(eco);
}
+ } else {
+ rc = echo_create_object(obd, 0, &data->ioc_obdo1,
+ data->ioc_pbuf1, data->ioc_plen1);
}
+ GOTO (out, rc);
+
+ case ECHO_IOC_ENQUEUE:
+ if (!capable (CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_enqueue (exp, &data->ioc_obdo1,
+ data->ioc_conn1, /* lock mode */
+ data->ioc_offset, data->ioc_count); /* extent */
+ GOTO (out, rc);
+
+ case ECHO_IOC_CANCEL:
+ rc = echo_cancel (exp, &data->ioc_obdo1);
+ GOTO (out, rc);
- set->brw_callback = ll_brw_sync_wait;
- rc = obd_brw(rw, &ec->conn, lsm, j, pga, set);
- if (rc)
- CERROR("test_brw: error from obd_brw: rc = %d\n", rc);
- else {
- rc = ll_brw_sync_wait(set, CB_PHASE_START);
- if (rc)
- CERROR("test_brw: error from callback: rc = "
- "%d\n", rc);
- }
- EXIT;
- brw_cleanup:
- for (j = 0, pgp = pga; j < pages; j++, pgp++) {
- if (pgp->pg == NULL)
- continue;
-
- if (verify && !rc) {
- void *addr = kmap(pgp->pg);
-
- rc = page_debug_check("test_brw", addr,
- pgp->count, pgp->off, id);
- kunmap(pgp->pg);
- }
- __free_pages(pgp->pg, 0);
- }
- brw_free:
- obd_brw_set_free(set);
- OBD_FREE(pga, pages * sizeof(*pga));
- GOTO(out, rc);
- }
default:
CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
GOTO (out, rc = -ENOTTY);
struct obd_ioctl_data* data = buf;
struct echo_client_obd *ec = &obddev->u.echo_client;
struct obd_device *tgt;
+ struct obd_uuid uuid;
+ struct lov_stripe_md *lsm = NULL;
+ struct obd_uuid echo_uuid = { "ECHO_UUID" };
int rc;
ENTRY;
RETURN(-EINVAL);
}
- tgt = class_uuid2obd(data->ioc_inlbuf1);
+ obd_str2uuid(&uuid, data->ioc_inlbuf1);
+ tgt = class_uuid2obd(&uuid);
if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
!(tgt->obd_flags & OBD_SET_UP)) {
CERROR("device not attached or not set up (%d)\n",
RETURN(rc = -EINVAL);
}
- rc = obd_connect(&ec->conn, tgt, NULL, NULL, NULL);
- if (rc)
+ spin_lock_init (&ec->ec_lock);
+ INIT_LIST_HEAD (&ec->ec_objects);
+ ec->ec_unique = 0;
+
+ rc = obd_connect(&ec->ec_conn, tgt, &echo_uuid, NULL, NULL);
+ if (rc) {
CERROR("fail to connect to device %d\n", data->ioc_dev);
+ return (rc);
+ }
+
+ ec->ec_lsmsize = obd_alloc_memmd (&ec->ec_conn, &lsm);
+ if (ec->ec_lsmsize < 0) {
+ CERROR ("Can't get # stripes: %d\n", rc);
+ obd_disconnect (&ec->ec_conn);
+ rc = ec->ec_lsmsize;
+ } else {
+ ec->ec_nstripes = lsm->lsm_stripe_count;
+ obd_free_memmd (&ec->ec_conn, &lsm);
+ }
+
RETURN(rc);
}
static int echo_cleanup(struct obd_device * obddev)
{
+ struct list_head *el;
+ struct ec_object *eco;
struct echo_client_obd *ec = &obddev->u.echo_client;
int rc;
ENTRY;
RETURN(-EBUSY);
}
- rc = obd_disconnect(&ec->conn);
- if (rc) {
- CERROR("fail to disconnect device: %d\n", rc);
- RETURN(-EINVAL);
+ /* XXX assuming sole access */
+ while (!list_empty (&ec->ec_objects)) {
+ el = ec->ec_objects.next;
+ eco = list_entry (el, struct ec_object, eco_obj_chain);
+
+ LASSERT (eco->eco_refcount == 0);
+ eco->eco_refcount = 1;
+ eco->eco_deleted = 1;
+ echo_put_object (eco);
}
- RETURN(0);
+ rc = obd_disconnect (&ec->ec_conn);
+ if (rc != 0)
+ CERROR("fail to disconnect device: %d\n", rc);
+
+ RETURN (rc);
}
static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
- return class_connect(conn, src, cluuid);
+ struct obd_export *exp;
+ int rc;
+
+ rc = class_connect(conn, src, cluuid);
+ if (rc == 0) {
+ exp = class_conn2export (conn);
+ INIT_LIST_HEAD (&exp->exp_ec_data.eced_open_head);
+ INIT_LIST_HEAD (&exp->exp_ec_data.eced_locks);
+ }
+
+ RETURN (rc);
+}
+
+static int echo_disconnect(struct lustre_handle *conn)
+{
+ struct obd_export *exp = class_conn2export (conn);
+ struct obd_device *obd;
+ struct echo_client_obd *ec;
+ struct ec_open_object *ecoo;
+ struct ec_lock *ecl;
+ int rc;
+
+ if (exp == NULL)
+ return (-EINVAL);
+
+ obd = exp->exp_obd;
+ ec = &obd->u.echo_client;
+
+ /* no more contention on export's lock list */
+ while (!list_empty (&exp->exp_ec_data.eced_locks)) {
+ ecl = list_entry (exp->exp_ec_data.eced_locks.next,
+ struct ec_lock, ecl_exp_chain);
+ list_del (&ecl->ecl_exp_chain);
+
+ rc = obd_cancel (&ec->ec_conn, ecl->ecl_object->eco_lsm,
+ ecl->ecl_mode, &ecl->ecl_handle);
+
+ CERROR ("Cancel lock on object "LPX64" on disconnect (%d)\n",
+ ecl->ecl_object->eco_id, rc);
+
+ echo_put_object (ecl->ecl_object);
+ OBD_FREE (ecl, sizeof (*ecl));
+ }
+
+ /* no more contention on export's open handle list */
+ while (!list_empty (&exp->exp_ec_data.eced_open_head)) {
+ ecoo = list_entry (exp->exp_ec_data.eced_open_head.next,
+ struct ec_open_object, ecoo_exp_chain);
+ list_del (&ecoo->ecoo_exp_chain);
+
+ rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa,
+ ecoo->ecoo_object->eco_lsm, NULL);
+
+ CDEBUG (D_INFO, "Closed object "LPX64" on disconnect (%d)\n",
+ ecoo->ecoo_oa.o_id, rc);
+
+ echo_put_object (ecoo->ecoo_object);
+ OBD_FREE (ecoo, sizeof (*ecoo));
+ }
+
+ rc = class_disconnect (conn);
+ RETURN (rc);
}
static struct obd_ops echo_obd_ops = {
o_cleanup: echo_cleanup,
o_iocontrol: echo_iocontrol,
o_connect: echo_connect,
- o_disconnect: class_disconnect
+ o_disconnect: echo_disconnect
};
int echo_client_init(void)
{
- extern struct lprocfs_vars status_class_var[];
+ struct lprocfs_static_vars lvars;
- return class_register_type(&echo_obd_ops, status_class_var,
+ lprocfs_init_vars(&lvars);
+ return class_register_type(&echo_obd_ops, lvars.module_vars,
OBD_ECHO_CLIENT_DEVICENAME);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#define DEBUG_SUBSYSTEM S_ECHO
-#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
+#include <linux/obd_class.h>
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_uuid);
- return len;
-
-}
-
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
+int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
void *data)
{
- int len = 0;
struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
- return len;
-
+ int rc = snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
+ *eof = 1;
+ return rc;
}
-
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/fstype", rd_fstype, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "fstype", rd_fstype, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
*
* linux/fs/obdfilter/filter.c
*
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Andreas Dilger <adilger@clusterfs.com>
*
*/
/*
- * Invariant: get O/R i_sem for lookup, if needed, before any journal ops
+ * Invariant: Get O/R i_sem for lookup, if needed, before any journal ops
* (which need to get journal_lock, may block if journal full).
+ *
+ * Invariant: Call filter_start_transno() before any journal ops to avoid the
+ * same deadlock problem. We can (and want) to get rid of the
+ * transno sem in favour of the dir/inode i_sem to avoid single
+ * threaded operation on the OST.
*/
#define EXPORT_SYMTAB
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
-extern struct lprocfs_vars status_class_var[];
-extern struct lprocfs_vars status_var_nm_1[];
static kmem_cache_t *filter_open_cache;
static kmem_cache_t *filter_dentry_cache;
+/* should be generic per-obd stats... */
+struct xprocfs_io_stat {
+ __u64 st_read_bytes;
+ __u64 st_read_reqs;
+ __u64 st_write_bytes;
+ __u64 st_write_reqs;
+ __u64 st_getattr_reqs;
+ __u64 st_setattr_reqs;
+ __u64 st_create_reqs;
+ __u64 st_destroy_reqs;
+ __u64 st_statfs_reqs;
+ __u64 st_open_reqs;
+ __u64 st_close_reqs;
+ __u64 st_punch_reqs;
+};
+
+static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS];
+static struct proc_dir_entry *xprocfs_dir;
+
+#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count) \
+do { \
+ xprocfs_iostats[smp_processor_id()].field += (count); \
+} while (0)
+
+#define DECLARE_XPROCFS_SUM_STAT(field) \
+static long long \
+xprocfs_sum_##field (void) \
+{ \
+ long long stat = 0; \
+ int i; \
+ \
+ for (i = 0; i < smp_num_cpus; i++) \
+ stat += xprocfs_iostats[i].field; \
+ return (stat); \
+}
+
+DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
+DECLARE_XPROCFS_SUM_STAT (st_write_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+
+static int
+xprocfs_rd_stat (char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ long long (*fn)(void) = (long long(*)(void))data;
+ int len;
+
+ *eof = 1;
+ if (off != 0)
+ return (0);
+
+ len = snprintf (page, count, "%Ld\n", fn());
+ *start = page;
+ return (len);
+}
+
+
+static void
+xprocfs_add_stat(char *name, long long (*fn)(void))
+{
+ struct proc_dir_entry *entry;
+
+ entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir);
+ if (entry == NULL) {
+ CERROR ("Can't add procfs stat %s\n", name);
+ return;
+ }
+
+ entry->data = fn;
+ entry->read_proc = xprocfs_rd_stat;
+ entry->write_proc = NULL;
+}
+
+static void
+xprocfs_init (char *name)
+{
+ char dirname[64];
+
+ snprintf (dirname, sizeof (dirname), "sys/%s", name);
+
+ xprocfs_dir = proc_mkdir ("sys/obdfilter", NULL);
+ if (xprocfs_dir == NULL) {
+ CERROR ("Can't make dir\n");
+ return;
+ }
+
+ xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes);
+ xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs);
+ xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes);
+ xprocfs_add_stat ("write_reqs", xprocfs_sum_st_write_reqs);
+ xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs);
+ xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs);
+ xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs);
+ xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
+ xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs);
+ xprocfs_add_stat ("open_reqs", xprocfs_sum_st_open_reqs);
+ xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs);
+ xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs);
+}
+
+void xprocfs_fini (void)
+{
+ if (xprocfs_dir == NULL)
+ return;
+
+ remove_proc_entry ("read_bytes", xprocfs_dir);
+ remove_proc_entry ("read_reqs", xprocfs_dir);
+ remove_proc_entry ("write_bytes", xprocfs_dir);
+ remove_proc_entry ("write_reqs", xprocfs_dir);
+ remove_proc_entry ("getattr_reqs", xprocfs_dir);
+ remove_proc_entry ("setattr_reqs", xprocfs_dir);
+ remove_proc_entry ("create_reqs", xprocfs_dir);
+ remove_proc_entry ("destroy_reqs", xprocfs_dir);
+ remove_proc_entry ("statfs_reqs", xprocfs_dir);
+ remove_proc_entry ("open_reqs", xprocfs_dir);
+ remove_proc_entry ("close_reqs", xprocfs_dir);
+ remove_proc_entry ("punch_reqs", xprocfs_dir);
+
+ remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent);
+ xprocfs_dir = NULL;
+}
+
#define S_SHIFT 12
static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
[0] NULL,
return obd_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
+static void filter_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd,
+ int error)
+{
+ CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n",
+ last_rcvd, error);
+ if (!error && last_rcvd > obd->obd_last_committed)
+ obd->obd_last_committed = last_rcvd;
+}
+
+void filter_start_transno(struct obd_export *export)
+{
+ struct obd_device * obd = export->exp_obd;
+ ENTRY;
+
+ down(&obd->u.filter.fo_transno_sem);
+}
+
+/* Assumes caller has already pushed us into the kernel context. */
+int filter_finish_transno(struct obd_export *export, void *handle,
+ struct obd_trans_info *oti, int rc)
+{
+ __u64 last_rcvd;
+ struct obd_device *obd = export->exp_obd;
+ struct filter_obd *filter = &obd->u.filter;
+ struct filter_export_data *fed = &export->exp_filter_data;
+ struct filter_client_data *fcd = fed->fed_fcd;
+ loff_t off;
+ ssize_t written;
+
+ /* Propagate error code. */
+ if (rc)
+ GOTO(out, rc);
+
+ /* we don't allocate new transnos for replayed requests */
+#if 0
+ /* perhaps if transno already set? or should level be in oti? */
+ if (req->rq_level == LUSTRE_CONN_RECOVD)
+ GOTO(out, rc = 0);
+#endif
+
+ off = FILTER_LR_CLIENT_START + fed->fed_lr_off * FILTER_LR_CLIENT_SIZE;
+
+ last_rcvd = ++filter->fo_fsd->fsd_last_rcvd;
+ if (oti)
+ oti->oti_transno = last_rcvd;
+ fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
+ fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count);
+
+ /* get this from oti */
+#if 0
+ if (oti)
+ fcd->fcd_last_xid = cpu_to_le64(oti->oti_xid);
+ else
+#else
+ fcd->fcd_last_xid = 0;
+#endif
+ fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_last_rcvd_cb);
+ written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, sizeof(*fcd),
+ &off);
+ CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
+ LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_off, written);
+
+ if (written == sizeof(*fcd))
+ GOTO(out, rc = 0);
+ CERROR("error writing to last_rcvd file: rc = %d\n", rc);
+ if (written >= 0)
+ GOTO(out, rc = -EIO);
+
+ rc = 0;
+
+ EXIT;
+ out:
+
+ up(&filter->fo_transno_sem);
+ return rc;
+}
+
/* write the pathname into the string */
static int filter_id(char *buf, obd_id id, obd_mode mode)
{
};
#define LAST_RCVD "last_rcvd"
+#define INIT_OBJID 2
+
+/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
+#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
+#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
+
+static unsigned long filter_last_rcvd_slots[FILTER_LR_MAX_CLIENT_WORDS];
+
+/* Add client data to the FILTER. We use a bitmap to locate a free space
+ * in the last_rcvd file if cl_off is -1 (i.e. a new client).
+ * Otherwise, we have just read the data from the last_rcvd file and
+ * we know its offset.
+ */
+int filter_client_add(struct filter_obd *filter,
+ struct filter_export_data *fed, int cl_off)
+{
+ int new_client = (cl_off == -1);
+
+ /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+ * there's no need for extra complication here
+ */
+ if (new_client) {
+ cl_off = find_first_zero_bit(filter_last_rcvd_slots,
+ FILTER_LR_MAX_CLIENTS);
+ repeat:
+ if (cl_off >= FILTER_LR_MAX_CLIENTS) {
+ CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
+ return -ENOMEM;
+ }
+ if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+ CERROR("FILTER client %d: found bit is set in bitmap\n",
+ cl_off);
+ cl_off = find_next_zero_bit(filter_last_rcvd_slots,
+ FILTER_LR_MAX_CLIENTS,
+ cl_off);
+ goto repeat;
+ }
+ } else {
+ if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+ CERROR("FILTER client %d: bit already set in bitmap!\n",
+ cl_off);
+ LBUG();
+ }
+ }
+
+ CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
+ cl_off, fed->fed_fcd->fcd_uuid);
+
+ fed->fed_lr_off = cl_off;
+
+ if (new_client) {
+ struct obd_run_ctxt saved;
+ loff_t off = FILTER_LR_CLIENT_START +
+ (cl_off * FILTER_LR_CLIENT_SIZE);
+ ssize_t written;
+
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
+ written = lustre_fwrite(filter->fo_rcvd_filp,
+ (char *)fed->fed_fcd,
+ sizeof(*fed->fed_fcd), &off);
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
+
+ if (written != sizeof(*fed->fed_fcd)) {
+ if (written < 0)
+ RETURN(written);
+ RETURN(-EIO);
+ }
+ CDEBUG(D_INFO, "wrote client fcd at off %u (len %u)\n",
+ FILTER_LR_CLIENT_START + (cl_off*FILTER_LR_CLIENT_SIZE),
+ (unsigned int)sizeof(*fed->fed_fcd));
+ }
+ return 0;
+}
+
+int filter_client_free(struct obd_export *exp)
+{
+ struct filter_export_data *fed = &exp->exp_filter_data;
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
+ struct filter_client_data zero_fcd;
+ struct obd_run_ctxt saved;
+ int written;
+ loff_t off;
+
+ if (!fed->fed_fcd)
+ RETURN(0);
+
+ off = FILTER_LR_CLIENT_START + (fed->fed_lr_off*FILTER_LR_CLIENT_SIZE);
+
+ CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
+ fed->fed_lr_off, off, fed->fed_fcd->fcd_uuid);
+
+ if (!test_and_clear_bit(fed->fed_lr_off, filter_last_rcvd_slots)) {
+ CERROR("FILTER client %u: bit already clear in bitmap!!\n",
+ fed->fed_lr_off);
+ LBUG();
+ }
+
+ memset(&zero_fcd, 0, sizeof zero_fcd);
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
+ written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
+ sizeof(zero_fcd), &off);
+
+ /* XXX: this write gets lost sometimes, unless this sync is here. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
+#else
+ file_fsync(filter->fo_rcvd_filp, filter->fo_rcvd_filp->f_dentry, 1);
+#endif
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
+
+ if (written != sizeof(zero_fcd)) {
+ CERROR("error zeroing out client %s off %d in %s: %d\n",
+ fed->fed_fcd->fcd_uuid, fed->fed_lr_off, LAST_RCVD,
+ written);
+ } else {
+ CDEBUG(D_INFO,
+ "zeroed disconnecting client %s at off %d ("LPX64")\n",
+ fed->fed_fcd->fcd_uuid, fed->fed_lr_off, off);
+ }
+
+ OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd));
+
+ return 0;
+}
+
+static void filter_unpack_fsd(struct filter_server_data *fsd)
+{
+ fsd->fsd_last_objid = le64_to_cpu(fsd->fsd_last_objid);
+ fsd->fsd_last_rcvd = le64_to_cpu(fsd->fsd_last_rcvd);
+ fsd->fsd_mount_count = le64_to_cpu(fsd->fsd_mount_count);
+}
+
+static void filter_pack_fsd(struct filter_server_data *disk_fsd,
+ struct filter_server_data *fsd)
+{
+ memset(disk_fsd, 0, sizeof(*disk_fsd));
+ memcpy(disk_fsd->fsd_uuid, fsd->fsd_uuid, sizeof(fsd->fsd_uuid));
+ disk_fsd->fsd_last_objid = cpu_to_le64(fsd->fsd_last_objid);
+ disk_fsd->fsd_last_rcvd = cpu_to_le64(fsd->fsd_last_rcvd);
+ disk_fsd->fsd_mount_count = cpu_to_le64(fsd->fsd_mount_count);
+}
+
+static int filter_free_server_data(struct filter_obd *filter)
+{
+ OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
+ filter->fo_fsd = NULL;
+
+ return 0;
+}
+
+
+/* assumes caller has already in kernel ctxt */
+static int filter_update_server_data(struct file *filp,
+ struct filter_server_data *fsd)
+{
+ struct filter_server_data disk_fsd;
+ loff_t off = 0;
+ int rc;
+
+ CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid);
+ CDEBUG(D_INODE, "server last_objid: "LPU64"\n", fsd->fsd_last_objid);
+ CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n", fsd->fsd_last_rcvd);
+ CDEBUG(D_INODE, "server last_mount: "LPU64"\n", fsd->fsd_mount_count);
+
+ filter_pack_fsd(&disk_fsd, fsd);
+ rc = lustre_fwrite(filp, (char *)&disk_fsd,
+ sizeof(disk_fsd), &off);
+ if (rc != sizeof(disk_fsd)) {
+ CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
+ rc);
+ RETURN(-EIO);
+ }
+ RETURN(0);
+}
+
+/* assumes caller has already in kernel ctxt */
+static int filter_init_server_data(struct obd_device *obd,
+ struct file * filp,
+ __u64 init_lastobjid)
+{
+ struct filter_obd *filter = &obd->u.filter;
+ struct filter_server_data *fsd;
+ struct filter_client_data *fcd = NULL;
+ struct inode *inode = filp->f_dentry->d_inode;
+ unsigned long last_rcvd_size = inode->i_size;
+ int cl_off;
+ loff_t off = 0;
+ int rc;
+
+ /* ensure padding in the struct is the correct size */
+ LASSERT (offsetof(struct filter_server_data, fsd_padding) +
+ sizeof(fsd->fsd_padding) == FILTER_LR_SERVER_SIZE);
+ LASSERT (offsetof(struct filter_client_data, fcd_padding) +
+ sizeof(fcd->fcd_padding) == FILTER_LR_CLIENT_SIZE);
+
+ OBD_ALLOC(fsd, sizeof(*fsd));
+ if (!fsd)
+ RETURN(-ENOMEM);
+ filter->fo_fsd = fsd;
+
+ if (last_rcvd_size == 0) {
+ CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+
+ memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
+ fsd->fsd_last_objid = init_lastobjid;
+ fsd->fsd_last_rcvd = 0;
+ fsd->fsd_mount_count = 0;
+
+ } else {
+ ssize_t retval = lustre_fread(filp, (char *)fsd,
+ sizeof(*fsd),
+ &off);
+ if (retval != sizeof(*fsd)) {
+ CDEBUG(D_INODE,"OBD filter: error reading lastobjid\n");
+ GOTO(out, rc = -EIO);
+ }
+ filter_unpack_fsd(fsd);
+ }
+
+ CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
+ obd->obd_name, fsd->fsd_last_objid);
+ CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
+ obd->obd_name, fsd->fsd_last_rcvd);
+ CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
+ obd->obd_name, fsd->fsd_mount_count);
+
+ /*
+ * When we do a clean FILTER shutdown, we save the last_rcvd into
+ * the header. If we find clients with higher last_rcvd values
+ * then those clients may need recovery done.
+ */
+ /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
+ for (off = FILTER_LR_CLIENT_START, cl_off = 0; off < last_rcvd_size;
+ cl_off++) {
+ __u64 last_rcvd;
+ int mount_age;
+
+ if (!fcd) {
+ OBD_ALLOC(fcd, sizeof(*fcd));
+ if (!fcd)
+ GOTO(err_fsd, rc = -ENOMEM);
+ }
+
+ rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+ if (rc != sizeof(*fcd)) {
+ CERROR("error reading FILTER %s offset %d: rc = %d\n",
+ LAST_RCVD, cl_off, rc);
+ if (rc > 0) /* XXX fatal error or just abort reading? */
+ rc = -EIO;
+ break;
+ }
+
+ if (fcd->fcd_uuid[0] == '\0') {
+ CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
+ cl_off);
+ continue;
+ }
+
+ last_rcvd = le64_to_cpu(fcd->fcd_last_rcvd);
+
+ /* These exports are cleaned up by filter_disconnect(), so they
+ * need to be set up like real exports as filter_connect() does.
+ */
+ mount_age = fsd->fsd_mount_count -
+ le64_to_cpu(fcd->fcd_mount_count);
+ if (mount_age < FILTER_MOUNT_RECOV) {
+ CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+ "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+ "\n", fcd->fcd_uuid, cl_off,
+ last_rcvd, fsd->fsd_last_rcvd,
+ le64_to_cpu(fcd->fcd_mount_count),
+ fsd->fsd_mount_count);
+#if 0
+ /* disabled until OST recovery is actually working */
+ struct obd_export *exp = class_new_export(obd);
+ struct filter_export_data *fed;
+
+ if (!exp) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ fed = &exp->exp_filter_data;
+ fed->fed_fcd = fcd;
+ filter_client_add(filter, fed, cl_off);
+ /* create helper if export init gets more complex */
+ INIT_LIST_HEAD(&fed->fed_open_head);
+ spin_lock_init(&fed->fed_lock);
+
+ fcd = NULL;
+ filter->fo_recoverable_clients++;
+#endif
+ } else {
+ CDEBUG(D_INFO,
+ "discarded client %d, UUID '%s', count %Ld\n",
+ cl_off, fcd->fcd_uuid,
+ (long long)le64_to_cpu(fcd->fcd_mount_count));
+ }
+
+ CDEBUG(D_OTHER, "client at offset %d has last_rcvd = %Lu\n",
+ cl_off, (unsigned long long)last_rcvd);
+
+ if (last_rcvd > filter->fo_fsd->fsd_last_rcvd)
+ filter->fo_fsd->fsd_last_rcvd = last_rcvd;
+ }
+
+ obd->obd_last_committed = filter->fo_fsd->fsd_last_rcvd;
+ if (filter->fo_recoverable_clients) {
+ CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
+ filter->fo_recoverable_clients,
+ filter->fo_fsd->fsd_last_rcvd);
+ filter->fo_next_recovery_transno = obd->obd_last_committed + 1;
+ obd->obd_flags |= OBD_RECOVERING;
+ }
+
+ if (fcd)
+ OBD_FREE(fcd, sizeof(*fcd));
+
+ fsd->fsd_mount_count++;
+
+ /* save it,so mount count and last_recvd is current */
+ rc = filter_update_server_data(filp, filter->fo_fsd);
+
+out:
+ RETURN(rc);
+
+err_fsd:
+ filter_free_server_data(filter);
+ RETURN(rc);
+}
/* setup the object store with correct subdirectories */
static int filter_prep(struct obd_device *obd)
struct file *file;
struct inode *inode;
int rc = 0;
- __u64 lastobjid = 2;
int mode = 0;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
GOTO(out_O_mode, rc);
}
+ if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+ CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
+ file->f_dentry->d_inode->i_mode);
+ GOTO(err_filp, rc = -ENOENT);
+ }
+
+ rc = fsfilt_journal_data(obd, file);
+ if (rc) {
+ CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
+ GOTO(err_filp, rc);
+ }
/* steal operations */
inode = file->f_dentry->d_inode;
filter->fo_fop = file->f_op;
filter->fo_iop = inode->i_op;
filter->fo_aops = inode->i_mapping->a_ops;
- if (inode->i_size == 0) {
- __u64 disk_lastobjid = cpu_to_le64(lastobjid);
- ssize_t retval = file->f_op->write(file,(char *)&disk_lastobjid,
- sizeof(disk_lastobjid),
- &file->f_pos);
- if (retval != sizeof(disk_lastobjid)) {
- CDEBUG(D_INODE,"OBD filter: error writing lastobjid\n");
- filp_close(file, 0);
- GOTO(out_O_mode, rc = -EIO);
- }
- } else {
- __u64 disk_lastobjid;
- ssize_t retval = file->f_op->read(file, (char *)&disk_lastobjid,
- sizeof(disk_lastobjid),
- &file->f_pos);
- if (retval != sizeof(disk_lastobjid)) {
- CDEBUG(D_INODE,"OBD filter: error reading lastobjid\n");
- filp_close(file, 0);
- GOTO(out_O_mode, rc = -EIO);
- }
- lastobjid = le64_to_cpu(disk_lastobjid);
+ rc = filter_init_server_data(obd, file, INIT_OBJID);
+ if (rc) {
+ CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
+ GOTO(err_client, rc);
}
- filter->fo_lastobjid = lastobjid;
- filp_close(file, 0);
+ filter->fo_rcvd_filp = file;
rc = 0;
out:
return(rc);
+err_client:
+ class_disconnect_all(obd);
+err_filp:
+ if (filp_close(file, 0))
+ CERROR("can't close %s after error\n", LAST_RCVD);
+ filter->fo_rcvd_filp = NULL;
out_O_mode:
while (mode-- > 0) {
struct dentry *dentry = filter->fo_dentry_O_mode[mode];
{
struct obd_run_ctxt saved;
struct filter_obd *filter = &obd->u.filter;
- __u64 disk_lastobjid;
long rc;
- struct file *file;
int mode;
- push_ctxt(&saved, &filter->fo_ctxt, NULL);
- file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
- if (IS_ERR(file)) {
- CERROR("OBD filter: cannot create %s\n", LAST_RCVD);
- goto out;
- }
+ /* XXX: filter_update_lastobjid used to call fsync_dev. It might be
+ * best to start a transaction with h_sync, because we removed this
+ * from lastobjid */
- file->f_pos = 0;
- disk_lastobjid = cpu_to_le64(filter->fo_lastobjid);
- rc = file->f_op->write(file, (char *)&disk_lastobjid,
- sizeof(disk_lastobjid), &file->f_pos);
- if (rc != sizeof(disk_lastobjid))
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
+ rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+ if (rc)
CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+ filter_free_server_data(filter);
- rc = filp_close(file, NULL);
- if (rc)
- CERROR("OBD filter: cannot close status file: rc = %ld\n", rc);
+
+ if (filter->fo_rcvd_filp) {
+ /* broken sync at umount bug workaround */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ rc = fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
+#else
+ rc = file_fsync(filter->fo_rcvd_filp,
+ filter->fo_rcvd_filp->f_dentry, 1);
+#endif
+ filp_close(filter->fo_rcvd_filp, 0);
+ filter->fo_rcvd_filp = NULL;
+ if (rc)
+ CERROR("last_rcvd file won't closek rc = %ld\n", rc);
+ }
for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
struct dentry *dentry = filter->fo_dentry_O_mode[mode];
}
}
f_dput(filter->fo_dentry_O);
-out:
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
}
static __u64 filter_next_id(struct obd_device *obd)
{
obd_id id;
+ LASSERT(obd->u.filter.fo_fsd != NULL);
spin_lock(&obd->u.filter.fo_objidlock);
- id = ++obd->u.filter.fo_lastobjid;
+ id = ++obd->u.filter.fo_fsd->fsd_last_objid;
spin_unlock(&obd->u.filter.fo_objidlock);
return id;
/* parent i_sem is already held if needed for exclusivity */
static struct dentry *filter_fid2dentry(struct obd_device *obd,
struct dentry *dparent,
- __u64 id, int locked)
+ __u64 id, int lockit)
{
struct super_block *sb = obd->u.filter.fo_sb;
struct dentry *dchild;
}
len = sprintf(name, LPU64, id);
- CDEBUG(D_INODE, "opening object O/%*s/%s\n",
+ CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
dparent->d_name.len, dparent->d_name.name, name);
- //if (!locked)
- //down(&dparent->d_inode->i_sem);
+ if (lockit)
+ down(&dparent->d_inode->i_sem);
dchild = lookup_one_len(name, dparent, len);
- //if (!locked)
- //up(&dparent->d_inode->i_sem);
+ if (lockit)
+ up(&dparent->d_inode->i_sem);
if (IS_ERR(dchild)) {
CERROR("child lookup error %ld\n", PTR_ERR(dchild));
RETURN(dchild);
spin_unlock(&fed->fed_lock);
CDEBUG(D_INODE, "opened objid "LPX64": rc = %p\n", id, file);
-
+ EXIT;
out:
- RETURN(file);
+ return file;
out_fdd:
kmem_cache_free(filter_dentry_cache, fdd);
}
/* Caller must hold i_sem on dir_dentry->d_inode */
+/* Caller must push us into kernel context */
static int filter_destroy_internal(struct obd_device *obd,
struct dentry *dir_dentry,
struct dentry *object_dentry)
{
- struct obd_run_ctxt saved;
struct inode *inode = object_dentry->d_inode;
int rc;
ENTRY;
inode->i_nlink, atomic_read(&inode->i_count));
}
- push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
rc = vfs_unlink(dir_dentry->d_inode, object_dentry);
- pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
if (rc)
CERROR("error unlinking objid %*s: rc %d\n",
RETURN(rc);
}
-static int filter_close_internal(struct obd_device *obd,
- struct filter_file_data *ffd)
+static int filter_close_internal(struct obd_export *export,
+ struct filter_file_data *ffd,
+ struct obd_trans_info *oti)
{
+ struct obd_device *obd = export->exp_obd;
+ struct filter_obd *filter = &obd->u.filter;
struct file *filp = ffd->ffd_file;
struct dentry *object_dentry = dget(filp->f_dentry);
struct filter_dentry_data *fdd = object_dentry->d_fsdata;
- int rc, rc2 = 0;
+ int rc, rc2;
ENTRY;
LASSERT(filp->private_data == ffd);
if (atomic_dec_and_test(&fdd->fdd_open_count) &&
fdd->fdd_flags & FILTER_FLAG_DESTROY) {
struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
+ struct obd_run_ctxt saved;
+ void *handle;
down(&dir_dentry->d_inode->i_sem);
- /* XXX start transaction */
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
+ filter_start_transno(export);
+ handle = fsfilt_start(obd, dir_dentry->d_inode,
+ FSFILT_OP_UNLINK);
+ if (IS_ERR(handle)) {
+ rc = filter_finish_transno(export, handle, oti,
+ PTR_ERR(handle));
+ GOTO(out, rc);
+ }
/* XXX unlink from PENDING directory now too */
rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry);
- /* XXX finish transaction */
if (rc2 && !rc)
rc = rc2;
+ rc = filter_finish_transno(export, handle, oti, rc);
+ rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+ if (rc2) {
+ CERROR("error on commit, err = %d\n", rc2);
+ if (!rc)
+ rc = rc2;
+ }
+ out:
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
up(&dir_dentry->d_inode->i_sem);
}
int rc = 0;
ENTRY;
- MOD_INC_USE_COUNT;
if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
- GOTO(err_dec, rc = -EINVAL);
+ RETURN(rc = -EINVAL);
obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
if (IS_ERR(obd->obd_fsops))
- GOTO(err_dec, rc = PTR_ERR(obd->obd_fsops));
+ RETURN(rc = PTR_ERR(obd->obd_fsops));
mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
rc = PTR_ERR(mnt);
if (IS_ERR(mnt))
GOTO(err_ops, rc);
+ obd->obd_flags |= OBD_REPLAYABLE;
+
filter = &obd->u.filter;;
+ init_MUTEX(&filter->fo_transno_sem);
filter->fo_vfsmnt = mnt;
filter->fo_fstype = strdup(data->ioc_inlbuf2);
filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
lock_kernel();
err_ops:
fsfilt_put_ops(obd->obd_fsops);
-err_dec:
- MOD_DEC_USE_COUNT;
return rc;
}
lock_kernel();
- MOD_DEC_USE_COUNT;
RETURN(0);
}
int filter_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int filter_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
+/* nearly identical to mds_connect */
static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct obd_export *exp;
+ struct filter_export_data *fed;
+ struct filter_client_data *fcd;
+ struct filter_obd *filter = &obd->u.filter;
int rc;
ENTRY;
- MOD_INC_USE_COUNT;
+
+ if (!conn || !obd || !cluuid)
+ RETURN(-EINVAL);
+
rc = class_connect(conn, obd, cluuid);
if (rc)
- GOTO(out_dec, rc);
+ RETURN(rc);
exp = class_conn2export(conn);
LASSERT(exp);
+ fed = &exp->exp_filter_data;
+
+ OBD_ALLOC(fcd, sizeof(*fcd));
+ if (!fcd) {
+ CERROR("filter: out of memory for client data\n");
+ GOTO(out_export, rc = -ENOMEM);
+ }
+
+ memcpy(fcd->fcd_uuid, cluuid, sizeof(fcd->fcd_uuid));
+ fed->fed_fcd = fcd;
+ fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count);
INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head);
spin_lock_init(&exp->exp_filter_data.fed_lock);
-out:
+
+ rc = filter_client_add(filter, fed, -1);
+ if (rc)
+ GOTO(out_fcd, rc);
+
RETURN(rc);
-out_dec:
- MOD_DEC_USE_COUNT;
- goto out;
+out_fcd:
+ OBD_FREE(fcd, sizeof(*fcd));
+out_export:
+ class_disconnect(conn);
+
+ RETURN(rc);
}
+/* also incredibly similar to mds_disconnect */
static int filter_disconnect(struct lustre_handle *conn)
{
struct obd_export *exp = class_conn2export(conn);
list_del(&ffd->ffd_export_list);
spin_unlock(&fed->fed_lock);
- CERROR("force closing file %*s on disconnect\n",
+ CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
ffd->ffd_file->f_dentry->d_name.len,
- ffd->ffd_file->f_dentry->d_name.name);
+ ffd->ffd_file->f_dentry->d_name.name,
+ ffd, ffd->ffd_servercookie);
- filter_close_internal(exp->exp_obd, ffd);
+ filter_close_internal(exp, ffd, NULL);
spin_lock(&fed->fed_lock);
}
spin_unlock(&fed->fed_lock);
ldlm_cancel_locks_for_export(exp);
+ filter_client_free(exp);
+
rc = class_disconnect(conn);
- if (!rc)
- MOD_DEC_USE_COUNT;
/* XXX cleanup preallocated inodes */
RETURN(rc);
if (!dentry->d_inode) {
CERROR("%s on non-existent object: "LPX64"\n", what, oa->o_id);
f_dput(dentry);
+ LBUG();
RETURN(ERR_PTR(-ENOENT));
}
int rc = 0;
ENTRY;
- dentry = filter_oa2dentry(conn, oa, 0);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
+
+ dentry = filter_oa2dentry(conn, oa, 1);
if (IS_ERR(dentry))
RETURN(PTR_ERR(dentry));
/* this is called from filter_truncate() until we have filter_punch() */
static int filter_setattr(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct obd_run_ctxt saved;
+ struct obd_export *export = class_conn2export(conn);
struct obd_device *obd = class_conn2obd(conn);
+ struct filter_obd *filter = &obd->u.filter;
struct dentry *dentry;
struct iattr iattr;
struct inode *inode;
- int rc;
+ void * handle;
+ int rc, rc2;
ENTRY;
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
+
dentry = filter_oa2dentry(conn, oa, 0);
if (IS_ERR(dentry))
iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
inode = dentry->d_inode;
- push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
lock_kernel();
if (iattr.ia_valid & ATTR_SIZE)
down(&inode->i_sem);
- /* XXX start transaction */
+ filter_start_transno(export);
+ handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+ if (IS_ERR(handle)) {
+ rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+ GOTO(out_unlock, rc);
+ }
+
if (inode->i_op->setattr)
rc = inode->i_op->setattr(dentry, &iattr);
else
rc = inode_setattr(inode, &iattr);
- /* XXX update last_rcvd, finish transaction */
+ rc = filter_finish_transno(export, handle, oti, rc);
+ rc2 = fsfilt_commit(obd, dentry->d_inode, handle);
+ if (rc2) {
+ CERROR("error on commit, err = %d\n", rc2);
+ if (!rc)
+ rc = rc2;
+ }
if (iattr.ia_valid & ATTR_SIZE) {
up(&inode->i_sem);
obdo_from_inode(oa, inode, oa->o_valid);
}
+out_unlock:
unlock_kernel();
- pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
f_dput(dentry);
RETURN(rc);
}
static int filter_open(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct obd_export *export;
struct lustre_handle *handle;
RETURN(-EINVAL);
}
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1);
+
filp = filter_obj_open(export, oa->o_id, oa->o_mode);
if (IS_ERR(filp))
GOTO(out, rc = PTR_ERR(filp));
} /* filter_open */
static int filter_close(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct obd_export *exp;
struct filter_file_data *ffd;
RETURN(-EINVAL);
}
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
+
if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
CERROR("no handle for close of objid "LPX64"\n", oa->o_id);
RETURN(-EINVAL);
list_del(&ffd->ffd_export_list);
spin_unlock(&fed->fed_lock);
- rc = filter_close_internal(exp->exp_obd, ffd);
+ rc = filter_close_internal(exp, ffd, oti);
RETURN(rc);
} /* filter_close */
static int filter_create(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
+ struct obd_export *export = class_conn2export(conn);
struct obd_device *obd = class_conn2obd(conn);
+ struct filter_obd *filter = &obd->u.filter;
struct obd_run_ctxt saved;
struct dentry *dir_dentry;
struct dentry *new;
struct iattr;
- int rc;
+ void *handle;
+ int err, rc;
ENTRY;
if (!obd) {
return -EINVAL;
}
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1);
+
oa->o_id = filter_next_id(obd);
- push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
dir_dentry = filter_parent(obd, oa->o_mode);
down(&dir_dentry->d_inode->i_sem);
- new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 1);
+ new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 0);
if (IS_ERR(new))
GOTO(out, rc = PTR_ERR(new));
GOTO(out, rc = -EEXIST);
}
- /* XXX start transaction */
+ filter_start_transno(export);
+ handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_CREATE);
+ if (IS_ERR(handle)) {
+ rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+ GOTO(out_put, rc);
+ }
rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode);
if (rc)
+ CERROR("create failed rc = %d\n", rc);
+
+ rc = filter_finish_transno(export, handle, oti, rc);
+ err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+ if (err) {
+ CERROR("unable to write lastobjid but file created\n");
+ if (!rc)
+ rc = err;
+ }
+ err = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+ if (err) {
+ CERROR("error on commit, err = %d\n", err);
+ if (!rc)
+ rc = err;
+ }
+
+ if (rc)
GOTO(out_put, rc);
- /* XXX update last_rcvd+lastobjid on disk, finish transaction */
/* Set flags for fields we have set in the inode struct */
oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
f_dput(new);
out:
up(&dir_dentry->d_inode->i_sem);
- pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
return rc;
}
static int filter_destroy(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
+ struct obd_export *export = class_conn2export(conn);
struct obd_device *obd = class_conn2obd(conn);
+ struct filter_obd *filter = &obd->u.filter;
struct dentry *dir_dentry, *object_dentry;
struct filter_dentry_data *fdd;
- int rc;
+ struct obd_run_ctxt saved;
+ void *handle;
+ int rc, rc2;
ENTRY;
if (!obd) {
RETURN(-EINVAL);
}
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
+
CDEBUG(D_INODE, "destroying objid "LPX64"\n", oa->o_id);
dir_dentry = filter_parent(obd, oa->o_mode);
down(&dir_dentry->d_inode->i_sem);
- object_dentry = filter_oa2dentry(conn, oa, 1);
+ object_dentry = filter_oa2dentry(conn, oa, 0);
if (IS_ERR(object_dentry))
GOTO(out, rc = -ENOENT);
+ push_ctxt(&saved, &filter->fo_ctxt, NULL);
+ filter_start_transno(export);
+ handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_UNLINK);
+ if (IS_ERR(handle)) {
+ rc = filter_finish_transno(export, handle, oti,PTR_ERR(handle));
+ GOTO(out_ctxt, rc);
+ }
+
fdd = object_dentry->d_fsdata;
- /* XXX start transaction */
if (fdd && atomic_read(&fdd->fdd_open_count)) {
if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
fdd->fdd_flags |= FILTER_FLAG_DESTROY;
CDEBUG(D_INODE,
"repeat destroy of %dx open objid "LPX64"\n",
atomic_read(&fdd->fdd_open_count), oa->o_id);
- GOTO(out_dput, rc = 0);
+ GOTO(out_commit, rc = 0);
}
rc = filter_destroy_internal(obd, dir_dentry, object_dentry);
-out_dput:
- /* XXX update last_rcvd on disk, finish transaction */
+
+out_commit:
+ /* XXX save last_rcvd on disk */
+ rc = filter_finish_transno(export, handle, oti, rc);
+ rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle);
+ if (rc2) {
+ CERROR("error on commit, err = %d\n", rc2);
+ if (!rc)
+ rc = rc2;
+ }
+out_ctxt:
+ pop_ctxt(&saved, &filter->fo_ctxt, NULL);
f_dput(object_dentry);
EXIT;
/* NB start and end are used for punch, but not truncate */
static int filter_truncate(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm,
- obd_off start, obd_off end)
+ obd_off start, obd_off end,
+ struct obd_trans_info *oti)
{
int error;
ENTRY;
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_punch_reqs, 1);
+
if (end != OBD_OBJECT_EOF)
CERROR("PUNCH not supported, only truncate works\n");
CDEBUG(D_INODE, "calling truncate for object "LPX64", valid = %x, "
"o_size = "LPD64"\n", oa->o_id, oa->o_valid, start);
oa->o_size = start;
- error = filter_setattr(conn, oa, NULL);
+ error = filter_setattr(conn, oa, NULL, oti);
RETURN(error);
}
}
#endif
-static int lustre_commit_write(struct page *page, unsigned from, unsigned to)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/* We should only change the file mtime (and not the ctime, like
+ * update_inode_times() in generic_file_write()) when we only change data.
+ */
+static inline void inode_update_time(struct inode *inode, int ctime_too)
+{
+ time_t now = CURRENT_TIME;
+ if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
+ return;
+ inode->i_mtime = now;
+ if (ctime_too)
+ inode->i_ctime = now;
+ mark_inode_dirty_sync(inode);
+}
+#endif
+
+static int lustre_commit_write(struct niobuf_local *lnb)
{
+ struct page *page = lnb->page;
+ unsigned from = lnb->offset & ~PAGE_MASK;
+ unsigned to = from + lnb->len;
struct inode *inode = page->mapping->host;
int err;
+ LASSERT(to <= PAGE_SIZE);
err = page->mapping->a_ops->commit_write(NULL, page, from, to);
if (!err && IS_SYNC(inode))
err = waitfor_one_page(page);
*/
if (!page) {
unsigned long addr;
- CDEBUG(D_PAGE, "ino %lu page %ld locked\n", inode->i_ino,index);
+ CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
addr = __get_free_pages(GFP_KERNEL, 0); /* locked page */
if (!addr) {
CERROR("no memory for a temp page\n");
* pages, and the filesystems mark these buffers as BH_New if they
* were newly allocated from disk. We use the BH_New flag similarly.
*/
-static int filter_commit_write(struct page *page, unsigned from, unsigned to,
- int err)
+static int filter_commit_write(struct niobuf_local *lnb, int err)
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (err) {
unsigned block_start, block_end;
- struct buffer_head *bh, *head = page->buffers;
+ struct buffer_head *bh, *head = lnb->page->buffers;
unsigned blocksize = head->b_size;
- void *addr = page_address(page);
/* debugging: just seeing if this ever happens */
CERROR("called filter_commit_write for ino %lu:%lu on err %d\n",
- page->mapping->host->i_ino, page->index, err);
+ lnb->page->mapping->host->i_ino, lnb->page->index, err);
/* Currently one buffer per page, but in the future... */
for (bh = head, block_start = 0; bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (buffer_new(bh))
- memset(addr + block_start, 0, blocksize);
+ memset(lnb->addr + block_start, 0, blocksize);
}
}
#endif
- return lustre_commit_write(page, from, to);
+ return lustre_commit_write(lnb);
}
static int filter_preprw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private)
+ struct niobuf_local *res, void **desc_private,
+ struct obd_trans_info *oti)
{
struct obd_run_ctxt saved;
+ struct obd_export *export;
struct obd_device *obd;
struct obd_ioobj *o;
struct niobuf_remote *rnb = nb;
int i;
ENTRY;
+ if ((cmd & OBD_BRW_WRITE) != 0)
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
+ else
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
+
memset(res, 0, niocount * sizeof(*res));
+ export = class_conn2export(conn);
obd = class_conn2obd(conn);
if (!obd) {
CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr);
}
if (cmd & OBD_BRW_WRITE) {
+#warning "FIXME: we need to get inode->i_sem for each object here"
+ /* Even worse, we need to get locks on mulitple inodes (in
+ * order) or use the DLM to do the locking for us (and use
+ * the same locking in filter_setattr() for truncate. The
+ * handling gets very ugly when dealing with locked pages.
+ * It may be easier to just get rid of the locked page code
+ * (which has problems of its own) and either discover we do
+ * not need it anymore (i.e. it was a symptom of another bug)
+ * or ensure we get the page locks in an appropriate order.
+ */
+ /* Danger, Will Robinson! You are taking a lock here and also
+ * starting a transaction and releasing/finishing then in
+ * filter_commitrw(), so you must call fsfilt_commit() and
+ * finish_transno() if an error occurs in this function.
+ */
+ filter_start_transno(export);
*desc_private = fsfilt_brw_start(obd, objcount, fso,
niocount, nb);
if (IS_ERR(*desc_private))
obd_kmap_get(niocount, 1);
for (i = 0, o = obj; i < objcount; i++, o++) {
- struct dentry *dentry = fso->fso_dentry;
- struct inode *inode = dentry->d_inode;
+ struct dentry *dentry;
+ struct inode *inode;
int j;
+ dentry = fso[i].fso_dentry;
+ inode = dentry->d_inode;
+
for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
struct page *page;
else
lnb->dentry = dget(dentry);
- if (cmd & OBD_BRW_WRITE)
+ if (cmd & OBD_BRW_WRITE) {
page = filter_get_page_write(inode, rnb, lnb,
&pglocked);
- else
+
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes,
+ rnb->len);
+ } else {
page = lustre_get_page_read(inode, rnb);
- if (IS_ERR(page)) {
- if (cmd & OBD_BRW_WRITE)
- fsfilt_commit(obd, dir_dentry->d_inode,
- *desc_private);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes,
+ rnb->len);
+ }
- GOTO(out_pages, rc = PTR_ERR(page));
+ if (IS_ERR(page)) {
+ rc = PTR_ERR(page);
+ f_dput(dentry);
+ GOTO(out_pages, rc);
}
lnb->addr = page_address(page);
}
}
- if (cmd & OBD_BRW_WRITE) {
- int err = fsfilt_commit(obd, dir_dentry->d_inode,
- *desc_private);
- if (err)
- GOTO(out_pages, rc = err);
- }
-
EXIT;
out:
OBD_FREE(fso, objcount * sizeof(*fso));
out_pages:
while (lnb-- > res) {
- CERROR("error cleanup on brw\n");
+ CERROR("%d error cleanup on brw\n", rc);
if (cmd & OBD_BRW_WRITE)
- filter_commit_write(lnb->page, 0, PAGE_SIZE, rc);
+ filter_commit_write(lnb, rc);
else
lustre_put_page(lnb->page);
+ f_dput(lnb->dentry);
}
obd_kmap_put(niocount);
+ goto out_err; /* dropped the dentry refs already (one per page) */
+
out_objinfo:
for (i = 0; i < objcount && fso[i].fso_dentry; i++)
f_dput(fso[i].fso_dentry);
-
+out_err:
+ if (cmd & OBD_BRW_WRITE) {
+ filter_finish_transno(export, *desc_private, oti, rc);
+ fsfilt_commit(obd, dir_dentry->d_inode, *desc_private);
+ }
goto out;
}
{
struct page *lpage;
int rc;
+ ENTRY;
lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
if (IS_ERR(lpage)) {
rc = PTR_ERR(lpage);
CERROR("error getting locked page index %ld: rc = %d\n",
lnb->page->index, rc);
- GOTO(out, rc);
+ LBUG();
+ lustre_commit_write(lnb);
+ RETURN(rc);
}
/* lpage is kmapped in lustre_get_page_write() above and kunmapped in
* filter_get_page_write() and kunmapped in lustre_put_page() below.
*/
memcpy(page_address(lpage), page_address(lnb->page), PAGE_SIZE);
- rc = lustre_commit_write(lpage, 0, PAGE_SIZE);
+ lustre_put_page(lnb->page);
+
+ lnb->page = lpage;
+ rc = lustre_commit_write(lnb);
if (rc)
CERROR("error committing locked page %ld: rc = %d\n",
lnb->page->index, rc);
-out:
- lustre_put_page(lnb->page);
- return rc;
+ RETURN(rc);
+}
+
+static int filter_sync(struct obd_device *obd)
+{
+ RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
}
static int filter_commitrw(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *res,
- void *private)
+ void *desc_private, struct obd_trans_info *oti)
{
struct obd_run_ctxt saved;
struct obd_ioobj *o;
- struct niobuf_local *r;
+ struct niobuf_local *lnb;
+ struct obd_export *export = class_conn2export(conn);
struct obd_device *obd = class_conn2obd(conn);
int found_locked = 0;
int rc = 0;
push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
LASSERT(!current->journal_info);
- current->journal_info = private;
+ current->journal_info = desc_private;
- for (i = 0, o = obj, r = res; i < objcount; i++, o++) {
+ for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
int j;
- for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
- struct page *page = r->page;
-
- if (!page)
- LBUG();
-
- if (r->flags & N_LOCAL_TEMP_PAGE) {
+ if (cmd & OBD_BRW_WRITE)
+ inode_update_time(lnb->dentry->d_inode, 1);
+ for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
+ if (lnb->flags & N_LOCAL_TEMP_PAGE) {
found_locked++;
continue;
}
if (cmd & OBD_BRW_WRITE) {
- int err = filter_commit_write(page, 0,
- r->len, 0);
+ int err = filter_commit_write(lnb, 0);
if (!rc)
rc = err;
} else
- lustre_put_page(page);
+ lustre_put_page(lnb->page);
obd_kmap_put(1);
- f_dput(r->dentry);
+ f_dput(lnb->dentry);
}
}
- if (!found_locked)
- goto out_ctxt;
-
- for (i = 0, o = obj, r = res; i < objcount; i++, o++) {
+ for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
+ i++, o++) {
int j;
- for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) {
+ for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
int err;
- if (!(r->flags & N_LOCAL_TEMP_PAGE))
+ if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
continue;
- err = filter_write_locked_page(r);
+ err = filter_write_locked_page(lnb);
obd_kmap_put(1);
if (!rc)
rc = err;
- f_dput(r->dentry);
+ f_dput(lnb->dentry);
+ found_locked--;
}
}
-out_ctxt:
+ if (cmd & OBD_BRW_WRITE) {
+ int err;
+ struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
+
+ rc = filter_finish_transno(export, desc_private, oti, rc);
+ err = fsfilt_commit(obd, dir_dentry->d_inode, desc_private);
+ if (err)
+ rc = err;
+ if (obd_sync_filter) {
+ /* this can fail with ENOMEM, what should we do then? */
+ filter_sync(obd);
+ }
+ /* XXX <adilger> LASSERT(last_rcvd == last_committed)*/
+ }
+
LASSERT(!current->journal_info);
- current->journal_info = NULL;
pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
RETURN(rc);
static int filter_brw(int cmd, struct lustre_handle *conn,
struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_brw_set *set)
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
struct obd_ioobj ioo;
struct niobuf_local *lnb;
ioo.ioo_bufcnt = oa_bufs;
ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb,
- &desc_private);
+ &desc_private, oti);
if (ret != 0)
GOTO(out, ret);
kunmap(virt);
}
- ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private);
+ ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private,
+ oti);
out:
if (lnb)
obd = class_conn2obd(conn);
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_statfs_reqs, 1);
+
RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
}
int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
struct lustre_handle *src_conn, struct obdo *src,
- obd_size count, obd_off offset)
+ obd_size count, obd_off offset, struct obd_trans_info *oti)
{
struct page *page;
struct lov_stripe_md srcmd, dstmd;
page->index = index;
set->brw_callback = ll_brw_sync_wait;
- err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, set);
+ err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, set,NULL);
obd_brw_set_free(set);
if (err) {
EXIT;
CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
set->brw_callback = ll_brw_sync_wait;
- err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, set);
+ err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, set,oti);
obd_brw_set_free(set);
/* XXX should handle dst->o_size, dst->o_blocks here */
static int __init obdfilter_init(void)
{
- printk(KERN_INFO "Filtering OBD driver v0.001, info@clusterfs.com\n");
+ struct lprocfs_static_vars lvars;
+
+ printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
filter_open_cache = kmem_cache_create("ll_filter_fdata",
sizeof(struct filter_file_data),
0, 0, NULL, NULL);
RETURN(-ENOMEM);
}
- return class_register_type(&filter_obd_ops, status_class_var,
+ xprocfs_init ("filter");
+
+ lprocfs_init_vars(&lvars);
+ return class_register_type(&filter_obd_ops, lvars.module_vars,
OBD_FILTER_DEVICENAME);
}
CERROR("couldn't free obdfilter dentry cache\n");
if (kmem_cache_destroy(filter_open_cache))
CERROR("couldn't free obdfilter open cache\n");
+ xprocfs_fini ();
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Filtering OBD driver v1.0");
+MODULE_DESCRIPTION("Lustre Filtering OBD driver");
MODULE_LICENSE("GPL");
module_init(obdfilter_init);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
+#include <linux/obd.h>
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
{
- int len = 0;
- struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_uuid);
- return len;
+ struct obd_device *dev = (struct obd_device *) data;
+ return vfs_statfs(dev->u.filter.fo_sb, sfs);
}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct statfs mystats;
- int len = 0;
- vfs_statfs(temp->u.filter.fo_sb, &mystats);
- len+=snprintf(page, count, "%ld\n", mystats.f_bsize);
- return len;
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct statfs mystats;
- int len = 0;
- __u32 blk_size;
- __u64 result;
-
- vfs_statfs(temp->u.filter.fo_sb, &mystats);
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_blocks;
- while(blk_size >>= 1){
- result <<= 1;
- }
- len+=snprintf(page, count, LPU64"\n", result);
- return len;
-}
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_filter_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_filter_statfs);
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
+int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device* temp = (struct obd_device*)data;
- struct statfs mystats;
- int len = 0;
- __u32 blk_size;
- __u64 result;
-
- vfs_statfs(temp->u.filter.fo_sb, &mystats);
- blk_size = mystats.f_bsize;
- blk_size >>= 10;
- result = mystats.f_bfree;
- while(blk_size >>= 1){
- result <<= 1;
- }
- len += snprintf(page, count, LPU64"\n", result);
- return len;
+ struct obd_device *dev = (struct obd_device *)data;
+ return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
}
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- int len = 0;
- len += snprintf(page, count, "%s\n", temp->u.filter.fo_fstype);
- return len;
-}
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct statfs mystats;
- int len = 0;
- vfs_statfs(temp->u.filter.fo_sb, &mystats);
- len += snprintf(page, count, "%ld\n", mystats.f_files);
- return len;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct statfs mystats;
- int len = 0;
- vfs_statfs(temp->u.filter.fo_sb, &mystats);
- len += snprintf(page, count, "%ld\n", mystats.f_ffree);
- return len;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytestotal",rd_kbtotal, 0, 0},
- {"status/kbytesfree", rd_kbfree, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {"status/fstype", rd_fstype, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "kbytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { "fstype", rd_fstype, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_uuid);
- return len;
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-int rd_kbytestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_kbytesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-int rd_server_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- struct client_obd* cli = &temp->u.cli;
- int len = 0;
- len += snprintf(page, count, "%s\n",cli->cl_target_uuid);
- return len;
-
-
-}
-int rd_conn_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp=(struct obd_device*)data;
- struct client_obd* cli=&temp->u.cli;
- struct obd_import* imp=&cli->cl_import;
- int len = 0;
- len += snprintf(page, count, "%s\n",
- imp->imp_connection->c_remote_uuid);
- return len;
-
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytestotal", rd_kbytestotal, 0, 0},
- {"status/kbytesfree", rd_kbytesfree, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {"status/ost_server_uuid", rd_server_uuid, 0, 0},
- {"status/ost_conn_uuid", rd_conn_uuid, 0, 0},
- {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+
+DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
+DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
+
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", rd_blksize, 0, 0 },
+ { "kbytestotal", rd_kbytestotal, 0, 0 },
+ { "kbytesfree", rd_kbytesfree, 0, 0 },
+ { "filestotal", rd_filestotal, 0, 0 },
+ { "filesfree", rd_filesfree, 0, 0 },
+ { "filegroups", rd_filegroups, 0, 0 },
+ { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
+ { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
* Author Peter Braam <braam@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
#include <linux/lprocfs_status.h>
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-
static int osc_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
static int osc_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
/* Pack OSC object metadata for shipment to the MDS. */
RETURN(lsm_size);
}
+inline void oti_from_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+{
+ if (oti && req->rq_repmsg)
+ oti->oti_transno = NTOH__u64(req->rq_repmsg->transno);
+ EXIT;
+}
+
static int osc_getattr(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md)
{
body = lustre_msg_buf(request->rq_repmsg, 0);
CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- if (oa)
- memcpy(oa, &body->oa, sizeof(*oa));
+ memcpy(oa, &body->oa, sizeof(*oa));
EXIT;
out:
}
static int osc_open(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct ptlrpc_request *request;
struct ost_body *body;
if (!request)
RETURN(-ENOMEM);
+#warning FIXME: request->rq_flags |= PTL_RPC_FL_REPLAY;
body = lustre_msg_buf(request->rq_reqmsg, 0);
#warning FIXME: pack only valid fields instead of memcpy, endianness
memcpy(&body->oa, oa, sizeof(*oa));
}
static int osc_close(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct ptlrpc_request *request;
struct ost_body *body;
}
static int osc_setattr(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *md)
+ struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct ptlrpc_request *request;
struct ost_body *body;
}
static int osc_create(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md **ea)
+ struct lov_stripe_md **ea, struct obd_trans_info *oti_in)
{
struct ptlrpc_request *request;
struct ost_body *body;
struct lov_stripe_md *lsm;
+ struct obd_trans_info *oti, trans_info;
int rc, size = sizeof(*body);
ENTRY;
RETURN(rc);
}
+ if (oti_in)
+ oti = oti_in;
+ else
+ oti = &trans_info;
+
request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_CREATE, 1, &size,
NULL);
if (!request)
lsm->lsm_object_id = oa->o_id;
lsm->lsm_stripe_count = 0;
*ea = lsm;
+
+ oti_from_request(oti, request);
+ CDEBUG(D_HA, "transno: "LPD64"\n", oti->oti_transno);
EXIT;
out_req:
ptlrpc_req_finished(request);
static int osc_punch(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md, obd_size start,
- obd_size end)
+ obd_size end, struct obd_trans_info *oti)
{
struct ptlrpc_request *request;
struct ost_body *body;
}
static int osc_destroy(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct ptlrpc_request *request;
struct ost_body *body;
EXIT;
}
+
/* this is the callback function which is invoked by the Portals
* event handler associated with the bulk_sink queue and bulk_source queue.
*/
CERROR("obd_fail_loc=%x, skipping register_bulk\n",
OBD_FAIL_OSC_BRW_READ_BULK);
} else {
- rc = ptlrpc_register_bulk(desc);
+ rc = ptlrpc_register_bulk_put(desc);
if (rc)
GOTO(out_unmap, rc);
obd_brw_set_add(set, desc);
static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *lsm,
obd_count page_count, struct brw_page *pga,
- struct obd_brw_set *set)
+ struct obd_brw_set *set, struct obd_trans_info *oti)
{
struct obd_import *imp = class_conn2cliimp(conn);
struct ptlrpc_connection *connection = imp->imp_connection;
struct ptlrpc_request *request = NULL;
struct ptlrpc_bulk_desc *desc = NULL;
struct ost_body *body;
- struct niobuf_local *local = NULL;
- struct niobuf_remote *remote;
int rc, size[3] = {sizeof(*body)}, mapped = 0;
- int j;
+ unsigned long flags;
struct obd_ioobj *iooptr;
void *nioptr;
+ __u32 xid;
ENTRY;
size[1] = sizeof(struct obd_ioobj);
ost_pack_ioo(&iooptr, lsm, page_count);
/* end almost identical to brw_read case */
- OBD_ALLOC(local, page_count * sizeof(*local));
- if (!local)
- GOTO(out_desc, rc = -ENOMEM);
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ xid = ++imp->imp_last_xid; /* single xid for all pages */
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_kmap_get(page_count, 0);
for (mapped = 0; mapped < page_count; mapped++) {
- local[mapped].addr = kmap(pga[mapped].pg);
+ struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+ if (bulk == NULL)
+ GOTO(out_unmap, rc = -ENOMEM);
- CDEBUG(D_INFO, "kmap(pg) = %p ; pg->flags = %lx ; pg->refcount = "
- "%d ; page %d of %d\n",
- local[mapped].addr, pga[mapped].pg->flags,
- page_count(pga[mapped].pg),
- mapped, page_count - 1);
+ bulk->bp_xid = xid; /* single xid for all pages */
- local[mapped].offset = pga[mapped].off;
- local[mapped].len = pga[mapped].count;
+ bulk->bp_buf = kmap(pga[mapped].pg);
+ bulk->bp_page = pga[mapped].pg;
+ bulk->bp_buflen = PAGE_SIZE;
ost_pack_niobuf(&nioptr, pga[mapped].off, pga[mapped].count,
- pga[mapped].flag, 0);
- }
-
- size[1] = page_count * sizeof(*remote);
- request->rq_replen = lustre_msg_size(2, size);
- rc = ptlrpc_queue_wait(request);
- if (rc)
- GOTO(out_unmap, rc);
-
- nioptr = lustre_msg_buf(request->rq_repmsg, 1);
- if (!nioptr)
- GOTO(out_unmap, rc = -EINVAL);
-
- if (request->rq_repmsg->buflens[1] != size[1]) {
- CERROR("buffer length wrong (%d vs. %d)\n",
- request->rq_repmsg->buflens[1], size[1]);
- GOTO(out_unmap, rc = -EINVAL);
+ pga[mapped].flag, bulk->bp_xid);
}
- for (j = 0; j < page_count; j++) {
- struct ptlrpc_bulk_page *bulk;
-
- ost_unpack_niobuf(&nioptr, &remote);
-
- bulk = ptlrpc_prep_bulk_page(desc);
- if (!bulk)
- GOTO(out_unmap, rc = -ENOMEM);
-
- bulk->bp_buf = local[j].addr;
- bulk->bp_buflen = local[j].len;
- bulk->bp_xid = remote->xid;
- bulk->bp_page = pga[j].pg;
+ /*
+ * Register the bulk first, because the reply could arrive out of
+ * order, and we want to be ready for the bulk data.
+ *
+ * One reference is released when brw_finish is complete, the other
+ * when the caller removes us from the "set" list.
+ *
+ * On error, we never do the brw_finish, so we handle all decrefs.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_WRITE_BULK)) {
+ CERROR("obd_fail_loc=%x, skipping register_bulk\n",
+ OBD_FAIL_OSC_BRW_WRITE_BULK);
+ } else {
+ rc = ptlrpc_register_bulk_get(desc);
+ if (rc)
+ GOTO(out_unmap, rc);
+ obd_brw_set_add(set, desc);
}
- if (desc->bd_page_count != page_count)
- LBUG();
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_WRITE_BULK))
- GOTO(out_unmap, rc = 0);
-
- OBD_FREE(local, page_count * sizeof(*local));
-
- /* One reference is released when brw_finish is complete, the other
- * when the caller removes it from the "set" list. */
- obd_brw_set_add(set, desc);
- rc = ptlrpc_send_bulk(desc);
+ request->rq_replen = lustre_msg_size(1, size);
+ rc = ptlrpc_queue_wait(request);
- /* XXX: Mike, same question as in osc_brw_read. */
-out_req:
+ /*
+ * XXX: If there is an error during the processing of the callback,
+ * such as a timeout in a sleep that it performs, brw_finish
+ * will never get called, and we'll leak the desc, fail to kunmap
+ * things, cats will live with dogs. One solution would be to
+ * export brw_finish as osc_brw_finish, so that the timeout case
+ * and its kin could call it for proper cleanup. An alternative
+ * would be for an error return from the callback to cause us to
+ * clean up, but that doesn't help the truly async cases (like
+ * LOV), which will immediately return from their PHASE_START
+ * callback, before any such cleanup-requiring error condition can
+ * be detected.
+ */
+ out_req:
ptlrpc_req_finished(request);
RETURN(rc);
out_unmap:
while (mapped-- > 0)
kunmap(pga[mapped].pg);
-
obd_kmap_put(page_count);
-
- OBD_FREE(local, page_count * sizeof(*local));
-out_desc:
ptlrpc_bulk_decref(desc);
goto out_req;
}
static int osc_brw(int cmd, struct lustre_handle *conn,
struct lov_stripe_md *md, obd_count page_count,
- struct brw_page *pga, struct obd_brw_set *set)
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
ENTRY;
pages_per_brw = page_count;
if (cmd & OBD_BRW_WRITE)
- rc = osc_brw_write(conn, md, pages_per_brw, pga, set);
+ rc = osc_brw_write(conn, md, pages_per_brw, pga, set, oti);
else
rc = osc_brw_read(conn, md, pages_per_brw, pga, set);
int *flags, void *callback, void *data, int datalen,
struct lustre_handle *lockh)
{
- __u64 res_id[RES_NAME_SIZE] = { lsm->lsm_object_id };
+ struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
struct obd_device *obddev = class_conn2obd(connh);
struct ldlm_extent *extent = extentp;
int rc;
}
/* Next, search for already existing extent locks that will cover us */
- rc = ldlm_lock_match(obddev->obd_namespace, res_id, type, extent,
+ rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type, extent,
sizeof(extent), mode, lockh);
if (rc == 1)
/* We already have a lock, and it's referenced */
* locks out from other users right now, too. */
if (mode == LCK_PR) {
- rc = ldlm_lock_match(obddev->obd_namespace, res_id, type,
+ rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type,
extent, sizeof(extent), LCK_PW, lockh);
if (rc == 1) {
/* FIXME: This is not incredibly elegant, but it might
rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
res_id, type, extent, sizeof(extent), mode, flags,
- ldlm_completion_ast, callback, data, datalen,
+ ldlm_completion_ast, callback, data, NULL,
lockh);
RETURN(rc);
}
struct lov_stripe_md *lsm, int flags)
{
struct obd_device *obddev = class_conn2obd(connh);
- __u64 res_id[RES_NAME_SIZE] = { lsm->lsm_object_id };
+ struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
- return ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags);
+ return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags);
}
static int osc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
ENTRY;
switch (cmd) {
+#if 0
case IOC_LDLM_TEST: {
err = ldlm_test(obddev, conn);
CERROR("-- done err %d\n", err);
CERROR("-- done err %d\n", err);
GOTO(out, err);
}
+#endif
case IOC_OSC_REGISTER_LOV: {
if (obddev->u.cli.cl_containing_lov)
GOTO(out, err = -EALREADY);
case OBD_IOC_LOV_GET_CONFIG: {
char *buf;
struct lov_desc *desc;
- obd_uuid_t *uuidp;
+ struct obd_uuid uuid;
buf = NULL;
len = 0;
GOTO(out, err = -EINVAL);
}
- if (data->ioc_inllen2 < sizeof(*uuidp)) {
+ if (data->ioc_inllen2 < sizeof(uuid.uuid)) {
OBD_FREE(buf, len);
GOTO(out, err = -EINVAL);
}
desc->ld_default_stripe_size = 0;
desc->ld_default_stripe_offset = 0;
desc->ld_pattern = 0;
- memcpy(desc->ld_uuid, obddev->obd_uuid, sizeof(*uuidp));
+ memcpy(desc->ld_uuid.uuid, obddev->obd_uuid.uuid, sizeof(uuid.uuid));
- uuidp = (obd_uuid_t *)data->ioc_inlbuf2;
- memcpy(uuidp, obddev->obd_uuid, sizeof(*uuidp));
+ memcpy(data->ioc_inlbuf2, obddev->obd_uuid.uuid,
+ sizeof(uuid.uuid));
err = copy_to_user((void *)uarg, buf, len);
if (err)
static void set_osc_active(struct obd_import *imp, int active)
{
- struct obd_device *notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
+ struct obd_device *notify_obd;
+
+ LASSERT(imp->imp_obd);
+
+ notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
if (notify_obd == NULL)
return;
if (!list_empty(¬ify_obd->obd_exports)) {
int rc;
struct lustre_handle fakeconn;
- struct obd_ioctl_data ioc_data;
+ struct obd_ioctl_data ioc_data = { 0 };
struct obd_export *exp =
list_entry(notify_obd->obd_exports.next,
struct obd_export, exp_obd_chain);
fakeconn.addr = (__u64)(unsigned long)exp;
fakeconn.cookie = exp->exp_cookie;
- ioc_data.ioc_inlbuf1 = imp->imp_obd->u.cli.cl_target_uuid;
+ ioc_data.ioc_inlbuf1 = &imp->imp_obd->u.cli.cl_target_uuid;
ioc_data.ioc_offset = active;
rc = obd_iocontrol(IOC_LOV_SET_OSC_ACTIVE, &fakeconn,
sizeof ioc_data, &ioc_data, NULL);
- if (rc)
+ if (rc) {
CERROR("disabling %s on LOV %p/%s: %d\n",
- imp->imp_obd->obd_uuid, notify_obd,
- notify_obd->obd_uuid, rc);
+ imp->imp_obd->u.cli.cl_target_uuid.uuid,
+ notify_obd, notify_obd->obd_uuid.uuid, rc);
+ }
} else {
CDEBUG(D_HA, "No exports for obd %p/%s, can't notify about "
- "%p\n", notify_obd, notify_obd->obd_uuid,
- imp->imp_obd->obd_uuid);
+ "%p\n", notify_obd, notify_obd->obd_uuid.uuid,
+ imp->imp_obd->obd_uuid.uuid);
}
}
case PTLRPC_RECOVD_PHASE_PREPARE: {
struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
ldlm_namespace_cleanup(ns, 1 /* no network ops */);
- ptlrpc_abort_inflight(imp);
+ ptlrpc_abort_inflight(imp, 0);
set_osc_active(imp, 0 /* inactive */);
RETURN(0);
}
}
static int osc_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct obd_import *imp = &obd->u.cli.cl_import;
static int __init osc_init(void)
{
- RETURN(class_register_type(&osc_obd_ops, status_class_var,
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ RETURN(class_register_type(&osc_obd_ops, lvars.module_vars,
LUSTRE_OSC_NAME));
}
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Object Storage Client (OSC) v1.0");
+MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
MODULE_LICENSE("GPL");
module_init(osc_init);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#define DEBUG_SUBSYSTEM S_OST
-#include <linux/lustre_lite.h>
+#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
-
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- int len = 0;
- len += snprintf(page, count, "%s\n", temp->obd_uuid);
- return len;
-
-
-}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- struct ost_obd *ost = &temp->u.ost;
- struct lustre_handle *conn = &ost->ost_conn;
- struct obd_statfs mystats;
- int len = 0;
-
- obd_statfs(conn, &mystats);
- len += snprintf(page, count, "%d\n", mystats.os_bsize);
- return len;
-
-}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct ost_obd *ost = &temp->u.ost;
- struct lustre_handle *conn = &ost->ost_conn;
- struct obd_statfs mystats;
- int len = 0;
- __u32 blk_size;
- __u64 result;
-
- obd_statfs(conn, &mystats);
- blk_size = mystats.os_bsize;
- blk_size >>= 10;
- result = mystats.os_blocks;
- while(blk_size >>= 1){
- result <<= 1;
- }
- len += snprintf(page, count, LPU64"\n", result);
- return len;
-
-}
-
-
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- struct ost_obd *ost = &temp->u.ost;
- struct lustre_handle *conn = &ost->ost_conn;
- struct obd_statfs mystats;
- int len = 0;
- __u32 blk_size;
- __u64 result;
-
- obd_statfs(conn, &mystats);
- blk_size = mystats.os_bsize;
- blk_size >>= 10;
- result = mystats.os_bfree;
- while(blk_size >>= 1){
- result <<= 1;
- }
- len += snprintf(page, count, LPU64"\n", result);
- return len;
-}
-
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* temp = (struct obd_device*)data;
- struct ost_obd *ost = &temp->u.ost;
- struct lustre_handle *conn = &ost->ost_conn;
- struct obd_statfs mystats;
- int len = 0;
-
- obd_statfs(conn, &mystats);
- len += snprintf(page, count, LPU64"\n",mystats.os_files);
- return len;
-
-}
-
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
-
- struct obd_device* temp = (struct obd_device*)data;
- struct ost_obd *ost = &temp->u.ost;
- struct lustre_handle *conn = &ost->ost_conn;
- struct obd_statfs mystats;
- int len = 0;
-
- obd_statfs(conn, &mystats);
- len += snprintf(page, count, LPU64"\n", mystats.os_ffree);
- return len;
-
-}
-
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- return 0;
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {"status/blocksize",rd_blksize, 0, 0},
- {"status/kbytesfree", rd_kbfree, 0, 0},
- {"status/kbytestotal", rd_kbtotal, 0, 0},
- {"status/filestotal", rd_filestotal, 0, 0},
- {"status/filesfree", rd_filesfree, 0, 0},
- {"status/filegroups", rd_filegroups, 0, 0},
- {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { 0 }
};
-
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
* Author: Peter J. Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
*
#include <linux/init.h>
#include <linux/lprocfs_status.h>
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
-static int ost_destroy(struct ptlrpc_request *req)
+static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body;
if (rc)
RETURN(rc);
- req->rq_status = obd_destroy(conn, &body->oa, NULL);
+ req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
RETURN(0);
}
RETURN(0);
}
-static int ost_open(struct ptlrpc_request *req)
+static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body, *repbody;
repbody = lustre_msg_buf(req->rq_repmsg, 0);
/* FIXME: unpack only valid fields instead of memcpy, endianness */
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_open(conn, &repbody->oa, NULL);
+ req->rq_status = obd_open(conn, &repbody->oa, NULL, oti);
RETURN(0);
}
-static int ost_close(struct ptlrpc_request *req)
+static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body, *repbody;
repbody = lustre_msg_buf(req->rq_repmsg, 0);
/* FIXME: unpack only valid fields instead of memcpy, endianness */
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_close(conn, &repbody->oa, NULL);
+ req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
RETURN(0);
}
-static int ost_create(struct ptlrpc_request *req)
+static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body, *repbody;
repbody = lustre_msg_buf(req->rq_repmsg, 0);
/* FIXME: unpack only valid fields instead of memcpy, endianness */
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_create(conn, &repbody->oa, NULL);
+ req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
RETURN(0);
}
-static int ost_punch(struct ptlrpc_request *req)
+static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body, *repbody;
/* FIXME: unpack only valid fields instead of memcpy, endianness */
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_punch(conn, &repbody->oa, NULL,
- repbody->oa.o_size, repbody->oa.o_blocks);
+ repbody->oa.o_size, repbody->oa.o_blocks, oti);
RETURN(0);
}
-static int ost_setattr(struct ptlrpc_request *req)
+static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ost_body *body, *repbody;
repbody = lustre_msg_buf(req->rq_repmsg, 0);
/* FIXME: unpack only valid fields instead of memcpy, endianness */
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_setattr(conn, &repbody->oa, NULL);
+ req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
RETURN(0);
}
struct ost_body *body;
struct l_wait_info lwi;
void *desc_priv = NULL;
- int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
+ int cmd, i, j, objcount, niocount, size = sizeof(*body);
+ int rc = 0;
ENTRY;
body = lustre_msg_buf(req->rq_reqmsg, 0);
cmd = OBD_BRW_READ;
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
- GOTO(out, rc = 0);
+ GOTO(out, req->rq_status = -EIO);
for (i = 0; i < objcount; i++) {
ost_unpack_ioo(&tmp1, &ioo);
LBUG();
GOTO(out, rc = -EFAULT);
}
- for (j = 0; j < ioo->ioo_bufcnt; j++)
+ for (j = 0; j < ioo->ioo_bufcnt; j++) {
+ /* XXX verify niobuf[j].offset > niobuf[j-1].offset */
ost_unpack_niobuf(&tmp2, &remote_nb);
+ }
}
OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
ioo = lustre_msg_buf(req->rq_reqmsg, 1);
remote_nb = lustre_msg_buf(req->rq_reqmsg, 2);
req->rq_status = obd_preprw(cmd, conn, objcount, ioo, niocount,
- remote_nb, local_nb, &desc_priv);
+ remote_nb, local_nb, &desc_priv, NULL);
if (req->rq_status)
- GOTO(out, rc = 0);
+ GOTO(out, req->rq_status);
desc = ptlrpc_prep_bulk(req->rq_connection);
if (desc == NULL)
bulk->bp_buflen = remote_nb[i].len;
}
- rc = ptlrpc_send_bulk(desc);
+ rc = ptlrpc_bulk_put(desc);
if (rc)
GOTO(out_bulk, rc);
}
req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
- local_nb, desc_priv);
-
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+ local_nb, desc_priv, NULL);
out_bulk:
ptlrpc_bulk_decref(desc);
out_local:
OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
out:
+ if (!rc)
+ /* Hmm, we don't return anything in this reply buffer?
+ * We should be returning per-page status codes and also
+ * per-object size, blocks count, mtime, ctime. (bug 593) */
+ rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+ &req->rq_repmsg);
if (rc)
ptlrpc_error(req->rq_svc, req);
else
RETURN(rc);
}
-static int ost_brw_write(struct ptlrpc_request *req)
+static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ptlrpc_bulk_desc *desc;
void *tmp2, *end2;
struct niobuf_remote *remote_nb;
struct niobuf_local *local_nb = NULL;
- struct niobuf_local *lnb;
struct obd_ioobj *ioo;
struct ost_body *body;
struct l_wait_info lwi;
- int rc, cmd, i, j, objcount, niocount;
- int size[2] = {sizeof(*body)};
void *desc_priv = NULL;
- int reply_sent = 0;
- struct ptlrpc_service *srv;
- __u32 xid;
+ int cmd, i, j, objcount, niocount, size = sizeof(*body);
+ int rc = 0;
ENTRY;
body = lustre_msg_buf(req->rq_reqmsg, 0);
niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
cmd = OBD_BRW_WRITE;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
+ GOTO(out, req->rq_status = -EIO);
+
for (i = 0; i < objcount; i++) {
- ost_unpack_ioo((void *)&tmp1, &ioo);
+ ost_unpack_ioo(&tmp1, &ioo);
if (tmp2 + ioo->ioo_bufcnt > end2) {
- rc = -EFAULT;
- break;
+ LBUG();
+ GOTO(out, rc = -EFAULT);
+ }
+ for (j = 0; j < ioo->ioo_bufcnt; j++) {
+ /* XXX verify niobuf[j].offset > niobuf[j-1].offset */
+ ost_unpack_niobuf(&tmp2, &remote_nb);
}
- for (j = 0; j < ioo->ioo_bufcnt; j++)
- ost_unpack_niobuf((void *)&tmp2, &remote_nb);
}
- size[1] = niocount * sizeof(*remote_nb);
- rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc)
- GOTO(out, rc);
- remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
-
- OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
+ OBD_ALLOC(local_nb, sizeof(*local_nb)* niocount);
if (local_nb == NULL)
GOTO(out, rc = -ENOMEM);
/* The unpackers move tmp1 and tmp2, so reset them before using */
- tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
- tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
- req->rq_status = obd_preprw(cmd, conn, objcount, tmp1, niocount, tmp2,
- local_nb, &desc_priv);
- if (req->rq_status)
- GOTO(out_free, rc = 0); /* XXX is this correct? */
+ ioo = lustre_msg_buf(req->rq_reqmsg, 1);
+ remote_nb = lustre_msg_buf(req->rq_reqmsg, 2);
+ req->rq_status = obd_preprw(cmd, conn, objcount, ioo, niocount,
+ remote_nb, local_nb, &desc_priv, oti);
- if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
- GOTO(fail_preprw, rc = 0);
+ if (req->rq_status)
+ GOTO(out, rc = 0);
desc = ptlrpc_prep_bulk(req->rq_connection);
if (desc == NULL)
- GOTO(fail_preprw, rc = -ENOMEM);
+ GOTO(out_local, rc = -ENOMEM);
desc->bd_ptl_ev_hdlr = NULL;
desc->bd_portal = OSC_BULK_PORTAL;
- desc->bd_desc_private = desc_priv;
- memcpy(&(desc->bd_conn), &conn, sizeof(conn));
-
- srv = req->rq_obd->u.ost.ost_service;
- spin_lock(&srv->srv_lock);
- xid = srv->srv_xid++; /* single xid for all pages */
- spin_unlock(&srv->srv_lock);
- for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
- struct ptlrpc_bulk_page *bulk;
+ for (i = 0; i < niocount; i++) {
+ struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- bulk = ptlrpc_prep_bulk_page(desc);
if (bulk == NULL)
- GOTO(fail_bulk, rc = -ENOMEM);
-
- bulk->bp_xid = xid; /* single xid for all pages */
-
- bulk->bp_buf = lnb->addr;
- bulk->bp_page = lnb->page;
- bulk->bp_flags = lnb->flags;
- bulk->bp_dentry = lnb->dentry;
- bulk->bp_buflen = lnb->len;
- bulk->bp_cb = NULL;
-
- /* this advances remote_nb */
- ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
- bulk->bp_xid);
+ GOTO(out_bulk, rc = -ENOMEM);
+ bulk->bp_xid = remote_nb[i].xid;
+ bulk->bp_buf = local_nb[i].addr;
+ bulk->bp_buflen = remote_nb[i].len;
}
- rc = ptlrpc_register_bulk(desc);
+ rc = ptlrpc_bulk_get(desc);
if (rc)
- GOTO(fail_bulk, rc);
-
- reply_sent = 1;
- ptlrpc_reply(req->rq_svc, req);
+ GOTO(out_bulk, rc);
lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc);
rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_RCVD,
&lwi);
if (rc) {
- if (rc != -ETIMEDOUT)
- LBUG();
+ LASSERT(rc == -ETIMEDOUT);
ptlrpc_abort_bulk(desc);
recovd_conn_fail(desc->bd_connection);
- obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
- desc->bd_desc_private);
- } else {
- rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
- desc->bd_desc_private);
+ obd_commitrw(cmd, conn, objcount, ioo, niocount, local_nb,
+ desc_priv, oti);
+ GOTO(out_bulk, rc);
}
+ req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
+ local_nb, desc_priv, oti);
+
+ out_bulk:
ptlrpc_bulk_decref(desc);
- EXIT;
-out_free:
- OBD_FREE(local_nb, niocount * sizeof(*local_nb));
-out:
- if (!reply_sent) {
- if (rc) {
- OBD_FREE(req->rq_repmsg, req->rq_replen);
- req->rq_repmsg = NULL;
- ptlrpc_error(req->rq_svc, req);
- } else
- ptlrpc_reply(req->rq_svc, req);
- }
- return rc;
+ out_local:
+ OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
+ out:
+ if (!rc)
+ /* Hmm, we don't return anything in this reply buffer?
+ * We should be returning per-page status codes and also
+ * per-object size, blocks count, mtime, ctime. (bug 593) */
+ rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+ &req->rq_repmsg);
+ if (rc)
+ ptlrpc_error(req->rq_svc, req);
+ else
+ rc = ptlrpc_reply(req->rq_svc, req);
+ RETURN(rc);
+}
-fail_bulk:
- ptlrpc_free_bulk(desc);
-fail_preprw:
- /* FIXME: how do we undo the preprw? - answer = call commitrw */
- goto out_free;
+inline void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+{
+ if (oti && req->rq_repmsg)
+ req->rq_repmsg->transno = HTON__u64(oti->oti_transno);
+ EXIT;
}
static int ost_handle(struct ptlrpc_request *req)
{
+ struct obd_trans_info trans_info = { 0, }, *oti = &trans_info;
int rc;
ENTRY;
GOTO(out, rc);
}
- if (req->rq_reqmsg->opc != OST_CONNECT &&
- req->rq_export == NULL) {
+ if (req->rq_reqmsg->opc != OST_CONNECT && req->rq_export == NULL) {
CERROR("lustre_ost: operation %d on unconnected OST\n",
req->rq_reqmsg->opc);
req->rq_status = -ENOTCONN;
case OST_CREATE:
CDEBUG(D_INODE, "create\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
- rc = ost_create(req);
+ rc = ost_create(req, oti);
break;
case OST_DESTROY:
CDEBUG(D_INODE, "destroy\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
- rc = ost_destroy(req);
+ rc = ost_destroy(req, oti);
break;
case OST_GETATTR:
CDEBUG(D_INODE, "getattr\n");
case OST_SETATTR:
CDEBUG(D_INODE, "setattr\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
- rc = ost_setattr(req);
+ rc = ost_setattr(req, oti);
break;
case OST_OPEN:
CDEBUG(D_INODE, "open\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
- rc = ost_open(req);
+ rc = ost_open(req, oti);
break;
case OST_CLOSE:
CDEBUG(D_INODE, "close\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
- rc = ost_close(req);
+ rc = ost_close(req, oti);
break;
case OST_WRITE:
CDEBUG(D_INODE, "write\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
- rc = ost_brw_write(req);
+ rc = ost_brw_write(req, oti);
/* ost_brw sends its own replies */
RETURN(rc);
case OST_READ:
case OST_PUNCH:
CDEBUG(D_INODE, "punch\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
- rc = ost_punch(req);
+ rc = ost_punch(req, oti);
break;
case OST_STATFS:
CDEBUG(D_INODE, "statfs\n");
case LDLM_ENQUEUE:
CDEBUG(D_INODE, "enqueue\n");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
- rc = ldlm_handle_enqueue(req);
+ rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
+ ldlm_server_blocking_ast);
break;
case LDLM_CONVERT:
CDEBUG(D_INODE, "convert\n");
}
EXIT;
+ /* If we're DISCONNECTing, the export_data is already freed */
+ if (!rc && req->rq_reqmsg->opc != OST_DISCONNECT) {
+ struct obd_device *obd = req->rq_export->exp_obd;
+ if ((obd->obd_flags & OBD_NO_TRANSNO) == 0) {
+ req->rq_repmsg->last_committed =
+ HTON__u64(obd->obd_last_committed);
+ } else {
+ DEBUG_REQ(D_IOCTL, req,
+ "not sending last_committed update");
+ }
+ CDEBUG(D_INFO, "last_committed "LPU64", xid "LPX64"\n",
+ obd->obd_last_committed, HTON__u64(req->rq_xid));
+ }
+
out:
//req->rq_status = rc;
if (rc) {
CDEBUG(D_INODE, "sending reply\n");
if (req->rq_repmsg == NULL)
CERROR("handler for opcode %d returned rc=0 without "
- "creating rq_repmsg; needs to return rc != "
- "0!\n", req->rq_reqmsg->opc);
+ "creating rq_repmsg; needs to return rc != 0!\n",
+ req->rq_reqmsg->opc);
+ else
+ oti_to_request(oti, req);
ptlrpc_reply(req->rq_svc, req);
}
return 0;
}
-/* mount the file system (secretly) */
static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
{
- struct obd_ioctl_data* data = buf;
struct ost_obd *ost = &obddev->u.ost;
- struct obd_device *tgt;
+ struct obd_uuid self = { "self" };
int err;
int i;
ENTRY;
- if (data->ioc_inllen1 < 1) {
- CERROR("requires a TARGET OBD UUID\n");
- RETURN(-EINVAL);
- }
- if (data->ioc_inllen1 > 37) {
- CERROR("OBD UUID must be less than 38 characters\n");
- RETURN(-EINVAL);
- }
-
- tgt = class_uuid2obd(data->ioc_inlbuf1);
- if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
- !(tgt->obd_flags & OBD_SET_UP)) {
- CERROR("device not attached or not set up (%d)\n",
- data->ioc_dev);
- RETURN(err = -EINVAL);
- }
-
- err = obd_connect(&ost->ost_conn, tgt, NULL, NULL, NULL);
- if (err) {
- CERROR("fail to connect to device %d\n", data->ioc_dev);
- RETURN(err);
- }
-
ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
OST_BUFSIZE, OST_MAXREQSIZE,
OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
- "self", ost_handle, "ost");
+ &self, ost_handle, "ost");
if (!ost->ost_service) {
CERROR("failed to start service\n");
GOTO(error_disc, err = -ENOMEM);
RETURN(0);
error_disc:
- obd_disconnect(&ost->ost_conn);
RETURN(err);
}
static int ost_cleanup(struct obd_device * obddev)
{
struct ost_obd *ost = &obddev->u.ost;
- int err;
+ int err = 0;
ENTRY;
- if ( !list_empty(&obddev->obd_exports) ) {
- CERROR("still has clients!\n");
- RETURN(-EBUSY);
- }
-
ptlrpc_stop_all_threads(ost->ost_service);
ptlrpc_unregister_service(ost->ost_service);
- err = obd_disconnect(&ost->ost_conn);
- if (err)
- CERROR("lustre ost: fail to disconnect device\n");
-
RETURN(err);
}
int ost_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int ost_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
/* This is so similar to mds_connect that it makes my heart weep: we should
* target_handle_connect.
*/
static int ost_connect(struct lustre_handle *conn,
- struct obd_device *obd, obd_uuid_t cluuid,
+ struct obd_device *obd, struct obd_uuid *cluuid,
struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
RETURN(-EINVAL);
/* lctl gets a backstage, all-access pass. */
- if (!strcmp(cluuid, "OBD_CLASS_UUID"))
+ if (!strcmp(cluuid->uuid, "OBD_CLASS_UUID"))
goto dont_check_exports;
spin_lock(&obd->obd_dev_lock);
list_for_each(p, &obd->obd_exports) {
exp = list_entry(p, struct obd_export, exp_obd_chain);
oed = &exp->exp_ost_data;
- if (!memcmp(cluuid, oed->oed_uuid, sizeof oed->oed_uuid)) {
+ if (!memcmp(cluuid->uuid, oed->oed_uuid.uuid,
+ sizeof(oed->oed_uuid.uuid))) {
spin_unlock(&obd->obd_dev_lock);
LASSERT(exp->exp_obd == obd);
LASSERT(exp);
oed = &exp->exp_ost_data;
- memcpy(oed->oed_uuid, cluuid, sizeof oed->oed_uuid);
+ memcpy(oed->oed_uuid.uuid, cluuid->uuid, sizeof(oed->oed_uuid.uuid));
RETURN(0);
}
-
/* use obd ops to offer management infrastructure */
static struct obd_ops ost_obd_ops = {
o_owner: THIS_MODULE,
static int __init ost_init(void)
{
- int rc;
-
- rc = class_register_type(&ost_obd_ops, status_class_var,
- LUSTRE_OST_NAME);
- RETURN(rc);
+ struct lprocfs_static_vars lvars;
+ ENTRY;
+ lprocfs_init_vars(&lvars);
+ RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
+ LUSTRE_OST_NAME));
}
static void __exit ost_exit(void)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#define LOCAL_END_REQUEST
#include <linux/blk.h>
#include <linux/blkdev.h>
+#include <linux/blkpg.h>
#include <linux/devfs_fs_kernel.h>
static int ptlbd_size_size[PTLBD_MAX_MINOR];
unsigned int cmd, unsigned long arg)
{
struct ptlbd_obd *ptlbd;
+ int ret;
if ( ! capable(CAP_SYS_ADMIN) )
RETURN(-EPERM);
if ( IS_ERR(ptlbd) )
RETURN( PTR_ERR(ptlbd) );
- /* XXX getattr{,64} */
+ switch(cmd) {
+ case BLKFLSBUF:
+ ret = blk_ioctl(inode->i_rdev, cmd, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
- RETURN(-EINVAL);
+ RETURN(ret);
}
static int ptlbd_release(struct inode *inode, struct file *file)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
struct obd_import *imp = &ptlbd->bd_import;
struct obd_ioctl_data* data = buf;
- obd_uuid_t server_uuid;
+ struct obd_uuid server_uuid;
ENTRY;
if ( ptlbd->bd_import.imp_connection != NULL )
RETURN(-EINVAL);
}
- memcpy(server_uuid, data->ioc_inlbuf1, MIN(data->ioc_inllen1,
- sizeof(server_uuid)));
+ obd_str2uuid(&server_uuid, data->ioc_inlbuf1);
- imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+ imp->imp_connection = ptlrpc_uuid_to_connection(&server_uuid);
if (!imp->imp_connection)
RETURN(-ENOENT);
INIT_LIST_HEAD(&imp->imp_chain);
imp->imp_last_xid = 0;
imp->imp_max_transno = 0;
- imp->imp_peer_last_xid = 0;
imp->imp_peer_committed_transno = 0;
imp->imp_level = LUSTRE_CONN_FULL;
#if 0
static int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
ENTRY;
rc = class_connect(conn, obd, cluuid);
- if (rc)
+ if (rc)
RETURN(rc);
INIT_LIST_HEAD(&imp->imp_chain);
int ptlbd_cl_init(void)
{
- extern struct lprocfs_vars status_class_var[];
+ struct lprocfs_static_vars lvars;
- return class_register_type(&ptlbd_cl_obd_ops, status_class_var,
+ lprocfs_init_vars(&lvars);
+ return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
OBD_PTLBD_CL_DEVICENAME);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/lprocfs_status.h>
#include <linux/obd_ptlbd.h>
-static __u32 get_next_xid(struct obd_import *imp)
-{
- unsigned long flags;
- __u32 xid;
- spin_lock_irqsave(&imp->imp_lock, flags);
- xid = ++imp->imp_last_xid;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
- return xid;
-}
-
-static int ptlbd_brw_callback(struct obd_brw_set *set, int phase)
-{
- ENTRY;
- RETURN(0);
-}
-
-static void decref_bulk_desc(void *data)
-{
- struct ptlrpc_bulk_desc *desc = data;
- ENTRY;
-
- ptlrpc_bulk_decref(desc);
- EXIT;
-}
-
-/* this is the callback function which is invoked by the Portals
- * event handler associated with the bulk_sink queue and bulk_source queue.
- */
-static void ptlbd_ptl_ev_hdlr(struct ptlrpc_bulk_desc *desc)
-{
- ENTRY;
-
- LASSERT(desc->bd_brw_set != NULL);
- LASSERT(desc->bd_brw_set->brw_callback != NULL);
-
- desc->bd_brw_set->brw_callback(desc->bd_brw_set, CB_PHASE_FINISH);
-
- prepare_work(&desc->bd_queue, decref_bulk_desc, desc);
- schedule_work(&desc->bd_queue);
-
- EXIT;
-}
-
-
-int ptlbd_write_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
- struct buffer_head *first_bh, unsigned int page_count)
-{
- struct obd_import *imp = &ptlbd->bd_import;
- struct ptlbd_op *op;
- struct ptlbd_niob *niob, *niobs;
- struct ptlbd_rsp *rsp;
- struct ptlrpc_request *req;
- struct ptlrpc_bulk_desc *desc;
- struct buffer_head *bh;
- int rc, size[2];
- struct obd_brw_set *set;
- ENTRY;
-
- size[0] = sizeof(struct ptlbd_op);
- size[1] = page_count * sizeof(struct ptlbd_niob);
-
- req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
- if (!req)
- GOTO(out, rc = -ENOMEM);
- /* XXX might not need these */
- req->rq_request_portal = PTLBD_REQUEST_PORTAL;
- req->rq_reply_portal = PTLBD_REPLY_PORTAL;
-
- op = lustre_msg_buf(req->rq_reqmsg, 0);
- niobs = lustre_msg_buf(req->rq_reqmsg, 1);
-
- /* XXX pack */
- op->op_cmd = cmd;
- op->op_lun = 0;
- op->op_niob_cnt = page_count;
- op->op__padding = 0;
- op->op_block_cnt = page_count;
-
- desc = ptlrpc_prep_bulk(imp->imp_connection);
- if ( desc == NULL )
- GOTO(out_req, rc = -ENOMEM);
- desc->bd_portal = PTLBD_BULK_PORTAL;
- desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
-
- /* XXX someone needs to free this */
- set = obd_brw_set_new();
- if (set == NULL)
- GOTO(out_desc, rc = -ENOMEM);
-
- set->brw_callback = ptlbd_brw_callback;
-
-#if 0
- xid = get_next_xid(imp);
-#endif
-
- for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
-#if 0
- struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- if (bulk == NULL)
- GOTO(out_set, rc = -ENOMEM);
-#endif
-
-#if 0
- niob->n_xid = xid;
-#endif
- niob->n_block_nr = bh->b_blocknr;
- niob->n_offset = bh_offset(bh);
- niob->n_length = bh->b_size;
-
-
-#if 0
- bulk->bp_xid = xid;
- bulk->bp_buf = bh->b_data;
- bulk->bp_page = bh->b_page;
- bulk->bp_buflen = bh->b_size;
-#endif
- }
-
-
- size[0] = sizeof(struct ptlbd_rsp);
- size[1] = sizeof(struct ptlbd_niob) * page_count;
- req->rq_replen = lustre_msg_size(2, size);
-
- /* XXX find out how we're really supposed to manage levels */
- req->rq_level = imp->imp_level;
- rc = ptlrpc_queue_wait(req);
-
- rsp = lustre_msg_buf(req->rq_repmsg, 0);
-
- niob = lustre_msg_buf(req->rq_repmsg, 1);
- /* XXX check that op->num matches ours */
- for ( bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
- struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- if (bulk == NULL)
- GOTO(out_set, rc = -ENOMEM);
-
- bulk->bp_xid = niob->n_xid;
- bulk->bp_page = bh->b_page;
- bulk->bp_buf = bh->b_data;
- bulk->bp_buflen = bh->b_size;
- }
-
- obd_brw_set_add(set, desc);
- rc = ptlrpc_send_bulk(desc);
-
- /* if there's an error, no brw_finish called, just like
- * osc_brw_read */
-
- GOTO(out_req, rc);
-
-out_set:
- obd_brw_set_free(set);
-out_desc:
- ptlrpc_bulk_decref(desc);
-out_req:
- ptlrpc_req_finished(req);
-out:
- RETURN(rc);
-}
-
-int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
- struct buffer_head *first_bh, unsigned int page_count)
+int ptlbd_send_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
+ struct buffer_head *first_bh)
{
struct obd_import *imp = &ptlbd->bd_import;
struct ptlbd_op *op;
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
struct buffer_head *bh;
+ unsigned long flags;
+ unsigned int page_count;
int rc, rep_size, size[2];
- struct obd_brw_set *set;
__u32 xid;
ENTRY;
+ LASSERT(cmd == PTLBD_READ || cmd == PTLBD_WRITE);
+
+ for ( page_count = 0, bh = first_bh ; bh ; bh = bh->b_next )
+ page_count++;
+
size[0] = sizeof(struct ptlbd_op);
size[1] = page_count * sizeof(struct ptlbd_niob);
req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
if (!req)
- GOTO(out, rc = -ENOMEM);
- /* XXX might not need these? */
- req->rq_request_portal = PTLBD_REQUEST_PORTAL;
- req->rq_reply_portal = PTLBD_REPLY_PORTAL;
+ RETURN(-ENOMEM);
op = lustre_msg_buf(req->rq_reqmsg, 0);
niobs = lustre_msg_buf(req->rq_reqmsg, 1);
if ( desc == NULL )
GOTO(out_req, rc = -ENOMEM);
desc->bd_portal = PTLBD_BULK_PORTAL;
- desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
-
- /* XXX someone needs to free this */
- set = obd_brw_set_new();
- if (set == NULL)
- GOTO(out_desc, rc = -ENOMEM);
-
- set->brw_callback = ptlbd_brw_callback;
+ desc->bd_ptl_ev_hdlr = NULL;
- xid = get_next_xid(imp);
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ xid = ++imp->imp_last_xid;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
if (bulk == NULL)
- GOTO(out_set, rc = -ENOMEM);
+ GOTO(out_req, rc = -ENOMEM);
niob->n_xid = xid;
niob->n_block_nr = bh->b_blocknr;
bulk->bp_buflen = bh->b_size;
}
- /* XXX put in OBD_FAIL_CHECK for ptlbd? */
- rc = ptlrpc_register_bulk(desc);
- if (rc)
- GOTO(out_set, rc);
+ if ( cmd == PTLBD_READ )
+ rc = ptlrpc_register_bulk_put(desc);
+ else
+ rc = ptlrpc_register_bulk_get(desc);
- obd_brw_set_add(set, desc);
+ if (rc)
+ GOTO(out_desc, rc);
rep_size = sizeof(struct ptlbd_rsp);
req->rq_replen = lustre_msg_size(1, &rep_size);
req->rq_level = imp->imp_level;
rc = ptlrpc_queue_wait(req);
- rsp = lustre_msg_buf(req->rq_repmsg, 0);
-
- /* if there's an error, no brw_finish called, just like
- * osc_brw_read */
-
- GOTO(out_req, rc);
+ if ( rc == 0 ) {
+ rsp = lustre_msg_buf(req->rq_repmsg, 0);
+ /* XXX do stuff */
+ }
-out_set:
- obd_brw_set_free(set);
out_desc:
ptlrpc_bulk_decref(desc);
out_req:
ptlrpc_req_finished(req);
-out:
- RETURN(rc);
-}
-
-int ptlbd_send_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
- struct buffer_head *first_bh)
-{
- unsigned int page_count = 0;
- struct buffer_head *bh;
- int rc;
- ENTRY;
-
- for ( page_count = 0, bh = first_bh ; bh ; bh = bh->b_next )
- page_count++;
-
- switch (cmd) {
- case PTLBD_READ:
- rc = ptlbd_read_put_req(ptlbd, cmd,
- first_bh, page_count);
- break;
- case PTLBD_WRITE:
- rc = ptlbd_write_put_req(ptlbd, cmd,
- first_bh, page_count);
- break;
- default:
- rc = -EINVAL;
- break;
- };
-
RETURN(rc);
}
RETURN(1);
}
-#define SILLY_MAX 2048
-static struct page *pages[SILLY_MAX] = {NULL,};
-
-static struct page * fake_page(int block_nr)
-{
- if ( block_nr >= SILLY_MAX )
- return NULL;
-
- if (pages[block_nr] == NULL) {
- void *vaddr = (void *)get_free_page(GFP_KERNEL);
- pages[block_nr] = virt_to_page(vaddr);
- }
- return pages[block_nr];
-}
-
-static int ptlbd_put_write(struct ptlrpc_request *req)
+void ptlbd_do_filp(struct file *filp, int op, struct ptlbd_niob *niobs,
+ int page_count, struct list_head *page_list)
{
- struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
- struct ptlbd_op *op;
- struct ptlbd_niob *reply_niob, *request_niob;
- struct ptlbd_rsp *rsp;
- struct ptlrpc_bulk_desc *desc;
- struct ptlrpc_service *srv;
- struct l_wait_info lwi;
- int size[2];
- int i, page_count, rc;
- __u32 xid;
+ mm_segment_t old_fs;
+ struct list_head *pos;
+ ENTRY;
- op = lustre_msg_buf(req->rq_reqmsg, 0);
- request_niob = lustre_msg_buf(req->rq_reqmsg, 1);
- page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
- size[0] = sizeof(struct ptlbd_rsp);
- size[1] = sizeof(struct ptlbd_niob) * page_count;
- rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc)
- GOTO(out, rc);
- reply_niob = lustre_msg_buf(req->rq_repmsg, 1);
+ list_for_each(pos, page_list) {
+ ssize_t ret;
+ struct page *page = list_entry(pos, struct page, list);
+ loff_t offset = (niobs->n_block_nr << PAGE_SHIFT) +
+ niobs->n_offset;
- desc = ptlrpc_prep_bulk(req->rq_connection);
- if (desc == NULL)
- GOTO(out, rc = -ENOMEM);
- desc->bd_ptl_ev_hdlr = NULL;
- desc->bd_portal = PTLBD_BULK_PORTAL;
- memcpy(&(desc->bd_conn), &conn, sizeof(conn)); /* XXX what? */
-
- srv = req->rq_obd->u.ptlbd.ptlbd_service;
- spin_lock(&srv->srv_lock);
- xid = srv->srv_xid++; /* single xid for all pages */
- spin_unlock(&srv->srv_lock);
-
- for ( i = 0; i < page_count; i++) {
- struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- if (bulk == NULL)
- GOTO(out_desc, rc = -ENOMEM);
-
- reply_niob[i] = request_niob[i];
- reply_niob[i].n_xid = xid;
+ if ( op == PTLBD_READ )
+ ret = filp->f_op->read(filp, page_address(page),
+ niobs->n_length, &offset);
+ else
+ ret = filp->f_op->write(filp, page_address(page),
+ niobs->n_length, &offset);
- bulk->bp_xid = xid;
- bulk->bp_page = fake_page(request_niob[i].n_block_nr);
- bulk->bp_buf = page_address(bulk->bp_page);
- bulk->bp_buflen = request_niob[i].n_length;
+ niobs++;
}
- rc = ptlrpc_register_bulk(desc);
- if ( rc )
- GOTO(out_desc, rc);
-
- rsp = lustre_msg_buf(req->rq_reqmsg, 0);
- rsp->r_status = 42;
- rsp->r_error_cnt = 13;
- ptlrpc_reply(req->rq_svc, req);
-
- /* this synchronization probably isn't good enough */
- lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
- rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_RCVD,
- &lwi);
-
-out_desc:
- ptlrpc_free_bulk(desc);
-out:
- RETURN(rc);
+ set_fs(old_fs);
+ EXIT;
}
-static int ptlbd_put_read(struct ptlrpc_request *req)
+int ptlbd_parse_req(struct ptlrpc_request *req)
{
struct ptlbd_op *op;
struct ptlbd_niob *niob, *niobs;
struct ptlbd_rsp *rsp;
struct ptlrpc_bulk_desc *desc;
+ struct file *filp = req->rq_obd->u.ptlbd.filp;
struct l_wait_info lwi;
- int size[1];
- int i, page_count, rc;
+ int size[1], wait_flag, i, page_count, rc;
+ struct list_head *pos, *n;
+ LIST_HEAD(tmp_pages);
+ ENTRY;
+
+ rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
+ if ( rc )
+ RETURN(rc);
op = lustre_msg_buf(req->rq_reqmsg, 0);
+ LASSERT(op->op_cmd == PTLBD_READ || op->op_cmd == PTLBD_WRITE);
+
niobs = lustre_msg_buf(req->rq_reqmsg, 1);
page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
desc = ptlrpc_prep_bulk(req->rq_connection);
if (desc == NULL)
GOTO(out, rc = -ENOMEM);
+ desc->bd_ptl_ev_hdlr = NULL;
desc->bd_portal = PTLBD_BULK_PORTAL;
for ( i = 0, niob = niobs ; i < page_count; niob++, i++) {
if (bulk == NULL)
GOTO(out_bulk, rc = -ENOMEM);
+ bulk->bp_page = alloc_page(GFP_KERNEL);
+ if (bulk->bp_page == NULL)
+ GOTO(out_bulk, rc = -ENOMEM);
+ list_add(&bulk->bp_page->list, &tmp_pages);
+
/*
* XXX what about the block number?
*/
bulk->bp_xid = niob->n_xid;
- bulk->bp_page = fake_page(niob->n_block_nr);
bulk->bp_buf = page_address(bulk->bp_page);
bulk->bp_buflen = niob->n_length;
}
- rc = ptlrpc_send_bulk(desc);
+ if ( op->op_cmd == PTLBD_READ ) {
+ ptlbd_do_filp(filp, PTLBD_READ, niobs, page_count, &tmp_pages);
+ rc = ptlrpc_bulk_put(desc);
+ wait_flag = PTL_BULK_FL_SENT;
+ } else {
+ rc = ptlrpc_bulk_get(desc);
+ wait_flag = PTL_BULK_FL_RCVD;
+ }
+
if ( rc )
GOTO(out_bulk, rc);
/* this synchronization probably isn't good enough */
lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
- rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_SENT,
- &lwi);
+ rc = l_wait_event(desc->bd_waitq, desc->bd_flags & wait_flag, &lwi);
size[0] = sizeof(struct ptlbd_rsp);
rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
rsp = lustre_msg_buf(req->rq_repmsg, 0);
if ( rsp == NULL )
GOTO(out, rc = -EINVAL);
+
+ ptlbd_do_filp(filp, PTLBD_WRITE, niobs, page_count, &tmp_pages);
rsp->r_error_cnt = 42;
rsp->r_status = 69;
ptlrpc_reply(req->rq_svc, req);
out_bulk:
- ptlrpc_free_bulk(desc);
-out:
- RETURN(rc);
-}
-
-
-int ptlbd_parse_req(struct ptlrpc_request *req)
-{
- struct ptlbd_op *op;
- int rc;
- ENTRY;
-
- rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
- if ( rc )
- RETURN(rc);
-
- op = lustre_msg_buf(req->rq_reqmsg, 0);
-
- switch(op->op_cmd) {
- case PTLBD_READ:
- ptlbd_put_read(req);
- break;
- case PTLBD_WRITE:
- ptlbd_put_write(req);
- break;
- default:
- CERROR("fix this %d\n", op->op_cmd);
- break;
+ list_for_each_safe(pos, n, &tmp_pages) {
+ struct page *page = list_entry(pos, struct page, list);
+ list_del(&page->list);
+ __free_page(page);
}
-
- RETURN(0);
-}
-
-
-#if 0
-int ptlbd_bh_req(int cmd, struct ptlbd_state *st, struct buffer_head *first_bh)
-{
- struct obd_brw_set *set = NULL;
- struct brw_page *pg = NULL;
- struct buffer_head *bh;
- int rc, i, pg_bytes = 0;
- ENTRY;
-
- for ( bh = first_bh ; bh ; bh = bh->b_reqnext )
- pg_bytes += sizeof(struct brw_page);
-
- OBD_ALLOC(pg, pg_bytes);
- if ( pg == NULL )
- GOTO(out, rc = -ENOMEM);
-
- set = obd_brw_set_new();
- if (set == NULL)
- GOTO(out, rc = -ENOMEM);
-
- for ( i = 0, bh = first_bh ; bh ; bh = bh->b_reqnext, i++) {
- pg[i].pg = bh->b_page;
- pg[i].off = bh_offset(bh);
- pg[i].count = bh->b_size;
- pg[i].flag = 0;
- }
-
- set->brw_callback = ll_brw_sync_wait;
- rc = obd_brw(cmd, /* lsm */NULL, num_pages, pg, set);
- if ( rc )
- GOTO(out, rc);
-
- rc = ll_brw_sync_wait(set, CB_PHASE_START);
- if (rc)
- CERROR("error from callback: rc = %d\n", rc);
-
+ ptlrpc_bulk_decref(desc);
out:
- if ( pg != NULL )
- OBD_FREE(pg, pg_bytes);
- if ( set != NULL )
- obd_brw_set_free(set);
-
- RETURN(rc);
+ RETURN(rc);
}
-#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/lprocfs_status.h>
#include <linux/obd_ptlbd.h>
-#if 0
-static int ptlbd_sv_callback(struct ptlrpc_request *req)
-{
- int rc;
- ENTRY;
-
- rc = ptlbd_parse_request(req);
-
- rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
- if ( rc )
- GOTO(out, rc);
-
- printk("callback got a friggin opc %d\n", req->rq_reqmsg->opc);
-
-out:
- RETURN(rc);
-}
-#endif
-
static int ptlbd_sv_already_setup = 1;
static int ptlbd_sv_setup(struct obd_device *obddev, obd_count len, void *buf)
{
-#if 0
- struct obd_ioctl_data* data = buf;
- obd_uuid_t server_uuid;
-#endif
+ struct obd_uuid self_uuid = { "self" };
struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
int rc;
ENTRY;
-#if 0
- if (data->ioc_inllen1 < 1) {
- CERROR("requires a PTLBD server UUID\n");
- RETURN(rc = -EINVAL);
- }
-
- if (data->ioc_inllen1 > 37) {
- CERROR("PTLBD server UUID must be less than 38 characters\n");
- RETURN(rc = -EINVAL);
- }
-
- memcpy(server_uuid, data->ioc_inlbuf1, MIN(data->ioc_inllen1,
- sizeof(server_uuid)));
+ ptlbd->filp = filp_open("/tmp/ptlbd-backing-file-la-la-la",
+ O_RDWR|O_CREAT, 0600);
+ if ( IS_ERR(ptlbd->filp) )
+ RETURN(PTR_ERR(ptlbd->filp));
-#endif
ptlbd->ptlbd_service =
ptlrpc_init_svc(PTLBD_NEVENTS, PTLBD_NBUFS, PTLBD_BUFSIZE,
PTLBD_MAXREQSIZE, PTLBD_REQUEST_PORTAL,
- PTLBD_REPLY_PORTAL, "self",
+ PTLBD_REPLY_PORTAL, &self_uuid,
ptlbd_parse_req, "ptlbd_sv");
- if (!ptlbd->ptlbd_service) {
- CERROR("failed to start service\n");
- RETURN(rc = -ENOMEM);
- }
+ if (ptlbd->ptlbd_service == NULL)
+ GOTO(out_filp, rc = -ENOMEM);
rc = ptlrpc_start_thread(obddev, ptlbd->ptlbd_service, "ptldb");
- if (rc) {
- CERROR("cannot start PTLBD thread: rc %d\n", rc);
- LBUG();
+ if (rc != 0)
GOTO(out_thread, rc);
- }
ptlbd_sv_already_setup = 1;
RETURN(0);
- out_thread:
+out_thread:
ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
ptlrpc_unregister_service(ptlbd->ptlbd_service);
+out_filp:
+ filp_close(ptlbd->filp, NULL);
- return rc;
+ RETURN(rc);
}
static int ptlbd_sv_cleanup(struct obd_device *obddev)
ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
ptlrpc_unregister_service(ptlbd->ptlbd_service);
+ if ( ! IS_ERR(ptlbd->filp) )
+ filp_close(ptlbd->filp, NULL);
ptlbd_sv_already_setup = 0;
RETURN(0);
}
-#if 0
-static int ptlbd_sv_connect(struct lustre_handle *conn, struct obd_device *src,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
- ptlrpc_recovery_cb_t recover)
-{
- return class_connect(conn, src, cluuid);
-}
-#endif
-
static struct obd_ops ptlbd_sv_obd_ops = {
o_owner: THIS_MODULE,
-/* o_iocontrol: ptlbd_iocontrol,*/
o_setup: ptlbd_sv_setup,
o_cleanup: ptlbd_sv_cleanup,
-#if 0
- o_connect: ptlbd_sv_connect,
- o_disconnect: class_disconnect
-#endif
};
int ptlbd_sv_init(void)
{
- extern struct lprocfs_vars status_class_var[];
+ struct lprocfs_static_vars lvars;
- return class_register_type(&ptlbd_sv_obd_ops, status_class_var,
+ lprocfs_init_vars(&lvars);
+ return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
OBD_PTLBD_SV_DEVICENAME);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
cl->cli_name = name;
}
-__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
+struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
{
- return req->rq_connection->c_remote_uuid;
+ return &req->rq_connection->c_remote_uuid;
}
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid)
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
{
struct ptlrpc_connection *c;
struct lustre_peer peer;
int err;
- err = kportal_uuid_to_peer(uuid, &peer);
+ err = kportal_uuid_to_peer(uuid->uuid, &peer);
if (err != 0) {
- CERROR("cannot find peer %s!\n", uuid);
+ CERROR("cannot find peer %s!\n", uuid->uuid);
return NULL;
}
c = ptlrpc_get_connection(&peer, uuid);
if (c) {
- memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
+ memcpy(c->c_remote_uuid.uuid,
+ uuid->uuid, sizeof(c->c_remote_uuid.uuid));
c->c_epoch++;
}
- CDEBUG(D_INFO, "%s -> %p\n", uuid, c);
+ CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
return c;
}
-void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,obd_uuid_t uuid)
+void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,struct obd_uuid *uuid)
{
struct lustre_peer peer;
int err;
- err = kportal_uuid_to_peer(uuid, &peer);
+ err = kportal_uuid_to_peer(uuid->uuid, &peer);
if (err != 0) {
- CERROR("cannot find peer %s!\n", uuid);
+ CERROR("cannot find peer %s!\n", uuid->uuid);
return;
}
if (PtlMDUnlink(desc->bd_md_h) != 0) {
CERROR("Near-miss on OST %s -- need to adjust "
"obd_timeout?\n",
- desc->bd_connection->c_remote_uuid);
+ desc->bd_connection->c_remote_uuid.uuid);
continue;
}
CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
- desc->bd_page_count, desc->bd_connection->c_remote_uuid,
+ desc->bd_page_count,
+ desc->bd_connection->c_remote_uuid.uuid,
desc->bd_portal, desc->bd_connection);
/* This one will "never" arrive, don't wait for it. */
{
struct ptlrpc_connection *conn;
struct ptlrpc_request *request;
- unsigned long flags;
int rc;
ENTRY;
request->rq_type = PTL_RPC_MSG_REQUEST;
request->rq_import = imp;
- /* XXX FIXME bug 625069 */
+ /* XXX FIXME bug 625069, now 249 */
request->rq_request_portal = imp->imp_client->cli_request_portal;
request->rq_reply_portal = imp->imp_client->cli_reply_portal;
INIT_LIST_HEAD(&request->rq_list);
atomic_set(&request->rq_refcount, 1);
- spin_lock_irqsave(&imp->imp_lock, flags);
- request->rq_xid = HTON__u32(++imp->imp_last_xid);
- spin_unlock_irqrestore(&imp->imp_lock, flags);
-
request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
request->rq_reqmsg->opc = HTON__u32(opcode);
if (atomic_read(&request->rq_refcount) != 0) {
CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
request, request->rq_reqmsg->opc,
- request->rq_connection->c_remote_uuid,
+ request->rq_connection->c_remote_uuid.uuid,
request->rq_import->imp_client->cli_request_portal,
atomic_read (&request->rq_refcount));
/* LBUG(); */
}
if (err < 0) {
- DEBUG_REQ(D_ERROR, req, "status is %d", err);
+ DEBUG_REQ(D_INFO, req, "status is %d", err);
} else if (err > 0) {
/* XXX: translate this error from net to host */
DEBUG_REQ(D_INFO, req, "status is %d", err);
struct ptlrpc_request *req;
ENTRY;
+ LASSERT(imp != NULL);
+
#ifdef CONFIG_SMP
LASSERT(spin_is_locked(&imp->imp_lock));
#endif
- CDEBUG(D_HA, "committing for xid "LPU64", last_committed "LPU64"\n",
- imp->imp_peer_last_xid, imp->imp_peer_committed_transno);
+ CDEBUG(D_HA, "committing for last_committed "LPU64"\n",
+ imp->imp_peer_committed_transno);
list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")",
imp->imp_peer_committed_transno);
+ list_del_init(&req->rq_list);
__ptlrpc_req_finished(req, 1);
}
__ptlrpc_req_finished(req, 0);
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
-
+
EXIT;
return;
}
req->rq_flags |= PTL_RPC_FL_TIMEOUT;
if (!req->rq_import) {
- DEBUG_REQ(D_ERROR, req, "NULL import");
- LBUG();
- RETURN(0);
+ DEBUG_REQ(D_HA, req, "NULL import; already cleaned up?");
+ RETURN(1);
}
if (!req->rq_import->imp_connection) {
RETURN(1); /* ignored, as of this writing */
}
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
+{
+ ENTRY;
+ atomic_inc(&req->rq_refcount);
+ RETURN(req);
+}
+
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+ struct obd_import *imp)
+{
+ struct list_head *tmp;
+
+#ifdef CONFIG_SMP
+ LASSERT(spin_is_locked(&imp->imp_lock));
+#endif
+
+ LASSERT(imp->imp_flags & IMP_REPLAYABLE);
+ /* Balanced in ptlrpc_free_committed, usually. */
+ ptlrpc_request_addref(req);
+ list_for_each_prev(tmp, &imp->imp_replay_list) {
+ struct ptlrpc_request *iter =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ /* We may have duplicate transnos if we create and then
+ * open a file, or for closes retained if to match creating
+ * opens, so use req->rq_xid as a secondary key.
+ * (See bugs 684, 685, and 428.)
+ */
+ if (iter->rq_transno > req->rq_transno)
+ continue;
+
+ if (iter->rq_transno == req->rq_transno) {
+ LASSERT(iter->rq_xid != req->rq_xid);
+ if (iter->rq_xid > req->rq_xid)
+ continue;
+ }
+
+ list_add(&req->rq_list, &iter->rq_list);
+ return;
+ }
+
+ list_add_tail(&req->rq_list, &imp->imp_replay_list);
+}
+
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
int rc = 0;
init_waitqueue_head(&req->rq_wait_for_rep);
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ req->rq_xid = HTON__u32(++imp->imp_last_xid);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+
/* for distributed debugging */
- req->rq_reqmsg->status = HTON__u32(current->pid);
+ req->rq_reqmsg->status = HTON__u32(current->pid);
CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%x:%d\n",
NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
conn->c_peer.peer_nid, NTOH__u32(req->rq_reqmsg->opc));
spin_lock_irqsave(&imp->imp_lock, flags);
- /*
+ /*
* If the import has been invalidated (such as by an OST failure), the
* request must fail with -EIO.
*/
(req->rq_level <= imp->imp_level) ||
(req->rq_flags & PTL_RPC_FL_ERR), &lwi);
- spin_lock_irqsave(&imp->imp_lock, flags);
- list_del_init(&req->rq_list);
-
if (req->rq_flags & PTL_RPC_FL_ERR)
rc = -EIO;
+ if (!req->rq_import)
+ RETURN(rc);
+
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ list_del_init(&req->rq_list);
+
if (rc) {
spin_unlock_irqrestore(&imp->imp_lock, flags);
RETURN(rc);
spin_lock_irqsave(&imp->imp_lock, flags);
if ((req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0)
&& rc >= 0) {
- /* Balanced in ptlrpc_free_committed, usually. */
- atomic_inc(&req->rq_refcount);
- list_add_tail(&req->rq_list, &imp->imp_replay_list);
+ ptlrpc_retain_replayable_request(req, imp);
}
if (req->rq_transno > imp->imp_max_transno) {
imp->imp_max_transno = req->rq_transno;
- } else if (req->rq_transno != 0 &&
- imp->imp_level == LUSTRE_CONN_FULL) {
- CDEBUG(D_HA, "got transno "LPD64" after "LPD64
- ": recovery may not work\n", req->rq_transno,
- imp->imp_max_transno);
}
/* Replay-enabled imports return commit-status information. */
- imp->imp_peer_last_xid = req->rq_repmsg->last_xid;
- imp->imp_peer_committed_transno =
- req->rq_repmsg->last_committed;
+ if (req->rq_repmsg->last_committed) {
+ imp->imp_peer_committed_transno =
+ req->rq_repmsg->last_committed;
+ }
ptlrpc_free_committed(imp);
spin_unlock_irqrestore(&imp->imp_lock, flags);
}
}
/* XXX looks a lot like super.c:invalidate_request_list, don't it? */
-void ptlrpc_abort_inflight(struct obd_import *imp)
+void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import)
{
unsigned long flags;
struct list_head *tmp, *n;
DEBUG_REQ(D_HA, req, "inflight");
req->rq_flags |= PTL_RPC_FL_ERR;
+ if (dying_import)
+ req->rq_import = NULL;
wake_up(&req->rq_wait_for_rep);
}
DEBUG_REQ(D_HA, req, "aborting waiting req");
req->rq_flags |= PTL_RPC_FL_ERR;
+ if (dying_import)
+ req->rq_import = NULL;
wake_up(&req->rq_wait_for_rep);
}
}
/* If UUID is NULL, c->c_remote_uuid must be all zeroes
* If UUID is non-NULL, c->c_remote_uuid must match. */
-static int match_connection_uuid(struct ptlrpc_connection *c, obd_uuid_t uuid)
+static int match_connection_uuid(struct ptlrpc_connection *c, struct obd_uuid *uuid)
{
- obd_uuid_t zero_uuid = {0};
+ struct obd_uuid zero_uuid;
+ memset(&zero_uuid, 0, sizeof(zero_uuid));
if (uuid)
- return memcmp(c->c_remote_uuid, uuid, sizeof(uuid));
+ return memcmp(c->c_remote_uuid.uuid, uuid->uuid,
+ sizeof(uuid->uuid));
- return memcmp(c->c_remote_uuid, zero_uuid, sizeof(zero_uuid));
+ return memcmp(c->c_remote_uuid.uuid, &zero_uuid, sizeof(zero_uuid));
}
struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
- obd_uuid_t uuid)
+ struct obd_uuid *uuid)
{
struct list_head *tmp, *pos;
struct ptlrpc_connection *c;
c->c_epoch = 1;
c->c_bootcount = 0;
c->c_flags = 0;
- if (uuid)
- strcpy(c->c_remote_uuid, uuid);
+ if (uuid->uuid)
+ obd_str2uuid(&c->c_remote_uuid, uuid->uuid);
INIT_LIST_HEAD(&c->c_imports);
INIT_LIST_HEAD(&c->c_exports);
INIT_LIST_HEAD(&c->c_sb_chain);
list_for_each_safe(tmp, pos, &conn_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
CERROR("Connection %p/%s has refcount %d (nid=%lu)\n",
- c, c->c_remote_uuid, atomic_read(&c->c_refcount),
+ c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount),
(unsigned long)c->c_peer.peer_nid);
list_del(&c->c_link);
OBD_FREE(c, sizeof(*c));
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/obd_support.h>
#include <linux/lustre_net.h>
-ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq, bulk_source_eq,
- bulk_sink_eq;
+ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq,
+ bulk_put_source_eq, bulk_put_sink_eq,
+ bulk_get_source_eq, bulk_get_sink_eq;
static const ptl_handle_ni_t *socknal_nip = NULL, *toenal_nip = NULL,
*qswnal_nip = NULL, *gmnal_nip = NULL;
return 0;
}
-static int bulk_source_callback(ptl_event_t *ev)
+static int bulk_put_source_callback(ptl_event_t *ev)
{
struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
struct ptlrpc_bulk_page *bulk;
RETURN(0);
}
-static int bulk_sink_callback(ptl_event_t *ev)
+static int bulk_put_sink_callback(ptl_event_t *ev)
{
struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
struct ptlrpc_bulk_page *bulk;
RETURN(1);
}
+static int bulk_get_source_callback(ptl_event_t *ev)
+{
+ struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+ struct ptlrpc_bulk_page *bulk;
+ struct list_head *tmp;
+ struct list_head *next;
+ ptl_size_t total = 0;
+ void (*event_handler)(struct ptlrpc_bulk_desc *);
+ ENTRY;
+
+ LASSERT(ev->type == PTL_EVENT_GET);
+
+ /* put with zero offset */
+ LASSERT(ev->offset == 0);
+ /* used iovs */
+ LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
+ /* 1 fragment for each page always */
+ LASSERT(ev->mem_desc.niov == desc->bd_page_count);
+
+ list_for_each_safe (tmp, next, &desc->bd_page_list) {
+ bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
+
+ total += bulk->bp_buflen;
+
+ if (bulk->bp_cb != NULL)
+ bulk->bp_cb(bulk);
+ }
+
+ LASSERT(ev->mem_desc.length == total);
+
+ /* We need to make a note of whether there's an event handler
+ * before we call wake_up, because if there is no event
+ * handler, 'desc' might be freed before we're scheduled again. */
+ event_handler = desc->bd_ptl_ev_hdlr;
+
+ desc->bd_flags |= PTL_BULK_FL_SENT;
+ wake_up(&desc->bd_waitq);
+ if (event_handler) {
+ LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+ event_handler(desc);
+ }
+
+ RETURN(1);
+}
+
+
+static int bulk_get_sink_callback(ptl_event_t *ev)
+{
+ struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+ struct ptlrpc_bulk_page *bulk;
+ struct list_head *tmp;
+ struct list_head *next;
+ ENTRY;
+
+ CDEBUG(D_NET, "got %s event %d\n",
+ (ev->type == PTL_EVENT_SENT) ? "SENT" :
+ (ev->type == PTL_EVENT_REPLY) ? "REPLY" : "UNEXPECTED",
+ ev->type);
+
+ LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_REPLY);
+
+ LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
+ atomic_read(&desc->bd_source_callback_count) <= 2);
+
+ /* 1 fragment for each page always */
+ LASSERT(ev->mem_desc.niov == desc->bd_page_count);
+
+ if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
+ void (*event_handler)(struct ptlrpc_bulk_desc *);
+
+ list_for_each_safe(tmp, next, &desc->bd_page_list) {
+ bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+ bp_link);
+
+ if (bulk->bp_cb != NULL)
+ bulk->bp_cb(bulk);
+ }
+
+ /* We need to make a note of whether there's an event handler
+ * before we call wake_up, because if there is no event handler,
+ * 'desc' might be freed before we're scheduled again. */
+ event_handler = desc->bd_ptl_ev_hdlr;
+
+ desc->bd_flags |= PTL_BULK_FL_RCVD;
+ wake_up(&desc->bd_waitq);
+ if (event_handler) {
+ LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
+ event_handler(desc);
+ }
+ }
+
+ RETURN(0);
+}
+
int ptlrpc_init_portals(void)
{
int rc;
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
- rc = PtlEQAlloc(ni, 1024, bulk_source_callback, &bulk_source_eq);
+ rc = PtlEQAlloc(ni, 1024, bulk_put_source_callback,
+ &bulk_put_source_eq);
+ if (rc != PTL_OK)
+ CERROR("PtlEQAlloc failed: %d\n", rc);
+
+ rc = PtlEQAlloc(ni, 1024, bulk_put_sink_callback, &bulk_put_sink_eq);
+ if (rc != PTL_OK)
+ CERROR("PtlEQAlloc failed: %d\n", rc);
+
+ rc = PtlEQAlloc(ni, 1024, bulk_get_source_callback,
+ &bulk_get_source_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
- rc = PtlEQAlloc(ni, 1024, bulk_sink_callback, &bulk_sink_eq);
+ rc = PtlEQAlloc(ni, 1024, bulk_get_sink_callback, &bulk_get_sink_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
PtlEQFree(request_out_eq);
PtlEQFree(reply_out_eq);
PtlEQFree(reply_in_eq);
- PtlEQFree(bulk_source_eq);
- PtlEQFree(bulk_sink_eq);
+ PtlEQFree(bulk_put_source_eq);
+ PtlEQFree(bulk_put_sink_eq);
+ PtlEQFree(bulk_get_source_eq);
+ PtlEQFree(bulk_get_sink_eq);
if (qswnal_nip != NULL)
inter_module_put("kqswnal_ni");
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- int len = 0;
- len += snprintf(page, count, "%s\n",
- ((struct obd_device*)data)->obd_uuid);
- return len;
-}
-
-struct lprocfs_vars status_var_nm_1[] = {
- {"status/uuid", rd_uuid, 0, 0},
- {0}
+#ifndef LPROCFS
+struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+#else
+struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0},
+ { 0 }
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
-}
-struct lprocfs_vars status_class_var[] = {
- {"status/num_refs", rd_numrefs, 0, 0},
- {0}
+struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0},
+ { 0 }
};
+
+#endif /* LPROCFS */
+LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/obd.h>
extern ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq,
- bulk_source_eq, bulk_sink_eq;
+ bulk_put_source_eq, bulk_put_sink_eq,
+ bulk_get_source_eq, bulk_get_sink_eq;
static int ptl_send_buf(struct ptlrpc_request *request,
struct ptlrpc_connection *conn, int portal)
OBD_FREE (iov, desc->bd_page_count * sizeof (struct iovec));
}
-int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *desc)
+int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *desc)
{
int rc;
struct list_head *tmp, *next;
desc->bd_md.start = iov;
desc->bd_md.niov = 0;
desc->bd_md.length = 0;
- desc->bd_md.eventq = bulk_source_eq;
+ desc->bd_md.eventq = bulk_put_source_eq;
desc->bd_md.threshold = 2; /* SENT and ACK */
desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
desc->bd_md.user_ptr = desc;
RETURN(0);
}
-int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *desc)
+int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *desc)
+{
+ int rc;
+ struct list_head *tmp, *next;
+ ptl_process_id_t remote_id;
+ __u32 xid = 0;
+ struct iovec *iov;
+ ENTRY;
+
+ iov = ptlrpc_get_bulk_iov (desc);
+ if (iov == NULL)
+ RETURN (-ENOMEM);
+
+ desc->bd_md.start = iov;
+ desc->bd_md.niov = 0;
+ desc->bd_md.length = 0;
+ desc->bd_md.eventq = bulk_get_sink_eq;
+ desc->bd_md.threshold = 2; /* SENT and REPLY */
+ desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_IOV;
+ desc->bd_md.user_ptr = desc;
+
+ atomic_set(&desc->bd_source_callback_count, 2);
+
+ list_for_each_safe(tmp, next, &desc->bd_page_list) {
+ struct ptlrpc_bulk_page *bulk;
+ bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
+
+ LASSERT(desc->bd_md.niov < desc->bd_page_count);
+
+ if (desc->bd_md.niov == 0)
+ xid = bulk->bp_xid;
+ LASSERT(xid == bulk->bp_xid); /* should all be the same */
+
+ iov[desc->bd_md.niov].iov_base = bulk->bp_buf;
+ iov[desc->bd_md.niov].iov_len = bulk->bp_buflen;
+ if (iov[desc->bd_md.niov].iov_len <= 0) {
+ CERROR("bad bp_buflen[%d] @ %p: %d\n", desc->bd_md.niov,
+ bulk->bp_buf, bulk->bp_buflen);
+ CERROR("desc: xid %u, pages %d, ptl %d, ref %d\n",
+ xid, desc->bd_page_count, desc->bd_portal,
+ atomic_read(&desc->bd_refcount));
+ LBUG();
+ }
+ desc->bd_md.niov++;
+ desc->bd_md.length += bulk->bp_buflen;
+ }
+
+ LASSERT(desc->bd_md.niov == desc->bd_page_count);
+ LASSERT(desc->bd_md.niov != 0);
+
+ rc = PtlMDBind(desc->bd_connection->c_peer.peer_ni, desc->bd_md,
+ &desc->bd_md_h);
+
+ ptlrpc_put_bulk_iov (desc, iov); /*move down to reduce latency to send*/
+
+ if (rc != PTL_OK) {
+ CERROR("PtlMDBind failed: %d\n", rc);
+ LBUG();
+ RETURN(rc);
+ }
+
+ remote_id.nid = desc->bd_connection->c_peer.peer_nid;
+ remote_id.pid = 0;
+
+ CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d nid "LPX64" pid "
+ "%d xid %d\n", desc->bd_md.niov, desc->bd_md.length,
+ desc->bd_portal, remote_id.nid, remote_id.pid, xid);
+
+ rc = PtlGet(desc->bd_md_h, remote_id, desc->bd_portal, 0, xid, 0);
+ if (rc != PTL_OK) {
+ CERROR("PtlGet("LPU64", %d, %d) failed: %d\n",
+ remote_id.nid, desc->bd_portal, xid, rc);
+ PtlMDUnlink(desc->bd_md_h);
+ LBUG();
+ RETURN(rc);
+ }
+
+ RETURN(0);
+}
+
+static int ptlrpc_register_bulk_shared(struct ptlrpc_bulk_desc *desc)
{
struct list_head *tmp, *next;
int rc;
desc->bd_md.niov = 0;
desc->bd_md.length = 0;
desc->bd_md.threshold = 1;
- desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
desc->bd_md.user_ptr = desc;
- desc->bd_md.eventq = bulk_sink_eq;
list_for_each_safe(tmp, next, &desc->bd_page_list) {
struct ptlrpc_bulk_page *bulk;
return rc;
}
+int ptlrpc_register_bulk_get(struct ptlrpc_bulk_desc *desc)
+{
+ desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_IOV;
+ desc->bd_md.eventq = bulk_get_source_eq;
+
+ return ptlrpc_register_bulk_shared(desc);
+}
+
+int ptlrpc_register_bulk_put(struct ptlrpc_bulk_desc *desc)
+{
+ desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
+ desc->bd_md.eventq = bulk_put_sink_eq;
+
+ return ptlrpc_register_bulk_shared(desc);
+}
+
int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
{
/* This should be safe: these handles are initialized to be
int rc;
ENTRY;
- if (req->rq_repmsg) {
- CERROR("req already has repmsg\n");
- LBUG();
+ if (!req->rq_repmsg) {
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
+ &req->rq_repmsg);
+ if (rc)
+ RETURN(rc);
}
- rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc)
- RETURN(rc);
req->rq_type = PTL_RPC_MSG_ERR;
source_id.pid = PTL_PID_ANY;
/* add a ref, which will be balanced in request_out_callback */
- atomic_inc(&request->rq_refcount);
+ ptlrpc_request_addref(request);
if (request->rq_replen != 0) {
if (request->rq_reply_md.start != NULL) {
rc = PtlMEUnlink(request->rq_reply_me_h);
if (len < required_len) {
CERROR("len: %d, required_len %d\n", len, required_len);
+ CERROR("bufcount: %d\n", m->bufcount);
+ for (i = 0; i < m->bufcount; i++)
+ CERROR("buffer %d length %d\n", i, m->buflens[i]);
RETURN(-EINVAL);
}
}
if (n < 0 || n >= m->bufcount) {
- CERROR("referencing bad sub buffer in %p (want %d, count %d)!\n",
- m, n, m->bufcount);
+ CERROR("referencing bad sub buffer in %p (want %d, count "
+ "%d)!\n", m, n, m->bufcount);
LBUG();
return NULL;
}
if (m->buflens[n] == 0) {
- CERROR("zero-length buffer requested for buffer %d in %p\n", n,
- m);
+ CERROR("zero-length buffer requested for buffer %d in %p\n",
+ n, m);
return NULL;
}
struct ptlrpc_connection *conn =
list_entry(tmp, struct ptlrpc_connection,
c_recovd_data.rd_managed_chain);
- CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
+ CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn,
+ conn->c_remote_uuid.uuid,
conn->c_recovd_data.rd_phase,
conn->c_recovd_data.rd_next_phase);
}
if (!list_empty(&rd->rd_managed_chain)) {
if (rd->rd_recovd == recovd && rd->rd_recover == recover) {
CDEBUG(D_HA, "conn %p/%s already setup for recovery\n",
- conn, conn->c_remote_uuid);
+ conn, conn->c_remote_uuid.uuid);
EXIT;
return;
}
CDEBUG(D_HA,
"conn %p/%s has recovery items %p/%p, making %p/%p\n",
- conn, conn->c_remote_uuid, rd->rd_recovd, rd->rd_recover,
+ conn, conn->c_remote_uuid.uuid, rd->rd_recovd, rd->rd_recover,
recovd, recover);
spin_lock(&rd->rd_recovd->recovd_lock);
list_del_init(&rd->rd_managed_chain);
spin_lock(&recovd->recovd_lock);
if (rd->rd_phase == RD_TROUBLED || rd->rd_phase == RD_PREPARING) {
CDEBUG(D_HA, "connection %p to %s already in recovery\n",
- conn, conn->c_remote_uuid);
+ conn, conn->c_remote_uuid.uuid);
spin_unlock(&recovd->recovd_lock);
EXIT;
return;
}
CERROR("connection %p to %s (%08x %08lx %08lx) failed\n", conn,
- conn->c_remote_uuid, conn->c_peer.peer_nid,
+ conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
list_del(&rd->rd_managed_chain);
list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
if (rd->rd_phase != RD_IDLE) {
CDEBUG(D_HA,
"connection %p to %s failed in recovery: restarting\n",
- conn, conn->c_remote_uuid);
+ conn, conn->c_remote_uuid.uuid);
/* XXX call callback with PHASE_FAILED? */
rd->rd_next_phase = RD_TROUBLED;
}
ENTRY;
CDEBUG(D_HA, "connection %p (now to %s) fixed\n",
- conn, conn->c_remote_uuid);
+ conn, conn->c_remote_uuid.uuid);
spin_lock(&rd->rd_recovd->recovd_lock);
list_del(&rd->rd_managed_chain);
rd->rd_phase = RD_IDLE;
*
* Portal-RPC reconnection and replay operations, for use in recovery.
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Mike Shaver <shaver@clusterfs.com>
*
- * Copyright (C) 1996 Peter J. Braam <braam@stelias.com>
- * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
- * Copyright (C) 1999 Seagate Technology Inc.
- * Copyright (C) 2001 Mountain View Data, Inc.
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * This file is part of Lustre, http://www.lustre.org.
*
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/config.h>
struct obd_device *obd = imp->imp_obd;
struct client_obd *cli = &obd->u.cli;
int size[] = { sizeof(cli->cl_target_uuid), sizeof(obd->obd_uuid) };
- char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+ char *tmp[] = {cli->cl_target_uuid.uuid, obd->obd_uuid.uuid};
struct ptlrpc_connection *conn = imp->imp_connection;
- struct lustre_handle old_hdl;
- struct ptlrpc_request *request;
+ struct ptlrpc_request *req;
struct obd_export *ldlmexp;
+ struct lustre_handle old_hdl;
int rc;
- request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
- if (!request)
+ req = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+ if (!req)
RETURN(-ENOMEM);
- request->rq_level = LUSTRE_CONN_NEW;
- request->rq_replen = lustre_msg_size(0, NULL);
+ req->rq_level = LUSTRE_CONN_NEW;
+ req->rq_replen = lustre_msg_size(0, NULL);
/*
* This address is the export that represents our client-side LDLM
* service (for ASTs). We should only have one on this list, so we
*/
ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
exp_obd_chain);
- request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
- request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
- rc = ptlrpc_queue_wait(request);
- switch (rc) {
- case EALREADY:
- case -EALREADY:
- /* already connected! */
+ req->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
+ req->rq_reqmsg->cookie = ldlmexp->exp_cookie;
+ rc = ptlrpc_queue_wait(req);
+ if (rc) {
+ CERROR("cannot connect to %s@%s: rc = %d\n",
+ cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid, rc);
+ GOTO(out_disc, rc);
+ }
+ if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) {
memset(&old_hdl, 0, sizeof(old_hdl));
- if (!memcmp(&old_hdl.addr, &request->rq_repmsg->addr,
+ if (!memcmp(&old_hdl.addr, &req->rq_repmsg->addr,
sizeof (old_hdl.addr)) &&
- !memcmp(&old_hdl.cookie, &request->rq_repmsg->cookie,
+ !memcmp(&old_hdl.cookie, &req->rq_repmsg->cookie,
sizeof (old_hdl.cookie))) {
- CERROR("%s@%s didn't like our handle "LPX64"/"LPX64", failed\n",
- cli->cl_target_uuid, conn->c_remote_uuid,
+ CERROR("%s@%s didn't like our handle "LPX64"/"LPX64
+ ", failed\n", cli->cl_target_uuid.uuid,
+ conn->c_remote_uuid.uuid,
(__u64)(unsigned long)ldlmexp,
ldlmexp->exp_cookie);
GOTO(out_disc, rc = -ENOTCONN);
}
- old_hdl.addr = request->rq_repmsg->addr;
- old_hdl.cookie = request->rq_repmsg->cookie;
+ old_hdl.addr = req->rq_repmsg->addr;
+ old_hdl.cookie = req->rq_repmsg->cookie;
if (memcmp(&imp->imp_handle, &old_hdl, sizeof(old_hdl))) {
- CERROR("%s@%s changed handle from "LPX64"/"LPX64" to "LPX64"/"LPX64"; "
+ CERROR("%s@%s changed handle from "LPX64"/"LPX64
+ " to "LPX64"/"LPX64"; "
"copying, but this may foreshadow disaster\n",
- cli->cl_target_uuid, conn->c_remote_uuid,
+ cli->cl_target_uuid.uuid,
+ conn->c_remote_uuid.uuid,
old_hdl.addr, old_hdl.cookie,
imp->imp_handle.addr, imp->imp_handle.cookie);
- imp->imp_handle.addr = request->rq_repmsg->addr;
- imp->imp_handle.cookie = request->rq_repmsg->cookie;
- GOTO(out_disc, rc = EALREADY);
+ imp->imp_handle.addr = req->rq_repmsg->addr;
+ imp->imp_handle.cookie = req->rq_repmsg->cookie;
+ GOTO(out_disc, rc = 0);
}
CERROR("reconnected to %s@%s after partition\n",
- cli->cl_target_uuid, conn->c_remote_uuid);
- GOTO(out_disc, rc = EALREADY);
- case 0:
- old_hdl = imp->imp_handle;
- imp->imp_handle.addr = request->rq_repmsg->addr;
- imp->imp_handle.cookie = request->rq_repmsg->cookie;
- CERROR("now connected to %s@%s ("LPX64"/"LPX64", was "LPX64"/"LPX64")!\n",
- cli->cl_target_uuid, conn->c_remote_uuid,
- imp->imp_handle.addr, imp->imp_handle.cookie,
- old_hdl.addr, old_hdl.cookie);
+ cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid);
GOTO(out_disc, rc = 0);
- default:
- CERROR("cannot connect to %s@%s: rc = %d\n",
- cli->cl_target_uuid, conn->c_remote_uuid, rc);
- GOTO(out_disc, rc = -ENOTCONN); /* XXX preserve rc? */
}
+ old_hdl = imp->imp_handle;
+ imp->imp_handle.addr = req->rq_repmsg->addr;
+ imp->imp_handle.cookie = req->rq_repmsg->cookie;
+ CERROR("reconnected to %s@%s ("LPX64"/"LPX64", was "LPX64"/"
+ LPX64")!\n", cli->cl_target_uuid.uuid, conn->c_remote_uuid.uuid,
+ imp->imp_handle.addr, imp->imp_handle.cookie,
+ old_hdl.addr, old_hdl.cookie);
+ GOTO(out_disc, rc = 0);
+
out_disc:
- *reqptr = request;
+ *reqptr = req;
return rc;
}
ENTRY;
argv[0] = obd_recovery_upcall;
- argv[1] = conn->c_remote_uuid;
+ argv[1] = conn->c_remote_uuid.uuid;
argv[2] = NULL;
envp[0] = "HOME=/";
ptlrpc_free_committed(imp);
CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
- imp, imp->imp_obd->u.cli.cl_target_uuid, committed);
+ imp, imp->imp_obd->u.cli.cl_target_uuid.uuid, committed);
list_for_each(tmp, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <linux/init.h>
#include <linux/lprocfs_status.h>
-
-
extern int ptlrpc_init_portals(void);
extern void ptlrpc_exit_portals(void);
-extern struct lprocfs_vars status_var_nm_1[];
-extern struct lprocfs_vars status_class_var[];
int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf)
{
c_recovd_data.rd_managed_chain);
LASSERT(conn->c_recovd_data.rd_recovd == recovd); /* sanity */
-
- if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+#warning check buffer overflow in next line
+ if (!strcmp(conn->c_remote_uuid.uuid, data->ioc_inlbuf1))
break;
conn = NULL;
}
LASSERT(conn->c_recovd_data.rd_recovd == recovd);
- if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+#warning check buffer overflow in next line
+ if (!strcmp(conn->c_remote_uuid.uuid, data->ioc_inlbuf1))
break;
conn = NULL;
}
spin_unlock(&recovd->recovd_lock);
recovd_conn_fail(conn);
spin_lock(&recovd->recovd_lock);
-
- /* Jump straight to the "failed" phase of recovery. */
- conn->c_recovd_data.rd_phase = RD_FAILED;
goto out;
}
if (data->ioc_inllen2) {
CERROR("conn %p UUID change %s -> %s\n",
- conn, conn->c_remote_uuid, data->ioc_inlbuf2);
- strcpy(conn->c_remote_uuid, data->ioc_inlbuf2);
+ conn, conn->c_remote_uuid.uuid, data->ioc_inlbuf2);
+ obd_str2uuid(&conn->c_remote_uuid, data->ioc_inlbuf2);
} else {
CERROR("conn %p UUID %s reconnected\n", conn,
- conn->c_remote_uuid);
+ conn->c_remote_uuid.uuid);
}
- ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
+ ptlrpc_readdress_connection(conn, &conn->c_remote_uuid);
spin_unlock(&conn->c_lock);
conn->c_recovd_data.rd_phase = RD_PREPARED;
}
static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
- obd_uuid_t cluuid, struct recovd_obd *recovd,
+ struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
return class_connect(conn, src, cluuid);
int connmgr_attach(struct obd_device *dev, obd_count len, void *data)
{
- return lprocfs_reg_obd(dev, status_var_nm_1, dev);
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(&lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
int conmgr_detach(struct obd_device *dev)
{
- return lprocfs_dereg_obd(dev);
+ return lprocfs_obd_detach(dev);
}
/* use obd ops to offer management infrastructure */
static int __init ptlrpc_init(void)
{
+ struct lprocfs_static_vars lvars;
int rc;
+ ENTRY;
+
rc = ptlrpc_init_portals();
if (rc)
RETURN(rc);
ptlrpc_init_connection();
- rc = class_register_type(&recovd_obd_ops, status_class_var,
+
+ lprocfs_init_vars(&lvars);
+ rc = class_register_type(&recovd_obd_ops, lvars.module_vars,
LUSTRE_HA_NAME);
if (rc)
RETURN(rc);
ptlrpc_put_connection_superhack = ptlrpc_put_connection;
- return 0;
+ ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
+ RETURN(0);
}
static void __exit ptlrpc_exit(void)
EXPORT_SYMBOL(ptlrpc_cleanup_connection);
/* niobuf.c */
-EXPORT_SYMBOL(ptlrpc_send_bulk);
-EXPORT_SYMBOL(ptlrpc_register_bulk);
+EXPORT_SYMBOL(ptlrpc_bulk_put);
+EXPORT_SYMBOL(ptlrpc_bulk_get);
+EXPORT_SYMBOL(ptlrpc_register_bulk_put);
+EXPORT_SYMBOL(ptlrpc_register_bulk_get);
EXPORT_SYMBOL(ptlrpc_abort_bulk);
EXPORT_SYMBOL(ptlrpc_reply);
EXPORT_SYMBOL(ptlrpc_error);
EXPORT_SYMBOL(ptlrpc_prep_req);
EXPORT_SYMBOL(ptlrpc_free_req);
EXPORT_SYMBOL(ptlrpc_req_finished);
+EXPORT_SYMBOL(ptlrpc_request_addref);
EXPORT_SYMBOL(ptlrpc_prep_bulk);
EXPORT_SYMBOL(ptlrpc_free_bulk);
EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
EXPORT_SYMBOL(ptlrpc_free_bulk_page);
EXPORT_SYMBOL(ll_brw_sync_wait);
EXPORT_SYMBOL(ptlrpc_abort_inflight);
+EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
/* service.c */
EXPORT_SYMBOL(ptlrpc_init_svc);
EXPORT_SYMBOL(ptlrpc_resend);
EXPORT_SYMBOL(ptlrpc_wake_delayed);
-MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Request Processor v1.0");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Request Processor");
MODULE_LICENSE("GPL");
module_init(ptlrpc_init);
ptlrpc_init_svc(__u32 nevents, __u32 nbufs,
__u32 bufsize, __u32 max_req_size,
int req_portal, int rep_portal,
- obd_uuid_t uuid, svc_handler_t handler, char *name)
+ struct obd_uuid *uuid, svc_handler_t handler, char *name)
{
int err;
int rc, i;
service->srv_req_portal = req_portal;
service->srv_handler = handler;
- err = kportal_uuid_to_peer(uuid, &service->srv_self);
+ err = kportal_uuid_to_peer(uuid->uuid, &service->srv_self);
if (err) {
- CERROR("%s: cannot get peer for uuid '%s'\n", name, uuid);
+ CERROR("%s: cannot get peer for uuid '%s'\n", name,
+ uuid->uuid);
OBD_FREE(service, sizeof(*service));
RETURN(NULL);
}
if (request->rq_reqlen < sizeof(struct lustre_msg)) {
CERROR("incomplete request (%d): ptl %d from "LPX64" xid "
- LPD64"\n",
+ LPU64"\n",
request->rq_reqlen, svc->srv_req_portal,
event->initiator.nid, request->rq_xid);
goto out;
}
- CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPX64":"LPX64":%d\n",
+ CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPU64":"LPX64":%d\n",
NTOH__u32(request->rq_reqmsg->status),
request->rq_xid,
event->initiator.nid,
%define linuxdir @LINUX@
%define portalsdir @PORTALS@
%define portalslibdir @PORTALSLIB@
-Release: 0208282230chaos
+Release: 0301070810ltutor3
Summary: Lustre Lite File System
Name: lustre-lite
%description -n lustre-doc
Documentation and sample configuration files for Lustre
+%package -n lustre-ldap
+Summary: Configures openldap server for LDAP Lustre config database
+Group: Configuration
+Requires: openldap-servers, openldap-clients, python-ldap, 4Suite
+
+%description -n lustre-ldap
+Configures openldap server for LDAP Lustre config database
+
%prep
%setup -qn lustre-%{version}
ln -s $RPM_BUILD_ROOT/usr/src lustre-source
make distdir distdir=lustre-source/lustre-%{version}
+# ldap database directory
+mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre
+
%files
%attr(-, root, root) /usr/sbin/lmc
%attr(-, root, root) /usr/sbin/lctl
%attr(-, root, root) /usr/sbin/lconf
+%attr(-, root, root) /usr/sbin/llanalyze
+%attr(-, root, root) /usr/sbin/lfind
+%attr(-, root, root) /usr/sbin/lstripe
+%attr(-, root, root) /usr/sbin/mcreate
%attr(-, root, root) /usr/lib/lustre/examples/llmount.sh
%attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh
%attr(-, root, root) /usr/lib/lustre/examples/llecho.sh
%files -n lustre-source
%attr(-, root, root) /usr/src/lustre-%{version}
+%files -n lustre-ldap
+%attr(-, root, root) /etc/openldap/slapd-lustre.conf
+%attr(-, root, root) /etc/openldap/schema/lustre.schema
+%attr(-, root, root) /usr/lib/lustre/lustre2ldif.xsl
+%attr(-, root, root) /usr/lib/lustre/top.ldif
+%dir /var/lib/ldap/lustre
+%attr(700, ldap, ldap) /var/lib/ldap/lustre
+
%post
if [ ! -e /dev/obd ]; then
mknod /dev/obd c 10 241
%postun
depmod -ae || exit 0
+%post -n lustre-ldap
+if ! grep -q slapd-lustre /etc/openldap/slapd.conf; then
+ echo "include /etc/openldap/slapd-lustre.conf" >> /etc/openldap/slapd.conf
+fi
+
+%postun -n lustre-ldap
+slapd=/etc/openldap/slapd.conf
+if grep -q slapd-lustre $slapd; then
+ tmp=/tmp/lustre-ldap.$$
+ sed "/slapd-lustre/d" $slapd >> $tmp
+ cp $tmp $slapd
+ rm $tmp
+fi
+
%clean
#rm -rf $RPM_BUILD_ROOT
multifstat
checkstat
wantedi
+createtest
+open_delay
ostreq.sh runfailure-client-mds-recover.sh runfailure-mds \
runfailure-net runfailure-ost runiozone runregression-net.sh \
runtests runvmstat snaprun.sh tbox.sh common.sh
-noinst_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate
+noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany mkdirmany multifstat
+noinst_PROGRAMS += stat createmany statmany mkdirmany multifstat createtest
# noinst_PROGRAMS += ldaptest
noinst_PROGRAMS += checkstat wantedi
+sbin_PROGRAMS = mcreate
# ldaptest_SOURCES = ldaptest.c
tchmod_SOURCES = tchmod.c
multifstat_SOURCES = multifstat.c
checkstat_SOURCES = checkstat.c
wantedi_SOURCES = wantedi.c
+createtest_SOURCES = createtest.c
+open_delay_SOURCES = open_delay.c
include $(top_srcdir)/Rules
--- /dev/null
+#!/bin/sh
+set -e
+
+#
+# Runs create.pl and rename.pl on a single mountpoint with increasing
+# load, varying debug levels
+#
+
+SRCDIR="`dirname $0`/"
+. $SRCDIR/common.sh
+
+MNT=${MNT:-/mnt/lustre}
+
+debug_client_on
+echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
+perl create.pl -- $MNT -1 10
+echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
+perl create.pl --silent -- $MNT -1 100
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
+perl create.pl --mcreate=0 -- $MNT -1 10
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
+perl create.pl --mcreate=0 --silent -- $MNT -1 100
+echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
+perl rename.pl $MNT 10
+echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
+perl rename.pl --silent $MNT 100
+
+debug_client_off
+echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
+perl create.pl --silent -- $MNT -1 1000
+echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
+perl create.pl --silent --mcreate=0 -- $MNT -1 1000
+echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
+perl rename.pl --silent $MNT 1000
+
+debug_client_on
+echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
+perl create.pl --silent -- $MNT -1 100 &
+perl create.pl --silent -- $MNT -1 100 &
+wait
+echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
+perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+wait
+echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
+perl rename.pl --silent $MNT 1000 &
+perl rename.pl --silent $MNT 1000 &
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
+perl create.pl --silent -- $MNT -1 2000 &
+perl create.pl --silent -- $MNT -1 2000 &
+wait
+echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
+perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+wait
+echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
+perl rename.pl --silent $MNT 2000 &
+perl rename.pl --silent $MNT 2000 &
+wait
+
+debug_client_on
+echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+ perl create.pl --silent -- $MNT -1 100 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+ perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+done
+wait
+echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
+for i in `seq 1 4`; do
+ perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl create.pl --silent -- $MNT -1 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+done
+wait
+echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_on
+echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+ perl create.pl --silent -- $MNT -1 500 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+ perl create.pl --silent --mcreate=0 -- $MNT -1 500 &
+done
+wait
+echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
+for i in `seq 1 8`; do
+ perl rename.pl --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl create.pl --silent -- $MNT -1 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+done
+wait
+echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl rename.pl --silent $MNT 2000 &
+done
+wait
fi
[ "$SANITY" != "no" ] && sh sanity.sh
+ [ "$SANITY" != "no" ] && START=" " CLEAN=" " sh sanity.sh
if [ "$DBENCH" != "no" ]; then
mount | grep $MNT || sh llmount.sh
fi
mount | grep $MNT && sh llmountcleanup.sh
done
+
+[ "$SANITYN" != "no" ] && NAME=mount2 sh sanityN.sh
+
${LMC} --add ost --node $OST --obd obd1 --obdtype=obdecho -obduuid $OBD_UUID
# osc on client
-${LMC} --add oscref --node $CLIENT --echo_client obd1
+${LMC} --add echo_client --node $CLIENT --obd obd1
$LMC_REAL --batch $BATCH
rm -f $BATCH
--- /dev/null
+#!/bin/bash
+
+ mkdir /mnt/lustre/d22
+ mkdir /mnt/lustre/d22/etc
+ ./mcreate /mnt/lustre/d22/etc/foo
+ ls -ld /mnt/lustre/etc
+ ls -ld /mnt/lustre/d22/etc
#!/usr/bin/perl
use Getopt::Long;
-GetOptions("silent!"=> \$silent);
+my $silent = 0;
+my $mcreate = 1; # should we use mcreate or open?
+my $files = 5;
+
+GetOptions("silent!" => \$silent,
+ "mcreate=i" => \$mcreate,
+ "files=i" => \$files);
my $mtpt = shift || usage();
my $mount_count = shift || usage();
my $i = shift || usage();
-my $files = 5;
-my $mcreate = 0; # should we use mcreate or open?
+my $count = $i;
sub usage () {
- print "Usage: $0 <mount point prefix> <mount count> <iterations>\n";
+ print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
print "example: $0 /mnt/lustre 2 50\n";
print " will test in /mnt/lustre1 and /mnt/lustre2\n";
print " $0 /mnt/lustre -1 50\n";
} else {
print "Unlink done [$$] $path: $!\n"if !$silent;
}
+ if (($count - $i) % 100 == 0) {
+ print STDERR ($count - $i) . " operations [" . $$ . "]\n";
+ }
}
+
+my $which = "";
+if ($mount_count > 0) {
+ $which = int(rand() * $mount_count) + 1;
+}
+for ($d = 0; $d < $files; $d++) {
+ unlink("$mtpt$which/$d");
+}
+
print "Done.\n";
#include <unistd.h>
#include <stdlib.h>
+void usage(char *prog)
+{
+ printf("usage: %s {-o|-m} filenamefmt count\n", prog);
+ printf(" %s {-o|-m} filenamefmt -seconds\n", prog);
+ printf(" %s {-o|-m} filenamefmt start count\n", prog);
+}
+
int main(int argc, char ** argv)
{
int i, rc = 0, do_open;
+ char format[4096], *fmt;
char filename[4096];
- long int start, last, end, count;
+ long start, last, end;
+ long begin = 0, count;
- if (argc != 4) {
- printf("Usage %s <-o|-m> filenamebase <count|-time>\n",
- argv[0]);
+ if (argc < 4 || argc > 5) {
+ usage(argv[0]);
return 1;
}
} else if (strcmp(argv[1], "-m") == 0) {
do_open = 0;
} else {
- printf("Usage %s {-o|-m} filenamebase <count|-time>\n",
- argv[0]);
+ usage(argv[0]);
return 1;
}
start = last = time(0);
- end = strtol(argv[3], NULL, 0);
-
- if (end > 0) {
- count = end;
- end = -1UL >> 1;
- } else {
- end = start - end;
- count = -1UL >> 1;
- }
+ if (argc == 4) {
+ end = strtol(argv[3], NULL, 0);
+ if (end > 0) {
+ count = end;
+ end = -1UL >> 1;
+ } else {
+ end = start - end;
+ count = -1UL >> 1;
+ }
+ } else {
+ end = -1UL >> 1;
+ begin = strtol(argv[3], NULL, 0);
+ count = strtol(argv[4], NULL, 0);
+ }
- for (i = 0; i < count && time(0) < end; i++) {
- sprintf(filename, "%s%d", argv[2], i);
+ if (strchr(argv[2], '%'))
+ fmt = argv[2];
+ else {
+ sprintf(format, "%s%%d", argv[2]);
+ fmt = format;
+ }
+ for (i = 0; i < count && time(0) < end; i++, begin++) {
+ sprintf(filename, fmt, begin);
if (do_open) {
int fd = open(filename, O_CREAT|O_RDWR, 0644);
if (fd < 0) {
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#ifndef S_SHIFT
+#define S_SHIFT 12
+#endif
+
+int usage(char *prog)
+{
+ fprintf(stderr, "usage: %s <basename>\n", prog);
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ char name[4096];
+ int i;
+
+ if (argc != 2)
+ usage(argv[0]);
+
+ umask(0);
+ for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {
+ struct stat st;
+ int mode = i | 0644;
+ int rc;
+
+ sprintf(name, "%s-mknod%06o", argv[1], mode);
+ rc = mknod(name, mode, 0x1234);
+ switch (i) {
+ case 0:
+ mode |= S_IFREG;
+ case S_IFREG:
+ case S_IFCHR: case S_IFBLK:
+ if (rc < 0 && getuid() != 0)
+ continue;
+ case S_IFSOCK: case S_IFIFO:
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR mknod %s: %s\n",
+ argv[0], name, strerror(errno));
+ exit(10);
+ }
+ rc = stat(name, &st);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR stat %s: %s",
+ argv[0], name, strerror(errno));
+ exit(11);
+ }
+ if (st.st_mode != mode) {
+ fprintf(stderr, "%s: ERROR mode %s: %o != %o",
+ argv[0], name, st.st_mode, mode);
+ exit(12);
+ }
+ rc = unlink(name);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR unlink %s: %s",
+ argv[0], name, strerror(errno));
+ exit(13);
+ }
+ break;
+ default:
+ if (rc == 0) {
+ fprintf(stderr, "%s: ERROR: %s created\n",
+ argv[0], name);
+ exit(14);
+ }
+ }
+ }
+
+ for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {
+ struct stat st;
+ int mode;
+ int fd;
+ int rc;
+
+ mode = i | 0644;
+ sprintf(name, "%s-creat%06o", argv[1], mode);
+ fd = open(name, O_CREAT|O_RDONLY, mode);
+ if (fd < 0) {
+ fprintf(stderr, "%s: ERROR creat %s: %s\n",
+ argv[0], name, strerror(errno));
+ exit(21);
+ }
+ close(fd);
+ rc = stat(name, &st);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR stat %s: %s",
+ argv[0], name, strerror(errno));
+ exit(11);
+ }
+ if ((st.st_mode & S_IFMT) != S_IFREG) {
+ fprintf(stderr, "%s: ERROR mode %s: %o != %o",
+ argv[0], name, st.st_mode & S_IFMT, S_IFREG);
+ exit(12);
+ }
+ rc = unlink(name);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR unlink %s: %s\n",
+ argv[0], name, strerror(errno));
+ exit(20);
+ }
+ }
+
+ for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) {
+ struct stat st;
+ int rc;
+
+ sprintf(name, "%s-mkdir%06o", argv[1], i | 0644);
+ rc = mkdir(name, i | 0664);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR mkdir %s: %s\n",
+ argv[0], name, strerror(errno));
+ exit(30);
+ }
+ rc = stat(name, &st);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR stat %s: %s",
+ argv[0], name, strerror(errno));
+ exit(11);
+ }
+ if ((st.st_mode & S_IFMT) != S_IFDIR) {
+ fprintf(stderr, "%s: ERROR mode %s: %o != %o",
+ argv[0], name, st.st_mode & S_IFMT, S_IFDIR);
+ exit(12);
+ }
+ rc = rmdir(name);
+ if (rc < 0) {
+ fprintf(stderr, "%s: ERROR rmdir %s: %s\n",
+ argv[0], name, strerror(errno));
+ exit(31);
+ }
+ }
+
+ printf("%s: SUCCESS\n", argv[0]);
+ return 0;
+}
#!/bin/bash
-config=${1:-$(basename $0 .sh).xml}
+LOV=${LOV:-0}
+while [ "$1" ]; do
+ case $1 in
+ --lov) LOV="1" ;;
+ *) [ -z $config ] && config=$1 || OPTS="$OPTS $1" ;;
+ esac
+ shift
+done
+
+config=${config:-$(basename $0 .sh).xml}
LMC=${LMC:-../utils/lmc -m $config}
+TMP=${TMP:-/tmp}
-SERVER=localhost
-CLIENT=localhost
+SERVER=${SERVER:-localhost}
+CLIENT=${CLIENT:-localhost}
+NET=${NET:-tcp}
# FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1}
MDSSIZE=10000
STRIPE_BYTES=65536
STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs
-LOV=0
-while [ "$1" ]; do
- case $1 in
- --lov) LOV="1" ;;
- *) OPTS="$OPTS $1" ;;
- esac
- shift
-done
-
rm -f $config
# create nodes
$LMC --add node --node $SERVER || exit 1
-$LMC --add net --node $SERVER --nid $SERVER --nettype tcp || exit 2
+$LMC --add net --node $SERVER --nid $SERVER --nettype $NET || exit 2
if (($LOV)); then
$LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
$LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
- $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 12
- $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 13
+ $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
+ $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
OBD_NAME=lov1
else
- $LMC --add ost --obd obd1 --node $SERVER --obdtype=obdecho || exit 2
+ $LMC --add ost --ost obd1 --node $SERVER --osdtype=obdecho || exit 12
OBD_NAME=obd1
fi
if [ "$SERVER" != "$CLIENT" ]; then
$LMC --add node --node $CLIENT || exit 1
- $LMC --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 2
+ $LMC --add net --node $CLIENT --nid $CLIENT --nettype $NET || exit 2
fi
-$LMC --add echo_client --node $CLIENT --obd ${OBD_NAME} || exit 3
+$LMC --add echo_client --node $CLIENT --ost ${OBD_NAME} || exit 3
$name = $6;
$size = $7;
$addr = $8;
+
+ # we can't dump the log after portals has exited, so skip "leaks"
+ # from memory freed in the portals module unloading.
+ if ($func eq 'portals_handle_init') {
+ next;
+ }
printf("%8s %6d bytes at %s called %s (%s:%s:%d)\n", $type, $size,
$addr, $name, $file, $func, $lno);
} else {
--- /dev/null
+#!/bin/sh
+TMP=${TMP:-/tmp}
+cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
+ MOD="../$M"
+ MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
+ MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+
+ nm $MOD > $MAP
+ echo namelist -a $PWD/$MOD
+ echo symtab -a $PWD/$MAP $MODNAME
+done
config=$NAME.xml
mkconfig=$NAME.sh
+if [ "$PORTALS" ]; then
+ portals_opt="--portals=$PORTALS"
+fi
+
+[ -x $LCONF ] || chmod a+rx $LCONF
+
sh $mkconfig $config || exit 1
-${LCONF} --reformat --gdb $config || exit 2
+${LCONF} $portals_opt --reformat --gdb $config || exit 2
+++ /dev/null
-#!/bin/sh
-# suggested boilerplate for test script
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local2-hack}
-
-config=$NAME.xml
-
-${LCONF} --reformat --gdb $config || exit 2
-
-../utils/lctl <<EOF
-newdev
-attach osc OSC2_localhost OSC2_localhost_UUID
-setup OBD_localhost_UUID NET_localhost_tcp_UUID
-newdev
-attach mdc MDC2_mds1 MDC2_uuid
-setup mds1_UUID NET_localhost_tcp_UUID
-quit
-EOF
-
-mount -t lustre_lite -o osc=OSC2_localhost_UUID,mdc=MDC2_uuid none /mnt/lustre2
+++ /dev/null
-#!/bin/sh
-# suggested boilerplate for test script
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local2-hack}
-
-config=$NAME.xml
-
-umount /mnt/lustre1
-umount /mnt/lustre2
-../utils/lctl <<EOF
-name2dev OSC2_localhost
-cleanup
-detach
-name2dev MDC2_mds1
-cleanup
-detach
-quit
-EOF
-
-${LCONF} --cleanup $config
config=$NAME.xml
mkconfig=$NAME.sh
+if [ "$PORTALS" ]; then
+ portals_opt="--portals=$PORTALS"
+fi
+
if [ ! -f $config ]; then
sh $mkconfig $config || exit 1
fi
sync; sleep 2; sync
-${LCONF} --cleanup --dump $TMP/debug $config
+${LCONF} $portals_opt --cleanup --dump $TMP/debug $config
+rc=$?
BUSY=`dmesg | grep -i destruct`
if [ "$BUSY" ]; then
echo "$BUSY" 1>&2
mv $TMP/debug $TMP/debug-busy.`date +%s`
- exit -1
+ exit 255
fi
LEAK_LUSTRE=`dmesg | tail -20 | grep -v "leaked: 0" | grep leaked`
LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"`
echo "$LEAK_LUSTRE" 1>&2
echo "$LEAK_PORTALS" 1>&2
mv $TMP/debug $TMP/debug-leak.`date +%s`
- exit -2
+ exit 254
fi
+
+exit $rc
+++ /dev/null
-#!/bin/sh
-
-umount /mnt/lustre2
-umount /mnt/lustre1
-../utils/lctl <<EOF
-name2dev OSC2_localhost
-cleanup
-detach
-name2dev MDC2_mds1
-cleanup
-detach
-quit
-EOF
-
-LCONF=${LCONF:-../utils/lconf}
-NAME=${NAME:-local}
-
-config=$NAME.xml
-mkconfig=./$NAME.sh
-
-if [ ! -f $config -o $mkconfig -nt $config ]; then
- sh $mkconfig $config || exit 1
-fi
-
-${LCONF} --cleanup --dump /tmp/debug $config
config=$NAME.xml
mkconfig=$NAME.sh
+if [ "$PORTALS" ]; then
+ portals_opt="--portals=$PORTALS"
+fi
+
if [ ! -f $config -o $mkconfig -nt $config ]; then
sh $mkconfig $config || exit 1
fi
-${LCONF} --gdb $config || exit 2
+${LCONF} $portals_opt --gdb $config || exit 2
${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} --add ost --node localhost --obd obd1 --dev $OSTDEV --size $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost obd1 --dev $OSTDEV --size $OSTSIZE || exit 30
# create client config
-${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --ost obd1 || exit 40
+++ /dev/null
-<?xml version='1.0' encoding='UTF-8'?>
-<lustre>
- <ldlm name='ldlm' uuid='ldlm_UUID'/>
- <node name='localhost' uuid='localhost_UUID'>
- <profile>
- <ldlm_ref uuidref='ldlm_UUID'/>
- <network_ref uuidref='NET_localhost_tcp_UUID'/>
- <mds_ref uuidref='mds1_UUID'/>
- <obd_ref uuidref='OBD_localhost_UUID'/>
- <ost_ref uuidref='OST_localhost_UUID'/>
- <mountpoint_ref uuidref='MNT_localhost_UUID'/>
- </profile>
- <network type='tcp' name='NET_localhost_tcp' uuid='NET_localhost_tcp_UUID'>
- <server>localhost</server>
- <port>988</port>
- </network>
- </node>
- <mds name='mds1' uuid='mds1_UUID'>
- <fstype>extN</fstype>
- <device size='50000'>/tmp/mds1</device>
- <autoformat>yes</autoformat>
- <network_ref uuidref='NET_localhost_tcp_UUID'/>
- <node_ref uuidref='localhost_UUID'/>
- </mds>
- <obd type='obdfilter' name='OBD_localhost' uuid='OBD_localhost_UUID'>
- <fstype>extN</fstype>
- <device size='200000'>/tmp/ost1</device>
- <autoformat>yes</autoformat>
- </obd>
- <osc name='OSC_localhost' uuid='OSC_localhost_UUID'>
- <ost_ref uuidref='OST_localhost_UUID'/>
- <obd_ref uuidref='OBD_localhost_UUID'/>
- </osc>
- <ost name='OST_localhost' uuid='OST_localhost_UUID'>
- <network_ref uuidref='NET_localhost_tcp_UUID'/>
- <obd_ref uuidref='OBD_localhost_UUID'/>
- </ost>
- <mountpoint name='MNT_localhost' uuid='MNT_localhost_UUID'>
- <mds_ref uuidref='mds1_UUID'/>
- <osc_ref uuidref='OSC_localhost_UUID'/>
- <path>/mnt/lustre1</path>
- </mountpoint>
-</lustre>
for (i=0 ; i < count ; i++) {
sprintf(dirname, "%s-%d", argv[1], i);
- rc = mkdir(dirname, S_IFREG| 0444);
+ rc = mkdir(dirname, 0444);
if (rc) {
printf("mkdir(%s) error: %s\n",
dirname, strerror(errno));
config=${1:-mount2.xml}
-LMC=${LMC:-../utils/lmc}
+LMC="${LMC:-../utils/lmc} -m $config"
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
-OSTDEV=$TMP/ost1
-OSTSIZE=100000
+OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTSIZE=${OSTSIZE:-200000}
kver=`uname -r | cut -d "." -f 1,2`
;;
esac
+
+rm -f $config
+
# create nodes
-${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
+${LMC} --add node --node localhost || exit 10
+${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11
# configure mds server
-${LMC} -m $config --add mds --format --node localhost $FSTYPE --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 2
+${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} -m $config --add ost --format --obd obd1 --node localhost $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 3
+${LMC} --add ost --node localhost --obd obd1 --dev $OSTDEV --size $OSTSIZE || exit 30
# create client config
-${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 4
-${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 4
+${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 40
--- /dev/null
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_lite.h>
+#include <linux/obd_lov.h>
+
+int main(int argc, char **argv)
+{
+ int fd;
+
+ if (argc != 2) {
+ printf("Usage %s <filename>\n", argv[0]);
+ exit(1);
+ }
+
+ fd = open(argv[1], O_RDONLY | O_LOV_DELAY_CREATE);
+ if (fd == -1) {
+ printf("Error opening %s\n", argv[1]);
+ exit(1);
+ }
+
+ return 0;
+}
int main(int argc, char **argv)
{
+ char *fname, *fname2;
int fd, rc;
- if (argc != 2) {
- fprintf(stderr, "usage: %s filename\n", argv[0]);
+ if (argc < 2 || argc > 3) {
+ fprintf(stderr, "usage: %s filename [filename2]\n", argv[0]);
exit(1);
- } else {
- fprintf(stderr, "congratulations - program starting\n");
}
+ fname = argv[1];
+ if (argc == 3)
+ fname2 = argv[2];
+ else
+ fname2 = argv[1];
+
fprintf(stderr, "opening\n");
- fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0644);
+ fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
if (fd == -1) {
fprintf(stderr, "open (normal) %s\n", strerror(errno));
exit(1);
exit(1);
}
- fprintf(stderr, "closing\n");
- rc = close(fd);
- if (rc) {
- fprintf(stderr, "close (normal) %s\n", strerror(errno));
- exit(1);
- }
-
- fprintf(stderr, "opening again\n");
- fd = open(argv[1], O_RDWR);
- if (fd == -1) {
- fprintf(stderr, "open (unlink) %s\n", strerror(errno));
- exit(1);
- }
-
-#if 0
- fprintf(stderr, "unlinking\n");
- rc = unlink(argv[1]);
- if (rc) {
- fprintf(stderr, "unlink %s\n", strerror(errno));
- exit(1);
- }
-#else
- printf("unlink %s and press enter\n", argv[1]);
- getc(stdin);
-#endif
+ if (argc == 3) {
+ fprintf(stderr, "closing %s\n", fname);
+ rc = close(fd);
+ if (rc) {
+ fprintf(stderr, "close (normal) %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fprintf(stderr, "opening %s\n", fname2);
+ fd = open(fname2, O_RDWR);
+ if (fd == -1) {
+ fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fprintf (stderr, "unlinking %s\n", fname2);
+ rc = unlink(fname2);
+ if (rc) {
+ fprintf(stderr, "unlink %s\n", strerror(errno));
+ exit(1);
+ }
+
+ if (access(fname2, F_OK) == 0) {
+ fprintf(stderr, "%s still exists\n", fname2);
+ exit(1);
+ }
+ } else {
+ printf("unlink %s and press enter\n", fname);
+ getc(stdin);
+ }
+
+ if (access(fname, F_OK) == 0) {
+ fprintf(stderr, "%s still exists\n", fname);
+ exit(1);
+ }
fprintf(stderr, "reading\n");
rc = read(fd, buf, strlen(T1) + 1);
--- /dev/null
+#!/bin/sh
+
+set -ex
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+PATH=$PATH:$LUSTRE/utils:$LUSTRE/tests
+
+. $LUSTRE/../ltest/functional/llite/common/common.sh
+
+PDSH='pdsh -S -w'
+
+# XXX I wish all this stuff was in some default-config.sh somewhere
+MDSNODE=${MDSNODE:-dev2}
+OSTNODE=${OSTNODE:-dev3}
+CLIENT=${CLIENTNODE:-dev4}
+NETWORKTYPE=${NETWORKTYPE:-tcp}
+MOUNTPT=${MOUNTPT:-/mnt/lustre}
+CONFIG=recovery-small.xml
+MDSDEV=/tmp/mds
+OSTDEV=/tmp/ost
+MDSSIZE=100000
+OSTSIZE=100000
+
+do_mds() {
+ $PDSH $MDSNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_client() {
+ $PDSH $CLIENT "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_ost() {
+ $PDSH $OSTNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+drop_request() {
+ do_mds "echo 0x121 > /proc/sys/lustre/fail_loc"
+ do_client "$1"
+ do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+}
+
+drop_reply() {
+ do_mds "echo 0x120 > /proc/sys/lustre/fail_loc"
+ do_client "$@"
+ do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+}
+
+make_config() {
+ rm -f $CONFIG
+ for NODE in $CLIENT $MDSNODE $OSTNODE; do
+ lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
+ --nettype $NETWORKTYPE || exit 4
+ done
+ lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
+ --size $MDSSIZE || exit 5
+ lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
+ --size $OSTSIZE || exit 6
+ lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
+ --ost ost1 || exit 7
+}
+
+start_mds() {
+ do_mds "lconf $@ $CONFIG"
+}
+
+shutdown_mds() {
+ do_mds "lconf $@ --cleanup $CONFIG"
+}
+
+start_ost() {
+ do_ost "lconf $@ $CONFIG"
+}
+
+shutdown_ost() {
+ do_ost "lconf $@ --cleanup $CONFIG"
+}
+
+mount_client() {
+ do_client "lconf $@ $CONFIG"
+}
+
+unmount_client() {
+ do_client "lconf $@ --cleanup $CONFIG"
+}
+
+setup() {
+ make_config
+ start_mds --reformat
+ start_ost --reformat
+ # XXX we should write our own upcall, when we move this somewhere better.
+ mount_client --timeout=10 \
+ --recovery_upcall=$PWD/../../ltest/functional/llite/09/client-upcall.sh
+}
+
+cleanup() {
+ unmount_client || true
+ shutdown_mds || true
+ shutdown_ost || true
+}
+
+replay() {
+ if [ $# -gt 1 ]; then
+ do_client "$1"
+ shift
+ fi
+ do_mds "sync"
+ do_mds 'echo -e "device \$mds1\\nprobe\\nnotransno\\nreadonly" | lctl'
+ do_client "$1" &
+ shutdown_mds -f
+ start_mds
+ wait
+ do_client "ls $MOUNPT" # trigger failover, if we haven't already
+}
+
+if [ ! -z "$ONLY" ]; then
+ eval "$ONLY"
+ exit $?
+fi
+
+setup
+drop_request "mcreate /mnt/lustre/1"
+drop_reply "mcreate /mnt/lustre/2"
+replay "mcreate /mnt/lustre/3"
+cleanup
--- /dev/null
+#!/usr/bin/perl
+use strict;
+use diagnostics;
+use Getopt::Long;
+
+sub usage () {
+ print "Usage: $0 <mount point prefix> <iterations>\n";
+ print "example: $0 --count=2 /mnt/lustre 50\n";
+ print " will test in /mnt/lustre1 and /mnt/lustre2\n";
+ print " $0 --count=0 /mnt/lustre 50\n";
+ print " will test in /mnt/lustre only\n";
+ exit;
+}
+my ($j, $k, $d, $f1, $f2, $path, $silent);
+my $count = 0;
+my $create = 10;
+
+GetOptions("silent!"=> \$silent,
+ "count=i" => \$count,
+ "create=i" => \$create);
+
+my $mtpt = shift || usage();
+my $i = shift || usage();
+my $total = $i;
+my $files = 6;
+my $dirs = 3;
+my $mcreate = 0; # should we use mcreate or open?
+
+my $which = "";
+if ($count > 0) {
+ $which = int(rand() * $count) + 1;
+}
+
+$k = $dirs;
+if ($create == 0) {
+ $k = 0;
+}
+while ($k--) {
+ $path = "$mtpt$which/$k";
+ my $rc = mkdir $path, 0755;
+ print "mkdir $path failed: $!\n" if !$rc;
+ $j = $files;
+ while ($j--) {
+ `./mcreate $path/$j`;
+ }
+}
+
+while ($i--) {
+ my $which = "";
+ if ($count > 0) {
+ $which = int(rand() * $count) + 1;
+ }
+ $d = int(rand() * $dirs);
+ $f1 = int(rand() * $files);
+ $f2 = int(rand() * $files);
+ print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
+ my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
+ print "[$$] done: $rc\n" if !$silent;
+ if (($total - $i) % 100 == 0) {
+ print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+ }
+}
+
+$k = $dirs;
+if ($create == 0) {
+ $k = 0;
+}
+while ($k--) {
+ $path = "$mtpt$which/$k";
+ $j = $files;
+ while ($j--) {
+ unlink "$path/$j";
+ }
+ my $rc = rmdir $path;
+ print "rmdir $path failed: $!\n" if !$rc;
+}
+
+print "Done.\n";
[ -z "$VERIFY" ] && VERIFY="-+d"
[ -z "$ODIR" ] && ODIR="-I"
[ -z "$REC" ] && REC=64
-[ -z "$FILE" ] && FILE=/mnt/lustre/test.$$
+[ -z "$FILE" ] && FILE=/mnt/lustre/iozone.$$
[ $1 ] && SIZE=$1
COUNT=0
rm -f endiozone
#!/bin/sh
-export PATH=/sbin:/usr/sbin:$PATH
-
SRCDIR="`dirname $0`/"
-. $SRCDIR/common.sh
+export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+LOOPS=${LOOPS:-1}
COUNT=${COUNT:-1000000}
COUNT_10=`expr $COUNT / 10`
COUNT_100=`expr $COUNT / 100`
ENDRUN=endrun-`hostname`
-ECHONAME="`$OBDCTL device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
if [ -z "$ECHONAME" ]; then
echo "$0: needs an ECHO_CLIENT set up first" 1>&2
fi
cleanup () {
- $OBDCTL --device \$$ECHONAME destroy $OID
+ lctl --device \$$ECHONAME destroy $OID
}
runthreads() {
;;
esac
- $OBDCTL --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
+ lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
if [ -e $ENDRUN ]; then
rm $ENDRUN
fi
}
-[ -z "$OID" ] && OID=`$OBDCTL --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'`
+[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID"
[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1
# TODO: obdctl needs to check on the progress of each forked thread
# (IPC SHM, sockets?) to see if it hangs.
-while date; do
+for i in `seq $LOOPS`; do
PG=1
- PGVW=16
- PGVR=16
+ PGVW=${PGVW:-16}
+ PGVR=${PGVR:-16}
# We use '--threads 1 X' instead of '--device X' so that
# obdctl can monitor the forked thread for progress (TODO).
#!/bin/sh
-export PATH=/sbin:/usr/sbin:$PATH
-
SRCDIR="`dirname $0`/"
-. $SRCDIR/common.sh
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
COUNT=${COUNT:-1000000}
COUNT_10=`expr $COUNT / 10`
ENDRUN=endrun-`hostname`
-ECHONAME="`$OBDCTL device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
+ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`"
if [ -z "$ECHONAME" ]; then
echo "$0: needs an ECHO_CLIENT set up first" 1>&2
exit 1
fi
+cleanup () {
+ lctl --device \$$ECHONAME destroy $OID
+}
+
runthreads() {
THR=$1
DO=$2
test_getattr)
RW=
;;
-
test_brw_write)
DO=test_brw
RW=w
;;
-
test_brw_read)
DO=test_brw
RW=r
;;
esac
- $OBDCTL --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
+ lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1
- if [ -e endrun ]; then
- rm endrun
- echo "exiting because endrun file was found"
- exit 0
+ if [ -e $ENDRUN ]; then
+ rm $ENDRUN
+ echo "exiting because $ENDRUN file was found"
+ cleanup
fi
}
-[ -z "$OID" ] && OID=`$OBDCTL --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'`
+[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID"
[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1
# TODO: obdctl needs to check on the progress of each forked thread
;;
test_brw_write)
PG=1
- PGV=16
+ PGV=${PGV:-16}
;;
test_brw_read)
PG=1
- PGV=16
+ PGV=${PGV:-16}
;;
esac
runthreads 1 $CMD 1 1 $PG
runthreads 1 $CMD 100 1 $PG
- debug_server_off
- debug_client_off
+ echo 0 > /proc/sys/portals/debug
runthreads 1 $CMD $COUNT_100 -10 $PG
[ "$PGV" ] && runthreads 1 $CMD $COUNT_1000 -10 $PGV
[ "$PGV" ] && runthreads 100 $CMD $COUNT_1000 -30 $PGV
done
-$OBDCTL --device \$$ECHONAME destroy $OID
+lctl --device \$$ECHONAME destroy $OID
echo "removing $DST"
rm -r $V $DST || fail "can't remove $DST" 37
+# mkdirmany test (bug 589)
+echo "running mkdirmany $OSCMT/base$$ 100"
+./mkdirmany $OSCMT/base$$ 100 || fail "mkdirmany failed"
+echo "removing mkdirmany directories"
+rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+
NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
if [ $NOWUSED -gt $USED ]; then
echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
set -e
+SRCDIR=`dirname $0`
+PATH=$SRCDIR:$SRCDIR/../utils:$PATH
+
CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+MCREATE=${MCREATE:-mcreate}
+TOEXCL=${TOEXCL:-toexcl}
+
MOUNT=${MOUNT:-/mnt/lustre}
+DIR=${DIR:-$MOUNT}
export NAME=$NAME
clean() {
echo -n "cln.."
- sh llmountcleanup.sh > /dev/null
+ sh llmountcleanup.sh > /dev/null || exit 20
}
CLEAN=${CLEAN:-clean}
start() {
echo -n "mnt.."
- sh llrmount.sh > /dev/null
- echo -n "done"
+ sh llrmount.sh > /dev/null || exit 10
+ echo "done"
}
START=${START:-start}
echo PASS
}
-mount | grep $MOUNT || $START
+mount | grep $MOUNT || sh llmount.sh
echo '== touch .../f ; rm .../f ======================== test 0'
-touch $MOUNT/f
-$CHECKSTAT -t file $MOUNT/f || error
-rm $MOUNT/f
-$CHECKSTAT -a $MOUNT/f || error
+touch $DIR/f
+$CHECKSTAT -t file $DIR/f || error
+rm $DIR/f
+$CHECKSTAT -a $DIR/f || error
pass
$CLEAN
$START
echo '== mkdir .../d1; mkdir .../d1/d2 ================= test 1'
-mkdir $MOUNT/d1
-mkdir $MOUNT/d1/d2
-$CHECKSTAT -t dir $MOUNT/d1/d2 || error
+mkdir $DIR/d1
+mkdir $DIR/d1/d2
+$CHECKSTAT -t dir $DIR/d1/d2 || error
pass
$CLEAN
$START
echo '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b'
-rmdir $MOUNT/d1/d2
-rmdir $MOUNT/d1
-$CHECKSTAT -a $MOUNT/d1 || error
+rmdir $DIR/d1/d2
+rmdir $DIR/d1
+$CHECKSTAT -a $DIR/d1 || error
pass
$CLEAN
$START
echo '== mkdir .../d2; touch .../d2/f ================== test 2'
-mkdir $MOUNT/d2
-touch $MOUNT/d2/f
-$CHECKSTAT -t file $MOUNT/d2/f || error
+mkdir $DIR/d2
+touch $DIR/d2/f
+$CHECKSTAT -t file $DIR/d2/f || error
pass
$CLEAN
$START
echo '== rm -r .../d2; touch .../d2/f ================== test 2b'
-rm -r $MOUNT/d2
-$CHECKSTAT -a $MOUNT/d2 || error
+rm -r $DIR/d2
+$CHECKSTAT -a $DIR/d2 || error
pass
$CLEAN
$START
echo '== mkdir .../d3 ================================== test 3'
-mkdir $MOUNT/d3
-$CHECKSTAT -t dir $MOUNT/d3 || error
+mkdir $DIR/d3
+$CHECKSTAT -t dir $DIR/d3 || error
pass
$CLEAN
$START
echo '== touch .../d3/f ================================ test 3b'
-touch $MOUNT/d3/f
-$CHECKSTAT -t file $MOUNT/d3/f || error
+touch $DIR/d3/f
+$CHECKSTAT -t file $DIR/d3/f || error
pass
$CLEAN
$START
echo '== rm -r .../d3 ================================== test 3c'
-rm -r $MOUNT/d3
-$CHECKSTAT -a $MOUNT/d3 || error
+rm -r $DIR/d3
+$CHECKSTAT -a $DIR/d3 || error
pass
$CLEAN
$START
echo '== mkdir .../d4 ================================== test 4'
-mkdir $MOUNT/d4
-$CHECKSTAT -t dir $MOUNT/d4 || error
+mkdir $DIR/d4
+$CHECKSTAT -t dir $DIR/d4 || error
pass
$CLEAN
$START
echo '== mkdir .../d4/d2 =============================== test 4b'
-mkdir $MOUNT/d4/d2
-$CHECKSTAT -t dir $MOUNT/d4/d2 || error
+mkdir $DIR/d4/d2
+$CHECKSTAT -t dir $DIR/d4/d2 || error
pass
$CLEAN
$START
echo '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5'
-mkdir $MOUNT/d5
-mkdir $MOUNT/d5/d2
-chmod 0666 $MOUNT/d5/d2
-$CHECKSTAT -t dir -p 0666 $MOUNT/d5/d2 || error
+mkdir $DIR/d5
+mkdir $DIR/d5/d2
+chmod 0707 $DIR/d5/d2
+$CHECKSTAT -t dir -p 0707 $DIR/d5/d2 || error
pass
$CLEAN
$START
echo '== touch .../f6; chmod .../f6 ==================== test 6'
-touch $MOUNT/f6
-chmod 0666 $MOUNT/f6
-$CHECKSTAT -t file -p 0666 $MOUNT/f6 || error
+touch $DIR/f6
+chmod 0666 $DIR/f6
+$CHECKSTAT -t file -p 0666 $DIR/f6 || error
pass
$CLEAN
$START
echo '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7'
-mkdir $MOUNT/d7
-./mcreate $MOUNT/d7/f
-chmod 0666 $MOUNT/d7/f
-$CHECKSTAT -t file -p 0666 $MOUNT/d7/f || error
+mkdir $DIR/d7
+$MCREATE $DIR/d7/f
+chmod 0666 $DIR/d7/f
+$CHECKSTAT -t file -p 0666 $DIR/d7/f || error
+pass
+$CLEAN
+$START
+
+echo '== mkdir .../d7; mcreate .../d7/f2; chmod .../d7/f2 = test 7b'
+$MCREATE $DIR/d7/f2
+echo -n foo > $DIR/d7/f2
+[ "`cat $DIR/d7/f2`" = "foo" ] || error
+$CHECKSTAT -t file -s 3 $DIR/d7/f2 || error
pass
$CLEAN
$START
echo '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8'
-mkdir $MOUNT/d8
-touch $MOUNT/d8/f
-chmod 0666 $MOUNT/d8/f
-$CHECKSTAT -t file -p 0666 $MOUNT/d8/f || error
+mkdir $DIR/d8
+touch $DIR/d8/f
+chmod 0666 $DIR/d8/f
+$CHECKSTAT -t file -p 0666 $DIR/d8/f || error
pass
$CLEAN
$START
echo '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9'
-mkdir $MOUNT/d9
-mkdir $MOUNT/d9/d2
-mkdir $MOUNT/d9/d2/d3
-$CHECKSTAT -t dir $MOUNT/d9/d2/d3 || error
+mkdir $DIR/d9
+mkdir $DIR/d9/d2
+mkdir $DIR/d9/d2/d3
+$CHECKSTAT -t dir $DIR/d9/d2/d3 || error
pass
$CLEAN
$START
echo '== mkdir .../d10 .../d10/d2; touch .../d10/d2/f = test 10'
-mkdir $MOUNT/d10
-mkdir $MOUNT/d10/d2
-touch $MOUNT/d10/d2/f
-$CHECKSTAT -t file $MOUNT/d10/d2/f || error
+mkdir $DIR/d10
+mkdir $DIR/d10/d2
+touch $DIR/d10/d2/f
+$CHECKSTAT -t file $DIR/d10/d2/f || error
pass
$CLEAN
$START
echo '== mkdir .../d11 d11/d2; chmod .../d11/d2 ======= test 11'
-mkdir $MOUNT/d11
-mkdir $MOUNT/d11/d2
-chmod 0666 $MOUNT/d11/d2
-chmod 0555 $MOUNT/d11/d2
-$CHECKSTAT -t dir -p 0555 $MOUNT/d11/d2 || error
+mkdir $DIR/d11
+mkdir $DIR/d11/d2
+chmod 0666 $DIR/d11/d2
+chmod 0705 $DIR/d11/d2
+$CHECKSTAT -t dir -p 0705 $DIR/d11/d2 || error
pass
$CLEAN
$START
echo '== mkdir .../d12; touch .../d12/f; chmod .../d12/f == test 12'
-mkdir $MOUNT/d12
-touch $MOUNT/d12/f
-chmod 0666 $MOUNT/d12/f
-chmod 0555 $MOUNT/d12/f
-$CHECKSTAT -t file -p 0555 $MOUNT/d12/f || error
+mkdir $DIR/d12
+touch $DIR/d12/f
+chmod 0666 $DIR/d12/f
+chmod 0654 $DIR/d12/f
+$CHECKSTAT -t file -p 0654 $DIR/d12/f || error
pass
$CLEAN
$START
-echo '== mkdir .../d13; cp /etc/passwd .../d13/f; > .../d13/f == test 13'
-mkdir $MOUNT/d13
-cp /etc/hosts $MOUNT/d13/f
-> $MOUNT/d13/f
-$CHECKSTAT -t file -s 0 $MOUNT/d13/f || error
+echo '== mkdir .../d13; creat .../d13/f; .../d13/f; > .../d13/f == test 13'
+mkdir $DIR/d13
+dd if=/dev/zero of=$DIR/d13/f count=10
+> $DIR/d13/f
+$CHECKSTAT -t file -s 0 $DIR/d13/f || error
pass
$CLEAN
$START
-
echo '================================================== test 14'
-mkdir $MOUNT/d14
-touch $MOUNT/d14/f
-rm $MOUNT/d14/f
-$CHECKSTAT -a $MOUNT/d14/f || error
+mkdir $DIR/d14
+touch $DIR/d14/f
+rm $DIR/d14/f
+$CHECKSTAT -a $DIR/d14/f || error
pass
$CLEAN
$START
-
echo '================================================== test 15'
-mkdir $MOUNT/d15
-touch $MOUNT/d15/f
-mv $MOUNT/d15/f $MOUNT/d15/f2
-$CHECKSTAT -t file $MOUNT/d15/f2 || error
+mkdir $DIR/d15
+touch $DIR/d15/f
+mv $DIR/d15/f $DIR/d15/f2
+$CHECKSTAT -t file $DIR/d15/f2 || error
pass
$CLEAN
$START
echo '================================================== test 16'
-mkdir $MOUNT/d16
-touch $MOUNT/d16/f
-rm -rf $MOUNT/d16/f
-$CHECKSTAT -a $MOUNT/d16/f || error
+mkdir $DIR/d16
+touch $DIR/d16/f
+rm -rf $DIR/d16/f
+$CHECKSTAT -a $DIR/d16/f || error
pass
$CLEAN
$START
echo '== symlinks: create, remove (dangling and real) == test 17'
-mkdir $MOUNT/d17
-touch $MOUNT/d17/f
-ln -s $MOUNT/d17/f $MOUNT/d17/l-exist
-ln -s no-such-file $MOUNT/d17/l-dangle
-ls -l $MOUNT/d17
-$CHECKSTAT -l $MOUNT/d17/f $MOUNT/d17/l-exist || error
-$CHECKSTAT -f -t f $MOUNT/d17/l-exist || error
-$CHECKSTAT -l no-such-file $MOUNT/d17/l-dangle || error
-$CHECKSTAT -fa $MOUNT/d17/l-dangle || error
-rm -f $MOUNT/l-dangle
-rm -f $MOUNT/l-exist
-$CHECKSTAT -a $MOUNT/l-dangle || error
-$CHECKSTAT -a $MOUNT/l-exist || error
+mkdir $DIR/d17
+touch $DIR/d17/f
+ln -s $DIR/d17/f $DIR/d17/l-exist
+ln -s no-such-file $DIR/d17/l-dangle
+ls -l $DIR/d17
+$CHECKSTAT -l $DIR/d17/f $DIR/d17/l-exist || error
+$CHECKSTAT -f -t f $DIR/d17/l-exist || error
+$CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error
+$CHECKSTAT -fa $DIR/d17/l-dangle || error
+rm -f $DIR/l-dangle
+rm -f $DIR/l-exist
+$CHECKSTAT -a $DIR/l-dangle || error
+$CHECKSTAT -a $DIR/l-exist || error
pass
$CLEAN
$START
echo "== touch .../f ; ls ... ========================= test 18"
-touch $MOUNT/f
-ls $MOUNT || error
+touch $DIR/f
+ls $DIR || error
pass
$CLEAN
$START
echo "== touch .../f ; ls -l ... ====================== test 19"
-touch $MOUNT/f
-ls -l $MOUNT
-rm $MOUNT/f
-$CHECKSTAT -a $MOUNT/f || error
+touch $DIR/f
+ls -l $DIR
+rm $DIR/f
+$CHECKSTAT -a $DIR/f || error
pass
$CLEAN
$START
echo "== touch .../f ; ls -l ... ====================== test 20"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
echo "1 done"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
echo "2 done"
-touch $MOUNT/f
-rm $MOUNT/f
+touch $DIR/f
+rm $DIR/f
echo "3 done"
-$CHECKSTAT -a $MOUNT/f || error
+$CHECKSTAT -a $DIR/f || error
pass
$CLEAN
$START
echo '== write to dangling link ======================== test 21'
-mkdir $MOUNT/d21
-[ -f $MOUNT/d21/dangle ] && rm -f $MOUNT/d21/dangle
-ln -s dangle $MOUNT/d21/link
-echo foo >> $MOUNT/d21/link
-cat $MOUNT/d21/dangle
-$CHECKSTAT -t link $MOUNT/d21/link || error
-$CHECKSTAT -f -t file $MOUNT/d21/link || error
+mkdir $DIR/d21
+[ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle
+ln -s dangle $DIR/d21/link
+echo foo >> $DIR/d21/link
+cat $DIR/d21/dangle
+$CHECKSTAT -t link $DIR/d21/link || error
+$CHECKSTAT -f -t file $DIR/d21/link || error
pass
$CLEAN
$START
echo '== unpack tar archive as non-root user =========== test 22'
-mkdir $MOUNT/d22
-which sudo && chown 4711 $MOUNT/d22
+mkdir $DIR/d22
+which sudo && chown 4711 $DIR/d22
SUDO=`which sudo 2> /dev/null` && SUDO="$SUDO -u #4711" || SUDO=""
-$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $MOUNT/d22
-ls -lR $MOUNT/d22/etc
-$CHECKSTAT -t dir $MOUNT/d22/etc || error
-[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $MOUNT/d22/etc || error
+echo '**** FIX THIS TEST ****'
+SUDO=""
+$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $DIR/d22
+ls -lR $DIR/d22/etc
+$CHECKSTAT -t dir $DIR/d22/etc || error
+[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $DIR/d22/etc || error
pass
$CLEAN
$START
echo '== O_CREAT|O_EXCL in subdir ====================== test 23'
-mkdir $MOUNT/d23
-./toexcl $MOUNT/d23/f23
-./toexcl -e $MOUNT/d23/f23 || error
+mkdir $DIR/d23
+$TOEXCL $DIR/d23/f23
+$TOEXCL -e $DIR/d23/f23 || error
pass
$CLEAN
$START
echo '== rename sanity ================================= test24'
echo '-- same directory rename'
echo '-- test 24-R1: touch a ; rename a b'
-mkdir $MOUNT/R1
-touch $MOUNT/R1/f
-mv $MOUNT/R1/f $MOUNT/R1/g
-$CHECKSTAT -t file $MOUNT/R1/g || error
+mkdir $DIR/R1
+touch $DIR/R1/f
+mv $DIR/R1/f $DIR/R1/g
+$CHECKSTAT -t file $DIR/R1/g || error
pass
$CLEAN
$START
echo '-- test 24-R2: touch a b ; rename a b;'
-mkdir $MOUNT/R2
-touch $MOUNT/R2/{f,g}
-mv $MOUNT/R2/f $MOUNT/R2/g
-$CHECKSTAT -a $MOUNT/R2/f || error
-$CHECKSTAT -t file $MOUNT/R2/g || error
+mkdir $DIR/R2
+touch $DIR/R2/{f,g}
+mv $DIR/R2/f $DIR/R2/g
+$CHECKSTAT -a $DIR/R2/f || error
+$CHECKSTAT -t file $DIR/R2/g || error
pass
$CLEAN
$START
echo '-- test 24-R3: mkdir a ; rename a b;'
-mkdir $MOUNT/R3
-mkdir $MOUNT/R3/f
-mv $MOUNT/R3/f $MOUNT/R3/g
-$CHECKSTAT -a $MOUNT/R3/f || error
-$CHECKSTAT -t dir $MOUNT/R3/g || error
+mkdir $DIR/R3
+mkdir $DIR/R3/f
+mv $DIR/R3/f $DIR/R3/g
+$CHECKSTAT -a $DIR/R3/f || error
+$CHECKSTAT -t dir $DIR/R3/g || error
pass
$CLEAN
$START
echo '-- test 24-R4: mkdir a b ; rename a b;'
-mkdir $MOUNT/R4
-mkdir $MOUNT/R4/{f,g}
-perl -e "rename \"$MOUNT/R4/f\", \"$MOUNT/R4/g\";"
-$CHECKSTAT -a $MOUNT/R4/f || error
-$CHECKSTAT -t dir $MOUNT/R4/g || error
+mkdir $DIR/R4
+mkdir $DIR/R4/{f,g}
+perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";"
+$CHECKSTAT -a $DIR/R4/f || error
+$CHECKSTAT -t dir $DIR/R4/g || error
pass
$CLEAN
$START
echo '-- cross directory renames --'
echo '-- test 24-R5: touch a ; rename a b'
-mkdir $MOUNT/R5{a,b}
-touch $MOUNT/R5a/f
-mv $MOUNT/R5a/f $MOUNT/R5b/g
-$CHECKSTAT -a $MOUNT/R5a/f || error
-$CHECKSTAT -t file $MOUNT/R5b/g || error
+mkdir $DIR/R5{a,b}
+touch $DIR/R5a/f
+mv $DIR/R5a/f $DIR/R5b/g
+$CHECKSTAT -a $DIR/R5a/f || error
+$CHECKSTAT -t file $DIR/R5b/g || error
pass
$CLEAN
$START
echo '-- test 24-R6: touch a ; rename a b'
-mkdir $MOUNT/R6{a,b}
-touch $MOUNT/R6a/f $MOUNT/R6b/g
-mv $MOUNT/R6a/f $MOUNT/R6b/g
-$CHECKSTAT -a $MOUNT/R6a/f || error
-$CHECKSTAT -t file $MOUNT/R6b/g || error
+mkdir $DIR/R6{a,b}
+touch $DIR/R6a/f $DIR/R6b/g
+mv $DIR/R6a/f $DIR/R6b/g
+$CHECKSTAT -a $DIR/R6a/f || error
+$CHECKSTAT -t file $DIR/R6b/g || error
pass
$CLEAN
$START
echo '-- test 24-R7: touch a ; rename a b'
-mkdir $MOUNT/R7{a,b}
-mkdir $MOUNT/R7a/f
-mv $MOUNT/R7a/f $MOUNT/R7b/g
-$CHECKSTAT -a $MOUNT/R7a/f || error
-$CHECKSTAT -t dir $MOUNT/R7b/g || error
+mkdir $DIR/R7{a,b}
+mkdir $DIR/R7a/f
+mv $DIR/R7a/f $DIR/R7b/g
+$CHECKSTAT -a $DIR/R7a/f || error
+$CHECKSTAT -t dir $DIR/R7b/g || error
pass
$CLEAN
$START
echo '-- test 24-R8: touch a ; rename a b'
-mkdir $MOUNT/R8{a,b}
-mkdir $MOUNT/R8a/f $MOUNT/R8b/g
-perl -e "rename \"$MOUNT/R8a/f\", \"$MOUNT/R8b/g\";"
-$CHECKSTAT -a $MOUNT/R8a/f || error
-$CHECKSTAT -t dir $MOUNT/R8b/g || error
+mkdir $DIR/R8{a,b}
+mkdir $DIR/R8a/f $DIR/R8b/g
+perl -e "rename \"$DIR/R8a/f\", \"$DIR/R8b/g\";"
+$CHECKSTAT -a $DIR/R8a/f || error
+$CHECKSTAT -t dir $DIR/R8b/g || error
pass
$CLEAN
$START
echo "-- rename error cases"
echo "-- test 24-R9 target error: touch f ; mkdir a ; rename f a"
-mkdir $MOUNT/R9
-mkdir $MOUNT/R9/a
-touch $MOUNT/R9/f
-perl -e "rename \"$MOUNT/R9/f\", \"$MOUNT/R9/a\";"
-$CHECKSTAT -t file $MOUNT/R9/f || error
-$CHECKSTAT -t dir $MOUNT/R9/a || error
-$CHECKSTAT -a file $MOUNT/R9/a/f || error
+mkdir $DIR/R9
+mkdir $DIR/R9/a
+touch $DIR/R9/f
+perl -e "rename \"$DIR/R9/f\", \"$DIR/R9/a\";"
+$CHECKSTAT -t file $DIR/R9/f || error
+$CHECKSTAT -t dir $DIR/R9/a || error
+$CHECKSTAT -a file $DIR/R9/a/f || error
pass
$CLEAN
$START
echo "--test 24-R10 source does not exist"
-mkdir $MOUNT/R10
-perl -e "rename \"$MOUNT/R10/f\", \"$MOUNT/R10/g\""
-$CHECKSTAT -t dir $MOUNT/R10 || error
-$CHECKSTAT -a $MOUNT/R10/f || error
-$CHECKSTAT -a $MOUNT/R10/g || error
+mkdir $DIR/R10
+perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\""
+$CHECKSTAT -t dir $DIR/R10 || error
+$CHECKSTAT -a $DIR/R10/f || error
+$CHECKSTAT -a $DIR/R10/g || error
pass
$CLEAN
$START
echo '== symlink sanity ================================ test25'
echo "--test 25.1 create file in symlinked directory"
-mkdir $MOUNT/d25
-ln -s d25 $MOUNT/s25
-touch $MOUNT/s25/foo
+mkdir $DIR/d25
+ln -s d25 $DIR/s25
+touch $DIR/s25/foo
pass
$CLEAN
$START
echo "--test 25.2 lookup file in symlinked directory"
-$CHECKSTAT -t file $MOUNT/s25/foo
+$CHECKSTAT -t file $DIR/s25/foo
pass
$CLEAN
$START
echo "--test 26 multiple component symlink"
-mkdir $MOUNT/d26
-mkdir $MOUNT/d26/d26-2
-ln -s d26/d26-2 $MOUNT/s26
-touch $MOUNT/s26/foo
+mkdir $DIR/d26
+mkdir $DIR/d26/d26-2
+ln -s d26/d26-2 $DIR/s26
+touch $DIR/s26/foo
pass
$CLEAN
$START
echo "--test 26.1 multiple component symlink at the end of a lookup"
-ln -s d26/d26-2/foo $MOUNT/s26-2
-touch $MOUNT/s26-2
+ln -s d26/d26-2/foo $DIR/s26-2
+touch $DIR/s26-2
pass
$CLEAN
$START
echo "--test 26.2 a chain of symlinks"
-mkdir $MOUNT/d26.2
-touch $MOUNT/d26.2/foo
-ln -s d26.2 $MOUNT/s26.2-1
-ln -s s26.2-1 $MOUNT/s26.2-2
-ln -s s26.2-2 $MOUNT/s26.2-3
-chmod 0666 $MOUNT/s26.2-3/foo
+mkdir $DIR/d26.2
+touch $DIR/d26.2/foo
+ln -s d26.2 $DIR/s26.2-1
+ln -s s26.2-1 $DIR/s26.2-2
+ln -s s26.2-2 $DIR/s26.2-3
+chmod 0666 $DIR/s26.2-3/foo
pass
$CLEAN
$START
-echo '== stripe sanity ================================= test27'
-echo "--test 26.1 create one stripe"
-mkdir $MOUNT/d27
-../utils/lstripe $MOUNT/d27/f0 4096 0 1
-$CHECKSTAT -t file $MOUNT/d27/f0
-echo "--test 26.2 write to one stripe file"
-cp /etc/hosts $MOUNT/d27/f0
+# recursive symlinks (bug 439)
+echo "--test 26.3 create multiple component recursive symlink"
+ln -s d26-3/foo $DIR/d26-3
pass
$CLEAN
$START
-echo "--test 26.3 create two stripes"
-../utils/lstripe $MOUNT/d27/f01 4096 0 2
-echo "--test 26.4 write to two stripe file"
-cp /etc/hosts $MOUNT/d27/f01
+echo "--test 26.3 unlink multiple component recursive symlink"
+rm $DIR/d26-3
pass
$CLEAN
$START
-echo "--test 26.5 lstripe existing file (should return error)"
-../utils/lstripe $MOUNT/d27/f12 4096 1 2
-! ../utils/lstripe $MOUNT/d27/f12 4096 1 2
+echo '== stripe sanity ================================= test27'
+echo "--test 27.1 create one stripe"
+mkdir $DIR/d27
+$LSTRIPE $DIR/d27/f0 8192 0 1
+$CHECKSTAT -t file $DIR/d27/f0
+echo "--test 27.2 write to one stripe file"
+cp /etc/hosts $DIR/d27/f0
+pass
+
+echo "--test 27.3 create two stripe file f01"
+$LSTRIPE $DIR/d27/f01 8192 0 2
+echo "--test 27.4 write to two stripe file file f01"
+dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
+pass
+
+echo "--test 27.5 create file with default settings"
+$LSTRIPE $DIR/d27/fdef 0 -1 0
+$CHECKSTAT -t file $DIR/d27/fdef
+#dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4
+
+echo "--test 27.6 lstripe existing file (should return error)"
+$LSTRIPE $DIR/d27/f12 8192 1 2
+! $LSTRIPE $DIR/d27/f12 8192 1 2
+$CHECKSTAT -t file $DIR/d27/f12
+#dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+pass
+
+
+echo "--test 27.7 lstripe with bad stripe size (should return error on LOV)"
+$LSTRIPE $DIR/d27/fbad 100 1 2 || /bin/true
+dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
pass
$CLEAN
$START
-echo "--test 26.6 lfind "
-../utils/lfind $MOUNT/d27
+echo "--test 27.8 lfind "
+$LFIND $DIR/d27
pass
$CLEAN
$START
-echo '== IT_GETATTR regression ======================== test28'
-mkdir $MOUNT/d28
-touch $MOUNT/d28/foo
-MDCDIR=${MDCDIR:-/proc/lustre/devices/ldlm/MDC_mds1}
+echo '== create/mknod/mkdir with bad file types ======== test28'
+mkdir $DIR/d28
+$CREATETEST $DIR/d28/ct || error
+pass
+
+echo '== IT_GETATTR regression ======================== test29'
+mkdir $MOUNT/d29
+touch $MOUNT/d29/foo
+ls -l $MOUNT/d29
+MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_MNT_localhost_mds1}
LOCKCOUNTORIG=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count`
-ls -l $MOUNT/d28
+ls -l $MOUNT/d29
LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count`
if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ] || [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then
$START
echo '== cleanup ============================================='
-rm -r $MOUNT/[Rdfs][1-9]*
+rm -r $DIR/[Rdfs][1-9]*
echo '======================= finished ======================='
exit
#!/bin/bash
-export NAME=$NAME
+set -e
+
+PATH=$PATH:.
+
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
+MOUNT1=${MOUNT1:-/mnt/lustre1}
+MOUNT2=${MOUNT2:-/mnt/lustre2}
+export NAME=${NAME:-mount2}
+
clean() {
- echo -n "cleanup..."
- sh llmount2-hackcleanup.sh > /dev/null
+ echo -n "cln.."
+ sh llmountcleanup.sh > /dev/null
}
-CLEAN=clean
+CLEAN=${CLEAN:-clean}
start() {
- echo -n "mounting..."
- sh llmount2-hack.sh > /dev/null
- echo -n "mounted"
+ echo -n "mnt.."
+ sh llrmount.sh > /dev/null
+ echo -n "done"
}
-START=start
+START=${START:-start}
error () {
- echo $1
+ echo FAIL
exit 1
}
-mkdir -p /mnt/lustre2
-mount | grep /mnt/lustre2 || $START
+pass() {
+ echo PASS
+}
+
+mkdir -p $MOUNT2
+mount | grep $MOUNT1 || sh llmount.sh
echo -n "test 1: check create on 2 mtpt's..."
-touch /mnt/lustre1/f1
-[ -f /mnt/lustre2/f1 ] || error "test 1 failure"
-echo "pass"
+touch $MOUNT1/f1
+[ -f $MOUNT2/f1 ] || error
+pass
+
+echo "test 2: check attribute updates on 2 mtpt's..."
+chmod 777 $MOUNT2/f1
+$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
+pass
-echo -n "test 2: check attribute updates on 2 mtpt's..."
-chmod a+x /mnt/lustre2/f1
-[ -x /mnt/lustre1/f1 ] || error "test 2 failure"
-echo "pass"
+echo "test 2b: check cached attribute updates on 2 mtpt's..."
+touch $MOUNT1/f2b
+ls -l $MOUNT2/f2b
+chmod 777 $MOUNT2/f2b
+$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
+pass
-echo -n "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x /mnt/lustre2/f1
+echo "test 2c: check cached attribute updates on 2 mtpt's..."
+touch $MOUNT1/f2c
+ls -l $MOUNT2/f2c
+chmod 777 $MOUNT1/f2c
+$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
+pass
+
+echo "test 3: check after remount attribute updates on 2 mtpt's..."
+chmod a-x $MOUNT2/f1
$CLEAN
$START
+$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
+pass
+
+echo "test 4: unlink on one mountpoint removes file on other..."
+rm $MOUNT2/f1
+$CHECKSTAT -a $MOUNT1/f1 || error
+pass
-[ ! -x /mnt/lustre1/f1 ] || error "test 3 failure"
-echo "pass"
+echo -n "test 5: symlink on one mtpt, readlink on another..."
+( cd $MOUNT1 ; ln -s this/is/good lnk )
-echo -n "test 4: symlink on one mtpt, readlink on another..."
-( cd /mnt/lustre1 ; ln -s this/is/good lnk )
+[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error
+pass
-[ "Xthis/is/good" = X`perl -e 'print readlink("/mnt/lustre2/lnk");'` ] || error "test 4 fails"
-echo "pass"
+echo -n "test 6: fstat validation on multiple mount points..."
+./multifstat $MOUNT1/f6 $MOUNT2/f6
+pass
-echo -n "test 5: fstat validation on multiple mount points..."
-./multifstat /mnt/lustre1/fstatfile /mnt/lustre2/fstatfile || error "test 5 fails"
-echo "pass"
+echo "test 9: remove of open file on other node..."
+./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
+pass
-echo -n "test 9: remove of open file on other node..."
-touch /mnt/lustre1/f9
-tail -f /mnt/lustre1/f9 &
-rm /mnt/lustre2/f9
-kill %1
-cat /mnt/lustre1/f9 && error "test 9 fails"
-echo "pass"
+echo -n "test 10: append of file with sub-page size on multiple mounts..."
+MTPT=1
+> $MOUNT2/f10
+for C in a b c d e f g h i j k l; do
+ MOUNT=`eval echo \\$MOUNT$MTPT`
+ echo -n $C >> $MOUNT/f10
+ [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+done
+[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
+
+echo -n "test 11: write of file with sub-page size on multiple mounts..."
+MTPT=1
+OFFSET=0
+> $MOUNT2/f11
+for C in a b c d e f g h i j k l; do
+ MOUNT=`eval echo \\$MOUNT$MTPT`
+ echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
+ [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+ OFFSET=`expr $OFFSET + 1`
+done
+[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
+
+rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
$CLEAN
LMC=${LMC-../utils/lmc}
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
-OSTSIZE=100000
+OSTDEV1=${OSTDEV1:-$TMP/ost1}
+OSTDEV2=${OSTDEV2:-$TMP/ost2}
+OSTSIZE=${OSTSIZE:-100000}
+
+NETTYPE=${NETTYPE:-tcp}
# NOTE - You can't have different MDS/OST nodes and also have clients on the
# MDS/OST nodes without using --endlevel and --startlevel during lconf.
# of the clients can be started, so plan accordingly.
# Three separate systems
-MDSNODE=uml1
-OSTNODES="uml2 uml2"
-CLIENTS="uml3"
+MDSNODE=${MDSNODE:-uml1}
+OSTNODES=${OSTNODES:-"uml2 uml2"}
+CLIENTS=${CLIENTS:-"uml3"}
# Single system with additional clients
#MDSNODE=uml1
rm -f $config
+h2tcp () {
+ case $1 in
+ client) echo '\*' ;;
+ *) echo $1 ;;
+ esac
+}
+
+h2elan () {
+ case $1 in
+ client) echo '\*' ;;
+ *) echo $1 | sed "s/[^0-9]*//" ;;
+ esac
+}
+
# create nodes
-for NODE in $MDSNODE $OSTNODES $CLIENTS; do
- eval [ \$$NODE ] && continue
- ${LMC} -m $config --add net --node $NODE --nid $NODE --nettype tcp || exit 1
- eval "$NODE=done"
+echo -n "adding NET for:"
+for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | sort -u`; do
+ echo -n " $NODE"
+ ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype elan || exit 1
done
# configure mds server
+echo; echo "adding MDS on: $MDSNODE"
${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
# configure ost
-${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
+${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 1 --stripe_pattern 0 || exit 20
COUNT=1
+echo -n "adding OST on:"
for NODE in $OSTNODES; do
eval OSTDEV=\$OSTDEV$COUNT
+ echo -n " $NODE"
+ OSTDEV=${OSTDEV:-$OSTDEV1}
${LMC} -m $config --add ost --node $NODE --lov lov1 --dev $OSTDEV --size $OSTSIZE || exit 21
COUNT=`expr $COUNT + 1`
done
# create client config(s)
+echo; echo -n "adding CLIENT on:"
for NODE in $CLIENTS; do
+ echo -n " $NODE"
${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
done
+echo
lfind
lstripe
lconf
+obdstat
+obdio
+obdbarrier
CPPFLAGS = $(HAVE_LIBREADLINE)
obdctl_LDADD := $(LIBREADLINE)
lctl_LDADD := $(LIBREADLINE) -lptlctl
-sbin_PROGRAMS = lctl lfind lstripe obdctl
-sbin_SCRIPTS = lconf lmc
+sbin_PROGRAMS = lctl lfind lstripe obdctl obdio obdbarrier obdstat
+sbin_SCRIPTS = lconf lmc llanalyze
obdctl_SOURCES = parser.c obdctl.c obd.c parser.h obdctl.h
lctl_SOURCES = parser.c obd.c lctl.c parser.h
+obdio_SOURCES = obdio.c obdiolib.c obdiolib.h
+obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h
lfind_SOURCES = lfind.c
lstripe_SOURCES = lstripe.c
lfind_CPPFLAGS = -D_XOPEN_SOURCE=500
# Based in part on the XML obdctl modifications done by Brian Behlendorf
import sys, getopt, types
-import string, os, stat, popen2, socket, time, random, fcntl, FCNTL, select
+import string, os, stat, popen2, socket, time, random, fcntl, select
import re, exceptions
import xml.dom.minidom
+if sys.version[0] == '1':
+ from FCNTL import F_GETFL, F_SETFL
+else:
+ from fcntl import F_GETFL, F_SETFL
+
# Global parameters
TCP_ACCEPTOR = ''
MAXTCPBUF = 1048576
Levels are aproximatly like:
10 - network
20 - device, ldlm
- 30 - obd, mdd
+ 30 - osd, mdd
40 - mds, ost
50 - mdc, osc
60 - lov
raise CommandError('lctl', "unable to find lctl binary.")
def set_nonblock(self, fd):
- fl = fcntl.fcntl(fd, FCNTL.F_GETFL)
- fcntl.fcntl(fd, FCNTL.F_SETFL, fl | os.O_NDELAY)
+ fl = fcntl.fcntl(fd, F_GETFL)
+ fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
def run(self, cmds):
"""
cmds = """
ignore_errors
device $%s
- cleanup
- detach %s
+ cleanup %s
+ detach
quit""" % (name, ('', 'force')[config.force()])
self.run(cmds)
# build fs according to type
# fixme: dangerous
-def mkfs(fstype, dev):
+def mkfs(dev, devsize, fstype):
+ block_cnt = ''
+ if devsize:
+ # devsize is in 1k, and fs block count is in 4k
+ block_cnt = devsize/4
+
if(fstype in ('ext3', 'extN')):
- mkfs = 'mkfs.ext2 -j -b 4096'
+ mkfs = 'mkfs.ext2 -j -b 4096 -F '
elif (fstype == 'reiserfs'):
- mkfs = 'mkfs.reiserfs -f'
+ mkfs = 'mkreiserfs -ff'
else:
print 'unsupported fs type: ', fstype
- if not is_block(dev):
- if(fstype in ('ext3', 'extN')):
- force = '-F'
- elif (fstype == 'reiserfs'):
- force = ''
- else:
- print 'unsupported fs type: ', fstype
- else:
- force = ''
- (ret, out) = run (mkfs, force, dev)
+
+ (ret, out) = run (mkfs, dev, block_cnt)
if ret:
panic("Unable to build fs:", dev)
# enable hash tree indexing on fsswe
if not is_block(dev):
dev = init_loop(dev, size, fstype)
if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
- mkfs(fstype, dev)
+ mkfs(dev, size, fstype)
# else:
# panic("device:", dev,
if not self.nid:
panic("unable to set nid for", self.net_type, self.nid)
debug("nid:", self.nid)
-
self.add_portals_module("linux/oslib", 'portals')
if node_needs_router():
self.add_portals_module("linux/router", 'kptlrouter')
if self.net_type == 'tcp':
self.add_portals_module("linux/socknal", 'ksocknal')
if self.net_type == 'toe':
- self.add_portals_odule("/linux/toenal", 'ktoenal')
+ self.add_portals_module("/linux/toenal", 'ktoenal')
if self.net_type == 'elan':
self.add_portals_module("/linux/rqswnal", 'kqswnal')
if self.net_type == 'gm':
lctl.add_route(net_type, gw, lo, hi)
if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
srvdb = self.db.nid2server(lo)
- if not srv:
+ if not srvdb:
panic("no server for nid", lo)
else:
srv = Network(srvdb)
lctl.network(self.net_type, self.nid)
- lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
+ if not is_prepared("RPCDEV_UUID"):
+ lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
def cleanup(self):
self.info(self.net_type, self.nid, self.port)
for net_type, gw, lo, hi in self.db.get_route_tbl():
if self.net_type in ('tcp', 'toe') and hi == '':
srvdb = self.db.nid2server(lo)
- if not srv:
+ if not srvdb:
panic("no server for nid", lo)
else:
srv = Network(srvdb)
cleanup_error(e.rc)
try:
- lctl.cleanup("RPCDEV", "RPCDEV_UUID")
+ if is_prepared("RPCDEV_UUID"):
+ lctl.cleanup("RPCDEV", "RPCDEV_UUID")
except CommandError, e:
- print "cleanup failed: ", self.name
+ print "cleanup failed: RPCDEV"
e.dump()
cleanup_error(e.rc)
try:
class LOV(Module):
def __init__(self,db):
Module.__init__(self, 'LOV', db)
+ self.add_lustre_module('mdc', 'mdc')
+ self.add_lustre_module('lov', 'lov')
self.mds_uuid = self.db.get_first_ref('mds')
mds= self.db.lookup(self.mds_uuid)
self.mds_name = mds.getName()
self.pattern = self.db.get_val_int('stripepattern', 0)
self.devlist = self.db.get_refs('obd')
self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
- self.add_lustre_module('mdc', 'mdc')
- self.add_lustre_module('lov', 'lov')
-
- def prepare(self):
- if is_prepared(self.uuid):
- return
+ self.osclist = []
for obd_uuid in self.devlist:
obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd)
+ osc = get_osc(obd, self.name)
if osc:
- try:
- # Ignore connection failures, because the LOV will DTRT with
- # an unconnected OSC.
- osc.prepare(ignore_connect_failure=1)
- except CommandError:
- print "Error preparing OSC %s (inactive)\n" % osc_uuid
+ self.osclist.append(osc)
else:
- panic('osc not found:', osc_uuid)
- mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+ panic('osc not found:', obd_uuid)
+
+ def prepare(self):
+ if is_prepared(self.uuid):
+ return
+ for osc in self.osclist:
+ try:
+ # Ignore connection failures, because the LOV will DTRT with
+ # an unconnected OSC.
+ osc.prepare(ignore_connect_failure=1)
+ except CommandError:
+ print "Error preparing OSC %s (inactive)\n" % osc.uuid
+ self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
self.stripe_off, self.pattern, self.devlist, self.mds_name)
lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
- setup ="%s" % (mdc_uuid))
+ setup ="%s" % (self.mdc_uuid))
def cleanup(self):
- if not is_prepared(self.uuid):
- return
- for obd_uuid in self.devlist:
- obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd)
- if osc:
- osc.cleanup()
- else:
- panic('osc not found:', osc_uuid)
- Module.cleanup(self)
- cleanup_mdc(self.db, self.mds_uuid)
-
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+ for osc in self.osclist:
+ osc.cleanup()
+ cleanup_mdc(self.db, self.name, self.mds_uuid)
def load_module(self):
- for obd_uuid in self.devlist:
- obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd)
- if osc:
- osc.load_module()
- break
- else:
- panic('osc not found:', osc_uuid)
+ for osc in self.osclist:
+ osc.load_module()
+ break
Module.load_module(self)
-
def cleanup_module(self):
Module.cleanup_module(self)
- for obd_uuid in self.devlist:
- obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd)
- if osc:
- osc.cleanup_module()
- break
- else:
- panic('osc not found:', osc_uuid)
+ for osc in self.osclist:
+ osc.cleanup_module()
+ break
class LOVConfig(Module):
def __init__(self,db):
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
# overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
- self.uuid = self.db.get_first_ref('mds')
+ self.uuid = self.db.get_first_ref('target')
mds = self.db.lookup(self.uuid)
self.name = mds.getName()
self.lovconfig_uuids = mds.get_refs('lovconfig')
print "cleanup failed: ", self.name
e.dump()
cleanup_error(e.rc)
- if not is_prepared(self.uuid):
- return
- Module.cleanup(self)
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
clean_loop(self.devname)
-# Very unusual case, as there is no MDC element in the XML anymore
-# Builds itself from an MDS node
-class MDC(Module):
- def __init__(self,db):
- self.mds_uuid = db.getUUID()
- self.mds_name = db.getName()
- self.db = db
- node_name = config.select(self.mds_name)
- if node_name:
- self.mdd_uuid = self.db.get_mdd(node_name, self.mds_uuid)
- else:
- self.mdd_uuid = db.get_first_ref('active')
- if not self.mdd_uuid:
- panic("No MDSDEV found for MDS service:", self.mds_name)
- self.module_name = 'MDC'
- self.kmodule_list = []
- self._server = None
- self._connected = 0
-
- host = socket.gethostname()
- self.name = 'MDC_%s' % (self.mds_name)
- self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
- int(random.random() * 1048576))
-
- self.lookup_server(self.mdd_uuid)
- self.add_lustre_module('mdc', 'mdc')
-
- def prepare(self):
- if is_prepared(self.uuid):
- return
- self.info(self.mds_uuid)
- srv = self.get_server()
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
- lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.mds_uuid, srv.uuid))
-
-class OBD(Module):
+class OSD(Module):
def __init__(self, db):
- Module.__init__(self, 'OBD', db)
- self.obdtype = self.db.get_val('obdtype')
+ Module.__init__(self, 'OSD', db)
+ self.osdtype = self.db.get_val('osdtype')
self.devname = self.db.get_val('devpath', '')
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
- self.active_target = self.db.get_first_ref('active')
+ self.uuid = self.db.get_first_ref('target')
+ ost = self.db.lookup(self.uuid)
+ self.name = ost.getName()
# FIXME: if fstype not set, then determine based on kernel version
self.format = self.db.get_val('autoformat', 'yes')
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
- self.add_lustre_module(self.obdtype, self.obdtype)
+ self.add_lustre_module('ost', 'ost')
+ self.add_lustre_module(self.osdtype, self.osdtype)
if self.fstype:
self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
- if self.obdtype == 'obdecho':
+ self.info(self.osdtype, self.devname, self.size, self.fstype, self.format)
+ if self.osdtype == 'obdecho':
blkdev = ''
else:
blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
- lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
+ lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
- def cleanup(self):
- if not is_prepared(self.uuid):
- return
- Module.cleanup(self)
- if not self.obdtype == 'obdecho':
- clean_loop(self.devname)
-
-class COBD(Module):
- def __init__(self, db):
- Module.__init__(self, 'COBD', db)
- self.real_uuid = self.db.get_first_ref('realobd')
- self.cache_uuid = self.db.get_first_ref('cacheobd')
- self.add_lustre_module('cobd' , 'cobd')
-
- # need to check /proc/mounts and /etc/mtab before
- # formatting anything.
- # FIXME: check if device is already formatted.
- def prepare(self):
- if is_prepared(self.uuid):
- return
- self.info(self.real_uuid, self.cache_uuid)
- lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.real_uuid, self.cache_uuid))
-
-class OST(Module):
- def __init__(self,db):
- Module.__init__(self, 'OST', db)
- self.obd_uuid = self.db.get_first_ref('obd')
- self.add_lustre_module('ost', 'ost')
-
- def prepare(self):
- if is_prepared(self.uuid):
- return
- self.info(self.obd_uuid)
- lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
- setup ="%s" % (self.obd_uuid))
-
+ if not is_prepared('OSS_UUID'):
+ lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
+ setup ="")
-# virtual interface for OSC and LOV
-class VOSC(Module):
- def __init__(self,db):
- Module.__init__(self, 'VOSC', db)
- if db.get_class() == 'lov':
- self.osc = LOV(db)
- else:
- self.osc = get_osc(db)
- def get_uuid(self):
- return self.osc.uuid
- def prepare(self):
- self.osc.prepare()
def cleanup(self):
- self.osc.cleanup()
- def load_module(self):
- self.osc.load_module()
- def cleanup_module(self):
- self.osc.cleanup_module()
-
+ if is_prepared('OSS_UUID'):
+ try:
+ lctl.cleanup("OSS", "OSS_UUID")
+ except CommandError, e:
+ print "cleanup failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+ if not self.osdtype == 'obdecho':
+ clean_loop(self.devname)
-class OSC(Module):
- def __init__(self, db, obd_name, obd_uuid, ost_uuid):
+# Generic client module, used by OSC and MDC
+class Client(Module):
+ def __init__(self, db, module, owner, target_name, target_uuid):
+ self.target_name = target_name
+ self.target_uuid = target_uuid
self.db = db
- self.module_name = 'OSC'
- self.name = 'OSC_%s' % (obd_name)
- self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
+ node_name = config.select(target_name)
+ if node_name:
+ self.tgt_dev_uuid = self.db.get_target_device(node_name, target_uuid)
+ else:
+ self.tgt_dev_uuid = db.get_first_ref('active')
+ if not self.tgt_dev_uuid:
+ panic("No target device found for target:", target_name)
self.kmodule_list = []
self._server = None
self._connected = 0
- self.obd_uuid = obd_uuid
- self.ost_uuid = ost_uuid
- debug("OSC:", obd_uuid, ost_uuid)
- self.lookup_server(self.ost_uuid)
- self.add_lustre_module('osc', 'osc')
+ self.module = module
+ self.module_name = string.upper(module)
+ self.name = '%s_%s_%s' % (self.module_name, owner, target_name)
+ self.uuid = '%05x_%s_%05x' % (int(random.random() * 1048576), self.name,
+ int(random.random() * 1048576))
+ self.uuid = self.uuid[0:36]
+ self.lookup_server(self.tgt_dev_uuid)
+ self.add_lustre_module(module, module)
def prepare(self, ignore_connect_failure = 0):
if is_prepared(self.uuid):
return
- self.info(self.obd_uuid, self.ost_uuid)
+ self.info(self.target_uuid)
srv = self.get_server()
try:
if local_net(srv):
+ #debug("LOCAL NET")
lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
else:
+ #debug("NOT LOCAL NET")
r = find_route(srv)
if r:
lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
except CommandError:
if (ignore_connect_failure == 0):
pass
-
- lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.obd_uuid, srv.uuid))
+ lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+ setup ="%s %s" %(self.target_uuid, srv.uuid))
def cleanup(self):
srv = self.get_server()
if local_net(srv):
Module.cleanup(self)
else:
- self.info(self.obd_uuid, self.ost_uuid)
+ self.info(self.targt_uuid)
r = find_route(srv)
if r:
try:
e.dump()
cleanup_error(e.rc)
Module.cleanup(self)
+
+
+
+class MDC(Client):
+ def __init__(self, db, owner, target_name, target_uuid):
+ Client.__init__(self, db, 'mdc', owner, target_name, target_uuid)
+
+class OSC(Client):
+ def __init__(self, db, owner, target_name, target_uuid):
+ Client.__init__(self, db, 'osc', owner, target_name, target_uuid)
+
+class COBD(Module):
+ def __init__(self, db):
+ Module.__init__(self, 'COBD', db)
+ self.real_uuid = self.db.get_first_ref('realobd')
+ self.cache_uuid = self.db.get_first_ref('cacheobd')
+ self.add_lustre_module('cobd' , 'cobd')
+
+ # need to check /proc/mounts and /etc/mtab before
+ # formatting anything.
+ # FIXME: check if device is already formatted.
+ def prepare(self):
+ if is_prepared(self.uuid):
+ return
+ self.info(self.real_uuid, self.cache_uuid)
+ lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
+ setup ="%s %s" %(self.real_uuid, self.cache_uuid))
+
+
+# virtual interface for OSC and LOV
+class VOSC(Module):
+ def __init__(self,db, owner):
+ Module.__init__(self, 'VOSC', db)
+ if db.get_class() == 'lov':
+ self.osc = LOV(db)
+ else:
+ self.osc = get_osc(db, owner)
+ def get_uuid(self):
+ return self.osc.uuid
+ def prepare(self):
+ self.osc.prepare()
+ def cleanup(self):
+ self.osc.cleanup()
+ def load_module(self):
+ self.osc.load_module()
+ def cleanup_module(self):
+ self.osc.cleanup_module()
+ def need_mdc(self):
+ return self.db.get_class() != 'lov'
+ def get_mdc_uuid(self):
+ if self.db.get_class() == 'lov':
+ return self.osc.mdc_uuid
+ return ''
+
class ECHO_CLIENT(Module):
def __init__(self,db):
self.add_lustre_module('obdecho', 'obdecho')
self.obd_uuid = self.db.get_first_ref('obd')
obd = self.db.lookup(self.obd_uuid)
- self.osc = VOSC(obd)
+ self.osc = VOSC(obd, self.name)
def prepare(self):
if is_prepared(self.uuid):
setup = self.osc.get_uuid())
def cleanup(self):
- if not is_prepared(self.uuid):
- return
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
self.osc.cleanup()
def load_module(self):
self.path = self.db.get_val('path')
self.mds_uuid = self.db.get_first_ref('mds')
self.obd_uuid = self.db.get_first_ref('obd')
- self.add_lustre_module('mdc', 'mdc')
- self.add_lustre_module('llite', 'llite')
obd = self.db.lookup(self.obd_uuid)
- self.osc = VOSC(obd)
+ self.vosc = VOSC(obd, self.name)
+ if self.vosc.need_mdc():
+ self.add_lustre_module('mdc', 'mdc')
+ self.add_lustre_module('llite', 'llite')
def prepare(self):
- self.osc.prepare()
- mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
+ self.vosc.prepare()
+ if self.vosc.need_mdc():
+ mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
+ else:
+ mdc_uuid = self.vosc.get_mdc_uuid()
self.info(self.path, self.mds_uuid, self.obd_uuid)
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
- (self.osc.get_uuid(), mdc_uuid, self.path)
+ (self.vosc.get_uuid(), mdc_uuid, self.path)
run("mkdir", self.path)
ret, val = run(cmd)
if ret:
if fs_is_mounted(self.path):
panic("fs is still mounted:", self.path)
- self.osc.cleanup()
- cleanup_mdc(self.db, self.mds_uuid)
+ self.vosc.cleanup()
+ if self.vosc.need_mdc():
+ cleanup_mdc(self.db, self.name, self.mds_uuid)
def load_module(self):
- self.osc.load_module()
+ self.vosc.load_module()
Module.load_module(self)
def cleanup_module(self):
Module.cleanup_module(self)
- self.osc.cleanup_module()
+ self.vosc.cleanup_module()
# ============================================================
# XML processing and query
-# OSC is no longer in the xml, so we have to fake it.
-# this is getting ugly and begging for another refactoring
-def get_osc(obd_dom):
- obd = OBD(obd_dom)
- osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
- return osc
-
class LustreDB:
def lookup(self, uuid):
""" lookup returns a new LustreDB instance"""
return ost.lookup(uuid)
def nid2server(self, nid):
- netlist = self.parent.parent.attrs['network']
+ netlist = self.lookup_class('network')
for net_db in netlist:
if net_db.get_val('nid') == nid:
- return net
+ return net_db
return None
# the tag name is the service type
ret = 10
elif type in ('device', 'ldlm'):
ret = 20
- elif type in ('obd', 'mdd', 'cobd'):
+ elif type in ('osd', 'mdd', 'cobd'):
ret = 30
elif type in ('mdsdev','ost'):
ret = 40
list.sort()
return list
- # Find the mdsdev attached to node_name that points to
- # mds_uuid
- # node->profiles->mdsdev_refs->mds
- def get_mdd(self, node_name, mds_uuid):
+ # Find the target_device for target on a node
+ # node->profiles->device_refs->target
+ def get_target_device(self, node_name, target_uuid):
node_db = self.lookup_name(node_name)
if not node_db:
return None
prof_list = node_db.get_refs('profile')
for prof_uuid in prof_list:
prof_db = node_db.lookup(prof_uuid)
- mdd_list = prof_db.get_refs('mdsdev')
- for mdd_uuid in mdd_list:
- mdd = self.lookup(mdd_uuid)
- if mdd.get_first_ref('mds') == mds_uuid:
- return mdd_uuid
+ ref_list = prof_db.get_all_refs()
+ for ref in ref_list:
+ dev = self.lookup(ref[1])
+ if dev and dev.get_first_ref('target') == target_uuid:
+ return ref[1]
return None
-
+
+ # get all network uuids for this node
+ def get_networks(self):
+ ret = []
+ prof_list = self.get_refs('profile')
+ for prof_uuid in prof_list:
+ prof_db = self.lookup(prof_uuid)
+ net_list = prof_db.get_refs('network')
+ debug("get_networks():", prof_uuid, net_list)
+ for net_uuid in net_list:
+ ret.append(net_uuid)
+ return ret
class LustreDB_XML(LustreDB):
def __init__(self, dom, root_node):
""" Return the routes as a list of tuples of the form:
[(type, gw, lo, hi),]"""
res = []
- tbl = self.dom_node.getElementsByTagName('route_tbl')
+ tbl = self.dom_node.getElementsByTagName('routetbl')
for t in tbl:
routes = t.getElementsByTagName('route')
for r in routes:
lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r, 'hi', '')
+ hi = self.xmlattr(r, 'hi')
res.append((type, gw, lo, hi))
return res
def get_route_tbl(self):
ret = []
- tbls = self.dom_node.getElementsByTagName('route_tbl')
+ tbls = self.dom_node.getElementsByTagName('routetbl')
for tbl in tbls:
for r in tbl.getElementsByTagName('route'):
net_type = self.xmlattr(r, 'type')
gw = self.xmlattr(r, 'gw')
lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r,'hi', '')
+ hi = self.xmlattr(r, 'hi')
ret.append((net_type, gw, lo, hi))
return ret
self.l.protocol_version=ldap.VERSION3
# user and pw only needed if modifying db
self.l.bind_s("", "", ldap.AUTH_SIMPLE);
- except ldap.LDAPerror, e:
+ except ldap.LDAPError, e:
panic(e)
# FIXME, do something useful here
# MDC UUID hack -
# FIXME: clean this mess up!
#
-saved_mdc = {}
-def prepare_mdc(db, mds_uuid):
- global saved_mdc
+# OSC is no longer in the xml, so we have to fake it.
+# this is getting ugly and begging for another refactoring
+def get_osc(ost_db, owner):
+ osc = OSC(ost_db, owner, ost_db.getName(), ost_db.getUUID())
+ return osc
+
+def get_mdc(db, owner, mds_uuid):
mds_db = db.lookup(mds_uuid);
if not mds_db:
panic("no mds:", mds_uuid)
- if saved_mdc.has_key(mds_uuid):
- return saved_mdc[mds_uuid]
- mdc = MDC(mds_db)
+ mdc = MDC(mds_db, owner, mds_db.getName(), mds_uuid)
+ return mdc
+
+def prepare_mdc(db, owner, mds_uuid):
+ mdc = get_mdc(db, owner, mds_uuid)
mdc.prepare()
- saved_mdc[mds_uuid] = mdc.uuid
return mdc.uuid
-def cleanup_mdc(db, mds_uuid):
- global saved_mdc
- mds_db = db.lookup(mds_uuid);
- if not mds_db:
- panic("no mds:", mds_uuid)
- if not saved_mdc.has_key(mds_uuid):
- mdc = MDC(mds_db)
- mdc.cleanup()
- saved_mdc[mds_uuid] = mdc.uuid
+def cleanup_mdc(db, owner, mds_uuid):
+ mdc = get_mdc(db, owner, mds_uuid)
+ mdc.cleanup()
############################################################
local_node = []
router_flag = 0
-def init_node(node_db):
- global local_node, router_flag
- netlist = node_db.lookup_class('network')
- for db in netlist:
- type = db.get_val('nettype')
- gw = db.get_val('nid')
- local_node.append((type, gw))
+def add_local_interfaces(node_db):
+ global local_node
+ debug("add_local")
+ for netuuid in node_db.get_networks():
+ net = node_db.lookup(netuuid)
+ debug("add_local", netuuid)
+ local_node.append((net.get_val('nettype'), net.get_val('nid')))
def node_needs_router():
return router_flag
for node_db in list:
if node_db.get_val_int('router', 0):
router_flag = 1
+ #debug("init_route_config: found router", node_db.getName())
for (local_type, local_nid) in local_node:
+ #debug("init_route_config:", local_type, local_nid)
gw = None
- netlist = node_db.lookup_class('network')
- for db in netlist:
- if local_type == db.get_val('type'):
- gw = db.get_val('server')
+ for netuuid in node_db.get_networks():
+ db = node_db.lookup(netuuid)
+ if local_type == db.get_val('nettype'):
+ gw = db.get_val('nid')
break
+ #debug("init_route_config: gw is", gw)
if not gw:
continue
- for db in netlist:
- if local_type != db.get_val('type'):
+ for netuuid in node_db.get_networks():
+ db = node_db.lookup(netuuid)
+ #debug("init_route_config: tbl: ", db.get_route_tbl())
+ if local_type != db.get_val('nettype'):
for route in db.get_routes(local_type, gw):
routes.append(route)
-
+ #debug("init_route_config routes:", routes)
+
def local_net(net):
global local_node
to = net.nid
debug ('looking for route to', to_type,to)
for r in routes:
+ #debug("find_route: ", r)
if r[2] == to:
return r
return None
-
############################################################
# lconf level logic
# Start a service.
-def startService(db, module_flag):
+def newService(db):
type = db.get_class()
debug('Service:', type, db.getName(), db.getUUID())
- # there must be a more dynamic way of doing this...
n = None
if type == 'ldlm':
n = LDLM(db)
n = LOV(db)
elif type == 'network':
n = Network(db)
- elif type == 'obd':
- n = OBD(db)
+ elif type == 'osd':
+ n = OSD(db)
elif type == 'cobd':
n = COBD(db)
- elif type == 'ost':
- n = OST(db)
elif type == 'mdsdev':
n = MDSDEV(db)
- elif type == 'osc':
- n = VOSC(db)
- elif type == 'mdc':
- n = MDC(db)
elif type == 'mountpoint':
n = Mountpoint(db)
elif type == 'echoclient':
n = ECHO_CLIENT(db)
else:
panic ("unknown service type:", type)
-
- if module_flag:
- if config.nomod():
- return
- if config.cleanup():
- n.cleanup_module()
- else:
- n.load_module()
- else:
- if config.nosetup():
- return
- if config.cleanup():
- n.cleanup()
- else:
- n.prepare()
+ return n
#
# Prepare the system to run lustre using a particular profile
# * make sure partitions are in place and prepared
# * initialize devices with lctl
# Levels is important, and needs to be enforced.
-def startProfile(prof_db, module_flag):
- if not prof_db:
- panic("profile:", profile, "not found.")
- services = prof_db.getServices()
- if config.cleanup():
- services.reverse()
+def for_each_profile(db, prof_list, operation):
+ for prof_uuid in prof_list:
+ prof_db = db.lookup(prof_uuid)
+ if not prof_db:
+ panic("profile:", profile, "not found.")
+ services = prof_db.getServices()
+ operation(services)
+
+def doSetup(services):
for s in services:
- startService(s[1], module_flag)
+ n = newService(s[1])
+ n.prepare()
+
+def doModules(services):
+ for s in services:
+ n = newService(s[1])
+ n.load_module()
+def doCleanup(services):
+ services.reverse()
+ for s in services:
+ n = newService(s[1])
+ n.cleanup()
+
+def doUnloadModules(services):
+ services.reverse()
+ for s in services:
+ n = newService(s[1])
+ n.cleanup_module()
#
# Load profile for
timeout = node_db.get_val_int('timeout', 0)
if not router_flag:
- init_node(node_db)
+ add_local_interfaces(node_db)
init_route_config(lustreDB)
# Two step process: (1) load modules, (2) setup lustre
# if not cleaning, load modules first.
- module_flag = not config.cleanup()
prof_list = node_db.get_refs('profile')
- for prof_uuid in prof_list:
- prof_db = node_db.lookup(prof_uuid)
- startProfile(prof_db, module_flag)
- if not config.cleanup():
+ if config.cleanup():
+ if config.force():
+ # the command line can override this value
+ timeout = 5
+ sys_set_timeout(timeout)
+ sys_set_recovery_upcall(recovery_upcall)
+
+ for_each_profile(node_db, prof_list, doCleanup)
+ for_each_profile(node_db, prof_list, doUnloadModules)
+
+ else:
+ for_each_profile(node_db, prof_list, doModules)
+
sys_set_debug_path()
script = config.gdb_script()
run(lctl.lctl, ' modules >', script)
if config.gdb():
- # dump /tmp/ogdb and sleep/pause here
log ("The GDB module script is in", script)
+ # pause, so user has time to break and
+ # load the script
time.sleep(5)
sys_set_timeout(timeout)
sys_set_recovery_upcall(recovery_upcall)
-
- module_flag = not module_flag
- for prof_uuid in prof_list:
- prof_db = node_db.lookup(prof_uuid)
- startProfile(prof_db, module_flag)
+
+ for_each_profile(node_db, prof_list, doSetup)
############################################################
# Command line processing
"type specific device configuration information\n"
"usage: setup <args...>"},
{"cleanup", jt_obd_cleanup, 0, "cleanup previously setup device\n"
- "usage: cleanup"},
+ "usage: cleanup [force]"},
{"detach", jt_obd_detach, 0,
"remove driver (and name and uuid) from current device\n"
"usage: detach"},
{"test_brw", jt_obd_test_brw, 0,
"do <num> bulk read/writes (<npages> per I/O, on OST object <objid>)\n"
"usage: test_brw [t]<num> [write [verbose [npages [[t]objid]]]]"},
+ {"get_stripe", jt_obd_get_stripe, 0,
+ "show stripe info for an echo client object\n"
+ "usage: get_stripe objid\n"},
+ {"set_stripe", jt_obd_set_stripe, 0,
+ "set stripe info for an echo client object\n"
+ "usage: set_stripe objid[=width!count[@offset][:id:id...]\n"},
+ {"unset_stripe", jt_obd_unset_stripe, 0,
+ "unset stripe info for an echo client object\n"
+ "usage: unset_stripe objid\n"},
{"test_ldlm", jt_obd_test_ldlm, 0,
"perform lock manager test\n"
"usage: test_ldlm"},
{"newconn", jt_obd_newconn, 0, "newconn <olduuid> [newuuid]"},
{"failconn", jt_obd_failconn, 0, "failconn <uuid>"},
{"lookup", jt_obd_mdc_lookup, 0, "usage: lookup <directory> <file>"},
+ {"notransno", jt_obd_no_transno, 0,
+ "disable sending of committed-transno updates\n"
+ "usage: notransno"},
+ {"readonly", jt_obd_set_readonly, 0,
+ "disable writes to the underlying device\n"
+ "usage: readonly"},
/* Debug commands */
{"======== debug =========", jt_noop, 0, "debug"},
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
-#define printk printf
#include <linux/lustre_lib.h>
#include <linux/lustre_lite.h>
#include <linux/obd_lov.h>
char * usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
int max_ost_count = MAX_LOV_UUID_COUNT;
-obd_uuid_t * obduuid;
+struct obd_uuid * obduuid;
__u32 obdcount;
__u32 obdindex;
char * buf;
int buflen;
struct obd_ioctl_data data;
struct lov_desc desc;
-obd_uuid_t * uuids;
+struct obd_uuid * uuids;
int uuidslen;
int cfglen;
struct lov_mds_md *lmm;
exit(1);
}
- obduuid = (obd_uuid_t *)optarg;
+ obduuid = (struct obd_uuid *)optarg;
break;
case 'h':
usage(stdout);
}
lmm = (struct lov_mds_md *)buf;
- uuids = (obd_uuid_t *)buf;
+ uuids = (struct obd_uuid *)buf;
}
void
__u32
getobdindex(const char *path)
{
- obd_uuid_t *uuidp;
+ struct obd_uuid *uuidp;
int fd;
int rc;
int i;
--- /dev/null
+#!/usr/bin/perl
+# Copyright (C) 2002 Cluster File Systems, Inc.
+# Author: Hariharan Thantry <thantry@users.sourceforge.net>
+
+# This file is part of Lustre, http://www.lustre.org.
+#
+# Lustre is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# Lustre is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Lustre; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+
+package llparser;
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(parse_file print_rpcrelations parse_foptions %ll_subsystems
+ %subsysnum %trace_masks $e_subsys $e_mask $e_processor $e_time
+ $e_file $e_line $e_function $e_pid $e_stack $e_fmtstr $e_backref
+ $e_treeparent $e_numchildren $e_youngestchild $e_next $e_pidhead
+ $e_rpcsndrcv $e_rpcpid $e_rpcxid $e_rpcnid $e_rpcopc $e_rpcnext
+ $e_curlineref $SEND $RCV);
+
+($e_subsys,
+ $e_mask,
+ $e_processor,
+ $e_time,
+ $e_file,
+ $e_line,
+ $e_function,
+ $e_pid,
+ $e_stack,
+ $e_fmtstr,
+ $e_treeparent,
+ $e_numchildren,
+ $e_youngestchild,
+ $e_pidhead,
+ $e_next,
+ $e_backref) = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+($e_rpcpid,
+ $e_rpcxid,
+ $e_rpcnid,
+ $e_rpcopc,
+ $e_rpcnext,
+ $e_rpcsndrcv,
+ $e_curlineref) = (0, 1, 2, 3, 4, 5, 6);
+
+$SEND = 0;
+$RCV = 1;
+
+$REGEX=qr/^\s*(\w+)\s*:\s*(\d+)\s*:\s*(\d+)\s*:\s*(\d+\.(?:\d+))\s*\(\s*([^:]+)\s*:\s*(\d+)\s*:\s*([^()]+)\s*\(\)\s*(?:(?:\d+)\s*\|\s*)?(\d+)\s*\+\s*(\d+)\s*(?:.*)\):(.*)$/;
+
+$RPCREGEX = qr/^\s*(?:Sending|Handling)\s*RPC\s*pid:xid:nid:opc\s*(\d+):(?:0x)?(\w+):(?:0x)?(\w+):(\d+)\s*$/;
+$FILEOPTIONREGEX = qr/(--server)|(-s)/;
+$SENDING = qr/Sending/;
+
+
+# Needs to match definition in portals/include/linux/kp30.h
+%ll_subsystems = ("00" => "UNDEFINED", "01" => "MDC", "02" => "MDS",
+ "03" => "OSC", "04" => "OST", "05" => "CLASS",
+ "06" => "OBDFS","07" => "LLITE","08" => "RPC",
+ "09" => "EXT2OBD","0a" => "PORTALS","0b" => "SOCKNAL",
+ "0c" => "QSWNAL","0d" => "PINGER","0e" => "FILTER",
+ "0f" => "TRACE","10" => "ECHO","11" => "LDLM",
+ "12" => "LOV", "13" => "GMNAL","14" => "PTLROUTER" );
+
+%subsysnum;
+$subsysnum->{UNDEFINED} = 0;
+$subsysnum->{MDC} = 1;
+$subsysnum->{MDS} = 2;
+$subsysnum->{OSC} = 3;
+$subsysnum->{OST} = 4;
+$subsysnum->{CLASS} = 5;
+$subsysnum->{OBDFS} = 6;
+$subsysnum->{LLITE} = 7;
+$subsysnum->{RPC} = 8;
+$subsysnum->{EXT2OBD} = 9;
+$subsysnum->{PORTALS} = 10;
+$subsysnum->{SOCKNAL} = 11;
+$subsysnum->{QSWNAL} = 12;
+$subsysnum->{PINGER} = 13;
+$subsysnum->{FILTER} = 14;
+$subsysnum->{TRACE} = 15; # obdtrace, not to be confused with D_TRACE */
+$subsysnum->{ECHO} = 16;
+$subsysnum->{LDLM} = 17;
+$subsysnum->{LOV} = 18;
+$subsysnum->{GMNAL} = 19;
+$subsysnum->{PTLROUTER} = 20;
+
+%tracemasks;
+$tracemasks->{TRACE} = 1 << 0; # /* ENTRY/EXIT markers */
+$tracemasks->{INODE} = 1 << 1; #
+$tracemasks->{SUPER} = 1 << 2; #
+$tracemasks->{EXT2} = 1 << 3; # /* anything from ext2_debug */
+$tracemasks->{MALLOC} = 1 << 4; # /* print malloc, free information */
+$tracemasks->{CACHE} = 1 << 5; # /* cache-related items */
+$tracemasks->{INFO} = 1 << 6; # /* general information */
+$tracemasks->{IOCTL} = 1 << 7; # /* ioctl related information */
+$tracemasks->{BLOCKS} = 1 << 8; # /* ext2 block allocation */
+$tracemasks->{NET} = 1 << 9; # /* network communications */
+$tracemasks->{WARNING} = 1 << 10; #
+$tracemasks->{BUFFS} = 1 << 11; #
+$tracemasks->{OTHER} = 1 << 12; #
+$tracemasks->{DENTRY} = 1 << 13; #
+$tracemasks->{PORTALS} = 1 << 14; # /* ENTRY/EXIT markers */
+$tracemasks->{PAGE} = 1 << 15; # /* bulk page handling */
+$tracemasks->{DLMTRACE} = 1 << 16; #
+$tracemasks->{ERROR} = 1 << 17; # /* CERROR} = ...) == CDEBUG} = D_ERROR, ...) */
+$tracemasks->{EMERG} = 1 << 18; # /* CEMERG} = ...) == CDEBUG} = D_EMERG, ...) */
+$tracemasks->{HA} = 1 << 19; # /* recovery and failover */
+$tracemasks->{RPCTRACE} = 1 << 19; # /* recovery and failover */
+
+# Contains all the file names, the first filename is the
+# client. After that are all servers.
+my @filearray = ();
+
+
+# Create backlinks between array entries based on the calling sequence
+# For each new PID encountered, the first entry will be present in the
+# PID hash.
+
+sub create_links {
+ my $arrayref = shift @_;
+ my $pidhashref = shift @_;
+ my $stitchref = shift @_;
+ my %local_hash;
+ my $hash_lineref;
+ my $tmpfmtref;
+ my $tmpref;
+ my $firstlineaftermarker = 0;
+
+ foreach $lineref (@$arrayref) {
+ next if ($lineref->[$e_time] == 0); # Skip the client marker line
+ my $pidprevious = $pidhashref->{$lineref->[$e_pid]};
+ if ($pidprevious->[$e_next] == 0) {
+ $pidprevious->[$e_next] = $lineref;
+ if (exists $local_hash{$lineref->[$e_pid]}
+ && $firstlineaftermarker) {
+ $hash_lineref=$local_hash{$lineref->[$e_pid]};
+ $hash_lineref->[$e_next] =$lineref;
+ $firstlineaftermarker = 0;
+ }
+ } elsif ($local_hash{$lineref->[$e_pid]} == 0) {
+ # True only for the first line, the marker line.
+ $local_hash{$lineref->[$e_pid]}=$lineref;
+ #print "LINE ADDED TO HASH: @$lineref\n";
+ $firstlineaftermarker = 1;
+ }
+ # Stack grows upward (assumes x86 kernel)
+ if ($lineref->[$e_stack] < $pidprevious->[$e_stack]) {
+ # lineref is not a child of pidprevious, find its parent
+ LINE: while(($lineref->[$e_stack] < $pidprevious->[$e_stack]) &&
+ ($lineref->[$e_function] == $pidprevious->[$e_function])
+ ) {
+ #This second part of the comparision is a HACK
+ last LINE if ($pidprevious->[$e_backref] == 0);
+ $pidprevious = $pidprevious->[$e_backref];
+ }
+ }
+ if ($lineref->[$e_stack] > $pidprevious->[$e_stack]) {
+ # lineref is child of pidprevious, with the caveat that they must
+ # belong to different functions. This is a HACK
+ # until CDEBUG is modified
+ while($lineref->[$e_function] eq $pidprevious->[$e_function]) {
+ last if ($pidprevious->[$e_backref] == 0);
+ $pidprevious = $pidprevious->[$e_backref];
+ }
+
+ $lineref->[$e_backref] = $pidprevious;
+ $pidprevious->[$e_numchildren]++;
+ } else {
+ # lineref is sibling of pidprevious
+ $lineref->[$e_numchildren] = 0;
+ $lineref->[$e_backref] = $pidprevious->[$e_backref];
+ ($lineref->[$e_backref])->[$e_numchildren]++;
+ }
+
+ $pidhashref->{$lineref->[$e_pid]} = $lineref;
+ $lineref->[$e_youngestchild] = $lineref;
+ while ($pidprevious->[$e_backref] != 0) {
+ $pidprevious->[$e_youngestchild] = $lineref;
+ $pidprevious = $pidprevious->[$e_backref];
+ }
+ $pidprevious->[$e_youngestchild] = $lineref;
+ $lineref->[$e_pidhead]=$pidprevious;
+
+ # Stitch together rpc's
+ if($lineref->[$e_fmtstr] =~ $RPCREGEX) {
+ #print "RPC LINE: @$lineref\n";
+ $tmpfmtref = [$1, $2, $3, $4, 0, 0, 0];
+ if ($lineref->[$e_fmtstr] =~ $SENDING) {
+ $tmpfmtref->[$e_rpcsndrcv] = $SEND;
+ } else { $tmpfmtref->[$e_rpcsndrcv] = $RCV; }
+ $tmpfmtref->[$e_curlineref] = $lineref;
+ $stitchref->{$lineref->[$e_time]} = $tmpfmtref;
+
+ }
+
+ }
+match_rpcs($stitchref);
+return $arrayref;
+}
+
+
+
+
+# Main loop, parses the debug log
+
+sub parse_file {
+ my %hasharray;
+ my $input_files = shift;
+
+ my $stitch_ref = shift;
+ my $pid = shift;
+ my $rpctrace = shift;
+ my $trace = shift;
+ my $nodlm = shift;
+ my $noclass = shift;
+ my $nonet = shift;
+
+ print "$pid, $rpctrace, $nodlm, $noclass, $nonet\n";
+ $backref = 0;
+ $treeparent = 0;
+ $numchildren = 0;
+ $youngestchild = 0;
+ $next = 0;
+ $pidhead = 0;
+ $iter = 0;
+
+ foreach $file (@$input_files) {
+
+ open(FILEHANDLE, $file) or die "Can't open file: $file\n";
+ while(<FILEHANDLE>) {
+ if (/$REGEX/) {
+ @parsed_line=($1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
+ $treeparent, $numchildren, $youngestchild,
+ $pidhead, $next, $backref);
+ next if (($parsed_line[$e_pid] != $pid) &&
+ ($pid) && ($iter == 0));
+ next if (($parsed_line[$e_mask] != $tracemasks->{RPCTRACE})
+ && ($rpctrace));
+ next if ($trace && $parsed_line[$e_mask] !=
+ $tracemasks->{TRACE});
+ next if ($nodlm && hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{LDLM});
+ next if ($noclass && hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{CLASS});
+ next if ($nonet && (hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{RPC} ||
+ hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{NET} ||
+ hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{PORTALS} ||
+ hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{SOCKNAL} ||
+ hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{QSWNAL} ||
+ hex($parsed_line[$e_subsys]) ==
+ $subsysnum->{GMNAL}));
+
+
+ if (!exists($hasharray{$parsed_line[$e_pid]})) {
+ # Push a marker for the beginning of this PID
+ my @marker_line;
+ $marker_line[$e_subsys] = 0;
+ $marker_line[$e_mask] = 0;
+ $marker_line[$e_processor] = 0;
+ $marker_line[$e_time] = $parsed_line[$e_time];
+ $marker_line[$e_file] = 0;
+ $marker_line[$e_line] = 0;
+ $marker_line[$e_function] = 0;
+ $marker_line[$e_pid] = $parsed_line[$e_pid];
+ # marker lines are everyone's parent, so stack value zero
+ $marker_line[$e_stack] = 0;
+ $marker_line[$e_fmtstr] = "";
+ $marker_line[$e_treeparent] = 0;
+ $marker_line[$e_numchildren] = 0;
+ $marker_line[$e_youngestchild] = 0;
+ $marker_line[$e_pidhead] = 0;
+ $marker_line[$e_next]= \@parsed_line;
+ $marker_line[$e_backref] = 0;
+ $hasharray{$parsed_line[$e_pid]} = \@marker_line;
+ push @$array_parsed, [ @marker_line ];
+
+ }
+ push @$array_parsed, [ @parsed_line ];
+ }
+
+ }
+ close(FILEHANDLE);
+ if ($iter == 0) {
+ # Insert end of client line marker, an all zero pattern;
+ @marker_line = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ push @$array_parsed, [ @marker_line ];
+
+ }
+ $iter ++;
+ }
+
+ $array_parsed=create_links($array_parsed, \%hasharray, $stitch_ref);
+ #print_array($array_parsed);
+ return $array_parsed;
+}
+
+sub print_array {
+
+ my $arrayref = shift;
+ foreach $lineref(@$arrayref){
+ if ($lineref->[$e_backref]==0){
+ print "MARKER LINE(addr): $lineref contents: [@$lineref]\n";
+ } else {
+
+ print "REGULAR LINE (addr) :$lineref contents:[@$lineref]\n";
+ }
+ }
+
+}
+
+sub print_rpcrelations {
+
+ my $rpchashref = shift;
+ foreach $rpckeys (sort keys %$rpchashref) {
+ $tmpref = $rpchashref->{$rpckeys};
+ #print "Key: $rpckeys, Contents: @$tmpref\n";
+
+ }
+
+}
+sub match_rpcs {
+ my $rpchashref = shift;
+ foreach $rpckeys (sort keys %$rpchashref) {
+ $tmpref = $rpchashref->{$rpckeys};
+ #print "MATCHING: $@tmpref...\n";
+ foreach $cmpkeys (sort keys %$rpchashref) {
+ next if($cmpkeys == $rpckeys);
+ $cmpref = $rpchashref->{$cmpkeys};
+ # print "Line compared: @$cmpref\n";
+ next if ($tmpref->[$e_rpcsndrcv] == $cmpref->[$e_rpcsndrcv]);
+ next if ($tmpref->[$e_rpcpid] != $cmpref->[$e_rpcpid]);
+ next if ($tmpref->[$e_rpcxid] != $cmpref->[$e_rpcxid]);
+ if ($tmpref->[$e_rpcsndrcv] == $SEND) {
+ $tmpref->[$e_rpcnext] = $cmpkeys;
+ #print "MACTHED: KEY 1: $rpckeys CONTENTS: @$tmpref",
+ #"KEY2: $cmpkeys CONTENTS: @$cmpref\n"
+
+ }
+
+ }
+
+ }
+
+}
+
+sub getnextchild {
+ my $rootline = shift;
+ my $lineref = shift;
+ my $tempref = $lineref->[$e_next];
+ if ($tempref == 0) {
+ return 0;
+ }
+
+ if (($tempref->[$e_stack] > $rootline->[$e_stack]) ||
+ (($tempref->[$e_stack] <= $rootline->[$e_stack]) &&
+ ($tempref->[$e_function] == $rootline->[$e_function])
+ )){
+ # Child
+ return $tempref;
+
+ }
+ return 0;
+
+
+}
+
+
+sub parse_foptions {
+
+ my $inarg = shift;
+ my $idx = 0;
+ foreach $elem(@$inarg) {
+ next if ($elem =~ /$FILEOPTIONREGEX/);
+ $filearray[$idx] = $elem;
+ $idx++;
+ }
+ return \@filearray;
+}
+
+1;
+#$array_parsed=parse_file();
+#print_array($array_parsed);
-add ost
--node node_name
- --obd obd_name
+ --ost ost_name
--lov lov_name
--dev path
--size size
--fstype extN|ext3
- --obduuid uuid
+ --ostuuid uuid
--add mtpt - Mountpoint
--node node_name
--path /mnt/point
--mds mds_name
- --obd obd_name OR --lov lovname
+ --ost ost_name OR --lov lov_name
"""
sys.exit(1)
ldlm = self.newService("ldlm", name, uuid)
return ldlm
- def obd(self, name, uuid, fs, obdtype, devname, format, ost_uuid, dev_size=0):
- obd = self.newService("obd", name, uuid)
- obd.setAttribute('obdtype', obdtype)
- obd.appendChild(self.ref("active", ost_uuid))
+ def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, net_uuid, dev_size=0):
+ osd = self.newService("osd", name, uuid)
+ osd.setAttribute('osdtype', osdtype)
+ osd.appendChild(self.ref("target", ost_uuid))
+ osd.appendChild(self.ref("network", net_uuid))
if fs:
- self.addElement(obd, "fstype", fs)
+ self.addElement(osd, "fstype", fs)
if devname:
- dev = self.addElement(obd, "devpath", devname)
- self.addElement(obd, "autoformat", format)
+ dev = self.addElement(osd, "devpath", devname)
+ self.addElement(osd, "autoformat", format)
if dev_size:
- self.addElement(obd, "devsize", "%s" % (dev_size))
- return obd
+ self.addElement(osd, "devsize", "%s" % (dev_size))
+ return osd
def cobd(self, name, uuid, real_uuid, cache_uuid):
cobd = self.newService("cobd", name, uuid)
cobd.appendChild(self.ref("cacheobd",cache_uuid))
return cobd
- def ost(self, name, uuid, obd_uuid, net_uuid):
+ def ost(self, name, uuid, osd_uuid):
ost = self.newService("ost", name, uuid)
- ost.appendChild(self.ref("network", net_uuid))
- ost.appendChild(self.ref("obd", obd_uuid))
+ ost.appendChild(self.ref("active", osd_uuid))
return ost
+ def oss(self, name, uuid):
+ oss = self.newService("oss", name, uuid)
+ return oss
+
def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern):
lov = self.newService("lov", name, uuid)
lov.appendChild(self.ref("mds", mds_uuid))
- lov.setAttribute("stripesize", stripe_sz)
- lov.setAttribute("stripecount", stripe_cnt)
- lov.setAttribute("stripepattern", pattern)
+ lov.setAttribute("stripesize", str(stripe_sz))
+ lov.setAttribute("stripecount", str(stripe_cnt))
+ lov.setAttribute("stripepattern", str(pattern))
return lov
def lovconfig(self, name, uuid, lov_uuid):
if dev_size:
self.addElement(mdd, "devsize", "%s" % (dev_size))
mdd.appendChild(self.ref("network", net_uuid))
- mdd.appendChild(self.ref("mds", mds_uuid))
+ mdd.appendChild(self.ref("target", mds_uuid))
return mdd
def mountpoint(self, name, uuid, mds_uuid, osc_uuid, path):
def lov_add_obd(gen, lov, osc_uuid):
lov.appendChild(gen.ref("obd", osc_uuid))
+def ref_exists(profile, uuid):
+ elist = profile.childNodes
+ for e in elist:
+ if e.nodeType == e.ELEMENT_NODE:
+ ref = e.getAttribute('uuidref')
+ if ref == uuid:
+ return 1
+ return 0
+
+# ensure that uuid is not already in the profile
+# return true if uuid is added
def node_add_profile(gen, node, ref, uuid):
refname = "%s_ref" % "profile"
ret = node.getElementsByTagName(refname)
error('node has no profile ref:', node)
prof_uuid = ret[0].getAttribute('uuidref')
profile = lookup(node.parentNode, prof_uuid)
+ if not profile:
+ error("no profile found:", prof_uuid)
+ if ref_exists(profile, uuid):
+ return 0
profile.appendChild(gen.ref(ref, uuid))
+ return 1
def get_attr(dom_node, attr, default=""):
v = dom_node.getAttribute(attr)
nid = get_option(options, 'nid')
net_type = get_option(options, 'nettype')
- if net_type == 'tcp':
+ if net_type in ('tcp', 'toe'):
port = get_option_int(options, 'port', DEFAULT_PORT)
tcpbuf = get_option_int(options, 'tcpbuf', 0)
elif net_type in ('elan', 'gm'):
def add_ost(gen, lustre, options):
node_name = get_option(options, 'node')
lovname = get_option(options, 'lov', '')
- obdtype = get_option(options, 'obdtype', 'obdfilter')
+ osdtype = get_option(options, 'osdtype', 'obdfilter', deprecated_tag="obdtype")
- if obdtype == 'obdecho':
+ if osdtype == 'obdecho':
fstype = ''
devname = ''
size = 0
size = get_option(options, 'size', 0)
fstype = get_option(options, 'fstype', 'extN')
- obdname = get_option(options, 'obd', 'OBD_'+ node_name)
- obdname = new_name(obdname)
- ostname = new_name('OST_'+ obdname)
- if options.has_key('obduuid'):
- obd_uuid = options['obduuid']
- obd = lookup(lustre, obd_uuid)
- if obd:
- error("Duplicate OBD UUID:", obd_uuid)
+ ostname = get_option(options, 'ost', '', deprecated_tag='obd')
+ if not ostname:
+ ostname = new_name('OST_'+ node_name)
+
+ osdname = new_name("OSD_" + ostname)
+ osd_uuid = get_option(options, 'osduuid', '', deprecated_tag = 'obduuid')
+ if osd_uuid and lookup(lustre, osd_uuid):
+ error("Duplicate OBD UUID:", osd_uuid)
else:
- obd_uuid = new_uuid(obdname)
- ost_uuid = new_uuid(ostname)
+ osd_uuid = new_uuid(osdname)
+
+ ost_uuid = name2uuid(lustre, ostname, fatal=0)
+ if not ost_uuid:
+ ost_uuid = new_uuid(ostname)
+ ost = gen.ost(ostname, ost_uuid, osd_uuid)
+ lustre.appendChild(ost)
+ if lovname:
+ lov = findByName(lustre, lovname, "lov")
+ if not lov:
+ error('add_ost:', '"'+lovname+'"', "lov element not found.")
+ lov_add_obd(gen, lov, ost_uuid)
net_uuid = get_net_uuid(lustre, node_name)
if not net_uuid:
- error("NODE: ", node_name, "not found")
+ error("NODE: No net network interface for", node_name, "found")
- obd = gen.obd(obdname, obd_uuid, fstype, obdtype, devname, get_format_flag(options), ost_uuid,
- size)
- ost = gen.ost(ostname, ost_uuid, obd_uuid, net_uuid)
-
- if lovname:
- lov = findByName(lustre, lovname, "lov")
- if not lov:
- error('add_ost:', '"'+lovname+'"', "lov element not found.")
- lov_add_obd(gen, lov, obd_uuid)
+ osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid,
+ net_uuid, size)
node = findByName(lustre, node_name, "node")
- node_add_profile(gen, node, 'obd', obd_uuid)
- node_add_profile(gen, node, 'ost', ost_uuid)
- lustre.appendChild(obd)
- lustre.appendChild(ost)
+## if node_add_profile(gen, node, 'oss', oss_uuid):
+## ossname = 'OSS'
+## oss_uuid = new_uuid(ossname)
+## oss = gen.oss(ossname, oss_uuid)
+## lustre.appendChild(oss)
+
+ node_add_profile(gen, node, 'osd', osd_uuid)
+ lustre.appendChild(osd)
def add_cobd(gen, lustre, options):
def add_echo_client(gen, lustre, options):
""" add an echo client to the profile for this node. """
node_name = get_option(options, 'node')
- lov_name = get_option(options, 'obd')
+ lov_name = get_option(options, 'ost')
node = findByName(lustre, node_name, 'node')
lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0)
if not lov_uuid:
- lov_uuid = name2uuid(lustre, lov_name, tag='obd', fatal=1)
+ lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1)
echo = gen.echo_client(echoname, echo_uuid, lov_uuid)
lustre.appendChild(echo)
warning("name:", lov_orig, "already used. using:", name)
mds_name = get_option(options, 'mds')
- stripe_sz = get_option(options, 'stripe_sz')
- stripe_cnt = get_option(options, 'stripe_cnt', 0)
- pattern = get_option(options, 'stripe_pattern', 0)
+ stripe_sz = get_option_int(options, 'stripe_sz')
+ stripe_cnt = get_option_int(options, 'stripe_cnt', 0)
+ pattern = get_option_int(options, 'stripe_pattern', 0)
uuid = new_uuid(name)
ret = findByName(lustre, name, "lov")
mds_name = get_option(options, 'mds')
lov_name = get_option(options, 'lov', '')
if lov_name == '':
- lov_name = get_option(options, 'obd', '')
+ lov_name = get_option(options, 'ost', '', deprecated_tag='obd')
if lov_name == '':
- error("--add mtpt requires either --lov lov_name or --obd obd_name")
+ error("--add mtpt requires either --lov lov_name or --ost ost_name")
name = new_name('MNT_'+ node_name)
mds_uuid = name2uuid(lustre, mds_name, tag='mds')
lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0)
if not lov_uuid:
- lov_uuid = name2uuid(lustre, lov_name, tag='obd', fatal=1)
+ lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1)
uuid = new_uuid(name)
mtpt = gen.mountpoint(name, uuid, mds_uuid, lov_uuid, path)
node_add_profile(gen, node, "mountpoint", uuid)
lustre.appendChild(mtpt)
+# obsolete, leaving behind for reference
def add_oscref(gen, lustre, options):
""" create mtpt on a node """
node_name = get_option(options, 'node')
return 1
return 0
-def get_option(options, tag, default = None):
+def get_option(options, tag, default = None, deprecated_tag=None):
"""Look for tag in options hash and return the value if set. If not
set, then if return default it is set, otherwise exception."""
if options.has_key(tag):
return options[tag]
+ elif deprecated_tag and options.has_key(deprecated_tag):
+ warning('--'+deprecated_tag, " is deprecated, please use:", '--'+tag)
+ return options[deprecated_tag]
elif default != None:
return default
else:
- raise OptionError("--add %s requires --%s value" % (options['add'], tag))
+ raise OptionError("--add %s requires --%s <value>" % (options['add'], tag))
# this exception should print an error like '--add blah requires --<tag> value'
def get_option_int(options, tag, default = None):
"""Return an integer option. Raise exception if the value is not an int"""
val = get_option(options, tag, default)
- return int(val)
+ try:
+ n = int(val)
+ except ValueError:
+ raise OptionError("--%s <num> (value must be integer)" % (tag))
+ return n
def parse_cmdline(argv):
short_opts = "ho:i:m:"
long_opts = ["add=", "node=", "nettype=", "nid=", "tcpbuf=", "port=",
"echo_client=", "stripe_sz=", "stripe_cnt=", "stripe_pattern=",
"mds=", "route", "router", "merge=", "format", "reformat", "output=",
- "dev=", "size=", "obd=", "obdtype=", "obduuid=", "in=",
- "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=",
- "oscref", "osc=", "real_obd=", "cache_obd=", "fstype=",
+ "dev=", "size=", "obd=", "ost=", "obdtype=", "osdtype=", "obduuid=", "in=",
+ "osduuid=", "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=",
+ "osc=", "real_obd=", "cache_obd=", "fstype=",
"timeout=", "recovery_upcall="]
opts = []
args = []
options['mds'] = a
if o == "--obd":
options['obd'] = a
+ if o == "--ost":
+ options['ost'] = a
# node options
if o == "--timeout":
options['osc'] = a
if o == "--obdtype":
options['obdtype'] = a
+ if o == "--osdtype":
+ options['osdtype'] = a
if o == "--fstype":
options['fstype'] = a
if o == "--obduuid":
options['obduuid'] = a
+ if o == "--osduuid":
+ options['osduuid'] = a
# lov options
if o == "--stripe_sz":
if o == "--format":
options['format'] = 1
if o == "--reformat":
+ warning("the lmc --reformat option is not supported. Use lconf --reformat")
options['reformat'] = 1
if o == "--batch":
options['batch'] = a
add_node(gen, lustre, options)
elif devtype == 'echo_client':
add_echo_client(gen, lustre, options)
- elif devtype == 'oscref':
- add_oscref(gen, lustre, options)
elif devtype == 'cobd':
add_cobd(gen, lustre, options)
else:
/****************** Functions ******************/
-void usage(char *pgm)
+void usage(char *prog)
{
- fprintf(stderr, "usage: %s <filename> <stripe size> <start stripe> <stripe count>\n", pgm);
-
- fprintf(stderr, "\tstripe size: number of bytes in each stripe\n");
- fprintf(stderr, "\tstripe start: OST index which holds first stripe\n");
- fprintf(stderr, "\tstripe count: number of OSTs to stripe over\n");
+ fprintf(stderr, "usage: %s <filename> <stripe size> <stripe start> "
+ "<stripe count>\n", prog);
+
+ fprintf(stderr,
+ "\tstripe size: number of bytes in each stripe (0 default)\n");
+ fprintf(stderr,
+ "\tstripe start: OST index of first stripe (-1 default)\n");
+ fprintf(stderr,
+ "\tstripe count: number of OSTs to stripe over (0 default)\n");
}
int create_file(char *name, long stripe_size, int stripe_offset,
long st_size;
int st_offset,
st_count;
+ char *end;
/* Check to make sure we have enough parameters */
if (argc != 5) {
usage(argv[0]);
- return(-1);
+ return 1;
}
/* Get the stripe size */
- st_size = atol(argv[2]);
+ st_size = strtoul(argv[2], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "bad stripe size '%s'\n", argv[2]);
+ usage(argv[0]);
+ return 2;
+ }
+
+ /*
+ if (st_size & 4095) {
+ fprintf(stderr, "stripe size must be multiple of page size\n");
+ usage(argv[0]);
+ return 3;
+ }
+ */
/* Get the stripe offset*/
- st_offset = atoi(argv[3]);
+ st_offset = strtoul(argv[3], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "bad stripe offset '%s'\n", argv[3]);
+ usage(argv[0]);
+ return 4;
+ }
/* Get the stripe count */
- st_count = atoi(argv[4]);
+ st_count = strtoul(argv[4], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "bad stripe count '%s'\n", argv[4]);
+ usage(argv[0]);
+ return 5;
+ }
/* Create the file, as specified. Return and display any errors. */
result = create_file(argv[1], st_size, st_offset, st_count);
#include <stdio.h>
#include <stdarg.h>
#include <signal.h>
-#define printk printf
#include <linux/lustre_lib.h>
#include <linux/lustre_idl.h>
int max = sizeof(rawbuf);
static int thread;
-static struct lov_stripe_md saved_lsm;
-static char lsm_valid = 0;
+
+union lsm_buffer {
+ char space [4096];
+ struct lov_stripe_md lsm;
+} lsm_buffer;
static int getfd(char *func);
static char *cmdname(char *func);
return ret;
}
+static char *
+lsm_string (struct lov_stripe_md *lsm)
+{
+ static char buffer[4096];
+ char *p = buffer;
+ int space = sizeof (buffer);
+ int i;
+ int nob;
+
+ *p = 0;
+ space--;
+
+ nob = snprintf(p, space, LPX64, lsm->lsm_object_id);
+ p += nob;
+ space -= nob;
+
+ if (lsm->lsm_stripe_count != 0) {
+ nob = snprintf (p, space, "=%u#%u@%d",
+ lsm->lsm_stripe_size,
+ lsm->lsm_stripe_count,
+ lsm->lsm_stripe_offset);
+ p += nob;
+ space -= nob;
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ nob = snprintf (p, space, ":"LPX64,
+ lsm->lsm_oinfo[i].loi_id);
+ p += nob;
+ space -= nob;
+ }
+ }
+
+ if (space == 0) { /* probable overflow */
+ fprintf (stderr, "lsm_string() overflowed buffer\n");
+ abort ();
+ }
+
+ return (buffer);
+}
+
+static void
+reset_lsmb (union lsm_buffer *lsmb)
+{
+ memset (lsmb->space, 0, sizeof (lsmb->space));
+ lsmb->lsm.lsm_magic = LOV_MAGIC;
+
+}
+
+static int
+parse_lsm (union lsm_buffer *lsmb, char *string)
+{
+ struct lov_stripe_md *lsm = &lsmb->lsm;
+ char *end;
+ int i;
+
+ /*
+ * object_id[=size#count[@offset][:id]*]
+ */
+
+ reset_lsmb (lsmb);
+
+ lsm->lsm_object_id = strtoull (string, &end, 0);
+ if (end == string)
+ return (-1);
+ string = end;
+
+ if (*string == 0)
+ return (0);
+
+ if (*string != '=')
+ return (-1);
+ string++;
+
+ lsm->lsm_stripe_size = strtoul (string, &end, 0);
+ if (end == string)
+ return (-1);
+ string = end;
+
+ if (*string != '#')
+ return (-1);
+ string++;
+
+ lsm->lsm_stripe_count = strtoul (string, &end, 0);
+ if (end == string)
+ return (-1);
+ string = end;
+
+ if (*string == '@') {
+ string++;
+ lsm->lsm_stripe_offset = strtol (string, &end, 0);
+ if (end == string)
+ return (-1);
+ string = end;
+ }
+
+ if (*string == 0) /* don't have to specify obj ids */
+ return (0);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (*string != ':')
+ return (-1);
+ string++;
+ lsm->lsm_oinfo[i].loi_id = strtoull (string, &end, 0);
+ string = end;
+ }
+
+ if (*string != 0)
+ return (-1);
+
+ return (0);
+}
+
static char *cmdname(char *func)
{
static char buf[512];
do_disconnect(argv[0], 1);
-#warning TODO: implement timeout per lctl usage for probe
+ /* XXX TODO: implement timeout per lctl usage for probe */
if (argc != 1)
return CMD_HELP;
int jt_obd_detach(int argc, char **argv)
{
struct obd_ioctl_data data;
+ int rc;
+
+ IOCINIT(data);
+
+ if (argc != 1)
+ return CMD_HELP;
+
+ rc = ioctl(fd, OBD_IOC_DETACH, buf);
+ if (rc < 0)
+ fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
+ strerror(rc = errno));
+
+ return rc;
+}
+
+int jt_obd_cleanup(int argc, char **argv)
+{
+ struct obd_ioctl_data data;
char force = 'F';
int rc;
return CMD_HELP;
if (argc == 2) {
+ if (strcmp(argv[1], "force"))
+ return CMD_HELP;
data.ioc_inllen1 = 1;
data.ioc_inlbuf1 = &force;
}
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_DETACH, buf);
+ rc = ioctl(fd, OBD_IOC_CLEANUP, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
return rc;
}
-int jt_obd_cleanup(int argc, char **argv)
+int jt_obd_no_transno(int argc, char **argv)
{
struct obd_ioctl_data data;
int rc;
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_CLEANUP, &data);
+ rc = ioctl(fd, OBD_IOC_NO_TRANSNO, &data);
+ if (rc < 0)
+ fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
+ strerror(rc = errno));
+
+ return rc;
+}
+
+int jt_obd_set_readonly(int argc, char **argv)
+{
+ struct obd_ioctl_data data;
+ int rc;
+
+ IOCINIT(data);
+
+ if (argc != 1)
+ return CMD_HELP;
+
+ rc = ioctl(fd, OBD_IOC_SET_READONLY, &data);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
return rc;
}
-/* The ioctl API has been extended to provide the LOV stripe metadata to the
- * caller when applicable. This utility, however, only saves the LSM for the
- * latest CREATE. It only saves the LSM when the ioctl indicates that it
- * is valid by overloading 'ioc_conn2' as a boolean. */
+/* Get echo client's stripe meta-data for the given object
+ */
+int jt_obd_get_stripe (int argc, char **argv)
+{
+ struct obd_ioctl_data data;
+ __u64 id;
+ int rc;
+ char *end;
+
+ if (argc != 2)
+ return (CMD_HELP);
+
+ id = strtoull (argv[1], &end, 0);
+ if (*end) {
+ fprintf (stderr, "Error: %s: invalid object id '%s'\n",
+ cmdname (argv[0]), argv[1]);
+ return (CMD_HELP);
+ }
+
+ memset (&lsm_buffer, 0, sizeof (lsm_buffer));
+
+ IOCINIT (data);
+ data.ioc_obdo1.o_id = id;
+ data.ioc_obdo1.o_mode = S_IFREG | 0644;
+ data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+ data.ioc_pbuf1 = (char *)&lsm_buffer;
+ data.ioc_plen1 = sizeof (lsm_buffer);
+
+ IOC_PACK(argv[0], data);
+ rc = ioctl(fd, ECHO_IOC_GET_STRIPE, buf);
+ IOC_UNPACK(argv[0], data);
+
+ if (rc != 0) {
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ cmdname (argv[0]), rc, strerror (errno));
+ return (rc);
+ }
+
+ printf ("%s\n", lsm_string (&lsm_buffer.lsm));
+
+ return (rc);
+}
+
+/* Set stripe meta-data for 1 or more objects. Object must be new to
+ * this echo client instance.
+ */
+int jt_obd_set_stripe (int argc, char **argv)
+{
+ struct obd_ioctl_data data;
+ char *end;
+ int count = 1;
+ int i;
+ int rc;
+
+ if (argc < 2 || argc > 3)
+ return CMD_HELP;
+
+ rc = parse_lsm (&lsm_buffer, argv[1]);
+ if (rc != 0) {
+ fprintf (stderr, "error: %s: invalid object '%s'\n",
+ cmdname (argv[0]), argv[1]);
+ return CMD_HELP;
+ }
+
+ if (argc > 2) {
+ count = strtol (argv[2], &end, 0);
+ if (*end != 0) {
+ fprintf (stderr, "error: %s: invalid count '%s'\n",
+ cmdname (argv[0]), argv[1]);
+ return CMD_HELP;
+ }
+ }
+
+ for (i = 0; i < count; i++)
+ {
+ IOCINIT (data);
+ data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id + i;
+ data.ioc_obdo1.o_mode = S_IFREG | 0644;
+ data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+ data.ioc_pbuf1 = (char *)&lsm_buffer;
+ data.ioc_plen1 = sizeof (lsm_buffer);
+
+ IOC_PACK (argv[0], data);
+ rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+ IOC_UNPACK (argv[0], data);
+
+ if (rc != 0) {
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ cmdname (argv[0]), rc, strerror (errno));
+ return (rc);
+ }
+ }
+
+ return (0);
+}
+
+/* Clear stripe meta-data info for an object on this echo-client instance
+ */
+int jt_obd_unset_stripe (int argc, char **argv)
+{
+ struct obd_ioctl_data data;
+ char *end;
+ obd_id id;
+ int rc;
+
+ if (argc != 2)
+ return CMD_HELP;
+
+ id = strtoll (argv[1], &end, 0);
+ if (*end == 0) {
+ fprintf (stderr, "error: %s: invalid object id '%s'\n",
+ cmdname (argv[0]), argv[1]);
+ return CMD_HELP;
+ }
+
+ IOCINIT (data);
+ data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id;
+ data.ioc_obdo1.o_mode = S_IFREG | 0644;
+ data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
+
+ IOC_PACK (argv[0], data);
+ rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+ IOC_UNPACK (argv[0], data);
+
+ if (rc != 0)
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ cmdname (argv[0]), rc, strerror (errno));
+
+ return (0);
+}
+
+/* Create one or more objects, arg[1] may describe stripe meta-data. If
+ * not, defaults assumed. This echo-client instances stashes the stripe
+ * object ids. Use get_stripe on this node to print full lsm and
+ * set_stripe on another node to cut/paste between nodes.
+ */
int jt_obd_create(int argc, char **argv)
{
+ static __u64 base_id = 1;
+
struct obd_ioctl_data data;
struct timeval next_time;
__u64 count = 1, next_count;
char *end;
IOCINIT(data);
- if (argc < 2 || argc > 4)
+ if (argc < 2 || argc > 5)
return CMD_HELP;
count = strtoull(argv[1], &end, 0);
return CMD_HELP;
}
+ if (argc < 5)
+ reset_lsmb (&lsm_buffer); /* will set default */
+ else {
+ rc = parse_lsm (&lsm_buffer, argv[4]);
+ if (rc != 0) {
+ fprintf(stderr, "error: %s: invalid lsm '%s'\n",
+ cmdname(argv[0]), argv[4]);
+ return CMD_HELP;
+ }
+ base_id = lsm_buffer.lsm.lsm_object_id;
+ }
+
printf("%s: "LPD64" objects\n", cmdname(argv[0]), count);
gettimeofday(&next_time, NULL);
next_time.tv_sec -= verbose;
for (i = 1, next_count = verbose; i <= count; i++) {
data.ioc_obdo1.o_mode = mode;
- data.ioc_obdo1.o_id = i;
+ data.ioc_obdo1.o_id = base_id++;
data.ioc_obdo1.o_uid = 0;
data.ioc_obdo1.o_gid = 0;
data.ioc_obdo1.o_valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;;
+ OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;
- data.ioc_inllen1 = sizeof(saved_lsm);
- data.ioc_inlbuf1 = (char *)&saved_lsm;
+ data.ioc_plen1 = sizeof (lsm_buffer);
+ data.ioc_pbuf1 = (char *)&lsm_buffer;
IOC_PACK(argv[0], data);
rc = ioctl(fd, OBD_IOC_CREATE, buf);
IOC_UNPACK(argv[0], data);
- fprintf(stderr, "lsm->lsm_o_id: "LPX64"\n",
- saved_lsm.lsm_object_id);
SHMEM_BUMP();
if (rc < 0) {
fprintf(stderr, "error: %s: #%d - %s\n",
break;
}
- lsm_valid = data.ioc_conn2;
-
if (be_verbose(verbose, &next_time, i, &next_count, count))
printf("%s: #%d is object id "LPX64"\n",
cmdname(argv[0]), i, data.ioc_obdo1.o_id);
}
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
- if (lsm_valid == 1) {
- data.ioc_inllen1 = sizeof(saved_lsm);
- data.ioc_inlbuf1 = (char *)&saved_lsm;
- }
-
IOC_PACK(argv[0], data);
rc = ioctl(fd, OBD_IOC_SETATTR, buf);
if (rc < 0)
data.ioc_obdo1.o_mode = S_IFREG | 0644;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
- data.ioc_inllen1 = sizeof(saved_lsm);
- data.ioc_inlbuf1 = (char *)&saved_lsm;
-
IOC_PACK(argv[0], data);
rc = ioctl(fd, OBD_IOC_DESTROY, buf);
IOC_UNPACK(argv[0], data);
cmdname(argv[0]), id, strerror(rc = errno));
break;
}
- lsm_valid = 0;
if (be_verbose(verbose, &next_time, i, &next_count, count))
printf("%s: #%d is object id "LPX64"\n",
data.ioc_obdo1.o_valid = 0xffffffff;
printf("%s: object id "LPX64"\n", cmdname(argv[0]),data.ioc_obdo1.o_id);
- if (lsm_valid == 1) {
- data.ioc_inllen1 = sizeof(saved_lsm);
- data.ioc_inlbuf1 = (char *)&saved_lsm;
- }
-
IOC_PACK(argv[0], data);
rc = ioctl(fd, OBD_IOC_GETATTR, buf);
IOC_UNPACK(argv[0], data);
data.ioc_count = len;
data.ioc_offset = thr_offset * len * count;
- if (lsm_valid == 1) {
- data.ioc_inllen1 = sizeof(saved_lsm);
- data.ioc_inlbuf1 = (char *)&saved_lsm;
- }
-
gettimeofday(&start, NULL);
next_time.tv_sec = start.tv_sec - verbose;
next_time.tv_usec = start.tv_usec;
{
struct obd_ioctl_data data;
struct lov_desc desc;
- obd_uuid_t *uuidarray, *ptr;
+ struct obd_uuid *uuidarray, *ptr;
int rc, i;
char *end;
if (strlen(argv[1]) > sizeof(desc.ld_uuid) - 1) {
fprintf(stderr,
- "error: %s: LOV uuid '%s' longer than "LPSZ" characters\n",
+ "error: %s: LOV uuid '%s' longer than "LPSZ" chars\n",
cmdname(argv[0]), argv[1], sizeof(desc.ld_uuid) - 1);
return -EINVAL;
}
memset(&desc, 0, sizeof(desc));
- strncpy(desc.ld_uuid, argv[1], sizeof(desc.ld_uuid) - 1);
+ obd_str2uuid(&desc.ld_uuid, argv[1]);
desc.ld_tgt_count = argc - 6;
desc.ld_default_stripe_count = strtoul(argv[2], &end, 0);
if (*end) {
{
struct obd_ioctl_data data;
struct lov_desc desc;
- obd_uuid_t *uuidarray;
+ struct obd_uuid *uuidarray;
char *path;
int rc, tmpfd;
}
memset(&desc, 0, sizeof(desc));
- strncpy(desc.ld_uuid, argv[1], sizeof(desc.ld_uuid) - 1);
+ obd_str2uuid(&desc.ld_uuid, argv[1]);
desc.ld_tgt_count = DEF_UUID_ARRAY_LEN;
repeat:
uuidarray = calloc(desc.ld_tgt_count, sizeof(*uuidarray));
fprintf(stderr, "error: %s: ioctl error: %s\n",
cmdname(argv[0]), strerror(rc = errno));
} else {
- obd_uuid_t *ptr;
+ struct obd_uuid *ptr;
int i;
if (obd_ioctl_unpack(&data, buf, max)) {
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eeb@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "obdiolib.h"
+
+int
+parse_kmg (uint64_t *valp, char *str)
+{
+ uint64_t val;
+ char mod[32];
+
+ switch (sscanf (str, LPU64"%1[gGmMkK]", &val, mod))
+ {
+ default:
+ return (-1);
+
+ case 1:
+ *valp = val;
+ return (0);
+
+ case 2:
+ switch (*mod)
+ {
+ case 'g':
+ case 'G':
+ *valp = val << 30;
+ return (0);
+
+ case 'm':
+ case 'M':
+ *valp = val << 20;
+ return (0);
+
+ case 'k':
+ case 'K':
+ *valp = val << 10;
+ return (0);
+
+ default:
+ *valp = val;
+ return (0);
+ }
+ }
+}
+
+void
+usage (char *cmdname, int help)
+{
+ char *name = strrchr (cmdname, '/');
+
+ if (name == NULL)
+ name = cmdname;
+
+ fprintf (help ? stdout : stderr,
+ "usage: %s -d device -s size -o offset [-i id][-n reps][-l] oid\n",
+ name);
+}
+
+int
+exponential_modulus (int i, int base)
+{
+ int top = base;
+ int mod = 1;
+
+ for (;;) {
+ if (i < top)
+ return (i%mod == 0);
+
+ mod = top;
+ top *= base;
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ uint64_t bid = (((uint64_t)gethostid()) << 32) | getpid ();
+ int set_bid = 0;
+ uint64_t oid;
+ int setup = 0;
+ int device = -1;
+ int npeers = 0;
+ int reps = 1;
+ char hostname[128];
+ struct obdio_conn *conn;
+ struct obdio_barrier *b;
+ char *end;
+ uint64_t val;
+ int rc;
+ int c;
+
+ setvbuf (stdout, NULL, _IOLBF, 0);
+ memset (hostname, 0, sizeof (hostname));
+ gethostname (hostname, sizeof (hostname));
+ hostname[sizeof(hostname) - 1] = 0;
+
+ while ((c = getopt (argc, argv, "hsi:d:n:p:")) != -1)
+ switch (c) {
+ case 'h':
+ usage (argv[0], 1);
+ return (0);
+
+ case 'i':
+ bid = strtoll (optarg, &end, 0);
+ if (end == optarg || *end != 0) {
+ fprintf (stderr, "Can't parse id %s\n",
+ optarg);
+ return (1);
+ }
+ set_bid = 1;
+ break;
+
+ case 's':
+ setup = 1;
+ break;
+
+ case 'd':
+ device = strtol (optarg, &end, 0);
+ if (end == optarg || *end != 0 || device < 0) {
+ fprintf (stderr, "Can't parse device %s\n",
+ optarg);
+ return (1);
+ }
+ break;
+
+ case 'n':
+ if (parse_kmg (&val, optarg) != 0) {
+ fprintf (stderr, "Can't parse reps %s\n",
+ optarg);
+ return (1);
+ }
+ reps = (int)val;
+ break;
+
+ case 'p':
+ npeers = strtol (optarg, &end, 0);
+ if (end == optarg || *end != 0 || npeers <= 0) {
+ fprintf (stderr, "Can't parse npeers %s\n",
+ optarg);
+ return (1);
+ }
+ break;
+
+ default:
+ usage (argv[0], 0);
+ return (1);
+ }
+
+ if ((!setup && !set_bid) ||
+ npeers <= 0 ||
+ device < 0 ||
+ optind == argc) {
+ fprintf (stderr, "%s not specified\n",
+ (!setup && !set_bid) ? "id" :
+ npeers <= 0 ? "npeers" :
+ device < 0 ? "device" : "object id");
+ return (1);
+ }
+
+ oid = strtoull (argv[optind], &end, 0);
+ if (end == argv[optind] || *end != 0) {
+ fprintf (stderr, "Can't parse object id %s\n",
+ argv[optind]);
+ return (1);
+ }
+
+ conn = obdio_connect (device);
+ if (conn == NULL)
+ return (1);
+
+ b = obdio_new_barrier (oid, bid, npeers);
+ if (b == NULL)
+ return (1);
+
+ rc = 0;
+ if (setup) {
+ rc = obdio_setup_barrier (conn, b);
+ if (rc == 0)
+ printf ("Setup barrier: -d %d -i "LPX64" -p %d -n1 "LPX64"\n",
+ device, bid, npeers, oid);
+ } else {
+ for (c = 0; c < reps; c++) {
+ rc = obdio_barrier (conn, b);
+ if (rc != 0)
+ break;
+ if (exponential_modulus (c, 10))
+ printf ("%s: Barrier %d\n", hostname, c);
+ }
+ }
+
+ free (b);
+
+ obdio_disconnect (conn);
+
+ return (rc == 0 ? 0 : 1);
+}
+
+
int jt_obd_disconnect(int argc, char **argv);
int jt_obd_detach(int argc, char **argv);
int jt_obd_cleanup(int argc, char **argv);
+int jt_obd_no_transno(int argc, char **argv);
+int jt_obd_set_readonly(int argc, char **argv);
int jt_obd_newdev(int argc, char **argv);
int jt_obd_list(int argc, char **argv);
int jt_obd_attach(int argc, char **argv);
int jt_obd_getattr(int argc, char **argv);
int jt_obd_test_getattr(int argc, char **argv);
int jt_obd_test_brw(int argc, char **argv);
+int jt_obd_get_stripe(int argc, char **argv);
+int jt_obd_set_stripe(int argc, char **argv);
+int jt_obd_unset_stripe(int argc, char **argv);
int jt_obd_lov_setconfig(int argc, char **argv);
int jt_obd_lov_getconfig(int argc, char **argv);
int jt_obd_test_ldlm(int argc, char **argv);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eeb@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "obdiolib.h"
+
+int
+obdio_test_fixed_extent (struct obdio_conn *conn,
+ uint32_t myhid, uint32_t mypid,
+ int reps, int locked, uint64_t oid,
+ uint64_t offset, uint32_t size)
+{
+ struct lustre_handle fh;
+ struct lustre_handle lh;
+ void *space;
+ void *buffer;
+ uint32_t *ibuf;
+ int i;
+ int j;
+ int rc;
+ int rc2;
+
+ rc = obdio_open (conn, oid, &fh);
+ if (rc != 0) {
+ fprintf (stderr, "Failed to open object "LPX64": %s\n",
+ oid, strerror (errno));
+ return (rc);
+ }
+
+ buffer = obdio_alloc_aligned_buffer (&space, size);
+ if (buffer == NULL) {
+ fprintf (stderr, "Can't allocate buffer size %d\n", size);
+ rc = -1;
+ goto out_0;
+ }
+
+ for (i = 0; i < reps; i++) {
+ ibuf = (uint32_t *) buffer;
+ for (j = 0; j < size / (4 * sizeof (*ibuf)); j++) {
+ ibuf[0] = myhid;
+ ibuf[1] = mypid;
+ ibuf[2] = i;
+ ibuf[3] = j;
+ ibuf += 4;
+ }
+
+ if (locked) {
+ rc = obdio_enqueue (conn, oid, LCK_PW, offset, size, &lh);
+ if (rc != 0) {
+ fprintf (stderr, "Error on enqueue "LPX64" @ "LPU64" for %u: %s\n",
+ oid, offset, size, strerror (errno));
+ goto out_1;
+ }
+ }
+
+ rc = obdio_pwrite (conn, oid, buffer, size, offset);
+ if (rc != 0) {
+ fprintf (stderr, "Error writing "LPX64" @ "LPU64" for %u: %s\n",
+ oid, offset, size, strerror (errno));
+ if (locked)
+ obdio_cancel (conn, &lh);
+ rc = -1;
+ goto out_1;
+ }
+
+ memset (buffer, 0xbb, size);
+
+ rc = obdio_pread (conn, oid, buffer, size, offset);
+ if (rc != 0) {
+ fprintf (stderr, "Error reading "LPX64" @ "LPU64" for %u: %s\n",
+ oid, offset, size, strerror (errno));
+ if (locked)
+ obdio_cancel (conn, &lh);
+ rc = -1;
+ goto out_1;
+ }
+
+ if (locked) {
+ rc = obdio_cancel (conn, &lh);
+ if (rc != 0) {
+ fprintf (stderr, "Error on cancel "LPX64" @ "LPU64" for %u: %s\n",
+ oid, offset, size, strerror (errno));
+ rc = -1;
+ goto out_1;
+ }
+ }
+
+ ibuf = (uint32_t *) buffer;
+ for (j = 0; j < size / (4 * sizeof (*ibuf)); j++) {
+ if (ibuf[0] != myhid ||
+ ibuf[1] != mypid ||
+ ibuf[2] != i ||
+ ibuf[3] != j) {
+ fprintf (stderr, "Error checking "LPX64" @ "LPU64" for %u, chunk %d\n",
+ oid, offset, size, j);
+ fprintf (stderr, "Expected [%x,%x,%x,%x], got [%x,%x,%x,%x]\n",
+ myhid, mypid, i, j, ibuf[0], ibuf[1], ibuf[2], ibuf[3]);
+ rc = -1;
+ goto out_1;
+ }
+ ibuf += 4;
+ }
+ }
+ out_1:
+ free (space);
+ out_0:
+ rc2 = obdio_close (conn, oid, &fh);
+ if (rc2 != 0)
+ fprintf (stderr, "Error closing object "LPX64": %s\n",
+ oid, strerror (errno));
+ return (rc);
+}
+
+int
+parse_kmg (uint64_t *valp, char *str)
+{
+ uint64_t val;
+ char mod[32];
+
+ switch (sscanf (str, LPU64"%1[gGmMkK]", &val, mod))
+ {
+ default:
+ return (-1);
+
+ case 1:
+ *valp = val;
+ return (0);
+
+ case 2:
+ switch (*mod)
+ {
+ case 'g':
+ case 'G':
+ *valp = val << 30;
+ return (0);
+
+ case 'm':
+ case 'M':
+ *valp = val << 20;
+ return (0);
+
+ case 'k':
+ case 'K':
+ *valp = val << 10;
+ return (0);
+
+ default:
+ *valp = val;
+ return (0);
+ }
+ }
+}
+
+void
+usage (char *cmdname, int help)
+{
+ char *name = strrchr (cmdname, '/');
+
+ if (name == NULL)
+ name = cmdname;
+
+ fprintf (help ? stdout : stderr,
+ "usage: %s -d device -s size -o offset [-i id][-n reps][-l] oid\n",
+ name);
+}
+
+int
+main (int argc, char **argv)
+{
+ uint32_t mypid = getpid ();
+ uint32_t myhid = gethostid ();
+ uint64_t oid;
+ uint64_t base_offset = 0;
+ uint32_t size = 0;
+ int set_size = 0;
+ int device = -1;
+ int reps = 1;
+ int locked = 0;
+ char *end;
+ struct obdio_conn *conn;
+ uint64_t val;
+ int v1;
+ int v2;
+ int rc;
+ int c;
+
+ while ((c = getopt (argc, argv, "hi:s:o:d:n:l")) != -1)
+ switch (c) {
+ case 'h':
+ usage (argv[0], 1);
+ return (0);
+
+ case 'i':
+ switch (sscanf (optarg, "%i.%i", &v1, &v2)) {
+ case 1:
+ mypid = v1;
+ break;
+ case 2:
+ myhid = v1;
+ mypid = v2;
+ break;
+ default:
+ fprintf (stderr, "Can't parse id %s\n",
+ optarg);
+ return (1);
+ }
+ break;
+
+ case 's':
+ if (parse_kmg (&val, optarg) != 0) {
+ fprintf (stderr, "Can't parse size %s\n",
+ optarg);
+ return (1);
+ }
+ size = (uint32_t)val;
+ set_size++;
+ break;
+
+ case 'o':
+ if (parse_kmg (&val, optarg) != 0) {
+ fprintf (stderr, "Can't parse offset %s\n",
+ optarg);
+ return (1);
+ }
+ base_offset = val;
+ break;
+
+ case 'd':
+ device = strtol (optarg, &end, 0);
+ if (end == optarg || *end != 0 || device < 0) {
+ fprintf (stderr, "Can't parse device %s\n",
+ optarg);
+ return (1);
+ }
+ break;
+ case 'n':
+ if (parse_kmg (&val, optarg) != 0) {
+ fprintf (stderr, "Can't parse reps %s\n",
+ optarg);
+ return (1);
+ }
+ reps = (int)val;
+ break;
+ case 'l':
+ locked = 1;
+ break;
+ default:
+ usage (argv[0], 0);
+ return (1);
+ }
+
+ if (!set_size ||
+ device < 0 ||
+ optind == argc) {
+ fprintf (stderr, "No %s specified\n",
+ !set_size ? "size" :
+ device < 0 ? "device" : "object id");
+ return (1);
+ }
+
+ oid = strtoull (argv[optind], &end, 0);
+ if (end == argv[optind] || *end != 0) {
+ fprintf (stderr, "Can't parse object id %s\n",
+ argv[optind]);
+ return (1);
+ }
+
+ conn = obdio_connect (device);
+ if (conn == NULL)
+ return (1);
+
+ rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
+ oid, base_offset, size);
+
+ obdio_disconnect (conn);
+
+ return (rc == 0 ? 0 : 1);
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2003 Cluster File Systems, Inc.
+ * Author: Eric Barton <eeb@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "obdiolib.h"
+
+void
+obdio_iocinit (struct obdio_conn *conn)
+{
+ memset (&conn->oc_data, 0, sizeof (conn->oc_data));
+ conn->oc_data.ioc_version = OBD_IOCTL_VERSION;
+ conn->oc_data.ioc_addr = conn->oc_conn_addr;
+ conn->oc_data.ioc_cookie = conn->oc_conn_cookie;
+ conn->oc_data.ioc_len = sizeof (conn->oc_data);
+}
+
+int
+obdio_ioctl (struct obdio_conn *conn, int cmd)
+{
+ char *buf = conn->oc_buffer;
+ int rc;
+ int rc2;
+
+ rc = obd_ioctl_pack (&conn->oc_data, &buf, sizeof (conn->oc_buffer));
+ if (rc != 0) {
+ fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n",
+ rc, strerror (errno));
+ abort ();
+ }
+
+ rc = ioctl (conn->oc_fd, cmd, buf);
+ if (rc != 0)
+ return (rc);
+
+ rc2 = obd_ioctl_unpack (&conn->oc_data, buf, sizeof (conn->oc_buffer));
+ if (rc2 != 0) {
+ fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n",
+ rc2, strerror (errno));
+ abort ();
+ }
+
+ return (rc);
+}
+
+struct obdio_conn *
+obdio_connect (int device)
+{
+ struct obdio_conn *conn;
+ int rc;
+
+ conn = malloc (sizeof (*conn));
+ if (conn == NULL) {
+ fprintf (stderr, "obdio_connect: no memory\n");
+ return (NULL);
+ }
+ memset (conn, 0, sizeof (*conn));
+
+ conn->oc_fd = open ("/dev/obd", O_RDWR);
+ if (conn->oc_fd < 0) {
+ fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n",
+ strerror (errno));
+ goto failed;
+ }
+
+ obdio_iocinit (conn);
+ conn->oc_data.ioc_dev = device;
+ rc = obdio_ioctl (conn, OBD_IOC_DEVICE);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_connect: Can't set device %d: %s\n",
+ device, strerror (errno));
+ goto failed;
+ }
+
+ obdio_iocinit (conn);
+ rc = obdio_ioctl (conn, OBD_IOC_CONNECT);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_connect: Can't connect to device %d: %s\n",
+ device, strerror (errno));
+ goto failed;
+ }
+
+ conn->oc_conn_addr = conn->oc_data.ioc_addr;
+ conn->oc_conn_cookie = conn->oc_data.ioc_cookie;
+ return (conn);
+
+ failed:
+ free (conn);
+ return (NULL);
+}
+
+void
+obdio_disconnect (struct obdio_conn *conn)
+{
+ close (conn->oc_fd);
+ /* obdclass will automatically close on last ref */
+ free (conn);
+}
+
+int
+obdio_open (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh)
+{
+ int rc;
+
+ obdio_iocinit (conn);
+
+ conn->oc_data.ioc_obdo1.o_id = oid;
+ conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+
+ rc = obdio_ioctl (conn, OBD_IOC_OPEN);
+
+ if (rc == 0)
+ memcpy (fh, obdo_handle(&conn->oc_data.ioc_obdo1), sizeof (*fh));
+
+ return (rc);
+}
+
+int
+obdio_close (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh)
+{
+ obdio_iocinit (conn);
+
+
+ conn->oc_data.ioc_obdo1.o_id = oid;
+ conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+ memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), fh, sizeof (*fh));
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
+ OBD_MD_FLMODE | OBD_MD_FLHANDLE;
+
+ return (obdio_ioctl (conn, OBD_IOC_CLOSE));
+}
+
+int
+obdio_pread (struct obdio_conn *conn, uint64_t oid,
+ char *buffer, uint32_t count, uint64_t offset)
+{
+ obdio_iocinit (conn);
+
+ conn->oc_data.ioc_obdo1.o_id = oid;
+ conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+
+ conn->oc_data.ioc_pbuf2 = buffer;
+ conn->oc_data.ioc_plen2 = count;
+ conn->oc_data.ioc_count = count;
+ conn->oc_data.ioc_offset = offset;
+
+ return (obdio_ioctl (conn, OBD_IOC_BRW_READ));
+}
+
+int
+obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
+ char *buffer, uint32_t count, uint64_t offset)
+{
+ obdio_iocinit (conn);
+
+ conn->oc_data.ioc_obdo1.o_id = oid;
+ conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+
+ conn->oc_data.ioc_pbuf2 = buffer;
+ conn->oc_data.ioc_plen2 = count;
+ conn->oc_data.ioc_count = count;
+ conn->oc_data.ioc_offset = offset;
+
+ return (obdio_ioctl (conn, OBD_IOC_BRW_WRITE));
+}
+
+int
+obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
+ int mode, uint64_t offset, uint32_t count,
+ struct lustre_handle *lh)
+{
+ int rc;
+
+ obdio_iocinit (conn);
+
+ conn->oc_data.ioc_obdo1.o_id = oid;
+ conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+
+ conn->oc_data.ioc_conn1 = mode;
+ conn->oc_data.ioc_count = count;
+ conn->oc_data.ioc_offset = offset;
+
+ rc = obdio_ioctl (conn, ECHO_IOC_ENQUEUE);
+
+ if (rc == 0)
+ memcpy (lh, obdo_handle (&conn->oc_data.ioc_obdo1), sizeof (*lh));
+
+ return (rc);
+}
+
+int
+obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh)
+{
+ obdio_iocinit (conn);
+
+ memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), lh, sizeof (*lh));
+ conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLHANDLE;
+
+ return (obdio_ioctl (conn, ECHO_IOC_CANCEL));
+}
+
+void *
+obdio_alloc_aligned_buffer (void **spacep, int size)
+{
+ int pagesize = getpagesize();
+ void *space = malloc (size + pagesize - 1);
+
+ *spacep = space;
+ if (space == NULL)
+ return (NULL);
+
+ return ((void *)(((unsigned long)space + pagesize - 1) & ~(pagesize - 1)));
+}
+
+struct obdio_barrier *
+obdio_new_barrier (uint64_t oid, uint64_t id, int npeers)
+{
+ struct obdio_barrier *b;
+
+ b = (struct obdio_barrier *)malloc (sizeof (*b));
+ if (b == NULL) {
+ fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid);
+ return (NULL);
+ }
+
+ b->ob_id = id;
+ b->ob_oid = oid;
+ b->ob_npeers = npeers;
+ b->ob_ordinal = 0;
+ b->ob_count = 0;
+ return (b);
+}
+
+int
+obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
+{
+ struct lustre_handle fh;
+ struct lustre_handle lh;
+ int rc;
+ int rc2;
+ void *space;
+ struct obdio_barrier *fileb;
+
+ if (b->ob_ordinal != 0 ||
+ b->ob_count != 0) {
+ fprintf (stderr, "obdio_setup_barrier: invalid parameter\n");
+ abort ();
+ }
+
+ rc = obdio_open (conn, b->ob_oid, &fh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Failed to open object: %s\n",
+ b->ob_oid, strerror (errno));
+ return (rc);
+ }
+
+ fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
+ if (fileb == NULL) {
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n",
+ b->ob_oid);
+ rc = -1;
+ goto out_0;
+ }
+
+ memset (fileb, 0, getpagesize ());
+ *fileb = *b;
+
+ rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_1;
+ }
+
+ rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ if (rc != 0)
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n",
+ b->ob_oid, strerror (errno));
+
+ rc2 = obdio_cancel (conn, &lh);
+ if (rc == 0 && rc2 != 0) {
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n",
+ b->ob_oid, strerror (errno));
+ rc = rc2;
+ }
+ out_1:
+ free (space);
+ out_0:
+ rc2 = obdio_close (conn, b->ob_oid, &fh);
+ if (rc == 0 && rc2 != 0) {
+ fprintf (stderr, "obdio_setup_barrier "LPX64": Error on close: %s\n",
+ b->ob_oid, strerror (errno));
+ rc = rc2;
+ }
+
+ return (rc);
+}
+
+int
+obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
+{
+ struct lustre_handle fh;
+ struct lustre_handle lh;
+ int rc;
+ int rc2;
+ void *space;
+ struct obdio_barrier *fileb;
+ char *mode;
+
+ rc = obdio_open (conn, b->ob_oid, &fh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on open: %s\n",
+ b->ob_oid, strerror (errno));
+ return (rc);
+ }
+
+ fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
+ if (fileb == NULL) {
+ fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n",
+ b->ob_oid);
+ rc = -1;
+ goto out_0;
+ }
+
+ rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_1;
+ }
+
+ memset (fileb, 0xeb, getpagesize ());
+ rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_2;
+ }
+
+ if (fileb->ob_id != b->ob_id ||
+ fileb->ob_oid != b->ob_oid ||
+ fileb->ob_npeers != b->ob_npeers ||
+ fileb->ob_count >= b->ob_npeers ||
+ fileb->ob_ordinal != b->ob_ordinal) {
+ fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id);
+ fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+ fileb->ob_ordinal, fileb->ob_count);
+ fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ b->ob_id, b->ob_oid, b->ob_npeers,
+ b->ob_ordinal, b->ob_count);
+ rc = -1;
+ goto out_2;
+ }
+
+ fileb->ob_count++;
+ if (fileb->ob_count == fileb->ob_npeers) { /* I'm the last joiner */
+ fileb->ob_count = 0; /* join count for next barrier */
+ fileb->ob_ordinal++; /* signal all joined */
+ }
+
+ rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_2;
+ }
+
+ mode = "PW";
+ b->ob_ordinal++; /* now I wait... */
+ while (fileb->ob_ordinal != b->ob_ordinal) {
+
+ rc = obdio_cancel (conn, &lh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n",
+ b->ob_oid, mode, strerror (errno));
+ goto out_1;
+ }
+
+ mode = "PR";
+ rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_1;
+ }
+
+ memset (fileb, 0xeb, getpagesize ());
+ rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ if (rc != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n",
+ b->ob_oid, strerror (errno));
+ goto out_2;
+ }
+
+ if (fileb->ob_id != b->ob_id ||
+ fileb->ob_oid != b->ob_oid ||
+ fileb->ob_npeers != b->ob_npeers ||
+ fileb->ob_count >= b->ob_npeers ||
+ (fileb->ob_ordinal != b->ob_ordinal - 1 &&
+ fileb->ob_ordinal != b->ob_ordinal)) {
+ fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id);
+ fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+ fileb->ob_ordinal, fileb->ob_count);
+ fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ b->ob_id, b->ob_oid, b->ob_npeers,
+ b->ob_ordinal, b->ob_count);
+ rc = -1;
+ goto out_2;
+ }
+ }
+
+ out_2:
+ rc2 = obdio_cancel (conn, &lh);
+ if (rc == 0 && rc2 != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n",
+ b->ob_oid, strerror (errno));
+ rc = rc2;
+ }
+ out_1:
+ free (space);
+ out_0:
+ rc2 = obdio_close (conn, b->ob_oid, &fh);
+ if (rc == 0 && rc2 != 0) {
+ fprintf (stderr, "obdio_barrier "LPX64": Error on close: %s\n",
+ b->ob_oid, strerror (errno));
+ rc = rc2;
+ }
+
+ return (rc);
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2003 Cluster File Systems, Inc.
+ * Author: Eric Barton <eeb@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef _OBDIOLIB_H_
+#define _OBDIOLIB_H_
+
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/obd_class.h>
+
+struct obdio_conn {
+ int oc_fd;
+ uint64_t oc_conn_addr;
+ uint64_t oc_conn_cookie;
+ struct obd_ioctl_data oc_data;
+ char oc_buffer[8192];
+};
+
+struct obdio_barrier {
+ uint64_t ob_id;
+ uint64_t ob_oid;
+ uint64_t ob_npeers;
+ uint64_t ob_ordinal;
+ uint64_t ob_count;
+};
+
+extern struct obdio_conn * obdio_connect (int device);
+extern void obdio_disconnect (struct obdio_conn *conn);
+extern int obdio_open (struct obdio_conn *conn, uint64_t oid,
+ struct lustre_handle *fh);
+extern int obdio_close (struct obdio_conn *conn, uint64_t oid,
+ struct lustre_handle *fh);
+extern int obdio_pread (struct obdio_conn *conn, uint64_t oid,
+ char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
+ char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
+ int mode, uint64_t offset, uint32_t count,
+ struct lustre_handle *lh);
+extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
+extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
+extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
+extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+
+#endif
--- /dev/null
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+struct one_stat {
+ char *name;
+ int fd;
+ long long current;
+ long long delta;
+};
+
+struct one_stat *read_bytes;
+struct one_stat *read_reqs;
+struct one_stat *write_bytes;
+struct one_stat *write_reqs;
+struct one_stat *getattr_reqs;
+struct one_stat *setattr_reqs;
+struct one_stat *create_reqs;
+struct one_stat *destroy_reqs;
+struct one_stat *statfs_reqs;
+struct one_stat *open_reqs;
+struct one_stat *close_reqs;
+struct one_stat *punch_reqs;
+
+struct one_stat *
+init_one_stat (char *basename, char *name)
+{
+ char fname[1024];
+ struct one_stat *stat = (struct one_stat *)malloc (sizeof (*stat));
+
+ if (stat == NULL) {
+ fprintf (stderr, "Can't allocate stat %s: %s\n",
+ name, strerror (errno));
+ abort ();
+ }
+
+ snprintf (fname, sizeof (fname), "%s/%s", basename, name);
+
+ memset (stat, 0, sizeof (*stat));
+ stat->name = name;
+
+ stat->fd = open (fname, O_RDONLY);
+ if (stat->fd < 0 ) {
+ fprintf (stderr, "Can't open stat %s: %s\n",
+ fname, strerror (errno));
+ abort ();
+ }
+
+ return (stat);
+}
+
+void
+update_one_stat (struct one_stat *stat)
+{
+ static char buffer[1024];
+ long long prev = stat->current;
+ int nob;
+
+ lseek (stat->fd, 0, SEEK_SET);
+ nob = read (stat->fd, buffer, sizeof (buffer) - 1);
+ if (nob < 0) {
+ fprintf (stderr, "Can't read stat %s: %s\n",
+ stat->name, strerror (errno));
+ abort ();
+ }
+
+ buffer[nob] = 0;
+ if (sscanf (buffer, "%Ld", &stat->current) != 1) {
+ fprintf (stderr, "Can't parse stat %s: %s\n",
+ stat->name, strerror (errno));
+ abort ();
+ }
+
+ stat->delta = stat->current - prev;
+}
+
+double
+timenow ()
+{
+ struct timeval tv;
+
+ gettimeofday (&tv, NULL);
+ return (tv.tv_sec + tv.tv_usec / 1000000.0);
+}
+
+void
+do_stat (void)
+{
+ static double last = 0.0;
+ double now;
+ double t;
+
+ now = timenow();
+
+ update_one_stat (read_bytes);
+ update_one_stat (read_reqs);
+ update_one_stat (write_bytes);
+ update_one_stat (write_reqs);
+ update_one_stat (getattr_reqs);
+ update_one_stat (setattr_reqs);
+ update_one_stat (open_reqs);
+ update_one_stat (close_reqs);
+ update_one_stat (create_reqs);
+ update_one_stat (destroy_reqs);
+ update_one_stat (statfs_reqs);
+ update_one_stat (punch_reqs);
+
+ if (last == 0.0) {
+ printf ("R %Ld/%Ld W %Ld/%Ld attr %Ld/%Ld open %Ld/%Ld create %Ld/%Ld stat %Ld punch %Ld\n",
+ read_bytes->current, read_reqs->current,
+ write_bytes->current, write_reqs->current,
+ getattr_reqs->current, setattr_reqs->current,
+ open_reqs->current, close_reqs->current,
+ create_reqs->current, destroy_reqs->current,
+ statfs_reqs->current, punch_reqs->current);
+ } else {
+ t = now - last;
+
+ printf ("R %6Ld (%5d %6.2fMb)/s W %6Ld (%5d %6.2fMb)/s",
+ read_reqs->delta, (int)(read_reqs->delta / t),
+ read_bytes->delta / ((1<<20) * t),
+ write_reqs->delta, (int)(write_reqs->delta / t),
+ write_bytes->delta / ((1<<20) * t));
+
+ if (getattr_reqs->delta != 0)
+ printf (" ga:%Ld,%d/s", getattr_reqs->delta,
+ (int)(getattr_reqs->delta / t));
+
+ if (setattr_reqs->delta != 0)
+ printf (" sa:%Ld", setattr_reqs->delta);
+
+ if (open_reqs->delta != 0)
+ printf (" op:%Ld", open_reqs->delta);
+
+ if (close_reqs->delta != 0)
+ printf (" cl:%Ld", close_reqs->delta);
+
+ if (create_reqs->delta != 0)
+ printf (" cx:%Ld", create_reqs->delta);
+
+ if (destroy_reqs->delta != 0)
+ printf (" dx:%Ld", destroy_reqs->delta);
+
+ if (statfs_reqs->delta != 0)
+ printf (" st:%Ld", statfs_reqs->delta);
+
+ if (punch_reqs->delta != 0)
+ printf (" pu:%Ld", punch_reqs->delta);
+
+ printf ("\n");
+ }
+
+ last = timenow();
+}
+
+int main (int argc, char **argv)
+{
+ char basedir[128];
+ int interval = 0;
+
+ if (argc < 2) {
+ fprintf (stderr, "obd type not specified\n");
+ return (1);
+ }
+
+ snprintf (basedir, sizeof (basedir), "/proc/sys/%s", argv[1]);
+
+ if (argc > 2)
+ interval = atoi (argv[2]);
+
+ read_bytes = init_one_stat (basedir, "read_bytes");
+ read_reqs = init_one_stat (basedir, "read_reqs");
+ write_bytes = init_one_stat (basedir, "write_bytes");
+ write_reqs = init_one_stat (basedir, "write_reqs");
+ getattr_reqs = init_one_stat (basedir, "getattr_reqs");
+ setattr_reqs = init_one_stat (basedir, "setattr_reqs");
+ create_reqs = init_one_stat (basedir, "create_reqs");
+ destroy_reqs = init_one_stat (basedir, "destroy_reqs");
+ statfs_reqs = init_one_stat (basedir, "statfs_reqs");
+ open_reqs = init_one_stat (basedir, "open_reqs");
+ close_reqs = init_one_stat (basedir, "close_reqs");
+ punch_reqs = init_one_stat (basedir, "punch_reqs");
+
+ do_stat ();
+
+ if (interval == 0)
+ return (0);
+
+ for (;;) {
+ sleep (interval);
+ do_stat ();
+ }
+}