TBD
- * version v0_5_21
- * bug fixes
- - workaround for gcc 3.2, which has macro-argument issues (850)
- - lmc/lconf syntax change for OST UUIDs
+ * version v0_5_21
+ * bug fixes
+ - LDLM_DEBUG macro fix, for gcc 3.2 (850)
+ - failed open()s could cause deadlock; fixed (867, 869)
+ - stop cancelling OST locks when files are closed (481)
+ - overlapping XID spaces caused network corruption (851, 853)
+ - fix unsafe fsfilt counter arithmetic; change to atomic_t
+ - setattr_raw added, to do single-RPC, server-side setattrs
+ - lmc/lconf syntax change for OST UUIDs
+ - fix crashy race condition between ptlrpc_free_req and osc_close
+ - don't use request in mdc_enqueue if we hit a timeout (889)
+ - don't set the inode i_size for regular files from the MDS (896)
+ - handle out of order completion AST (842)
+ - don't LBUG if a lock request times out after receiving AST (913)
+ - avoid d_rehash race in ll_find_alias by rehashing inside dcache_lock
+ - if a bad lock AST arrives, send an error instead of dropping entirely
+ - return 0 from revalidate2 if ll_intent_lock returns -EINTR (912)
+ - fix leak in bulk IO when only partially completed (899, 900, 926)
+ * protocol changes
+ - READPAGE and SETATTRs which don't take server-side locks get
+ their own portal
2003-02-11 Phil Schwan <phil@clusterfs.com>
* version v0_5_20
- client verifies file size before zeroing page past EOF (445)
- OST now writes last allocated objid to disk with allocation (108)
- LOV on echo now works (409)
- * protocol changes
+ * protocol changes
- mds_reint_unlink sends a new buffer, with the EA included. this
buffer is only valid if body->valid & OBD_MD_FLEASIZE, which is only
set if a regular file was being unlinked, and it was the last link
AUTOMAKE_OPTIONS = foreign
if LINUX25
-DIRS24 = mds
+DIRS24 =
else
-DIRS24 = extN mds
+DIRS24 = extN ptlbd
endif
-# NOTE: keep extN before mds and obdfilter
-SUBDIRS = $(DIRS24) obdclass utils ptlrpc ldlm lib obdfilter mdc osc ost llite
-SUBDIRS+= obdecho lov cobd ptlbd tests doc scripts conf
+if LIBLUSTRE
+SUBDIRS = lov obdclass ptlrpc obdecho ldlm osc liblustre utils
+else
+# NOTE: keep extN before obdclass, mds, and obdfilter. Keep obdclass as early
+# as possible, to have the best chance at stopping with "wrong kernel version"
+# instead of some related build failure.
+SUBDIRS = $(DIRS24) obdclass mds utils ptlrpc ldlm lib obdfilter mdc osc ost
+SUBDIRS+= llite obdecho lov cobd tests doc scripts conf
+endif
DIST_SUBDIRS = $(SUBDIRS)
EXTRA_DIST = BUGS FDL Rules include archdep.m4 kernel_patches
# name_SOURCES = my.c files.c
# include $(top_srcdir)/Rules
+
$(MODULE).o: $($(MODULE)_OBJECTS)
$(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib")
+
AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
+if test $host_cpu = "lib" ; then
+ host_cpu="lib"
+ AC_MSG_RESULT(no building Lustre library)
+else
if test -e $LINUX/include/asm-um ; then
if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
host_cpu="um";
else
AC_MSG_RESULT(no (asm-um missing))
fi
+fi
AC_MSG_CHECKING(setting make flags system architecture: )
case ${host_cpu} in
+ lib )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -Wall '
+ KCPPFLAGS='-D__arch_lib__ '
+ MOD_LINK=elf_i386
+;;
um )
AC_MSG_RESULT($host_cpu)
KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common '
- KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include '
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include '
+ ;;
+ esac
+
MOD_LINK=elf_i386
;;
i*86 )
AC_MSG_RESULT($host_cpu)
KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe'
- KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 -I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ ;;
+ esac
MOD_LINK=elf_i386
;;
;;
esac
+if test $host_cpu != lib ; then
AC_MSG_CHECKING(for MODVERSIONS)
if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1;
then
SMPFLAG=
AC_MSG_RESULT(no)
fi
+fi
CFLAGS="$KCFLAGS $MFLAGS"
ARCHCPPFLAGS="$KCPPFLAGS"
find . -type d -name .deps | xargs rm -rf
aclocal &&
-automake --add-missing &&
+${AUTOMAKE:-automake} --add-missing &&
${AUTOCONF:-autoconf}
#define DEBUG_SUBSYSTEM S_COBD
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/init.h>
+#endif
#include <linux/obd_support.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_net.h>
int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct lustre_handle *conn = &dev->u.cobd.cobd_target;
+ struct lustre_handle *conn;
struct obd_export *exp;
int rc;
+ LASSERT(dev != NULL);
+ conn = &dev->u.cobd.cobd_target;
+
if ((dev->obd_flags & OBD_SET_UP) == 0)
rc = snprintf (page, count, "not set up\n");
else {
exp = class_conn2export (conn);
LASSERT(exp != NULL);
- rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
+ rc = snprintf(page, count, "%s\n",
+ exp->exp_obd->obd_uuid.uuid);
}
return (rc);
}
int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct lustre_handle *conn = &dev->u.cobd.cobd_cache;
+ struct lustre_handle *conn;
struct obd_export *exp;
int rc;
+ LASSERT(dev != NULL);
+ conn = &dev->u.cobd.cobd_cache;
+
if ((dev->obd_flags & OBD_SET_UP) == 0)
rc = snprintf (page, count, "not set up\n");
else {
exp = class_conn2export (conn);
LASSERT (exp != NULL);
- rc = snprintf(page, count, "%s\n", exp->exp_obd->obd_uuid.uuid);
+ rc = snprintf(page, count, "%s\n",
+ exp->exp_obd->obd_uuid.uuid);
}
return (rc);
}
<!ENTITY % objref.attr "uuidref CDATA #REQUIRED">
<!-- main elements -->
-<!ELEMENT lustre (node | profile | mountpoint | ldlm | echoclient |
- mds | obd | ost | lov | lovconfig)*>
+<!ELEMENT lustre (node | profile | mountpoint | ldlm | ptlrpc |echoclient |
+ mds | mdsdev| ost | osd | lov | lovconfig)*>
-<!ELEMENT node (network | profile_ref)*>
+<!ELEMENT node (network | routetbl | profile_ref)*>
<!ATTLIST node %object.attr;
router CDATA #IMPLIED>
lo CDATA #REQUIRED
hi CDATA #IMPLIED >
-<!ELEMENT profile (ldlm_ref | network_ref | obd_ref | ost_ref |
- echoclient_ref | mdsdev_ref | lov_ref |
- lovconfig_ref| mountpoint_ref)*>
+<!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
+ osd_ref | mdsdev_ref | lovconfig_ref|
+ echoclient_ref | mountpoint_ref)*>
<!ATTLIST profile %object.attr;>
<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
<!ELEMENT ldlm EMPTY>
<!ATTLIST ldlm %object.attr;>
-<!ELEMENT obd (fstype | devpath | devsize | autoformat | active_ref)*>
-<!ATTLIST obd %object.attr;
- obdtype (obdfilter | obdecho) 'obdfilter'>
+<!ELEMENT ptlrpc EMPTY>
+<!ATTLIST ptlrpc %object.attr;>
-<!ELEMENT ost (network_ref | obd_ref | failover_ref)*>
+<!ELEMENT osd (fstype | devpath | devsize | autoformat |
+ target_ref | node_ref)*>
+<!ATTLIST osd %object.attr;
+ osdtype (obdfilter | obdecho) 'obdfilter'>
+
+<!ELEMENT ost (active_ref)*>
<!ATTLIST ost %object.attr;>
-<!ELEMENT mds (active_ref)*>
+<!ELEMENT mds (active_ref | lovconfig_ref)*>
<!ATTLIST mds %object.attr;>
<!ELEMENT mdsdev (fstype | devpath | devsize | autoformat |
- mds_ref | network_ref )*>
+ target_ref | node_ref )*>
<!ATTLIST mdsdev %object.attr;>
<!ELEMENT lov (mds_ref |(obd_ref)+)*>
<!ELEMENT fstype %object.content;>
<!ELEMENT nid %object.content;>
<!ELEMENT port %object.content;>
-<!ELEMENT send_mem %object.content;>
-<!ELEMENT recv_mem %object.content;>
+<!ELEMENT sendmem %object.content;>
+<!ELEMENT recvmem %object.content;>
<!ELEMENT autoformat %object.content;>
<!ELEMENT activetarget %object.content;>
<!ELEMENT devpath %object.content;>
<!-- object reference tag elements -->
<!ELEMENT network_ref %objref.content;>
<!ATTLIST network_ref %objref.attr;>
+<!ELEMENT routetbl_ref %objref.content;>
+<!ATTLIST routetbl_ref %objref.attr;>
<!ELEMENT node_ref %objref.content;>
<!ATTLIST node_ref %objref.attr;>
<!ELEMENT profile_ref %objref.content;>
<!ATTLIST profile_ref %objref.attr;>
-<!ELEMENT obd_ref %objref.content;>
-<!ATTLIST obd_ref %objref.attr;>
+<!ELEMENT osd_ref %objref.content;>
+<!ATTLIST osd_ref %objref.attr;>
<!ELEMENT mds_ref %objref.content;>
<!ATTLIST mds_ref %objref.attr;>
<!ELEMENT mdsdev_ref %objref.content;>
<!ATTLIST mdsdev_ref %objref.attr;>
+<!ELEMENT obd_ref %objref.content;>
+<!ATTLIST obd_ref %objref.attr;>
<!ELEMENT ost_ref %objref.content;>
<!ATTLIST ost_ref %objref.attr;>
+<!ELEMENT active_ref %objref.content;>
+<!ATTLIST active_ref %objref.attr;>
+<!ELEMENT target_ref %objref.content;>
+<!ATTLIST target_ref %objref.attr;>
<!ELEMENT lov_ref %objref.content;>
<!ATTLIST lov_ref %objref.attr;>
<!ELEMENT lovconfig_ref %objref.content;>
<!ATTLIST lovconfig_ref %objref.attr;>
<!ELEMENT mountpoint_ref %objref.content;>
<!ATTLIST mountpoint_ref %objref.attr;>
-<!ELEMENT echoclient_ref %objref.content;>
-<!ATTLIST echoclient_ref %objref.attr;>
+<!ELEMENT echoclient_ref %objref.content;>
+<!ATTLIST echoclient_ref %objref.attr;>
<!ELEMENT failover_ref %objref.content;>
<!ATTLIST failover_ref %objref.attr;>
<!ELEMENT ldlm_ref %objref.content;>
<!ATTLIST ldlm_ref %objref.attr;>
+<!ELEMENT ptlrpc_ref %objref.content;>
+<!ATTLIST ptlrpc_ref %objref.attr;>
<if test="devsize">
devsize: <value-of select="devsize"/>
</if>
-networkRef: <value-of select="network_ref/@uuidref"/>
-mdsRef: <value-of select="mds_ref/@uuidref"/>
+nodeRef: <value-of select="node_ref/@uuidref"/>
+targetRef: <value-of select="target_ref/@uuidref"/>
<text>
</text>
</template>
</text>
</template>
-<template match="obd">
+<template match="osd">
dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
-objectClass: OBD
+objectClass: OSD
lustreName: <value-of select="@name"/>
uuid: <value-of select="@uuid"/>
-activeRef: <value-of select="active_ref/@uuidref"/>
-obdtype: <value-of select="@obdtype"/>
+nodeRef: <value-of select="node_ref/@uuidref"/>
+targetRef: <value-of select="target_ref/@uuidref"/>
+osdtype: <value-of select="@osdtype"/>
<if test="fstype">
fstype: <value-of select="fstype"/>
</if>
</text>
</template>
+<template match="ptlrpc">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: PTLRPC
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+<text>
+</text>
+</template>
<template match="ldlm_ref">
ldlmRef: <value-of select="@uuidref"/>
</template>
+<template match="ptlrpc_ref">
+ptlrpcRef: <value-of select="@uuidref"/>
+</template>
+
<template match="obd_ref">
obdRef: <value-of select="@uuidref"/>
</template>
+<template match="osd_ref">
+osdRef: <value-of select="@uuidref"/>
+</template>
+
<template match="ost_ref">
ostRef: <value-of select="@uuidref"/>
</template>
dn: fs=lustre
fs:lustre
objectClass: lustre
-desc: Lustre Config
+lustreDesc: Lustre Config
echo " please get an updated compiler."
AC_MSG_ERROR(sorry)
}
+TMP_VERSION=`echo $CC_VERSION | cut -c 1-16`
+if test "$TMP_VERSION" = "gcc version 2.95"; then
+ bad_cc
+fi
case "$CC_VERSION" in
# ost_pack_niobuf putting 64bit NTOH temporaries on the stack
# without "sub $0xc,%esp" to protect the stack from being
AC_SUBST(LIBREADLINE)
AC_SUBST(HAVE_LIBREADLINE)
+# XXX this should be a runtime option
+# OST recovery support is compiled in by default; pass
+# --disable-ost-recovery to build without -DOST_RECOVERY=1.
+AC_ARG_ENABLE(ost_recovery, [  --enable-ost-recovery: enable support for ost recovery],,
+        enable_ost_recovery="yes")
+if test "$enable_ost_recovery" = "yes" ; then
+        ENABLE_OST_RECOVERY="-DOST_RECOVERY=1"
+else
+        # Leave the define out; the readline detection result is unrelated
+        # and must not be cleared here.
+        ENABLE_OST_RECOVERY=""
+fi
+AC_SUBST(ENABLE_OST_RECOVERY)
+
+
# Kernel build environment.
ac_default_prefix=
bindir='${exec_prefix}/usr/bin'
LINUX=$enable_linuxdir
AC_SUBST(LINUX)
-sinclude(archdep.m4)
-
AC_MSG_CHECKING(if you are running linux 2.5...)
if test -e $LINUX/include/linux/namei.h ; then
- linux25=yes
+ linux25="yes"
AC_MSG_RESULT(yes)
else
- linux25=no
+ linux25="no"
AC_MSG_RESULT(no)
fi
AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
-# Changed by Amrut Joshi on 01/13/2003
-#KINCFLAGS='-I. -I$(top_srcdir)/include -I$(PORTALS)/include -I$(LINUX)/include'
-KINCFLAGS='-I$(top_srcdir)/include -I$(PORTALS)/include -I$(LINUX)/include'
-CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
+sinclude(archdep.m4)
+
portalsdir_def='$(top_srcdir)/../portals'
AC_ARG_WITH(portals, [ --with-portals=[path] set path to Portals source (default=../portals)], enable_portalsdir=$withval)
if ! test -z "$enable_portalslib"; then
PORTALSLIB=${enable_portalslib}
fi
+AC_SUBST(PORTALSLIB)
+AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
+AC_MSG_CHECKING(if you are building lib lustre)
+if test "$host_cpu" = "lib"; then
+ AC_MSG_RESULT(yes)
+ libdir='${exec_prefix}/lib/lustre'
+else
+ AC_MSG_RESULT(no)
+fi
-AC_SUBST(PORTALSLIB)
+if test $host_cpu != "lib" ; then
+KINCFLAGS='-I$(top_srcdir)/include -I$(PORTALS)/include -I$(LINUX)/include'
+else
+KINCFLAGS='-I$(top_srcdir)/include -I$(PORTALS)/include'
+fi
+CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
-AC_MSG_CHECKING(if make dep has been run in kernel source)
-if test -f $LINUX/include/linux/config.h ; then
+if test $host_cpu != "lib" ; then
+AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) )
+if test -f $LINUX/include/linux/config.h ; then
AC_MSG_RESULT(yes)
else
AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.)
AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
fi
-
AC_MSG_CHECKING(for Linux release)
dnl We need to rid ourselves of the nasty [ ] quotes.
AC_MSG_RESULT($RELEASE)
AC_SUBST(RELEASE)
+fi
# Directories for documentation and demos.
docdir='${prefix}/usr/share/doc/$(PACKAGE)'
AC_SUBST(docdir)
# AM_CONFIG_HEADER(include/config.h)
AC_OUTPUT(Makefile lib/Makefile ldlm/Makefile obdecho/Makefile ptlrpc/Makefile \
+ liblustre/Makefile \
lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \
cobd/Makefile ptlbd/Makefile conf/Makefile \
utils/Makefile utils/lconf tests/Makefile obdfilter/Makefile \
# (or other RH < 12.5 kernels) use the "chaos22" patch instead.
EXTN_FIXES = patch-2.4.18-chaos22
#EXTN_FIXES = ext3-2.4.18-fixes.diff
-EXTNP = htree-ext3-2.4.18.diff linux-2.4.18ea-0.8.26.diff
+EXTNP = htree-ext3-2.4.18.diff linux-2.4.18ea-0.8.26.diff ext3-2.4-ino_t.diff
EXTNP+= ext3-2.4.18-ino_sb_macro.diff extN-misc-fixup.diff extN-noread.diff
-EXTNP+= extN-wantedi.diff
+EXTNP+= extN-wantedi.diff extN-san.diff extN-2.4.18-ino_sb_fixup.diff
#EXTNP+= extN-iget-debug.diff
EXTNC = balloc.c bitmap.c dir.c file.c fsync.c ialloc.c inode.c ioctl.c
EXTNC+= namei.c super.c symlink.c
extN_SOURCES = $(EXTNC) xattr.c # punch.c
extN_DEPENDENCIES = patch-stamp
EXTRA_DIST = $(EXTNP) $(EXTN_FIXES) \
- extN-2.4.18-ino_sb_fixup.diff extN-2.4.18-exports.diff \
- $(wildcard extN.patch-*)
+ ext3-largefile.diff extN-2.4.18-exports.diff \
+ ext3-use-after-free.diff ext3-unmount_sync.diff $(wildcard extN.patch-*)
DISTCLEANFILES = -r $(extN_SOURCES) sed-stamp patch-stamp *.orig *.rej
SUB=-e "s/ext3/extN/g" -e "s/EXT3/EXTN/g" -e "s/extern __inline__/static inline/"
rm -rf $(extN_orig) $(extN_include_orig)
mkdir $(extN_orig) $(extN_include_orig)
list='$(EXTNC)'; for f in $$list; do \
- echo "creating $(extN_orig)/$$f"; \
- sed $(SUB) $(LINUX)/fs/ext3/$$f > $(extN_orig)/$$f; \
- done
+ echo "creating $(extN_orig)/$$f"; \
+ sed $(SUB) $(LINUX)/fs/ext3/$$f > $(extN_orig)/$$f; \
+ done
list='$(EXTNI)'; for i in $$list; do \
s=`echo $$i | sed "s/extN/ext3/"`; \
- echo "creating $(extN_include_orig)/$$i"; \
- sed $(SUB) $(LINUX)/include/linux/$$s > $(extN_include_orig)/$$i; \
- done
+ echo "creating $(extN_include_orig)/$$i"; \
+ sed $(SUB) $(LINUX)/include/linux/$$s > $(extN_include_orig)/$$i; \
+ done
echo timestamp > $@
grep -q extN_mark_inode_dirty && list="$(EXTN_FIXES) $$list"; \
grep -q "if (do_sync_supers)" $(extN_orig)/super.c && \
list="ext3-unmount_sync.diff $$list"; \
+ grep -q "ext3_journal_start(inode, 2)" $(extN_orig)/inode.c || \
+ list="ext3-largefile.diff $$list"; \
+ grep -q "EXPORT_SYMBOL(extN_bread)" $(extN_orig)/super.c || \
+ list="$$list extN-2.4.18-exports.diff"; \
for p in $$list; do \
echo "applying patch $$p"; \
sed $(SUB) $(srcdir)/$$p | \
(cd $(top_builddir) && patch -p1) || exit $$?; \
done; \
- echo "It is OK if the next patch says it is skipping this patch"; \
- echo "applying patch $(srcdir)/extN-2.4.18-exports.diff"; \
- (cd $(top_builddir) && \
- patch -N -p1) < $(srcdir)/extN-2.4.18-exports.diff; \
- echo "applying patch $(srcdir)/extN-2.4.18-ino_sb_fix.diff"; \
- (cd $(top_builddir) && \
- patch -p1) < $(srcdir)/extN-2.4.18-ino_sb_fixup.diff || exit $$?; \
fi
echo timestamp > $@
--- /dev/null
+--- linux/fs/ext3/ialloc.c.orig Sat Oct 19 11:42:23 2002
++++ linux/fs/ext3/ialloc.c Sat Jan 4 12:14:18 2003
+@@ -64,8 +64,8 @@ static int read_inode_bitmap (struct sup
+ if (!bh) {
+ ext3_error (sb, "read_inode_bitmap",
+ "Cannot read inode bitmap - "
+- "block_group = %lu, inode_bitmap = %lu",
+- block_group, (unsigned long) gdp->bg_inode_bitmap);
++ "block_group = %lu, inode_bitmap = %u",
++ block_group, gdp->bg_inode_bitmap);
+ retval = -EIO;
+ }
+ /*
+@@ -531,19 +532,19 @@ out:
+ }
+
+ /* Verify that we are loading a valid orphan from disk */
+-struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino)
++struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
+ {
+- ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
++ unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
+ unsigned long block_group;
+ int bit;
+ int bitmap_nr;
+ struct buffer_head *bh;
+ struct inode *inode = NULL;
+-
++
+ /* Error cases - e2fsck has already cleaned up for us */
+ if (ino > max_ino) {
+ ext3_warning(sb, __FUNCTION__,
+- "bad orphan ino %ld! e2fsck was run?\n", ino);
++ "bad orphan ino %lu! e2fsck was run?\n", ino);
+ return NULL;
+ }
+
+@@ -552,7 +553,7 @@ struct inode *ext3_orphan_get (struct su
+ if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 ||
+ !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) {
+ ext3_warning(sb, __FUNCTION__,
+- "inode bitmap error for orphan %ld\n", ino);
++ "inode bitmap error for orphan %lu\n", ino);
+ return NULL;
+ }
+
+@@ -563,7 +564,7 @@ struct inode *ext3_orphan_get (struct su
+ if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) ||
+ is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) {
+ ext3_warning(sb, __FUNCTION__,
+- "bad orphan inode %ld! e2fsck was run?\n", ino);
++ "bad orphan inode %lu! e2fsck was run?\n", ino);
+ printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n",
+ bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data));
+ printk(KERN_NOTICE "inode=%p\n", inode);
+@@ -570,9 +571,9 @@ struct inode *ext3_orphan_get (struct su
+ if (inode) {
+ printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
+ is_bad_inode(inode));
+- printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%d\n",
++ printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
+ NEXT_ORPHAN(inode));
+- printk(KERN_NOTICE "max_ino=%ld\n", max_ino);
++ printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ }
+ /* Avoid freeing blocks if we got a bad deleted inode */
+ if (inode && inode->i_nlink == 0)
+--- linux/fs/ext3/namei.c.orig Sat Oct 19 11:42:45 2002
++++ linux/fs/ext3/namei.c Sat Jan 4 12:13:27 2003
+@@ -716,10 +716,10 @@ int ext3_orphan_del(handle_t *handle, st
+ {
+ struct list_head *prev;
+ struct ext3_sb_info *sbi;
+- ino_t ino_next;
++ unsigned long ino_next;
+ struct ext3_iloc iloc;
+ int err = 0;
+-
++
+ lock_super(inode->i_sb);
+ if (list_empty(&inode->u.ext3_i.i_orphan)) {
+ unlock_super(inode->i_sb);
+@@ -730,7 +730,7 @@ int ext3_orphan_del(handle_t *handle, st
+ prev = inode->u.ext3_i.i_orphan.prev;
+ sbi = EXT3_SB(inode->i_sb);
+
+- jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
++ jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
+
+ list_del(&inode->u.ext3_i.i_orphan);
+ INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
+@@ -741,13 +741,13 @@ int ext3_orphan_del(handle_t *handle, st
+ * list in memory. */
+ if (!handle)
+ goto out;
+-
++
+ err = ext3_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto out_err;
+
+ if (prev == &sbi->s_orphan) {
+- jbd_debug(4, "superblock will point to %ld\n", ino_next);
++ jbd_debug(4, "superblock will point to %lu\n", ino_next);
+ BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, sbi->s_sbh);
+ if (err)
+@@ -758,8 +758,8 @@ int ext3_orphan_del(handle_t *handle, st
+ struct ext3_iloc iloc2;
+ struct inode *i_prev =
+ list_entry(prev, struct inode, u.ext3_i.i_orphan);
+-
+- jbd_debug(4, "orphan inode %ld will point to %ld\n",
++
++ jbd_debug(4, "orphan inode %lu will point to %lu\n",
+ i_prev->i_ino, ino_next);
+ err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
+ if (err)
+@@ -774,7 +774,7 @@ int ext3_orphan_del(handle_t *handle, st
+ if (err)
+ goto out_brelse;
+
+-out_err:
++out_err:
+ ext3_std_error(inode->i_sb, err);
+ out:
+ unlock_super(inode->i_sb);
+--- linux/include/linux/ext3_fs.h.orig Thu Jan 2 16:10:24 2003
++++ linux/include/linux/ext3_fs.h Sat Jan 4 12:25:41 2003
+@@ -622,7 +622,7 @@ extern int ext3_sync_file (struct file *
+ /* ialloc.c */
+ extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int);
+ extern void ext3_free_inode (handle_t *, struct inode *);
+-extern struct inode * ext3_orphan_get (struct super_block *, ino_t);
++extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
+ extern unsigned long ext3_count_free_inodes (struct super_block *);
+ extern void ext3_check_inodes_bitmap (struct super_block *);
+ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
struct list_head *prev;
+ struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_sb_info *sbi;
- ino_t ino_next;
+ unsigned long ino_next;
struct ext3_iloc iloc;
int err = 0;
-
+
lock_super(inode->i_sb);
- if (list_empty(&inode->u.ext3_i.i_orphan)) {
+ if (list_empty(&ei->i_orphan)) {
+ prev = ei->i_orphan.prev;
sbi = EXT3_SB(inode->i_sb);
- jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
+ jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
- list_del(&inode->u.ext3_i.i_orphan);
- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
/* If we're on an error path, we may not have a valid
* transaction handle with which to update the orphan list on
-@@ -1520,9 +1520,8 @@ int ext3_orphan_del(handle_t *handle, st
+@@ -1520,8 +1520,7 @@ int ext3_orphan_del(handle_t *handle, st
err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
} else {
struct ext3_iloc iloc2;
- struct inode *i_prev =
- list_entry(prev, struct inode, u.ext3_i.i_orphan);
--
+ struct inode *i_prev = orphan_list_entry(prev);
-+
- jbd_debug(4, "orphan inode %ld will point to %ld\n",
+
+ jbd_debug(4, "orphan inode %lu will point to %lu\n",
i_prev->i_ino, ino_next);
- err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
@@ -1695,10 +1695,10 @@ static int ext3_symlink (struct inode *
goto out_no_entry;
} else {
--- /dev/null
+Under rare conditions (filesystem corruption, really) it is possible
+for ext3_dirty_inode() to require _two_ blocks for the transaction: one
+for the inode and one to update the superblock - to set
+EXT3_FEATURE_RO_COMPAT_LARGE_FILE. This causes the filesystem to go
+BUG.
+
+So reserve an additional block for that eventuality.
+
+
+ fs/ext3/inode.c | 2 +-
+ 1 files changed, 1 insertion(+), 1 deletion(-)
+
+--- 25/fs/ext3/inode.c~ext3-transaction-reserved-blocks Sat Dec 14 18:28:21 2002
++++ 25-akpm/fs/ext3/inode.c Sat Dec 14 18:28:21 2002
+@@ -2698,7 +2698,7 @@ void ext3_dirty_inode(struct inode *inod
+ handle_t *handle;
+
+ lock_kernel();
+- handle = ext3_journal_start(inode, 1);
++ handle = ext3_journal_start(inode, 2);
+ if (IS_ERR(handle))
+ goto out;
+ if (current_handle &&
--- /dev/null
+--- lustre/extN/inode.orig.c 2002-12-29 18:48:56.000000000 +0800
++++ lustre/extN/inode.c 2002-12-29 19:17:24.000000000 +0800
+@@ -2728,3 +2728,85 @@
+ * here, in extN_aops_journal_start() to ensure that the forthcoming "see if we
+ * need to extend" test in extN_prepare_write() succeeds.
+ */
++
++/* for each block: 1 ind + 1 dind + 1 tind
++ * for each block: 3 bitmap blocks
++ * for each block: 3 group descriptor blocks
++ * 1 inode block
++ * 1 superblock
++ * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
++ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXTN_SINGLEDATA_TRANS_BLOCKS
++ *
++ * XXX assuming:
++ * (1) fs logic block size == page size
++ * (2) extN in writeback mode
++ */
++static inline int extN_san_write_trans_blocks(int nblocks)
++{
++ int ret;
++
++ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1;
++
++#ifdef CONFIG_QUOTA
++ ret += 2 * EXTN_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++ return ret;
++}
++
++/* Allocate blocks for an inode without creating any buffer/page
++ * for data I/O; set the inode size if the file is extended.
++ *
++ * @inode: target inode
++ * @blocks: array of logic block number
++ * @nblocks: how many blocks need be alloced
++ * @newsize: new filesize we should set
++ *
++ * return: 0 success, otherwise failed
++ * (*blocks) contains physical block number alloced
++ *
++ * XXX this assumes the fs block size == page size
++ */
++int extN_prep_san_write(struct inode *inode, long *blocks,
++ int nblocks, loff_t newsize)
++{
++ handle_t *handle;
++ struct buffer_head bh_tmp;
++ int needed_blocks;
++ int i, ret = 0, ret2;
++
++ needed_blocks = extN_san_write_trans_blocks(nblocks);
++
++ lock_kernel();
++ handle = extN_journal_start(inode, needed_blocks);
++ if (IS_ERR(handle)) {
++ unlock_kernel();
++ return PTR_ERR(handle);
++ }
++ unlock_kernel();
++
++ /* alloc blocks one by one */
++ for (i = 0; i < nblocks; i++) {
++ ret = extN_get_block_handle(handle, inode, blocks[i],
++ &bh_tmp, 1);
++ if (ret)
++ break;
++
++ blocks[i] = bh_tmp.b_blocknr;
++ }
++
++ /* set inode size if needed */
++ if (!ret && (newsize > inode->i_size)) {
++ inode->i_size = newsize;
++ extN_mark_inode_dirty(handle, inode);
++ }
++
++ lock_kernel();
++ ret2 = extN_journal_stop(handle, inode);
++ unlock_kernel();
++
++ if (!ret)
++ ret = ret2;
++ return ret;
++}
++EXPORT_SYMBOL(extN_prep_san_write);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * User-space Lustre headers.
+ *
+ */
+#ifndef LIBLUSTRE_H__
+#define LIBLUSTRE_H__
+
+#include <sys/mman.h>
+#include <asm/page.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+
+#include <portals/list.h>
+#include <portals/p30.h>
+
+/* definitions for liblustre */
+
+/* always adopt 2.5 definitions */
+#define LINUX_VERSION_CODE 1
+#define KERNEL_VERSION(a,b,c) 0
+
+static inline void inter_module_put(void *a)
+{
+ return;
+}
+
+extern ptl_handle_ni_t tcpnal_ni;
+
+/* Userspace replacement for the kernel symbol registry: the only name
+ * resolved is "tcpnal_ni", which yields the address of the tcpnal_ni
+ * handle declared above; any other name yields NULL. */
+static inline void *inter_module_get(char *arg)
+{
+
+ if (strcmp(arg, "tcpnal_ni") == 0 )
+ return &tcpnal_ni;
+ else
+ return NULL;
+
+}
+
+
+/* cheats for now */
+
+struct work_struct {
+ void (*ws_task)(void *arg);
+ void *ws_arg;
+};
+
+static inline void prepare_work(struct work_struct *q, void (*t)(void *),
+ void *arg)
+{
+ q->ws_task = t;
+ q->ws_arg = arg;
+ return;
+}
+
+static inline void schedule_work(struct work_struct *q)
+{
+ q->ws_task(q->ws_arg);
+}
+
+
+#define strnlen(a,b) strlen(a)
+static inline void *kmalloc(int size, int prot)
+{
+ return malloc(size);
+}
+#define vmalloc malloc
+#define vfree free
+#define kfree(a) free(a)
+#define GFP_KERNEL 1
+#define GFP_HIGHUSER 1
+#define IS_ERR(a) (abs((int)(a)) < 500 ? 1 : 0)
+#define PTR_ERR(a) ((int)(a))
+
+#define capable(foo) 1
+#define CAP_SYS_ADMIN 1
+
+typedef struct {
+ void *cwd;
+
+}mm_segment_t;
+
+typedef void *read_proc_t;
+typedef void *write_proc_t;
+
+
+/* modules */
+
+struct module {
+ int count;
+};
+
+static inline void MODULE_AUTHOR(char *name)
+{
+ printf("%s\n", name);
+}
+#define MODULE_DESCRIPTION(name) MODULE_AUTHOR(name)
+#define MODULE_LICENSE(name) MODULE_AUTHOR(name)
+
+#define THIS_MODULE NULL
+#define __init
+#define __exit
+
+/* devices */
+
+static inline int misc_register(void *foo)
+{
+ return 0;
+}
+#define misc_deregister misc_register
+
+#define __MOD_INC_USE_COUNT(m) do {int a = 1; a++; } while (0)
+#define __MOD_DEC_USE_COUNT(m) do {int a = 1; a++; } while (0)
+#define MOD_INC_USE_COUNT do {int a = 1; a++; } while (0)
+#define MOD_DEC_USE_COUNT do {int a = 1; a++; } while (0)
+
+/* module initialization */
+extern int init_obdclass(void);
+extern int ptlrpc_init(void);
+extern int ldlm_init(void);
+extern int osc_init(void);
+extern int lov_init(void);
+extern int echo_client_init(void);
+
+
+
+/* general stuff */
+#define jiffies 0
+
+#define EXPORT_SYMBOL(S)
+
+typedef int spinlock_t;
+typedef __u64 kdev_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+#define spin_lock(l) do {int a = 1; a++; } while (0)
+#define spin_unlock(l) do {int a= 1; a++; } while (0)
+#define spin_lock_init(l) do {int a= 1; a++; } while (0)
+static inline void spin_lock_bh(spinlock_t *l)
+{
+ return;
+}
+static inline void spin_unlock_bh(spinlock_t *l)
+{
+ return;
+}
+static inline void spin_lock_irqrestore(a,b)
+{
+ return;
+}
+static inline void spin_unlock_irqrestore(a,b)
+{
+ return;
+}
+static inline void spin_lock_irqsave(a,b)
+{
+ return;
+}
+
+#define barrier() do {int a= 1; a++; } while (0)
+
+/* registering symbols */
+
+#define ERESTARTSYS ERESTART
+#define HZ 1
+
+/* random */
+
+/* Fill exactly "size" bytes at "ptr" with pseudo-random data from rand().
+ * Every byte is written, including any tail that is not a multiple of
+ * sizeof(int), and each byte gets a fresh rand() value.  Not suitable for
+ * cryptographic use. */
+static inline void get_random_bytes(void *ptr, int size)
+{
+        unsigned char *p = (unsigned char *)ptr;
+        int i;
+
+        for (i = 0; i < size; i++)
+                p[i] = (unsigned char)rand();
+}
+
+/* memory */
+
+static inline int copy_from_user(void *a,void *b, int c)
+{
+ memcpy(a,b,c);
+ return 0;
+}
+
+static inline int copy_to_user(void *a,void *b, int c)
+{
+ memcpy(a,b,c);
+ return 0;
+}
+
+
+/* slabs */
+typedef struct {
+ int size;
+} kmem_cache_t;
+#define SLAB_HWCACHE_ALIGN 0
+/* Userspace stand-in for the kernel slab constructor: only the object size
+ * is remembered, every other argument is ignored.  Returns NULL when the
+ * cache descriptor itself cannot be allocated.  The descriptor is released
+ * by kmem_cache_destroy(). */
+static inline kmem_cache_t *kmem_cache_create(name,objsize,cdum,d,e,f)
+{
+        kmem_cache_t *c;
+
+        c = malloc(sizeof(*c));
+        if (!c)
+                return NULL;
+        c->size = objsize;
+        return c;
+}
+
+static inline int kmem_cache_destroy(kmem_cache_t *a)
+{
+ free(a);
+ return 0;
+}
+#define kmem_cache_validate(a,b) 1
+#define kmem_cache_alloc(cache, prio) malloc(cache->size)
+#define kmem_cache_free(cache, obj) OBD_FREE(obj, cache->size)
+/* The "size" argument is unused: the cache already records its object size. */
+#define PORTAL_SLAB_ALLOC(lock,cache,size) do { lock = kmem_cache_alloc(cache,GFP_KERNEL); } while (0)
+/* Release with free(), pairing with the plain malloc() behind kmem_cache_alloc(). */
+#define PORTAL_SLAB_FREE(lock,cache,size) do { free(lock); } while (0)
+
+struct page {
+ void *addr;
+ int index;
+};
+
+#define kmap(page) (page)->addr
+#define kunmap(a) do { int foo = 1; foo++; } while (0)
+
+/* Allocate one userspace "page": a struct page descriptor plus PAGE_SIZE
+ * bytes of backing memory reachable through kmap().  Both arguments are
+ * ignored.  Returns NULL if either allocation fails.  Release with
+ * __free_pages(). */
+static inline struct page *alloc_pages(mask,foo)
+{
+        struct page *pg = malloc(sizeof(*pg));
+
+        if (!pg)
+                return NULL;
+#ifdef MAP_ANONYMOUS
+        /* Anonymous mappings need MAP_PRIVATE (or MAP_SHARED) and fd -1;
+         * the page must be readable as well as writable. */
+        pg->addr = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        /* mmap() reports failure as MAP_FAILED, not NULL. */
+        if (pg->addr == MAP_FAILED)
+                pg->addr = NULL;
+#else
+        pg->addr = malloc(PAGE_SIZE);
+#endif
+
+        if (!pg->addr) {
+                free(pg);
+                return NULL;
+        }
+        return pg;
+}
+
+static inline void __free_pages(struct page *pg, int what)
+{
+#ifdef MAP_ANONYMOUS
+ munmap(pg->addr, PAGE_SIZE);
+#else
+ free(pg->addr);
+#endif
+ free(pg);
+}
+
+/* arithmetic */
+#define do_div(a,b) (a)/(b)
+
+/* dentries / intents */
+struct lookup_intent {
+ void *it_iattr;
+};
+
+struct iattr {
+ int mode;
+};
+
+struct dentry {
+ int d_count;
+};
+struct file {
+ struct dentry *f_dentry;
+ void *private_data;
+} ;
+
+struct vfsmount {
+ void *pwd;
+};
+#define cpu_to_le32(x) ((__u32)(x))
+
+/* semaphores */
+struct semaphore {
+ int count;
+};
+
+#define down(a) do {(a)->count++;} while (0)
+#define up(a) do {(a)->count--;} while (0)
+#define sema_init(a,b) do { (a)->count = b; } while (0)
+
+typedef struct {
+ struct list_head sleepers;
+} wait_queue_head_t;
+
+typedef struct {
+ struct list_head sleeping;
+ void *process;
+} wait_queue_t;
+
+struct signal {
+ int signal;
+};
+
+struct task_struct {
+ int state;
+ struct signal pending;
+ char comm[32];
+ int pid;
+};
+
+extern struct task_struct *current;
+
+
+
+#define set_current_state(foo) do { current->state = foo; } while (0)
+
+#define init_waitqueue_entry(q,p) do { (q)->process = p; } while (0)
+/* list_add(new, head): link the waiter entry (p) onto the queue head (q),
+ * leaving entries already on the queue intact. */
+#define add_wait_queue(q,p) do { list_add(&(p)->sleeping, &(q)->sleepers); } while (0)
+#define del_wait_queue(p) do { list_del(&(p)->sleeping); } while (0)
+#define remove_wait_queue(q,p) do { list_del(&(p)->sleeping); } while (0)
+
+#define init_waitqueue_head(l) INIT_LIST_HEAD(&(l)->sleepers)
+/* No-ops in the single-threaded library build; the dummy variable is
+ * initialized so the placeholder statement never reads an indeterminate
+ * value. */
+#define wake_up(l) do { int a = 1; a++; } while (0)
+#define wait_event(l,m) do { int a = 1; a++; } while (0)
+#define TASK_INTERRUPTIBLE 0
+#define TASK_UNINTERRUPTIBLE 1
+#define TASK_RUNNING 2
+
+
+/* No-op placeholder; the dummy variable is initialized so it is never read
+ * while indeterminate. */
+#define schedule() do { int a = 1; a++; } while (0)
+static inline int schedule_timeout(t)
+{
+ return 0;
+}
+
+/* Kernel process-control hooks reduced to no-ops; the dummy variable is
+ * initialized so the placeholder statement never reads an indeterminate
+ * value. */
+#define lock_kernel() do { int a = 1; a++; } while (0)
+#define daemonize(l) do { int a = 1; a++; } while (0)
+#define sigfillset(l) do { int a = 1; a++; } while (0)
+#define recalc_sigpending(l) do { int a = 1; a++; } while (0)
+#define kernel_thread(l,m,n)
+
+static inline int call_usermodehelper(char *prog, char **argv, char **evnp)
+{
+ return 0;
+}
+
+
+
+#define KERN_INFO
+
+
+
+struct timer_list {
+ struct list_head tl_list;
+ void (*function)(unsigned long unused);
+ void *data;
+ int expires;
+};
+
+static inline int timer_pending(struct timer_list *l)
+{
+ if (l->expires > jiffies)
+ return 1;
+ else
+ return 0;
+}
+
+static inline int init_timer(struct timer_list *l)
+{
+ INIT_LIST_HEAD(&l->tl_list);
+ return 0;
+}
+
+static inline void mod_timer(struct timer_list *l, int thetime)
+{
+ l->expires = thetime;
+}
+
+/* Deactivate a timer.  The timer storage belongs to the caller (callers
+ * written against the kernel API embed it in another structure or keep it
+ * on the stack), so it must not be freed here.  Clearing "expires" makes
+ * timer_pending() report 0. */
+static inline void del_timer(struct timer_list *l)
+{
+        l->expires = 0;
+}
+
+typedef struct { volatile int counter; } atomic_t;
+
+#define atomic_read(a) ((a)->counter)
+#define atomic_set(a,b) do {(a)->counter = b; } while (0)
+#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
+#define atomic_inc(a) (((a)->counter)++)
+#define atomic_dec(a) do { (a)->counter--; } while (0)
+#define atomic_add(b,a) do {(a)->counter += b;} while (0)
+#define atomic_sub(b,a) do {(a)->counter -= b;} while (0)
+
+#define LBUG() do { sleep(1000000); } while (0)
+
+#include <linux/obd_support.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_import.h>
+#include <linux/lustre_export.h>
+#include <linux/lustre_net.h>
+
+
+#endif
+
#ifndef _LPROCFS_SNMP_H
#define _LPROCFS_SNMP_H
+#ifdef __KERNEL__
#include <linux/autoconf.h>
#include <linux/proc_fs.h>
+#endif
#ifndef LPROCFS
#ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */
/* class_obd.c */
extern struct proc_dir_entry *proc_lustre_root;
-extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
-extern void lprocfs_init_multi_vars(unsigned int idx,
- struct lprocfs_static_vars *var);
#define LPROCFS_INIT_MULTI_VARS(array, size) \
void lprocfs_init_multi_vars(unsigned int idx, \
x->obd_vars = glob[idx].obd_vars; \
} \
+#ifdef LPROCFS
#define LPROCFS_INIT_VARS(vclass, vinstance) \
void lprocfs_init_vars(struct lprocfs_static_vars *x) \
{ \
x->obd_vars = vinstance; \
} \
-#ifdef LPROCFS
+extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+extern void lprocfs_init_multi_vars(unsigned int idx,
+ struct lprocfs_static_vars *var);
/* lprocfs_status.c */
extern int lprocfs_add_vars(struct proc_dir_entry *root,
struct lprocfs_vars *var,
static inline struct proc_dir_entry *
lprocfs_register(const char *name, struct proc_dir_entry *parent,
struct lprocfs_vars *list, void *data) { return NULL; }
+static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; }
static inline int lprocfs_add_vars(struct proc_dir_entry *root,
struct lprocfs_vars *var,
void *data) { return 0; }
#define _LUSTRE_DLM_H__
#ifdef __KERNEL__
-
#include <linux/proc_fs.h>
+#endif
+
#include <linux/lustre_lib.h>
#include <linux/lustre_net.h>
#include <linux/lustre_import.h>
-#include <linux/handles.h>
+#include <linux/lustre_handles.h>
struct obd_ops;
struct obd_device;
#define LDLM_MIN_TYPE 10
#define LDLM_MAX_TYPE 11
-extern ldlm_res_compat ldlm_res_compat_table [];
-extern ldlm_res_policy ldlm_res_policy_table [];
-
struct ldlm_resource {
struct ldlm_namespace *lr_namespace;
struct list_head lr_hash;
/* Per-export ldlm state. */
struct ldlm_export_data {
- struct list_head led_held_locks;
- struct obd_import led_import;
+ struct list_head led_held_locks; /* protected by namespace lock */
+ struct obd_import led_import;
};
extern struct obd_ops ldlm_obd_ops;
} \
} while (0)
-/* I hate hate hate hate hate this. This cannot stay. bug 850. -phil */
-#define LDLM_DEBUG0(lock, format) __LDLM_DEBUG(D_DLMTRACE, lock, format"%s","")
-
-#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, format, a)
-#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, a)
+#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \
+ format, ## a)
+#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a)
#define LDLM_DEBUG_NOLOCK(format, a...) \
CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag);
-#endif /* __KERNEL__ */
/* ioctls for trying requests */
#define IOC_LDLM_TYPE 'f'
#ifndef __EXPORT_H
#define __EXPORT_H
-#ifdef __KERNEL__
-
#include <linux/lustre_idl.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_mds.h>
struct list_head led_open_head;
};
-struct ost_export_data {
- struct obd_uuid oed_uuid; /* client UUID */
-};
-
struct ec_export_data { /* echo client */
struct list_head eced_open_head;
struct list_head eced_locks;
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
struct lov_export_data eu_lov_data;
- struct ost_export_data eu_ost_data;
struct ec_export_data eu_ec_data;
} u;
};
#define exp_mds_data u.eu_mds_data
#define exp_lov_data u.eu_lov_data
#define exp_filter_data u.eu_filter_data
-#define exp_ost_data u.eu_ost_data
#define exp_ec_data u.eu_ec_data
extern struct obd_export *class_conn2export(struct lustre_handle *conn);
extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
-#endif /* __KERNEL__ */
#endif /* __EXPORT_H */
void *handle, fsfilt_cb_t cb_func);
int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
int (* fs_sync)(struct super_block *sb);
+ int (* fs_prep_san_write)(struct inode *inode, long *blocks,
+ int nblocks, loff_t newsize);
};
extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
static inline void *fsfilt_start(struct obd_device *obd,
struct inode *inode, int op)
{
+ ENTRY;
return obd->obd_fsops->fs_start(inode, op);
}
void *handle)
{
return obd->obd_fsops->fs_commit(inode, handle);
+ EXIT;
}
static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry,
return obd->obd_fsops->fs_sync(fs);
}
+/* Forward the call to the fs_prep_san_write method of the filesystem
+ * operations attached to obd (obd->obd_fsops) and hand back its return value
+ * unchanged.  The method pointer is not checked before the call. */
+static inline int fs_prep_san_write(struct obd_device *obd,
+                                    struct inode *inode,
+                                    long *blocks,
+                                    int nblocks,
+                                    loff_t newsize)
+{
+        struct fsfilt_operations *ops = obd->obd_fsops;
+
+        return ops->fs_prep_san_write(inode, blocks, nblocks, newsize);
+}
#endif /* __KERNEL__ */
#endif
--- /dev/null
+#ifndef __LINUX_HANDLES_H_
+#define __LINUX_HANDLES_H_
+
+#ifdef __KERNEL__
+#include <asm/types.h>
+#include <asm/atomic.h>
+#include <linux/list.h>
+#endif
+
+typedef void (*portals_handle_addref_cb)(void *object);
+
+/* These handles are most easily used by having them appear at the very top of
+ * whatever object you want to make handles for, i.e.:
+ *
+ * struct ldlm_lock {
+ * struct portals_handle handle;
+ * ...
+ * };
+ *
+ * Now you're able to assign the result of class_handle2object() directly to
+ * an ldlm_lock pointer. If it's not at the top, you'll want to hack up a macro
+ * that uses some offsetof() magic. */
+
+struct portals_handle {
+ struct list_head h_link; /* list linkage -- presumably the hash-table chain; confirm in handles.c */
+ __u64 h_cookie; /* cookie value; class_handle2object() takes a cookie of this type */
+ portals_handle_addref_cb h_addref; /* callback of the type passed to class_handle_hash() */
+};
+
+/* handles.c */
+
+/* Add a handle to the hash table */
+void class_handle_hash(struct portals_handle *, portals_handle_addref_cb);
+void class_handle_unhash(struct portals_handle *);
+void *class_handle2object(__u64 cookie);
+int class_handle_init(void);
+void class_handle_cleanup(void);
+
+#endif
# include <asm/types.h>
# include <linux/types.h>
# include <linux/list.h>
+# include <linux/string.h> /* for strncpy, below */
#else
# define __KERNEL__
# include <asm/types.h>
static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp)
{
- strncpy(uuid->uuid, tmp, sizeof(uuid->uuid));
- uuid->uuid[sizeof(uuid->uuid) - 1] = '\0';
+ strncpy(uuid->uuid, tmp, sizeof(*uuid));
+ uuid->uuid[sizeof(*uuid) - 1] = '\0';
}
/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
#define CONNMGR_REQUEST_PORTAL 1
#define CONNMGR_REPLY_PORTAL 2
-#define OSC_REQUEST_PORTAL 3
+//#define OSC_REQUEST_PORTAL 3
#define OSC_REPLY_PORTAL 4
#define OSC_BULK_PORTAL 5
#define OST_REQUEST_PORTAL 6
-#define OST_REPLY_PORTAL 7
+//#define OST_REPLY_PORTAL 7
#define OST_BULK_PORTAL 8
-#define MDC_REQUEST_PORTAL 9
+//#define MDC_REQUEST_PORTAL 9
#define MDC_REPLY_PORTAL 10
-#define MDC_BULK_PORTAL 11
+//#define MDC_BULK_PORTAL 11
#define MDS_REQUEST_PORTAL 12
-#define MDS_REPLY_PORTAL 13
+//#define MDS_REPLY_PORTAL 13
#define MDS_BULK_PORTAL 14
#define LDLM_CB_REQUEST_PORTAL 15
#define LDLM_CB_REPLY_PORTAL 16
#define PTLBD_REQUEST_PORTAL 19
#define PTLBD_REPLY_PORTAL 20
#define PTLBD_BULK_PORTAL 21
-#define MDS_GETATTR_PORTAL 22
+#define MDS_SETATTR_PORTAL 22
+#define MDS_READPAGE_PORTAL 23
#define SVC_KILLED 1
#define SVC_EVENT 2
#define OST_OPEN 11
#define OST_CLOSE 12
#define OST_STATFS 13
+#define OST_SAN_READ 14
+#define OST_SAN_WRITE 15
+#define OST_SYNCFS 16
typedef uint64_t obd_id;
#define OBD_FL_OBDMDEXISTS (0x00000002)
#define OBD_INLINESZ 60
+/* Size in bytes of the fd_ostdata buffer in struct ll_file_data.  It may
+ * equal OBD_INLINESZ but must not exceed it; the build fails otherwise. */
+#define FD_OSTDATA_SIZE 32
+#if (FD_OSTDATA_SIZE > OBD_INLINESZ)
+# error FD_OSTDATA_SIZE must not be larger than OBD_INLINESZ
+#endif
/* Note: 64-bit types are 64-bit aligned in structure */
struct obdo {
#define OBD_MD_FLEASIZE (0x00020000) /* extended attribute data */
#define OBD_MD_LINKNAME (0x00040000) /* symbolic link target */
#define OBD_MD_FLHANDLE (0x00080000) /* file handle */
+#define OBD_MD_FLCKSUM (0x00100000) /* bulk data checksum */
#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
- OBD_MD_FLEASIZE | OBD_MD_FLHANDLE))
+ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM))
struct obd_statfs {
__u64 os_type;
__u32 len;
__u32 xid;
__u32 flags;
-};
+} __attribute__((packed));
/* request structure for OST's */
*/
/* opcodes */
-#define MDS_GETATTR 1
-#define MDS_GETATTR_NAME 2
-#define MDS_CLOSE 3
-#define MDS_REINT 4
-#define MDS_READPAGE 6
-#define MDS_CONNECT 7
-#define MDS_DISCONNECT 8
-#define MDS_GETSTATUS 9
-#define MDS_STATFS 10
-#define MDS_GETLOVINFO 11
+#define MDS_GETATTR 33
+#define MDS_GETATTR_NAME 34
+#define MDS_CLOSE 35
+#define MDS_REINT 36
+#define MDS_READPAGE 37
+#define MDS_CONNECT 38
+#define MDS_DISCONNECT 39
+#define MDS_GETSTATUS 40
+#define MDS_STATFS 41
+#define MDS_GETLOVINFO 42
+/*
+ * Do not exceed 63
+ */
#define REINT_SETATTR 1
#define REINT_CREATE 2
#define IT_OPEN_CREATE (1 << 4)
#define IT_OPEN_OPEN (1 << 5)
-#define IT_UNLINK (1<<8)
-
#define REINT_OPCODE_MASK 0xff /* opcodes must fit into this mask */
#define REINT_REPLAYING 0x1000 /* masked into the opcode to indicate replay */
__u32 rn_fsuid;
__u32 rn_fsgid;
__u32 rn_cap;
+ __u32 rn_suppgid1;
+ __u32 rn_suppgid2;
struct ll_fid rn_fid1;
struct ll_fid rn_fid2;
};
#ifndef __IMPORT_H
#define __IMPORT_H
-#ifdef __KERNEL__
#define IMP_INVALID 1
#define IMP_REPLAYABLE 2
-typedef int (*import_recover_t)(struct obd_import *imp, int phase);
+struct obd_import;
+typedef int (*import_recover_t)(struct obd_import *imp, int phase);
#include <linux/lustre_idl.h>
struct obd_import {
struct obd_device *imp_obd;
int imp_flags;
int imp_level;
- __u64 imp_last_xid;
- __u64 imp_last_bulk_xid;
__u64 imp_max_transno;
__u64 imp_peer_committed_transno;
- /* Protects flags, level, last_xid, *_list */
+ /* Protects flags, level, *_list */
spinlock_t imp_lock;
};
extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
extern struct obd_import *class_conn2ldlmimp(struct lustre_handle *);
-#endif /* __KERNEL__ */
#endif /* __IMPORT_H */
#endif
#endif
-#ifdef __KERNEL__
-/* l_net.c */
+/* target.c */
struct ptlrpc_request;
struct obd_device;
struct recovd_data;
struct recovd_obd;
struct obd_export;
#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
+
-int target_handle_connect(struct ptlrpc_request *req);
+int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler);
int target_handle_disconnect(struct ptlrpc_request *req);
int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
struct obd_uuid *cluuid);
+int target_revoke_connection(struct recovd_data *rd, int phase);
+
+#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
+void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler);
+void target_abort_recovery(void *data);
+int target_queue_recovery_request(struct ptlrpc_request *req,
+ struct obd_device *obd);
+int target_queue_final_reply(struct ptlrpc_request *req, int rc);
+
+/* client.c */
int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
struct obd_uuid *cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover);
int client_obd_disconnect(struct lustre_handle *conn);
int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
+int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
int client_obd_cleanup(struct obd_device * obddev);
struct client_obd *client_conn2cli(struct lustre_handle *conn);
struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
-int target_revoke_connection(struct recovd_data *rd, int phase);
-
+/* statfs_pack.c */
int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
/* l_lock.c */
int lustre_fwrite(struct file *file, const char *str, int len, loff_t *off);
int lustre_fsync(struct file *file);
+#ifdef __KERNEL__
+
static inline void l_dput(struct dentry *de)
{
if (!de || IS_ERR(de))
/*
* OBD IOCTLS
*/
-#define OBD_IOCTL_VERSION 0x00010001
+#define OBD_IOCTL_VERSION 0x00010002
struct obd_ioctl_data {
uint32_t ioc_len;
obd_size ioc_count;
obd_off ioc_offset;
uint32_t ioc_dev;
- uint32_t ____padding;
+ uint32_t ioc_command;
+
+ uint64_t ioc_nid;
+ uint32_t ioc_nal;
/* buffers the kernel will treat as user pointers */
uint32_t ioc_plen1;
printk("OBD ioctl: plen2 set but NULL pointer\n");
return 1;
}
- if (obd_ioctl_packlen(data) != data->ioc_len ) {
- printk("OBD ioctl: packlen exceeds ioc_len\n");
+ if (obd_ioctl_packlen(data) != data->ioc_len) {
+ printk("OBD ioctl: packlen exceeds ioc_len (%d != %d)\n",
+ obd_ioctl_packlen(data), data->ioc_len);
return 1;
}
#if 0
return 0;
}
-#else
+#endif
#include <linux/obd_support.h>
EXIT;
return 0;
}
-#endif
#define OBD_IOC_CREATE _IOR ('f', 101, long)
#define OBD_IOC_SETUP _IOW ('f', 102, long)
#define OBD_GET_VERSION _IOWR ('f', 144, long)
+#define OBD_IOC_ADD_UUID _IOWR ('f', 145, long)
+#define OBD_IOC_DEL_UUID _IOWR ('f', 146, long)
+#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long)
+
#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long)
#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long)
#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long)
#define ECHO_IOC_CANCEL _IOWR('f', 203, long)
+/* Set CHECKSUM_BULK to non-zero to compile in the checksum routine below;
+ * with 0, ost_checksum() expands to an empty statement. */
+#define CHECKSUM_BULK 0
+
+#if CHECKSUM_BULK
+/* Fold len bytes starting at addr into the running checksum *cksum: the
+ * previous value is shifted left by two and the plain sum of the bytes is
+ * added.  Summing one byte at a time avoids any byte-order concerns. */
+static inline void ost_checksum(__u64 *cksum, void *addr, int len)
+{
+        unsigned char *byte = (unsigned char *)addr;
+        __u64 total = 0;
+        int i;
+
+        for (i = 0; i < len; i++)
+                total += byte[i];
+
+        *cksum = (*cksum << 2) + total;
+}
+#else
+#define ost_checksum(cksum, addr, len) do {} while (0)
+#endif
+
/*
* l_wait_event is a flexible sleeping function, permitting simple caller
* configuration of interrupt and timeout sensitivity along with actions to
lwi_cb_data: data \
})
+/* l_sigismember: sigismember() in kernel builds; otherwise a plain bitwise
+ * AND of the mask at *a with b.  The argument is parenthesized so that an
+ * expression such as (X | Y) is ANDed as a whole. */
+#ifdef __KERNEL__
+#define l_sigismember sigismember
+#else
+/* NOTE(review): this ANDs the mask with the signal number itself rather than
+ * with a per-signal bit -- confirm that is what non-kernel callers expect. */
+#define l_sigismember(a,b) (*(a) & (b))
+#endif
+
/* XXX this should be one mask-check */
#define l_killable_pending(task) \
-(sigismember(&(task->pending.signal), SIGKILL) || \
- sigismember(&(task->pending.signal), SIGINT) || \
- sigismember(&(task->pending.signal), SIGTERM))
+(l_sigismember(&(task->pending.signal), SIGKILL) || \
+ l_sigismember(&(task->pending.signal), SIGINT) || \
+ l_sigismember(&(task->pending.signal), SIGTERM))
#define __l_wait_event(wq, condition, info, ret) \
do { \
#include <linux/lustre_mds.h>
#include <linux/lustre_ha.h>
+
extern kmem_cache_t *ll_file_data_slab;
struct ll_file_data {
struct lustre_handle fd_mdshandle;
- struct lustre_handle fd_osthandle;
struct ptlrpc_request *fd_req;
+ char fd_ostdata[FD_OSTDATA_SIZE];
__u32 fd_flags;
};
struct lov_mds_md *lic_lmm;
};
-#define LL_INLINESZ 60
struct ll_inode_info {
struct lov_stripe_md *lli_smd;
char *lli_symlink_name;
struct semaphore lli_open_sem;
- atomic_t lli_open_count; /* see ll_file_release */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
struct inode lli_vfs_inode;
#endif
extern struct inode_operations ll_file_inode_operations;
extern struct inode_operations ll_special_inode_operations;
struct ldlm_lock;
-int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag);
+int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data,
+ int flag);
int ll_size_lock(struct inode *, struct lov_stripe_md *, obd_off start,
int mode, struct lustre_handle *);
int ll_size_unlock(struct inode *, struct lov_stripe_md *, int mode,
struct lustre_handle *);
-int ll_file_size(struct inode *inode, struct lov_stripe_md *md,
- struct lustre_handle *);
+int ll_file_size(struct inode *inode, struct lov_stripe_md *md, char *ostdata);
int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid,
gid_t gid, struct lov_stripe_md **lsmp);
/* super.c */
void ll_update_inode(struct inode *, struct mds_body *, struct lov_mds_md *);
+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
/* symlink.c */
extern struct inode_operations ll_fast_symlink_inode_operations;
#define _LUSTRE_MDS_H
#ifdef __KERNEL__
-
#include <linux/fs.h>
+#endif
#include <linux/kp30.h>
#include <linux/lustre_idl.h>
#define LUSTRE_MDT_NAME "mdt"
#define LUSTRE_MDC_NAME "mdc"
-struct mdc_rpc_lock {
+struct mdc_rpc_lock {
struct semaphore rpcl_sem;
struct lookup_intent *rpcl_it;
};
extern struct mdc_rpc_lock mdc_rpc_lock;
+extern struct mdc_rpc_lock mdc_setattr_lock;
static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
{
__u32 ur_gid;
__u64 ur_time;
__u32 ur_flags;
- __u32 ur_suppgid;
+ __u32 ur_suppgid1;
+ __u32 ur_suppgid2;
};
#define MDS_LR_CLIENT 8192
#define MDS_CLIENT_SLOTS 17
#define MDS_MOUNT_RECOV 2
-#define MDS_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
/* Data stored per server at the head of the last_rcvd file. In le32 order. */
struct mds_server_data {
__u8 msd_uuid[37]; /* server UUID */
__u8 uuid_padding[3]; /* unused */
- __u64 msd_last_rcvd; /* last completed transaction ID */
+ __u64 msd_last_transno; /* last completed transaction ID */
__u64 msd_mount_count; /* MDS incarnation number */
__u8 padding[512 - 56];
};
struct mds_client_data {
__u8 mcd_uuid[37]; /* client UUID */
__u8 uuid_padding[3]; /* unused */
- __u64 mcd_last_rcvd; /* last completed transaction ID */
__u64 mcd_mount_count; /* MDS incarnation number */
- __u64 mcd_last_xid; /* client RPC xid for the last transaction */
- __u8 padding[MDS_LR_SIZE - 64];
+ __u64 mcd_last_transno; /* last completed transaction ID */
+ __u64 mcd_last_xid; /* xid for the last transaction */
+ __u32 mcd_last_result; /* result from last RPC */
+ __u32 mcd_last_data; /* per-op data (disposition for open &c.) */
+ /* The named fields of this struct total 72 bytes
+ * (37 + 3 + 8 + 8 + 8 + 4 + 4); pad the record out to MDS_LR_SIZE. */
+ __u8 padding[MDS_LR_SIZE - 72];
};
/* In-memory access to client data from MDS struct */
spinlock_t med_open_lock;
struct mds_client_data *med_mcd;
int med_off;
- __u64 med_last_xid;
- struct lustre_msg *med_last_reply;
- int med_last_replen;
+ struct ptlrpc_request *med_outstanding_reply;
};
/* file data for open files on MDS */
struct mds_file_data {
struct list_head mfd_list;
__u64 mfd_servercookie;
+ __u64 mfd_xid;
struct file *mfd_file;
};
int mds_reint(struct ptlrpc_request *req, int offset, struct lustre_handle *);
int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg,
int offset, struct mds_body *body, struct inode *inode);
+void mds_steal_ack_locks(struct mds_export_data *med,
+ struct ptlrpc_request *req);
/* mds/mds_fs.c */
int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
int cl_off);
int mds_client_free(struct obd_export *exp);
-#endif /* __KERNEL__ */
/* ioctls for trying requests */
#define IOC_REQUEST_TYPE 'f'
#define IOC_REQUEST_CLOSE _IOWR('f', 35, long)
#define IOC_REQUEST_MAX_NR 35
+#define MDS_CHECK_RESENT(req, reconstruct) /* resend check; may return 0 from the CALLING function */ \
+{ \
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { /* only requests flagged MSG_RESENT are examined */ \
+ struct mds_client_data *mcd = \
+ req->rq_export->exp_mds_data.med_mcd; \
+ if (mcd->mcd_last_xid == req->rq_xid) { /* xid matches the one recorded for this client */ \
+ reconstruct; /* caller-supplied statement, expanded verbatim */ \
+ RETURN(0); /* leaves the enclosing function, not just this block */ \
+ } \
+ DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", \
+ mcd->mcd_last_xid); /* xid mismatch: log it and fall through to the caller's normal path */ \
+ } \
+}
+
#endif
#ifndef _LUSTRE_NET_H
#define _LUSTRE_NET_H
+#ifdef __KERNEL__
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/tqueue.h>
+#else
+#include <linux/workqueue.h>
+#endif
+#endif
+
#include <linux/kp30.h>
// #include <linux/obd.h>
#include <portals/p30.h>
#define CONN_INVALID 1
+struct ptlrpc_peer {
+ ptl_nid_t peer_nid;
+ struct ptlrpc_ni *peer_ni;
+};
+
struct ptlrpc_connection {
struct list_head c_link;
- struct lustre_peer c_peer;
+ struct ptlrpc_peer c_peer;
struct obd_uuid c_local_uuid; /* XXX do we need this? */
struct obd_uuid c_remote_uuid;
};
/* state flags of requests */
-#define PTL_RPC_FL_INTR (1 << 0)
+#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */
#define PTL_RPC_FL_REPLIED (1 << 1) /* reply was received */
-#define PTL_RPC_FL_SENT (1 << 2)
-#define PTL_BULK_FL_SENT (1 << 3)
-#define PTL_BULK_FL_RCVD (1 << 4)
-#define PTL_RPC_FL_ERR (1 << 5)
-#define PTL_RPC_FL_TIMEOUT (1 << 6)
-#define PTL_RPC_FL_RESEND (1 << 7)
-#define PTL_RPC_FL_RESTART (1 << 8) /* operation must be restarted */
-#define PTL_RPC_FL_FINISHED (1 << 9)
+#define PTL_RPC_FL_SENT (1 << 2) /* request was sent */
+#define PTL_RPC_FL_WANT_ACK (1 << 3) /* reply is awaiting an ACK */
+#define PTL_BULK_FL_SENT (1 << 4) /* outgoing bulk was sent */
+#define PTL_BULK_FL_RCVD (1 << 5) /* incoming bulk was received */
+#define PTL_RPC_FL_ERR (1 << 6) /* request failed due to RPC error */
+#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */
+#define PTL_RPC_FL_RESEND (1 << 8) /* retransmit the request */
+#define PTL_RPC_FL_RESTART (1 << 9) /* operation must be restarted */
#define PTL_RPC_FL_RETAIN (1 << 10) /* retain for replay after reply */
#define PTL_RPC_FL_REPLAY (1 << 11) /* replay upon recovery */
#define PTL_RPC_FL_ALLOCREP (1 << 12) /* reply buffer allocated */
+#define PTL_RPC_FL_NO_RESEND (1 << 13) /* don't automatically resend this req */
+#define PTL_RPC_FL_RESENT (1 << 14) /* server rcvd resend of this req */
struct ptlrpc_request {
int rq_type; /* one of PTL_RPC_MSG_* */
__u64 rq_xid;
int rq_level;
- // void * rq_reply_handle;
- wait_queue_head_t rq_wait_for_rep;
+ wait_queue_head_t rq_wait_for_rep; /* XXX also _for_ack */
/* incoming reply */
ptl_md_t rq_reply_md;
/* outgoing req/rep */
ptl_md_t rq_req_md;
- struct lustre_peer rq_peer; /* XXX see service.c can this be factored away? */
+ struct ptlrpc_peer rq_peer; /* XXX see service.c can this be factored away? */
struct obd_export *rq_export;
struct ptlrpc_connection *rq_connection;
struct obd_import *rq_import;
void (*rq_replay_cb)(struct ptlrpc_request *);
void *rq_replay_data;
+
+ /* Only used on the server side for tracking acks. */
+ struct ptlrpc_req_ack_lock {
+ struct lustre_handle lock;
+ __u32 mode;
+ } rq_ack_locks[4];
};
#define DEBUG_REQ(level, req, fmt, args...) \
struct ptlrpc_request_buffer_desc {
struct list_head rqbd_list;
- struct ptlrpc_service *rqbd_service;
+ struct ptlrpc_srv_ni *rqbd_srv_ni;
ptl_handle_me_t rqbd_me_h;
atomic_t rqbd_refcount;
char *rqbd_buffer;
};
+struct ptlrpc_ni {
+ /* Generic interface state */
+ char *pni_name;
+ ptl_handle_ni_t pni_ni_h;
+ ptl_handle_eq_t pni_request_out_eq_h;
+ ptl_handle_eq_t pni_reply_in_eq_h;
+ ptl_handle_eq_t pni_reply_out_eq_h;
+ ptl_handle_eq_t pni_bulk_put_source_eq_h;
+ ptl_handle_eq_t pni_bulk_put_sink_eq_h;
+ ptl_handle_eq_t pni_bulk_get_source_eq_h;
+ ptl_handle_eq_t pni_bulk_get_sink_eq_h;
+};
+
+struct ptlrpc_srv_ni {
+ /* Interface-specific service state */
+ struct ptlrpc_service *sni_service; /* owning service */
+ struct ptlrpc_ni *sni_ni; /* network interface */
+ ptl_handle_eq_t sni_eq_h; /* event queue handle */
+ struct list_head sni_rqbds; /* all the request buffer descriptors */
+ __u32 sni_nrqbds; /* # request buffers */
+ atomic_t sni_nrqbds_receiving; /* # request buffers posted */
+};
+
struct ptlrpc_service {
time_t srv_time;
time_t srv_timeout;
- /* incoming request buffers */
- /* FIXME: perhaps a list of EQs, if multiple NIs are used? */
-
+ struct list_head srv_ni_list; /* list of interfaces */
__u32 srv_max_req_size; /* biggest request to receive */
__u32 srv_buf_size; /* # bytes in a request buffer */
- struct list_head srv_rqbds; /* all the request buffer descriptors */
- __u32 srv_nrqbds; /* # request buffers */
- atomic_t srv_nrqbds_receiving; /* # request buffers posted for input */
__u32 srv_req_portal;
__u32 srv_rep_portal;
__u32 srv_xid;
- /* event queue */
- ptl_handle_eq_t srv_eq_h;
-
- struct lustre_peer srv_self;
-
wait_queue_head_t srv_waitq; /* all threads sleep on this */
spinlock_t srv_lock;
struct list_head srv_threads;
int (*srv_handler)(struct ptlrpc_request *req);
char *srv_name; /* only statically allocated strings here; we don't clean them */
+
+ int srv_interface_rover;
+ struct ptlrpc_srv_ni srv_interfaces[0];
};
static inline void ptlrpc_hdl2req(struct ptlrpc_request *req,
typedef int (*svc_handler_t)(struct ptlrpc_request *req);
+/* rpc/events.c */
+extern struct ptlrpc_ni ptlrpc_interfaces[];
+extern int ptlrpc_ninterfaces;
+extern int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer);
+
/* rpc/connection.c */
void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *uuid);
-struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
+struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer,
struct obd_uuid *uuid);
int ptlrpc_put_connection(struct ptlrpc_connection *c);
struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk);
struct obd_brw_set *obd_brw_set_new(void);
void obd_brw_set_add(struct obd_brw_set *, struct ptlrpc_bulk_desc *);
+void obd_brw_set_del(struct ptlrpc_bulk_desc *);
void obd_brw_set_free(struct obd_brw_set *);
int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req);
int ptlrpc_queue_wait(struct ptlrpc_request *req);
void ptlrpc_continue_req(struct ptlrpc_request *req);
int ptlrpc_replay_req(struct ptlrpc_request *req);
+int ptlrpc_abort(struct ptlrpc_request *req);
void ptlrpc_restart_req(struct ptlrpc_request *req);
void ptlrpc_abort_inflight(struct obd_import *imp, int dying_import);
/* rpc/service.c */
struct ptlrpc_service *
ptlrpc_init_svc(__u32 nevents, __u32 nbufs, __u32 bufsize, __u32 max_req_size,
- int req_portal, int rep_portal,
- struct obd_uuid *uuid, svc_handler_t, char *name);
+ int req_portal, int rep_portal, svc_handler_t, char *name);
void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
char *name);
int lustre_unpack_msg(struct lustre_msg *m, int len);
void *lustre_msg_buf(struct lustre_msg *m, int n);
+/* rpc/rpc.c */
+__u32 ptlrpc_next_xid(void);
+
static inline void ptlrpc_bulk_decref(struct ptlrpc_bulk_desc *desc)
{
CDEBUG(D_PAGE, "%p -> %d\n", desc, atomic_read(&desc->bd_refcount) - 1);
#ifdef __KERNEL__
# include <linux/fs.h>
# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <asm/current.h> /* for smp_lock.h */
# include <linux/smp_lock.h>
# include <linux/proc_fs.h>
# include <linux/lustre_idl.h>
# include <linux/lustre_mds.h>
# include <linux/lustre_export.h>
+#endif
struct obd_type {
struct list_head typ_chain;
__u32 ouc_fsuid;
__u32 ouc_fsgid;
__u32 ouc_cap;
- __u32 ouc_suppgid;
+ __u32 ouc_suppgid1;
+ __u32 ouc_suppgid2;
};
#define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA
struct ost_server_data;
+#define FILTER_TRANSNO_SEM
+
+#ifndef OST_RECOVERY
+#undef FILTER_TRANSNO_SEM
+#endif
+
struct filter_obd {
char *fo_fstype;
struct super_block *fo_sb;
struct obd_run_ctxt fo_ctxt;
struct dentry *fo_dentry_O;
struct dentry *fo_dentry_O_mode[16];
+ struct dentry **fo_dentry_O_sub;
spinlock_t fo_objidlock; /* protects fo_lastobjid increment */
+#ifdef FILTER_TRANSNO_SEM
struct semaphore fo_transno_sem;
+#else
+ spinlock_t fo_translock; /* protects fsd_last_rcvd increment */
+#endif
struct file *fo_rcvd_filp;
struct filter_server_data *fo_fsd;
+ unsigned long *fo_last_rcvd_slots;
- __u64 fo_next_recovery_transno;
- int fo_recoverable_clients;
struct file_operations *fo_fop;
struct inode_operations *fo_iop;
struct address_space_operations *fo_aops;
struct list_head fo_export_list;
spinlock_t fo_fddlock; /* protects setting dentry->d_fsdata */
+ int fo_subdir_count;
};
struct mds_server_data;
* call obd_size_wiremd() all the time. */
int cl_max_mds_easize;
struct obd_device *cl_containing_lov;
+ kdev_t cl_sandev;
};
struct mds_obd {
struct ptlrpc_service *mds_service;
- struct ptlrpc_service *mds_getattr_service;
+ struct ptlrpc_service *mds_setattr_service;
+ struct ptlrpc_service *mds_readpage_service;
struct super_block *mds_sb;
struct vfsmount *mds_vfsmnt;
int mds_max_mdsize;
struct file *mds_rcvd_filp;
- struct semaphore mds_transno_sem;
- __u64 mds_last_rcvd;
+ spinlock_t mds_transno_lock;
+ __u64 mds_last_transno;
__u64 mds_mount_count;
struct ll_fid mds_rootfid;
struct mds_server_data *mds_server_data;
- wait_queue_head_t mds_next_transno_waitq;
- __u64 mds_next_recovery_transno;
- int mds_recoverable_clients;
- struct list_head mds_recovery_queue;
- struct list_head mds_delayed_reply_queue;
- spinlock_t mds_processing_task_lock;
- pid_t mds_processing_task;
- struct timer_list mds_recovery_timer;
-
int mds_has_lov_desc;
struct lov_desc mds_lov_desc;
};
struct dentry *dentry;
};
+/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
+#define N_LOCAL_TEMP_PAGE 0x10000000
+
struct obd_trans_info {
__u64 oti_transno;
};
-#define N_LOCAL_TEMP_PAGE 0x00000001
-
/* corresponds to one of the obd's */
struct obd_device {
struct obd_type *obd_type;
spinlock_t obd_dev_lock;
__u64 obd_last_committed;
struct fsfilt_operations *obd_fsops;
+
+ /* XXX encapsulate all this recovery data into one struct */
+ svc_handler_t obd_recovery_handler;
+ int obd_recoverable_clients;
+ spinlock_t obd_processing_task_lock;
+ pid_t obd_processing_task;
+ __u64 obd_next_recovery_transno;
+ wait_queue_head_t obd_next_transno_waitq;
+ struct timer_list obd_recovery_timer;
+ struct list_head obd_recovery_queue;
+ struct list_head obd_delayed_reply_queue;
+
union {
struct ext2_obd ext2;
struct filter_obd filter;
int (*o_statfs)(struct lustre_handle *conn, struct obd_statfs *osfs);
+ int (*o_syncfs)(struct lustre_handle *conn);
int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **wire_tgt,
struct lov_stripe_md *mem_src);
int (*o_unpackmd)(struct lustre_handle *,
struct lov_stripe_md *ea, struct obd_trans_info *oti);
int (*o_brw)(int rw, struct lustre_handle *conn,
struct lov_stripe_md *ea, obd_count oa_bufs,
- struct brw_page *pgarr, struct obd_brw_set *,
+ struct brw_page *pgarr, struct obd_brw_set *,
struct obd_trans_info *oti);
int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
struct lov_stripe_md *ea, obd_size count,
__u32 mode, struct lustre_handle *);
int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
int local_only);
+ int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
+ int objcount, struct obd_ioobj *obj,
+ int niocount, struct niobuf_remote *remote);
};
-#endif /* __KERNEL */
#endif /* __OBD_H */
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/time.h>
+#endif
#include <linux/obd_support.h>
+#include <linux/lustre_import.h>
+#include <linux/lustre_net.h>
#include <linux/obd.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_idl.h>
#include <linux/lustre_mds.h>
#include <linux/lustre_dlm.h>
#include <linux/lprocfs_status.h>
-#endif
+
/* OBD Device Declarations */
#define MAX_OBD_DEVICES 128
#define OBD_NO_TRANSNO 0x20 /* XXX needs better name */
/* OBD Operations Declarations */
+extern struct obd_device *class_conn2obd(struct lustre_handle *);
+extern struct obd_export *class_conn2export(struct lustre_handle *);
-#ifdef __KERNEL__
static inline int obd_check_conn(struct lustre_handle *conn)
{
struct obd_device *obd;
CERROR("NULL conn\n");
RETURN(-ENOTCONN);
}
+
obd = class_conn2obd(conn);
if (!obd) {
CERROR("NULL obd\n");
RETURN(rc);
}
+static inline int obd_syncfs(struct lustre_handle *conn) /* call the syncfs method of the device behind conn */
+{
+ struct obd_export *exp;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_SETUP(conn, exp); /* presumably resolves conn into exp and bails out on failure -- macro not shown here */
+ OBD_CHECK_OP(exp->exp_obd, syncfs); /* presumably bails out when the device has no syncfs method -- macro not shown here */
+
+ rc = OBP(exp->exp_obd, syncfs)(conn); /* the method's result is returned unchanged */
+ RETURN(rc);
+}
+
static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, obd_size start,
obd_size end, struct obd_trans_info *oti)
RETURN(rc);
}
-#endif
+/* Dispatch a SAN pre-read/write request to the san_preprw method of the
+ * device behind conn and return that method's result.
+ *
+ * The operation checked must be the one that is called: checking plain
+ * "preprw" would let a device that provides preprw but not san_preprw
+ * through to a call via an unset method pointer.  ENTRY pairs with the
+ * RETURN below, as in obd_syncfs(). */
+static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
+                                 int objcount, struct obd_ioobj *obj,
+                                 int niocount, struct niobuf_remote *remote)
+{
+        struct obd_export *exp;
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_SETUP(conn, exp);
+        OBD_CHECK_OP(exp->exp_obd, san_preprw);
+
+        rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
+                                           niocount, remote);
+        RETURN(rc);
+}
+
/* OBD Metadata Support */
return (struct lustre_handle *)&oa->o_inline;
}
-static inline void obd_oa2handle(struct lustre_handle *handle, struct obdo *oa)
-{
- if (oa->o_valid |= OBD_MD_FLHANDLE) {
- struct lustre_handle *oa_handle = obdo_handle(oa);
- memcpy(handle, oa_handle, sizeof(*handle));
- }
-}
-
-static inline void obd_handle2oa(struct obdo *oa, struct lustre_handle *handle)
-{
- if (handle && handle->addr) {
- struct lustre_handle *oa_handle = obdo_handle(oa);
- memcpy(oa_handle, handle, sizeof(*handle));
- oa->o_valid |= OBD_MD_FLHANDLE;
- }
-}
-
-#ifdef __KERNEL__
/* support routines */
extern kmem_cache_t *obdo_cachep;
static inline struct obdo *obdo_alloc(void)
kmem_cache_free(obdo_cachep, oa);
}
+#ifdef __KERNEL__
static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (ia_valid & ATTR_ATIME) {
oa->o_atime = attr->ia_atime;
oa->o_valid |= OBD_MD_FLATIME;
oa->o_ctime = attr->ia_ctime;
oa->o_valid |= OBD_MD_FLCTIME;
}
+#else
+ if (ia_valid & ATTR_ATIME) {
+ oa->o_atime = attr->ia_atime.tv_sec;
+ oa->o_valid |= OBD_MD_FLATIME;
+ }
+ if (ia_valid & ATTR_MTIME) {
+ oa->o_mtime = attr->ia_mtime.tv_sec;
+ oa->o_valid |= OBD_MD_FLMTIME;
+ }
+ if (ia_valid & ATTR_CTIME) {
+ oa->o_ctime = attr->ia_ctime.tv_sec;
+ oa->o_valid |= OBD_MD_FLCTIME;
+ }
+#endif
+
if (ia_valid & ATTR_SIZE) {
oa->o_size = attr->ia_size;
oa->o_valid |= OBD_MD_FLSIZE;
obd_flag valid)
{
memset(attr, 0, sizeof(*attr));
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (valid & OBD_MD_FLATIME) {
attr->ia_atime = oa->o_atime;
attr->ia_valid |= ATTR_ATIME;
attr->ia_ctime = oa->o_ctime;
attr->ia_valid |= ATTR_CTIME;
}
+#else
+ if (valid & OBD_MD_FLATIME) {
+ attr->ia_atime.tv_sec = oa->o_atime;
+ attr->ia_valid |= ATTR_ATIME;
+ }
+ if (valid & OBD_MD_FLMTIME) {
+ attr->ia_mtime.tv_sec = oa->o_mtime;
+ attr->ia_valid |= ATTR_MTIME;
+ }
+ if (valid & OBD_MD_FLCTIME) {
+ attr->ia_ctime.tv_sec = oa->o_ctime;
+ attr->ia_valid |= ATTR_CTIME;
+ }
+#endif
if (valid & OBD_MD_FLSIZE) {
attr->ia_size = oa->o_size;
attr->ia_valid |= ATTR_SIZE;
static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
obd_flag valid)
{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (valid & OBD_MD_FLATIME)
dst->o_atime = src->i_atime;
if (valid & OBD_MD_FLMTIME)
dst->o_mtime = src->i_mtime;
if (valid & OBD_MD_FLCTIME)
dst->o_ctime = src->i_ctime;
+#else
+ if (valid & OBD_MD_FLATIME)
+ dst->o_atime = src->i_atime.tv_sec;
+ if (valid & OBD_MD_FLMTIME)
+ dst->o_mtime = src->i_mtime.tv_sec;
+ if (valid & OBD_MD_FLCTIME)
+ dst->o_ctime = src->i_ctime.tv_sec;
+#endif
if (valid & OBD_MD_FLSIZE)
dst->o_size = src->i_size;
if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
{
valid &= src->o_valid;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (valid & OBD_MD_FLATIME)
dst->i_atime = src->o_atime;
if (valid & OBD_MD_FLMTIME)
dst->i_mtime = src->o_mtime;
if (valid & OBD_MD_FLCTIME && src->o_ctime > dst->i_ctime)
dst->i_ctime = src->o_ctime;
+#else
+ if (valid & OBD_MD_FLATIME)
+ dst->i_atime.tv_sec = src->o_atime;
+ if (valid & OBD_MD_FLMTIME)
+ dst->i_mtime.tv_sec = src->o_mtime;
+ if (valid & OBD_MD_FLCTIME && src->o_ctime > dst->i_ctime.tv_sec)
+ dst->i_ctime.tv_sec = src->o_ctime;
+#endif
if (valid & OBD_MD_FLSIZE)
dst->i_size = src->o_size;
if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
}
-#ifdef __KERNEL__
/* I'm as embarrassed about this as you are.
*
* <shaver> // XXX do not look into _superhack with remaining eye
struct lustre_handle ocuc_conn;
};
-#endif
/* sysctl.c */
extern void obd_sysctl_init (void);
typedef __u8 class_uuid_t[16];
//int class_uuid_parse(struct obd_uuid in, class_uuid_t out);
void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+
+/* lustre_peer.c */
+int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer);
+int class_add_uuid(char *uuid, __u64 nid, __u32 nal);
+int class_del_uuid (char *uuid);
+void class_init_uuidlist(void);
+void class_exit_uuidlist(void);
+
#endif /* __LINUX_OBD_CLASS_H */
#define FILTER_LR_CLIENT_START 8192
#define FILTER_LR_CLIENT_SIZE 128
+#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */
+
#define FILTER_MOUNT_RECOV 2
#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
struct filter_server_data {
__u8 fsd_uuid[37]; /* server UUID */
__u8 fsd_uuid_padding[3]; /* unused */
- __u64 fsd_last_objid; /* last completed transaction ID */
+ __u64 fsd_last_objid; /* last created object ID */
__u64 fsd_last_rcvd; /* last completed transaction ID */
__u64 fsd_mount_count; /* FILTER incarnation number */
- __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 64]; /* */
+ __u32 fsd_feature_compat; /* compatible feature flags */
+ __u32 fsd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 fsd_feature_incompat;/* incompatible feature flags */
+ __u32 fsd_server_size; /* size of server data area */
+ __u32 fsd_client_start; /* start of per-client data area */
+ __u16 fsd_client_size; /* size of per-client data area */
+ __u16 fsd_subdir_count; /* number of subdirectories for objects */
+ __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 88];
};
/* Data stored per client in the last_rcvd file. In le32 order. */
__u64 fcd_last_rcvd; /* last completed transaction ID */
__u64 fcd_mount_count; /* FILTER incarnation number */
__u64 fcd_last_xid; /* client RPC xid for the last transaction */
- __u8 fcd_padding[FILTER_LR_CLIENT_SIZE - 64];
+ __u8 fcd_padding[FILTER_LR_CLIENT_SIZE - 64];
};
+#ifndef OBD_FILTER_SAN_DEVICENAME
+#define OBD_FILTER_SAN_DEVICENAME "sanobdfilter"
+#endif
+
/* In-memory access to client data from OST struct */
struct filter_export_data {
struct list_head fed_open_head; /* files to close on disconnect */
spinlock_t fed_lock; /* protects fed_open_head */
struct filter_client_data *fed_fcd;
- int fed_lr_off;
+ loff_t fed_lr_off;
+ int fed_lr_idx;
};
/* file data for open files on OST */
};
struct filter_dentry_data {
+ obd_id fdd_objid;
atomic_t fdd_open_count;
int fdd_flags;
};
#ifndef _OBD_LOV_H__
#define _OBD_LOV_H__
-#ifdef __KERNEL__
-
#define OBD_LOV_DEVICENAME "lov"
void lov_unpackdesc(struct lov_desc *ld);
{
return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo);
}
-#endif
static inline int lov_mds_md_size(int stripes)
{
#define LUSTRE_OST_NAME "ost"
#define LUSTRE_OSC_NAME "osc"
+#define LUSTRE_SANOSC_NAME "sanosc"
+#define LUSTRE_SANOST_NAME "sanost"
/* ost/ost_pack.c */
void ost_pack_niobuf(void **tmp, __u64 offset, __u32 len, __u32 flags,
#ifndef _OBD_SUPPORT
#define _OBD_SUPPORT
+#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/autoconf.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#else
+
+#endif
#include <linux/kp30.h>
/* global variables */
#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d
#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e
#define OBD_FAIL_OST_BRW_READ_BULK 0x20f
+#define OBD_FAIL_OST_SYNCFS_NET 0x210
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
#define OBD_FAIL_OSC_LOCK_BL_AST 0x403
#define OBD_FAIL_OSC_LOCK_CP_AST 0x404
+#define OBD_FAIL_PTLRPC 0x500
+#define OBD_FAIL_PTLRPC_ACK 0x501
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
} \
} while(0)
+#define fixme() CDEBUG(D_OTHER, "FIXME\n");
+
+#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/blkdev.h>
-#define fixme() CDEBUG(D_OTHER, "FIXME\n");
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#define ll_bdevname(a) __bdevname((a))
#define ll_bdevname(a) bdevname((a))
#endif
+
static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
{
if (OBD_FAIL_CHECK(id)) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#ifdef CONFIG_DEV_RDONLY
CERROR("obd_fail_loc=%x, fail write operation on %s\n",
id, ll_bdevname(dev));
CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
id, ll_bdevname(dev));
#endif
+#else
+#ifdef CONFIG_DEV_RDONLY
+ CERROR("obd_fail_loc=%x, fail write operation on %s\n",
+ id, ll_bdevname(dev.value));
+ dev_set_rdonly(dev, 2);
+#else
+ CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
+ id, ll_bdevname(dev.value));
+#endif
+#endif
/* We set FAIL_ONCE because we never "un-fail" a device */
obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
}
}
+#endif /* __KERNEL__ */
+
#define OBD_ALLOC(ptr, size) \
do { \
void *lptr; \
} while (0)
#ifdef CONFIG_DEBUG_SLAB
-#define POISON(lptr, s) do {} while (0)
+#define POISON(lptr, c, s) do {} while (0)
#else
-#define POISON(lptr, s) memset(lptr, 0x5a, s)
+#define POISON(lptr, c, s) memset(lptr, c, s)
#endif
#define OBD_FREE(ptr, size) \
void *lptr = (ptr); \
int s = (size); \
LASSERT(lptr); \
- POISON(lptr, s); \
+ POISON(lptr, 0x5a, s); \
kfree(lptr); \
atomic_sub(s, &obd_memory); \
CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
---- linux-chaos/fs/inode.c.b_io_export Wed Jan 29 16:56:15 2003
-+++ linux-chaos/fs/inode.c Wed Jan 29 16:56:27 2003
-@@ -66,7 +66,8 @@
+--- linux/fs/inode.c.b_io 2003-02-18 16:39:16.000000000 -0800
++++ linux/fs/inode.c 2003-02-18 16:39:45.000000000 -0800
+@@ -5,6 +5,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
+@@ -66,7 +67,8 @@
* NOTE! You also have to own the lock if you change
* the i_state of an inode while it is in use..
*/
/*
* Statistics gathering..
---- linux-chaos/fs/Makefile.b_io_export Wed Jan 29 16:56:45 2003
-+++ linux-chaos/fs/Makefile Wed Jan 29 16:56:53 2003
+--- linux/fs/Makefile.b_io 2003-02-18 16:39:16.000000000 -0800
++++ linux/fs/Makefile 2003-02-18 16:39:37.000000000 -0800
@@ -7,7 +7,7 @@
O_TARGET := fs.o
mod-subdirs := nls
obj-y := open.o read_write.o devices.o file_table.o buffer.o \
---- linux-chaos/mm/filemap.c.b_io_export Wed Jan 29 16:50:39 2003
-+++ linux-chaos/mm/filemap.c Wed Jan 29 16:51:11 2003
-@@ -65,6 +65,7 @@
- * pagecache_lock
+--- linux/mm/vmscan.c.b_io 2003-02-18 16:39:16.000000000 -0800
++++ linux/mm/vmscan.c 2003-02-18 16:40:01.000000000 -0800
+@@ -14,6 +14,8 @@
+ * Multiqueue VM started 5.8.00, Rik van Riel.
*/
- spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
-+EXPORT_SYMBOL(pagemap_lru_lock_cacheline);
- #define CLUSTER_PAGES (1 << page_cluster)
- #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
---- linux-chaos/mm/vmscan.c.b_io_export Wed Jan 29 16:51:58 2003
-+++ linux-chaos/mm/vmscan.c Wed Jan 29 16:55:16 2003
-@@ -839,6 +839,7 @@
++#include <linux/config.h>
++#include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/swap.h>
+@@ -837,6 +839,7 @@
set_current_state(TASK_RUNNING);
remove_wait_queue(&kswapd_done, &wait);
}
static void wakeup_memwaiters(void)
{
---- linux-chaos/mm/Makefile.b_io_export Wed Jan 29 16:52:46 2003
-+++ linux-chaos/mm/Makefile Wed Jan 29 16:54:23 2003
+--- linux/mm/Makefile.b_io 2003-02-18 16:39:16.000000000 -0800
++++ linux/mm/Makefile 2003-02-18 16:39:37.000000000 -0800
@@ -9,7 +9,7 @@
O_TARGET := mm.o
-export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
-+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.c
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.o
obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
-# This is a BitKeeper generated patch for the following project:
-# Project Name: Linux kernel tree
-# This patch format is intended for GNU patch command version 2.5 or higher.
-# This patch includes the following deltas:
-# ChangeSet 1.810 -> 1.811
-# kernel/ksyms.c 1.149 -> 1.150
-# fs/driverfs/inode.c 1.52 -> 1.53
-# include/linux/fs.h 1.175 -> 1.176
-# include/linux/namei.h 1.3 -> 1.4
-# fs/namei.c 1.56 -> 1.57
-# fs/nfsd/vfs.c 1.44 -> 1.45
-# arch/um/kernel/mem.c 1.5 -> 1.6
-# net/unix/af_unix.c 1.29 -> 1.30
-# mm/slab.c 1.33 -> 1.34
-# fs/sysfs/inode.c 1.55 -> 1.56
-# include/linux/slab.h 1.13 -> 1.14
-# include/linux/dcache.h 1.19 -> 1.20
-#
-# The following is the BitKeeper ChangeSet Log
-# --------------------------------------------
-# 02/10/20 braam@clusterfs.com 1.811
-# Changes for Lustre
-# --------------------------------------------
-#
-diff -Nru a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
---- a/arch/um/kernel/mem.c Sun Dec 8 02:49:38 2002
-+++ b/arch/um/kernel/mem.c Sun Dec 8 02:49:38 2002
-@@ -656,6 +656,22 @@
+ arch/um/kernel/mem.c | 18 +++++++++++-
+ fs/namei.c | 71 +++++++++++++++++++++++++++++++++++--------------
+ fs/nfsd/vfs.c | 2 -
+ fs/sysfs/inode.c | 2 -
+ include/linux/dcache.h | 27 ++++++++++++++++++
+ include/linux/fs.h | 20 +++++++++++++
+ include/linux/namei.h | 3 +-
+ include/linux/slab.h | 1
+ kernel/ksyms.c | 7 ++++
+ mm/slab.c | 5 +++
+ net/unix/af_unix.c | 2 -
+ 11 files changed, 132 insertions(+), 26 deletions(-)
+
+--- linux-2.5.59/arch/um/kernel/mem.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/arch/um/kernel/mem.c 2003-02-22 21:56:58.000000000 +0800
+@@ -639,6 +639,22 @@ struct page *pte_mem_map(pte_t pte)
return(phys_mem_map(pte_val(pte)));
}
struct mem_region *page_region(struct page *page, int *index_out)
{
int i;
-@@ -743,7 +759,7 @@
+@@ -726,7 +742,7 @@ extern unsigned long region_pa(void *vir
(addr <= region->start + region->len))
return(mk_phys(addr - region->start, i));
}
return(0);
}
-diff -Nru a/fs/driverfs/inode.c b/fs/driverfs/inode.c
---- a/fs/driverfs/inode.c Sun Dec 8 02:49:38 2002
-+++ b/fs/driverfs/inode.c Sun Dec 8 02:49:38 2002
-@@ -523,7 +523,7 @@
- qstr.name = name;
- qstr.len = strlen(name);
- qstr.hash = full_name_hash(name,qstr.len);
-- return lookup_hash(&qstr,parent);
-+ return lookup_hash(&qstr,parent, NULL);
- }
-
- /**
-diff -Nru a/fs/namei.c b/fs/namei.c
---- a/fs/namei.c Sun Dec 8 02:49:38 2002
-+++ b/fs/namei.c Sun Dec 8 02:49:38 2002
-@@ -265,6 +265,9 @@
+--- linux-2.5.59/fs/namei.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/fs/namei.c 2003-02-22 21:56:58.000000000 +0800
+@@ -265,6 +265,9 @@ int deny_write_access(struct file * file
void path_release(struct nameidata *nd)
{
dput(nd->dentry);
mntput(nd->mnt);
}
-@@ -273,10 +276,18 @@
+@@ -273,10 +276,18 @@ void path_release(struct nameidata *nd)
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
-@@ -351,7 +362,7 @@
+@@ -351,7 +362,7 @@ ok:
* make sure that nobody added the entry to the dcache in the meantime..
* SMP-safe
*/
{
struct dentry * result;
struct inode *dir = parent->d_inode;
-@@ -369,7 +380,10 @@
+@@ -369,7 +380,10 @@ static struct dentry * real_lookup(struc
struct dentry * dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
if (result)
dput(dentry);
else {
-@@ -391,6 +405,12 @@
+@@ -391,6 +405,12 @@ static struct dentry * real_lookup(struc
dput(result);
result = ERR_PTR(-ENOENT);
}
}
return result;
}
-@@ -534,7 +554,7 @@
+@@ -534,7 +554,7 @@ dcache_miss:
unlock_nd(nd);
need_lookup:
if (IS_ERR(dentry))
goto fail;
mntget(mnt);
-@@ -684,7 +704,7 @@
+@@ -684,7 +704,7 @@ int link_path_walk(const char * name, st
nd->dentry = next.dentry;
}
err = -ENOTDIR;
break;
continue;
/* here ends the main loop */
-@@ -737,7 +757,8 @@
+@@ -737,7 +757,8 @@ last_component:
break;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
break;
}
goto return_base;
-@@ -886,7 +907,8 @@
+@@ -886,7 +907,8 @@ int path_lookup(const char *name, unsign
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -909,13 +931,16 @@
+@@ -909,13 +931,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
+ dentry = inode->i_op->lookup(inode, new);
if (!dentry) {
dentry = new;
- security_ops->inode_post_lookup(inode, dentry);
-@@ -927,7 +952,7 @@
+ security_inode_post_lookup(inode, dentry);
+@@ -927,7 +952,7 @@ out:
}
/* SMP-safe */
{
unsigned long hash;
struct qstr this;
-@@ -947,11 +972,16 @@
+@@ -947,11 +972,16 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
/*
* namei()
*
-@@ -1268,7 +1298,7 @@
+@@ -1268,7 +1298,7 @@ int open_namei(const char * pathname, in
dir = nd->dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1370,7 +1400,7 @@
+@@ -1371,7 +1401,7 @@ do_link:
}
dir = nd->dentry;
down(&dir->d_inode->i_sem);
putname(nd->last.name);
goto do_last;
}
-@@ -1384,7 +1414,7 @@
+@@ -1385,7 +1415,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1614,7 +1644,7 @@
+@@ -1617,7 +1647,7 @@ asmlinkage long sys_rmdir(const char * p
goto exit1;
}
down(&nd.dentry->d_inode->i_sem);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1675,7 +1705,7 @@
+@@ -1677,7 +1707,7 @@ asmlinkage long sys_unlink(const char *
if (nd.last_type != LAST_NORM)
goto exit1;
down(&nd.dentry->d_inode->i_sem);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1949,7 +1979,8 @@
+@@ -1951,7 +1981,8 @@ int vfs_rename_other(struct inode *old_d
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
-@@ -2020,7 +2051,7 @@
+@@ -2022,7 +2053,7 @@ static inline int do_rename(const char *
trap = lock_rename(new_dir, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -2040,7 +2071,7 @@
+@@ -2042,7 +2073,7 @@ static inline int do_rename(const char *
error = -EINVAL;
if (old_dentry == trap)
goto exit4;
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
-@@ -2050,7 +2081,7 @@
+@@ -2052,7 +2083,7 @@ static inline int do_rename(const char *
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
exit5:
dput(new_dentry);
exit4:
-diff -Nru a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
---- a/fs/nfsd/vfs.c Sun Dec 8 02:49:38 2002
-+++ b/fs/nfsd/vfs.c Sun Dec 8 02:49:38 2002
-@@ -1292,7 +1292,7 @@
+--- linux-2.5.59/fs/nfsd/vfs.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/fs/nfsd/vfs.c 2003-02-22 21:56:58.000000000 +0800
+@@ -1337,7 +1337,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
#endif
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
nfsd_sync_dir(fdentry);
-diff -Nru a/fs/sysfs/inode.c b/fs/sysfs/inode.c
---- a/fs/sysfs/inode.c Sun Dec 8 02:49:39 2002
-+++ b/fs/sysfs/inode.c Sun Dec 8 02:49:39 2002
-@@ -471,7 +471,7 @@
+--- linux-2.5.59/fs/sysfs/inode.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/fs/sysfs/inode.c 2003-02-22 21:56:58.000000000 +0800
+@@ -539,7 +539,7 @@ static struct dentry * get_dentry(struct
qstr.name = name;
qstr.len = strlen(name);
qstr.hash = full_name_hash(name,qstr.len);
+ return lookup_hash(&qstr,parent,NULL);
}
- /**
-diff -Nru a/include/linux/dcache.h b/include/linux/dcache.h
---- a/include/linux/dcache.h Sun Dec 8 02:49:39 2002
-+++ b/include/linux/dcache.h Sun Dec 8 02:49:39 2002
-@@ -9,6 +9,24 @@
- #include <linux/spinlock.h>
- #include <asm/page.h> /* for BUG() */
-
-+#define IT_OPEN (1)
-+#define IT_CREAT (1<<1)
-+#define IT_MKDIR (1<<2)
-+#define IT_LINK (1<<3)
-+#define IT_LINK2 (1<<4)
-+#define IT_SYMLINK (1<<5)
-+#define IT_UNLINK (1<<6)
-+#define IT_RMDIR (1<<7)
-+#define IT_RENAME (1<<8)
-+#define IT_RENAME2 (1<<9)
-+#define IT_READDIR (1<<10)
-+#define IT_GETATTR (1<<11)
-+#define IT_SETATTR (1<<12)
-+#define IT_READLINK (1<<13)
-+#define IT_MKNOD (1<<14)
-+#define IT_LOOKUP (1<<15)
+
+--- linux-2.5.59/include/linux/dcache.h~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/include/linux/dcache.h 2003-02-22 22:02:55.000000000 +0800
+@@ -11,6 +11,27 @@
+
+ struct vfsmount;
+
++#define IT_OPEN (1)
++#define IT_CREAT (1<<1)
++#define IT_READDIR (1<<2)
++#define IT_GETATTR (1<<3)
++#define IT_LOOKUP (1<<4)
++#define IT_UNLINK (1<<5)
++
++
++struct lookup_intent {
++ int it_op;
++ int it_mode;
++ int it_flags;
++ int it_disposition;
++ int it_status;
++ struct iattr *it_iattr;
++ __u64 it_lock_handle[2];
++ int it_lock_mode;
++ void *it_data;
++};
+
+
/*
* linux/include/linux/dcache.h
*
-@@ -30,6 +48,8 @@
+@@ -32,6 +53,8 @@ struct qstr {
unsigned int hash;
};
struct dentry_stat_t {
int nr_dentry;
int nr_unused;
-@@ -79,6 +99,7 @@
+@@ -81,6 +104,7 @@ struct dentry {
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
int d_mounted;
struct qstr d_name;
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
-@@ -96,6 +117,8 @@
+@@ -100,6 +124,8 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
};
/* the dentry parameter passed to d_hash and d_compare is the parent
-diff -Nru a/include/linux/fs.h b/include/linux/fs.h
---- a/include/linux/fs.h Sun Dec 8 02:49:38 2002
-+++ b/include/linux/fs.h Sun Dec 8 02:49:38 2002
-@@ -700,7 +700,7 @@
+@@ -139,6 +165,7 @@ d_iput: no no yes
+ */
+
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */
+
+ extern spinlock_t dcache_lock;
+ extern rwlock_t dparent_lock;
+--- linux-2.5.59/include/linux/fs.h~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/include/linux/fs.h 2003-02-22 22:52:58.000000000 +0800
+@@ -234,6 +234,9 @@ typedef int (get_blocks_t)(struct inode
+ #define ATTR_ATTR_FLAG 1024
+ #define ATTR_KILL_SUID 2048
+ #define ATTR_KILL_SGID 4096
++#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */
++
+
+ /*
+ * This is the Inode Attributes structure, used for notify_change(). It
+@@ -676,7 +679,7 @@ extern int vfs_symlink(struct inode *, s
extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *);
/*
* File types
-@@ -769,6 +769,8 @@
+@@ -762,19 +765,33 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
+ struct dentry * (*lookup2) (struct inode *,struct dentry *,
+ struct lookup_intent *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link2) (struct inode *,struct inode *, const char *, int);
int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink2) (struct inode *, const char *, int);
int (*symlink) (struct inode *,struct dentry *,const char *);
-@@ -995,6 +997,7 @@
++ int (*symlink2) (struct inode *, const char *, int, const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir2) (struct inode *, const char *, int,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir2) (struct inode *, const char *, int);
+ int (*mknod) (struct inode *,struct dentry *,int,dev_t);
++ int (*mknod2) (struct inode *, const char *, int,int,int);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
++ int (*rename2) (struct inode *, struct inode *,
++ const char *oldname, int oldlen,
++ const char *newname, int newlen);
+ int (*readlink) (struct dentry *, char *,int);
+ int (*follow_link) (struct dentry *, struct nameidata *);
++ int (*follow_link2) (struct dentry *, struct nameidata *,
++ struct lookup_intent *it);
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int);
+ int (*setattr) (struct dentry *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+@@ -987,6 +1004,7 @@ extern int register_filesystem(struct fi
extern int unregister_filesystem(struct file_system_type *);
extern struct vfsmount *kern_mount(struct file_system_type *);
extern int may_umount(struct vfsmount *);
+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
extern long do_mount(char *, char *, char *, unsigned long, void *);
- #define kern_umount mntput
-diff -Nru a/include/linux/namei.h b/include/linux/namei.h
---- a/include/linux/namei.h Sun Dec 8 02:49:38 2002
-+++ b/include/linux/namei.h Sun Dec 8 02:49:38 2002
-@@ -5,6 +5,17 @@
-
- struct vfsmount;
-
-+struct lookup_intent {
-+ int it_op;
-+ int it_mode;
-+ int it_disposition;
-+ int it_status;
-+ struct iattr *it_iattr;
-+ __u64 it_lock_handle[2];
-+ int it_lock_mode;
-+ void *it_data;
-+};
-+
- struct nameidata {
- struct dentry *dentry;
- struct vfsmount *mnt;
-@@ -13,6 +24,7 @@
+ extern int vfs_statfs(struct super_block *, struct statfs *);
+--- linux-2.5.59/include/linux/namei.h~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/include/linux/namei.h 2003-02-22 21:56:58.000000000 +0800
+@@ -13,6 +13,7 @@ struct nameidata {
int last_type;
struct dentry *old_dentry;
struct vfsmount *old_mnt;
};
/*
-@@ -46,7 +58,7 @@
+@@ -46,7 +47,7 @@ extern int FASTCALL(link_path_walk(const
extern void path_release(struct nameidata *);
extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);
-diff -Nru a/include/linux/slab.h b/include/linux/slab.h
---- a/include/linux/slab.h Sun Dec 8 02:49:39 2002
-+++ b/include/linux/slab.h Sun Dec 8 02:49:39 2002
-@@ -56,6 +56,7 @@
+--- linux-2.5.59/include/linux/slab.h~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/include/linux/slab.h 2003-02-22 21:56:58.000000000 +0800
+@@ -56,6 +56,7 @@ extern int kmem_cache_destroy(kmem_cache
extern int kmem_cache_shrink(kmem_cache_t *);
extern void *kmem_cache_alloc(kmem_cache_t *, int);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern unsigned int kmem_cache_size(kmem_cache_t *);
extern void *kmalloc(size_t, int);
-diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c
---- a/kernel/ksyms.c Sun Dec 8 02:49:38 2002
-+++ b/kernel/ksyms.c Sun Dec 8 02:49:38 2002
-@@ -365,6 +365,13 @@
- EXPORT_SYMBOL(tty_get_baud_rate);
- EXPORT_SYMBOL(do_SAK);
+--- linux-2.5.59/kernel/ksyms.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/kernel/ksyms.c 2003-02-22 21:56:58.000000000 +0800
+@@ -376,6 +376,7 @@ EXPORT_SYMBOL(unregister_filesystem);
+ EXPORT_SYMBOL(kern_mount);
+ EXPORT_SYMBOL(__mntput);
+ EXPORT_SYMBOL(may_umount);
++EXPORT_SYMBOL(reparent_to_init);
+
+ /* executable format registration */
+ EXPORT_SYMBOL(register_binfmt);
+@@ -406,6 +407,12 @@ EXPORT_SYMBOL(request_irq);
+ EXPORT_SYMBOL(free_irq);
+ EXPORT_SYMBOL(irq_stat);
+/* lustre */
-+EXPORT_SYMBOL(panic_notifier_list);
-+//EXPORT_SYMBOL(pagecache_lock_cacheline);
+EXPORT_SYMBOL(do_kern_mount);
+EXPORT_SYMBOL(exit_files);
+EXPORT_SYMBOL(kmem_cache_validate);
+
- /* filesystem registration */
- EXPORT_SYMBOL(register_filesystem);
- EXPORT_SYMBOL(unregister_filesystem);
-diff -Nru a/mm/slab.c b/mm/slab.c
---- a/mm/slab.c Sun Dec 8 02:49:39 2002
-+++ b/mm/slab.c Sun Dec 8 02:49:39 2002
-@@ -1236,6 +1236,59 @@
- * Called with the cache-lock held.
- */
-
-+extern struct page *check_get_page(unsigned long kaddr);
-+struct page *page_mem_map(struct page *page);
-+static int kmem_check_cache_obj (kmem_cache_t * cachep,
-+ slab_t *slabp, void * objp)
-+{
-+ int i;
-+ unsigned int objnr;
-+
-+#if DEBUG
-+ if (cachep->flags & SLAB_RED_ZONE) {
-+ objp -= BYTES_PER_WORD;
-+ if ( *(unsigned long *)objp != RED_MAGIC2)
-+ /* Either write before start, or a double free. */
-+ return 0;
-+ if (*(unsigned long *)(objp+cachep->objsize -
-+ BYTES_PER_WORD) != RED_MAGIC2)
-+ /* Either write past end, or a double free. */
-+ return 0;
-+ }
-+#endif
-+
-+ objnr = (objp-slabp->s_mem)/cachep->objsize;
-+ if (objnr >= cachep->num)
-+ return 0;
-+ if (objp != slabp->s_mem + objnr*cachep->objsize)
-+ return 0;
-+
-+ /* Check slab's freelist to see if this obj is there. */
-+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
-+ if (i == objnr)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
+
+ /* waitqueue handling */
+ EXPORT_SYMBOL(add_wait_queue);
+ EXPORT_SYMBOL(add_wait_queue_exclusive);
+--- linux-2.5.59/mm/slab.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/mm/slab.c 2003-02-22 21:56:58.000000000 +0800
+@@ -1793,6 +1793,11 @@ static inline void __cache_free (kmem_ca
+ }
+ }
+
+int kmem_cache_validate(kmem_cache_t *cachep, void *objp)
+{
-+ struct page *page = check_get_page((unsigned long)objp);
-+
-+ if (!page_mem_map(page))
-+ return 0;
-+
-+ if (!PageSlab(page))
-+ return 0;
-+
-+ /* XXX check for freed slab objects ? */
-+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp))
-+ return 0;
-+
-+ return (cachep == GET_PAGE_CACHE(page));
++ return 1;
+}
+
- #if DEBUG
- static int kmem_extra_free_checks (kmem_cache_t * cachep,
- slab_t *slabp, void * objp)
-diff -Nru a/net/unix/af_unix.c b/net/unix/af_unix.c
---- a/net/unix/af_unix.c Sun Dec 8 02:49:38 2002
-+++ b/net/unix/af_unix.c Sun Dec 8 02:49:38 2002
-@@ -715,7 +715,7 @@
+ /**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+--- linux-2.5.59/net/unix/af_unix.c~lustre-2.5 2003-02-22 21:56:58.000000000 +0800
++++ linux-2.5.59-root/net/unix/af_unix.c 2003-02-22 21:56:58.000000000 +0800
+@@ -719,7 +719,7 @@ static int unix_bind(struct socket *sock
/*
* Do the final lookup.
*/
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_mknod_unlock;
+
+_
1 files changed, 1 insertion(+)
--- /dev/null Fri Aug 30 17:31:37 2002
-+++ linux-2.4.18-18.8.0-l7-root/include/linux/lustre_version.h Mon Jan 20 12:24:45 2003
++++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003
@@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 10
++#define LUSTRE_KERNEL_VERSION 13
_
- fs/dcache.c | 8 +
- fs/namei.c | 288 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/dcache.c | 20 ++
+ fs/exec.c | 18 +-
+ fs/namei.c | 338 ++++++++++++++++++++++++++++++++++++++++---------
fs/nfsd/vfs.c | 2
- fs/open.c | 53 +++++++--
- fs/stat.c | 9 +
- include/linux/dcache.h | 25 ++++
- include/linux/fs.h | 22 +++
+ fs/open.c | 120 +++++++++++++++--
+ fs/stat.c | 8 -
+ include/linux/dcache.h | 28 ++++
+ include/linux/fs.h | 27 +++
kernel/ksyms.c | 1
- 8 files changed, 345 insertions(+), 63 deletions(-)
+ 9 files changed, 478 insertions(+), 84 deletions(-)
---- linux-2.4.18-49chaos-lustre9/fs/dcache.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/fs/dcache.c Wed Jan 29 12:43:32 2003
+--- linux-2.4.18-18.8.0-l12/fs/dcache.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/dcache.c Wed Feb 26 17:31:36 2003
@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
spin_unlock(&dcache_lock);
return 0;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.18-49chaos-lustre9/fs/namei.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/fs/namei.c Wed Feb 5 16:23:06 2003
+@@ -859,13 +867,19 @@ void d_delete(struct dentry * dentry)
+ * Adds a dentry to the hash according to its name.
+ */
+
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+ struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+ if (!list_empty(&entry->d_hash)) BUG();
+- spin_lock(&dcache_lock);
++ if (lock) spin_lock(&dcache_lock);
+ list_add(&entry->d_hash, list);
+- spin_unlock(&dcache_lock);
++ if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++ __d_rehash(entry, 1);
+ }
+
+ #define do_switch(x,y) do { \
+--- linux-2.4.18-18.8.0-l12/fs/namei.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/namei.c Wed Feb 26 16:54:17 2003
@@ -94,6 +94,13 @@
* XEmacs seems to be relying on it...
*/
path_release(nd);
return -ELOOP;
}
-@@ -449,7 +482,8 @@ static inline void follow_dotdot(struct
+@@ -381,15 +414,26 @@ int follow_up(struct vfsmount **mnt, str
+ return __follow_up(mnt, dentry);
+ }
+
+-static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
++static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry,
++ struct lookup_intent *it)
+ {
+ struct vfsmount *mounted;
+
+ spin_lock(&dcache_lock);
+ mounted = lookup_mnt(*mnt, *dentry);
+ if (mounted) {
++ int opc = 0, mode = 0;
+ *mnt = mntget(mounted);
+ spin_unlock(&dcache_lock);
++ if (it) {
++ opc = it->it_op;
++ mode = it->it_mode;
++ }
++ intent_release(*dentry, it);
++ if (it) {
++ it->it_op = opc;
++ it->it_mode = mode;
++ }
+ dput(*dentry);
+ mntput(mounted->mnt_parent);
+ *dentry = dget(mounted->mnt_root);
+@@ -401,7 +445,7 @@ static inline int __follow_down(struct v
+
+ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+ {
+- return __follow_down(mnt,dentry);
++ return __follow_down(mnt,dentry,NULL);
+ }
+
+ static inline void follow_dotdot(struct nameidata *nd)
+@@ -437,7 +481,7 @@ static inline void follow_dotdot(struct
+ mntput(nd->mnt);
+ nd->mnt = parent;
+ }
+- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
++ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL))
+ ;
+ }
+
+@@ -449,7 +493,8 @@ static inline void follow_dotdot(struct
*
* We expect 'base' to be positive and a directory.
*/
{
struct dentry *dentry;
struct inode *inode;
-@@ -526,12 +560,12 @@ int link_path_walk(const char * name, st
+@@ -526,18 +571,18 @@ int link_path_walk(const char * name, st
break;
}
/* This does the actual lookups.. */
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -548,8 +582,8 @@ int link_path_walk(const char * name, st
+ }
+ /* Check mountpoints.. */
+- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
+ ;
+
+ err = -ENOENT;
+@@ -548,8 +593,8 @@ int link_path_walk(const char * name, st
if (!inode->i_op)
goto out_dput;
dput(dentry);
if (err)
goto return_err;
-@@ -565,7 +599,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +610,7 @@ int link_path_walk(const char * name, st
nd->dentry = dentry;
}
err = -ENOTDIR;
break;
continue;
/* here ends the main loop */
-@@ -592,12 +626,12 @@ last_component:
+@@ -592,22 +637,23 @@ last_component:
if (err < 0)
break;
}
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -606,8 +640,9 @@ last_component:
+ }
+- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
dput(dentry);
if (err)
goto return_err;
-@@ -621,7 +656,8 @@ last_component:
+@@ -621,7 +667,8 @@ last_component:
goto no_inode;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
break;
}
goto return_base;
-@@ -658,15 +694,28 @@ out_dput:
+@@ -658,15 +705,28 @@ out_dput:
dput(dentry);
break;
}
}
/* SMP-safe */
-@@ -751,6 +800,17 @@ walk_init_root(const char *name, struct
+@@ -751,6 +811,17 @@ walk_init_root(const char *name, struct
}
/* SMP-safe */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
int error = 0;
-@@ -779,7 +839,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +850,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -802,13 +863,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -802,13 +874,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -820,6 +884,12 @@ out:
+@@ -820,6 +895,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -841,7 +911,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +922,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -872,6 +942,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +953,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -1045,14 +1132,17 @@ int may_open(struct nameidata *nd, int a
+@@ -1045,14 +1143,17 @@ int may_open(struct nameidata *nd, int a
return get_lease(inode, flag);
}
int count = 0;
if ((flag+1) & O_ACCMODE)
-@@ -1066,7 +1156,7 @@ struct file *filp_open(const char * path
+@@ -1066,7 +1167,7 @@ struct file *filp_open(const char * path
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
if (error)
return ERR_PTR(error);
dentry = nd.dentry;
-@@ -1076,6 +1166,8 @@ struct file *filp_open(const char * path
+@@ -1076,6 +1177,8 @@ struct file *filp_open(const char * path
/*
* Create - we need to know the parent.
*/
error = path_lookup(pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
-@@ -1091,7 +1183,7 @@ struct file *filp_open(const char * path
+@@ -1091,7 +1194,7 @@ struct file *filp_open(const char * path
dir = nd.dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1100,6 +1192,7 @@ do_last:
+@@ -1100,6 +1203,7 @@ do_last:
goto exit;
}
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
error = vfs_create(dir->d_inode, dentry,
-@@ -1134,7 +1227,8 @@ do_last:
+@@ -1129,12 +1233,13 @@ do_last:
+ error = -ELOOP;
+ if (flag & O_NOFOLLOW)
+ goto exit_dput;
+- while (__follow_down(&nd.mnt,&dentry) && d_mountpoint(dentry));
++ while (__follow_down(&nd.mnt,&dentry,&it) && d_mountpoint(dentry));
+ }
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
goto do_link;
dput(nd.dentry);
-@@ -1149,11 +1243,13 @@ ok:
+@@ -1149,11 +1254,13 @@ ok:
if (!S_ISREG(nd.dentry->d_inode->i_mode))
open_flags &= ~O_TRUNC;
path_release(&nd);
return ERR_PTR(error);
-@@ -1172,7 +1268,12 @@ do_link:
+@@ -1172,10 +1279,15 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
+ intent_release(dentry, &it);
dput(dentry);
if (error)
- return error;
-@@ -1194,13 +1295,15 @@ do_link:
+- return error;
++ return ERR_PTR(error);
+ if (nd.last_type == LAST_BIND) {
+ dentry = nd.dentry;
+ goto ok;
+@@ -1194,13 +1306,15 @@ do_link:
}
dir = nd.dentry;
down(&dir->d_inode->i_sem);
{
struct dentry *dentry;
-@@ -1208,7 +1311,7 @@ static struct dentry *lookup_create(stru
+@@ -1208,7 +1322,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1264,7 +1367,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1264,7 +1378,19 @@ asmlinkage long sys_mknod(const char * f
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
+ nd.last.name,
+ nd.last.len,
+ mode, dev);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1285,6 +1400,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1285,6 +1411,7 @@ asmlinkage long sys_mknod(const char * f
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1332,7 +1448,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1332,7 +1459,17 @@ asmlinkage long sys_mkdir(const char * p
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
+ nd.last.name,
+ nd.last.len,
+ mode);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1340,6 +1466,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1340,6 +1477,7 @@ asmlinkage long sys_mkdir(const char * p
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1440,8 +1567,17 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1440,8 +1578,33 @@ asmlinkage long sys_rmdir(const char * p
error = -EBUSY;
goto exit1;
}
+ if (nd.dentry->d_inode->i_op->rmdir2) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ struct dentry *last;
++
++ down(&nd.dentry->d_inode->i_sem);
++ last = lookup_hash_it(&nd.last, nd.dentry, NULL);
++ up(&nd.dentry->d_inode->i_sem);
++ if (IS_ERR(last)) {
++ error = PTR_ERR(last);
++ goto exit1;
++ }
++ if (d_mountpoint(last)) {
++ dput(last);
++ error = -EBUSY;
++ goto exit1;
++ }
++ dput(last);
++
+ error = op->rmdir2(nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1499,8 +1635,17 @@ asmlinkage long sys_unlink(const char *
+@@ -1499,8 +1662,17 @@ asmlinkage long sys_unlink(const char *
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
+ error = op->unlink2(nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1567,15 +1712,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1567,15 +1739,26 @@ asmlinkage long sys_symlink(const char *
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
goto out;
+ nd.last.name,
+ nd.last.len,
+ from);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
putname(to);
}
putname(from);
-@@ -1642,7 +1798,7 @@ asmlinkage long sys_link(const char * ol
+@@ -1642,7 +1825,7 @@ asmlinkage long sys_link(const char * ol
struct dentry *new_dentry;
struct nameidata nd, old_nd;
if (error)
goto exit;
error = path_lookup(to, LOOKUP_PARENT, &nd);
-@@ -1651,7 +1807,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1651,7 +1834,17 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
+ nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out_release;
+ }
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1695,7 +1861,8 @@ exit:
+@@ -1695,7 +1888,8 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
struct inode *target;
-@@ -1753,6 +1920,7 @@ int vfs_rename_dir(struct inode *old_dir
+@@ -1753,6 +1947,7 @@ int vfs_rename_dir(struct inode *old_dir
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (target) {
if (!error)
target->i_flags |= S_DEAD;
-@@ -1774,7 +1942,8 @@ out_unlock:
+@@ -1774,7 +1969,8 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
-@@ -1805,6 +1974,7 @@ int vfs_rename_other(struct inode *old_d
+@@ -1805,6 +2001,7 @@ int vfs_rename_other(struct inode *old_d
error = -EBUSY;
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
double_up(&old_dir->i_zombie, &new_dir->i_zombie);
if (error)
return error;
-@@ -1816,13 +1986,14 @@ int vfs_rename_other(struct inode *old_d
+@@ -1816,13 +2013,14 @@ int vfs_rename_other(struct inode *old_d
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!error) {
if (old_dir == new_dir)
inode_dir_notify(old_dir, DN_RENAME);
-@@ -1862,9 +2033,23 @@ static inline int do_rename(const char *
- if (newnd.last_type != LAST_NORM)
- goto exit2;
+@@ -1864,7 +2062,7 @@ static inline int do_rename(const char *
-+ if (old_dir->d_inode->i_op->rename2) {
-+ lock_kernel();
-+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+ new_dir->d_inode,
-+ oldnd.last.name,
-+ oldnd.last.len,
-+ newnd.last.name,
-+ newnd.last.len);
-+ unlock_kernel();
-+ /* the file system want to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit2;
-+ }
-+
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1880,14 +2065,14 @@ static inline int do_rename(const char *
+@@ -1880,16 +2078,37 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
if (IS_ERR(new_dentry))
goto exit4;
++ if (old_dir->d_inode->i_op->rename2) {
++ lock_kernel();
++ /* don't rename a mount point; drop the BKL before bailing out.
++ * the mds will take care of the rest of the sanity checking */
++ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
++ unlock_kernel();
++ error = -EBUSY;
++ goto exit5;
++ }
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit5;
++ }
++
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ new_dir->d_inode, new_dentry, NULL);
unlock_kernel();
-
+-
++exit5:
dput(new_dentry);
-@@ -1940,7 +2125,8 @@ out:
+ exit4:
+ dput(old_dentry);
+@@ -1940,7 +2159,8 @@ out:
}
static inline int
{
int res = 0;
char *name;
-@@ -1953,7 +2139,7 @@ __vfs_follow_link(struct nameidata *nd,
+@@ -1953,7 +2173,7 @@ __vfs_follow_link(struct nameidata *nd,
/* weird __emul_prefix() stuff did it */
goto out;
}
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -1975,7 +2161,13 @@ fail:
+@@ -1975,7 +2195,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
}
/* get the link contents into pagecache */
-@@ -2017,7 +2209,7 @@ int page_follow_link(struct dentry *dent
+@@ -2017,7 +2243,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.18-49chaos-lustre9/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/fs/nfsd/vfs.c Wed Jan 29 12:43:32 2003
+--- linux-2.4.18-18.8.0-l12/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/nfsd/vfs.c Wed Feb 26 16:54:17 2003
@@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
unlock_kernel();
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
---- linux-2.4.18-49chaos-lustre9/fs/open.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/fs/open.c Wed Jan 29 12:43:32 2003
-@@ -19,6 +19,9 @@
+--- linux-2.4.18-18.8.0-l12/fs/open.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/open.c Wed Feb 26 16:54:17 2003
+@@ -19,6 +19,8 @@
#include <asm/uaccess.h>
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+extern int path_walk_it(const char *name, struct nameidata *nd,
+ struct lookup_intent *it);
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
int vfs_statfs(struct super_block *sb, struct statfs *buf)
{
-@@ -118,12 +121,13 @@ static inline long do_sys_truncate(const
+@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct
+ write_unlock(&files->file_lock);
+ }
+
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ struct inode *inode = dentry->d_inode;
++ struct inode_operations *op = dentry->d_inode->i_op;
+ int error;
+ struct iattr newattrs;
+
+@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+ down(&inode->i_sem);
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+- error = notify_change(dentry, &newattrs);
++ if (called_from_open)
++ newattrs.ia_valid |= ATTR_FROM_OPEN;
++ if (op->setattr_raw) {
++ newattrs.ia_valid |= ATTR_RAW;
++ newattrs.ia_ctime = CURRENT_TIME;
++ error = op->setattr_raw(inode, &newattrs);
++ } else
++ error = notify_change(dentry, &newattrs);
+ up(&inode->i_sem);
+ return error;
+ }
+@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
struct nameidata nd;
struct inode * inode;
int error;
-+ struct lookup_intent it = { .it_op = IT_TRUNC };
++ struct lookup_intent it = { .it_op = IT_GETATTR };
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -168,6 +172,7 @@ static inline long do_sys_truncate(const
+@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(nd.dentry, length);
++ intent_release(nd.dentry, &it);
++ error = do_truncate(nd.dentry, length, 0);
+ }
put_write_access(inode);
dput_and_out:
path_release(&nd);
out:
return error;
-@@ -259,8 +264,9 @@ asmlinkage long sys_utime(char * filenam
- struct nameidata nd;
+@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+
+ error = locks_verify_truncate(inode, file, length);
+ if (!error)
+- error = do_truncate(dentry, length);
++ error = do_truncate(dentry, length, 0);
+ out_putf:
+ fput(file);
+ out:
+@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
struct inode * inode;
struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
++ error = user_path_walk_it(filename, &nd, NULL);
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -286,6 +292,7 @@ asmlinkage long sys_utime(char * filenam
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+ goto dput_and_out;
+
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EROFS;
++ if (IS_RDONLY(inode))
++ goto dput_and_out;
++
++ error = -EPERM;
++ if (!times) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
}
++
error = notify_change(nd.dentry, &newattrs);
dput_and_out:
-+ intent_release(nd.dentry, &it);
path_release(&nd);
- out:
- return error;
-@@ -303,8 +310,9 @@ asmlinkage long sys_utimes(char * filena
- struct nameidata nd;
+@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
struct inode * inode;
struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
++ error = user_path_walk_it(filename, &nd, NULL);
if (error)
goto out;
-@@ -331,6 +339,7 @@ asmlinkage long sys_utimes(char * filena
- }
- error = notify_change(nd.dentry, &newattrs);
- dput_and_out:
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- out:
- return error;
-@@ -347,6 +356,7 @@ asmlinkage long sys_access(const char *
+ inode = nd.dentry->d_inode;
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+ newattrs.ia_atime = times[0].tv_sec;
+ newattrs.ia_mtime = times[1].tv_sec;
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EPERM;
++ if (!utimes) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
+@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char *
int old_fsuid, old_fsgid;
kernel_cap_t old_cap;
int res;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
-@@ -364,13 +374,14 @@ asmlinkage long sys_access(const char *
+@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char *
else
current->cap_effective = current->cap_permitted;
path_release(&nd);
}
-@@ -385,8 +396,11 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +435,11 @@ asmlinkage long sys_chdir(const char * f
{
int error;
struct nameidata nd;
if (error)
goto out;
-@@ -397,6 +411,7 @@ asmlinkage long sys_chdir(const char * f
+@@ -397,6 +450,7 @@ asmlinkage long sys_chdir(const char * f
set_fs_pwd(current->fs, nd.mnt, nd.dentry);
dput_and_out:
path_release(&nd);
out:
return error;
-@@ -436,9 +451,10 @@ asmlinkage long sys_chroot(const char *
+@@ -436,9 +490,10 @@ asmlinkage long sys_chroot(const char *
{
int error;
struct nameidata nd;
if (error)
goto out;
-@@ -454,6 +470,7 @@ asmlinkage long sys_chroot(const char *
+@@ -454,6 +509,7 @@ asmlinkage long sys_chroot(const char *
set_fs_altroot();
error = 0;
dput_and_out:
path_release(&nd);
out:
return error;
-@@ -498,8 +515,9 @@ asmlinkage long sys_chmod(const char * f
- struct inode * inode;
- int error;
- struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
+@@ -508,6 +564,18 @@ asmlinkage long sys_chmod(const char * f
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
-- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
- if (error)
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_mode = mode;
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+@@ -538,6 +606,20 @@ static int chown_common(struct dentry *
+ error = -EROFS;
+ if (IS_RDONLY(inode))
goto out;
- inode = nd.dentry->d_inode;
-@@ -519,6 +537,7 @@ asmlinkage long sys_chmod(const char * f
- error = notify_change(nd.dentry, &newattrs);
-
- dput_and_out:
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- out:
- return error;
-@@ -588,10 +607,12 @@ asmlinkage long sys_chown(const char * f
- {
- struct nameidata nd;
- int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
-
-- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
- if (!error) {
- error = chown_common(nd.dentry, user, group);
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- }
- return error;
-@@ -601,10 +622,12 @@ asmlinkage long sys_lchown(const char *
- {
- struct nameidata nd;
- int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
-
-- error = user_path_walk_link(filename, &nd);
-+ error = user_path_walk_link_it(filename, &nd, &it);
- if (!error) {
- error = chown_common(nd.dentry, user, group);
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- }
- return error;
-@@ -628,7 +651,8 @@ extern ssize_t do_readahead(struct file
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = dentry->d_inode->i_op;
++
++ newattrs.ia_uid = user;
++ newattrs.ia_gid = group;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ return error;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out;
+@@ -628,7 +710,8 @@ extern ssize_t do_readahead(struct file
/* for files over a certains size it doesn't pay to do readahead on open */
#define READAHEAD_CUTOFF 48000
{
struct file * f;
struct inode *inode;
-@@ -693,6 +717,7 @@ struct file *dentry_open(struct dentry *
+@@ -649,7 +732,7 @@ struct file *dentry_open(struct dentry *
+ error = locks_verify_locked(inode);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(dentry, 0);
++ error = do_truncate(dentry, 0, 1);
+ }
+ if (error || !(f->f_mode & FMODE_WRITE))
+ put_write_access(inode);
+@@ -693,6 +776,7 @@ struct file *dentry_open(struct dentry *
do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT);
return f;
cleanup_all:
-@@ -707,11 +732,17 @@ cleanup_all:
+@@ -707,11 +791,17 @@ cleanup_all:
cleanup_file:
put_filp(f);
cleanup_dentry:
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.18-49chaos-lustre9/fs/stat.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/fs/stat.c Wed Jan 29 12:43:32 2003
-@@ -13,6 +13,7 @@
-
- #include <asm/uaccess.h>
-
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
- /*
- * Revalidate the inode. This is required for proper NFS attribute caching.
- */
-@@ -104,10 +105,12 @@ int vfs_stat(char *name, struct kstat *s
+--- linux-2.4.18-18.8.0-l12/fs/stat.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/stat.c Wed Feb 26 16:54:17 2003
+@@ -104,10 +104,12 @@ int vfs_stat(char *name, struct kstat *s
{
struct nameidata nd;
int error;
path_release(&nd);
}
return error;
-@@ -117,10 +120,12 @@ int vfs_lstat(char *name, struct kstat *
+@@ -117,10 +119,12 @@ int vfs_lstat(char *name, struct kstat *
{
struct nameidata nd;
int error;
path_release(&nd);
}
return error;
---- linux-2.4.18-49chaos-lustre9/include/linux/dcache.h~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/include/linux/dcache.h Wed Jan 29 12:43:32 2003
-@@ -6,6 +6,27 @@
+--- linux-2.4.18-18.8.0-l12/fs/exec.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/fs/exec.c Wed Feb 26 16:54:17 2003
+@@ -103,13 +103,18 @@ static inline void put_binfmt(struct lin
+ *
+ * Also note that we take the address to load from from the file itself.
+ */
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it);
++int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++ struct lookup_intent *it);
+ asmlinkage long sys_uselib(const char * library)
+ {
+ struct file * file;
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+
+- error = user_path_walk(library, &nd);
++ error = user_path_walk_it(library, &nd, &it);
+ if (error)
+ goto out;
+
+@@ -121,7 +126,8 @@ asmlinkage long sys_uselib(const char *
+ if (error)
+ goto exit;
+
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(nd.dentry, &it);
+ error = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+@@ -350,8 +356,9 @@ struct file *open_exec(const char *name)
+ struct inode *inode;
+ struct file *file;
+ int err = 0;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+
+- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
++ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
+ file = ERR_PTR(err);
+ if (!err) {
+ inode = nd.dentry->d_inode;
+@@ -363,7 +370,8 @@ struct file *open_exec(const char *name)
+ err = -EACCES;
+ file = ERR_PTR(err);
+ if (!err) {
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(nd.dentry, &it);
+ if (!IS_ERR(file)) {
+ err = deny_write_access(file);
+ if (err) {
+@@ -976,7 +984,7 @@ int do_coredump(long signr, struct pt_re
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+--- linux-2.4.18-18.8.0-l12/include/linux/dcache.h~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/include/linux/dcache.h Wed Feb 26 17:01:30 2003
+@@ -6,6 +6,25 @@
#include <asm/atomic.h>
#include <linux/mount.h>
+#define IT_CREAT (1<<1)
+#define IT_READDIR (1<<2)
+#define IT_GETATTR (1<<3)
-+#define IT_SETATTR (1<<4)
-+#define IT_TRUNC (1<<5)
-+#define IT_READLINK (1<<6)
-+#define IT_LOOKUP (1<<7)
++#define IT_LOOKUP (1<<4)
++#define IT_UNLINK (1<<5)
+
+struct lookup_intent {
+ int it_op;
/*
* linux/include/linux/dcache.h
*
-@@ -78,6 +99,7 @@ struct dentry {
+@@ -78,6 +97,7 @@ struct dentry {
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
void * d_extra_attributes; /* TUX-specific data */
-@@ -91,6 +113,8 @@ struct dentry_operations {
+@@ -91,8 +111,15 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
+ void (*d_intent_release)(struct dentry *, struct lookup_intent *);
};
++/* defined in fs/namei.c */
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
++/* defined in fs/dcache.c */
++extern void __d_rehash(struct dentry * entry, int lock);
++
/* the dentry parameter passed to d_hash and d_compare is the parent
-@@ -124,6 +148,7 @@ d_iput: no no yes
+ * directory of the entries to be compared. It is used in case these
+ * functions need any directory specific information for determining
+@@ -124,6 +151,7 @@ d_iput: no no yes
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
extern spinlock_t dcache_lock;
---- linux-2.4.18-49chaos-lustre9/include/linux/fs.h~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/include/linux/fs.h Wed Jan 29 12:43:32 2003
-@@ -576,6 +576,7 @@ struct file {
+--- linux-2.4.18-18.8.0-l12/include/linux/fs.h~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/include/linux/fs.h Wed Feb 26 17:31:42 2003
+@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+ #define ATTR_MTIME_SET 256
+ #define ATTR_FORCE 512 /* Not a change, but a change it */
+ #define ATTR_ATTR_FLAG 1024
++#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */
+
+ /*
+ * This is the Inode Attributes structure, used for notify_change(). It
+@@ -576,6 +578,7 @@ struct file {
/* needed for tty driver, and maybe others */
void *private_data;
/* preallocated helper kiobuf to speedup O_DIRECT */
struct kiobuf *f_iobuf;
-@@ -836,7 +837,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -836,7 +839,9 @@ extern int vfs_symlink(struct inode *, s
extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *);
/*
* File types
-@@ -897,16 +900,28 @@ struct file_operations {
+@@ -897,20 +902,33 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
-@@ -1383,6 +1398,7 @@ typedef int (*read_actor_t)(read_descrip
+ int (*setattr) (struct dentry *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct dentry *, struct iattr *);
+ };
+
+@@ -1112,7 +1130,7 @@ static inline int get_lease(struct inode
+
+ asmlinkage long sys_open(const char *, int, int);
+ asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+@@ -1381,6 +1399,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1394,6 +1410,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1392,6 +1411,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void inode_init_once(struct inode *);
extern void iput(struct inode *);
-@@ -1494,6 +1512,8 @@ extern struct file_operations generic_ro
+@@ -1492,6 +1513,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.18-49chaos-lustre9/kernel/ksyms.c~vfs_intent-2.4.18-18 Wed Jan 29 12:43:32 2003
-+++ linux-2.4.18-49chaos-lustre9-root/kernel/ksyms.c Wed Jan 29 12:43:32 2003
-@@ -294,6 +294,7 @@ EXPORT_SYMBOL(read_cache_page);
+--- linux-2.4.18-18.8.0-l12/kernel/ksyms.c~vfs_intent-2.4.18-18 Wed Feb 26 16:54:17 2003
++++ linux-2.4.18-18.8.0-l12-phil/kernel/ksyms.c Wed Feb 26 16:54:17 2003
+@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_symlink_inode_operations);
-
-_
- fs/dcache.c | 3
- fs/namei.c | 306 ++++++++++++++++++++++++++++++++++++++++---------
+ fs/dcache.c | 8
+ fs/namei.c | 335 +++++++++++++++++----
fs/nfsd/vfs.c | 2
- fs/open.c | 63 +++++++---
- fs/stat.c | 29 +++-
- include/linux/dcache.h | 31 ++++
- include/linux/fs.h | 22 +++
+ fs/open.c | 142 +++++++-
+ fs/stat.c | 24 +
+ include/linux/dcache.h | 26 +
+ include/linux/fs.h | 27 +
kernel/ksyms.c | 1
- 8 files changed, 384 insertions(+), 73 deletions(-)
-
---- linux-2.4.19-hp2_pnnl2/fs/dcache.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/fs/dcache.c Sun Jan 19 19:04:47 2003
+ fs/exec.c | 18 -
+ 9 files changed, 487 insertions(+), 96 deletions(-)
+
+--- linux-2.4.19-hp2_pnnl4/fs/dcache.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/dcache.c Sun Jan 19 19:04:47 2003
@@ -186,6 +188,13 @@ int d_invalidate(struct dentry * dentry)
spin_unlock(&dcache_lock);
return 0;
INIT_LIST_HEAD(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
---- linux-2.4.19-hp2_pnnl2/fs/namei.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/fs/namei.c Sun Jan 19 19:35:55 2003
+@@ -859,13 +867,19 @@ void d_delete(struct dentry * dentry)
+ * Adds a dentry to the hash according to its name.
+ */
+
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+ struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+ if (!list_empty(&entry->d_hash)) BUG();
+- spin_lock(&dcache_lock);
++ if (lock) spin_lock(&dcache_lock);
+ list_add(&entry->d_hash, list);
+- spin_unlock(&dcache_lock);
++ if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++ __d_rehash(entry, 1);
+ }
+
+ #define do_switch(x,y) do { \
+--- linux-2.4.19-hp2_pnnl4/fs/namei.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/namei.c Sun Jan 19 19:35:55 2003
@@ -94,6 +97,13 @@
* XEmacs seems to be relying on it...
*/
path_release(nd);
return -ELOOP;
}
+@@ -381,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str
+ return __follow_up(mnt, dentry);
+ }
+
+-static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
++static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry,
++ struct lookup_intent *it)
+ {
+ struct vfsmount *mounted;
+
+ spin_lock(&dcache_lock);
+ mounted = lookup_mnt(*mnt, *dentry);
+ if (mounted) {
++ int opc = 0, mode = 0;
+ *mnt = mntget(mounted);
+ spin_unlock(&dcache_lock);
++ if (it) {
++ opc = it->it_op;
++ mode = it->it_mode;
++ }
++ intent_release(*dentry, it);
++ if (it) {
++ it->it_op = opc;
++ it->it_mode = mode;
++ }
+ dput(*dentry);
+ mntput(mounted->mnt_parent);
+ *dentry = dget(mounted->mnt_root);
+@@ -401,7 +447,7 @@ static inline int __follow_down(struct v
+
+ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+ {
+- return __follow_down(mnt,dentry);
++ return __follow_down(mnt,dentry,NULL);
+ }
+
+ static inline void follow_dotdot(struct nameidata *nd)
+@@ -437,7 +483,7 @@ static inline void follow_dotdot(struct
+ mntput(nd->mnt);
+ nd->mnt = parent;
+ }
+- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
++ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL))
+ ;
+ }
+
@@ -447,7 +482,8 @@ static inline void follow_dotdot(struct
*
* We expect 'base' to be positive and a directory.
{
struct dentry *dentry;
struct inode *inode;
-@@ -520,9 +556,9 @@ int link_path_walk(const char * name, st
+@@ -520,15 +556,15 @@ int link_path_walk(const char * name, st
break;
}
/* This does the actual lookups.. */
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
+ }
+ /* Check mountpoints.. */
+- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
+ ;
+
+ err = -ENOENT;
@@ -539,8 +575,8 @@ int link_path_walk(const char * name, st
if (!inode->i_op)
goto out_dput;
break;
continue;
/* here ends the main loop */
-@@ -583,9 +619,9 @@ last_component:
+@@ -583,19 +619,20 @@ last_component:
if (err < 0)
break;
}
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
-@@ -594,8 +630,9 @@ last_component:
+ }
+- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
if (!IS_POSIXACL(dir->d_inode))
-@@ -1071,7 +1156,8 @@ do_last:
+@@ -1066,12 +1151,13 @@ do_last:
+ error = -ELOOP;
+ if (flag & O_NOFOLLOW)
+ goto exit_dput;
+- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
++ while (__follow_down(&nd->mnt,&dentry,it) && d_mountpoint(dentry));
+ }
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
goto do_link;
dput(nd->dentry);
+@@ -1145,7 +1231,7 @@ do_last:
+ if (!error) {
+ DQUOT_INIT(inode);
+
+- error = do_truncate(dentry, 0);
++ error = do_truncate(dentry, 0, 1);
+ }
+ put_write_access(inode);
+ if (error)
@@ -1157,8 +1243,10 @@ ok:
return 0;
+ nd.last.name,
+ nd.last.len,
+ mode, dev);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
+ nd.last.name,
+ nd.last.len,
+ mode);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
path_release(&nd);
out:
putname(tmp);
-@@ -1450,8 +1578,17 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1450,8 +1578,33 @@ asmlinkage long sys_rmdir(const char * p
error = -EBUSY;
goto exit1;
}
+ if (nd.dentry->d_inode->i_op->rmdir2) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ struct dentry *last;
++
++ down(&nd.dentry->d_inode->i_sem);
++ last = lookup_hash_it(&nd.last, nd.dentry, NULL);
++ up(&nd.dentry->d_inode->i_sem);
++ if (IS_ERR(last)) {
++ error = PTR_ERR(last);
++ goto exit1;
++ }
++ if (d_mountpoint(last)) {
++ dput(last);
++ error = -EBUSY;
++ goto exit1;
++ }
++ dput(last);
++
+ error = op->rmdir2(nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+ error = op->unlink2(nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ if (nd.dentry->d_inode->i_op->symlink2) {
++ if (nd.dentry->d_inode->i_op->symlink2) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
+ error = op->symlink2(nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len,
+ from);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
+ }
+ nd.dentry->d_inode,
+ nd.last.name,
+ nd.last.len);
-+ /* the file system want to use normal vfs path now */
++ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out_release;
+ }
if (!error) {
if (old_dir == new_dir)
inode_dir_notify(old_dir, DN_RENAME);
-@@ -1886,9 +2068,23 @@ static inline int do_rename(const char *
- if (newnd.last_type != LAST_NORM)
- goto exit2;
+@@ -1888,7 +2070,7 @@ static inline int do_rename(const char *
-+ if (old_dir->d_inode->i_op->rename2) {
-+ lock_kernel();
-+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+ new_dir->d_inode,
-+ oldnd.last.name,
-+ oldnd.last.len,
-+ newnd.last.name,
-+ newnd.last.len);
-+ unlock_kernel();
-+ /* the file system want to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit2;
-+ }
-+
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1904,14 +2100,14 @@ static inline int do_rename(const char *
+@@ -1904,16 +2086,37 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
if (IS_ERR(new_dentry))
goto exit4;
++ if (old_dir->d_inode->i_op->rename2) {
++ /* don't rename a mount point (checked before taking the BKL
++ * so the error exit cannot leave it held); mds does the rest */
++ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
++ error = -EBUSY;
++ goto exit5;
++ }
++
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
++ new_dir->d_inode,
++ oldnd.last.name,
++ oldnd.last.len,
++ newnd.last.name,
++ newnd.last.len);
++ unlock_kernel();
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit5;
++ }
++
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ new_dir->d_inode, new_dentry, NULL);
unlock_kernel();
-
+-
++exit5:
dput(new_dentry);
+ exit4:
+ dput(old_dentry);
@@ -1964,7 +2163,8 @@ out:
}
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.19-hp2_pnnl2/fs/nfsd/vfs.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/fs/nfsd/vfs.c Sun Jan 19 19:37:57 2003
+--- linux-2.4.19-hp2_pnnl4/fs/nfsd/vfs.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/nfsd/vfs.c Sun Jan 19 19:37:57 2003
@@ -1295,7 +1295,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
err = nfserr_perm;
} else
if (!err && EX_ISSYNC(tfhp->fh_export)) {
nfsd_sync_dir(tdentry);
nfsd_sync_dir(fdentry);
---- linux-2.4.19-hp2_pnnl2/fs/open.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/fs/open.c Sun Jan 19 19:41:00 2003
-@@ -19,6 +19,9 @@
+--- linux-2.4.19-hp2_pnnl4/fs/open.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/open.c Sun Jan 19 19:41:00 2003
+@@ -19,6 +19,8 @@
#include <asm/uaccess.h>
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+extern int path_walk_it(const char *name, struct nameidata *nd,
+ struct lookup_intent *it);
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
int vfs_statfs(struct super_block *sb, struct statfs *buf)
{
+@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct
+ write_unlock(&files->file_lock);
+ }
+
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ struct inode *inode = dentry->d_inode;
++ struct inode_operations *op = dentry->d_inode->i_op;
+ int error;
+ struct iattr newattrs;
+
+@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+ down(&inode->i_sem);
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+- error = notify_change(dentry, &newattrs);
++ if (called_from_open)
++ newattrs.ia_valid |= ATTR_FROM_OPEN;
++ if (op->setattr_raw) {
++ newattrs.ia_valid |= ATTR_RAW;
++ newattrs.ia_ctime = CURRENT_TIME;
++ error = op->setattr_raw(inode, &newattrs);
++ } else
++ error = notify_change(dentry, &newattrs);
+ up(&inode->i_sem);
+ return error;
+ }
@@ -118,12 +121,13 @@ static inline long do_sys_truncate(const
struct nameidata nd;
struct inode * inode;
int error;
-+ struct lookup_intent it = { .it_op = IT_TRUNC };
++ struct lookup_intent it = { .it_op = IT_GETATTR };
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -168,6 +172,7 @@ static inline long do_sys_truncate(const
+@@ -163,11 +167,13 @@ static inline long do_sys_truncate(const
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(nd.dentry, length);
++ intent_release(nd.dentry, &it);
++ error = do_truncate(nd.dentry, length, 0);
+ }
put_write_access(inode);
dput_and_out:
path_release(&nd);
out:
return error;
-@@ -259,8 +264,9 @@ asmlinkage long sys_utime(char * filenam
- struct nameidata nd;
+@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+
+ error = locks_verify_truncate(inode, file, length);
+ if (!error)
+- error = do_truncate(dentry, length);
++ error = do_truncate(dentry, length, 0);
+ out_putf:
+ fput(file);
+ out:
+@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
struct inode * inode;
struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
++ error = user_path_walk_it(filename, &nd, NULL);
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -286,6 +292,7 @@ asmlinkage long sys_utime(char * filenam
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+ goto dput_and_out;
+
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EROFS;
++ if (IS_RDONLY(inode))
++ goto dput_and_out;
++
++ error = -EPERM;
++ if (!times) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
}
++
error = notify_change(nd.dentry, &newattrs);
dput_and_out:
-+ intent_release(nd.dentry, &it);
path_release(&nd);
- out:
- return error;
-@@ -303,8 +310,9 @@ asmlinkage long sys_utimes(char * filena
- struct nameidata nd;
+@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
struct inode * inode;
struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
++ error = user_path_walk_it(filename, &nd, NULL);
if (error)
goto out;
-@@ -331,6 +339,7 @@ asmlinkage long sys_utimes(char * filena
- }
- error = notify_change(nd.dentry, &newattrs);
- dput_and_out:
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- out:
- return error;
+ inode = nd.dentry->d_inode;
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+ newattrs.ia_atime = times[0].tv_sec;
+ newattrs.ia_mtime = times[1].tv_sec;
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EPERM;
++ if (!utimes) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
@@ -347,6 +356,7 @@ asmlinkage long sys_access(const char *
int old_fsuid, old_fsgid;
kernel_cap_t old_cap;
path_release(&nd);
out:
return error;
-@@ -515,8 +530,9 @@ asmlinkage long sys_chmod(const char * f
- struct inode * inode;
- int error;
- struct iattr newattrs;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
+@@ -508,6 +564,18 @@ asmlinkage long sys_chmod(const char * f
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
-- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
- if (error)
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_mode = mode;
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+@@ -538,6 +606,20 @@ static int chown_common(struct dentry *
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto out;
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = dentry->d_inode->i_op;
++
++ newattrs.ia_uid = user;
++ newattrs.ia_gid = group;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ return error;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
- inode = nd.dentry->d_inode;
-@@ -536,6 +552,7 @@ asmlinkage long sys_chmod(const char * f
- error = notify_change(nd.dentry, &newattrs);
-
- dput_and_out:
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- out:
- return error;
-@@ -605,10 +622,12 @@ asmlinkage long sys_chown(const char * f
- {
- struct nameidata nd;
- int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
-
-- error = user_path_walk(filename, &nd);
-+ error = user_path_walk_it(filename, &nd, &it);
- if (!error) {
- error = chown_common(nd.dentry, user, group);
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- }
- return error;
-@@ -618,10 +637,12 @@ asmlinkage long sys_lchown(const char *
- {
- struct nameidata nd;
- int error;
-+ struct lookup_intent it = { .it_op = IT_SETATTR };
-
-- error = user_path_walk_link(filename, &nd);
-+ error = user_path_walk_link_it(filename, &nd, &it);
- if (!error) {
- error = chown_common(nd.dentry, user, group);
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- }
- return error;
@@ -655,10 +676,16 @@ asmlinkage long sys_fchown(unsigned int
* for the internal routines (ie open_namei()/follow_link() etc). 00 is
* used by symlinks.
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.19-hp2_pnnl2/fs/stat.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/fs/stat.c Sun Jan 19 19:44:51 2003
-@@ -13,6 +13,7 @@
-
- #include <asm/uaccess.h>
-
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
- /*
- * Revalidate the inode. This is required for proper NFS attribute caching.
- */
+--- linux-2.4.19-hp2_pnnl4/fs/stat.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/stat.c Sun Jan 19 19:44:51 2003
@@ -135,13 +136,15 @@ static int cp_new_stat(struct inode * in
asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
{
path_release(&nd);
}
return error;
-@@ -247,11 +256,12 @@ asmlinkage long sys_readlink(const char
- {
- struct nameidata nd;
- int error;
-+ struct lookup_intent it = { .it_op = IT_READLINK };
-
- if (bufsiz <= 0)
- return -EINVAL;
-
-- error = user_path_walk_link(path, &nd);
-+ error = user_path_walk_link_it(path, &nd, &it);
- if (!error) {
- struct inode * inode = nd.dentry->d_inode;
-
-@@ -261,6 +271,7 @@ asmlinkage long sys_readlink(const char
- UPDATE_ATIME(inode);
- error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
- }
-+ intent_release(nd.dentry, &it);
- path_release(&nd);
- }
- return error;
@@ -333,12 +344,14 @@ asmlinkage long sys_stat64(char * filena
{
struct nameidata nd;
path_release(&nd);
}
return error;
---- linux-2.4.19-hp2_pnnl2/include/linux/dcache.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/include/linux/dcache.h Sun Jan 19 19:04:48 2003
-@@ -6,6 +6,27 @@
+--- linux-2.4.19-hp2_pnnl4/fs/exec.c~vfs_intent_hp Sun Feb 9 01:14:52 2003
++++ linux-2.4.19-hp2_pnnl4-root/fs/exec.c Sun Feb 9 01:29:49 2003
+@@ -103,13 +104,18 @@ static inline void put_binfmt(struct lin
+ *
+ * Also note that we take the address to load from from the file itself.
+ */
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it);
++int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++ struct lookup_intent *it);
+ asmlinkage long sys_uselib(const char * library)
+ {
+ struct file * file;
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+
+- error = user_path_walk(library, &nd);
++ error = user_path_walk_it(library, &nd, &it);
+ if (error)
+ goto out;
+
+@@ -121,7 +127,8 @@ asmlinkage long sys_uselib(const char *
+ if (error)
+ goto exit;
+
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(nd.dentry, &it);
+ error = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+@@ -350,9 +350,10 @@ struct file *open_exec(const char *name)
+ struct inode *inode;
+ struct file *file;
+ int err = 0;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+
+ if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+- err = path_walk(name, &nd);
++ err = path_walk_it(name, &nd, &it);
+ file = ERR_PTR(err);
+ if (!err) {
+ inode = nd.dentry->d_inode;
+@@ -363,7 +369,8 @@ struct file *open_exec(const char *name)
+ err = -EACCES;
+ file = ERR_PTR(err);
+ if (!err) {
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(nd.dentry, &it);
+ if (!IS_ERR(file)) {
+ err = deny_write_access(file);
+ if (err) {
+@@ -976,7 +986,7 @@ int do_coredump(long signr, struct pt_re
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+--- linux-2.4.19-hp2_pnnl4/include/linux/dcache.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/include/linux/dcache.h Sun Jan 19 19:04:48 2003
+@@ -6,6 +6,25 @@
#include <asm/atomic.h>
#include <linux/mount.h>
+#define IT_CREAT (1<<1)
+#define IT_READDIR (1<<2)
+#define IT_GETATTR (1<<3)
-+#define IT_SETATTR (1<<4)
-+#define IT_TRUNC (1<<5)
-+#define IT_READLINK (1<<6)
-+#define IT_LOOKUP (1<<7)
++#define IT_LOOKUP (1<<4)
++#define IT_UNLINK (1<<5)
+
+struct lookup_intent {
+ int it_op;
unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
-@@ -90,6 +119,8 @@ struct dentry_operations {
+@@ -90,8 +119,15 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
+ void (*d_intent_release)(struct dentry *, struct lookup_intent *);
};
++/* defined in fs/namei.c */
++extern void intent_release(struct dentry *de, struct lookup_intent *it);
++/* defined in fs/dcache.c */
++extern void __d_rehash(struct dentry * entry, int lock);
++
/* the dentry parameter passed to d_hash and d_compare is the parent
-@@ -124,6 +148,7 @@ d_iput: no no yes
+ * directory of the entries to be compared. It is used in case these
+ * functions need any directory specific information for determining
+@@ -124,6 +149,7 @@ d_iput: no no yes
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
extern spinlock_t dcache_lock;
---- linux-2.4.19-hp2_pnnl2/include/linux/fs.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/include/linux/fs.h Sun Jan 19 19:04:48 2003
+--- linux-2.4.19-hp2_pnnl4/include/linux/fs.h~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/include/linux/fs.h Sun Jan 19 19:04:48 2003
+@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+ #define ATTR_MTIME_SET 256
+ #define ATTR_FORCE 512 /* Not a change, but a change it */
+ #define ATTR_ATTR_FLAG 1024
++#define ATTR_RAW 2048 /* file system, not vfs, will massage attrs */
++#define ATTR_FROM_OPEN 4096 /* called from open path, i.e. O_TRUNC */
+
+ /*
+ * This is the Inode Attributes structure, used for notify_change(). It
@@ -575,6 +575,7 @@ struct file {
/* needed for tty driver, and maybe others */
/*
* File types
-@@ -876,16 +879,28 @@ struct file_operations {
+@@ -876,20 +879,33 @@ struct file_operations {
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
struct dentry * (*lookup) (struct inode *,struct dentry *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
+ int (*setattr) (struct dentry *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct dentry *, struct iattr *);
+ int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+@@ -1112,7 +1130,7 @@ static inline int get_lease(struct inode
+
+ asmlinkage long sys_open(const char *, int, int);
+ asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
@@ -1354,6 +1369,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.19-hp2_pnnl2/kernel/ksyms.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
-+++ linux-2.4.19-hp2_pnnl2-root/kernel/ksyms.c Sun Jan 19 19:04:48 2003
+--- linux-2.4.19-hp2_pnnl4/kernel/ksyms.c~vfs_intent_hp Sun Jan 19 19:04:47 2003
++++ linux-2.4.19-hp2_pnnl4-root/kernel/ksyms.c Sun Jan 19 19:04:48 2003
@@ -293,6 +293,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_symlink_inode_operations);
-
-_
--- /dev/null
+arch/um/kernel/mem.c
+fs/namei.c
+fs/nfsd/vfs.c
+fs/sysfs/inode.c
+include/linux/dcache.h
+include/linux/fs.h
+include/linux/namei.h
+include/linux/slab.h
+kernel/ksyms.c
+mm/slab.c
+net/unix/af_unix.c
fs/nfsd/vfs.c
fs/open.c
fs/stat.c
+fs/exec.c
include/linux/dcache.h
include/linux/fs.h
kernel/ksyms.c
--- /dev/null
+lustre_version.patch
+lustre-2.5.patch
linux-2.4.18-hp2_pnnl2
series/vanilla-2.4.19 ** Not officially supported
linux-2.4.19
+series/lin-2.5.44
+ uml-2.5.44
# See the file COPYING in this distribution
DEFS=
+
+LDLMSOURCES= l_lock.c ldlm_lock.c ldlm_resource.c \
+ldlm_extent.c ldlm_request.c ldlm_lockd.c
+
+if LIBLUSTRE
+lib_LIBRARIES = libldlm.a
+libldlm_a_SOURCES = $(LDLMSOURCES)
+else
MODULE = ldlm
modulefs_DATA = ldlm.o
EXTRA_PROGRAMS = ldlm
-ldlm_SOURCES = l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lockd.c \
-ldlm_extent.c ldlm_request.c
+ldlm_SOURCES = $(LDLMSOURCES)
+endif
include $(top_srcdir)/Rules
+
*
*/
+#define DEBUG_SUBSYSTEM S_LDLM
+#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
+#else
+#include <liblustre.h>
+#endif
-#define DEBUG_SUBSYSTEM S_LDLM
-
+#include <linux/lustre_dlm.h>
#include <linux/obd_class.h>
#include <linux/lustre_lib.h>
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of Lustre, http://www.lustre.org.
*
- * by Cluster File Systems, Inc.
- * authors, Peter Braam <braam@clusterfs.com> &
- * Phil Schwan <phil@clusterfs.com>
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_LDLM
+#ifndef __KERNEL__
+# include <liblustre.h>
+#endif
#include <linux/lustre_dlm.h>
+#include <linux/obd_support.h>
+#include <linux/lustre_lib.h>
/* This function will be called to judge if the granted queue of another child
* (read: another extent) is conflicting and needs its granted queue walked to
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- if (lock->l_extent.end < req_ex->start)
+ if (lock->l_extent.end < req_ex->start) {
new_ex->start = MIN(lock->l_extent.end, new_ex->start);
- else {
+ } else {
if (lock->l_extent.start < req_ex->start &&
!lockmode_compat(lock->l_req_mode, mode))
/* Policy: minimize conflict overlap */
new_ex->start = req_ex->start;
}
- if (lock->l_extent.start > req_ex->end)
+ if (lock->l_extent.start > req_ex->end) {
new_ex->end = MAX(lock->l_extent.start, new_ex->end);
- else {
+ } else {
if (lock->l_extent.end > req_ex->end &&
!lockmode_compat(lock->l_req_mode, mode))
/* Policy: minimize conflict overlap */
#define DEBUG_SUBSYSTEM S_LDLM
+#ifdef __KERNEL__
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_mds.h>
+#else
+#include <liblustre.h>
+#include <linux/kp30.h>
+#endif
+
#include <linux/obd_class.h>
//struct lustre_lock ldlm_everything_lock;
[LDLM_EXTENT] "EXT",
};
+#ifdef __KERNEL__
char *ldlm_it2str(int it)
{
switch (it) {
return "readdir";
case IT_GETATTR:
return "getattr";
- case IT_TRUNC:
- return "truncate";
- case IT_SETATTR:
- return "setattr";
case IT_LOOKUP:
return "lookup";
case IT_UNLINK:
return "UNKNOWN";
}
}
+#endif
extern kmem_cache_t *ldlm_lock_slab;
struct lustre_lock ldlm_handle_lock;
static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
+/* Lock-compatibility callbacks, one entry per resource type; the designated
+ * initializers index the table by LDLM_PLAIN and LDLM_EXTENT. */
-ldlm_res_compat ldlm_res_compat_table[] = {
+static ldlm_res_compat ldlm_res_compat_table[] = {
[LDLM_PLAIN] ldlm_plain_compat,
[LDLM_EXTENT] ldlm_extent_compat,
};
return ELDLM_OK;
}
+/* Policy callbacks, one entry per resource type; the designated initializers
+ * index the table by LDLM_PLAIN and LDLM_EXTENT. */
-ldlm_res_policy ldlm_res_policy_table[] = {
+static ldlm_res_policy ldlm_res_policy_table[] = {
[LDLM_PLAIN] ldlm_plain_policy,
[LDLM_EXTENT] ldlm_extent_policy,
};
if (atomic_dec_and_test(&lock->l_refc)) {
l_lock(&ns->ns_lock);
- LDLM_DEBUG0(lock, "final lock_put on destroyed lock, freeing");
+ LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
LASSERT(lock->l_destroyed);
LASSERT(list_empty(&lock->l_res_link));
list_del_init(&lock->l_export_chain);
ldlm_lock_remove_from_lru(lock);
- portals_handle_unhash(&lock->l_handle);
+ class_handle_unhash(&lock->l_handle);
#if 0
/* Wake anyone waiting for this lock */
}
INIT_LIST_HEAD(&lock->l_handle.h_link);
- portals_handle_hash(&lock->l_handle, lock_handle_addref);
+ class_handle_hash(&lock->l_handle, lock_handle_addref);
RETURN(lock);
}
+/* Fill in the caller-supplied handle 'lockh' for 'lock': the addr field is
+ * handed to POISON() with the 0x69 pattern (previously a plain memset), and
+ * the cookie is copied from the lock's own l_handle. */
void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
{
- memset(&lockh->addr, 0x69, sizeof(lockh->addr));
+ POISON(&lockh->addr, 0x69, sizeof(lockh->addr));
lockh->cookie = lock->l_handle.h_cookie;
}
LASSERT(handle);
- lock = portals_handle2object(handle->cookie);
+ lock = class_handle2object(handle->cookie);
if (lock == NULL)
RETURN(NULL);
}
static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
- struct ldlm_lock *new,
+ struct ldlm_lock *new,
void *data, int datalen)
{
struct ldlm_ast_work *w;
CERROR("FL_CBPENDING set on non-local lock--just a "
"warning\n");
- LDLM_DEBUG0(lock, "final decref done on cbpending lock");
+ LDLM_DEBUG(lock, "final decref done on cbpending lock");
+
+ if (lock->l_blocking_ast == NULL) {
+ /* The lock wasn't even fully formed; just destroy it */
+ ldlm_lock_destroy(lock);
+ }
l_unlock(&ns->ns_lock);
/* FIXME: need a real 'desc' here */
- lock->l_blocking_ast(lock, NULL, lock->l_data,
- LDLM_CB_BLOCKING);
+ if (lock->l_blocking_ast != NULL)
+ lock->l_blocking_ast(lock, NULL, lock->l_data,
+ LDLM_CB_BLOCKING);
} else if (ns->ns_client && !lock->l_readers && !lock->l_writers) {
/* If this is a client-side namespace and this was the last
* reference, put it on the LRU. */
lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
}
if (rc)
- LDLM_DEBUG0(lock, "matched");
+ LDLM_DEBUG(lock, "matched");
else
LDLM_DEBUG_NOLOCK("not matched");
ldlm_grant_lock(lock, NULL, 0);
EXIT;
out:
- l_unlock(&ns->ns_lock);
/* Don't set 'completion_ast' until here so that if the lock is granted
* immediately we don't do an unnecessary completion call. */
lock->l_completion_ast = completion;
+ l_unlock(&ns->ns_lock);
return ELDLM_OK;
}
struct ldlm_ast_work *w =
list_entry(tmp, struct ldlm_ast_work, w_list);
- if (w->w_blocking)
+ /* It's possible to receive a completion AST before we've set
+ * the l_completion_ast pointer: either because the AST arrived
+ * before the reply, or simply because there's a small race
+ * window between receiving the reply and finishing the local
+ * enqueue. (bug 842)
+ *
+ * This can't happen with the blocking_ast, however, because we
+ * will never call the local blocking_ast until we drop our
+ * reader/writer reference, which we won't do until we get the
+ * reply and finish enqueueing. */
+ if (w->w_blocking) {
+ LASSERT(w->w_lock->l_blocking_ast != NULL);
rc = w->w_lock->l_blocking_ast
(w->w_lock, &w->w_desc, w->w_data,
LDLM_CB_BLOCKING);
- else
+ } else if (w->w_lock->l_completion_ast != NULL) {
rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
w->w_data);
+ } else {
+ rc = 0;
+ }
if (rc == -ERESTART)
retval = rc;
else if (rc)
lock->l_blocking_ast(lock, NULL, lock->l_data,
LDLM_CB_CANCELING);
else
- LDLM_DEBUG0(lock, "no blocking ast");
+ LDLM_DEBUG(lock, "no blocking ast");
}
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
/* Please do not, no matter how tempting, remove this LBUG without
* talking to me first. -phik */
if (lock->l_readers || lock->l_writers) {
- LDLM_DEBUG0(lock, "lock still has references");
+ LDLM_DEBUG(lock, "lock still has references");
ldlm_lock_dump(D_OTHER, lock);
LBUG();
}
CDEBUG(level, " -- Lock dump: %p (%s) (rc: %d)\n", lock, ver,
atomic_read(&lock->l_refc));
if (lock->l_export && lock->l_export->exp_connection)
- CDEBUG(level, " Node: NID %x (rhandle: "LPX64")\n",
+ CDEBUG(level, " Node: NID "LPX64" on %s (rhandle: "LPX64")\n",
lock->l_export->exp_connection->c_peer.peer_nid,
+ lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
lock->l_remote_handle.cookie);
else
CDEBUG(level, " Node: local\n");
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_LDLM
+#ifdef __KERNEL__
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/lustre_dlm.h>
#include <linux/init.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/lustre_dlm.h>
#include <linux/obd_class.h>
+
extern kmem_cache_t *ldlm_resource_slab;
extern kmem_cache_t *ldlm_lock_slab;
extern struct lustre_lock ldlm_handle_lock;
RETURN(1);
}
+/* Report the connection behind 'lock' as failed by passing its export's
+ * connection to recovd_conn_fail().  The server-side AST senders call this
+ * when ptlrpc_queue_wait() returns -ETIMEDOUT or -EINTR.
+ *
+ * NOTE(review): dereferences lock->l_export without a NULL check, so this
+ * assumes the lock always has an export here -- confirm against callers. */
+static inline void ldlm_failed_ast(struct ldlm_lock *lock)
+{
+ /* XXX diagnostic */
+ recovd_conn_fail(lock->l_export->exp_connection);
+}
+
int ldlm_server_blocking_ast(struct ldlm_lock *lock,
struct ldlm_lock_desc *desc,
void *data, int flag)
sizeof(body->lock_handle1));
memcpy(&body->lock_desc, desc, sizeof(*desc));
- LDLM_DEBUG0(lock, "server preparing blocking AST");
+ LDLM_DEBUG(lock, "server preparing blocking AST");
req->rq_replen = lustre_msg_size(0, NULL);
ldlm_add_waiting_lock(lock);
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
ldlm_del_waiting_lock(lock);
- ldlm_expired_completion_wait(lock);
+ ldlm_failed_ast(lock);
} else if (rc) {
CERROR("client returned %d from blocking AST for lock %p\n",
req->rq_status, lock);
body->lock_flags = flags;
ldlm_lock2desc(lock, &body->lock_desc);
- LDLM_DEBUG0(lock, "server preparing completion AST");
+ LDLM_DEBUG(lock, "server preparing completion AST");
req->rq_replen = lustre_msg_size(0, NULL);
req->rq_level = LUSTRE_CONN_RECOVD;
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
ldlm_del_waiting_lock(lock);
- ldlm_expired_completion_wait(lock);
+ ldlm_failed_ast(lock);
} else if (rc) {
CERROR("client returned %d from completion AST for lock %p\n",
req->rq_status, lock);
memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
sizeof(lock->l_remote_handle));
- LDLM_DEBUG0(lock, "server-side enqueue handler, new lock created");
+ LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
LASSERT(req->rq_export);
lock->l_export = req->rq_export;
if (!lock) {
req->rq_status = EINVAL;
} else {
- LDLM_DEBUG0(lock, "server-side convert handler START");
+ LDLM_DEBUG(lock, "server-side convert handler START");
ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
&dlm_rep->lock_flags);
if (ldlm_del_waiting_lock(lock))
if (lock) {
ldlm_reprocess_all(lock->l_resource);
- LDLM_DEBUG0(lock, "server-side convert handler END");
+ LDLM_DEBUG(lock, "server-side convert handler END");
LDLM_LOCK_PUT(lock);
} else
LDLM_DEBUG_NOLOCK("server-side convert handler END");
dlm_req->lock_handle1.cookie);
req->rq_status = ESTALE;
} else {
- LDLM_DEBUG0(lock, "server-side cancel handler START");
+ LDLM_DEBUG(lock, "server-side cancel handler START");
ldlm_lock_cancel(lock);
if (ldlm_del_waiting_lock(lock))
CDEBUG(D_DLMTRACE, "cancelled waiting lock %p\n", lock);
if (lock) {
ldlm_reprocess_all(lock->l_resource);
- LDLM_DEBUG0(lock, "server-side cancel handler END");
+ LDLM_DEBUG(lock, "server-side cancel handler END");
LDLM_LOCK_PUT(lock);
}
lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle1);
if (!lock) {
- CERROR("blocking callback on lock "LPX64" - lock disappeared\n",
- dlm_req->lock_handle1.cookie);
+ CDEBUG(D_INFO, "blocking callback on lock "LPX64
+ " - lock disappeared\n", dlm_req->lock_handle1.cookie);
RETURN(-EINVAL);
}
- LDLM_DEBUG0(lock, "client blocking AST callback handler START");
+ LDLM_DEBUG(lock, "client blocking AST callback handler START");
- l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ l_lock(&ns->ns_lock);
lock->l_flags |= LDLM_FL_CBPENDING;
do_ast = (!lock->l_readers && !lock->l_writers);
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
if (do_ast) {
LDLM_DEBUG(lock, "already unused, calling "
"callback (%p)", lock->l_blocking_ast);
- if (lock->l_blocking_ast != NULL) {
+ if (lock->l_blocking_ast != NULL)
lock->l_blocking_ast(lock, &dlm_req->lock_desc,
lock->l_data, LDLM_CB_BLOCKING);
- }
- } else
- LDLM_DEBUG0(lock, "Lock still has references, will be"
- " cancelled later");
+ } else {
+ LDLM_DEBUG(lock, "Lock still has references, will be"
+ " cancelled later");
+ }
- LDLM_DEBUG0(lock, "client blocking callback handler END");
+ LDLM_DEBUG(lock, "client blocking callback handler END");
LDLM_LOCK_PUT(lock);
RETURN(0);
}
RETURN(-EINVAL);
}
- LDLM_DEBUG0(lock, "client completion callback handler START");
+ LDLM_DEBUG(lock, "client completion callback handler START");
l_lock(&ns->ns_lock);
* then we might need to switch lock modes, resources, or extents. */
if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
- LDLM_DEBUG0(lock, "completion AST, new lock mode");
+ LDLM_DEBUG(lock, "completion AST, new lock mode");
}
if (lock->l_resource->lr_type == LDLM_EXTENT)
memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
sizeof(lock->l_resource->lr_name)) != 0) {
ldlm_lock_change_resource(ns, lock,
dlm_req->lock_desc.l_resource.lr_name);
- LDLM_DEBUG0(lock, "completion AST, new resource");
+ LDLM_DEBUG(lock, "completion AST, new resource");
}
lock->l_resource->lr_tmp = &ast_list;
ldlm_grant_lock(lock, req, sizeof(*req));
lock->l_resource->lr_tmp = NULL;
l_unlock(&ns->ns_lock);
- LDLM_DEBUG0(lock, "callback handler finished, about to run_ast_work");
+ LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
LDLM_LOCK_PUT(lock);
ldlm_run_ast_work(&ast_list);
dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
CERROR("--> lock addr: "LPX64", cookie: "LPX64"\n",
dlm_req->lock_handle1.addr,dlm_req->lock_handle1.cookie);
- RETURN(-ENOTCONN);
+ rc = -ENOTCONN;
+ goto out;
}
LASSERT(req->rq_export != NULL);
CERROR("invalid opcode %d\n", req->rq_reqmsg->opc);
RETURN(-EINVAL);
}
-
+ out:
req->rq_status = rc;
- if (rc) {
- ptlrpc_error(req->rq_svc, req);
- } else {
- rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
- &req->rq_repmsg);
- if (rc)
- RETURN(rc);
- ptlrpc_reply(req->rq_svc, req);
- }
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc)
+ RETURN(rc);
+ ptlrpc_reply(req->rq_svc, req);
RETURN(0);
}
static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct ldlm_obd *ldlm = &obddev->u.ldlm;
- struct obd_uuid uuid = {"self"};
int rc, i;
ENTRY;
if (rc != 0)
RETURN(rc);
+#ifdef __KERNEL__
ldlm->ldlm_cb_service =
ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL,
- LDLM_CB_REPLY_PORTAL, &uuid,
+ LDLM_CB_REPLY_PORTAL,
ldlm_callback_handler, "ldlm_cbd");
if (!ldlm->ldlm_cb_service) {
ldlm->ldlm_cancel_service =
ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL,
- LDLM_CANCEL_REPLY_PORTAL, &uuid,
+ LDLM_CANCEL_REPLY_PORTAL,
ldlm_cancel_handler, "ldlm_canceld");
if (!ldlm->ldlm_cancel_service) {
}
}
+#endif
INIT_LIST_HEAD(&waiting_locks_list);
spin_lock_init(&waiting_locks_spinlock);
waiting_locks_timer.function = waiting_locks_callback;
RETURN(0);
out_thread:
+#ifdef __KERNEL__
ptlrpc_stop_all_threads(ldlm->ldlm_cancel_service);
ptlrpc_unregister_service(ldlm->ldlm_cancel_service);
ptlrpc_stop_all_threads(ldlm->ldlm_cb_service);
ptlrpc_unregister_service(ldlm->ldlm_cb_service);
-
+#endif
out_proc:
ldlm_proc_cleanup(obddev);
RETURN(-EBUSY);
}
+#ifdef __KERNEL__
ptlrpc_stop_all_threads(ldlm->ldlm_cb_service);
ptlrpc_unregister_service(ldlm->ldlm_cb_service);
ptlrpc_stop_all_threads(ldlm->ldlm_cancel_service);
ptlrpc_unregister_service(ldlm->ldlm_cancel_service);
ldlm_proc_cleanup(obddev);
-
+#endif
ldlm_already_setup = 0;
RETURN(0);
}
o_disconnect: class_disconnect
};
-static int __init ldlm_init(void)
+int __init ldlm_init(void)
{
int rc = class_register_type(&ldlm_obd_ops, 0, OBD_LDLM_DEVICENAME);
if (rc != 0)
EXPORT_SYMBOL(l_lock);
EXPORT_SYMBOL(l_unlock);
+#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Lock Management Module v0.1");
MODULE_LICENSE("GPL");
module_init(ldlm_init);
module_exit(ldlm_exit);
+#endif
*/
#define DEBUG_SUBSYSTEM S_LDLM
+#ifndef __KERNEL__
+#include <signal.h>
+#include <liblustre.h>
+#endif
#include <linux/lustre_dlm.h>
#include <linux/obd_class.h>
else if (!(conn = obd->u.cli.cl_import.imp_connection))
CERROR("lock %p has NULL connection\n", lock);
else {
- LDLM_DEBUG0(lock, "timed out waiting for completion");
+ LDLM_DEBUG(lock, "timed out waiting for completion");
CERROR("lock %p timed out from %s\n", lock,
conn->c_remote_uuid.uuid);
ldlm_lock_dump(D_ERROR, lock);
LDLM_FL_BLOCK_CONV)))
RETURN(0);
- LDLM_DEBUG0(lock, "client-side enqueue returned a blocked lock, "
- "sleeping");
+ LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+ "sleeping");
ldlm_lock_dump(D_OTHER, lock);
ldlm_reprocess_all(lock->l_resource);
lock->l_destroyed), &lwi);
if (lock->l_destroyed) {
- LDLM_DEBUG0(lock, "client-side enqueue waking up: destroyed");
+ LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
RETURN(-EIO);
}
RETURN(rc);
}
- LDLM_DEBUG0(lock, "client-side enqueue waking up: granted");
+ LDLM_DEBUG(lock, "client-side enqueue waking up: granted");
RETURN(0);
}
data, cp_data);
if (!lock)
GOTO(out_nolock, err = -ENOMEM);
- LDLM_DEBUG0(lock,
- "client-side local enqueue handler, new lock created");
+ LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created");
ldlm_lock_addref_internal(lock, mode);
ldlm_lock2handle(lock, lockh);
if (lock->l_completion_ast)
lock->l_completion_ast(lock, *flags, NULL);
- LDLM_DEBUG0(lock, "client-side local enqueue END");
+ LDLM_DEBUG(lock, "client-side local enqueue END");
EXIT;
out:
LDLM_LOCK_PUT(lock);
* If we're creating a new lock, get everything all setup nice. */
if (is_replay) {
lock = ldlm_handle2lock(lockh);
- LDLM_DEBUG0(lock, "client-side enqueue START");
+ LDLM_DEBUG(lock, "client-side enqueue START");
LASSERT(connh == lock->l_connh);
} else {
lock = ldlm_lock_create(ns, parent_lock_handle, res_id, type,
* (in just this one case) to run the completion_cb even if it
* arrives before the reply. */
lock->l_completion_ast = completion;
- LDLM_DEBUG0(lock, "client-side enqueue START");
+ LDLM_DEBUG(lock, "client-side enqueue START");
/* for the local lock, add the reference */
ldlm_lock_addref_internal(lock, mode);
ldlm_lock2handle(lock, lockh);
lock->l_connh = connh;
lock->l_export = NULL;
- LDLM_DEBUG0(lock, "sending request");
+ LDLM_DEBUG(lock, "sending request");
rc = ptlrpc_queue_wait(req);
if (rc != ELDLM_OK) {
lock->l_flags |= LDLM_FL_CANCELING;
l_unlock(&ns->ns_lock);
- ldlm_lock_decref(lockh, mode);
- ldlm_lock_destroy(lock);
+ ldlm_lock_decref_and_cancel(lockh, mode);
GOTO(out_req, rc);
}
LBUG();
GOTO(out_req, rc = -ENOMEM);
}
- LDLM_DEBUG0(lock, "client-side enqueue, new resource");
+ LDLM_DEBUG(lock, "client-side enqueue, new resource");
}
}
lock->l_completion_ast(lock, *flags, NULL);
}
- LDLM_DEBUG0(lock, "client-side enqueue END");
+ LDLM_DEBUG(lock, "client-side enqueue END");
EXIT;
out_req:
if (!req_passed_in)
CERROR("Trying to cancel local lock\n");
LBUG();
}
- LDLM_DEBUG0(lock, "client-side local convert");
+ LDLM_DEBUG(lock, "client-side local convert");
ldlm_lock_convert(lock, new_mode, flags);
ldlm_reprocess_all(lock->l_resource);
- LDLM_DEBUG0(lock, "client-side local convert handler END");
+ LDLM_DEBUG(lock, "client-side local convert handler END");
LDLM_LOCK_PUT(lock);
RETURN(0);
}
if (!connh)
RETURN(ldlm_cli_convert_local(lock, new_mode, flags));
- LDLM_DEBUG0(lock, "client-side convert");
+ LDLM_DEBUG(lock, "client-side convert");
req = ptlrpc_prep_req(class_conn2cliimp(connh), LDLM_CONVERT, 1, &size,
NULL);
if (lock->l_connh) {
int local_only;
- LDLM_DEBUG0(lock, "client-side cancel");
+ LDLM_DEBUG(lock, "client-side cancel");
/* Set this flag to prevent others from getting new references*/
l_lock(&lock->l_resource->lr_namespace->ns_lock);
lock->l_flags |= LDLM_FL_CBPENDING;
local_cancel:
ldlm_lock_cancel(lock);
} else {
- LDLM_DEBUG0(lock, "client-side local cancel");
+ LDLM_DEBUG(lock, "client-side local cancel");
if (lock->l_resource->lr_namespace->ns_client) {
CERROR("Trying to cancel local lock\n");
LBUG();
}
ldlm_lock_cancel(lock);
ldlm_reprocess_all(lock->l_resource);
- LDLM_DEBUG0(lock, "client-side local cancel handler END");
+ LDLM_DEBUG(lock, "client-side local cancel handler END");
}
EXIT;
LASSERT(w);
w->w_lock = LDLM_LOCK_GET(lock);
+
+ /* Prevent the cancel callback from being called by setting
+ * LDLM_FL_CANCEL in the lock. Very sneaky. -p */
+ if (flags & LDLM_FL_NO_CALLBACK)
+ w->w_lock->l_flags |= LDLM_FL_CANCEL;
+
list_add(&w->w_list, &list);
}
l_unlock(&ns->ns_lock);
int rc;
w = list_entry(tmp, struct ldlm_ast_work, w_list);
- /* Prevent the cancel callback from being called by setting
- * LDLM_FL_CANCEL in the lock. Very sneaky. -p */
- if (flags & LDLM_FL_NO_CALLBACK)
- w->w_lock->l_flags |= LDLM_FL_CANCEL;
-
if (flags & LDLM_FL_LOCAL_ONLY) {
ldlm_lock_cancel(w->w_lock);
} else {
RETURN(0);
}
-/* Cancel all locks on a namespace (or a specific resource, if given) that have
- * 0 readers/writers.
+/* Cancel all locks on a namespace (or a specific resource, if given)
+ * that have 0 readers/writers.
*
- * If 'local_only' is true, throw the locks away without trying to notify the
- * server. */
+ * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying
+ * to notify the server.
+ * If flags & LDLM_FL_NO_CALLBACK, don't run the cancel callback. */
int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
struct ldlm_res_id *res_id, int flags)
{
size = sizeof(*reply);
req->rq_replen = lustre_msg_size(1, &size);
- LDLM_DEBUG0(lock, "replaying lock:");
+ LDLM_DEBUG(lock, "replaying lock:");
rc = ptlrpc_queue_wait(req);
if (rc != ELDLM_OK)
GOTO(out, rc);
reply = lustre_msg_buf(req->rq_repmsg, 0);
memcpy(&lock->l_remote_handle, &reply->lock_handle,
sizeof(lock->l_remote_handle));
- LDLM_DEBUG0(lock, "replayed lock:");
+ LDLM_DEBUG(lock, "replayed lock:");
out:
ptlrpc_req_finished(req);
RETURN(rc);
*/
#define DEBUG_SUBSYSTEM S_LDLM
-
+#ifdef __KERNEL__
#include <linux/lustre_dlm.h>
+#else
+#include <liblustre.h>
+#endif
+
#include <linux/obd_class.h>
kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
int rc;
ENTRY;
LASSERT(ldlm_ns_proc_dir == NULL);
+ LASSERT(obd != NULL);
rc = lprocfs_obd_attach(obd, 0);
if (rc) {
CERROR("LProcFS failed in ldlm-init\n");
}
}
+#ifdef __KERNEL__
static int lprocfs_uint_rd(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
return snprintf(page, count, "%u\n", *temp);
}
+
#define MAX_STRING_SIZE 128
void ldlm_proc_namespace(struct ldlm_namespace *ns)
{
struct lprocfs_vars lock_vars[2];
char lock_name[MAX_STRING_SIZE + 1];
+ LASSERT(ns != NULL);
+ LASSERT(ns->ns_name != NULL);
+
lock_name[MAX_STRING_SIZE] = '\0';
memset(lock_vars, 0, sizeof(lock_vars));
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
+
lock_vars[0].data = &ns->ns_locks;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
lock_vars[0].read_fptr = lprocfs_uint_rd;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
}
+#endif
#undef MAX_STRING_SIZE
#define LDLM_MAX_UNUSED 20
spin_lock(&ldlm_namespace_lock);
list_add(&ns->ns_list_chain, &ldlm_namespace_list);
spin_unlock(&ldlm_namespace_lock);
+#ifdef __KERNEL__
ldlm_proc_namespace(ns);
+#endif
RETURN(ns);
out_hash:
- memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+ POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
vfree(ns->ns_hash);
atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
out_ns:
if (local_only || rc != ELDLM_OK)
ldlm_lock_cancel(lock);
} else {
- LDLM_DEBUG0(lock, "Freeing a lock still held by a "
- "client node");
+ LDLM_DEBUG(lock, "Freeing a lock still held by a "
+ "client node");
ldlm_resource_unlink_lock(lock);
ldlm_lock_destroy(lock);
ldlm_namespace_cleanup(ns, 0);
- memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+ POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
list_del_init(&res->lr_hash);
list_del_init(&res->lr_childof);
- memset(res, 0x5a, sizeof(*res));
+ POISON(res, 0x5a, sizeof(*res));
kmem_cache_free(ldlm_resource_slab, res);
l_unlock(&ns->ns_lock);
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_OST /* XXX WRONG */
+#ifdef __KERNEL__
#include <linux/module.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd.h>
#include <linux/obd_ost.h>
#include <linux/lustre_net.h>
#include <linux/lustre_dlm.h>
imp->imp_obd = obddev;
cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+#if !defined(__KERNEL__) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ cli->cl_sandev = 0;
+#else
+ cli->cl_sandev.value = 0;
+#endif
+
+ RETURN(0);
+}
+
+#ifdef __KERNEL__
+/* convert a pathname into a kdev_t */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+static kdev_t path2dev(char *path)
+{
+        struct nameidata nd;
+        struct inode *inode;
+        kdev_t dev = 0;
+
+        /* Resolve the path, following symlinks.  A failure in either
+         * lookup step means there is no device to report. */
+        if (!path_init(path, LOOKUP_FOLLOW, &nd) || path_walk(path, &nd))
+                return 0;
+
+        /* Only a healthy block-device inode yields a device number;
+         * anything else leaves dev at 0. */
+        inode = nd.dentry->d_inode;
+        if (inode && !is_bad_inode(inode) && S_ISBLK(inode->i_mode))
+                dev = inode->i_rdev;
+
+        /* Drop the references taken by the successful lookup. */
+        path_release(&nd);
+
+        return dev;
+}
+#else
+/* Convert a pathname into the numeric value of its block device
+ * (i_rdev.value).  Returns 0 if the path cannot be looked up or does not
+ * name a block device. */
+static int path2dev(char *path)
+{
+        struct dentry *dentry;
+        struct nameidata nd;
+        int dev = 0;
+
+        if (!path_init(path, LOOKUP_FOLLOW, &nd))
+                return 0;
+
+        if (path_walk(path, &nd))
+                return 0;
+
+        dentry = nd.dentry;
+        /* This variant is only compiled when LINUX_VERSION_CODE is at least
+         * KERNEL_VERSION(2,5,0) (it sits in the #else of that test), so the
+         * .value accessor is the only reachable form and no nested version
+         * check is needed. */
+        if (dentry->d_inode && !is_bad_inode(dentry->d_inode) &&
+            S_ISBLK(dentry->d_inode->i_mode))
+                dev = dentry->d_inode->i_rdev.value;
+        path_release(&nd);
+
+        return dev;
+}
+#endif
+
+/* Set up a SAN client OBD from the ioctl data in 'buf'.
+ *
+ * 'buf' is read as a struct obd_ioctl_data with three inline buffers:
+ *   ioc_inlbuf1 - target UUID, copied into cli->cl_target_uuid
+ *   ioc_inlbuf2 - server UUID, used to look up the import connection
+ *   ioc_inlbuf3 - pathname of the SAN block device, resolved via path2dev()
+ *
+ * Returns 0 on success; -EINVAL if a buffer is missing, a UUID is longer
+ * than 37 bytes, or the pathname does not resolve to a block device;
+ * -ENOENT if ptlrpc_uuid_to_connection() returns NULL.  'len' is unused. */
+int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf)
+{
+        struct obd_ioctl_data* data = buf;
+        struct client_obd *cli = &obddev->u.cli;
+        struct obd_import *imp = &cli->cl_import;
+        struct obd_uuid server_uuid;
+        ENTRY;
+
+        /* inlbuf1 carries the target UUID. */
+        if (data->ioc_inllen1 < 1) {
+                CERROR("requires a TARGET UUID\n");
+                RETURN(-EINVAL);
+        }
+
+        if (data->ioc_inllen1 > 37) {
+                CERROR("target UUID must be less than 38 characters\n");
+                RETURN(-EINVAL);
+        }
+
+        /* inlbuf2 carries the server UUID. */
+        if (data->ioc_inllen2 < 1) {
+                CERROR("setup requires a SERVER UUID\n");
+                RETURN(-EINVAL);
+        }
+
+        if (data->ioc_inllen2 > 37) {
+                CERROR("server UUID must be less than 38 characters\n");
+                RETURN(-EINVAL);
+        }
+
+        /* inlbuf3 carries the SAN device pathname. */
+        if (data->ioc_inllen3 < 1) {
+                CERROR("setup requires a SAN device pathname\n");
+                RETURN(-EINVAL);
+        }
+
+        /* cl_sandev is a plain value before 2.5 and a struct with a .value
+         * member afterwards; a zero device means the lookup failed. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        cli->cl_sandev = path2dev(data->ioc_inlbuf3);
+        if (!cli->cl_sandev) {
+                CERROR("%s seems not a valid SAN device\n", data->ioc_inlbuf3);
+                RETURN(-EINVAL);
+        }
+#else
+        cli->cl_sandev.value = path2dev(data->ioc_inlbuf3);
+        if (!cli->cl_sandev.value) {
+                CERROR("%s seems not a valid SAN device\n", data->ioc_inlbuf3);
+                RETURN(-EINVAL);
+        }
+#endif
+
+        sema_init(&cli->cl_sem, 1);
+        cli->cl_conn_count = 0;
+        /* Both lengths were bounded to 37 bytes above.
+         * NOTE(review): server_uuid is not zeroed first, so this presumably
+         * relies on the inline buffers being NUL-terminated within their
+         * stated length -- confirm against the ioctl packing code. */
+        memcpy(cli->cl_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1);
+        memcpy(server_uuid.uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
+                                                        sizeof(server_uuid)));
+
+        imp->imp_connection = ptlrpc_uuid_to_connection(&server_uuid);
+        if (!imp->imp_connection)
+                RETURN(-ENOENT);
+
+        INIT_LIST_HEAD(&imp->imp_replay_list);
+        INIT_LIST_HEAD(&imp->imp_sending_list);
+        INIT_LIST_HEAD(&imp->imp_delayed_list);
+        spin_lock_init(&imp->imp_lock);
+
+        ptlrpc_init_client(OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+                           "sanosc", &obddev->obd_ldlm_client);
+        imp->imp_client = &obddev->obd_ldlm_client;
+        imp->imp_obd = obddev;
+
+        cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
RETURN(0);
}
+#endif
int client_obd_cleanup(struct obd_device * obddev)
{
GOTO(out_disco, rc = -ENOMEM);
INIT_LIST_HEAD(&imp->imp_chain);
- imp->imp_last_xid = 0;
imp->imp_max_transno = 0;
imp->imp_peer_committed_transno = 0;
out_ldlm:
ldlm_namespace_free(obd->obd_namespace);
obd->obd_namespace = NULL;
- if (rq_opc == MDS_CONNECT) {
- /* Don't class_disconnect OSCs, because the LOV
- * cares about them even if they can't connect to the
- * OST.
- *
- * This is leak-bait, but without either a way to
- * operate on the osc without an export or separate
- * methods for connect-to-osc and connect-osc-to-ost
- * it's not clear what else to do.
- */
out_disco:
- cli->cl_conn_count--;
- class_disconnect(conn);
- }
+ cli->cl_conn_count--;
+ class_disconnect(conn);
}
out_sem:
up(&cli->cl_sem);
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
+#include <linux/version.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/locks.h> // for wait_on_buffer
#else
fid->f_type = HTON__u32(S_IFMT & inode->i_mode);
}
-
void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
{
b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
OBD_MD_FLNLINK | OBD_MD_FLGENER;
+
+ /* The MDS file size isn't authoritative for regular files, so don't
+ * even pretend. */
+ if (S_ISREG(inode->i_mode))
+ b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+
b->ino = HTON__u32(inode->i_ino);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
b->atime = HTON__u32(inode->i_atime);
b->mtime = HTON__u32(inode->i_mtime);
b->ctime = HTON__u32(inode->i_ctime);
+#else
+ b->atime = HTON__u32(inode->i_atime.tv_sec);
+ b->mtime = HTON__u32(inode->i_mtime.tv_sec);
+ b->ctime = HTON__u32(inode->i_ctime.tv_sec);
+#endif
b->mode = HTON__u32(inode->i_mode);
b->size = HTON__u64(inode->i_size);
b->blocks = HTON__u64(inode->i_blocks);
rec->cr_uid = HTON__u32(uid);
rec->cr_gid = HTON__u32(gid);
rec->cr_time = HTON__u64(time);
+ if (in_group_p(dir->i_gid))
+ rec->cr_suppgid = HTON__u32(dir->i_gid);
+ else
+ rec->cr_suppgid = HTON__u32(-1);
tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1);
LOGL0(name, namelen, tmp);
rec->sa_uid = HTON__u32(iattr->ia_uid);
rec->sa_gid = HTON__u32(iattr->ia_gid);
rec->sa_size = HTON__u64(iattr->ia_size);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
rec->sa_atime = HTON__u64(iattr->ia_atime);
rec->sa_mtime = HTON__u64(iattr->ia_mtime);
rec->sa_ctime = HTON__u64(iattr->ia_ctime);
+#else
+ rec->sa_atime = HTON__u64(iattr->ia_atime.tv_sec);
+ rec->sa_mtime = HTON__u64(iattr->ia_mtime.tv_sec);
+ rec->sa_ctime = HTON__u64(iattr->ia_ctime.tv_sec);
+#endif
rec->sa_attr_flags = HTON__u32(iattr->ia_attr_flags);
if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
rec->rn_fsuid = HTON__u32(current->fsuid);
rec->rn_fsgid = HTON__u32(current->fsgid);
rec->rn_cap = HTON__u32(current->cap_effective);
+ if (in_group_p(srcdir->i_gid))
+ rec->rn_suppgid1 = HTON__u32(srcdir->i_gid);
+ else
+ rec->rn_suppgid1 = HTON__u32(-1);
+ if (in_group_p(tgtdir->i_gid))
+ rec->rn_suppgid2 = HTON__u32(tgtdir->i_gid);
+ else
+ rec->rn_suppgid2 = HTON__u32(-1);
ll_inode2fid(&rec->rn_fid1, srcdir);
ll_inode2fid(&rec->rn_fid2, tgtdir);
r->ur_fsuid = NTOH__u32(rec->sa_fsuid);
r->ur_fsgid = NTOH__u32(rec->sa_fsgid);
r->ur_cap = NTOH__u32(rec->sa_cap);
- r->ur_suppgid = NTOH__u32(rec->sa_suppgid);
+ r->ur_suppgid1 = NTOH__u32(rec->sa_suppgid);
+ r->ur_suppgid2 = NTOH__u32(-1);
r->ur_fid1 = &rec->sa_fid;
attr->ia_valid = NTOH__u32(rec->sa_valid);
attr->ia_mode = NTOH__u32(rec->sa_mode);
attr->ia_uid = NTOH__u32(rec->sa_uid);
attr->ia_gid = NTOH__u32(rec->sa_gid);
attr->ia_size = NTOH__u64(rec->sa_size);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
attr->ia_atime = NTOH__u64(rec->sa_atime);
attr->ia_mtime = NTOH__u64(rec->sa_mtime);
attr->ia_ctime = NTOH__u64(rec->sa_ctime);
+#else
+ attr->ia_atime.tv_sec = NTOH__u64(rec->sa_atime);
+ attr->ia_mtime.tv_sec = NTOH__u64(rec->sa_mtime);
+ attr->ia_ctime.tv_sec = NTOH__u64(rec->sa_ctime);
+#endif
attr->ia_attr_flags = NTOH__u32(rec->sa_attr_flags);
if (req->rq_reqmsg->bufcount == offset + 2) {
r->ur_gid = NTOH__u32(rec->cr_gid);
r->ur_time = NTOH__u64(rec->cr_time);
r->ur_flags = NTOH__u32(rec->cr_flags);
- r->ur_suppgid = NTOH__u32(rec->cr_suppgid);
+ r->ur_suppgid1 = NTOH__u32(rec->cr_suppgid);
+ r->ur_suppgid2 = NTOH__u32(-1);
r->ur_name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
r->ur_fsuid = NTOH__u32(rec->lk_fsuid);
r->ur_fsgid = NTOH__u32(rec->lk_fsgid);
r->ur_cap = NTOH__u32(rec->lk_cap);
- r->ur_suppgid = NTOH__u32(rec->lk_suppgid);
+ r->ur_suppgid1 = NTOH__u32(rec->lk_suppgid);
+ r->ur_suppgid2 = NTOH__u32(-1);
r->ur_fid1 = &rec->lk_fid1;
r->ur_fid2 = &rec->lk_fid2;
r->ur_fsgid = NTOH__u32(rec->ul_fsgid);
r->ur_cap = NTOH__u32(rec->ul_cap);
r->ur_mode = NTOH__u32(rec->ul_mode);
- r->ur_suppgid = NTOH__u32(rec->ul_suppgid);
+ r->ur_suppgid1 = NTOH__u32(rec->ul_suppgid);
+ r->ur_suppgid2 = NTOH__u32(-1);
r->ur_fid1 = &rec->ul_fid1;
r->ur_fid2 = &rec->ul_fid2;
r->ur_fsuid = NTOH__u32(rec->rn_fsuid);
r->ur_fsgid = NTOH__u32(rec->rn_fsgid);
r->ur_cap = NTOH__u32(rec->rn_cap);
+ r->ur_suppgid1 = NTOH__u32(rec->rn_suppgid1);
+ r->ur_suppgid2 = NTOH__u32(rec->rn_suppgid2);
r->ur_fid1 = &rec->rn_fid1;
r->ur_fid2 = &rec->rn_fid2;
*/
#define DEBUG_SUBSYSTEM S_OST
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
#include <linux/obd_ost.h>
#include <linux/lustre_net.h>
OBD_SET_CTXT_MAGIC(save);
/*
- CDEBUG(D_INFO, "== push %p->%p == cur fs %p pwd %p (%*s), pwdmnt %p\n",
+ CDEBUG(D_INFO,
+ "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
save, current, current->fs, current->fs->pwd,
+ atomic_read(¤t->fs->pwd->d_count),
+ atomic_read(¤t->fs->pwd->d_inode->i_count),
current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt);
+ current->fs->pwdmnt,
+ atomic_read(¤t->fs->pwdmnt->mnt_count));
*/
save->fs = get_fs();
current->fsuid = uc->ouc_fsuid;
current->fsgid = uc->ouc_fsgid;
current->cap_effective = uc->ouc_cap;
- if (uc->ouc_suppgid != -1)
- current->groups[current->ngroups++] = uc->ouc_suppgid;
+ if (uc->ouc_suppgid1 != -1)
+ current->groups[current->ngroups++] = uc->ouc_suppgid1;
+ if (uc->ouc_suppgid2 != -1)
+ current->groups[current->ngroups++] = uc->ouc_suppgid2;
}
set_fs(new_ctx->fs);
set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
/*
- CDEBUG(D_INFO, "== push %p==%p == cur fs %p pwd %p (%*s), pwdmnt %p\n",
+ CDEBUG(D_INFO,
+ "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
new_ctx, current, current->fs, current->fs->pwd,
+ atomic_read(¤t->fs->pwd->d_count),
+ atomic_read(¤t->fs->pwd->d_inode->i_count),
current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt);
+ current->fs->pwdmnt,
+ atomic_read(¤t->fs->pwdmnt->mnt_count));
*/
}
ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
/*
- CDEBUG(D_INFO, " == pop %p==%p == cur %p pwd %p (%*s), pwdmnt %p\n",
+ CDEBUG(D_INFO,
+ " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
new_ctx, current, current->fs, current->fs->pwd,
+ atomic_read(¤t->fs->pwd->d_count),
+ atomic_read(¤t->fs->pwd->d_inode->i_count),
current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt);
+ current->fs->pwdmnt,
+ atomic_read(¤t->fs->pwdmnt->mnt_count));
*/
LASSERT(current->fs->pwd == new_ctx->pwd);
LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
- //printk("pc2");
set_fs(saved->fs);
- //printk("pc3\n");
set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
- //printk("pc4");
dput(saved->pwd);
- //printk("pc5");
mntput(saved->pwdmnt);
- //printk("pc6\n");
if (uc) {
current->fsuid = saved->fsuid;
current->fsgid = saved->fsgid;
current->cap_effective = saved->cap;
- if (uc->ouc_suppgid != -1)
+ if (uc->ouc_suppgid1 != -1)
+ current->ngroups--;
+ if (uc->ouc_suppgid2 != -1)
current->ngroups--;
}
/*
- CDEBUG(D_INFO, "== pop %p->%p == cur fs %p pwd %p (%*s), pwdmnt %p\n",
+ CDEBUG(D_INFO,
+ "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
saved, current, current->fs, current->fs->pwd,
+ atomic_read(¤t->fs->pwd->d_count),
+ atomic_read(¤t->fs->pwd->d_inode->i_count),
current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt);
+ current->fs->pwdmnt,
+ atomic_read(¤t->fs->pwdmnt->mnt_count));
*/
}
RETURN(0);
}
-int target_handle_connect(struct ptlrpc_request *req)
+
+int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
{
struct obd_device *target;
struct obd_export *export = NULL;
if (!target)
GOTO(out, rc = -ENODEV);
+ spin_lock_bh(&target->obd_processing_task_lock);
+ if (target->obd_flags & OBD_ABORT_RECOVERY)
+ target_abort_recovery(target);
+ spin_unlock_bh(&target->obd_processing_task_lock);
+
conn.addr = req->rq_reqmsg->addr;
conn.cookie = req->rq_reqmsg->cookie;
spin_unlock(&target->obd_dev_lock);
/* Tell the client if we're in recovery. */
- if (target->obd_flags & OBD_RECOVERING)
+ /* If this is the first client, start the recovery timer */
+ if (target->obd_flags & OBD_RECOVERING) {
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+ target_start_recovery_timer(target, handler);
+ }
/* Tell the client if we support replayable requests */
if (target->obd_flags & OBD_REPLAYABLE)
}
}
- if (rc == EALREADY) {
- /* We indicate the reconnection in a flag, not an error code. */
- lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
- rc = 0;
- } else if (rc) {
- GOTO(out, rc);
- }
-
/* If all else goes well, this is our RPC return code. */
- req->rq_status = rc;
+ req->rq_status = 0;
+
+ if (rc && rc != EALREADY)
+ GOTO(out, rc);
req->rq_repmsg->addr = conn.addr;
req->rq_repmsg->cookie = conn.cookie;
ptlrpc_put_connection(req->rq_connection);
req->rq_connection = ptlrpc_connection_addref(export->exp_connection);
+ if (rc == EALREADY) {
+ /* We indicate the reconnection in a flag, not an error code. */
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
+ GOTO(out, rc = 0);
+ }
+
spin_lock(&export->exp_connection->c_lock);
list_add(&export->exp_conn_chain, &export->exp_connection->c_exports);
spin_unlock(&export->exp_connection->c_lock);
list_for_each_safe(expiter, n, &conn->c_exports) {
exp = list_entry(expiter, struct obd_export, exp_conn_chain);
+ CDEBUG(D_HA, "disconnecting export %p/%s\n",
+ exp, exp->exp_client_uuid.uuid);
hdl.addr = (__u64)(unsigned long)exp;
hdl.cookie = exp->exp_cookie;
rc = obd_disconnect(&hdl);
LBUG();
RETURN(-ENOSYS);
}
+
+/*
+ * Recovery functions
+ */
+
+/*
+ * Fail every reply parked on obd->obd_delayed_reply_queue: each queued
+ * request is answered with status -ENOTCONN as a PTL_RPC_MSG_ERR message,
+ * then unlinked from the queue and freed.
+ */
+static void abort_delayed_replies(struct obd_device *obd)
+{
+        struct list_head *pos, *next;
+
+        /* _safe iteration: each entry is unlinked and freed as we walk. */
+        list_for_each_safe(pos, next, &obd->obd_delayed_reply_queue) {
+                struct ptlrpc_request *req;
+
+                req = list_entry(pos, struct ptlrpc_request, rq_list);
+                DEBUG_REQ(D_ERROR, req, "aborted:");
+                req->rq_status = -ENOTCONN;
+                req->rq_type = PTL_RPC_MSG_ERR;
+                ptlrpc_reply(req->rq_svc, req);
+                list_del(&req->rq_list);
+                OBD_FREE(req, sizeof *req);
+        }
+}
+
+/*
+ * Abort recovery on a target: zero the recoverable-client count, clear the
+ * OBD_RECOVERING and OBD_ABORT_RECOVERY flags, fail all delayed replies and
+ * then disconnect every client.
+ *
+ * data is the struct obd_device to abort, passed as a void pointer.
+ *
+ * The function unlocks obd_processing_task_lock before
+ * class_disconnect_all() and locks it again afterwards, so it must be
+ * entered with that lock held (via spin_lock_bh) and returns with it held.
+ */
+void target_abort_recovery(void *data)
+{
+ struct obd_device *obd = data;
+ CERROR("disconnecting clients and aborting recovery\n");
+ obd->obd_recoverable_clients = 0;
+ obd->obd_flags &= ~(OBD_RECOVERING | OBD_ABORT_RECOVERY);
+ abort_delayed_replies(obd);
+ /* Drop the lock across the disconnect, then restore the caller's lock state. */
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ class_disconnect_all(obd);
+ spin_lock_bh(&obd->obd_processing_task_lock);
+}
+
+/*
+ * Recovery timer callback.  The timer's data word carries the obd_device
+ * pointer.  Flags the device with OBD_ABORT_RECOVERY and wakes anyone
+ * sleeping on obd_next_transno_waitq; the abort itself is not done here.
+ */
+static void target_recovery_expired(unsigned long castmeharder)
+{
+        struct obd_device *obd;
+
+        obd = (struct obd_device *)castmeharder;
+        CERROR("recovery timed out, aborting\n");
+
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        obd->obd_flags |= OBD_ABORT_RECOVERY;
+        wake_up(&obd->obd_next_transno_waitq);
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+}
+
+/*
+ * (Re)arm the recovery timer so that it fires OBD_RECOVERY_TIMEOUT jiffies
+ * from now, logging the timeout in seconds.
+ */
+static void reset_recovery_timer(struct obd_device *obd)
+{
+        unsigned long expires;
+
+        CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
+               OBD_RECOVERY_TIMEOUT / HZ);
+        expires = jiffies + OBD_RECOVERY_TIMEOUT;
+        mod_timer(&obd->obd_recovery_timer, expires);
+}
+
+
+/*
+ * Start the recovery timer for obd, but only on the first call: a non-NULL
+ * obd_recovery_handler marks the timer as already started, and later calls
+ * return without touching it.
+ *
+ * handler is stored in obd_recovery_handler; process_recovery_queue() calls
+ * it for each queued request.
+ */
+void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler)
+{
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        if (obd->obd_recovery_handler) {
+                spin_unlock_bh(&obd->obd_processing_task_lock);
+                return;
+        }
+        CERROR("%s: starting recovery timer\n", obd->obd_name);
+        obd->obd_recovery_handler = handler;
+        /* Initialise the timer first, then fill in its fields, so the
+         * callback and its argument cannot be disturbed by init_timer(). */
+        init_timer(&obd->obd_recovery_timer);
+        obd->obd_recovery_timer.function = target_recovery_expired;
+        obd->obd_recovery_timer.data = (unsigned long)obd;
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+
+        /* Arm the timer outside the lock. */
+        reset_recovery_timer(obd);
+}
+
+/* Remove the pending recovery timer of obd, if any, via del_timer(). */
+static void cancel_recovery_timer(struct obd_device *obd)
+{
+ del_timer(&obd->obd_recovery_timer);
+}
+
+/*
+ * Wake-up condition for a task sleeping on obd_next_transno_waitq.
+ *
+ * Returns non-zero when the request at the head of obd_recovery_queue
+ * carries the next expected transno, when the device is no longer in
+ * recovery, or when an abort has been requested (OBD_ABORT_RECOVERY).
+ * The abort test is needed because the recovery timer only sets that flag
+ * and calls wake_up(); without it the sleeper would re-evaluate the
+ * condition, find it still false and go back to sleep.
+ *
+ * Reads the head of the queue without taking a lock and assumes the queue
+ * is not empty.
+ */
+static int check_for_next_transno(struct obd_device *obd)
+{
+        struct ptlrpc_request *req;
+
+        req = list_entry(obd->obd_recovery_queue.next,
+                         struct ptlrpc_request, rq_list);
+        LASSERT(req->rq_reqmsg->transno >= obd->obd_next_recovery_transno);
+
+        return req->rq_reqmsg->transno == obd->obd_next_recovery_transno ||
+               (obd->obd_flags & OBD_RECOVERING) == 0 ||
+               (obd->obd_flags & OBD_ABORT_RECOVERY) != 0;
+}
+
+/*
+ * Replay the requests on obd_recovery_queue in transno order.
+ *
+ * Must run in the task whose pid is recorded in obd_processing_task (this
+ * is asserted on every iteration).  Each pass looks at the first queued
+ * request:
+ *  - if its transno is the next expected one, it is unlinked, handed to
+ *    obd_recovery_handler, the recovery timer is re-armed, the request copy
+ *    is freed and obd_next_recovery_transno is incremented;
+ *  - otherwise the task sleeps on obd_next_transno_waitq until
+ *    check_for_next_transno() is true, and aborts recovery if
+ *    OBD_ABORT_RECOVERY is set once it wakes.
+ * The loop ends when the queue is empty, at which point
+ * obd_processing_task is reset to 0.
+ *
+ * NOTE(review): the abort path returns without resetting
+ * obd_processing_task and without the EXIT trace - confirm this is intended.
+ */
+static void process_recovery_queue(struct obd_device *obd)
+{
+ struct ptlrpc_request *req;
+ int aborted = 0;
+ ENTRY;
+
+ for (;;) {
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ LASSERT(obd->obd_processing_task == current->pid);
+ /* Takes the first entry; assumes the queue is not empty here. */
+ req = list_entry(obd->obd_recovery_queue.next,
+ struct ptlrpc_request, rq_list);
+
+ if (req->rq_reqmsg->transno != obd->obd_next_recovery_transno) {
+ /* Gap in the sequence: sleep (unlocked) until woken with the condition true. */
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
+ LPD64")\n",
+ obd->obd_next_recovery_transno,
+ req->rq_reqmsg->transno);
+ wait_event(obd->obd_next_transno_waitq,
+ check_for_next_transno(obd));
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ /* target_abort_recovery() is entered with the lock held. */
+ if (obd->obd_flags & OBD_ABORT_RECOVERY) {
+ target_abort_recovery(obd);
+ aborted = 1;
+ }
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ if (aborted)
+ return;
+ continue;
+ }
+ /* list_del_init leaves rq_list empty, which target_queue_recovery_request() asserts on re-entry. */
+ list_del_init(&req->rq_list);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ DEBUG_REQ(D_ERROR, req, "processing: ");
+ /* The handler's return value is deliberately discarded. */
+ (void)obd->obd_recovery_handler(req);
+ reset_recovery_timer(obd);
+#warning FIXME: mds_fsync_super(mds->mds_sb);
+ OBD_FREE(req, sizeof *req);
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ obd->obd_next_recovery_transno++;
+ if (list_empty(&obd->obd_recovery_queue)) {
+ /* Nothing left: give up the processing role. */
+ obd->obd_processing_task = 0;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ break;
+ }
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ }
+ EXIT;
+}
+
+/*
+ * Queue a replayed request on obd_recovery_queue, sorted by transno, and
+ * process the queue if nobody else is doing so.
+ *
+ * Returns 1 when the request was NOT queued: either it has no transno, or
+ * the calling task is the one currently processing the queue.
+ * Returns 0 when a copy of the request was placed on the queue; in that
+ * case the caller's req is not the object that gets processed and freed.
+ *
+ * NOTE(review): presumably a return of 1 tells the caller to handle the
+ * request itself - confirm against the callers.
+ */
+int target_queue_recovery_request(struct ptlrpc_request *req,
+ struct obd_device *obd)
+{
+ struct list_head *tmp;
+ int inserted = 0;
+ __u64 transno = req->rq_reqmsg->transno;
+ struct ptlrpc_request *saved_req;
+
+ /* Requests without a transno are never queued. */
+ if (!transno) {
+ INIT_LIST_HEAD(&req->rq_list);
+ DEBUG_REQ(D_HA, req, "not queueing");
+ return 1;
+ }
+
+ spin_lock_bh(&obd->obd_processing_task_lock);
+
+ if (obd->obd_processing_task == current->pid) {
+ /* Processing the queue right now, don't re-add. */
+ LASSERT(list_empty(&req->rq_list));
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ return 1;
+ }
+
+ /* Queue a private copy of the request structure, not the caller's. */
+ /* NOTE(review): allocation happens with the spinlock held - confirm OBD_ALLOC cannot sleep here. */
+ OBD_ALLOC(saved_req, sizeof *saved_req);
+ if (!saved_req)
+ LBUG();
+ memcpy(saved_req, req, sizeof *req);
+ req = saved_req;
+ INIT_LIST_HEAD(&req->rq_list);
+
+ /* XXX O(n^2) */
+ /* Insert before the first queued request with a larger transno. */
+ list_for_each(tmp, &obd->obd_recovery_queue) {
+ struct ptlrpc_request *reqiter =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ if (reqiter->rq_reqmsg->transno > transno) {
+ list_add_tail(&req->rq_list, &reqiter->rq_list);
+ inserted = 1;
+ break;
+ }
+ }
+
+ /* No larger transno found: append at the end of the queue. */
+ if (!inserted) {
+ list_add_tail(&req->rq_list, &obd->obd_recovery_queue);
+ }
+
+ if (obd->obd_processing_task != 0) {
+ /* Someone else is processing this queue, we'll leave it to
+ * them.
+ */
+ if (transno == obd->obd_next_recovery_transno)
+ wake_up(&obd->obd_next_transno_waitq);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ return 0;
+ }
+
+ /* Nobody is processing, and we know there's (at least) one to process
+ * now, so we'll do the honours.
+ */
+ obd->obd_processing_task = current->pid;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ process_recovery_queue(obd);
+ return 0;
+}
+
+/* Return the obd_device that owns the export a request arrived on. */
+struct obd_device *target_req2obd(struct ptlrpc_request *req)
+{
+        struct obd_export *export = req->rq_export;
+
+        return export->exp_obd;
+}
+
+/*
+ * Hold back the reply to a request until all recoverable clients are done.
+ *
+ * A copy of req is put on obd_delayed_reply_queue.  Each call decrements
+ * obd_recoverable_clients; when it reaches zero the namespace is
+ * reprocessed, OBD_RECOVERING is cleared, every queued reply is sent and
+ * freed, and the recovery timer is cancelled.
+ *
+ * rc: when non-zero the reply is repacked as an empty PTL_RPC_MSG_ERR
+ *     message before being queued.
+ * Always returns 1.
+ *
+ * NOTE(review): allocation and ptlrpc_reply() run with
+ * obd_processing_task_lock held - confirm neither can sleep.
+ */
+int target_queue_final_reply(struct ptlrpc_request *req, int rc)
+{
+        struct obd_device *obd = target_req2obd(req);
+        struct ptlrpc_request *saved_req;
+
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        if (rc) {
+                /* Just like ptlrpc_error, but without the sending. */
+                lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
+                                &req->rq_repmsg);
+                req->rq_type = PTL_RPC_MSG_ERR;
+        }
+
+        LASSERT(list_empty(&req->rq_list));
+        OBD_ALLOC(saved_req, sizeof *saved_req);
+        /* Fail loudly instead of copying into a NULL pointer; this mirrors
+         * the allocation check in target_queue_recovery_request(). */
+        if (!saved_req)
+                LBUG();
+        memcpy(saved_req, req, sizeof *saved_req);
+        req = saved_req;
+        list_add(&req->rq_list, &obd->obd_delayed_reply_queue);
+        if (--obd->obd_recoverable_clients == 0) {
+                struct list_head *tmp, *n;
+                ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
+                CDEBUG(D_ERROR,
+                       "all clients recovered, sending delayed replies\n");
+                obd->obd_flags &= ~OBD_RECOVERING;
+                /* _safe iteration: each entry is unlinked and freed. */
+                list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
+                        req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                        DEBUG_REQ(D_ERROR, req, "delayed:");
+                        ptlrpc_reply(req->rq_svc, req);
+                        list_del(&req->rq_list);
+                        OBD_FREE(req, sizeof *req);
+                }
+                cancel_recovery_timer(obd);
+        } else {
+                CERROR("%d recoverable clients remain\n",
+                       obd->obd_recoverable_clients);
+        }
+
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+        return 1;
+}
--- /dev/null
+.Xrefs
+config.log
+config.status
+configure
+Makefile
+Makefile.in
+.deps
+TAGS
+libtest
--- /dev/null
+# Administration utilities Makefile
+DEFS=
+
+CFLAGS:=-g -O2 -I$(top_srcdir)/utils -I$(PORTALS)/include -I$(srcdir)/../include -Wall -L$(PORTALSLIB)
+
+KFLAGS:=
+CPPFLAGS = $(HAVE_LIBREADLINE)
+LIBS=
+LLIBS= ../lov/liblov.a ../obdecho/libobdecho.a ../osc/libosc.a ../ldlm/libldlm.a ../ptlrpc/libptlrpc.a ../obdclass/liblustreclass.a
+
+libtest_LDADD := $(LIBREADLINE) $(LLIBS) \
+ $(PORTALS)/user/procbridge/libprocbridge.a $(PORTALS)/user/tcpnal/libtcpnal.a \
+ $(PORTALS)/user/util/libtcpnalutil.a $(PORTALS)/user/$(PORTALS)/api/libptlapi.a \
+ $(PORTALS)/lib/libptllib.a -lptlctl -lpthread -lefence
+bin_PROGRAMS = libtest
+libtest_SOURCES = libtest.c
+
+include $(top_srcdir)/Rules
--- /dev/null
+#include <stdio.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <portals/api-support.h> /* needed for ptlctl.h */
+#include <portals/ptlctl.h> /* needed for parse_dump */
+
+
+#include <liblustre.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <../user/procbridge/procbridge.h>
+
+ptl_handle_ni_t tcpnal_ni;
+
+/* Arguments for the test client; only mynid is set and read in this file. */
+struct pingcli_args {
+ /* local NID; main() fills it from argv[1], init_lib_portals() copies it to tcpnal_mynid */
+ ptl_nid_t mynid;
+ ptl_nid_t nid;
+ ptl_pid_t port;
+ int count;
+ int size;
+};
+
+struct task_struct *current;
+
+struct obd_class_user_state ocus;
+
+/* portals interfaces */
+/* Return a pointer to the global tcpnal_ni handle; the nal argument is ignored. */
+inline const ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+ return &tcpnal_ni;
+}
+
+/* Counterpart of kportal_get_ni(); there is nothing to release, so it does nothing. */
+inline void
+kportal_put_ni (int nal)
+{
+}
+
+/*
+ * Allocate the global `current` task structure and fill in its command
+ * name (from argv[0]) and pid.  Exits the program if the allocation fails.
+ * Only comm and pid are initialised; the remaining fields are left as
+ * returned by malloc().
+ */
+void init_current(int argc, char **argv)
+{
+        current = malloc(sizeof(*current));
+        if (!current) {
+                printf("Malloc error\n");
+                exit(1);
+        }
+        /* strncpy() does not terminate on truncation, so copy one byte
+         * less than the buffer and terminate explicitly. */
+        strncpy(current->comm, argv[0], sizeof(current->comm) - 1);
+        current->comm[sizeof(current->comm) - 1] = '\0';
+        current->pid = getpid();
+}
+
+ptl_nid_t tcpnal_mynid;
+
+/*
+ * Bring up userspace portals: initialise the library, publish the local
+ * NID from args in tcpnal_mynid and open the procbridge interface into
+ * tcpnal_ni.  On failure portals is shut down again and the PtlNIInit
+ * error is returned; on success debugging is enabled on the interface and
+ * 0 is returned.
+ */
+int init_lib_portals(struct pingcli_args *args)
+{
+        int rc;
+
+        PtlInit();
+        tcpnal_mynid = args->mynid;
+
+        rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
+        if (rc == 0) {
+                PtlNIDebug(tcpnal_ni, ~0);
+                return rc;
+        }
+
+        CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
+        PtlFini();
+        RETURN (rc);
+}
+
+extern int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, unsigned long arg);
+
+
+/*
+ * ioctl callback handed to parse_dump() by main().  Requests for
+ * OBD_DEV_ID are forwarded to class_handle_ioctl() and then logged;
+ * requests for any other device are ignored.  The handler's return value
+ * is discarded and this function always returns 0.
+ */
+int lib_ioctl(int dev_id, int opc, void * ptr)
+{
+
+ if (dev_id == OBD_DEV_ID) {
+ /* ptr is treated as a struct obd_ioctl_data without any unpacking. */
+ struct obd_ioctl_data *ioc = ptr;
+ class_handle_ioctl(&ocus, opc, (unsigned long)ptr);
+
+ /* you _may_ need to call obd_ioctl_unpack or some
+ other verification function if you want to use ioc
+ directly here */
+ printf ("processing ioctl cmd: %x buf len: %d\n",
+ opc, ioc->ioc_len);
+ }
+ return (0);
+}
+
+/*
+ * Test driver: takes the local IP address as argv[1], initialises the
+ * userspace Lustre client stack and replays the ioctl dump in
+ * /tmp/DUMP_FILE through lib_ioctl().
+ *
+ * Exits with status 1 on a missing argument, on allocation failure or
+ * when portals cannot be initialised.
+ */
+int main(int argc, char **argv)
+{
+        struct pingcli_args *args;
+
+        /* argv[1] is required; do not read past the argument vector. */
+        if (argc < 2) {
+                printf("usage: %s <local-ip-address>\n",
+                       argc > 0 ? argv[0] : "libtest");
+                exit(1);
+        }
+
+        args = malloc(sizeof(*args));
+        if (!args) {
+                printf("Malloc error\n");
+                exit(1);
+        }
+
+        args->mynid = ntohl (inet_addr (argv[1]));
+        INIT_LIST_HEAD(&ocus.ocus_conns);
+
+        init_current(argc, argv);
+        init_obdclass();
+        /* init_lib_portals() has already shut portals down on failure, so
+         * the layers below must not be started on top of it. */
+        if (init_lib_portals(args) != 0) {
+                printf("portals initialisation failed\n");
+                exit(1);
+        }
+        ptlrpc_init();
+        ldlm_init();
+        osc_init();
+        echo_client_init();
+        /* XXX need mdc_getlovinfo before lov_init can work.. */
+        // lov_init();
+
+        parse_dump("/tmp/DUMP_FILE", lib_ioctl);
+
+        printf("Hello\n");
+        return 0;
+}
+
unlock_kernel();
/* Record that the thread is running */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
sbi->ll_commitcbd_waketime = CURRENT_TIME;
+#else
+ sbi->ll_commitcbd_waketime = CURRENT_TIME.tv_sec;
+#endif
sbi->ll_commitcbd_timeout = 10 * HZ;
sbi->ll_commitcbd_thread = current;
sbi->ll_commitcbd_flags = LL_COMMITCBD_RUNNING;
ENTRY;
LASSERT(ll_d2d(de) != NULL);
- mdc_put_rpc_lock(&mdc_rpc_lock, it);
if (it->it_lock_mode) {
handle = (struct lustre_handle *)it->it_lock_handle;
- if (it->it_op == IT_SETATTR)
- ldlm_lock_decref_and_cancel(handle, it->it_lock_mode);
- else
- ldlm_lock_decref(handle, it->it_lock_mode);
+ ldlm_lock_decref(handle, it->it_lock_mode);
/* intent_release may be called multiple times, from
this thread and we don't want to double-decref this
RETURN(0);
}
- if (it && it->it_op == IT_TRUNC)
- it->it_op = IT_SETATTR;
-
if (it == NULL || it->it_op == IT_GETATTR) {
/* We could just return 1 immediately, but since we should only
* be called in revalidate2 if we already have a lock, let's
}
rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
- if (rc == -ESTALE)
- RETURN(0);
- if (rc < 0 && it->it_status) {
+ if (rc < 0) {
CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
it->it_status);
RETURN(0);
#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
-static int ll_dir_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int ll_dir_prepare_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
return 0;
}
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index){
/* XXX why do we need this exactly, and why do we think that
* an all-zero directory page is useful?
unlock_page(page);
ll_unlock(LCK_PR, &lockh);
- mdc_put_rpc_lock(&mdc_rpc_lock, &it);
if (rc != ELDLM_OK)
CERROR("ll_unlock: err: %d\n", rc);
return rc;
-} /* ll_dir_readpage */
+}
struct address_space_operations ll_dir_aops = {
readpage: ll_dir_readpage,
loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to;
int err = 0;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
dir->i_version = ++event;
+#endif
if (new_size > dir->i_size)
dir->i_size = new_size;
SetPageUptodate(page);
int need_revalidate = (filp->f_version != inode->i_version);
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
GOTO(done, 0);
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct obd_ioctl_data *data;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
switch(cmd) {
case IOC_MDC_LOOKUP: {
} else {
/* No transno means that we can just drop our ref. */
spin_unlock_irqrestore(&imp->imp_lock, flags);
- ptlrpc_req_finished(fd->fd_req);
}
+ ptlrpc_req_finished(fd->fd_req);
/* Do this after the fd_req->rq_transno check, because we don't want
* to bounce off zero references. */
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
- obd_handle2oa(&oa, &fd->fd_osthandle);
+
+ memcpy(&oa.o_inline, fd->fd_ostdata, FD_OSTDATA_SIZE);
+ oa.o_valid |= OBD_MD_FLHANDLE;
+
rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
if (rc)
CERROR("inode %lu object close failed: rc = %d\n",
inode->i_ino, rc);
}
- mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
- mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
if (rc2 && !rc)
rc = rc2;
- if (atomic_dec_and_test(&lli->lli_open_count)) {
- CDEBUG(D_INFO, "last close, cancelling unused locks\n");
- rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
- if (rc2 && !rc) {
- rc = rc2;
- CERROR("obd_cancel_unused: %d\n", rc);
- }
- } else
- CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
-
RETURN(rc);
}
RETURN(-ENOMEM);
oa->o_id = lsm->lsm_object_id;
oa->o_mode = S_IFREG;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+ oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+ OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
rc = obd_open(conn, oa, lsm, NULL);
if (rc)
GOTO(out, rc);
file->f_flags &= ~O_LOV_DELAY_CREATE;
- obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ obdo_to_inode(inode, oa, (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME));
- obd_oa2handle(&fd->fd_osthandle, oa);
+ /* Test the flag, do not set it: only save the OST handle when obd_open() returned one. */
+ if (oa->o_valid & OBD_MD_FLHANDLE)
+ memcpy(fd->fd_ostdata, obdo_handle(oa), FD_OSTDATA_SIZE);
- atomic_inc(&ll_i2info(inode)->lli_open_count);
+ EXIT;
out:
obdo_free(oa);
- RETURN(rc);
+ return rc;
}
/* Caller must hold lli_open_sem to protect lli->lli_smd from changing and
struct ptlrpc_request *req = NULL;
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_mds_md *lmm = NULL;
- int lmm_size = 0;
struct obdo *oa;
- int rc, err;
+ struct iattr iattr;
+ int rc, err, lmm_size = 0;;
ENTRY;
oa = obdo_alloc();
lmm_size = rc;
/* Save the stripe MD with this file on the MDS */
- rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, inode, NULL,
+ memset(&iattr, 0, sizeof(iattr));
+ iattr.ia_valid = ATTR_FROM_OPEN;
+ rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, inode, &iattr,
lmm, lmm_size, &req);
ptlrpc_req_finished(req);
* lli_open_sem to ensure no other process will create objects, send the
* stripe MD to the MDS, or try to destroy the objects if that fails.
*
- * If we already have the stripe MD locally, we don't request it in
- * mdc_open() by passing a lmm_size = 0.
+ * If we already have the stripe MD locally then we don't request it in
+ * mdc_open(), by passing a lmm_size = 0.
*
* It is up to the application to ensure no other processes open this file
* in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
int rc = 0;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(file->f_dentry, it);
rc = ll_it_open_error(IT_OPEN_OPEN, it);
if (rc)
* keeps an atomic flag in the inode which indicates whether the size
* has been updated (see bug 280).
*/
-int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm,
- struct lustre_handle *handle)
+int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm, char *ostdata)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct obdo oa;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
- obd_handle2oa(&oa, handle);
+
+ if (ostdata != NULL) {
+ memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+ oa.o_valid |= OBD_MD_FLHANDLE;
+ }
+
rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
if (!rc) {
obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
OBD_MD_FLMTIME | OBD_MD_FLCTIME);
- CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lu\n",
+ CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lx\n",
lsm->lsm_object_id, inode->i_size, inode->i_size);
}
#ifdef USE_ATIME
struct iattr attr;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
attr.ia_atime = CURRENT_TIME;
+#else
+ attr.ia_atime = CURRENT_TIME.tv_sec;
+#endif
attr.ia_valid = ATTR_ATIME;
if (inode->i_atime == attr.ia_atime) return;
struct lustre_handle lockh = { 0, 0 };
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (inode == NULL)
LBUG();
ssize_t retval;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
!(sbi->ll_flags & LL_SBI_NOLCK)) {
struct ldlm_extent extent;
extent.start = *ppos;
- extent.end = *ppos + count;
+ extent.end = *ppos + count - 1;
CDEBUG(D_INFO, "Locking inode %lu, start "LPU64" end "LPU64"\n",
inode->i_ino, extent.start, extent.end);
}
/* If we don't refresh the file size, generic_file_read may not even
- * call us */
- retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ * call ll_readpage */
+ retval = ll_file_size(inode, lsm, fd->fd_ostdata);
if (retval < 0) {
CERROR("ll_file_size: "LPSZ"\n", retval);
RETURN(retval);
ssize_t retval;
ENTRY;
+ /* POSIX, but surprised the VFS doesn't check this already */
+ if (count == 0)
+ return 0;
+
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockh);
if (err)
RETURN(err);
/* Get size here so we know extent to enqueue write lock on. */
- retval = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ retval = ll_file_size(inode, lsm, fd->fd_ostdata);
if (retval)
GOTO(out_eof, retval);
!(sbi->ll_flags & LL_SBI_NOLCK)) {
struct ldlm_extent extent;
extent.start = *ppos;
- extent.end = *ppos + count;
+ extent.end = *ppos + count - 1;
CDEBUG(D_INFO, "Locking inode %lu, start "LPU64" end "LPU64"\n",
inode->i_ino, extent.start, extent.end);
struct lustre_handle *conn;
int flags;
- switch(cmd) {
- case TCGETS:
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
+
+ if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
return -ENOTTY;
+
+ switch(cmd) {
case LL_IOC_GETFLAGS:
/* Get the current value of the file flags */
return put_user(fd->fd_flags, (int *)arg);
long long retval;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
switch (origin) {
case 2: {
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_file_data *fd = file->private_data;
- retval = ll_file_size(inode, lli->lli_smd, &fd->fd_osthandle);
+ retval = ll_file_size(inode, lli->lli_smd, fd->fd_ostdata);
if (retval)
RETURN(retval);
file->f_pos = offset;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
file->f_reada = 0;
-#endif
file->f_version = ++event;
+#endif
}
retval = offset;
}
struct lov_stripe_md *lsm;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (!inode) {
CERROR("REPORT THIS LINE TO PETER\n");
RETURN(0);
}
body = lustre_msg_buf(req->rq_repmsg, 0);
+
+ if (S_ISREG(inode->i_mode) &&
+ body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) {
+ CERROR("MDS sent back size for regular file\n");
+ body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ }
+
if (body->valid & OBD_MD_FLEASIZE)
ll_update_inode(inode, body,
lustre_msg_buf(req->rq_repmsg, 1));
static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
struct kstat *stat)
{
- return ll_inode_revalidate(de);
+ int res = 0;
+ struct inode *inode = de->d_inode;
+
+ res = ll_inode_revalidate(de);
+ if (res)
+ return res;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ stat->dev = inode->i_dev;
+#endif
+ stat->ino = inode->i_ino;
+ stat->mode = inode->i_mode;
+ stat->nlink = inode->i_nlink;
+ stat->uid = inode->i_uid;
+ stat->gid = inode->i_gid;
+ stat->rdev = kdev_t_to_nr(inode->i_rdev);
+ stat->atime = inode->i_atime;
+ stat->mtime = inode->i_mtime;
+ stat->ctime = inode->i_ctime;
+ stat->size = inode->i_size;
+ return 0;
}
#endif
};
struct inode_operations ll_file_inode_operations = {
+ setattr_raw: ll_setattr_raw,
setattr: ll_setattr,
truncate: ll_truncate,
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
};
struct inode_operations ll_special_inode_operations = {
+ setattr_raw: ll_setattr_raw,
setattr: ll_setattr,
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
getattr: ll_getattr,
*/
#define DEBUG_SUBSYSTEM S_LLITE
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
}
#else
-long long mnt_instance;
-
-static inline int lprocfs_llite_statfs(void *data, struct statfs *sfs)
-{
- struct super_block *sb = (struct super_block*)data;
- return (sb->s_op->statfs)(sb, sfs);
+#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct) \
+int fct_name(char *page, char **start, off_t off, \
+ int count, int *eof, void *data) \
+{ \
+ struct statfs sfs; \
+ int rc; \
+ LASSERT(data != NULL); \
+ rc = get_statfs_fct((struct super_block*)data, &sfs); \
+ return (rc==0 \
+ ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
+ : rc); \
}
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_llite_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_llite_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_llite_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_llite_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_llite_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_llite_statfs);
+long long mnt_instance;
+
+LPROC_LLITE_STAT_FCT(rd_blksize, vfs_statfs);
+LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs);
+LPROC_LLITE_STAT_FCT(rd_kbytesfree, vfs_statfs);
+LPROC_LLITE_STAT_FCT(rd_filestotal, vfs_statfs);
+LPROC_LLITE_STAT_FCT(rd_filesfree, vfs_statfs);
+LPROC_LLITE_STAT_FCT(rd_filegroups, vfs_statfs);
int rd_path(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct super_block *sb = (struct super_block*)data;
+ LASSERT(sb != NULL);
*eof = 1;
return snprintf(page, count, "%s\n", sb->s_type->name);
}
{
struct super_block *sb = (struct super_block *)data;
+ LASSERT(sb != NULL);
*eof = 1;
return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
}
name[MAX_STRING_SIZE] = '\0';
lvars[0].name = name;
+ LASSERT(sbi != NULL);
+ LASSERT(mdc != NULL);
+ LASSERT(osc != NULL);
+
/* Mount info */
snprintf(name, MAX_STRING_SIZE, "fs%llu", mnt_instance);
mnt_instance++;
sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
- if (IS_ERR(sbi->ll_proc_root))
- RETURN(err = PTR_ERR(sbi->ll_proc_root));
-
+ if (IS_ERR(sbi->ll_proc_root)) {
+ err = PTR_ERR(sbi->ll_proc_root);
+ sbi->ll_proc_root = NULL;
+ RETURN(err);
+ }
/* Static configuration info */
err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
if (err)
/* MDC info */
strncpy(uuid.uuid, mdc, sizeof(uuid.uuid));
obd = class_uuid2obd(&uuid);
+
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_type != NULL);
+ LASSERT(obd->obd_type->typ_name != NULL);
+
snprintf(name, MAX_STRING_SIZE, "%s/common_name",
obd->obd_type->typ_name);
lvars[0].read_fptr = lprocfs_rd_name;
strncpy(uuid.uuid, osc, sizeof(uuid.uuid));
obd = class_uuid2obd(&uuid);
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_type != NULL);
+ LASSERT(obd->obd_type->typ_name != NULL);
+
snprintf(name, MAX_STRING_SIZE, "%s/common_name",
obd->obd_type->typ_name);
lvars[0].read_fptr = lprocfs_rd_name;
static int ll_intent_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
- if (it->it_op & (IT_CREAT | IT_SETATTR))
+ if (it->it_op & IT_CREAT)
return LCK_PW;
else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
return LCK_PR;
obd_id ino = 0;
ENTRY;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ if (it && it->it_op == 0)
+ *it = lookup_it;
+#endif
if (it == NULL)
it = &lookup_it;
if (it->it_disposition & IT_OPEN_CREATE)
ptlrpc_request_addref(request);
+ if (it->it_disposition & IT_OPEN_OPEN)
+ ptlrpc_request_addref(request);
if (it->it_disposition & IT_OPEN_NEG)
flag = LL_LOOKUP_NEGATIVE;
flag = LL_LOOKUP_NEGATIVE;
else
flag = LL_LOOKUP_POSITIVE;
- } else if (it->it_op & (IT_GETATTR | IT_SETATTR | IT_LOOKUP)) {
+ } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
/* For check ops, we want the lookup to succeed */
it->it_data = NULL;
if (it->it_status)
list_del_init(&dentry->d_lru);
list_del_init(&dentry->d_hash);
+ __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
spin_unlock(&dcache_lock);
- d_rehash(dentry);
atomic_inc(&dentry->d_count);
iput(inode);
dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
int rc;
ENTRY;
- if (it && it->it_op == IT_TRUNC)
- it->it_op = IT_SETATTR;
-
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
if (rc < 0) {
CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
struct inode *inode;
struct ptlrpc_request *request = NULL;
struct mds_body *body;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ time_t time = CURRENT_TIME.tv_sec;
+#else
time_t time = CURRENT_TIME;
+#endif
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct ll_read_inode2_cookie lic = { .lic_lmm = NULL, };
ENTRY;
err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX, dir,
NULL, &lockh, NULL, 0, &data, sizeof(data));
- mdc_put_rpc_lock(&mdc_rpc_lock, &it);
request = (struct ptlrpc_request *)it.it_data;
if (err < 0)
GOTO(out, err);
int rc = 0;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
it = dentry->d_it;
rc = ll_it_open_error(IT_OPEN_CREATE, it);
int rdev)
{
struct ptlrpc_request *request = NULL;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ time_t time = CURRENT_TIME.tv_sec;
+#else
time_t time = CURRENT_TIME;
+#endif
struct ll_sb_info *sbi = ll_i2sbi(dir);
int err = -EMLINK;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (dir->i_nlink >= EXT2_LINK_MAX)
RETURN(err);
struct inode *inode;
int rc = 0;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
if ((mode & S_IFMT) == 0)
const char *tgt)
{
struct ptlrpc_request *request = NULL;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ time_t time = CURRENT_TIME.tv_sec;
+#else
time_t time = CURRENT_TIME;
+#endif
struct ll_sb_info *sbi = ll_i2sbi(dir);
int err = -EMLINK;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (dir->i_nlink >= EXT2_LINK_MAX)
RETURN(err);
int err = 0;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
err = mdc_link(&sbi->ll_mdc_conn, src, dir, name, len, &request);
ptlrpc_req_finished(request);
struct inode *inode = old_dentry->d_inode;
int rc;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
if (it && it->it_disposition) {
if (it->it_status)
RETURN(it->it_status);
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ inode->i_ctime.tv_sec = CURRENT_TIME.tv_sec;
+#else
inode->i_ctime = CURRENT_TIME;
+#endif
ext2_inc_count(inode);
atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
if (rc)
RETURN(rc);
- inode->i_ctime = CURRENT_TIME;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ inode->i_ctime.tv_sec = CURRENT_TIME.tv_sec;
+#else
+ inode->i_ctime = CURRENT_TIME;
+#endif
ext2_inc_count(inode);
atomic_inc(&inode->i_count);
static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
{
struct ptlrpc_request *request = NULL;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ time_t time = CURRENT_TIME.tv_sec;
+#else
time_t time = CURRENT_TIME;
+#endif
struct ll_sb_info *sbi = ll_i2sbi(dir);
int err = -EMLINK;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (dir->i_nlink >= EXT2_LINK_MAX)
RETURN(err);
int err = -EMLINK;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
if (dir->i_nlink >= EXT2_LINK_MAX)
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
rc = ll_mdc_unlink(dir, NULL, S_IFDIR, name, len);
RETURN(rc);
}
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
rc = ll_mdc_unlink(dir, NULL, S_IFREG, name, len);
RETURN(rc);
}
struct lookup_intent * it;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
RETURN(ll_common_unlink(dir, dentry, it, S_IFREG));
struct lookup_intent *it;
int rc;
ENTRY;
-
+
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(dentry, it);
if ((!it || !it->it_disposition) && !ext2_empty_dir(inode))
struct ll_sb_info *sbi = ll_i2sbi(src);
int err;
ENTRY;
-
+
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
err = mdc_rename(&sbi->ll_mdc_conn, src, tgt,
oldname, oldlen, newname, newlen, &request);
ptlrpc_req_finished(request);
struct page * old_page;
int err;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
LL_GET_INTENT(new_dentry, it);
if (it && it->it_disposition) {
rename: ll_rename,
rename2: ll_rename2,
setattr: ll_setattr,
+ setattr_raw: ll_setattr_raw,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
revalidate: ll_inode_revalidate,
+#endif
};
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
-#include <linux/iobuf.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/unistd.h>
#include <asm/uaccess.h>
#include <linux/fs.h>
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/buffer_head.h>
+#else
+#include <linux/iobuf.h>
+#endif
#include <linux/stat.h>
#include <asm/uaccess.h>
#include <asm/segment.h>
LBUG();
if (inode->i_size <= offset) {
+ CERROR("reading beyond EOF\n");
memset(kmap(page), 0, PAGE_SIZE);
kunmap(page);
GOTO(readpage_out, rc);
}
+ /* XXX Workaround for BA OSTs returning short reads at EOF. The linux
+ * OST will return the full page, zero-filled at the end, which
+ * will just overwrite the data we set here.
+ * Bug 593 relates to fixing this properly.
+ */
+ if (inode->i_size < offset + PAGE_SIZE) {
+ int count = inode->i_size - offset;
+ void *addr = kmap(page);
+ //POISON(addr, 0x7c, count);
+ memset(addr + count, 0, PAGE_SIZE - count);
+ kunmap(page);
+ }
+
if (PageUptodate(page)) {
CERROR("Explain this please?\n");
GOTO(readpage_out, rc);
}
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
rc = ll_brw(OBD_BRW_READ, inode, page, 0);
EXIT;
oa.o_mode = inode->i_mode;
oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
oa.o_id, inode->i_size);
return;
} /* ll_truncate */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+//#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
unsigned to)
ENTRY;
addr = kmap(page);
- if (!PageLocked(page))
- LBUG();
+ LASSERT(PageLocked(page));
+
+ if (PageUptodate(page))
+ RETURN(0);
- if (Page_Uptodate(page))
- GOTO(prepare_done, rc);
+ //POISON(addr + from, 0xca, to - from);
/* We're completely overwriting an existing page, so _don't_ set it up
* to date until commit_write */
if (from == 0 && to == PAGE_SIZE)
RETURN(0);
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
/* If are writing to a new page, no need to read old data. If we
* haven't already gotten the file size in ll_file_write() since
struct ll_file_data *fd = file->private_data;
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- rc = ll_file_size(inode, lsm, &fd->fd_osthandle);
+ rc = ll_file_size(inode, lsm, fd->fd_ostdata);
if (rc)
GOTO(prepare_done, rc);
}
SetPageUptodate(page);
else
kunmap (page);
-
+
return rc;
}
* Returns the page unlocked, but with a reference.
*/
static int ll_writepage(struct page *page) {
- struct inode *inode = page->mapping->host; int err; ENTRY;
+ struct inode *inode = page->mapping->host;
+ int err;
+ ENTRY;
LASSERT(PageLocked(page));
/* XXX need to make sure we have LDLM lock on this page */
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
err = ll_brw(OBD_BRW_WRITE, inode, page, 1);
if (err)
CERROR("ll_brw failure %d\n", err);
if (!PageLocked(page))
LBUG();
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
CDEBUG(D_INODE, "commit_page writing (off "LPD64"), count %d\n",
pg.off, pg.count);
RETURN(rc);
} /* ll_commit_write */
-
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
unsigned long blocknr, int blocksize)
{
- obd_count bufs_per_obdo = iobuf->nr_pages;
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
struct brw_page *pga;
struct obd_brw_set *set;
- int i, rc = 0;
+ loff_t offset;
+ int length, i, flags, rc = 0;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (!lsm || !lsm->lsm_object_id)
RETURN(-ENOMEM);
+ /* XXX Keep here until we find ia64 problem, it crashes otherwise */
if (blocksize != PAGE_SIZE) {
CERROR("direct_IO blocksize != PAGE_SIZE\n");
RETURN(-EINVAL);
if (set == NULL)
RETURN(-ENOMEM);
- OBD_ALLOC(pga, sizeof(*pga) * bufs_per_obdo);
+ OBD_ALLOC(pga, sizeof(*pga) * iobuf->nr_pages);
if (!pga) {
obd_brw_set_free(set);
RETURN(-ENOMEM);
}
- /* NB: we can't use iobuf->maplist[i]->index for the offset
- * instead of "blocknr" because ->index contains garbage.
- */
- for (i = 0; i < bufs_per_obdo; i++, blocknr++) {
+ CDEBUG(D_PAGE, "blocksize %u, blocknr %lu, iobuf %p: nr_pages %u, "
+ "array_len %u, offset %u, length %u\n",
+ blocksize, blocknr, iobuf, iobuf->nr_pages,
+ iobuf->array_len, iobuf->offset, iobuf->length);
+
+ flags = (rw == WRITE ? OBD_BRW_CREATE : 0) /* | OBD_BRW_DIRECTIO */;
+ offset = (blocknr << inode->i_blkbits) /* + iobuf->offset? */;
+ length = iobuf->length;
+
+ for (i = 0, length = iobuf->length; length > 0;
+ length -= pga[i].count, offset += pga[i].count, i++) { /*i last!*/
pga[i].pg = iobuf->maplist[i];
- pga[i].count = PAGE_SIZE;
- pga[i].off = (obd_off)blocknr << PAGE_SHIFT;
- pga[i].flag = OBD_BRW_CREATE;
+ pga[i].off = offset;
+ /* To the end of the page, or the length, whatever is less */
+ pga[i].count = min_t(int, PAGE_SIZE - (offset & ~PAGE_MASK),
+ length);
+ pga[i].flag = flags;
+ CDEBUG(D_PAGE, "page %d (%p), offset "LPU64", count %u\n",
+ i, pga[i].pg, pga[i].off, pga[i].count);
+ if (rw == READ) {
+ //POISON(kmap(iobuf->maplist[i]), 0xc5, PAGE_SIZE);
+ //kunmap(iobuf->maplist[i]);
+ }
}
set->brw_callback = ll_brw_sync_wait;
rc = obd_brw(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
- ll_i2obdconn(inode), lsm, bufs_per_obdo, pga, set, NULL);
- if (rc)
- CERROR("error from obd_brw: rc = %d\n", rc);
- else {
+ ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set, NULL);
+ if (rc) {
+ CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+ "error from obd_brw: rc = %d\n", rc);
+ } else {
rc = ll_brw_sync_wait(set, CB_PHASE_START);
if (rc)
CERROR("error from callback: rc = %d\n", rc);
}
obd_brw_set_free(set);
if (rc == 0)
- rc = bufs_per_obdo * PAGE_SIZE;
+ rc = iobuf->length;
- OBD_FREE(pga, sizeof(*pga) * bufs_per_obdo);
+ OBD_FREE(pga, sizeof(*pga) * iobuf->nr_pages);
RETURN(rc);
}
+#endif
int ll_flush_inode_pages(struct inode * inode)
{
ENTRY;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
spin_lock(&pagecache_lock);
spin_unlock(&pagecache_lock);
+#endif
OBD_ALLOC(count, sizeof(*count) * bufs_per_obdo);
RETURN(err);
}
-#endif
+//#endif
struct address_space_operations ll_aops = {
readpage: ll_readpage,
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
direct_IO: ll_direct_IO,
+#endif
writepage: ll_writepage,
sync_page: block_sync_page,
prepare_write: ll_prepare_write,
commit_write: ll_commit_write,
bmap: NULL
-#endif
+//#endif
};
struct super_operations ll_super_operations;
/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root;
+struct proc_dir_entry *proc_lustre_fs_root = NULL;
/* lproc_llite.c */
extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
struct super_block *sb,
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
OBD_ALLOC(sbi, sizeof(*sbi));
if (!sbi)
RETURN(NULL);
struct ll_fid rootfid;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
list_del(&sbi->ll_conn_chain);
ll_commitcbd_cleanup(sbi);
obd_disconnect(&sbi->ll_osc_conn);
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
rc = mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
if (rc < 0) {
CERROR("mdc_cancel_unused: %d\n", rc);
static void ll_delete_inode(struct inode *inode)
{
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (S_ISREG(inode->i_mode)) {
int err;
struct obdo *oa;
RETURN(err);
}
+/*
+ * Single-RPC setattr entry point (wired up as the setattr_raw inode op).
+ *
+ * A size change (ATTR_SIZE) is applied locally with vmtruncate() and is then
+ * cleared from attr->ia_valid, so it is never sent to the MDS.  Whatever
+ * attributes remain are sent to the MDS with mdc_setattr().  For a regular
+ * file with ATTR_MTIME_SET, the new mtime is also pushed to the OST object
+ * with obd_setattr().
+ *
+ * NOTE: attr->ia_valid is modified in place (ATTR_SIZE is cleared).
+ *
+ * Returns 0 on success.  Otherwise returns the vmtruncate() error, the
+ * mdc_setattr() error, or -- only when mdc_setattr() succeeded -- the
+ * obd_setattr() error.
+ */
+int ll_setattr_raw(struct inode *inode, struct iattr *attr)
+{
+        struct ptlrpc_request *request = NULL;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int err = 0;
+        ENTRY;
+
+        if ((attr->ia_valid & ATTR_SIZE)) {
+                err = vmtruncate(inode, attr->ia_size);
+                if (err)
+                        RETURN(err);
+        }
+
+        /* Don't send size changes to MDS to avoid "fast EA" problems, and
+         * also avoid a pointless RPC (we get file size from OST anyways).
+         */
+        attr->ia_valid &= ~ATTR_SIZE;
+        if (!attr->ia_valid)
+                RETURN(0);
+
+        err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, NULL, 0,
+                          &request);
+        if (err)
+                CERROR("mdc_setattr fails: err = %d\n", err);
+
+        /* An MDS failure is logged but does not stop the OST update below;
+         * the MDS error is still what gets returned in that case. */
+        ptlrpc_req_finished(request);
+
+        if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
+                /* NOTE(review): lsm is dereferenced without a NULL check,
+                 * while ll_direct_IO guards against a NULL lsm -- confirm it
+                 * cannot be NULL for a regular file on this path. */
+                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+                struct obdo oa;
+                int err2;
+
+                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
+                       inode->i_ino, attr->ia_mtime);
+                oa.o_id = lsm->lsm_object_id;
+                oa.o_mode = S_IFREG;
+                oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
+                oa.o_mtime = attr->ia_mtime;
+                err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (err2) {
+                        /* Report the OST error itself; 'err' holds the MDS
+                         * result and is usually 0 here. */
+                        CERROR("obd_setattr fails: rc=%d\n", err2);
+                        if (!err)
+                                err = err2;
+                }
+        }
+        RETURN(err);
+}
+
int ll_setattr(struct dentry *de, struct iattr *attr)
{
int rc = inode_change_ok(de->d_inode, attr);
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (rc)
return rc;
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
memset(sfs, 0, sizeof(*sfs));
rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
statfs_unpack(sfs, &osfs);
struct ll_inode_info *lli = ll_i2info(inode);
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
sema_init(&lli->lli_open_sem, 1);
- atomic_set(&lli->lli_open_count, 0);
LASSERT(!lli->lli_smd);
struct list_head *ctmp;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
list_for_each(ctmp, &sbi->ll_conn_chain) {
struct ptlrpc_connection *conn;
#include <linux/lprocfs_status.h>
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
kmem_cache_t *ll_file_data_slab;
extern struct address_space_operations ll_aops;
extern struct address_space_operations ll_dir_aops;
struct super_operations ll_super_operations;
/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root;
+struct proc_dir_entry *proc_lustre_fs_root = NULL;
/* lproc_llite.c */
extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
struct super_block *sb,
extern int ll_commitcbd_cleanup(struct ll_sb_info *);
int ll_read_inode2(struct inode *inode, void *opaque);
-extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc)
+extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
static char *ll_read_opt(const char *opt, char *data)
{
struct ptlrpc_connection *mdc_conn;
struct ll_read_inode2_cookie lic;
class_uuid_t uuid;
+ struct obd_uuid param_uuid;
ENTRY;
RETURN(-ENOMEM);
INIT_LIST_HEAD(&sbi->ll_conn_chain);
+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
generate_random_uuid(uuid);
- class_uuid_unparse(uuid, sbi->ll_sb_uuid);
+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
sb->s_fs_info = sbi;
GOTO(out_free, sb = NULL);
}
- obd = class_uuid2obd(mdc);
+ strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
+ obd = class_uuid2obd(&param_uuid);
if (!obd) {
CERROR("MDC %s: not setup or attached\n", mdc);
GOTO(out_free, sb = NULL);
}
- err = obd_connect(&sbi->ll_mdc_conn, obd, sbi->ll_sb_uuid,
+ err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid,
ptlrpc_recovd, ll_recover);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", mdc, err);
GOTO(out_mdc, sb = NULL);
}
- err = obd_connect(&sbi->ll_osc_conn, obd, sbi->ll_sb_uuid,
+ err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid,
ptlrpc_recovd, ll_recover);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", osc, err);
sbi->ll_rootino = rootfid.id;
memset(&osfs, 0, sizeof(osfs));
- err = mdc_statfs(&sbi->ll_mdc_conn, &osfs);
+ err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
sb->s_blocksize = osfs.os_bsize;
sb->s_blocksize_bits = log2(osfs.os_bsize);
sb->s_magic = LL_SUPER_MAGIC;
if (root) {
sb->s_root = d_alloc_root(root);
+ root->i_state &= ~(I_LOCK | I_NEW);
} else {
CERROR("lustre_lite: bad iget4 for root\n");
GOTO(out_cdb, sb = NULL);
static void ll_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct list_head *tmp, *next;
struct ll_fid rootfid;
ENTRY;
}
obd_disconnect(&sbi->ll_mdc_conn);
+
+ spin_lock(&dcache_lock);
+ list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
+ shrink_dcache_parent(dentry);
+ }
+ spin_unlock(&dcache_lock);
+
OBD_FREE(sbi, sizeof(*sbi));
EXIT;
EXIT;
}
+#if 0
static void ll_delete_inode(struct inode *inode)
{
ENTRY;
clear_inode(inode);
EXIT;
}
+#endif
/* like inode_setattr, but doesn't mark the inode dirty */
static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
*/
attr->ia_valid &= ~ATTR_SIZE;
if (attr->ia_valid) {
- err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
+ err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, NULL, 0,
+ &request);
if (err)
CERROR("mdc_setattr fails: err = %d\n", err);
if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
struct obdo oa;
- int err;
+ int err2;
CDEBUG(D_ERROR, "setting mtime on OST\n");
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
- oa.o_mtime = attr->ia_mtime;
- err = obd_setattr(&sbi->ll_osc_conn, &oa, lsm);
- if (err) {
+ oa.o_mtime = attr->ia_mtime.tv_sec;
+ err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
+ if (err2) {
CERROR("obd_setattr fails: rc=%d\n", err);
- if (!rc)
- rc = err;
+ if (!err)
+ err = err2;
}
}
}
if (body->valid & OBD_MD_FLID)
inode->i_ino = body->ino;
if (body->valid & OBD_MD_FLATIME)
- inode->i_atime = body->atime;
+ inode->i_atime.tv_sec = body->atime;
if (body->valid & OBD_MD_FLMTIME)
- inode->i_mtime = body->mtime;
+ inode->i_mtime.tv_sec = body->mtime;
if (body->valid & OBD_MD_FLCTIME)
- inode->i_ctime = body->ctime;
+ inode->i_ctime.tv_sec = body->ctime;
if (body->valid & OBD_MD_FLMODE)
inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
if (body->valid & OBD_MD_FLTYPE)
spin_lock(&conn->c_lock);
conn->c_flags |= CONN_INVALID;
- invalidate_request_list(&conn->c_sending_head);
+ /*invalidate_request_list(&conn->c_sending_head);*/
invalidate_request_list(&conn->c_delayed_head);
spin_unlock(&conn->c_lock);
}
alloc_inode: ll_alloc_inode,
destroy_inode: ll_destroy_inode,
clear_inode: ll_clear_inode,
- delete_inode: ll_delete_inode,
+// delete_inode: ll_delete_inode,
put_super: ll_put_super,
statfs: ll_statfs,
umount_begin: ll_umount_begin
};
+
struct file_system_type lustre_lite_fs_type = {
.owner = THIS_MODULE,
.name = "lustre_lite",
.get_sb = ll_get_sb,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
static int __init init_lustre_lite(void)
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/smp_lock.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/lustre_lite.h>
int rc;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
/* on symlinks lli_open_sem protects lli_symlink_name allocation/data */
down(&lli->lli_open_sem);
rc = ll_readlink_internal(inode, &request, &symname);
RETURN(rc);
}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
struct lookup_intent *it)
{
char *symname;
ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
if (it != NULL) {
op = it->it_op;
mode = it->it_mode;
RETURN(rc);
}
+#else
+/*
+ * follow_link for kernels newer than 2.5.0, where the lookup intent is
+ * carried in nd->it rather than passed as a separate argument.
+ *
+ * The intent's op and mode are saved, the intent is released on the dentry,
+ * and the link target is read with ll_readlink_internal() while holding
+ * lli_open_sem.  On success the saved op/mode are restored into nd->it and
+ * the target is handed to vfs_follow_link().
+ *
+ * Returns the ll_readlink_internal() error if the target cannot be read,
+ * otherwise the result of vfs_follow_link().
+ */
+static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+        struct inode *inode = dentry->d_inode;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        /* Start from NULL so the ptlrpc_req_finished() call on the error
+         * path never sees an indeterminate pointer. */
+        struct ptlrpc_request *request = NULL;
+        int op = 0, mode = 0, rc;
+        char *symname;
+        ENTRY;
+
+        /* Remember the intent before releasing it; it is restored below. */
+        op = nd->it.it_op;
+        mode = nd->it.it_mode;
+
+        ll_intent_release(dentry, &nd->it);
+
+        down(&lli->lli_open_sem);
+
+        rc = ll_readlink_internal(inode, &request, &symname);
+        if (rc)
+                GOTO(out, rc);
+
+        nd->it.it_op = op;
+        nd->it.it_mode = mode;
+
+        rc = vfs_follow_link(nd, symname);
+ out:
+        up(&lli->lli_open_sem);
+        ptlrpc_req_finished(request);
+
+        RETURN(rc);
+}
+#endif
extern int ll_inode_revalidate(struct dentry *dentry);
extern int ll_setattr(struct dentry *de, struct iattr *attr);
struct inode_operations ll_fast_symlink_inode_operations = {
readlink: ll_readlink,
setattr: ll_setattr,
+ setattr_raw: ll_setattr_raw,
follow_link2: ll_follow_link,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
revalidate: ll_inode_revalidate
+#endif
};
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/swapctl.h>
+#endif
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/stat.h>
DEFS=
+if LIBLUSTRE
+lib_LIBRARIES = liblov.a
+LINX=client.c
+liblov_a_SOURCES = lov_obd.c lov_pack.c $(LINX)
+else
MODULE = lov
modulefs_DATA = lov.o
EXTRA_PROGRAMS = lov
LINX=client.c
-
lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c $(LINX)
+endif
+
client.c:
test -e client.c || ln -sf $(top_srcdir)/lib/client.c
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_LOV
-
+#ifdef __KERNEL__
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <asm/div64.h>
+#else
+#include <liblustre.h>
+#endif
+
#include <linux/obd_support.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_net.h>
#include <linux/lustre_mds.h>
#include <linux/obd_class.h>
#include <linux/obd_lov.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <asm/div64.h>
#include <linux/lprocfs_status.h>
-
static kmem_cache_t *lov_file_cache;
struct lov_file_handles {
struct list_head lfh_list;
__u64 lfh_cookie;
int lfh_count;
- struct lustre_handle *lfh_handles;
+ char *lfh_data; /* an array of opaque data saved on behalf of
+ * each osc, FD_OSTDATA_SIZE bytes for each */
};
struct lov_lock_handles {
struct lov_obd *lov = &obd->u.lov;
struct client_obd *mdc = &lov->mdcobd->u.cli;
struct lov_desc *desc = &lov->desc;
+ struct lov_tgt_desc *tgts;
struct obd_export *exp;
struct lustre_handle mdc_conn;
struct obd_uuid lov_mds_uuid = {"LOV_MDS_UUID"};
- struct obd_uuid uuid;
char *tmp;
int rc, rc2, i;
ENTRY;
memcpy(desc, lustre_msg_buf(req->rq_repmsg, 0), sizeof(*desc));
lov_unpackdesc(desc);
- if (req->rq_repmsg->buflens[1] < sizeof(uuid.uuid)*desc->ld_tgt_count){
+ if (req->rq_repmsg->buflens[1] <
+ sizeof(desc->ld_uuid.uuid) * desc->ld_tgt_count){
CERROR("LOV desc: invalid uuid array returned\n");
GOTO(out_conn, rc = -EINVAL);
}
}
tmp = lustre_msg_buf(req->rq_repmsg, 1);
- for (i = 0; i < desc->ld_tgt_count; i++) {
- struct obd_device *tgt;
+ for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
+ struct obd_uuid *uuid = &tgts->uuid;
+ struct obd_device *tgt_obd;
struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
- strncpy(uuid.uuid, tmp, sizeof(uuid.uuid));
- memcpy(&lov->tgts[i].uuid, &uuid, sizeof(uuid));
- tgt = client_tgtuuid2obd(&uuid);
- tmp += sizeof(uuid.uuid);
+ obd_str2uuid(uuid, tmp);
+ tgt_obd = client_tgtuuid2obd(uuid);
+ tmp += sizeof(uuid->uuid);
- if (!tgt) {
- CERROR("Target %s not attached\n", uuid.uuid);
+ if (!tgt_obd) {
+ CERROR("Target %s not attached\n", uuid->uuid);
GOTO(out_disc, rc = -EINVAL);
}
- if (!(tgt->obd_flags & OBD_SET_UP)) {
- CERROR("Target %s not set up\n", uuid.uuid);
+ if (!(tgt_obd->obd_flags & OBD_SET_UP)) {
+ CERROR("Target %s not set up\n", uuid->uuid);
GOTO(out_disc, rc = -EINVAL);
}
- rc = obd_connect(&lov->tgts[i].conn, tgt, &lov_osc_uuid, recovd,
+ rc = obd_connect(&tgts->conn, tgt_obd, &lov_osc_uuid, recovd,
recover);
if (rc) {
- CERROR("Target %s connect error %d\n", uuid.uuid,
- rc);
+ CERROR("Target %s connect error %d\n", uuid->uuid, rc);
GOTO(out_disc, rc);
}
- rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
- sizeof(struct obd_device *), obd, NULL);
+ rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &tgts->conn,
+ sizeof(struct obd_device *), obd, NULL);
if (rc) {
CERROR("Target %s REGISTER_LOV error %d\n",
- uuid.uuid, rc);
+ uuid->uuid, rc);
+ obd_disconnect(&tgts->conn);
GOTO(out_disc, rc);
}
desc->ld_active_tgt_count++;
- lov->tgts[i].active = 1;
+ tgts->active = 1;
}
mdc->cl_max_mds_easize = obd_size_wiremd(conn, NULL);
RETURN(rc);
out_disc:
- i--; /* skip failed-connect OSC */
while (i-- > 0) {
- desc->ld_active_tgt_count--;
- lov->tgts[i].active = 0;
- memcpy(&uuid, &lov->tgts[i].uuid, sizeof(uuid));
- rc2 = obd_disconnect(&lov->tgts[i].conn);
+ struct obd_uuid uuid;
+ --tgts;
+ --desc->ld_active_tgt_count;
+ tgts->active = 0;
+ obd_str2uuid(&uuid, tgts->uuid.uuid);
+ rc2 = obd_disconnect(&tgts->conn);
if (rc2)
CERROR("error: LOV target %s disconnect on OST idx %d: "
"rc = %d\n", uuid.uuid, i, rc2);
CERROR("discarding open LOV handle %p:"LPX64"\n",
lfh, lfh->lfh_cookie);
list_del(&lfh->lfh_list);
- OBD_FREE(lfh->lfh_handles,
- lfh->lfh_count * sizeof(*lfh->lfh_handles));
- kmem_cache_free(lov_file_cache, lfh);
+ OBD_FREE(lfh->lfh_data, lfh->lfh_count * FD_OSTDATA_SIZE);
+ PORTAL_SLAB_FREE(lfh, lov_file_cache, sizeof(*lfh));
}
spin_unlock(&exp->exp_lov_data.led_lock);
if (!lsm_new)
GOTO(out_cleanup, rc = -ENOMEM);
memcpy(lsm_new, lsm, size);
+ lsm_new->lsm_stripe_count = obj_alloc;
+
/* XXX LOV STACKING call into osc for sizes */
OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
lsm = lsm_new;
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(&tmp),
+ lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(&tmp),
+ lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
obdo_cpy_md(tmp, oa, oa->o_valid);
if (lfh)
- memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(tmp),
+ lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
else
tmp->o_valid &= ~OBD_MD_FLHANDLE;
if (!tmp)
RETURN(-ENOMEM);
- lfh = kmem_cache_alloc(lov_file_cache, GFP_KERNEL);
+ PORTAL_SLAB_ALLOC(lfh, lov_file_cache, sizeof(*lfh));
if (!lfh)
GOTO(out_tmp, rc = -ENOMEM);
- OBD_ALLOC(lfh->lfh_handles,
- lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
- if (!lfh->lfh_handles)
+ OBD_ALLOC(lfh->lfh_data, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
+ if (!lfh->lfh_data)
GOTO(out_lfh, rc = -ENOMEM);
lov = &export->exp_obd->u.lov;
rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
NULL, NULL);
if (rc) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: open objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n",
- oa->o_id, lsm->lsm_oinfo[i].loi_id,
- loi->loi_ost_idx, rc);
- goto out_handles;
- }
- continue;
+ if (!lov->tgts[loi->loi_ost_idx].active)
+ continue;
+ CERROR("error: open objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n",
+ oa->o_id, lsm->lsm_oinfo[i].loi_id,
+ loi->loi_ost_idx, rc);
+ goto out_handles;
}
lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
if (tmp->o_valid & OBD_MD_FLHANDLE)
- memcpy(&lfh->lfh_handles[i], obdo_handle(tmp),
- sizeof(lfh->lfh_handles[i]));
+ memcpy(lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ obdo_handle(tmp), FD_OSTDATA_SIZE);
}
handle = obdo_handle(oa);
memcpy(tmp, oa, sizeof(*tmp));
tmp->o_id = loi->loi_id;
- memcpy(obdo_handle(tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(tmp), lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
NULL, NULL);
}
}
- OBD_FREE(lfh->lfh_handles,
- lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
+ OBD_FREE(lfh->lfh_data, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
out_lfh:
- lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(lov_file_cache, lfh);
+ PORTAL_SLAB_FREE(lfh, lov_file_cache, sizeof(*lfh));
goto out_tmp;
}
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(&tmp),
+ lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
}
}
if (lfh) {
+ spin_lock(&export->exp_lov_data.led_lock);
list_del(&lfh->lfh_list);
- OBD_FREE(lfh->lfh_handles,
- lsm->lsm_stripe_count * sizeof(*lfh->lfh_handles));
- lfh->lfh_cookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(lov_file_cache, lfh);
+ spin_unlock(&export->exp_lov_data.led_lock);
+
+ OBD_FREE(lfh->lfh_data, lsm->lsm_stripe_count*FD_OSTDATA_SIZE);
+ PORTAL_SLAB_FREE(lfh, lov_file_cache, sizeof(*lfh));
}
RETURN(rc);
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), &lfh->lfh_handles[i],
- sizeof(lfh->lfh_handles[i]));
+ memcpy(obdo_handle(&tmp),
+ lfh->lfh_data + i * FD_OSTDATA_SIZE,
+ FD_OSTDATA_SIZE);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
lockh->addr = (__u64)(unsigned long)lov_lockh;
lockh->cookie = lov_lockh->llh_cookie;
lov_lockhp = lov_lockh->llh_handles;
- } else
+ } else {
lov_lockhp = lockh;
+ }
lov = &export->exp_obd->u.lov;
for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
struct lov_stripe_md submd;
int err;
- if (lov_lockhp->addr == 0 ||
+ if (lov_lockhp->cookie == 0 ||
lov->tgts[loi->loi_ost_idx].active == 0)
continue;
sizeof(*lov_lockh->llh_handles) *
lsm->lsm_stripe_count);
}
- lockh->addr = 0;
lockh->cookie = DEAD_HANDLE_MAGIC;
RETURN(rc);
struct lov_stripe_md submd;
int err;
- if (lov_lockhp->addr == 0) {
+ if (lov_lockhp->cookie == 0) {
CDEBUG(D_HA, "lov idx %d no lock?\n", loi->loi_ost_idx);
continue;
}
sizeof(*lov_lockh->llh_handles) *
lsm->lsm_stripe_count);
}
- lockh->addr = 0;
lockh->cookie = DEAD_HANDLE_MAGIC;
RETURN(rc);
o_iocontrol: lov_iocontrol
};
-static int __init lov_init(void)
+int __init lov_init(void)
{
struct lprocfs_static_vars lvars;
int rc;
class_unregister_type(OBD_LOV_DEVICENAME);
}
+#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver");
MODULE_LICENSE("GPL");
module_init(lov_init);
module_exit(lov_exit);
+#endif
*/
#define DEBUG_SUBSYSTEM S_LLITE
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
#include <linux/lustre_net.h>
#include <linux/obd.h>
lmm.lmm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
- if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count) {
+#if 0 /* the stripe_count/offset is "advisory", and it gets fixed later */
+ if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count &&
+ lmm.lmm_stripe_count != 0xffffffff) {
CERROR("stripe count %u more than OST count %d\n",
lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
}
+#endif
if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
CERROR("stripe size %u not multiple of %lu\n",
lmm.lmm_stripe_size, PAGE_SIZE);
RETURN(-EINVAL);
}
- if ((__u64)lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
+ stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
+
+ if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
CERROR("stripe width %ux%u > %lu on 32-bit system\n",
lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
RETURN(-EINVAL);
}
- stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
-
/* XXX LOV STACKING call into osc for sizes */
OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
if (!lsm)
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/lprocfs_status.h>
#include <linux/obd_class.h>
void *data)
{
struct obd_device *dev = (struct obd_device *)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
}
void *data)
{
struct obd_device *dev = (struct obd_device *)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
}
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, "%u\n", desc->ld_pattern);
}
void *data)
{
struct obd_device *dev = (struct obd_device *)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
}
void *data)
{
struct obd_device *dev = (struct obd_device*)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, "%u\n", desc->ld_tgt_count);
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- struct lov_desc *desc = &dev->u.lov.desc;
+ struct lov_desc *desc;
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
*eof = 1;
return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
}
{
struct obd_device *dev = (struct obd_device*) data;
int len = 0, i;
- struct lov_obd *lov = &dev->u.lov;
- struct lov_tgt_desc *tgts = lov->tgts;
+ struct lov_obd *lov;
+ struct lov_tgt_desc *tgts;
+
+ LASSERT(dev != NULL);
+ lov = &dev->u.lov;
+ tgts = lov->tgts;
+ LASSERT(tgts != NULL);
for (i = 0; i < lov->desc.ld_tgt_count; i++, tgts++) {
int cur;
int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*) data;
- struct lov_obd *lov = &dev->u.lov;
+ struct lov_obd *lov;
+ LASSERT(dev != NULL);
+ lov = &dev->u.lov;
*eof = 1;
return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
}
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
#include <linux/obd_class.h>
#include <linux/lustre_mds.h>
-extern struct semaphore mdc_sem;
-
+/* mdc_setattr does its own semaphore handling */
static int mdc_reint(struct ptlrpc_request *request, int level)
{
int rc;
if (!(*opcodeptr == REINT_SETATTR))
mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
-
rc = ptlrpc_queue_wait(request);
if (!(*opcodeptr == REINT_SETATTR))
mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
return rc;
}
+/* If mdc_setattr is called without ATTR_FROM_OPEN set in iattr->ia_valid,
+ * then it is a normal RPC that should take the normal semaphore
+ * (mdc_rpc_lock) and go to the normal portal.
+ *
+ * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
+ * magic open-path setattr that should take the setattr semaphore
+ * (mdc_setattr_lock) and go to the setattr portal (MDS_SETATTR_PORTAL). */
int mdc_setattr(struct lustre_handle *conn, struct inode *inode,
struct iattr *iattr, void *ea, int ealen,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mds_rec_setattr *rec;
+ struct mdc_rpc_lock *rpc_lock;
int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
ENTRY;
+ LASSERT(iattr != NULL);
+
if (ealen > 0)
bufcount = 2;
if (!req)
RETURN(-ENOMEM);
- /* XXX FIXME bug 249 */
- req->rq_request_portal = MDS_GETATTR_PORTAL;
+ if (iattr->ia_valid & ATTR_FROM_OPEN) {
+ req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
+ rpc_lock = &mdc_setattr_lock;
+ } else
+ rpc_lock = &mdc_rpc_lock;
mds_setattr_pack(req, inode, iattr, ea, ealen);
size[0] = sizeof(struct mds_body);
req->rq_replen = lustre_msg_size(1, size);
+ mdc_get_rpc_lock(rpc_lock, NULL);
rc = mdc_reint(req, LUSTRE_CONN_FULL);
+ mdc_put_rpc_lock(rpc_lock, NULL);
+
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
extern int mds_queue_req(struct ptlrpc_request *);
struct mdc_rpc_lock mdc_rpc_lock;
+struct mdc_rpc_lock mdc_setattr_lock;
EXPORT_SYMBOL(mdc_rpc_lock);
/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
if (!req)
GOTO(out, rc = -ENOMEM);
- /* XXX FIXME bug 249 */
- req->rq_request_portal = MDS_GETATTR_PORTAL;
-
body = lustre_msg_buf(req->rq_reqmsg, 0);
ll_ino2fid(&body->fid1, ino, 0, type);
body->valid = valid;
/* XXX what tells us that 'data' is a valid inode at all?
* we should probably validate the lock handle first?
*/
-
inode = igrab(inode);
- if (inode == NULL) /* inode->i_state & I_FREEING */
+ if (inode == NULL) /* inode->i_state & I_FREEING */
break;
if (S_ISDIR(inode->i_mode)) {
ll_invalidate_inode_pages(inode);
}
- if (inode != inode->i_sb->s_root->d_inode)
+ if (inode->i_sb->s_root &&
+ inode != inode->i_sb->s_root->d_inode)
d_unhash_aliases(inode);
iput(inode);
lit->opc = NTOH__u64((__u64)it->it_op);
/* pack the intended request */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
mds_open_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
current->fsgid, CURRENT_TIME, it->it_flags,
de->d_name.name, de->d_name.len, tgt, tgtlen);
+#else
+ mds_open_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
+ current->fsgid, CURRENT_TIME.tv_sec, it->it_flags,
+ de->d_name.name, de->d_name.len, tgt, tgtlen);
+#endif
req->rq_replen = lustre_msg_size(3, repsize);
} else if (it->it_op & IT_UNLINK) {
size[2] = sizeof(struct mds_rec_unlink);
d->unl_de, d->unl_mode,
d->unl_name, d->unl_len);
req->rq_replen = lustre_msg_size(3, repsize);
- } else if (it->it_op & (IT_GETATTR| IT_SETATTR | IT_LOOKUP)) {
+ } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
size[2] = sizeof(struct mds_body);
size[3] = de->d_name.len + 1;
lock_type, NULL, 0, lock_mode, &flags,
ldlm_completion_ast, mdc_blocking_ast, dir, NULL,
lockh);
+ mdc_put_rpc_lock(&mdc_rpc_lock, it);
/* If we successfully created, mark the request so that replay will
* do the right thing */
lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
}
- dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
-
/* This can go when we're sure that this can never happen */
LASSERT(rc != -ENOENT);
if (rc == ELDLM_LOCK_ABORTED) {
/* The server almost certainly gave us a lock other than the
* one that we asked for. If we already have a matching lock,
* then cancel this one--we don't need two. */
- LDLM_DEBUG0(lock, "matching against this");
+ LDLM_DEBUG(lock, "matching against this");
memcpy(&lockh2, lockh, sizeof(lockh2));
if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
LDLM_LOCK_PUT(lock);
}
+ dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
it->it_disposition = (int) dlm_rep->lock_policy_res1;
it->it_status = (int) dlm_rep->lock_policy_res2;
it->it_lock_mode = lock_mode;
void mdc_set_open_replay_data(struct ll_file_data *fd)
{
+ struct ptlrpc_request *req = fd->fd_req;
+ struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 2);
+ struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+ /* Copy fid1 from the reply body (reply buffer 1) into cr_replayfid of
+ * the create record held in request buffer 2, before the replay
+ * callback and its data are attached to the request.
+ * NOTE(review): presumably this lets a replayed open name the same
+ * object the MDS returned -- confirm against mdc_replay_open. */
+ memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
fd->fd_req->rq_replay_cb = mdc_replay_open;
fd->fd_req->rq_replay_data = &fd->fd_mdshandle;
}
req->rq_replen = lustre_msg_size(0, NULL);
+ mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
EXIT;
out:
struct ptlrpc_bulk_desc *desc = NULL;
struct ptlrpc_bulk_page *bulk = NULL;
struct mds_body *body;
- unsigned long flags;
int rc, size = sizeof(*body);
ENTRY;
if (!req)
GOTO(out2, rc = -ENOMEM);
+ /* XXX FIXME bug 249 */
+ req->rq_request_portal = MDS_READPAGE_PORTAL;
+
bulk = ptlrpc_prep_bulk_page(desc);
if (bulk == NULL)
GOTO(out2, rc = -ENOMEM);
- spin_lock_irqsave(&imp->imp_lock, flags);
- bulk->bp_xid = ++imp->imp_last_bulk_xid;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ bulk->bp_xid = ptlrpc_next_xid();
bulk->bp_buflen = PAGE_CACHE_SIZE;
bulk->bp_buf = addr;
if (rc)
GOTO(check_rc, rc);
} else if (flags & MSG_CONNECT_RECONNECT) {
- DEBUG_REQ(D_HA, req, "reconnecting to MDS\n");
+ DEBUG_REQ(D_HA, req, "reconnecting to MDS");
/* Nothing else to do here. */
} else {
- DEBUG_REQ(D_HA, req, "evicted: invalidating\n");
+ DEBUG_REQ(D_HA, req, "evicted: invalidating");
/* Otherwise, clean everything up. */
ldlm_namespace_cleanup(ns, 1);
ptlrpc_abort_inflight(imp, 0);
ptlrpc_req_finished(req);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
- imp->imp_flags &= ~IMP_INVALID;
spin_unlock_irqrestore(&imp->imp_lock, flags);
ptlrpc_wake_delayed(imp);
{
struct lprocfs_static_vars lvars;
mdc_init_rpc_lock(&mdc_rpc_lock);
+ mdc_init_rpc_lock(&mdc_setattr_lock);
lprocfs_init_vars(&lvars);
return class_register_type(&mdc_obd_ops, lvars.module_vars,
LUSTRE_MDC_NAME);
#include <linux/init.h>
#include <linux/obd_class.h>
#include <linux/random.h>
-#include <linux/locks.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
+#include <linux/mount.h>
+#else
+#include <linux/locks.h>
#endif
#include <linux/obd_lov.h>
#include <linux/lustre_mds.h>
extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
struct obd_uuid *uuidarray);
extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc);
-extern void mds_start_transno(struct mds_obd *mds);
-extern int mds_finish_transno(struct mds_obd *mds, void *handle,
- struct ptlrpc_request *req, int rc);
+int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
+ struct ptlrpc_request *req, int rc, int disp);
static int mds_cleanup(struct obd_device * obddev);
inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
return result;
}
-static void mds_abort_recovery(void *data);
/* Establish a connection to the MDS.
*
struct obd_export *exp;
struct mds_export_data *med;
struct mds_client_data *mcd;
- struct mds_obd *mds = &obd->u.mds;
int rc;
ENTRY;
RETURN(-EINVAL);
/* Check for aborted recovery. */
- spin_lock_bh(&mds->mds_processing_task_lock);
+ spin_lock_bh(&obd->obd_processing_task_lock);
if (obd->obd_flags & OBD_ABORT_RECOVERY)
- mds_abort_recovery(mds);
- spin_unlock_bh(&mds->mds_processing_task_lock);
+ target_abort_recovery(obd);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
/* XXX There is a small race between checking the list and adding a
* new connection for the same UUID, but the real threat (list
mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
kmem_cache_free(mds_file_cache, mfd);
- if (file->f_dentry->d_parent)
+ if (file->f_dentry->d_parent) {
+ LASSERT(atomic_read(&file->f_dentry->d_parent->d_count));
de = dget(file->f_dentry->d_parent);
+ }
rc = filp_close(file, 0);
if (de)
l_dput(de);
spin_unlock(&med->med_open_lock);
ldlm_cancel_locks_for_export(export);
+ if (med->med_outstanding_reply) {
+ /* Fake the ack, so the locks get cancelled. */
+ med->med_outstanding_reply->rq_flags &= ~PTL_RPC_FL_WANT_ACK;
+ med->med_outstanding_reply->rq_flags |= PTL_RPC_FL_ERR;
+ wake_up(&med->med_outstanding_reply->rq_wait_for_rep);
+ med->med_outstanding_reply = NULL;
+ }
mds_client_free(export);
rc = class_disconnect(conn);
struct lustre_handle lockh;
int rc;
- LDLM_DEBUG0(lock, "already unused, calling ldlm_cli_cancel");
+ LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel");
ldlm_lock2handle(lock, &lockh);
rc = ldlm_cli_cancel(&lockh);
if (rc < 0)
CERROR("ldlm_cli_cancel: %d\n", rc);
} else {
- LDLM_DEBUG0(lock, "Lock still has references, will be "
- "cancelled later");
+ LDLM_DEBUG(lock, "Lock still has references, will be "
+ "cancelled later");
}
RETURN(0);
}
rc = 0;
}
- return rc;
+ RETURN(rc);
}
static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
return(rc);
}
+/* Rebuild the reply for a resent getattr_name request.
+ *
+ * Restores rq_transno and rq_status from the export's mds_client_data and,
+ * if a reply is still outstanding on the export, takes over its ack locks.
+ * When the saved status is non-zero nothing more is done.  Otherwise the
+ * parent is looked up by fid, the child by name, and the child's
+ * attributes are packed into the reply again.
+ *
+ * offset is the index of the mds_body in the request message; the name is
+ * in the buffer at offset + 1.  client_lockh is not used here.
+ *
+ * This is more copy-and-paste from getattr_name than I'd like. */
+static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
+                                     struct lustre_handle *client_lockh)
+{
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct mds_obd *mds = mds_req2mds(req);
+        struct dentry *parent, *child;
+        struct mds_body *body;
+        struct inode *dir;
+        struct obd_run_ctxt saved;
+        struct obd_ucred uc;
+        int namelen, rc = 0;
+        char *name;
+
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = mcd->mcd_last_result;
+
+        if (med->med_outstanding_reply)
+                mds_steal_ack_locks(med, req);
+
+        /* A saved error needs no reply body; this return is before
+         * push_ctxt(), so there is nothing to unwind. */
+        if (req->rq_status)
+                return;
+
+        body = lustre_msg_buf(req->rq_reqmsg, offset);
+        name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
+        namelen = req->rq_reqmsg->buflens[offset + 1];
+        /* requests were at offset 2, replies go back at 1 */
+        if (offset)
+                offset = 1;
+
+        uc.ouc_fsuid = body->fsuid;
+        uc.ouc_fsgid = body->fsgid;
+        uc.ouc_cap = body->capability;
+        uc.ouc_suppgid1 = body->suppgid;
+        uc.ouc_suppgid2 = -1;
+        push_ctxt(&saved, &mds->mds_ctxt, &uc);
+        parent = mds_fid2dentry(mds, &body->fid1, NULL);
+        LASSERT(!IS_ERR(parent));
+        dir = parent->d_inode;
+        LASSERT(dir);
+        child = lookup_one_len(name, parent, namelen - 1);
+        LASSERT(!IS_ERR(child));
+
+        if (!med->med_outstanding_reply) {
+                /* XXX need to enqueue client lock */
+                LBUG();
+        }
+
+        if (req->rq_repmsg == NULL)
+                mds_getattr_pack_msg(req, child->d_inode, offset);
+
+        rc = mds_getattr_internal(obd, child, req, body, offset);
+        LASSERT(!rc);
+        l_dput(child);
+        l_dput(parent);
+        /* Balance the push_ctxt() above; without this the pushed context
+         * was never restored on the way out. */
+        pop_ctxt(&saved, &mds->mds_ctxt, &uc);
+}
+
static int mds_getattr_name(int offset, struct ptlrpc_request *req,
struct lustre_handle *child_lockh)
{
- struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
- int lock_mode;
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct obd_run_ctxt saved;
struct obd_ucred uc;
struct ldlm_res_id child_res_id = { .name = {0} };
struct lustre_handle parent_lockh;
- int namelen, flags = 0, rc = 0;
+ int namelen, flags = 0, rc = 0, cleanup_phase = 0;
char *name;
ENTRY;
LASSERT(!strcmp(obd->obd_type->typ_name, "mds"));
+ MDS_CHECK_RESENT(req,
+ reconstruct_getattr_name(offset, req, child_lockh));
+
if (req->rq_reqmsg->bufcount <= offset + 1) {
LBUG();
- GOTO(out_pre_de, rc = -EINVAL);
+ GOTO(cleanup, rc = -EINVAL);
}
body = lustre_msg_buf(req->rq_reqmsg, offset);
uc.ouc_fsuid = body->fsuid;
uc.ouc_fsgid = body->fsgid;
uc.ouc_cap = body->capability;
- uc.ouc_suppgid = body->suppgid;
+ uc.ouc_suppgid1 = body->suppgid;
+ uc.ouc_suppgid2 = -1;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
/* Step 1: Lookup/lock parent */
de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
&parent_lockh);
if (IS_ERR(de))
- GOTO(out_pre_de, rc = PTR_ERR(de));
+ GOTO(cleanup, rc = PTR_ERR(de));
dir = de->d_inode;
LASSERT(dir);
+ cleanup_phase = 1; /* parent dentry and lock */
+
CDEBUG(D_INODE, "parent ino %lu, name %*s\n", dir->i_ino,namelen,name);
/* Step 2: Lookup child */
dchild = lookup_one_len(name, de, namelen - 1);
if (IS_ERR(dchild)) {
CDEBUG(D_INODE, "child lookup error %ld\n", PTR_ERR(dchild));
- GOTO(out_step_1, rc = PTR_ERR(dchild));
- } else if (dchild->d_inode == NULL) {
- GOTO(out_step_2, rc = -ENOENT);
+ GOTO(cleanup, rc = PTR_ERR(dchild));
+ }
+
+ cleanup_phase = 2; /* child dentry */
+
+ if (dchild->d_inode == NULL) {
+ GOTO(cleanup, rc = -ENOENT);
}
/* Step 3: Lock child */
- if (it->opc == IT_SETATTR)
- lock_mode = LCK_PW;
- else
- lock_mode = LCK_PR;
child_res_id.name[0] = dchild->d_inode->i_ino;
child_res_id.name[1] = dchild->d_inode->i_generation;
rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
- child_res_id, LDLM_PLAIN, NULL, 0, lock_mode,
+ child_res_id, LDLM_PLAIN, NULL, 0, LCK_PR,
&flags, ldlm_completion_ast, mds_blocking_ast,
NULL, NULL, child_lockh);
if (rc != ELDLM_OK) {
CERROR("ldlm_cli_enqueue: %d\n", rc);
- GOTO(out_step_2, rc = -EIO);
+ GOTO(cleanup, rc = -EIO);
}
+ cleanup_phase = 3; /* child lock */
+
if (req->rq_repmsg == NULL)
mds_getattr_pack_msg(req, dchild->d_inode, offset);
rc = mds_getattr_internal(obd, dchild, req, body, offset);
- if (rc)
- GOTO(out_step_3, rc);
- GOTO(out_step_2, rc); /* returns the lock to the client */
- out_step_3:
- ldlm_lock_decref(child_lockh, LCK_PR);
- out_step_2:
- l_dput(dchild);
- out_step_1:
- ldlm_lock_decref(&parent_lockh, LCK_PR);
- l_dput(de);
- out_pre_de:
+ GOTO(cleanup, rc); /* returns the lock to the client */
+
+ cleanup:
+ rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+ req, rc, 0);
+ switch (cleanup_phase) {
+ case 3:
+ if (rc)
+ ldlm_lock_decref(child_lockh, LCK_PR);
+ case 2:
+ l_dput(dchild);
+
+ case 1:
+ if (rc) {
+ ldlm_lock_decref(&parent_lockh, LCK_PR);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
+ sizeof(parent_lockh));
+ req->rq_ack_locks[0].mode = LCK_PR;
+ }
+ l_dput(de);
+ default: ;
+ }
req->rq_status = rc;
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
return rc;
uc.ouc_fsgid = body->fsgid;
uc.ouc_cap = body->capability;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
- mds_start_transno(mds);
handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
- mds_finish_transno(mds, handle, req, rc);
GOTO(out_ea, rc);
}
rc = fsfilt_set_md(obd, inode,handle,lmm,lmm_size);
- rc = mds_finish_transno(mds, handle, req, rc);
-
- rc2 = fsfilt_commit(obd, inode, handle);
- if (rc2 && !rc)
- rc = rc2;
+ rc = mds_finish_transno(mds, inode, handle, req, rc, 0);
out_ea:
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
#endif
+/* Reply reconstruction for a resent close: copy mcd_last_transno and
+ * mcd_last_result from the export's mds_client_data into the request. */
+static void reconstruct_close(struct ptlrpc_request *req)
+{
+        struct mds_client_data *last = req->rq_export->exp_mds_data.med_mcd;
+
+        /* XXX When open-unlink is working, we'll need to steal ack locks as
+         * XXX well, and make sure that we do the right unlinking after we
+         * XXX get the ack back.
+         */
+        req->rq_status = last->mcd_last_result;
+        req->rq_transno = last->mcd_last_transno;
+}
+
static int mds_close(struct ptlrpc_request *req)
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
int rc;
ENTRY;
+ MDS_CHECK_RESENT(req, reconstruct_close(req));
+
body = lustre_msg_buf(req->rq_reqmsg, 0);
mfd = mds_handle2mfd(&body->handle);
return rc;
}
-/* forward declaration */
-int mds_handle(struct ptlrpc_request *req);
-
-static void abort_delayed_replies(struct mds_obd *mds)
-{
- struct ptlrpc_request *req;
- struct list_head *tmp, *n;
- list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
- DEBUG_REQ(D_ERROR, req, "aborted:");
- req->rq_status = -ENOTCONN;
- req->rq_type = PTL_RPC_MSG_ERR;
- ptlrpc_reply(req->rq_svc, req);
- list_del(&req->rq_list);
- OBD_FREE(req, sizeof *req);
- }
-}
-
-static void mds_abort_recovery(void *data)
-{
- struct mds_obd *mds = data;
- struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
- CERROR("disconnecting clients and aborting recovery\n");
- mds->mds_recoverable_clients = 0;
- obd->obd_flags &= ~(OBD_RECOVERING | OBD_ABORT_RECOVERY);
- abort_delayed_replies(mds);
- spin_unlock_bh(&mds->mds_processing_task_lock);
- class_disconnect_all(obd);
- spin_lock_bh(&mds->mds_processing_task_lock);
-}
-
-static void mds_recovery_expired(unsigned long castmeharder)
-{
- struct mds_obd *mds = (struct mds_obd *)castmeharder;
- struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
- CERROR("recovery timed out, aborting\n");
- spin_lock_bh(&mds->mds_processing_task_lock);
- obd->obd_flags |= OBD_ABORT_RECOVERY;
- wake_up(&mds->mds_next_transno_waitq);
- spin_unlock_bh(&mds->mds_processing_task_lock);
-}
-
-static void reset_recovery_timer(struct mds_obd *mds)
-{
- CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
- MDS_RECOVERY_TIMEOUT / HZ);
- mod_timer(&mds->mds_recovery_timer, jiffies + MDS_RECOVERY_TIMEOUT);
-}
-
-static void start_recovery_timer(struct mds_obd *mds)
-{
- mds->mds_recovery_timer.function = mds_recovery_expired;
- mds->mds_recovery_timer.data = (unsigned long)mds;
- init_timer(&mds->mds_recovery_timer);
- reset_recovery_timer(mds);
-}
-
-static void cancel_recovery_timer(struct mds_obd *mds)
-{
- del_timer(&mds->mds_recovery_timer);
-}
-
-static int check_for_next_transno(struct mds_obd *mds)
-{
- struct ptlrpc_request *req;
- struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
- req = list_entry(mds->mds_recovery_queue.next,
- struct ptlrpc_request, rq_list);
- LASSERT(req->rq_reqmsg->transno >= mds->mds_next_recovery_transno);
-
- return req->rq_reqmsg->transno == mds->mds_next_recovery_transno ||
- (obd->obd_flags & OBD_RECOVERING) == 0;
-}
-
-static void process_recovery_queue(struct mds_obd *mds)
-{
- struct ptlrpc_request *req;
- struct obd_device *obd = list_entry(mds, struct obd_device, u.mds);
- int aborted = 0;
- ENTRY;
-
- for (;;) {
- spin_lock_bh(&mds->mds_processing_task_lock);
- LASSERT(mds->mds_processing_task == current->pid);
- req = list_entry(mds->mds_recovery_queue.next,
- struct ptlrpc_request, rq_list);
-
- if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) {
- spin_unlock_bh(&mds->mds_processing_task_lock);
- CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
- LPD64")\n",
- mds->mds_next_recovery_transno,
- req->rq_reqmsg->transno);
- wait_event(mds->mds_next_transno_waitq,
- check_for_next_transno(mds));
- spin_lock_bh(&mds->mds_processing_task_lock);
- if (obd->obd_flags & OBD_ABORT_RECOVERY) {
- mds_abort_recovery(mds);
- aborted = 1;
- }
- spin_unlock_bh(&mds->mds_processing_task_lock);
- if (aborted)
- return;
- continue;
- }
- list_del_init(&req->rq_list);
- spin_unlock_bh(&mds->mds_processing_task_lock);
-
- DEBUG_REQ(D_ERROR, req, "processing: ");
- (void)mds_handle(req);
- reset_recovery_timer(mds);
- mds_fsync_super(mds->mds_sb);
- OBD_FREE(req, sizeof *req);
- spin_lock_bh(&mds->mds_processing_task_lock);
- mds->mds_next_recovery_transno++;
- if (list_empty(&mds->mds_recovery_queue)) {
- mds->mds_processing_task = 0;
- spin_unlock_bh(&mds->mds_processing_task_lock);
- break;
- }
- spin_unlock_bh(&mds->mds_processing_task_lock);
- }
- EXIT;
-}
-
-static int queue_recovery_request(struct ptlrpc_request *req,
- struct mds_obd *mds)
-{
- struct list_head *tmp;
- int inserted = 0;
- __u64 transno = req->rq_reqmsg->transno;
- struct ptlrpc_request *saved_req;
-
- if (!transno) {
- INIT_LIST_HEAD(&req->rq_list);
- DEBUG_REQ(D_HA, req, "not queueing");
- return 1;
- }
-
- spin_lock_bh(&mds->mds_processing_task_lock);
-
- if (mds->mds_processing_task == current->pid) {
- /* Processing the queue right now, don't re-add. */
- LASSERT(list_empty(&req->rq_list));
- spin_unlock_bh(&mds->mds_processing_task_lock);
- return 1;
- }
-
- OBD_ALLOC(saved_req, sizeof *saved_req);
- if (!saved_req)
- LBUG();
- memcpy(saved_req, req, sizeof *req);
- req = saved_req;
- INIT_LIST_HEAD(&req->rq_list);
-
- /* XXX O(n^2) */
- list_for_each(tmp, &mds->mds_recovery_queue) {
- struct ptlrpc_request *reqiter =
- list_entry(tmp, struct ptlrpc_request, rq_list);
-
- if (reqiter->rq_reqmsg->transno > transno) {
- list_add_tail(&req->rq_list, &reqiter->rq_list);
- inserted = 1;
- break;
- }
- }
-
- if (!inserted) {
- list_add_tail(&req->rq_list, &mds->mds_recovery_queue);
- }
-
- if (mds->mds_processing_task != 0) {
- /* Someone else is processing this queue, we'll leave it to
- * them.
- */
- if (transno == mds->mds_next_recovery_transno)
- wake_up(&mds->mds_next_transno_waitq);
- spin_unlock_bh(&mds->mds_processing_task_lock);
- return 0;
- }
-
- /* Nobody is processing, and we know there's (at least) one to process
- * now, so we'll do the honours.
- */
- mds->mds_processing_task = current->pid;
- spin_unlock_bh(&mds->mds_processing_task_lock);
-
- process_recovery_queue(mds);
- return 0;
-}
-
static int filter_recovery_request(struct ptlrpc_request *req,
- struct mds_obd *mds, int *process)
+ struct obd_device *obd, int *process)
{
switch (req->rq_reqmsg->opc) {
case MDS_CONNECT: /* This will never get here, but for completeness. */
case MDS_GETSTATUS: /* used in unmounting */
case MDS_REINT:
case LDLM_ENQUEUE:
- *process = queue_recovery_request(req, mds);
+ *process = target_queue_recovery_request(req, obd);
RETURN(0);
default:
}
}
-static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
-{
- struct mds_obd *mds = mds_req2mds(req);
- struct obd_device *mds_obd = list_entry(mds, struct obd_device, u.mds);
- struct ptlrpc_request *saved_req;
-
- spin_lock_bh(&mds->mds_processing_task_lock);
- if (rc) {
- /* Just like ptlrpc_error, but without the sending. */
- lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
- &req->rq_repmsg);
- req->rq_type = PTL_RPC_MSG_ERR;
- }
-
- LASSERT(list_empty(&req->rq_list));
- OBD_ALLOC(saved_req, sizeof *saved_req);
- memcpy(saved_req, req, sizeof *saved_req);
- req = saved_req;
- list_add(&req->rq_list, &mds->mds_delayed_reply_queue);
- if (--mds->mds_recoverable_clients == 0) {
- struct list_head *tmp, *n;
- ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
- CDEBUG(D_ERROR,
- "all clients recovered, sending delayed replies\n");
- mds_obd->obd_flags &= ~OBD_RECOVERING;
- list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
- DEBUG_REQ(D_ERROR, req, "delayed:");
- ptlrpc_reply(req->rq_svc, req);
- list_del(&req->rq_list);
- OBD_FREE(req, sizeof *req);
- }
- cancel_recovery_timer(mds);
- } else {
- CERROR("%d recoverable clients remain\n",
- mds->mds_recoverable_clients);
- }
-
- spin_unlock_bh(&mds->mds_processing_task_lock);
- return 1;
-}
-
static char *reint_names[] = {
[REINT_SETATTR] "setattr",
[REINT_CREATE] "create",
[REINT_OPEN] "open",
};
+/* Transfer the ack locks held by the export's outstanding reply to req.
+ *
+ * The whole rq_ack_locks array is copied into req, then the old reply is
+ * flagged PTL_RPC_FL_RESENT and its rq_wait_for_rep waiters are woken.
+ * The caller must ensure med->med_outstanding_reply is non-NULL; it is
+ * dereferenced unconditionally. */
+void mds_steal_ack_locks(struct mds_export_data *med,
+                         struct ptlrpc_request *req)
+{
+        struct ptlrpc_request *prev = med->med_outstanding_reply;
+
+        memcpy(req->rq_ack_locks, prev->rq_ack_locks,
+               sizeof(req->rq_ack_locks));
+
+        /* Set the flag before the wake-up so the woken waiter sees it. */
+        prev->rq_flags |= PTL_RPC_FL_RESENT;
+        wake_up(&prev->rq_wait_for_rep);
+
+        DEBUG_REQ(D_HA, prev, "stole locks from");
+        DEBUG_REQ(D_HA, req, "stole locks for");
+}
+/* Send the reply (or an error) for req and, when the request carries ack
+ * locks, wait for the ack before releasing them.
+ *
+ * rc is the handler result: non-zero sends an error with ptlrpc_error(),
+ * zero sends the packed reply with ptlrpc_reply().  rc is reused below
+ * for the result of the ack wait. */
+static void mds_send_reply(struct ptlrpc_request *req, int rc)
+{
+ int i;
+ struct ptlrpc_req_ack_lock *ack_lock;
+ struct l_wait_info lwi;
+ struct mds_export_data *med =
+ (req->rq_export && req->rq_ack_locks[0].mode) ?
+ &req->rq_export->exp_mds_data : NULL; /* set only with an export and a first ack lock */
+ /* With ack locks: register this request as the export's outstanding
+ * reply and mark it WANT_ACK before anything is sent. */
+ if (med) {
+ med->med_outstanding_reply = req;
+ req->rq_flags |= PTL_RPC_FL_WANT_ACK;
+ init_waitqueue_head(&req->rq_wait_for_rep);
+ }
+ /* Send normally, unless the OBD_FAIL_MDS_ALL_REPLY_NET fail check
+ * fires, in which case the reply is dropped. */
+ if (!OBD_FAIL_CHECK(OBD_FAIL_MDS_ALL_REPLY_NET | OBD_FAIL_ONCE)) {
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
+ ptlrpc_error(req->rq_svc, req);
+ } else {
+ DEBUG_REQ(D_NET, req, "sending reply");
+ ptlrpc_reply(req->rq_svc, req);
+ }
+ } else {
+ obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
+ DEBUG_REQ(D_ERROR, req, "dropping reply");
+ if (!med && req->rq_repmsg)
+ OBD_FREE(req->rq_repmsg, req->rq_replen); /* freed here only when no ack wait follows */
+ }
+ /* No ack locks: nothing to wait for. */
+ if (!med) {
+ DEBUG_REQ(D_HA, req, "not waiting for ack");
+ return;
+ }
+ /* Wait until WANT_ACK is cleared or RESENT is set (the latter is done
+ * by mds_steal_ack_locks); the timeout argument is obd_timeout / 2 * HZ. */
+ lwi = LWI_TIMEOUT(obd_timeout / 2 * HZ, NULL, NULL);
+ rc = l_wait_event(req->rq_wait_for_rep,
+ (req->rq_flags & PTL_RPC_FL_WANT_ACK) == 0 ||
+ (req->rq_flags & PTL_RPC_FL_RESENT),
+ &lwi);
+
+ if (req->rq_flags & PTL_RPC_FL_RESENT) {
+ /* The client resent this request, so abort the
+ * waiting-ack portals stuff, and don't decref the
+ * locks.
+ */
+ DEBUG_REQ(D_HA, req, "resent: not cancelling locks");
+ ptlrpc_abort(req);
+ return;
+ }
+ /* Either the wait timed out or WANT_ACK was cleared; both paths fall
+ * through to releasing the ack locks.  A timeout also aborts the
+ * reply and reports the connection as failed. */
+ if (rc == -ETIMEDOUT) {
+ ptlrpc_abort(req);
+ recovd_conn_fail(req->rq_export->exp_connection);
+ DEBUG_REQ(D_HA, req, "cancelling locks for timeout");
+ } else {
+ DEBUG_REQ(D_HA, req, "cancelling locks for ack");
+ }
+
+ med->med_outstanding_reply = NULL;
+ /* Drop a reference on each ack lock, stopping at the first slot with
+ * a zero mode; at most 4 slots are examined. */
+ for (ack_lock = req->rq_ack_locks, i = 0; i < 4; i++, ack_lock++) {
+ if (!ack_lock->mode)
+ break;
+ ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
+ }
+}
+
int mds_handle(struct ptlrpc_request *req)
{
int should_process, rc;
struct mds_obd *mds = NULL; /* quell gcc overwarning */
- struct obd_device *mds_obd = NULL;
+ struct obd_device *obd = NULL;
ENTRY;
rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
}
med = &req->rq_export->exp_mds_data;
- mds_obd = req->rq_export->exp_obd;
- mds = &mds_obd->u.mds;
- spin_lock_bh(&mds->mds_processing_task_lock);
- if (mds_obd->obd_flags & OBD_ABORT_RECOVERY)
- mds_abort_recovery(mds);
- spin_unlock_bh(&mds->mds_processing_task_lock);
-
- if (mds_obd->obd_flags & OBD_RECOVERING) {
- rc = filter_recovery_request(req, mds, &should_process);
+ obd = req->rq_export->exp_obd;
+ mds = &obd->u.mds;
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ if (obd->obd_flags & OBD_ABORT_RECOVERY)
+ target_abort_recovery(obd);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ if (obd->obd_flags & OBD_RECOVERING) {
+ rc = filter_recovery_request(req, obd, &should_process);
if (rc || !should_process)
RETURN(rc);
- } else if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
- if (req->rq_xid == med->med_last_xid) {
- DEBUG_REQ(D_HA, req, "resending reply");
- OBD_ALLOC(req->rq_repmsg, med->med_last_replen);
- req->rq_replen = med->med_last_replen;
- memcpy(req->rq_repmsg, med->med_last_reply,
- req->rq_replen);
- ptlrpc_reply(req->rq_svc, req);
- return 0;
- }
- DEBUG_REQ(D_HA, req, "no reply for resend, continuing");
}
-
}
switch (req->rq_reqmsg->opc) {
case MDS_CONNECT:
DEBUG_REQ(D_INODE, req, "connect");
OBD_FAIL_RETURN(OBD_FAIL_MDS_CONNECT_NET, 0);
- rc = target_handle_connect(req);
+ rc = target_handle_connect(req, mds_handle);
/* Make sure that last_rcvd is correct. */
if (!rc) {
/* Now that we have an export, set mds. */
struct lustre_handle lockh;
DEBUG_REQ(D_INODE, req, "getattr_name");
OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0);
+
+ /* If this request gets a reconstructed reply, we won't be
+ * acquiring any new locks in mds_getattr_name, so we don't
+ * want to cancel.
+ */
+ lockh.addr = 0;
rc = mds_getattr_name(0, req, &lockh);
- if (rc == 0)
+ if (rc == 0 && lockh.addr)
ldlm_lock_decref(&lockh, LCK_PR);
break;
}
break;
case MDS_READPAGE:
- DEBUG_REQ(D_INODE, req, "readpage\n");
+ DEBUG_REQ(D_INODE, req, "readpage");
OBD_FAIL_RETURN(OBD_FAIL_MDS_READPAGE_NET, 0);
rc = mds_readpage(req);
DEBUG_REQ(D_IOCTL, req,
"not sending last_committed update");
}
- CDEBUG(D_INFO, "last_transno %Lu, last_committed %Lu, xid %d\n",
- (unsigned long long)mds->mds_last_rcvd,
- (unsigned long long)obd->obd_last_committed,
- cpu_to_le32(req->rq_xid));
+ CDEBUG(D_INFO, "last_transno "LPU64", last_committed "LPU64
+ ", xid "LPU64"\n",
+ mds->mds_last_transno, obd->obd_last_committed,
+ NTOH__u64(req->rq_xid));
}
out:
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
- if (mds_obd && (mds_obd->obd_flags & OBD_RECOVERING)) {
+ if (obd && (obd->obd_flags & OBD_RECOVERING)) {
DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
- return mds_queue_final_reply(req, rc);
+ return target_queue_final_reply(req, rc);
}
/* Lost a race with recovery; let the error path DTRT. */
rc = req->rq_status = -ENOTCONN;
}
- if (req->rq_export && mds_obd &&
- (mds_obd->obd_flags & OBD_RECOVERING) == 0) {
- struct mds_export_data *med = &req->rq_export->exp_mds_data;
- if (med->med_last_reply)
- OBD_FREE(med->med_last_reply, med->med_last_replen);
- OBD_ALLOC(med->med_last_reply, req->rq_replen);
- med->med_last_replen = req->rq_replen;
- med->med_last_xid = req->rq_xid;
- memcpy(med->med_last_reply, req->rq_repmsg, req->rq_replen);
- /* XXX serialize */
- }
-
- if (!OBD_FAIL_CHECK(OBD_FAIL_MDS_ALL_REPLY_NET | OBD_FAIL_ONCE)) {
- if (rc) {
- DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
- ptlrpc_error(req->rq_svc, req);
- } else {
- DEBUG_REQ(D_NET, req, "sending reply");
- ptlrpc_reply(req->rq_svc, req);
- }
- } else {
- obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
- DEBUG_REQ(D_ERROR, req, "dropping reply");
- if (req->rq_repmsg)
- OBD_FREE(req->rq_repmsg, req->rq_replen);
- }
-
+ mds_send_reply(req, rc);
return 0;
}
* then the server last_rcvd value may be less than that of the clients.
* This will alert us that we may need to do client recovery.
*
- * Also assumes for mds_last_rcvd that we are not modifying it (no locking).
+ * Also assumes for mds_last_transno that we are not modifying it (no locking).
*/
int mds_update_server_data(struct mds_obd *mds)
{
int rc;
push_ctxt(&saved, &mds->mds_ctxt, NULL);
- msd->msd_last_rcvd = cpu_to_le64(mds->mds_last_rcvd);
+ msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
- CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_rcvd is %Lu\n",
+ CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n",
(unsigned long long)mds->mds_mount_count,
- (unsigned long long)mds->mds_last_rcvd);
+ (unsigned long long)mds->mds_last_transno);
rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off);
if (rc != sizeof(*msd)) {
CERROR("error writing MDS server data: rc = %d\n", rc);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
#else
- rc = file_fsync(filp, filp->f_dentry, 1);
+ rc = file_fsync(filp, filp->f_dentry, 1);
#endif
if (rc)
CERROR("error flushing MDS server data: rc = %d\n", rc);
if (!mds->mds_sb)
GOTO(err_put, rc = -ENODEV);
- init_MUTEX(&mds->mds_transno_sem);
+ spin_lock_init(&mds->mds_transno_lock);
mds->mds_max_mdsize = sizeof(struct lov_mds_md);
rc = mds_fs_setup(obddev, mnt);
if (rc) {
GOTO(err_put, rc);
}
- if (obddev->obd_flags & OBD_RECOVERING)
- start_recovery_timer(mds);
-
obddev->obd_namespace =
ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
if (obddev->obd_namespace == NULL) {
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"mds_ldlm_client", &obddev->obd_ldlm_client);
- spin_lock_init(&mds->mds_processing_task_lock);
- mds->mds_processing_task = 0;
mds->mds_has_lov_desc = 0;
- INIT_LIST_HEAD(&mds->mds_recovery_queue);
- INIT_LIST_HEAD(&mds->mds_delayed_reply_queue);
- init_waitqueue_head(&mds->mds_next_transno_waitq);
RETURN(0);
lock_kernel();
err_ops:
fsfilt_put_ops(obddev->obd_fsops);
- RETURN(rc);
+ return rc;
}
static int mds_cleanup(struct obd_device *obddev)
RETURN(0);
}
+/* Restore the lock handle for a resent request.
+ *
+ * If @req carries MSG_RESENT, its xid matches the last xid recorded for this
+ * client, and a previous reply is still outstanding on the export, the lock
+ * handle stored in buffer 0 of that old reply is copied into @lockh.
+ * In every other case @lockh is left untouched.
+ */
+inline void fixup_handle_for_resent_req(struct ptlrpc_request *req,
+                                        struct lustre_handle *lockh)
+{
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
+        struct ptlrpc_request *oldrep = med->med_outstanding_reply;
+        struct ldlm_reply *dlm_rep;
+
+        /* mcd_last_xid is kept little-endian in the client record (it is
+         * written with cpu_to_le64), so convert before comparing it with the
+         * host-order rq_xid. */
+        if ((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) &&
+            (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) &&
+            (oldrep != NULL)) {
+                DEBUG_REQ(D_HA, req, "restoring lock handle from %p", oldrep);
+                dlm_rep = lustre_msg_buf(oldrep->rq_repmsg, 0);
+                lockh->addr = dlm_rep->lock_handle.addr;
+                lockh->cookie = dlm_rep->lock_handle.cookie;
+        }
+}
+
+
static int ldlm_intent_policy(struct ldlm_namespace *ns,
struct ldlm_lock **lockp, void *req_cookie,
ldlm_mode_t mode, int flags, void *data)
rep = lustre_msg_buf(req->rq_repmsg, 0);
rep->lock_policy_res1 = IT_INTENT_EXEC;
+ fixup_handle_for_resent_req(req, &lockh);
+
/* execute policy */
switch ((long)it->opc) {
case IT_OPEN:
case IT_GETATTR:
case IT_LOOKUP:
case IT_READDIR:
- case IT_SETATTR:
rc = mds_getattr_name(offset, req, &lockh);
/* FIXME: we need to sit down and decide on who should
* set req->rq_status, who should return negative and
}
if (flags & LDLM_FL_INTENT_ONLY) {
- LDLM_DEBUG0(lock, "INTENT_ONLY, aborting lock");
+ LDLM_DEBUG(lock, "INTENT_ONLY, aborting lock");
RETURN(ELDLM_LOCK_ABORTED);
}
* to get. */
new_lock = ldlm_handle2lock(&lockh);
LASSERT(new_lock != NULL);
- mds_body = lustre_msg_buf(req->rq_repmsg, 1);
*lockp = new_lock;
+ rep->lock_policy_res2 = req->rq_status;
+
+ if (new_lock->l_export == req->rq_export) {
+ /* Already gave this to the client, which means that we
+ * reconstructed a reply. */
+ LASSERT(lustre_msg_get_flags(req->rq_reqmsg) &
+ MSG_RESENT);
+ RETURN(ELDLM_LOCK_REPLACED);
+ }
+
/* Fixup the lock to be given to the client */
l_lock(&new_lock->l_resource->lr_namespace->ns_lock);
LASSERT(new_lock->l_readers + new_lock->l_writers == 1);
LDLM_LOCK_PUT(new_lock);
l_unlock(&new_lock->l_resource->lr_namespace->ns_lock);
- rep->lock_policy_res2 = req->rq_status;
-
RETURN(ELDLM_LOCK_REPLACED);
} else {
int size = sizeof(struct ldlm_reply);
static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct mds_obd *mds = &obddev->u.mds;
- struct obd_uuid uuid = { "self" };
int i, rc = 0;
ENTRY;
mds->mds_service = ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
- &uuid, mds_handle, "mds");
+ mds_handle, "mds");
if (!mds->mds_service) {
CERROR("failed to start service\n");
RETURN(rc = -ENOMEM);
}
}
- mds->mds_getattr_service =
+ mds->mds_setattr_service =
ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
MDS_BUFSIZE, MDS_MAXREQSIZE,
- MDS_GETATTR_PORTAL, MDC_REPLY_PORTAL,
- &uuid, mds_handle, "mds");
- if (!mds->mds_getattr_service) {
+ MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL,
+ mds_handle, "mds");
+ if (!mds->mds_setattr_service) {
CERROR("failed to start getattr service\n");
GOTO(err_thread, rc = -ENOMEM);
}
for (i = 0; i < MDT_NUM_THREADS; i++) {
char name[32];
sprintf(name, "ll_mdt_attr_%02d", i);
- rc = ptlrpc_start_thread(obddev, mds->mds_getattr_service,
+ rc = ptlrpc_start_thread(obddev, mds->mds_setattr_service,
name);
if (rc) {
- CERROR("cannot start MDT getattr thread #%d: rc %d\n",
+ CERROR("cannot start MDT setattr thread #%d: rc %d\n",
i, rc);
GOTO(err_thread2, rc);
}
}
+ mds->mds_readpage_service =
+ ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
+ MDS_BUFSIZE, MDS_MAXREQSIZE,
+ MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL,
+ mds_handle, "mds");
+ if (!mds->mds_readpage_service) {
+ CERROR("failed to start readpage service\n");
+ GOTO(err_thread2, rc = -ENOMEM);
+ }
+
+ for (i = 0; i < MDT_NUM_THREADS; i++) {
+ char name[32];
+ sprintf(name, "ll_mdt_rdpg_%02d", i);
+ rc = ptlrpc_start_thread(obddev, mds->mds_readpage_service,
+ name);
+ if (rc) {
+ CERROR("cannot start MDT readpage thread #%d: rc %d\n",
+ i, rc);
+ GOTO(err_thread3, rc);
+ }
+ }
+
RETURN(0);
+err_thread3:
+ ptlrpc_stop_all_threads(mds->mds_readpage_service);
+ ptlrpc_unregister_service(mds->mds_readpage_service);
err_thread2:
- ptlrpc_stop_all_threads(mds->mds_getattr_service);
- ptlrpc_unregister_service(mds->mds_getattr_service);
+ ptlrpc_stop_all_threads(mds->mds_setattr_service);
+ ptlrpc_unregister_service(mds->mds_setattr_service);
err_thread:
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
struct mds_obd *mds = &obddev->u.mds;
ENTRY;
- ptlrpc_stop_all_threads(mds->mds_getattr_service);
- ptlrpc_unregister_service(mds->mds_getattr_service);
+ ptlrpc_stop_all_threads(mds->mds_readpage_service);
+ ptlrpc_unregister_service(mds->mds_readpage_service);
+
+ ptlrpc_stop_all_threads(mds->mds_setattr_service);
+ ptlrpc_unregister_service(mds->mds_setattr_service);
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/lustre_lite.h>
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
int lprocfs_mds_statfs(void *data, struct statfs *sfs)
{
struct obd_device* dev = (struct obd_device*) data;
- struct mds_obd *mds = &dev->u.mds;
+ struct mds_obd *mds;
+
+ LASSERT(dev != NULL);
+ mds = &dev->u.mds;
return vfs_statfs(mds->mds_sb, sfs);
}
{
struct obd_device *obd = (struct obd_device *)data;
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_fsops != NULL);
+ LASSERT(obd->obd_fsops->fs_type != NULL);
return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
}
#include <linux/module.h>
#include <linux/kmod.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/mount.h>
+#endif
#include <linux/lustre_mds.h>
#include <linux/obd_class.h>
#include <linux/obd_support.h>
med->med_mcd->mcd_uuid, med->med_off);
}
- if (med->med_last_reply) {
- OBD_FREE(med->med_last_reply, med->med_last_replen);
- med->med_last_reply = NULL;
- }
OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
return 0;
loff_t off = 0;
int cl_off;
unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
- __u64 last_rcvd = 0;
+ __u64 last_transno = 0;
__u64 last_mount;
int rc = 0;
last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
/*
- * When we do a clean MDS shutdown, we save the last_rcvd into
- * the header. If we find clients with higher last_rcvd values
- * then those clients may need recovery done.
+ * When we do a clean MDS shutdown, we save the last_transno into
+ * the header.
*/
- last_rcvd = le64_to_cpu(msd->msd_last_rcvd);
- mds->mds_last_rcvd = last_rcvd;
- CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n", last_rcvd);
+ last_transno = le64_to_cpu(msd->msd_last_transno);
+ mds->mds_last_transno = last_transno;
+ CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n",
+ last_transno);
last_mount = le64_to_cpu(msd->msd_mount_count);
mds->mds_mount_count = last_mount;
continue;
}
- last_rcvd = le64_to_cpu(mcd->mcd_last_rcvd);
+ last_transno = le64_to_cpu(mcd->mcd_last_transno);
/* These exports are cleaned up by mds_disconnect(), so they
* need to be set up like real exports as mds_connect() does.
spin_lock_init(&med->med_open_lock);
mcd = NULL;
- mds->mds_recoverable_clients++;
+ obddev->obd_recoverable_clients++;
} else {
CDEBUG(D_INFO,
"discarded client %d, UUID '%s', count %Ld\n",
}
CDEBUG(D_OTHER, "client at offset %d has last_rcvd = %Lu\n",
- cl_off, (unsigned long long)last_rcvd);
+ cl_off, (unsigned long long)last_transno);
- if (last_rcvd > mds->mds_last_rcvd)
- mds->mds_last_rcvd = last_rcvd;
+ if (last_transno > mds->mds_last_transno)
+ mds->mds_last_transno = last_transno;
}
- obddev->obd_last_committed = mds->mds_last_rcvd;
- if (mds->mds_recoverable_clients) {
- CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
- mds->mds_recoverable_clients, mds->mds_last_rcvd);
- mds->mds_next_recovery_transno = obddev->obd_last_committed + 1;
+ obddev->obd_last_committed = mds->mds_last_transno;
+ if (obddev->obd_recoverable_clients) {
+ CERROR("RECOVERY: %d recoverable clients, last_transno "
+ LPU64"\n",
+ obddev->obd_recoverable_clients, mds->mds_last_transno);
+ obddev->obd_next_recovery_transno = obddev->obd_last_committed
+ + 1;
obddev->obd_flags |= OBD_RECOVERING;
}
case OBD_IOC_SET_READONLY:
CERROR("setting device %s read-only\n",
ll_bdevname(obd->u.mds.mds_sb->s_dev));
+#ifdef CONFIG_DEV_RDONLY
dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
+#endif
RETURN(0);
default:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lustre/mds/handler.c
- * Lustre Metadata Server (mds) request handler
- *
- * Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ * Copyright (c) 2003 Cluster File Systems, Inc.
* Author: Peter Braam <braam@clusterfs.com>
* Author: Andreas Dilger <adilger@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
#include <linux/init.h>
#include <linux/obd_class.h>
#include <linux/random.h>
-#include <linux/locks.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
+#else
+#include <linux/locks.h>
#endif
#include <linux/obd_lov.h>
#include <linux/lustre_mds.h>
extern kmem_cache_t *mds_file_cache;
extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
-extern void mds_start_transno(struct mds_obd *mds);
-extern int mds_finish_transno(struct mds_obd *mds, void *handle,
- struct ptlrpc_request *req, int rc);
+int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
+ struct ptlrpc_request *req, int rc, __u32 op_data);
extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
struct ldlm_res_id *p1_res_id,
struct ldlm_res_id *p2_res_id,
struct lustre_handle *c1_lockh,
struct lustre_handle *c2_lockh);
+/* Rebuild the reply for an open request that was already processed.
+ *
+ * The saved result, transno and disposition bits are taken from the client's
+ * mds_client_data record, ack locks are stolen from any outstanding reply,
+ * and, where the saved disposition says the open got far enough, the reply
+ * body is re-packed from the child inode and the open file handle is filled
+ * in again.
+ *
+ * @rec         decoded update record (parent fid in ur_fid1, open flags in
+ *              ur_flags)
+ * @req         the request; its reply buffers 0 and 1 (and 2 for regular
+ *              files) are filled in, and rq_status/rq_transno are set
+ * @child_lockh not used by this function
+ */
+void reconstruct_open(struct mds_update_record *rec, struct ptlrpc_request *req,
+                      struct lustre_handle *child_lockh)
+{
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
+        struct mds_obd *mds = mds_req2mds(req);
+        struct mds_file_data *mfd;
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct dentry *parent, *child;
+        struct ldlm_reply *rep = lustre_msg_buf(req->rq_repmsg, 0);
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+        int disp, rc;
+        ENTRY;
+
+        /* copy rc, transno and disp; steal locks.
+         *
+         * mcd_last_result and mcd_last_data are written with cpu_to_le32, so
+         * they are converted back to host order here.
+         * NOTE(review): mcd_last_transno is copied without conversion because
+         * rq_transno is itself stored via HTON__u64 elsewhere -- confirm both
+         * use the same byte order. */
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = le32_to_cpu(mcd->mcd_last_result);
+        disp = rep->lock_policy_res1 = le32_to_cpu(mcd->mcd_last_data);
+
+        if (med->med_outstanding_reply)
+                mds_steal_ack_locks(med, req);
+
+        /* We never care about these. */
+        disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG);
+        if (!disp) {
+                EXIT;
+                return; /* error looking up parent or child */
+        }
+
+        parent = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+        LASSERT(!IS_ERR(parent));
+
+        child = lookup_one_len(lustre_msg_buf(req->rq_reqmsg, 3),
+                               parent, req->rq_reqmsg->buflens[3] - 1);
+        LASSERT(!IS_ERR(child));
+
+        if (!child->d_inode) {
+                GOTO(out_dput, 0); /* child not present to open */
+        }
+
+        /* At this point, we know we have a child, which means that we'll send
+         * it back _unless_ the open failed _and_ we didn't create the file.
+         */
+        if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) &&
+            req->rq_status) {
+                GOTO(out_dput, 0);
+        }
+
+        if (!med->med_outstanding_reply) {
+                LBUG(); /* XXX need to get enqueue client lock */
+        }
+
+        /* get lock (write for O_CREAT, read otherwise) */
+
+        mds_pack_inode2fid(&body->fid1, child->d_inode);
+        mds_pack_inode2body(body, child->d_inode);
+        if (S_ISREG(child->d_inode->i_mode)) {
+                rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
+                                 child->d_inode);
+                /* a packing failure must match what was recorded before */
+                if (rc)
+                        LASSERT(rc == req->rq_status);
+        } else {
+                /* XXX need to check this case */
+        }
+
+        /* If we're opening a file without an EA, change to a write
+           lock (unless we already have one). */
+
+        /* If we have -EEXIST as the status, and we were asked to create
+         * exclusively, we can tell we failed because the file already existed.
+         */
+        if (req->rq_status == -EEXIST &&
+            ((rec->ur_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+                GOTO(out_dput, 0);
+        }
+
+        /* If we didn't get as far as trying to open, then some locking thing
+         * probably went wrong, and we'll just bail here.
+         */
+        if ((disp & IT_OPEN_OPEN) == 0) {
+                GOTO(out_dput, 0);
+        }
+
+        /* If we failed, then we must have failed opening, so don't look for
+         * file descriptor or anything, just give the client the bad news.
+         */
+        if (req->rq_status) {
+                GOTO(out_dput, 0);
+        }
+
+        if (med->med_outstanding_reply) {
+                struct list_head *t;
+                mfd = NULL;
+                /* XXX can we just look in the old reply to find the handle in
+                 * XXX O(1) here? */
+                list_for_each(t, &med->med_open_head) {
+                        mfd = list_entry(t, struct mds_file_data, mfd_list);
+                        if (mfd->mfd_xid == req->rq_xid)
+                                break;
+                        mfd = NULL;
+                }
+                /* if we're not recovering, it had better be found */
+                LASSERT(mfd);
+        } else {
+                /* NOTE(review): only reachable if the LBUG() above returns;
+                 * confirm whether this branch is still needed. */
+                struct file *file;
+                mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
+                if (!mfd) {
+                        CERROR("mds: out of memory\n");
+                        GOTO(out_dput, req->rq_status = -ENOMEM);
+                }
+                mntget(mds->mds_vfsmnt);
+                file = dentry_open(child, mds->mds_vfsmnt,
+                                   rec->ur_flags & ~(O_DIRECT | O_TRUNC));
+                LASSERT(!IS_ERR(file)); /* XXX -ENOMEM? */
+                file->private_data = mfd;
+                mfd->mfd_file = file;
+                mfd->mfd_xid = req->rq_xid;
+                get_random_bytes(&mfd->mfd_servercookie,
+                                 sizeof(mfd->mfd_servercookie));
+                spin_lock(&med->med_open_lock);
+                list_add(&mfd->mfd_list, &med->med_open_head);
+                spin_unlock(&med->med_open_lock);
+        }
+
+        /* hand the open file handle back to the client */
+        body->handle.addr = (__u64)(unsigned long)mfd;
+        body->handle.cookie = mfd->mfd_servercookie;
+
+ out_dput:
+        l_dput(child);
+        l_dput(parent);
+        EXIT;
+}
+
+
int mds_open(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req, struct lustre_handle *child_lockh)
{
struct ldlm_reply *rep = lustre_msg_buf(req->rq_repmsg, 0);
struct file *file;
struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
- struct dentry *dchild, *parent;
+ struct dentry *dchild = NULL, *parent;
struct mds_export_data *med;
struct mds_file_data *mfd = NULL;
struct ldlm_res_id child_res_id = { .name = {0} };
struct lustre_handle parent_lockh;
int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
+ int cleanup_phase = 0;
+ void *handle = NULL;
ENTRY;
-#warning replay of open needs to be redone
- /* was this animal open already and the client lost the reply? */
- /* XXX need some way to detect a reopen, to avoid locked list walks */
+ MDS_CHECK_RESENT(req, reconstruct_open(rec, req, child_lockh));
+
med = &req->rq_export->exp_mds_data;
-#if 0
- spin_lock(&med->med_open_lock);
- list_for_each(tmp, &med->med_open_head) {
- mfd = list_entry(tmp, typeof(*mfd), mfd_list);
- if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
- sizeof(mfd->mfd_clienthandle)) &&
- body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
- dchild = mfd->mfd_file->f_dentry;
- spin_unlock(&med->med_open_lock);
- CERROR("Re opening "LPD64"\n", body->fid1.id);
- GOTO(out_pack, rc = 0);
- }
- }
- spin_unlock(&med->med_open_lock);
-#endif
rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
if (IS_ERR(parent)) {
rc = PTR_ERR(parent);
CERROR("parent lookup error %d\n", rc);
- LBUG();
- RETURN(rc);
+ GOTO(cleanup, rc);
}
LASSERT(parent->d_inode);
+ cleanup_phase = 1; /* parent dentry and lock */
+
/* Step 2: Lookup the child */
dchild = lookup_one_len(lustre_msg_buf(req->rq_reqmsg, 3),
parent, req->rq_reqmsg->buflens[3] - 1);
if (IS_ERR(dchild))
- GOTO(out_step_2, rc = PTR_ERR(dchild));
+ GOTO(cleanup, rc = PTR_ERR(dchild));
+
+ cleanup_phase = 2; /* child dentry */
if (dchild->d_inode)
rep->lock_policy_res1 |= IT_OPEN_POS;
/* Step 3: If the child was negative, and we're supposed to,
* create it. */
- if ((rec->ur_flags & O_CREAT) && !dchild->d_inode) {
- int err;
- void *handle;
- mds_start_transno(mds);
+ if (!dchild->d_inode) {
+ if (!(rec->ur_flags & O_CREAT)) {
+ /* It's negative and we weren't supposed to create it */
+ GOTO(cleanup, rc = -ENOENT);
+ }
+
rep->lock_policy_res1 |= IT_OPEN_CREATE;
handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
- mds_finish_transno(mds, handle, req, rc);
- GOTO(out_step_3, rc);
+ handle = NULL;
+ GOTO(cleanup, rc);
}
rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
- rc = mds_finish_transno(mds, handle, req, rc);
- err = fsfilt_commit(obd, parent->d_inode, handle);
- if (rc || err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
- GOTO(out_step_3, rc);
- }
+ if (rc)
+ GOTO(cleanup, rc);
created = 1;
child_mode = LCK_PW;
- } else if (!dchild->d_inode) {
- /* It's negative and we weren't supposed to create it */
- GOTO(out_step_3, rc = -ENOENT);
}
/* Step 4: It's positive, so lock the child */
mds_blocking_ast, NULL, NULL, child_lockh);
if (rc != ELDLM_OK) {
CERROR("ldlm_cli_enqueue: %d\n", rc);
- GOTO(out_step_3, rc = -EIO);
+ GOTO(cleanup, rc = -EIO);
}
+ cleanup_phase = 3; /* child lock */
+
mds_pack_inode2fid(&body->fid1, dchild->d_inode);
mds_pack_inode2body(body, dchild->d_inode);
if (S_ISREG(dchild->d_inode->i_mode)) {
rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
if (rc)
- GOTO(out_step_4, rc);
+ GOTO(cleanup, rc);
} else {
/* If this isn't a regular file, we can't open it. */
- GOTO(out_step_3, rc = 0); /* returns the lock to the client */
+
+ /* We want to drop the child dentry, because we're not returning
+ * failure (which would do this for us in step 2), and we're not
+ * handing it off to the open file in dentry_open. */
+ l_dput(dchild);
+ GOTO(cleanup, rc = 0); /* returns the lock to the client */
}
if (!created && (rec->ur_flags & O_CREAT) && (rec->ur_flags & O_EXCL)) {
/* File already exists, we didn't just create it, and we
* were passed O_EXCL; err-or. */
- GOTO(out_step_3, rc = -EEXIST); // returns a lock to the client
+ GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
}
/* If we're opening a file without an EA, the client needs a write
* lock. */
- if (child_mode != LCK_PW && S_ISREG(dchild->d_inode->i_mode) &&
- !(body->valid & OBD_MD_FLEASIZE)) {
+ if (child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
ldlm_lock_decref(child_lockh, child_mode);
child_mode = LCK_PW;
goto reacquire;
mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
if (!mfd) {
CERROR("mds: out of memory\n");
- GOTO(out_step_4, req->rq_status = -ENOMEM);
+ GOTO(cleanup, rc = -ENOMEM);
}
+ cleanup_phase = 4; /* mfd allocated */
+
/* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
mntget(mds->mds_vfsmnt);
- file = dentry_open(dchild,mds->mds_vfsmnt,
+ file = dentry_open(dchild, mds->mds_vfsmnt,
rec->ur_flags & ~(O_DIRECT | O_TRUNC));
- if (IS_ERR(file))
- GOTO(out_step_5, rc = PTR_ERR(file));
+ if (IS_ERR(file)) {
+ dchild = NULL; /* prevent a double dput in step 2 */
+ GOTO(cleanup, rc = PTR_ERR(file));
+ }
file->private_data = mfd;
mfd->mfd_file = file;
+ mfd->mfd_xid = req->rq_xid;
get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
spin_lock(&med->med_open_lock);
list_add(&mfd->mfd_list, &med->med_open_head);
body->handle.cookie = mfd->mfd_servercookie;
CDEBUG(D_INODE, "file %p: mfd %p, cookie "LPX64"\n",
mfd->mfd_file, mfd, mfd->mfd_servercookie);
- GOTO(out_step_2, rc = 0); /* returns a lock to the client */
+ GOTO(cleanup, rc = 0); /* returns a lock to the client */
- out_step_5:
- if (mfd != NULL) {
- kmem_cache_free(mds_file_cache, mfd);
- mfd = NULL;
+ cleanup:
+ rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
+ req, rc, rep->lock_policy_res1);
+ switch (cleanup_phase) {
+ case 4:
+ if (rc)
+ kmem_cache_free(mds_file_cache, mfd);
+ case 3:
+ /* This is the same logic as in the IT_OPEN part of
+ * ldlm_intent_policy: if we found the dentry, or we tried to
+ * open it (meaning that we created, if it wasn't found), then
+ * we return the lock to the caller and client. */
+ if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS)))
+ ldlm_lock_decref(child_lockh, child_mode);
+ case 2:
+ if (rc)
+ l_dput(dchild);
+ case 1:
+ l_dput(parent);
+ if (rc) {
+ ldlm_lock_decref(&parent_lockh, parent_mode);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
+ sizeof(parent_lockh));
+ req->rq_ack_locks[0].mode = parent_mode;
+ }
}
- out_step_4:
- ldlm_lock_decref(child_lockh, child_mode);
- out_step_3:
- l_dput(dchild);
- out_step_2:
- l_dput(parent);
- ldlm_lock_decref(&parent_lockh, parent_mode);
RETURN(rc);
}
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_MDS
+#include <linux/fs.h>
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/obd.h>
obd->obd_last_committed = last_rcvd;
}
-void mds_start_transno(struct mds_obd *mds)
-{
- ENTRY;
- down(&mds->mds_transno_sem);
-}
-
/* Assumes caller has already pushed us into the kernel context. */
-int mds_finish_transno(struct mds_obd *mds, void *handle,
- struct ptlrpc_request *req, int rc)
+int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
+ struct ptlrpc_request *req, int rc,
+ __u32 op_data)
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
struct mds_client_data *mcd = med->med_mcd;
- __u64 last_rcvd;
+ struct obd_device *obd = req->rq_export->exp_obd;
+ int started_handle = 0, err;
+ __u64 transno;
loff_t off;
ssize_t written;
-
- /* Propagate error code. */
- if (rc)
- GOTO(out, rc);
+ ENTRY;
/* we don't allocate new transnos for replayed requests */
if (req->rq_level == LUSTRE_CONN_RECOVD)
- GOTO(out, rc = 0);
+ GOTO(out, rc = rc);
+
+ if (!handle) {
+ /* if we're starting our own xaction, use our own inode */
+ i = mds->mds_rcvd_filp->f_dentry->d_inode;
+ handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR);
+ if (IS_ERR(handle)) {
+ CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
+ GOTO(out, rc = PTR_ERR(handle));
+ }
+ started_handle = 1;
+ }
off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
- last_rcvd = ++mds->mds_last_rcvd;
- req->rq_repmsg->transno = HTON__u64(last_rcvd);
- mcd->mcd_last_rcvd = cpu_to_le64(last_rcvd);
+ spin_lock(&mds->mds_transno_lock);
+ transno = ++mds->mds_last_transno;
+ spin_unlock(&mds->mds_transno_lock);
+ req->rq_repmsg->transno = req->rq_transno = HTON__u64(transno);
+ mcd->mcd_last_transno = cpu_to_le64(transno);
mcd->mcd_mount_count = cpu_to_le64(mds->mds_mount_count);
mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
+ mcd->mcd_last_result = cpu_to_le32(rc);
+ mcd->mcd_last_data = cpu_to_le32(op_data);
- fsfilt_set_last_rcvd(req->rq_export->exp_obd, last_rcvd, handle,
+ fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
mds_last_rcvd_cb);
written = lustre_fwrite(mds->mds_rcvd_filp, (char *)mcd, sizeof(*mcd),
&off);
- CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
- LPSZ"\n", last_rcvd, mcd->mcd_uuid, med->med_off, written);
-
- if (written == sizeof(*mcd))
- GOTO(out, rc = 0);
- CERROR("error writing to last_rcvd file: rc = %d\n", rc);
- if (written >= 0)
- GOTO(out, rc = -EIO);
+ CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = "
+ LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written);
+
+ if (written != sizeof(*mcd)) {
+ CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
+ if (rc == 0) {
+ if (written < 0)
+ rc = written;
+ else
+ rc = -EIO;
+ }
+ }
- rc = 0;
+ err = fsfilt_commit(obd, i, handle);
+ if (err) {
+ CERROR("error committing transaction: %d\n", err);
+ if (!rc)
+ rc = err;
+ }
EXIT;
out:
- up(&mds->mds_transno_sem);
return rc;
}
-/* In the write-back case, the client holds a lock on a subtree (not supported).
- * In the intent case, the client holds a lock on the child inode. */
+/* this gives the same functionality as the code between
+ *   sys_chmod and inode_setattr
+ *   chown_common and inode_setattr
+ *   utimes and inode_setattr
+ *
+ * Nothing is done unless ATTR_RAW is set in rec->ur_iattr.ia_valid, i.e. the
+ * client VFS did not already fix the attributes up.  Otherwise the ctime
+ * (and the atime/mtime unless explicitly set) are stamped with the current
+ * time and rec->ur_iattr is adjusted in place for the utimes, chown or chmod
+ * case.
+ *
+ * Returns 0 on success, -EPERM for immutable or append-only inodes, or the
+ * error returned by permission() in the utimes case.
+ */
+int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        time_t now = CURRENT_TIME;
+#else
+        time_t now = CURRENT_TIME.tv_sec;
+#endif
+        struct iattr *attr = &rec->ur_iattr;
+        unsigned int ia_valid = attr->ia_valid;
+        int error;
+        ENTRY;
+
+        /* only fix up attrs if the client VFS didn't already */
+        if (!(ia_valid & ATTR_RAW))
+                RETURN(0);
+
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                RETURN(-EPERM);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        attr->ia_ctime = now;
+        if (!(ia_valid & ATTR_ATIME_SET))
+                attr->ia_atime = now;
+        if (!(ia_valid & ATTR_MTIME_SET))
+                attr->ia_mtime = now;
+#else
+        attr->ia_ctime.tv_sec = now;
+        if (!(ia_valid & ATTR_ATIME_SET))
+                attr->ia_atime.tv_sec = now;
+        if (!(ia_valid & ATTR_MTIME_SET))
+                attr->ia_mtime.tv_sec = now;
+#endif
+
+        /* times */
+        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) &&
+            !(ia_valid & ATTR_ATIME_SET)) {
+                if (rec->ur_fsuid != inode->i_uid &&
+                    (error = permission(inode,MAY_WRITE)) != 0)
+                        RETURN(error);
+        } else if (ia_valid & ATTR_UID) {
+                /* chown; immutable and append-only inodes were already
+                 * rejected above */
+                if (attr->ia_uid == (uid_t) -1)
+                        attr->ia_uid = inode->i_uid;
+                if (attr->ia_gid == (gid_t) -1)
+                        attr->ia_gid = inode->i_gid;
+                attr->ia_mode = inode->i_mode;
+                attr->ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
+                /*
+                 * If the user or group of a non-directory has been
+                 * changed by a non-root user, remove the setuid bit.
+                 * 19981026 David C Niemi <niemi@tux.org>
+                 *
+                 * Changed this to apply to all users, including root,
+                 * to avoid some races. This is the behavior we had in
+                 * 2.0. The check for non-root was definitely wrong
+                 * for 2.2 anyway, as it should have been using
+                 * CAP_FSETID rather than fsuid -- 19990830 SD.
+                 */
+                if ((inode->i_mode & S_ISUID) == S_ISUID &&
+                    !S_ISDIR(inode->i_mode)) {
+                        attr->ia_mode &= ~S_ISUID;
+                        attr->ia_valid |= ATTR_MODE;
+                }
+                /*
+                 * Likewise, if the user or group of a non-directory
+                 * has been changed by a non-root user, remove the
+                 * setgid bit UNLESS there is no group execute bit
+                 * (this would be a file marked for mandatory
+                 * locking).  19981026 David C Niemi <niemi@tux.org>
+                 *
+                 * Removed the fsuid check (see the comment above) --
+                 * 19990830 SD.
+                 */
+                if (((inode->i_mode & (S_ISGID | S_IXGRP)) ==
+                     (S_ISGID | S_IXGRP)) && !S_ISDIR(inode->i_mode)) {
+                        attr->ia_mode &= ~S_ISGID;
+                        attr->ia_valid |= ATTR_MODE;
+                }
+        } else if (ia_valid & ATTR_MODE) {
+                int mode = attr->ia_mode;
+                /* chmod: a mode of -1 means "keep the current mode", so the
+                 * substitution has to land in the value that is masked
+                 * below, not in attr->ia_mode which is overwritten next */
+                if (attr->ia_mode == (mode_t) -1)
+                        mode = inode->i_mode;
+                attr->ia_mode =
+                        (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+        }
+        RETURN(0);
+}
+
+/* Rebuild the reply for a setattr request that was already processed.
+ *
+ * The saved transno and result are copied from the client's record into
+ * @req, ack locks are stolen from any outstanding reply, and reply buffer 0
+ * is re-packed from the inode named by rec->ur_fid1.  If that inode can no
+ * longer be looked up, the lookup error must equal the saved result and the
+ * body is left unpacked.  @offset is not used.
+ */
+static void reconstruct_reint_setattr(struct mds_update_record *rec,
+                                      int offset, struct ptlrpc_request *req)
+{
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
+        struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
+        struct dentry *de;
+        struct mds_body *body;
+
+        /* mcd_last_result is written with cpu_to_le32; convert it back.
+         * NOTE(review): mcd_last_transno is copied unconverted because
+         * rq_transno is stored via HTON__u64 elsewhere -- confirm both use
+         * the same byte order. */
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = le32_to_cpu(mcd->mcd_last_result);
+
+        if (med->med_outstanding_reply)
+                mds_steal_ack_locks(med, req);
+
+        de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+        if (IS_ERR(de)) {
+                LASSERT(PTR_ERR(de) == req->rq_status);
+                return;
+        }
+
+        body = lustre_msg_buf(req->rq_repmsg, 0);
+        mds_pack_inode2fid(&body->fid1, de->d_inode);
+        mds_pack_inode2body(body, de->d_inode);
+
+        l_dput(de);
+}
+
+/* In the raw-setattr case, we lock the child inode.
+ * In the write-back case or if being called from open, the client holds a lock
+ * already.
+ *
+ * We use the ATTR_FROM_OPEN flag to tell these cases apart. */
static int mds_reint_setattr(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req,
struct lustre_handle *lh)
struct obd_device *obd = req->rq_export->exp_obd;
struct mds_body *body;
struct dentry *de;
- struct inode *inode;
- void *handle;
- int rc = 0, err;
+ struct inode *inode = NULL;
+ struct lustre_handle lockh;
+ void *handle = NULL;
+ int rc = 0, cleanup_phase = 0, err, locked = 0;
+ ENTRY;
- de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
- if (IS_ERR(de))
- GOTO(out_setattr, rc = PTR_ERR(de));
- inode = de->d_inode;
+ MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req));
+
+ if (rec->ur_iattr.ia_valid & ATTR_FROM_OPEN) {
+ de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+ if (IS_ERR(de))
+ GOTO(cleanup, rc = PTR_ERR(de));
+ } else {
+ de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+ &lockh);
+ if (IS_ERR(de))
+ GOTO(cleanup, rc = PTR_ERR(de));
+ locked = 1;
+ }
+ cleanup_phase = 1;
+ inode = de->d_inode;
LASSERT(inode);
+
CDEBUG(D_INODE, "ino %lu\n", inode->i_ino);
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
to_kdev_t(inode->i_sb->s_dev));
- mds_start_transno(mds);
handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
- (void)mds_finish_transno(mds, handle, req, rc);
- GOTO(out_setattr_de, rc);
+ handle = NULL;
+ GOTO(cleanup, rc);
}
+ rc = mds_fix_attr(inode, rec);
+ if (rc)
+ GOTO(cleanup, rc);
+
rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr);
if (rc == 0 && S_ISREG(inode->i_mode) &&
req->rq_reqmsg->bufcount > 1) {
mds_pack_inode2fid(&body->fid1, inode);
mds_pack_inode2body(body, inode);
- rc = mds_finish_transno(mds, handle, req, rc);
- err = fsfilt_commit(obd, de->d_inode, handle);
- if (err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
+ EXIT;
+ cleanup:
+ err = mds_finish_transno(mds, inode, handle, req, rc, 0);
+ switch(cleanup_phase) {
+ case 1:
+ l_dput(de);
+ if (locked) {
+ if (rc) {
+ ldlm_lock_decref(&lockh, LCK_PW);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &lockh,
+ sizeof(lockh));
+ req->rq_ack_locks[0].mode = LCK_PW;
+ }
+ }
+ case 0:
+ break;
+ default:
+ LBUG();
}
+ if (err && !rc)
+ rc = err;
- EXIT;
-out_setattr_de:
- l_dput(de);
-out_setattr:
req->rq_status = rc;
return 0;
}
+/* Rebuild the reply for a create request that was already processed.
+ *
+ * The saved transno and result are copied from the client's record into
+ * @req and ack locks are stolen from any outstanding reply.  If the saved
+ * result is an error nothing else is done; otherwise the parent
+ * (rec->ur_fid1) and the child (rec->ur_name) are looked up again and the
+ * reply body at buffer @offset is re-packed from the child inode.
+ */
+static void reconstruct_reint_create(struct mds_update_record *rec, int offset,
+                                     struct ptlrpc_request *req)
+{
+        struct mds_export_data *med = &req->rq_export->exp_mds_data;
+        struct mds_client_data *mcd = med->med_mcd;
+        struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
+        struct dentry *parent, *child;
+        struct mds_body *body;
+
+        /* mcd_last_result is written with cpu_to_le32; convert it back.
+         * NOTE(review): mcd_last_transno is copied unconverted because
+         * rq_transno is stored via HTON__u64 elsewhere -- confirm both use
+         * the same byte order. */
+        req->rq_transno = mcd->mcd_last_transno;
+        req->rq_status = le32_to_cpu(mcd->mcd_last_result);
+
+        if (med->med_outstanding_reply)
+                mds_steal_ack_locks(med, req);
+
+        /* a failed create has no child to report on */
+        if (req->rq_status)
+                return;
+
+        parent = mds_fid2dentry(mds, rec->ur_fid1, NULL);
+        LASSERT(!IS_ERR(parent));
+        child = lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1);
+        LASSERT(!IS_ERR(child));
+        body = lustre_msg_buf(req->rq_repmsg, offset);
+        mds_pack_inode2fid(&body->fid1, child->d_inode);
+        mds_pack_inode2body(body, child->d_inode);
+        l_dput(parent);
+        l_dput(child);
+}
+
static int mds_reint_create(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req,
struct lustre_handle *lh)
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct dentry *dchild = NULL;
- struct inode *dir;
- void *handle;
+ struct inode *dir = NULL;
+ void *handle = NULL;
struct lustre_handle lockh;
- int rc = 0, err, type = rec->ur_mode & S_IFMT;
+ int rc = 0, err, type = rec->ur_mode & S_IFMT, cleanup_phase = 0;
+ int created = 0;
ENTRY;
LASSERT(offset == 0);
LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds"));
+ MDS_CHECK_RESENT(req, reconstruct_reint_create(rec, offset, req));
+
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE))
- GOTO(out_create, rc = -ESTALE);
+ GOTO(cleanup, rc = -ESTALE);
de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, &lockh);
if (IS_ERR(de)) {
rc = PTR_ERR(de);
CERROR("parent lookup error %d\n", rc);
- LBUG();
- GOTO(out_create, rc);
+ GOTO(cleanup, rc);
}
+ cleanup_phase = 1; /* locked parent dentry */
dir = de->d_inode;
LASSERT(dir);
CDEBUG(D_INODE, "parent ino %lu creating name %s mode %o\n",
if (IS_ERR(dchild)) {
rc = PTR_ERR(dchild);
CERROR("child lookup error %d\n", rc);
- GOTO(out_create_de, rc);
+ GOTO(cleanup, rc);
}
+ cleanup_phase = 2; /* child dentry */
+
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE,
to_kdev_t(dir->i_sb->s_dev));
else
LASSERT(!(rec->ur_opcode & REINT_REPLAYING));
- /* From here on, we must exit via a path that calls mds_finish_transno,
- * so that we release the mds_transno_sem (and, in the case of success,
- * update the transno correctly). out_create_commit and
- * out_transno_dchild are good candidates.
- */
- mds_start_transno(mds);
-
switch (type) {
case S_IFREG:{
handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
if (IS_ERR(handle))
- GOTO(out_transno_dchild, rc = PTR_ERR(handle));
+ GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_create(dir, dchild, rec->ur_mode);
EXIT;
break;
case S_IFDIR:{
handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
if (IS_ERR(handle))
- GOTO(out_transno_dchild, rc = PTR_ERR(handle));
+ GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_mkdir(dir, dchild, rec->ur_mode);
EXIT;
break;
case S_IFLNK:{
handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
if (IS_ERR(handle))
- GOTO(out_transno_dchild, rc = PTR_ERR(handle));
+ GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_symlink(dir, dchild, rec->ur_tgt);
EXIT;
break;
int rdev = rec->ur_rdev;
handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
+                /* Read the error code out of handle BEFORE clearing it:
+                 * PTR_ERR(NULL) is 0, so the reverse order would send the
+                 * cleanup path off with rc == 0 after a failed fsfilt_start.
+                 * handle is still cleared so cleanup is not handed an error
+                 * pointer. */
if (IS_ERR(handle))
- GOTO(out_transno_dchild, rc = PTR_ERR(handle));
+                        GOTO(cleanup, (rc = PTR_ERR(handle), handle = NULL, rc));
rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
EXIT;
break;
}
default:
CERROR("bad file type %o creating %s\n", type, rec->ur_name);
- handle = NULL; /* quell uninitialized warning */
- GOTO(out_transno_dchild, rc = -EINVAL);
+ GOTO(cleanup, rc = -EINVAL);
}
/* In case we stored the desired inum in here, we want to clean up.
- * We also do this in the out_transno_dchild block, for the error cases.
+ * We also do this in the cleanup block, for the error cases.
*/
dchild->d_fsdata = NULL;
if (rc) {
CDEBUG(D_INODE, "error during create: %d\n", rc);
- GOTO(out_create_commit, rc);
+ GOTO(cleanup, rc);
} else {
struct iattr iattr;
struct inode *inode = dchild->d_inode;
struct mds_body *body;
+ created = 1;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
iattr.ia_atime = rec->ur_time;
iattr.ia_ctime = rec->ur_time;
iattr.ia_mtime = rec->ur_time;
+#else
+ iattr.ia_atime.tv_sec = rec->ur_time;
+ iattr.ia_ctime.tv_sec = rec->ur_time;
+ iattr.ia_mtime.tv_sec = rec->ur_time;
+#endif
iattr.ia_uid = rec->ur_uid;
iattr.ia_gid = rec->ur_gid;
iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
mds_pack_inode2body(body, inode);
}
EXIT;
-out_create_commit:
- if (rc) {
- rc = mds_finish_transno(mds, handle, req, rc);
+
+cleanup:
+ err = mds_finish_transno(mds, dir, handle, req, rc, 0);
+
+ if (rc && created) {
+ /* Destroy the file we just created. This should not need
+ * extra journal credits, as we have already modified all of
+ * the blocks needed in order to create the file in the first
+ * place.
+ */
+ switch (type) {
+ case S_IFDIR:
+ err = vfs_rmdir(dir, dchild);
+ if (err)
+ CERROR("rmdir in error path: %d\n", err);
+ break;
+ default:
+ err = vfs_unlink(dir, dchild);
+ if (err)
+ CERROR("unlink in error path: %d\n", err);
+ break;
+ }
} else {
- rc = mds_finish_transno(mds, handle, req, rc);
- if (rc)
- GOTO(out_create_unlink, rc);
+ rc = err;
}
- err = fsfilt_commit(obd, dir, handle);
- if (err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
- }
-out_create_dchild:
- l_dput(dchild);
-out_create_de:
- ldlm_lock_decref(&lockh, LCK_PW);
- l_dput(de);
-out_create:
- req->rq_status = rc;
- return 0;
-
-out_transno_dchild:
- dchild->d_fsdata = NULL;
- /* Need to release the transno lock, and then put the dchild. */
- LASSERT(rc);
- mds_finish_transno(mds, handle, req, rc);
- goto out_create_dchild;
-
-out_create_unlink:
- /* Destroy the file we just created. This should not need extra
- * journal credits, as we have already modified all of the blocks
- * needed in order to create the file in the first place.
- */
- switch (type) {
- case S_IFDIR:
- err = vfs_rmdir(dir, dchild);
- if (err)
- CERROR("failed rmdir in error path: rc = %d\n", err);
+ switch (cleanup_phase) {
+ case 2: /* child dentry */
+ dchild->d_fsdata = NULL;
+ l_dput(dchild);
+ case 1: /* locked parent dentry */
+ if (rc) {
+ ldlm_lock_decref(&lockh, LCK_PW);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &lockh,
+ sizeof(lockh));
+ req->rq_ack_locks[0].mode = LCK_PW;
+ }
+ l_dput(de);
+ case 0:
break;
default:
- err = vfs_unlink(dir, dchild);
- if (err)
- CERROR("failed unlink in error path: rc = %d\n", err);
- break;
+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
+ LBUG();
}
-
- goto out_create_commit;
+ req->rq_status = rc;
+ return 0;
}
/* This function doesn't use ldlm_match_or_enqueue because we're always called
RETURN(0);
}
+/* Resent-request handler for unlink (passed to MDS_CHECK_RESENT by
+ * mds_reint_unlink).  Restores the saved transno and result from the
+ * client's mds_client_data and calls mds_steal_ack_locks() when a reply is
+ * still outstanding.  The EA is not rebuilt here, which the error message
+ * below reports; @rec, @offset and @child_lockh are not used. */
+static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset,
+                                     struct ptlrpc_request *req,
+                                     struct lustre_handle *child_lockh)
+{
+        struct mds_export_data *exp_data = &req->rq_export->exp_mds_data;
+        struct mds_client_data *saved = exp_data->med_mcd;
+
+        req->rq_status = saved->mcd_last_result;
+        req->rq_transno = saved->mcd_last_transno;
+
+        if (exp_data->med_outstanding_reply) {
+                mds_steal_ack_locks(exp_data, req);
+        }
+
+        DEBUG_REQ(D_ERROR, req,
+                  "can't get EA for reconstructed unlink, leaking OST inodes");
+}
+
static int mds_reint_unlink(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req,
struct lustre_handle *child_lockh)
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct mds_body *body = NULL;
- struct inode *dir_inode, *child_inode;
- struct lustre_handle *handle, parent_lockh;
+ struct inode *dir_inode = NULL, *child_inode;
+ struct lustre_handle parent_lockh;
+ void *handle = NULL;
struct ldlm_res_id child_res_id = { .name = {0} };
char *name;
- int namelen, err, rc = 0, flags = 0, return_lock = 0;
+ int namelen, rc = 0, flags = 0, return_lock = 0;
+ int cleanup_phase = 0;
ENTRY;
+ MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
+ child_lockh));
+
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
- GOTO(out, rc = -ENOENT);
+ GOTO(cleanup, rc = -ENOENT);
/* Step 1: Lookup the parent by FID */
dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
&parent_lockh);
if (IS_ERR(dir_de))
- GOTO(out, rc = PTR_ERR(dir_de));
+ GOTO(cleanup, rc = PTR_ERR(dir_de));
dir_inode = dir_de->d_inode;
LASSERT(dir_inode);
+ cleanup_phase = 1; /* Have parent dentry lock */
+
/* Step 2: Lookup the child */
name = lustre_msg_buf(req->rq_reqmsg, offset + 1);
namelen = req->rq_reqmsg->buflens[offset + 1] - 1;
dchild = lookup_one_len(name, dir_de, namelen);
if (IS_ERR(dchild))
- GOTO(out_step_2a, rc = PTR_ERR(dchild));
+ GOTO(cleanup, rc = PTR_ERR(dchild));
+
+ cleanup_phase = 2; /* child dentry */
+
child_inode = dchild->d_inode;
if (child_inode == NULL) {
if (rec->ur_opcode & REINT_REPLAYING) {
dir_inode->i_ino, rec->ur_name);
rc = -ENOENT;
}
- GOTO(out_step_2b, rc);
+ GOTO(cleanup, rc);
}
DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
dir_inode->i_ino, child_inode->i_ino);
- /* Step 3: Get lock a lock on the child */
+ /* Step 3: Get a lock on the child */
child_res_id.name[0] = child_inode->i_ino;
child_res_id.name[1] = child_inode->i_generation;
&flags, ldlm_completion_ast, mds_blocking_ast,
NULL, NULL, child_lockh);
if (rc != ELDLM_OK)
- GOTO(out_step_2b, rc);
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 3; /* child lock */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
to_kdev_t(dir_inode->i_sb->s_dev));
/* Step 4: Do the unlink: client decides between rmdir/unlink!
* (bug 72) */
- mds_start_transno(mds);
switch (rec->ur_mode & S_IFMT) {
case S_IFDIR:
handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
if (IS_ERR(handle))
- GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+ GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_rmdir(dir_inode, dchild);
break;
case S_IFREG:
case S_IFSOCK:
handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
if (IS_ERR(handle))
- GOTO(out_cancel_transno, rc = PTR_ERR(handle));
+ GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_unlink(dir_inode, dchild);
break;
default:
CERROR("bad file type %o unlinking %s\n", rec->ur_mode, name);
- handle = NULL;
LBUG();
- GOTO(out_cancel_transno, rc = -EINVAL);
+ GOTO(cleanup, rc = -EINVAL);
}
- rc = mds_finish_transno(mds, handle, req, rc);
- err = fsfilt_commit(obd, dir_inode, handle);
- if (rc != 0 || err != 0) {
+ cleanup:
+ rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
+ if (rc && body) {
/* Don't unlink the OST objects if the MDS unlink failed */
body->valid = 0;
}
- if (err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
+ switch(cleanup_phase) {
+ case 3: /* child lock */
+ if (rc != 0 || return_lock == 0)
+ ldlm_lock_decref(child_lockh, LCK_EX);
+ case 2: /* child dentry */
+ l_dput(dchild);
+ case 1: /* parent dentry and lock */
+ if (rc) {
+ ldlm_lock_decref(&parent_lockh, LCK_EX);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
+ sizeof(parent_lockh));
+ req->rq_ack_locks[0].mode = LCK_EX;
+ }
+ l_dput(dir_de);
+ case 0:
+ break;
+ default:
+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
+ LBUG();
}
-
- GOTO(out_step_4, rc);
- out_step_4:
- if (rc != 0 || return_lock == 0)
- ldlm_lock_decref(child_lockh, LCK_EX);
- out_step_2b:
- l_dput(dchild);
- out_step_2a:
- ldlm_lock_decref(&parent_lockh, LCK_EX);
- l_dput(dir_de);
- out:
req->rq_status = rc;
return 0;
+}
+
+/* Resent-request handler for link (passed to MDS_CHECK_RESENT by
+ * mds_reint_link).  Copies the saved transno and result from the client's
+ * mds_client_data into the request.  Only the case where a reply is still
+ * outstanding is handled (via mds_steal_ack_locks()); anything else hits
+ * LBUG().  @rec and @offset are not used. */
+static void reconstruct_reint_link(struct mds_update_record *rec, int offset,
+ struct ptlrpc_request *req)
+{
+ struct mds_export_data *med = &req->rq_export->exp_mds_data;
+ struct mds_client_data *mcd = med->med_mcd;
- out_cancel_transno:
- rc = mds_finish_transno(mds, handle, req, rc);
- goto out_step_4;
+ req->rq_transno = mcd->mcd_last_transno;
+ req->rq_status = mcd->mcd_last_result;
+
+ if (med->med_outstanding_reply)
+ mds_steal_ack_locks(med, req);
+ else
+ LBUG(); /* don't support it yet, but it'll be fun! */
}
static int mds_reint_link(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req, struct lustre_handle *lh)
+ struct ptlrpc_request *req,
+ struct lustre_handle *lh)
{
struct obd_device *obd = req->rq_export->exp_obd;
struct dentry *de_src = NULL;
struct dentry *de_tgt_dir = NULL;
struct dentry *dchild = NULL;
struct mds_obd *mds = mds_req2mds(req);
- struct lustre_handle *handle, tgt_dir_lockh, src_lockh;
+ struct lustre_handle *handle = NULL, tgt_dir_lockh, src_lockh;
struct ldlm_res_id src_res_id = { .name = {0} };
struct ldlm_res_id tgt_dir_res_id = { .name = {0} };
- int lock_mode, rc = 0, err;
+ int lock_mode = 0, rc = 0, cleanup_phase = 0;
ENTRY;
+ MDS_CHECK_RESENT(req, reconstruct_reint_link(rec, offset, req));
+
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK))
- GOTO(out, rc = -ENOENT);
+ GOTO(cleanup, rc = -ENOENT);
/* Step 1: Lookup the source inode and target directory by FID */
de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de_src))
- GOTO(out, rc = PTR_ERR(de_src));
+ GOTO(cleanup, rc = PTR_ERR(de_src));
+
+ cleanup_phase = 1; /* source dentry */
de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
if (IS_ERR(de_tgt_dir))
- GOTO(out_de_src, rc = PTR_ERR(de_tgt_dir));
+ GOTO(cleanup, rc = PTR_ERR(de_tgt_dir));
+
+ cleanup_phase = 2; /* target directory dentry */
CDEBUG(D_INODE, "linking %*s/%s to inode %lu\n",
de_tgt_dir->d_name.len, de_tgt_dir->d_name.name, rec->ur_name,
rc = enqueue_ordered_locks(LCK_EX, obd, &src_res_id, &tgt_dir_res_id,
&src_lockh, &tgt_dir_lockh);
if (rc != ELDLM_OK)
- GOTO(out_tgt_dir, rc = -EIO);
+ GOTO(cleanup, rc = -EIO);
+
+ cleanup_phase = 3; /* locks */
/* Step 3: Lookup the child */
dchild = lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen - 1);
if (IS_ERR(dchild)) {
CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- GOTO(out_drop_locks, rc = PTR_ERR(dchild));
+ GOTO(cleanup, rc = PTR_ERR(dchild));
}
+ cleanup_phase = 4; /* child dentry */
+
if (dchild->d_inode) {
if (rec->ur_opcode & REINT_REPLAYING) {
/* XXX verify that the link is to the the right file? */
de_tgt_dir->d_inode->i_ino, rec->ur_name);
rc = -EEXIST;
}
- GOTO(out_drop_child, rc);
+ GOTO(cleanup, rc);
}
/* Step 4: Do it. */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
to_kdev_t(de_src->d_inode->i_sb->s_dev));
- mds_start_transno(mds);
handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
- mds_finish_transno(mds, handle, req, rc);
- GOTO(out_drop_child, rc);
+ GOTO(cleanup, rc);
}
rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
if (rc)
CERROR("link error %d\n", rc);
- rc = mds_finish_transno(mds, handle, req, rc);
-
- err = fsfilt_commit(obd, de_tgt_dir->d_inode, handle);
- if (err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
- }
-
+cleanup:
+        /* de_tgt_dir is a usable dentry only once cleanup_phase >= 2.  Before
+         * that it is either NULL or the ERR_PTR left by the failed
+         * mds_fid2dentry() lookup; an ERR_PTR is non-NULL, so a plain
+         * NULL test would dereference it. */
+        rc = mds_finish_transno(mds,
+                                cleanup_phase >= 2 ? de_tgt_dir->d_inode : NULL,
+                                handle, req, rc, 0);
EXIT;
-out_drop_child:
- l_dput(dchild);
-out_drop_locks:
- ldlm_lock_decref(&src_lockh, lock_mode);
- ldlm_lock_decref(&tgt_dir_lockh, lock_mode);
-out_tgt_dir:
- l_dput(de_tgt_dir);
-out_de_src:
- l_dput(de_src);
-out:
+ switch (cleanup_phase) {
+ case 4: /* child dentry */
+ l_dput(dchild);
+ case 3: /* locks */
+ if (rc) {
+ ldlm_lock_decref(&src_lockh, lock_mode);
+ ldlm_lock_decref(&tgt_dir_lockh, lock_mode);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &src_lockh,
+ sizeof(src_lockh));
+ memcpy(&req->rq_ack_locks[1].lock, &tgt_dir_lockh,
+ sizeof(tgt_dir_lockh));
+ req->rq_ack_locks[0].mode = lock_mode;
+ req->rq_ack_locks[1].mode = lock_mode;
+ }
+ case 2: /* target dentry */
+ l_dput(de_tgt_dir);
+ case 1: /* source dentry */
+ l_dput(de_src);
+ case 0:
+ break;
+ default:
+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
+ LBUG();
+ }
req->rq_status = rc;
return 0;
}
+/* Resent-request handler for rename (passed to MDS_CHECK_RESENT by
+ * mds_reint_rename).  Restores the saved transno and result from the
+ * client's mds_client_data.  Only the case where a reply is still
+ * outstanding is handled, by calling mds_steal_ack_locks(); otherwise
+ * LBUG() is hit.  @rec and @offset are not used. */
+static void reconstruct_reint_rename(struct mds_update_record *rec,
+                                     int offset, struct ptlrpc_request *req)
+{
+        struct mds_export_data *exp_data = &req->rq_export->exp_mds_data;
+        struct mds_client_data *saved = exp_data->med_mcd;
+
+        req->rq_status = saved->mcd_last_result;
+        req->rq_transno = saved->mcd_last_transno;
+
+        if (!exp_data->med_outstanding_reply) {
+                LBUG(); /* don't support it yet, but it'll be fun! */
+        } else {
+                mds_steal_ack_locks(exp_data, req);
+        }
+}
+
static int mds_reint_rename(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req,
struct lustre_handle *lockh)
struct ldlm_res_id p2_res_id = { .name = {0} };
struct ldlm_res_id c1_res_id = { .name = {0} };
struct ldlm_res_id c2_res_id = { .name = {0} };
- int rc = 0, err, lock_count = 3, flags = LDLM_FL_LOCAL_ONLY;
- void *handle;
+ int rc = 0, lock_count = 3, flags = LDLM_FL_LOCAL_ONLY;
+ int cleanup_phase = 0;
+ void *handle = NULL;
ENTRY;
+ MDS_CHECK_RESENT(req, reconstruct_reint_rename(rec, offset, req));
+
de_srcdir = mds_fid2dentry(mds, rec->ur_fid1, NULL);
if (IS_ERR(de_srcdir))
- GOTO(out, rc = PTR_ERR(de_srcdir));
+ GOTO(cleanup, rc = PTR_ERR(de_srcdir));
+
+ cleanup_phase = 1; /* source directory dentry */
+
de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
if (IS_ERR(de_tgtdir))
- GOTO(out_put_srcdir, rc = PTR_ERR(de_tgtdir));
+ GOTO(cleanup, rc = PTR_ERR(de_tgtdir));
+
+ cleanup_phase = 2; /* target directory dentry */
/* The idea here is that we need to get four locks in the end:
* one on each parent directory, one on each child. We need to take
rc = enqueue_ordered_locks(LCK_EX, obd, &p1_res_id, &p2_res_id,
&(dlm_handles[0]), &(dlm_handles[1]));
if (rc != ELDLM_OK)
- GOTO(out_put_tgtdir, rc);
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 3; /* parent locks */
/* Step 2: Lookup the children */
de_old = lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen - 1);
if (IS_ERR(de_old)) {
CERROR("old child lookup error (%*s): %ld\n",
rec->ur_namelen - 1, rec->ur_name, PTR_ERR(de_old));
- GOTO(out_step_2a, rc = PTR_ERR(de_old));
+ GOTO(cleanup, rc = PTR_ERR(de_old));
}
+ cleanup_phase = 4; /* original name dentry */
+
if (de_old->d_inode == NULL)
- GOTO(out_step_2b, rc = -ENOENT);
+ GOTO(cleanup, rc = -ENOENT);
+
+ /* sanity check for src inode */
+ if (de_old->d_inode->i_ino == de_srcdir->d_inode->i_ino ||
+ de_old->d_inode->i_ino == de_tgtdir->d_inode->i_ino)
+ GOTO(cleanup, rc = -EINVAL);
de_new = lookup_one_len(rec->ur_tgt, de_tgtdir, rec->ur_tgtlen - 1);
if (IS_ERR(de_new)) {
CERROR("new child lookup error (%*s): %ld\n",
rec->ur_tgtlen - 1, rec->ur_tgt, PTR_ERR(de_new));
- GOTO(out_step_2b, rc = PTR_ERR(de_new));
+ GOTO(cleanup, rc = PTR_ERR(de_new));
}
+ cleanup_phase = 5; /* target dentry */
+
+ /* sanity check for dest inode */
+ if (de_new->d_inode &&
+ (de_new->d_inode->i_ino == de_srcdir->d_inode->i_ino ||
+ de_new->d_inode->i_ino == de_tgtdir->d_inode->i_ino))
+ GOTO(cleanup, rc = -EINVAL);
+
/* Step 3: Take locks on the children */
c1_res_id.name[0] = de_old->d_inode->i_ino;
c1_res_id.name[1] = de_old->d_inode->i_generation;
lock_count = 4;
}
if (rc != ELDLM_OK)
- GOTO(out_step_3, rc);
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 6; /* child locks */
/* Step 4: Execute the rename */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
- mds_start_transno(mds);
handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
- if (IS_ERR(handle)) {
- rc = PTR_ERR(handle);
- mds_finish_transno(mds, handle, req, rc);
- GOTO(out_step_4, rc);
- }
+ if (IS_ERR(handle))
+ GOTO(cleanup, rc = PTR_ERR(handle));
lock_kernel();
rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new,
NULL);
unlock_kernel();
- rc = mds_finish_transno(mds, handle, req, rc);
-
- err = fsfilt_commit(obd, de_tgtdir->d_inode, handle);
- if (err) {
- CERROR("error on commit: err = %d\n", err);
- if (!rc)
- rc = err;
- }
-
EXIT;
- out_step_4:
- ldlm_lock_decref(&(dlm_handles[2]), LCK_EX);
- if (lock_count == 4)
- ldlm_lock_decref(&(dlm_handles[3]), LCK_EX);
- out_step_3:
- l_dput(de_new);
- out_step_2b:
- l_dput(de_old);
- out_step_2a:
- ldlm_lock_decref(&(dlm_handles[0]), LCK_EX);
- ldlm_lock_decref(&(dlm_handles[1]), LCK_EX);
- out_put_tgtdir:
- l_dput(de_tgtdir);
- out_put_srcdir:
- l_dput(de_srcdir);
- out:
+cleanup:
+        /* de_tgtdir is a usable dentry only once cleanup_phase >= 2.  If the
+         * target-directory lookup failed it holds an ERR_PTR, which is
+         * non-NULL, so a plain NULL test would dereference it. */
+        rc = mds_finish_transno(mds,
+                                cleanup_phase >= 2 ? de_tgtdir->d_inode : NULL,
+                                handle, req, rc, 0);
+ switch (cleanup_phase) {
+ case 6: /* child locks */
+ if (rc) {
+ ldlm_lock_decref(&(dlm_handles[2]), LCK_EX);
+ if (lock_count == 4)
+ ldlm_lock_decref(&(dlm_handles[3]), LCK_EX);
+ } else {
+ memcpy(&req->rq_ack_locks[2].lock, &(dlm_handles[2]),
+ sizeof(dlm_handles[2]));
+ req->rq_ack_locks[2].mode = LCK_EX;
+ if (lock_count == 4) {
+ memcpy(&req->rq_ack_locks[3].lock,
+ &dlm_handles[3], sizeof(dlm_handles[3]));
+ req->rq_ack_locks[3].mode = LCK_EX;
+ }
+ }
+ case 5: /* target dentry */
+ l_dput(de_new);
+ case 4: /* source dentry */
+ l_dput(de_old);
+ case 3: /* parent locks */
+ if (rc) {
+ ldlm_lock_decref(&(dlm_handles[0]), LCK_EX);
+ ldlm_lock_decref(&(dlm_handles[1]), LCK_EX);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock, &(dlm_handles[0]),
+ sizeof(dlm_handles[0]));
+ memcpy(&req->rq_ack_locks[1].lock, &(dlm_handles[1]),
+ sizeof(dlm_handles[1]));
+ req->rq_ack_locks[0].mode = LCK_EX;
+ req->rq_ack_locks[1].mode = LCK_EX;
+ }
+ case 2: /* target directory dentry */
+ l_dput(de_tgtdir);
+ case 1: /* source directry dentry */
+ l_dput(de_srcdir);
+ case 0:
+ break;
+ default:
+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
+ LBUG();
+ }
req->rq_status = rc;
return 0;
}
uc.ouc_fsuid = rec->ur_fsuid;
uc.ouc_fsgid = rec->ur_fsgid;
uc.ouc_cap = rec->ur_cap;
- uc.ouc_suppgid = rec->ur_suppgid;
+ uc.ouc_suppgid1 = rec->ur_suppgid1;
+ uc.ouc_suppgid2 = rec->ur_suppgid2;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
rc = reinters[realop] (rec, offset, req, lockh);
+
# FIXME: we need to make it clear that obdclass.o depends on
# lustre_build_version, or 'make -j2' breaks!
DEFS=
FSMOD = fsfilt_extN
endif
+if LIBLUSTRE
+lib_LIBRARIES = liblustreclass.a
+liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c
+
+class_obd.o: lustre_version
+
+lustre_version:
+ echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h
+ echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h
+
+else
modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs
-obdclass_SOURCES = debug.c genops.c class_obd.c sysctl.c uuid.c lprocfs_status.c
+obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c lprocfs_status.c lustre_handles.c lustre_peer.c
obdclass_SOURCES += fsfilt.c statfs_pack.c
+endif
include $(top_srcdir)/Rules
# XXX I'm sure there's some automake mv-if-different helper for this.
lustre_build_version:
- perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir)> tmpver
+ perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
cmp -z $(top_builddir)/include/linux/lustre_build_version.h tmpver \
2> /dev/null && \
$(RM) tmpver || \
* infrastructure for managing object devices
*/
+#define DEBUG_SUBSYSTEM S_CLASS
#define EXPORT_SYMTAB
+#ifdef __KERNEL__
#include <linux/config.h> /* for CONFIG_PROC_FS */
#include <linux/module.h>
#include <linux/errno.h>
#include <asm/poll.h>
#include <asm/uaccess.h>
#include <linux/miscdevice.h>
+#else
-#define DEBUG_SUBSYSTEM S_CLASS
+# include <liblustre.h>
+
+#endif
#include <linux/obd_support.h>
#include <linux/obd_class.h>
}
}
-/* to control /dev/obd */
-static int obd_class_ioctl (struct inode * inode, struct file * filp,
- unsigned int cmd, unsigned long arg)
+
+int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd,
+ unsigned long arg)
{
char *buf = NULL;
struct obd_ioctl_data *data;
- struct obd_class_user_state *ocus = filp->private_data;
struct obd_device *obd = ocus->ocus_current_obd;
struct lustre_handle conn;
int err = 0, len = 0, serialised = 0;
if (!obd && cmd != OBD_IOC_DEVICE && cmd != TCGETS &&
cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION &&
- cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV) {
+ cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_NEWDEV &&
+ cmd != OBD_IOC_ADD_UUID && cmd != OBD_IOC_DEL_UUID &&
+ cmd != OBD_IOC_CLOSE_UUID) {
CERROR("OBD ioctl: No device\n");
GOTO(out, err = -EINVAL);
}
status = "-";
l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n",
i, status, obd->obd_type->typ_name,
- obd->obd_name, obd->obd_uuid.uuid, obd->obd_type->typ_refcnt);
+ obd->obd_name, obd->obd_uuid.uuid,
+ obd->obd_type->typ_refcnt);
buf2 +=l;
remains -=l;
if (remains <= 0) {
case OBD_IOC_ATTACH: {
struct obd_type *type;
- int minor;
+ int minor, len;
/* have we attached a type to this device */
if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
CERROR("Type not nul terminated!\n");
GOTO(out, err = -EINVAL);
}
-
+ if (!data->ioc_inllen2 || !data->ioc_inlbuf2) {
+ CERROR("No name passed!\n");
+ GOTO(out, err = -EINVAL);
+ }
CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
MKSTR(data->ioc_inlbuf1),
MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
INIT_LIST_HEAD(&obd->obd_imports);
spin_lock_init(&obd->obd_dev_lock);
- if (data->ioc_inlbuf2) {
- int len = strlen(data->ioc_inlbuf2) + 1;
- OBD_ALLOC(obd->obd_name, len);
- if (!obd->obd_name) {
- class_put_type(obd->obd_type);
- obd->obd_type = NULL;
- GOTO(out, err = -ENOMEM);
- }
- memcpy(obd->obd_name, data->ioc_inlbuf2, len);
- } else {
- CERROR("WARNING: unnamed obd device\n");
+ /* XXX belongs in setup, not attach */
+ /* recovery data */
+ spin_lock_init(&obd->obd_processing_task_lock);
+ init_waitqueue_head(&obd->obd_next_transno_waitq);
+ INIT_LIST_HEAD(&obd->obd_recovery_queue);
+ INIT_LIST_HEAD(&obd->obd_delayed_reply_queue);
+
+ len = strlen(data->ioc_inlbuf2) + 1;
+ OBD_ALLOC(obd->obd_name, len);
+ if (!obd->obd_name) {
+ class_put_type(obd->obd_type);
+ obd->obd_type = NULL;
+ GOTO(out, err = -ENOMEM);
}
+ memcpy(obd->obd_name, data->ioc_inlbuf2, len);
+
if (data->ioc_inlbuf3) {
int len = strlen(data->ioc_inlbuf3);
if (len >= sizeof(obd->obd_uuid)) {
err = OBP(obd,attach)(obd, sizeof(*data), data);
if (err) {
if(data->ioc_inlbuf2)
- OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
+ OBD_FREE(obd->obd_name,
+ strlen(obd->obd_name) + 1);
class_put_type(obd->obd_type);
obd->obd_type = NULL;
} else {
GOTO(out, err = 0);
}
- default:
- obd_data2conn(&conn, data);
+ case OBD_IOC_CLOSE_UUID: {
+ struct lustre_peer peer;
+ CDEBUG(D_IOCTL, "closing all connections to uuid %s\n",
+ data->ioc_inlbuf1);
+ lustre_uuid_to_peer(data->ioc_inlbuf1, &peer);
+ GOTO(out, err = 0);
+ }
+ case OBD_IOC_ADD_UUID: {
+ CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64
+ ", nal %d\n", data->ioc_inlbuf1, data->ioc_nid,
+ data->ioc_nal);
+
+ err = class_add_uuid(data->ioc_inlbuf1, data->ioc_nid,
+ data->ioc_nal);
+ GOTO(out, err);
+ }
+ case OBD_IOC_DEL_UUID: {
+ CDEBUG(D_IOCTL, "removing mappings for uuid %s\n",
+ data->ioc_inlbuf1 == NULL ? "<all uuids>" :
+ data->ioc_inlbuf1);
- err = obd_iocontrol(cmd, &conn, len, data, NULL);
+ err = class_del_uuid(data->ioc_inlbuf1);
+ GOTO(out, err);
+ }
+ default: {
+ // obd_data2conn(&conn, data);
+ struct obd_class_user_conn *oconn = list_entry(ocus->ocus_conns.next, struct obd_class_user_conn, ocuc_chain);
+ err = obd_iocontrol(cmd, &oconn->ocuc_conn, len, data, NULL);
if (err)
GOTO(out, err);
err = -EFAULT;
GOTO(out, err);
}
+ }
out:
if (buf)
+#define OBD_MINOR 241
+#ifdef __KERNEL__
+/* ioctl handler used to control /dev/obd: passes the state pointer stored in
+ * filp->private_data, together with cmd and arg, to class_handle_ioctl() and
+ * returns its result.  @inode is not used. */
+static int obd_class_ioctl(struct inode *inode, struct file *filp,
+                           unsigned int cmd, unsigned long arg)
+{
+        struct obd_class_user_state *ocus = filp->private_data;
+
+        return class_handle_ioctl(ocus, cmd, arg);
+}
+
/* declare character device */
static struct file_operations obd_psdev_fops = {
ioctl: obd_class_ioctl, /* ioctl */
};
/* modules setup */
-#define OBD_MINOR 241
static struct miscdevice obd_psdev = {
OBD_MINOR,
"obd_psdev",
&obd_psdev_fops
};
+#else
+void *obd_psdev = NULL;
+#endif
void (*class_signal_connection_failure)(struct ptlrpc_connection *);
EXPORT_SYMBOL(class_disconnect);
EXPORT_SYMBOL(class_disconnect_all);
EXPORT_SYMBOL(class_uuid_unparse);
+EXPORT_SYMBOL(lustre_uuid_to_peer);
EXPORT_SYMBOL(class_signal_connection_failure);
+EXPORT_SYMBOL(class_handle_hash);
+EXPORT_SYMBOL(class_handle_unhash);
+EXPORT_SYMBOL(class_handle2object);
+
+#ifdef __KERNEL__
static int __init init_obdclass(void)
+#else
+int init_obdclass(void)
+#endif
{
struct obd_device *obd;
int err;
printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION
", info@clusterfs.com\n");
+ class_init_uuidlist();
+ class_handle_init();
+
sema_init(&obd_conf_sem, 1);
INIT_LIST_HEAD(&obd_types);
if (err)
return err;
+#ifdef __KERNEL__
obd_sysctl_init();
+#endif
#ifdef LPROCFS
proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
return 0;
}
+#ifdef __KERNEL__
static void __exit cleanup_obdclass(void)
+#else
+static void cleanup_obdclass(void)
+#endif
{
int i;
ENTRY;
}
obd_cleanup_caches();
+#ifdef __KERNEL__
obd_sysctl_clean();
-
+#endif
if (proc_lustre_root) {
lprocfs_remove(proc_lustre_root);
proc_lustre_root = NULL;
}
+ class_handle_cleanup();
+ class_exit_uuidlist();
+
CERROR("obd mem max: %d leaked: %d\n", obd_memmax,
atomic_read(&obd_memory));
EXIT;
/* Check that we're building against the appropriate version of the Lustre
* kernel patch */
+#ifdef __KERNEL__
#include <linux/lustre_version.h>
-#define LUSTRE_SOURCE_VERSION 10
+#define LUSTRE_SOURCE_VERSION 13
#if (LUSTRE_KERNEL_VERSION < LUSTRE_SOURCE_VERSION)
# error Cannot continue: Your Lustre kernel patch is older than the sources
-#elif (LUSTRE_KERNEL_VERSION > 11)
+#elif (LUSTRE_KERNEL_VERSION > LUSTRE_SOURCE_VERSION)
# error Cannot continue: Your Lustre sources are older than the kernel patch
#endif
+#else
+#warning "Lib Lustre - no versioning information"
+#endif
+#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
MODULE_LICENSE("GPL");
module_init(init_obdclass);
module_exit(cleanup_obdclass);
+#endif
#define DEBUG_SUBSYSTEM D_OTHER
#define EXPORT_SYMTAB
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
#include <linux/obd_ost.h>
+#include <linux/obd_support.h>
#include <linux/lustre_debug.h>
#include <linux/lustre_net.h>
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#error "FIXME: this needs to be updated to match fsfilt_extN.c"
+//#error "FIXME: this needs to be updated to match fsfilt_extN.c"
#define DEBUG_SUBSYSTEM S_FILTER
#include <linux/init.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
-#include <linux/ext3_xattr.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# include <linux/ext3_xattr.h>
+#else
+# include <asm/statfs.h>
+#endif
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
#include <linux/module.h>
static kmem_cache_t *fcb_cache;
-static int fcb_cache_count;
+static atomic_t fcb_cache_count = ATOMIC_INIT(0);
struct fsfilt_cb_data {
struct journal_callback cb_jcb; /* data private to jbd */
fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
kmem_cache_free(fcb_cache, fcb);
- --fcb_cache_count;
+ atomic_dec(&fcb_cache_count);
}
static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
if (!fcb)
RETURN(-ENOMEM);
- ++fcb_cache_count;
+ atomic_inc(&fcb_cache_count);
fcb->cb_func = cb_func;
fcb->cb_obd = obd;
fcb->cb_last_rcvd = last_rcvd;
GOTO(out, rc = -ENOMEM);
}
- rc = fsfilt_register_ops(&fsfilt_ext3_fs_ops);
+ rc = fsfilt_register_ops(&fsfilt_ext3_ops);
if (rc)
kmem_cache_destroy(fcb_cache);
{
int rc;
- fsfilt_unregister_ops(&fsfilt_ext3_fs_ops);
+ fsfilt_unregister_ops(&fsfilt_ext3_ops);
rc = kmem_cache_destroy(fcb_cache);
- if (rc || fcb_cache_count) {
+ if (rc || atomic_read(&fcb_cache_count)) {
CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
- fcb_cache_count, rc);
+ atomic_read(&fcb_cache_count), rc);
}
//rc = ext3_xattr_unregister();
#include <linux/module.h>
static kmem_cache_t *fcb_cache;
-static int fcb_cache_count;
+static atomic_t fcb_cache_count = ATOMIC_INIT(0);
struct fsfilt_cb_data {
struct journal_callback cb_jcb; /* data private to jbd */
fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
kmem_cache_free(fcb_cache, fcb);
- --fcb_cache_count;
+ atomic_dec(&fcb_cache_count);
}
static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
if (!fcb)
RETURN(-ENOMEM);
- ++fcb_cache_count;
+ atomic_inc(&fcb_cache_count);
fcb->cb_func = cb_func;
fcb->cb_obd = obd;
fcb->cb_last_rcvd = last_rcvd;
return extN_force_commit(sb);
}
+/* Defined outside this file. */
+extern int extN_prep_san_write(struct inode *inode, long *blocks,
+                               int nblocks, loff_t newsize);
+
+/* fsfilt wrapper around extN_prep_san_write(): forwards every argument
+ * unchanged and returns whatever the extN routine returns. */
+static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks,
+                                      int nblocks, loff_t newsize)
+{
+        int rc = extN_prep_san_write(inode, blocks, nblocks, newsize);
+
+        return rc;
+}
+
static struct fsfilt_operations fsfilt_extN_ops = {
fs_type: "extN",
fs_owner: THIS_MODULE,
fs_set_last_rcvd: fsfilt_extN_set_last_rcvd,
fs_statfs: fsfilt_extN_statfs,
fs_sync: fsfilt_extN_sync,
+ fs_prep_san_write: fsfilt_extN_prep_san_write,
};
static int __init fsfilt_extN_init(void)
fsfilt_unregister_ops(&fsfilt_extN_ops);
rc = kmem_cache_destroy(fcb_cache);
- if (rc || fcb_cache_count) {
+ if (rc || atomic_read(&fcb_cache_count)) {
CERROR("can't free fsfilt callback cache: count %d, rc = %d\n",
- fcb_cache_count, rc);
+ atomic_read(&fcb_cache_count), rc);
}
//rc = extN_xattr_unregister();
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/init.h>
+#include <asm/statfs.h>
+#endif
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#ifdef __KERNEL__
#include <linux/kmod.h> /* for request_module() */
#include <linux/module.h>
#include <linux/obd_class.h>
#include <linux/random.h>
#include <linux/slab.h>
+#else
+#include <liblustre.h>
+#include <linux/obd_class.h>
+#include <linux/obd.h>
+#endif
#include <linux/lprocfs_status.h>
extern struct list_head obd_types;
type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
vars, type);
- if (IS_ERR(type->typ_procroot)) {
+ if (type->typ_procroot && IS_ERR(type->typ_procroot)) {
rc = PTR_ERR(type->typ_procroot);
type->typ_procroot = NULL;
list_del(&type->typ_chain);
void class_destroy_export(struct obd_export *exp)
{
- ENTRY;
-
LASSERT(exp->exp_cookie != DEAD_HANDLE_MAGIC);
+ CDEBUG(D_IOCTL, "destroying export %p/%s\n", exp,
+ exp->exp_client_uuid.uuid);
+
spin_lock(&exp->exp_obd->obd_dev_lock);
list_del(&exp->exp_obd_chain);
spin_unlock(&exp->exp_obd->obd_dev_lock);
exp->exp_cookie = DEAD_HANDLE_MAGIC;
kmem_cache_free(export_cachep, exp);
-
- EXIT;
}
/* a connection defines an export context in which preallocation can
be managed. */
-int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+int class_connect(struct lustre_handle *exporth, struct obd_device *obd,
struct obd_uuid *cluuid)
{
struct obd_export * export;
- if (conn == NULL) {
+ if (exporth == NULL) {
LBUG();
return -EINVAL;
}
if (!export)
return -ENOMEM;
- conn->addr = (__u64) (unsigned long)export;
- conn->cookie = export->exp_cookie;
+ exporth->addr = (__u64) (unsigned long)export;
+ exporth->cookie = export->exp_cookie;
memcpy(&export->exp_client_uuid, cluuid, sizeof(export->exp_client_uuid));
CDEBUG(D_IOCTL, "connect: addr %Lx cookie %Lx\n",
- (long long)conn->addr, (long long)conn->cookie);
+ (long long)exporth->addr, (long long)exporth->cookie);
return 0;
}
*/
#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_CLASS
+#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/types.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
+#else
+#include <liblustre.h>
+#endif
-#define DEBUG_SUBSYSTEM S_CLASS
#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
{
struct proc_dir_entry *temp = root;
struct proc_dir_entry *rm_entry;
- struct proc_dir_entry *parent = root->parent;
+ struct proc_dir_entry *parent;
+
+ LASSERT(root != NULL);
+ parent = root->parent;
+ LASSERT(parent != NULL);
while (1) {
while (temp->subdir)
newchild = lprocfs_srch(parent, name);
if (newchild) {
CERROR(" Lproc: Attempting to register %s more than once \n",
- name);
- return NULL;
+ name);
+ return ERR_PTR(-EALREADY);
}
newchild = proc_mkdir(name, parent);
int lprocfs_rd_u64(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
+ LASSERT(data != NULL);
*eof = 1;
return snprintf(page, count, LPU64"\n", *(__u64 *)data);
}
{
struct obd_device* dev = (struct obd_device*)data;
+ LASSERT(dev != NULL);
*eof = 1;
return snprintf(page, count, "%s\n", dev->obd_uuid.uuid);
}
{
struct obd_device* dev = (struct obd_device *)data;
+ LASSERT(dev != NULL);
+ LASSERT(dev->obd_name != NULL);
*eof = 1;
return snprintf(page, count, "%s\n", dev->obd_name);
}
int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
int *eof, struct statfs *sfs)
{
+ LASSERT(sfs != NULL);
*eof = 1;
-
return snprintf(page, count, "%lu\n", sfs->f_bsize);
}
int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
int *eof, struct statfs *sfs)
{
- __u32 blk_size = sfs->f_bsize >> 10;
- __u64 result = sfs->f_blocks;
+ __u32 blk_size;
+ __u64 result;
+
+ LASSERT(sfs != NULL);
+ blk_size = sfs->f_bsize >> 10;
+ result = sfs->f_blocks;
while (blk_size >>= 1)
result <<= 1;
int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
int *eof, struct statfs *sfs)
{
- __u32 blk_size = sfs->f_bsize >> 10;
- __u64 result = sfs->f_bfree;
+ __u32 blk_size;
+ __u64 result;
+
+ LASSERT(sfs != NULL);
+ blk_size = sfs->f_bsize >> 10;
+ result = sfs->f_bfree;
while (blk_size >>= 1)
result <<= 1;
int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
int *eof, struct statfs *sfs)
{
+ LASSERT(sfs != NULL);
*eof = 1;
return snprintf(page, count, "%ld\n", sfs->f_files);
}
int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
int *eof, struct statfs *sfs)
{
+ LASSERT(sfs != NULL);
*eof = 1;
return snprintf(page, count, "%ld\n", sfs->f_ffree);
}
int *eof, void *data)
{
struct obd_device* obd = (struct obd_device*)data;
- struct client_obd* cli = &obd->u.cli;
+ struct client_obd* cli;
+
+ LASSERT(obd != NULL);
+ cli = &obd->u.cli;
+ *eof = 1;
return snprintf(page, count, "%s\n", cli->cl_target_uuid.uuid);
}
int *eof, void *data)
{
struct obd_device *obd = (struct obd_device*)data;
- struct ptlrpc_connection *conn = obd->u.cli.cl_import.imp_connection;
+ struct ptlrpc_connection *conn;
+ LASSERT(obd != NULL);
+ conn = obd->u.cli.cl_import.imp_connection;
+ LASSERT(conn != NULL);
*eof = 1;
return snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
}
{
struct obd_type* class = (struct obd_type*) data;
+ LASSERT(class != NULL);
*eof = 1;
return snprintf(page, count, "%d\n", class->typ_refcnt);
}
int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list)
{
int rc = 0;
+
+ LASSERT(dev != NULL);
+ LASSERT(dev->obd_type != NULL);
+ LASSERT(dev->obd_type->typ_procroot != NULL);
+
dev->obd_proc_entry = lprocfs_register(dev->obd_name,
dev->obd_type->typ_procroot,
list, dev);
if (IS_ERR(dev->obd_proc_entry)) {
rc = PTR_ERR(dev->obd_proc_entry);
- dev->obd_proc_entry = NULL;
+ dev->obd_proc_entry = NULL;
}
return rc;
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/random.h>
+#else
+#include <liblustre.h>
+#endif
+
+
+#include <linux/kp30.h>
+#include <linux/lustre_handles.h>
+
+static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t random_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head *handle_hash = NULL;
+static int handle_count = 0;
+
+#define HANDLE_HASH_SIZE (1 << 14)
+#define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1)
+
+/* Give handle @h a fresh random cookie, remember @cb as its addref callback
+ * and link it into the global handle hash.  @h must be non-NULL and must not
+ * already be on a list (both are asserted). */
+void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb)
+{
+        struct list_head *chain;
+        ENTRY;
+
+        LASSERT(h != NULL);
+        LASSERT(list_empty(&h->h_link));
+
+        /* Cookie generation is serialised on random_lock because of the
+         * author's stated hypothesis that two concurrent callers of
+         * get_random_bytes could be handed the same bytes. -phil */
+        spin_lock(&random_lock);
+        get_random_bytes(&h->h_cookie, sizeof(h->h_cookie));
+        spin_unlock(&random_lock);
+
+        h->h_addref = cb;
+
+        /* the low bits of the cookie pick the hash chain */
+        chain = &handle_hash[h->h_cookie & HANDLE_HASH_MASK];
+
+        CDEBUG(D_INFO, "adding object %p with handle "LPX64" to hash\n",
+               h, h->h_cookie);
+
+        spin_lock(&handle_lock);
+        list_add(&h->h_link, chain);
+        ++handle_count;
+        spin_unlock(&handle_lock);
+        EXIT;
+}
+
+/* Unlink @h from the handle hash and drop the global count.  Takes no lock
+ * itself; the callers in this file hold handle_lock around it.  @h must
+ * currently be on a list (asserted). */
+static void class_handle_unhash_nolock(struct portals_handle *h)
+{
+        LASSERT(!list_empty(&h->h_link));
+
+        CDEBUG(D_INFO, "removing object %p with handle "LPX64" from hash\n",
+               h, h->h_cookie);
+
+        /* list_del_init leaves h_link empty, so the handle could be
+         * passed to class_handle_hash() again */
+        list_del_init(&h->h_link);
+        --handle_count;
+}
+
+void class_handle_unhash(struct portals_handle *h) /* locked entry point: removes h from the handle hash */
+{
+ spin_lock(&handle_lock); /* handle_lock guards the hash chains and handle_count */
+ class_handle_unhash_nolock(h);
+ spin_unlock(&handle_lock);
+}
+
+/* Look up the handle whose cookie equals @cookie.  On a match the handle's
+ * h_addref callback is invoked (while handle_lock is still held) and the
+ * handle pointer is returned; NULL is returned when nothing matches. */
+void *class_handle2object(__u64 cookie)
+{
+        struct list_head *chain, *pos;
+        void *found = NULL;
+        ENTRY;
+
+        LASSERT(handle_hash != NULL);
+
+        spin_lock(&handle_lock);
+        chain = &handle_hash[cookie & HANDLE_HASH_MASK];
+
+        list_for_each(pos, chain) {
+                struct portals_handle *cur =
+                        list_entry(pos, struct portals_handle, h_link);
+
+                if (cur->h_cookie != cookie)
+                        continue;
+
+                /* take the caller's reference before the lock is dropped */
+                cur->h_addref(cur);
+                found = cur;
+                break;
+        }
+        spin_unlock(&handle_lock);
+
+        RETURN(found);
+}
+
+/* Allocate the handle hash table and initialise every bucket to an empty
+ * list.  Must not be called while a table already exists (asserted).
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated. */
+int class_handle_init(void)
+{
+        int i;
+
+        LASSERT(handle_hash == NULL);
+
+        PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        if (handle_hash == NULL)
+                return -ENOMEM;
+
+        /* Walk forward by index: a descending pointer loop has to step to
+         * one element before the array to terminate, which is not a valid
+         * pointer value in C. */
+        for (i = 0; i < HANDLE_HASH_SIZE; i++)
+                INIT_LIST_HEAD(&handle_hash[i]);
+
+        return 0;
+}
+
+/* Log and unhash every handle still present in the hash table.  Used by
+ * class_handle_cleanup() when handle_count is non-zero at teardown.  Only
+ * the hash linkage is removed; the objects themselves are not freed here. */
+static void cleanup_all_handles(void)
+{
+        int i;
+
+        spin_lock(&handle_lock);
+        for (i = 0; i < HANDLE_HASH_SIZE; i++) {
+                struct list_head *tmp, *pos;
+
+                /* _safe variant: the current entry is unlinked in the body */
+                list_for_each_safe(tmp, pos, &(handle_hash[i])) {
+                        struct portals_handle *h;
+                        h = list_entry(tmp, struct portals_handle, h_link);
+
+                        CERROR("forcing cleanup for handle "LPX64"\n",
+                               h->h_cookie);
+
+                        class_handle_unhash_nolock(h);
+                }
+        }
+        /* Must be an unlock: acquiring handle_lock a second time here would
+         * spin forever on a lock this thread already holds. */
+        spin_unlock(&handle_lock);
+}
+
+/* Tear down the handle hash table.  Any handles still hashed are reported
+ * and forcibly unhashed first; the table is then freed and the pointer
+ * reset so class_handle_init() may be called again.  A non-zero count
+ * remaining after that is reported as a leak. */
+void class_handle_cleanup(void)
+{
+        LASSERT(handle_hash != NULL);
+
+        if (handle_count) {
+                CERROR("handle_count at cleanup: %d\n", handle_count);
+                cleanup_all_handles();
+        }
+
+        PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        handle_hash = NULL;
+
+        if (handle_count != 0)
+                CERROR("leaked %d handles\n", handle_count);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/init.h>
+# include <linux/list.h>
+#else
+# include <liblustre.h>
+#endif
+#include <linux/obd.h>
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
+#include <linux/lprocfs_status.h>
+
+struct uuid_nid_data { /* one UUID -> peer mapping; entries are chained on g_uuid_list */
+ struct list_head head; /* list linkage (g_uuid_list, or a local deathrow list while being deleted) */
+ ptl_nid_t nid; /* NID supplied to class_add_uuid() */
+ char *uuid; /* separately allocated, NUL-terminated copy of the UUID string */
+ __u32 nal; /* NAL number handed to kportal_get_ni()/kportal_put_ni() */
+ ptl_handle_ni_t ni; /* copy of the NI handle that kportal_get_ni() returned */
+};
+
+/* FIXME: This should probably become more elegant than a global linked list */
+static struct list_head g_uuid_list;
+static spinlock_t g_uuid_lock;
+
+/* One-time setup of the global UUID table: initialise its lock and make
+ * the list empty. */
+void class_init_uuidlist(void)
+{
+        spin_lock_init(&g_uuid_lock);
+        INIT_LIST_HEAD(&g_uuid_list);
+}
+
+/* Module-exit teardown: release every UUID mapping still on g_uuid_list.
+ * Each entry is unlinked, its NI reference is dropped and both the UUID
+ * string and the entry itself are freed. */
+void class_exit_uuidlist(void)
+{
+        struct list_head *tmp, *n;
+
+        /* Module going => sole user => don't need to lock g_uuid_list */
+        list_for_each_safe(tmp, n, &g_uuid_list) {
+                struct uuid_nid_data *data =
+                        list_entry(tmp, struct uuid_nid_data, head);
+
+                /* unlink first so the list head never points at freed memory */
+                list_del(&data->head);
+
+                /* drop the reference class_add_uuid() took with
+                 * kportal_get_ni(), the same way class_del_uuid() does */
+                kportal_put_ni(data->nal);
+                PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
+                PORTAL_FREE(data, sizeof(*data));
+        }
+}
+
+/* Resolve @uuid to a peer.  On the first entry whose UUID string compares
+ * equal, @peer->peer_nid and @peer->peer_ni are filled in and 0 is
+ * returned; if no entry matches, @peer is left untouched and -1 is
+ * returned.  The search runs under g_uuid_lock. */
+int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer)
+{
+        struct list_head *pos;
+        int rc = -1;
+
+        spin_lock (&g_uuid_lock);
+
+        list_for_each(pos, &g_uuid_list) {
+                struct uuid_nid_data *entry =
+                        list_entry(pos, struct uuid_nid_data, head);
+
+                if (strcmp(entry->uuid, uuid) != 0)
+                        continue;
+
+                peer->peer_nid = entry->nid;
+                peer->peer_ni = entry->ni;
+                rc = 0;
+                break;
+        }
+
+        spin_unlock (&g_uuid_lock);
+        return rc;
+}
+
+/* Register a mapping from @uuid to (@nid, @nal) on the global UUID list.
+ *
+ * The UUID string is copied, so the caller keeps ownership of @uuid.  A
+ * reference on the NAL's network interface is taken with kportal_get_ni()
+ * and kept for the lifetime of the entry.
+ *
+ * Returns 0 on success, -EINVAL if @uuid (with its terminator) does not fit
+ * in PAGE_SIZE bytes, -EIO if the NAL's interface cannot be obtained, or
+ * -ENOMEM if either allocation fails. */
+int class_add_uuid(char *uuid, __u64 nid, __u32 nal)
+{
+        const ptl_handle_ni_t *nip;
+        struct uuid_nid_data *data;
+        int rc;
+        int nob = strnlen (uuid, PAGE_SIZE) + 1;
+
+        if (nob > PAGE_SIZE)
+                return -EINVAL;
+
+        nip = kportal_get_ni (nal);
+        if (nip == NULL) {
+                CERROR("get_ni failed: is the NAL module loaded?\n");
+                return -EIO;
+        }
+
+        rc = -ENOMEM;
+        PORTAL_ALLOC(data, sizeof(*data));
+        if (data == NULL)
+                goto fail_0;
+
+        PORTAL_ALLOC(data->uuid, nob);
+        /* check the string buffer just allocated, not the (already
+         * validated) entry, before copying into it */
+        if (data->uuid == NULL)
+                goto fail_1;
+
+        memcpy(data->uuid, uuid, nob);
+        data->nid = nid;
+        data->nal = nal;
+        data->ni = *nip;
+
+        spin_lock (&g_uuid_lock);
+
+        list_add(&data->head, &g_uuid_list);
+
+        spin_unlock (&g_uuid_lock);
+
+        return 0;
+
+ fail_1:
+        PORTAL_FREE (data, sizeof (*data));
+ fail_0:
+        /* undo the kportal_get_ni() above */
+        kportal_put_ni (nal);
+        return (rc);
+}
+
+/* delete only one entry if uuid is specified, otherwise delete all */
+/* Remove UUID mappings.  With a non-NULL @uuid only the first entry whose
+ * string matches is removed; with @uuid == NULL every entry is removed.
+ * Matching entries are moved to a private list under g_uuid_lock and are
+ * released (NI reference dropped, memory freed) after the lock is dropped.
+ * Returns 0 if at least one entry was removed, -EINVAL otherwise. */
+int class_del_uuid (char *uuid)
+{
+        struct list_head doomed;
+        struct list_head *pos;
+        struct list_head *next;
+        struct uuid_nid_data *entry;
+
+        INIT_LIST_HEAD (&doomed);
+
+        spin_lock (&g_uuid_lock);
+
+        list_for_each_safe(pos, next, &g_uuid_list) {
+                entry = list_entry(pos, struct uuid_nid_data, head);
+
+                /* a specific UUID was asked for and this is not it */
+                if (uuid != NULL && strcmp(entry->uuid, uuid) != 0)
+                        continue;
+
+                list_del (&entry->head);
+                list_add (&entry->head, &doomed);
+
+                /* single-entry mode stops at the first match */
+                if (uuid != NULL)
+                        break;
+        }
+
+        spin_unlock (&g_uuid_lock);
+
+        if (list_empty (&doomed))
+                return -EINVAL;
+
+        while (!list_empty (&doomed)) {
+                entry = list_entry(doomed.next, struct uuid_nid_data, head);
+
+                list_del (&entry->head);
+
+                kportal_put_ni (entry->nal);
+                PORTAL_FREE(entry->uuid, strlen(entry->uuid) + 1);
+                PORTAL_FREE(entry, sizeof(*entry));
+        }
+
+        return 0;
+}
#define DEBUG_SUBSYSTEM S_CLASS
#define EXPORT_SYMTAB
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
+#include <linux/lustre_export.h>
#include <linux/lustre_net.h>
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/swapctl.h>
+#endif
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/stat.h>
* Library General Public License.
* %End-Header%
*/
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#ifdef __KERNEL__
#include <linux/ctype.h>
#include <linux/kernel.h>
-
-#define DEBUG_SUBSYSTEM S_CLASS
+#else
+#include <liblustre.h>
+#endif
#include <linux/obd_support.h>
#include <linux/obd_class.h>
# See the file COPYING in this distribution
DEFS=
+
+if LIBLUSTRE
+lib_LIBRARIES = libobdecho.a
+libobdecho_a_SOURCES = echo_client.c
+else
MODULE = obdecho
modulefs_DATA = obdecho.o
EXTRA_PROGRAMS = obdecho
-
LINX=
obdecho_SOURCES = echo.c echo_client.c lproc_echo.c $(LINX)
+endif
include $(top_srcdir)/Rules
__u64 st_create_reqs;
__u64 st_destroy_reqs;
__u64 st_statfs_reqs;
+ __u64 st_sync_reqs;
__u64 st_open_reqs;
__u64 st_close_reqs;
__u64 st_punch_reqs;
stat += xprocfs_iostats[i].field; \
return (stat); \
}
-
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
DECLARE_XPROCFS_SUM_STAT (st_read_bytes)
DECLARE_XPROCFS_SUM_STAT (st_read_reqs)
DECLARE_XPROCFS_SUM_STAT (st_write_bytes)
DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_sync_reqs)
DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+#endif
static int
xprocfs_rd_stat (char *page, char **start, off_t off, int count,
{
long long (*fn)(void) = (long long(*)(void))data;
int len;
-
+
*eof = 1;
if (off != 0)
return (0);
*start = page;
return (len);
}
-
+
static void
xprocfs_add_stat(char *name, long long (*fn)(void))
xprocfs_init (char *name)
{
char dirname[64];
-
+
snprintf (dirname, sizeof (dirname), "sys/%s", name);
xprocfs_dir = proc_mkdir (dirname, NULL);
return;
}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes);
xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs);
xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes);
xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs);
xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs);
+ xprocfs_add_stat ("sync_reqs", xprocfs_sum_st_sync_reqs);
xprocfs_add_stat ("open_reqs", xprocfs_sum_st_open_reqs);
xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs);
xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs);
+#endif
}
void xprocfs_fini (void)
remove_proc_entry ("create_reqs", xprocfs_dir);
remove_proc_entry ("destroy_reqs", xprocfs_dir);
remove_proc_entry ("statfs_reqs", xprocfs_dir);
+ remove_proc_entry ("sync_reqs", xprocfs_dir);
remove_proc_entry ("open_reqs", xprocfs_dir);
remove_proc_entry ("close_reqs", xprocfs_dir);
remove_proc_entry ("punch_reqs", xprocfs_dir);
static int echo_disconnect(struct lustre_handle *conn)
{
struct obd_export *exp = class_conn2export(conn);
-
+
LASSERT (exp != NULL);
-
+
ldlm_cancel_locks_for_export (exp);
return (class_disconnect (conn));
}
fh->addr = oa->o_id;
fh->cookie = ECHO_HANDLE_MAGIC;
-
+
oa->o_valid |= OBD_MD_FLHANDLE;
return 0;
}
CERROR ("invalid file handle on close: "LPX64"\n", fh->cookie);
return (-EINVAL);
}
-
+
return 0;
}
obd_id id = oa->o_id;
XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1);
-
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(-EINVAL);
struct obd_device *obd = class_conn2obd(conn);
XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1);
-
+
if (!obd) {
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(-EINVAL);
int echo_preprw(int cmd, struct lustre_handle *conn, int objcount,
struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private, struct obd_trans_info *oti)
+ struct niobuf_local *res, void **desc_private,
+ struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
if (isobj0 &&
(nb->offset >> PAGE_SHIFT) < ECHO_OBJECT0_NPAGES) {
- r->page = echo_object0_pages[nb->offset >> PAGE_SHIFT];
+ r->page = echo_object0_pages[nb->offset >>
+ PAGE_SHIFT];
/* Take extra ref so __free_pages() can be called OK */
get_page (r->page);
} else {
r->page = alloc_pages(gfp_mask, 0);
if (r->page == NULL) {
- CERROR("can't get page %d/%d for id "LPU64"\n",
+ CERROR("can't get page %u/%u for id "
+ LPU64"\n",
j, obj->ioo_bufcnt, obj->ioo_id);
GOTO(preprw_cleanup, rc = -ENOMEM);
}
r->page, r->addr, r->offset);
if (cmd == OBD_BRW_READ) {
- XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes, r->len);
+ XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes,r->len);
if (verify)
- page_debug_setup(r->addr, r->len, r->offset,
- obj->ioo_id);
+ page_debug_setup(r->addr, r->len,
+ r->offset,obj->ioo_id);
} else {
- XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes, r->len);
+ XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes,
+ r->len);
if (verify)
page_debug_setup(r->addr, r->len,
0xecc0ecc0ecc0ecc0,
if (vrc != 0 && rc == 0)
rc = vrc;
}
-
+
kunmap(page);
/* NB see comment above regarding object0 pages */
obd_kmap_put(1);
extern void echo_client_cleanup(void);
static void
-echo_object0_pages_fini (void)
+echo_object0_pages_fini (void)
{
int i;
-
- for (i = 0; i < ECHO_OBJECT0_NPAGES; i++)
+
+ for (i = 0; i < ECHO_OBJECT0_NPAGES; i++)
if (echo_object0_pages[i] != NULL) {
__free_pages (echo_object0_pages[i], 0);
echo_object0_pages[i] = NULL;
{
struct page *pg;
int i;
-
+
for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) {
- int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ? GFP_KERNEL : GFP_HIGHUSER;
-
+ int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ?
+ GFP_KERNEL : GFP_HIGHUSER;
+
pg = alloc_pages (gfp_mask, 0);
if (pg == NULL) {
echo_object0_pages_fini ();
return (-ENOMEM);
}
-
+
memset (kmap (pg), 0, PAGE_SIZE);
kunmap (pg);
echo_object0_pages[i] = pg;
}
-
+
return (0);
}
rc = echo_object0_pages_init ();
if (rc != 0)
goto failed_0;
-
+
rc = class_register_type(&echo_obd_ops, lvars.module_vars,
OBD_ECHO_DEVICENAME);
if (rc != 0)
echo_object0_pages_fini ();
failed_0:
xprocfs_fini ();
-
+
RETURN(rc);
}
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_ECHO
+#ifdef __KERNEL__
#include <linux/version.h>
#include <linux/module.h>
#include <linux/fs.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#include <linux/iobuf.h>
+#endif
#include <asm/div64.h>
+#else
+#include <liblustre.h>
+#endif
-#define DEBUG_SUBSYSTEM S_ECHO
-
+#include <linux/obd.h>
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/obd_echo.h>
}
static struct ec_object *
-echo_allocate_object (struct obd_device *obd)
+echo_allocate_object (struct obd_device *obd)
{
struct echo_client_obd *ec = &obd->u.echo_client;
struct ec_object *eco;
-
+
OBD_ALLOC (eco, sizeof (*eco));
if (eco == NULL)
return (NULL);
OBD_FREE (eco, sizeof (*eco));
return (NULL);
}
-
+
eco->eco_device = obd;
eco->eco_deleted = 0;
eco->eco_refcount = 0;
}
static void
-echo_free_object (struct ec_object *eco)
+echo_free_object (struct ec_object *eco)
{
struct obd_device *obd = eco->eco_device;
struct echo_client_obd *ec = &obd->u.echo_client;
struct lov_stripe_md *lsm;
int rc;
int i;
-
+
if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
(on_target || /* set_stripe */
ec->ec_nstripes != 0)) { /* LOV */
eco = echo_allocate_object (obd);
if (eco == NULL)
return (-ENOMEM);
-
+
lsm = eco->eco_lsm;
if (ulsm != NULL) {
if (rc != 0)
goto failed;
}
-
+
/* setup object ID here for !on_target and LOV hint */
if ((oa->o_valid & OBD_MD_FLID) != 0)
eco->eco_id = lsm->lsm_object_id = oa->o_id;
-
+
/* defaults -> actual values */
if (lsm->lsm_stripe_offset == 0xffffffff)
lsm->lsm_stripe_offset = 0;
lsm->lsm_oinfo[i].loi_ost_idx =
(lsm->lsm_stripe_offset + i) % ec->ec_nstripes;
}
-
+
if (on_target) {
rc = obd_create (&ec->ec_conn, oa, &lsm, NULL);
if (rc != 0)
goto failed;
-
+
/* See what object ID we were given */
LASSERT ((oa->o_valid & OBD_MD_FLID) != 0);
eco->eco_id = lsm->lsm_object_id = oa->o_id;
}
-
+
spin_lock (&ec->ec_lock);
eco2 = echo_find_object_locked (obd, oa->o_id);
if (eco2 != NULL) { /* conflict */
spin_unlock (&ec->ec_lock);
-
- CERROR ("Can't create object id "LPX64": id already exists%s\n",
+
+ CERROR ("Can't create object id "LPX64": id already exists%s\n",
oa->o_id, on_target ? " (undoing create)" : "");
-
+
if (on_target)
obd_destroy (&ec->ec_conn, oa, lsm, NULL);
-
+
rc = -EEXIST;
goto failed;
}
-
+
list_add (&eco->eco_obj_chain, &ec->ec_objects);
spin_unlock (&ec->ec_lock);
CDEBUG (D_INFO,
}
static int
-echo_get_object (struct ec_object **ecop, struct obd_device *obd, struct obdo *oa)
+echo_get_object (struct ec_object **ecop, struct obd_device *obd,
+ struct obdo *oa)
{
struct echo_client_obd *ec = &obd->u.echo_client;
struct ec_object *eco;
struct ec_object *eco2;
int rc;
- if ((oa->o_valid & OBD_MD_FLID) == 0)
+ if ((oa->o_valid & OBD_MD_FLID) == 0)
{
CERROR ("No valid oid\n");
return (-EINVAL);
}
-
+
spin_lock (&ec->ec_lock);
eco = echo_find_object_locked (obd, oa->o_id);
if (eco != NULL) {
if (eco->eco_deleted) /* being deleted */
return (-EAGAIN); /* (see comment in cleanup) */
-
+
eco->eco_refcount++;
spin_unlock (&ec->ec_lock);
*ecop = eco;
}
spin_unlock (&ec->ec_lock);
-
+
echo_free_object (eco);
return (rc);
}
* sure there will be no more lock callbacks.
*/
obd_cancel_unused (&ec->ec_conn, eco->eco_lsm, 0);
-
+
/* now we can let it go */
spin_lock (&ec->ec_lock);
list_del (&eco->eco_obj_chain);
spin_unlock (&ec->ec_lock);
-
+
LASSERT (eco->eco_refcount == 0);
echo_free_object (eco);
stripe_off, stripe_id);
if (vrc != 0 && rc == 0)
rc = vrc;
-
+
kunmap(pgp->pg);
}
__free_pages(pgp->pg, 0);
return (rc);
}
-static int
-echo_client_ubrw (struct obd_device *obd, int rw,
- struct obdo *oa, struct lov_stripe_md *lsm,
- obd_off offset, obd_size count, char *buffer)
+#ifdef __KERNEL__
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+static int echo_client_ubrw(struct obd_device *obd, int rw,
+ struct obdo *oa, struct lov_stripe_md *lsm,
+ obd_off offset, obd_size count, char *buffer)
{
struct echo_client_obd *ec = &obd->u.echo_client;
struct obd_brw_set *set;
obd_brw_set_free(set);
return (rc);
}
+#else
+static int echo_client_ubrw(struct obd_device *obd, int rw,
+ struct obdo *oa, struct lov_stripe_md *lsm,
+ obd_off offset, obd_size count, char *buffer) /* stub selected when LINUX_VERSION_CODE is not below 2.5.0 */
+{
+ LBUG(); /* the working implementation exists only in the pre-2.5 branch of this #if */
+ return 0; /* NOTE(review): reports success if LBUG() ever returns - confirm that is intended */
+}
+#endif
+#endif
static int
echo_open (struct obd_export *exp, struct obdo *oa)
struct ec_open_object *ecoo;
struct ec_object *eco;
int rc;
-
+
rc = echo_get_object (&eco, obd, oa);
if (rc != 0)
return (rc);
-
+
rc = -ENOMEM;
OBD_ALLOC (ecoo, sizeof (*ecoo));
if (ecoo == NULL)
rc = obd_open (&ec->ec_conn, oa, eco->eco_lsm, NULL);
if (rc != 0)
goto failed_1;
-
+
memcpy (&ecoo->ecoo_oa, oa, sizeof (*oa));
ecoo->ecoo_object = eco;
/* ecoo takes ref from echo_get_object() above */
spin_lock (&ec->ec_lock);
- list_add (&ecoo->ecoo_exp_chain,
- &exp->exp_ec_data.eced_open_head);
-
+ list_add (&ecoo->ecoo_exp_chain, &exp->exp_ec_data.eced_open_head);
+
ufh->addr = (__u64)((long) ecoo);
ufh->cookie = ecoo->ecoo_cookie = ec->ec_unique++;
-
+
spin_unlock (&ec->ec_lock);
return (0);
-
+
failed_1:
OBD_FREE (ecoo, sizeof (*ecoo));
failed_0:
int found = 0;
struct list_head *el;
int rc;
-
+
if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
return (-EINVAL);
-
+
spin_lock (&ec->ec_lock);
list_for_each (el, &exp->exp_ec_data.eced_open_head) {
}
spin_unlock (&ec->ec_lock);
-
+
if (!found)
return (-EINVAL);
rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa,
ecoo->ecoo_object->eco_lsm, NULL);
-
+
echo_put_object (ecoo->ecoo_object);
OBD_FREE (ecoo, sizeof (*ecoo));
/* #ifdef this out if we're not feeling paranoid */
spin_lock (&ec->ec_lock);
list_for_each (el, &ec->ec_objects) {
- found = (eco == list_entry (el, struct ec_object, eco_obj_chain));
+ found = (eco == list_entry(el, struct ec_object,
+ eco_obj_chain));
if (found)
break;
}
spin_unlock (&ec->ec_lock);
LASSERT (found);
-
+
switch (flag) {
case LDLM_CB_BLOCKING:
- CDEBUG (D_INFO, "blocking callback on "LPX64", handle "LPX64"."LPX64"\n",
- eco->eco_id, lockh.addr, lockh.cookie);
+ CDEBUG (D_INFO, "blocking callback on "LPX64", handle "LPX64"."
+ LPX64"\n", eco->eco_id, lockh.addr, lockh.cookie);
rc = ldlm_cli_cancel (&lockh);
if (rc != ELDLM_OK)
CERROR ("ldlm_cli_cancel failed: %d\n", rc);
break;
case LDLM_CB_CANCELING:
- CDEBUG (D_INFO, "canceling callback on "LPX64", handle "LPX64"."LPX64"\n",
- eco->eco_id, lockh.addr, lockh.cookie);
+ CDEBUG (D_INFO, "canceling callback on "LPX64", handle "LPX64"."
+ LPX64"\n", eco->eco_id, lockh.addr, lockh.cookie);
break;
default:
ecl->ecl_object = eco;
ecl->ecl_extent.start = offset;
ecl->ecl_extent.end = (nob == 0) ? ((obd_off)-1) : (offset + nob - 1);
-
+
flags = 0;
- rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL,
- LDLM_EXTENT, &ecl->ecl_extent, sizeof (ecl->ecl_extent),
- mode, &flags, echo_ldlm_callback, eco, sizeof (*eco),
+ rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT,
+ &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode,
+ &flags, echo_ldlm_callback, eco, sizeof (*eco),
&ecl->ecl_handle);
if (rc != 0)
goto failed_1;
ulh->addr = (__u64)((long)ecl);
ulh->cookie = ecl->ecl_cookie = ec->ec_unique++;
-
+
spin_unlock (&ec->ec_lock);
oa->o_valid |= OBD_MD_FLHANDLE;
if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
return (-EINVAL);
-
+
spin_lock (&ec->ec_lock);
-
+
list_for_each (el, &exp->exp_ec_data.eced_locks) {
ecl = list_entry (el, struct ec_lock, ecl_exp_chain);
-
+
if ((__u64)((long)ecl) == ulh->addr) {
found = (ecl->ecl_cookie == ulh->cookie);
if (found)
break;
}
}
-
+
spin_unlock (&ec->ec_lock);
-
+
if (!found)
return (-ENOENT);
-
- rc = obd_cancel (&ec->ec_conn,
+
+ rc = obd_cancel (&ec->ec_conn,
ecl->ecl_object->eco_lsm,
ecl->ecl_mode,
&ecl->ecl_handle);
-
+
echo_put_object (ecl->ecl_object);
OBD_FREE (ecl, sizeof (*ecl));
-
+
return (rc);
}
case OBD_IOC_CREATE: /* may create echo object */
if (!capable (CAP_SYS_ADMIN))
GOTO (out, rc = -EPERM);
-
+
rc = echo_create_object (obd, 1, &data->ioc_obdo1,
data->ioc_pbuf1, data->ioc_plen1);
GOTO(out, rc);
case OBD_IOC_DESTROY:
if (!capable (CAP_SYS_ADMIN))
GOTO (out, rc = -EPERM);
-
+
rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
if (rc == 0) {
rc = obd_destroy(&ec->ec_conn, &data->ioc_obdo1,
case OBD_IOC_SETATTR:
if (!capable (CAP_SYS_ADMIN))
GOTO (out, rc = -EPERM);
-
+
rc = echo_get_object (&eco, obd, &data->ioc_obdo1);
if (rc == 0) {
rc = obd_setattr(&ec->ec_conn, &data->ioc_obdo1,
case OBD_IOC_BRW_WRITE:
if (!capable (CAP_SYS_ADMIN))
GOTO (out, rc = -EPERM);
-
+
rw = OBD_BRW_WRITE;
/* fall through */
case OBD_IOC_BRW_READ:
data->ioc_offset,
data->ioc_count);
else
+#ifdef __KERNEL__
rc = echo_client_ubrw(obd, rw, &data->ioc_obdo1,
eco->eco_lsm,
data->ioc_offset,
data->ioc_count,
data->ioc_pbuf2);
+#endif
echo_put_object(eco);
}
GOTO(out, rc);
}
} else {
rc = echo_create_object(obd, 0, &data->ioc_obdo1,
- data->ioc_pbuf1, data->ioc_plen1);
+ data->ioc_pbuf1,
+ data->ioc_plen1);
}
GOTO (out, rc);
case ECHO_IOC_ENQUEUE:
if (!capable (CAP_SYS_ADMIN))
GOTO (out, rc = -EPERM);
-
- rc = echo_enqueue (exp, &data->ioc_obdo1,
+
+ rc = echo_enqueue (exp, &data->ioc_obdo1,
data->ioc_conn1, /* lock mode */
- data->ioc_offset, data->ioc_count); /* extent */
+ data->ioc_offset, data->ioc_count);/*extent*/
GOTO (out, rc);
case ECHO_IOC_CANCEL:
ecl = list_entry (exp->exp_ec_data.eced_locks.next,
struct ec_lock, ecl_exp_chain);
list_del (&ecl->ecl_exp_chain);
-
+
rc = obd_cancel (&ec->ec_conn, ecl->ecl_object->eco_lsm,
ecl->ecl_mode, &ecl->ecl_handle);
CERROR ("Cancel lock on object "LPX64" on disconnect (%d)\n",
ecl->ecl_object->eco_id, rc);
-
+
echo_put_object (ecl->ecl_object);
OBD_FREE (ecl, sizeof (*ecl));
}
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- int rc = snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
+
+ LASSERT(dev != NULL);
*eof = 1;
- return rc;
+ return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
}
struct lprocfs_vars lprocfs_obd_vars[] = {
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-DEFS=
+DEFS = $(ENABLE_OST_RECOVERY)
MODULE = obdfilter
modulefs_DATA = obdfilter.o
EXTRA_PROGRAMS = obdfilter
#include <linux/random.h>
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
-
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/mount.h>
+#endif
static kmem_cache_t *filter_open_cache;
static kmem_cache_t *filter_dentry_cache;
__u64 st_create_reqs;
__u64 st_destroy_reqs;
__u64 st_statfs_reqs;
+ __u64 st_syncfs_reqs;
__u64 st_open_reqs;
__u64 st_close_reqs;
__u64 st_punch_reqs;
xprocfs_iostats[smp_processor_id()].field += (count); \
} while (0)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#define DECLARE_XPROCFS_SUM_STAT(field) \
static long long \
xprocfs_sum_##field (void) \
DECLARE_XPROCFS_SUM_STAT (st_create_reqs)
DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs)
DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs)
+DECLARE_XPROCFS_SUM_STAT (st_syncfs_reqs)
DECLARE_XPROCFS_SUM_STAT (st_open_reqs)
DECLARE_XPROCFS_SUM_STAT (st_close_reqs)
DECLARE_XPROCFS_SUM_STAT (st_punch_reqs)
+#endif
static int
xprocfs_rd_stat (char *page, char **start, off_t off, int count,
return;
}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes);
xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs);
xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes);
xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs);
xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs);
xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs);
+ xprocfs_add_stat ("syncfs_reqs", xprocfs_sum_st_syncfs_reqs);
xprocfs_add_stat ("open_reqs", xprocfs_sum_st_open_reqs);
xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs);
xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs);
+#endif
}
void xprocfs_fini (void)
remove_proc_entry ("create_reqs", xprocfs_dir);
remove_proc_entry ("destroy_reqs", xprocfs_dir);
remove_proc_entry ("statfs_reqs", xprocfs_dir);
+ remove_proc_entry ("syncfs_reqs", xprocfs_dir);
remove_proc_entry ("open_reqs", xprocfs_dir);
remove_proc_entry ("close_reqs", xprocfs_dir);
remove_proc_entry ("punch_reqs", xprocfs_dir);
/* Enter the transaction-number critical section for the device behind
 * @export.
 *
 * Only does anything when FILTER_TRANSNO_SEM is defined: it then takes
 * fo_transno_sem with down(); the matching up() calls sit in the code that
 * assigns and writes the transaction number, under the same #ifdef.  Without
 * the macro the body is empty and that code protects the counter with the
 * fo_translock spinlock instead.
 */
void filter_start_transno(struct obd_export *export)
{
+#ifdef FILTER_TRANSNO_SEM
struct obd_device * obd = export->exp_obd;
ENTRY;
down(&obd->u.filter.fo_transno_sem);
+#endif
}
/* Assumes caller has already pushed us into the kernel context. */
ssize_t written;
/* Propagate error code. */
- if (rc)
- GOTO(out, rc);
+ if (rc) {
+#ifdef FILTER_TRANSNO_SEM
+ up(&filter->fo_transno_sem);
+#endif
+ RETURN(rc);
+ }
+
+ if (!(obd->obd_flags & OBD_REPLAYABLE)) {
+ RETURN(0);
+ }
/* we don't allocate new transnos for replayed requests */
#if 0
GOTO(out, rc = 0);
#endif
- off = FILTER_LR_CLIENT_START + fed->fed_lr_off * FILTER_LR_CLIENT_SIZE;
+ off = fed->fed_lr_off;
- last_rcvd = ++filter->fo_fsd->fsd_last_rcvd;
+#ifndef FILTER_TRANSNO_SEM
+ spin_lock(&filter->fo_translock);
+#endif
+ last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+ filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd + 1);
+#ifndef FILTER_TRANSNO_SEM
+ spin_unlock(&filter->fo_translock);
+#endif
if (oti)
oti->oti_transno = last_rcvd;
fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
- fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count);
+ fcd->fcd_mount_count = filter->fo_fsd->fsd_mount_count;
/* get this from oti */
#if 0
written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, sizeof(*fcd),
&off);
CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
- LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_off, written);
+ LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_idx, written);
+#ifdef FILTER_TRANSNO_SEM
+ up(&filter->fo_transno_sem);
+#endif
if (written == sizeof(*fcd))
- GOTO(out, rc = 0);
- CERROR("error writing to last_rcvd file: rc = %d\n", rc);
+ RETURN(0);
+ CERROR("error writing to last_rcvd file: rc = %d\n", written);
if (written >= 0)
- GOTO(out, rc = -EIO);
-
- rc = 0;
-
- EXIT;
- out:
+ RETURN(-EIO);
- up(&filter->fo_transno_sem);
- return rc;
+ RETURN(written);
}
/* Write the pathname of object @id into @buf and return @buf.
 *
 * Non-regular types, or a filter whose fo_subdir_count is 0, produce
 * "O/<type>/<id>".  Otherwise the object goes under a hashed subdirectory:
 * "O/<type>/d<N>/<id>" with N = (int)id & (fo_subdir_count - 1).  That mask
 * is an even spread only if fo_subdir_count is a power of two - TODO confirm
 * this is guaranteed for values read from disk.  <type> is whatever
 * obd_mode_to_type(mode) returns.
 *
 * sprintf() is unbounded, so the caller must pass a @buf big enough for the
 * longest name.  Argument order is (buf, filter, id, mode).  The pre-patch
 * version returned the sprintf() length rather than the buffer.
 */
-static int filter_id(char *buf, obd_id id, obd_mode mode)
+static char *filter_id(char *buf, struct filter_obd *filter, obd_id id,
+ obd_mode mode)
{
- return sprintf(buf, "O/%s/"LPU64, obd_mode_to_type(mode), id);
+ if ((mode & S_IFMT) != S_IFREG || filter->fo_subdir_count == 0)
+ sprintf(buf, "O/%s/"LPU64, obd_mode_to_type(mode), id);
+ else
+ sprintf(buf, "O/%s/d%d/"LPU64, obd_mode_to_type(mode),
+ (int)id & (filter->fo_subdir_count - 1), id);
+
+ return buf;
}
static inline void f_dput(struct dentry *dentry)
#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
-static unsigned long filter_last_rcvd_slots[FILTER_LR_MAX_CLIENT_WORDS];
-
/* Add client data to the FILTER. We use a bitmap to locate a free space
- * in the last_rcvd file if cl_off is -1 (i.e. a new client).
+ * in the last_rcvd file if cl_idx is -1 (i.e. a new client).
* Otherwise, we have just read the data from the last_rcvd file and
* we know its offset.
*/
int filter_client_add(struct filter_obd *filter,
- struct filter_export_data *fed, int cl_off)
+ struct filter_export_data *fed, int cl_idx)
{
- int new_client = (cl_off == -1);
+ int new_client = (cl_idx == -1);
+
+ LASSERT(filter->fo_last_rcvd_slots != NULL);
- /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+ /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
* there's no need for extra complication here
*/
if (new_client) {
- cl_off = find_first_zero_bit(filter_last_rcvd_slots,
+ cl_idx = find_first_zero_bit(filter->fo_last_rcvd_slots,
FILTER_LR_MAX_CLIENTS);
repeat:
- if (cl_off >= FILTER_LR_MAX_CLIENTS) {
+ if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
return -ENOMEM;
}
- if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+ if (test_and_set_bit(cl_idx, filter->fo_last_rcvd_slots)) {
CERROR("FILTER client %d: found bit is set in bitmap\n",
- cl_off);
- cl_off = find_next_zero_bit(filter_last_rcvd_slots,
+ cl_idx);
+ cl_idx = find_next_zero_bit(filter->fo_last_rcvd_slots,
FILTER_LR_MAX_CLIENTS,
- cl_off);
+ cl_idx);
goto repeat;
}
} else {
- if (test_and_set_bit(cl_off, filter_last_rcvd_slots)) {
+ if (test_and_set_bit(cl_idx, filter->fo_last_rcvd_slots)) {
CERROR("FILTER client %d: bit already set in bitmap!\n",
- cl_off);
+ cl_idx);
LBUG();
}
}
- CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
- cl_off, fed->fed_fcd->fcd_uuid);
+ fed->fed_lr_idx = cl_idx;
+ fed->fed_lr_off = le32_to_cpu(filter->fo_fsd->fsd_client_start) +
+ cl_idx * le16_to_cpu(filter->fo_fsd->fsd_client_size);
- fed->fed_lr_off = cl_off;
+ CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n",
+ fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid);
if (new_client) {
struct obd_run_ctxt saved;
- loff_t off = FILTER_LR_CLIENT_START +
- (cl_off * FILTER_LR_CLIENT_SIZE);
+ loff_t off = fed->fed_lr_off;
ssize_t written;
+ CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
+ fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
+
push_ctxt(&saved, &filter->fo_ctxt, NULL);
written = lustre_fwrite(filter->fo_rcvd_filp,
(char *)fed->fed_fcd,
RETURN(written);
RETURN(-EIO);
}
- CDEBUG(D_INFO, "wrote client fcd at off %u (len %u)\n",
- FILTER_LR_CLIENT_START + (cl_off*FILTER_LR_CLIENT_SIZE),
- (unsigned int)sizeof(*fed->fed_fcd));
}
return 0;
}
if (!fed->fed_fcd)
RETURN(0);
- off = FILTER_LR_CLIENT_START + (fed->fed_lr_off*FILTER_LR_CLIENT_SIZE);
+ LASSERT(filter->fo_last_rcvd_slots != NULL);
+
+ off = fed->fed_lr_off;
- CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
- fed->fed_lr_off, off, fed->fed_fcd->fcd_uuid);
+ CDEBUG(D_INFO, "freeing client at idx %u (%lld) with UUID '%s'\n",
+ fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid);
- if (!test_and_clear_bit(fed->fed_lr_off, filter_last_rcvd_slots)) {
+ if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
CERROR("FILTER client %u: bit already clear in bitmap!!\n",
- fed->fed_lr_off);
+ fed->fed_lr_idx);
LBUG();
}
sizeof(zero_fcd), &off);
/* XXX: this write gets lost sometimes, unless this sync is here. */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
-#else
- file_fsync(filter->fo_rcvd_filp, filter->fo_rcvd_filp->f_dentry, 1);
-#endif
+ file_fsync(filter->fo_rcvd_filp, filter->fo_rcvd_filp->f_dentry, 1);
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
if (written != sizeof(zero_fcd)) {
- CERROR("error zeroing out client %s off %d in %s: %d\n",
- fed->fed_fcd->fcd_uuid, fed->fed_lr_off, LAST_RCVD,
- written);
+ CERROR("error zeroing out client %s idx %u (%llu) in %s: %d\n",
+ fed->fed_fcd->fcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
+ LAST_RCVD, written);
} else {
CDEBUG(D_INFO,
- "zeroed disconnecting client %s at off %d ("LPX64")\n",
- fed->fed_fcd->fcd_uuid, fed->fed_lr_off, off);
+ "zeroed disconnecting client %s at idx %u (%llu)\n",
+ fed->fed_fcd->fcd_uuid, fed->fed_lr_idx,fed->fed_lr_off);
}
OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd));
return 0;
}
-static void filter_unpack_fsd(struct filter_server_data *fsd)
-{
- fsd->fsd_last_objid = le64_to_cpu(fsd->fsd_last_objid);
- fsd->fsd_last_rcvd = le64_to_cpu(fsd->fsd_last_rcvd);
- fsd->fsd_mount_count = le64_to_cpu(fsd->fsd_mount_count);
-}
-
-static void filter_pack_fsd(struct filter_server_data *disk_fsd,
- struct filter_server_data *fsd)
-{
- memset(disk_fsd, 0, sizeof(*disk_fsd));
- memcpy(disk_fsd->fsd_uuid, fsd->fsd_uuid, sizeof(fsd->fsd_uuid));
- disk_fsd->fsd_last_objid = cpu_to_le64(fsd->fsd_last_objid);
- disk_fsd->fsd_last_rcvd = cpu_to_le64(fsd->fsd_last_rcvd);
- disk_fsd->fsd_mount_count = cpu_to_le64(fsd->fsd_mount_count);
-}
-
/* Release the in-memory server state allocated by filter_init_server_data():
 * the filter_server_data buffer (fo_fsd) and the client-slot bitmap
 * (fo_last_rcvd_slots, FILTER_LR_MAX_CLIENT_WORDS unsigned longs).  Both
 * pointers are reset to NULL afterwards.  Always returns 0.
 *
 * NOTE(review): neither pointer is checked before OBD_FREE() - confirm that
 * callers only get here after both allocations succeeded.
 */
static int filter_free_server_data(struct filter_obd *filter)
{
OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
filter->fo_fsd = NULL;
-
+ OBD_FREE(filter->fo_last_rcvd_slots,
+ FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ filter->fo_last_rcvd_slots = NULL;
return 0;
}
-/* assumes caller has already in kernel ctxt */
+/* assumes caller is already in kernel ctxt */
static int filter_update_server_data(struct file *filp,
struct filter_server_data *fsd)
{
- struct filter_server_data disk_fsd;
loff_t off = 0;
int rc;
CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid);
- CDEBUG(D_INODE, "server last_objid: "LPU64"\n", fsd->fsd_last_objid);
- CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n", fsd->fsd_last_rcvd);
- CDEBUG(D_INODE, "server last_mount: "LPU64"\n", fsd->fsd_mount_count);
-
- filter_pack_fsd(&disk_fsd, fsd);
- rc = lustre_fwrite(filp, (char *)&disk_fsd,
- sizeof(disk_fsd), &off);
- if (rc != sizeof(disk_fsd)) {
+ CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
+ le64_to_cpu(fsd->fsd_last_objid));
+ CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
+ le64_to_cpu(fsd->fsd_last_rcvd));
+ CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
+ le64_to_cpu(fsd->fsd_mount_count));
+
+ rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off);
+ if (rc != sizeof(*fsd)) {
CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
rc);
RETURN(-EIO);
}
/* assumes caller is already in kernel ctxt */
-static int filter_init_server_data(struct obd_device *obd,
- struct file * filp,
+static int filter_init_server_data(struct obd_device *obd, struct file * filp,
__u64 init_lastobjid)
{
struct filter_obd *filter = &obd->u.filter;
struct filter_client_data *fcd = NULL;
struct inode *inode = filp->f_dentry->d_inode;
unsigned long last_rcvd_size = inode->i_size;
- int cl_off;
+ __u64 mount_count;
+ int cl_idx;
loff_t off = 0;
int rc;
RETURN(-ENOMEM);
filter->fo_fsd = fsd;
+ /* Bitmap of in-use client slots in last_rcvd, one bit per client. */
+ OBD_ALLOC(filter->fo_last_rcvd_slots,
+ FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ if (filter->fo_last_rcvd_slots == NULL) {
+ /* fo_fsd was pointed at fsd just above; clear it so the
+ * filter is not left holding a pointer to freed memory. */
+ OBD_FREE(fsd, sizeof(*fsd));
+ filter->fo_fsd = NULL;
+ RETURN(-ENOMEM);
+ }
+
if (last_rcvd_size == 0) {
CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
- fsd->fsd_last_objid = init_lastobjid;
+ fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
fsd->fsd_last_rcvd = 0;
- fsd->fsd_mount_count = 0;
-
+ mount_count = fsd->fsd_mount_count = 0;
+ fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
+ fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
+ fsd->fsd_client_size = cpu_to_le16(FILTER_LR_CLIENT_SIZE);
+ fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
+ filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
} else {
- ssize_t retval = lustre_fread(filp, (char *)fsd,
- sizeof(*fsd),
+ ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd),
&off);
if (retval != sizeof(*fsd)) {
CDEBUG(D_INODE,"OBD filter: error reading lastobjid\n");
GOTO(out, rc = -EIO);
}
- filter_unpack_fsd(fsd);
+ mount_count = le64_to_cpu(fsd->fsd_mount_count);
+ filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+ }
+
+ /* Refuse on-disk data that advertises features this code does not
+ * know.  fsd and the client-slot bitmap are already allocated at this
+ * point, so leave through err_fsd - the label this function already
+ * uses for failures after setup - instead of returning directly.
+ * NOTE(review): confirm err_fsd releases both allocations. */
+ if (fsd->fsd_feature_incompat) {
+ CERROR("unsupported feature %x\n",
+ le32_to_cpu(fsd->fsd_feature_incompat));
+ GOTO(err_fsd, rc = -EINVAL);
+ }
+ if (fsd->fsd_feature_rocompat) {
+ CERROR("read-only feature %x\n",
+ le32_to_cpu(fsd->fsd_feature_rocompat));
+ /* Do something like remount filesystem read-only */
+ GOTO(err_fsd, rc = -EINVAL);
}
CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
- obd->obd_name, fsd->fsd_last_objid);
+ obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
- obd->obd_name, fsd->fsd_last_rcvd);
+ obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd));
CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
- obd->obd_name, fsd->fsd_mount_count);
+ obd->obd_name, mount_count);
+ CDEBUG(D_INODE, "%s: server data size: %u\n",
+ obd->obd_name, le32_to_cpu(fsd->fsd_server_size));
+ CDEBUG(D_INODE, "%s: per-client data start: %u\n",
+ obd->obd_name, le32_to_cpu(fsd->fsd_client_start));
+ /* fsd_client_size is a 16-bit little-endian field (stored with
+ * cpu_to_le16), so it must be decoded with le16_to_cpu. */
+ CDEBUG(D_INODE, "%s: per-client data size: %u\n",
+ obd->obd_name, le16_to_cpu(fsd->fsd_client_size));
+ CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
+ obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
/*
* When we do a clean FILTER shutdown, we save the last_rcvd into
* the header. If we find clients with higher last_rcvd values
* then those clients may need recovery done.
*/
- /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
- for (off = FILTER_LR_CLIENT_START, cl_off = 0; off < last_rcvd_size;
- cl_off++) {
- __u64 last_rcvd;
- int mount_age;
-
- if (!fcd) {
- OBD_ALLOC(fcd, sizeof(*fcd));
- if (!fcd)
- GOTO(err_fsd, rc = -ENOMEM);
- }
-
- rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
- if (rc != sizeof(*fcd)) {
- CERROR("error reading FILTER %s offset %d: rc = %d\n",
- LAST_RCVD, cl_off, rc);
- if (rc > 0) /* XXX fatal error or just abort reading? */
- rc = -EIO;
- break;
- }
-
- if (fcd->fcd_uuid[0] == '\0') {
- CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
- cl_off);
- continue;
- }
+ if (obd->obd_flags & OBD_REPLAYABLE) {
+ for (cl_idx = 0; off < last_rcvd_size; cl_idx++) {
+ __u64 last_rcvd;
+ int mount_age;
+
+ if (!fcd) {
+ OBD_ALLOC(fcd, sizeof(*fcd));
+ if (!fcd)
+ GOTO(err_fsd, rc = -ENOMEM);
+ }
- last_rcvd = le64_to_cpu(fcd->fcd_last_rcvd);
+ /* Don't assume off is incremented properly, in case
+ * sizeof(fsd) isn't the same as fsd->fsd_client_size.
+ */
+ off = le32_to_cpu(fsd->fsd_client_start) +
+ cl_idx * le16_to_cpu(fsd->fsd_client_size);
+ rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+ if (rc != sizeof(*fcd)) {
+ CERROR("error reading FILTER %s offset %d: rc = %d\n",
+ LAST_RCVD, cl_idx, rc);
+ if (rc > 0) /* XXX fatal error or just abort reading? */
+ rc = -EIO;
+ break;
+ }
- /* These exports are cleaned up by filter_disconnect(), so they
- * need to be set up like real exports as filter_connect() does.
- */
- mount_age = fsd->fsd_mount_count -
- le64_to_cpu(fcd->fcd_mount_count);
- if (mount_age < FILTER_MOUNT_RECOV) {
- CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
- "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
- "\n", fcd->fcd_uuid, cl_off,
- last_rcvd, fsd->fsd_last_rcvd,
- le64_to_cpu(fcd->fcd_mount_count),
- fsd->fsd_mount_count);
-#if 0
- /* disabled until OST recovery is actually working */
- struct obd_export *exp = class_new_export(obd);
- struct filter_export_data *fed;
+ if (fcd->fcd_uuid[0] == '\0') {
+ CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
+ cl_idx);
+ continue;
+ }
- if (!exp) {
- rc = -ENOMEM;
- break;
+ last_rcvd = le64_to_cpu(fcd->fcd_last_rcvd);
+
+ /* These exports are cleaned up by filter_disconnect(), so they
+ * need to be set up like real exports as filter_connect() does.
+ */
+ mount_age = mount_count - le64_to_cpu(fcd->fcd_mount_count);
+ if (mount_age < FILTER_MOUNT_RECOV) {
+ struct obd_export *exp = class_new_export(obd);
+ struct filter_export_data *fed;
+ CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
+ " srv lr: "LPU64" mnt: "LPU64" last mount: "
+ LPU64"\n", fcd->fcd_uuid, cl_idx,
+ last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd),
+ le64_to_cpu(fcd->fcd_mount_count), mount_count);
+ /* disabled until OST recovery is actually working */
+
+ if (!exp) {
+ rc = -ENOMEM;
+ break;
+ }
+ memcpy(&exp->exp_client_uuid.uuid, fcd->fcd_uuid,
+ sizeof exp->exp_client_uuid.uuid);
+ fed = &exp->exp_filter_data;
+ fed->fed_fcd = fcd;
+ filter_client_add(filter, fed, cl_idx);
+ /* create helper if export init gets more complex */
+ INIT_LIST_HEAD(&fed->fed_open_head);
+ spin_lock_init(&fed->fed_lock);
+
+ fcd = NULL;
+ obd->obd_recoverable_clients++;
+ } else {
+ CDEBUG(D_INFO,
+ "discarded client %d UUID '%s' count "LPU64"\n",
+ cl_idx, fcd->fcd_uuid,
+ le64_to_cpu(fcd->fcd_mount_count));
}
- fed = &exp->exp_filter_data;
- fed->fed_fcd = fcd;
- filter_client_add(filter, fed, cl_off);
- /* create helper if export init gets more complex */
- INIT_LIST_HEAD(&fed->fed_open_head);
- spin_lock_init(&fed->fed_lock);
+ CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
+ cl_idx, last_rcvd);
- fcd = NULL;
- filter->fo_recoverable_clients++;
-#endif
- } else {
- CDEBUG(D_INFO,
- "discarded client %d, UUID '%s', count %Ld\n",
- cl_off, fcd->fcd_uuid,
- (long long)le64_to_cpu(fcd->fcd_mount_count));
+ if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd))
+ filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
}
- CDEBUG(D_OTHER, "client at offset %d has last_rcvd = %Lu\n",
- cl_off, (unsigned long long)last_rcvd);
+ obd->obd_last_committed = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+ if (obd->obd_recoverable_clients) {
+ CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
+ obd->obd_recoverable_clients,
+ le64_to_cpu(filter->fo_fsd->fsd_last_rcvd));
+ obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
+ obd->obd_flags |= OBD_RECOVERING;
+ }
- if (last_rcvd > filter->fo_fsd->fsd_last_rcvd)
- filter->fo_fsd->fsd_last_rcvd = last_rcvd;
- }
+ if (fcd)
+ OBD_FREE(fcd, sizeof(*fcd));
- obd->obd_last_committed = filter->fo_fsd->fsd_last_rcvd;
- if (filter->fo_recoverable_clients) {
- CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
- filter->fo_recoverable_clients,
- filter->fo_fsd->fsd_last_rcvd);
- filter->fo_next_recovery_transno = obd->obd_last_committed + 1;
- obd->obd_flags |= OBD_RECOVERING;
+ } else {
+ CERROR("%s: recovery support OFF\n", obd->obd_name);
}
- if (fcd)
- OBD_FREE(fcd, sizeof(*fcd));
-
- fsd->fsd_mount_count++;
+ fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
/* save it,so mount count and last_recvd is current */
rc = filter_update_server_data(filp, filter->fo_fsd);
{
struct obd_run_ctxt saved;
struct filter_obd *filter = &obd->u.filter;
- struct dentry *dentry;
+ struct dentry *dentry, *O_dentry;
struct file *file;
struct inode *inode;
+ int i;
int rc = 0;
int mode = 0;
* Create directories and/or get dentries for each object type.
* This saves us from having to do multiple lookups for each one.
*/
+ O_dentry = filter->fo_dentry_O;
for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
char *name = obd_type_by_mode[mode];
filter->fo_dentry_O_mode[mode] = NULL;
continue;
}
- dentry = simple_mkdir(filter->fo_dentry_O, name, 0700);
+ dentry = simple_mkdir(O_dentry, name, 0700);
CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
CERROR("cannot create O/%s: rc = %d\n", name, rc);
- GOTO(out_O_mode, rc);
+ GOTO(err_O_mode, rc);
}
filter->fo_dentry_O_mode[mode] = dentry;
}
file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
- if ( !file || IS_ERR(file) ) {
+ if (!file || IS_ERR(file)) {
rc = PTR_ERR(file);
CERROR("OBD filter: cannot open/create %s: rc = %d\n",
LAST_RCVD, rc);
- GOTO(out_O_mode, rc);
+ GOTO(err_O_mode, rc);
}
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
}
filter->fo_rcvd_filp = file;
+ if (filter->fo_subdir_count) {
+ /* Create/look up the fo_subdir_count "d<N>" directories below
+ * the S_IFREG type directory and keep a dentry for each.
+ * Size the array by fo_subdir_count - the bound used by the
+ * loop below and by both OBD_FREE() sites, and possibly a
+ * value read from the on-disk server data - not by the
+ * compile-time FILTER_SUBDIR_COUNT. */
+ O_dentry = filter->fo_dentry_O_mode[S_IFREG >> S_SHIFT];
+ OBD_ALLOC(filter->fo_dentry_O_sub,
+ filter->fo_subdir_count *
+ sizeof(*filter->fo_dentry_O_sub));
+ if (!filter->fo_dentry_O_sub)
+ GOTO(err_client, rc = -ENOMEM);
+
+ for (i = 0; i < filter->fo_subdir_count; i++) {
+ char dir[20];
+ snprintf(dir, sizeof(dir), "d%u", i);
+
+ dentry = simple_mkdir(O_dentry, dir, 0700);
+ CDEBUG(D_INODE, "got/created O/R/%s: %p\n", dir,dentry);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("can't create O/R/%s: rc = %d\n",dir,rc);
+ /* err_O_sub drops entries [0, i) and frees
+ * the array */
+ GOTO(err_O_sub, rc);
+ }
+ filter->fo_dentry_O_sub[i] = dentry;
+ }
+ }
rc = 0;
out:
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
return(rc);
+err_O_sub:
+ while (i-- > 0) {
+ struct dentry *dentry = filter->fo_dentry_O_sub[i];
+ if (dentry) {
+ f_dput(dentry);
+ filter->fo_dentry_O_sub[i] = NULL;
+ }
+ }
+ OBD_FREE(filter->fo_dentry_O_sub,
+ filter->fo_subdir_count * sizeof(dentry));
err_client:
class_disconnect_all(obd);
err_filp:
if (filp_close(file, 0))
CERROR("can't close %s after error\n", LAST_RCVD);
filter->fo_rcvd_filp = NULL;
- out_O_mode:
+err_O_mode:
while (mode-- > 0) {
struct dentry *dentry = filter->fo_dentry_O_mode[mode];
if (dentry) {
rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
if (rc)
CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
- filter_free_server_data(filter);
if (filter->fo_rcvd_filp) {
- /* broken sync at umount bug workaround */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- rc = fsync_dev(filter->fo_rcvd_filp->f_dentry->d_inode->i_rdev);
-#else
rc = file_fsync(filter->fo_rcvd_filp,
filter->fo_rcvd_filp->f_dentry, 1);
-#endif
filp_close(filter->fo_rcvd_filp, 0);
filter->fo_rcvd_filp = NULL;
if (rc)
- CERROR("last_rcvd file won't closek rc = %ld\n", rc);
+ CERROR("last_rcvd file won't closed rc = %ld\n", rc);
}
+ if (filter->fo_subdir_count) {
+ int i;
+ for (i = 0; i < filter->fo_subdir_count; i++) {
+ struct dentry *dentry = filter->fo_dentry_O_sub[i];
+ f_dput(dentry);
+ filter->fo_dentry_O_sub[i] = NULL;
+ }
+ OBD_FREE(filter->fo_dentry_O_sub,
+ filter->fo_subdir_count *
+ sizeof(*filter->fo_dentry_O_sub));
+ }
for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) {
struct dentry *dentry = filter->fo_dentry_O_mode[mode];
if (dentry) {
}
}
f_dput(filter->fo_dentry_O);
+ filter_free_server_data(filter);
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
}
LASSERT(obd->u.filter.fo_fsd != NULL);
spin_lock(&obd->u.filter.fo_objidlock);
- id = ++obd->u.filter.fo_fsd->fsd_last_objid;
+ id = le64_to_cpu(obd->u.filter.fo_fsd->fsd_last_objid);
+ obd->u.filter.fo_fsd->fsd_last_objid = cpu_to_le64(id + 1);
spin_unlock(&obd->u.filter.fo_objidlock);
return id;
}
/* Return the directory dentry under which object @objid of type @mode lives.
 *
 * With fo_subdir_count == 0 this is the per-type directory from
 * fo_dentry_O_mode[]; otherwise regular files are spread over
 * fo_dentry_O_sub[], indexed by objid & (fo_subdir_count - 1), the same
 * index filter_id() uses for the "d<N>" path component.
 *
 * The LASSERT restricts callers to S_IFREG, so the non-regular half of the
 * test that follows it is only reachable if the assertion does not stop
 * execution (NOTE(review): presumably kept for when other types are allowed).
 */
static inline struct dentry *filter_parent(struct obd_device *obd,
- obd_mode mode)
+ obd_mode mode, obd_id objid)
{
struct filter_obd *filter = &obd->u.filter;
LASSERT((mode & S_IFMT) == S_IFREG); /* only regular files for now */
- return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT];
+ if ((mode & S_IFMT) != S_IFREG || filter->fo_subdir_count == 0)
+ return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT];
+
+ return filter->fo_dentry_O_sub[objid & (filter->fo_subdir_count - 1)];
}
static struct file *filter_obj_open(struct obd_export *export,
GOTO(out_ffd, file = ERR_PTR(-ENOMEM));
}
- filter_id(name, id, type);
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- file = filp_open(name, O_RDWR | O_LARGEFILE, 0 /* type? */);
+ file = filp_open(filter_id(name, filter, id, type),
+ O_RDWR | O_LARGEFILE, type);
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
if (IS_ERR(file)) {
LASSERT(kmem_cache_validate(filter_dentry_cache, fdd));
/* should only happen during client recovery */
if (fdd->fdd_flags & FILTER_FLAG_DESTROY)
- CDEBUG(D_INODE,"opening destroyed object "LPX64"\n",id);
+ CDEBUG(D_INODE,"opening destroyed object "LPU64"\n",id);
atomic_inc(&fdd->fdd_open_count);
} else {
atomic_set(&fdd->fdd_open_count, 1);
fdd->fdd_flags = 0;
+ fdd->fdd_objid = id;
/* If this is racy, then we can use {cmp}xchg and atomic_add */
dentry->d_fsdata = fdd;
spin_unlock(&filter->fo_fddlock);
get_random_bytes(&ffd->ffd_servercookie, sizeof(ffd->ffd_servercookie));
ffd->ffd_file = file;
+ LASSERT(file->private_data == NULL);
file->private_data = ffd;
if (!dentry->d_op)
list_add(&ffd->ffd_export_list, &fed->fed_open_head);
spin_unlock(&fed->fed_lock);
- CDEBUG(D_INODE, "opened objid "LPX64": rc = %p\n", id, file);
+ CDEBUG(D_INODE, "opened objid "LPU64": rc = %p\n", id, file);
EXIT;
out:
return file;
if (atomic_dec_and_test(&fdd->fdd_open_count) &&
fdd->fdd_flags & FILTER_FLAG_DESTROY) {
- struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
+ struct dentry *dir_dentry = filter_parent(obd, S_IFREG, fdd->fdd_objid);
struct obd_run_ctxt saved;
void *handle;
/* obd methods */
/* mount the file system (secretly) */
-static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
+static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
+ char *option)
{
struct obd_ioctl_data* data = buf;
struct filter_obd *filter;
ENTRY;
if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
- RETURN(rc = -EINVAL);
+ RETURN(-EINVAL);
obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
if (IS_ERR(obd->obd_fsops))
- RETURN(rc = PTR_ERR(obd->obd_fsops));
+ RETURN(PTR_ERR(obd->obd_fsops));
- mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
+ /* ioc_inlbuf2 names the filesystem type (it is also what
+ * fsfilt_get_ops() and fo_fstype are given); ioc_inlbuf1 is the
+ * thing being mounted.  Report them in that order on failure. */
+ mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option);
rc = PTR_ERR(mnt);
- if (IS_ERR(mnt))
+ if (IS_ERR(mnt)) {
+ CERROR("mount of %s as type %s failed: rc %d\n",
+ data->ioc_inlbuf1, data->ioc_inlbuf2, rc);
GOTO(err_ops, rc);
+ }
+#if OST_RECOVERY
obd->obd_flags |= OBD_REPLAYABLE;
+#endif
filter = &obd->u.filter;;
- init_MUTEX(&filter->fo_transno_sem);
filter->fo_vfsmnt = mnt;
filter->fo_fstype = strdup(data->ioc_inlbuf2);
filter->fo_sb = mnt->mnt_root->d_inode->i_sb;
if (rc)
GOTO(err_kfree, rc);
+#ifdef FILTER_TRANSNO_SEM
+ init_MUTEX(&filter->fo_transno_sem);
+#else
+ spin_lock_init(&filter->fo_translock);
+#endif
spin_lock_init(&filter->fo_fddlock);
spin_lock_init(&filter->fo_objidlock);
INIT_LIST_HEAD(&filter->fo_export_list);
return rc;
}
/* Plain setup entry point: filter_common_setup() with a NULL mount-option
 * string. */
+static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+ return filter_common_setup(obd, len, buf, NULL);
+}
+
+/* sanobd setup methods - use a specific mount option
+ *
+ * Chooses the mount option string from the filesystem type named in
+ * ioc_inlbuf2 and hands off to filter_common_setup().  "extN" and "ext3"
+ * are mounted with "data=writeback"; any other type hits LBUG() (marked
+ * "just a reminder" - no option has been picked for other filesystems).
+ * Returns -EINVAL if ioc_inlbuf2 is missing, otherwise whatever
+ * filter_common_setup() returns.
+ */
+static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct obd_ioctl_data* data = buf;
+ char *option = NULL;
+
+ if (!data->ioc_inlbuf2)
+ RETURN(-EINVAL);
+
+ /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
+ if (!strcmp(data->ioc_inlbuf2, "extN") ||
+ !strcmp(data->ioc_inlbuf2, "ext3"))
+ option = "data=writeback";
+ else
+ LBUG(); /* just a reminder */
+
+ return filter_common_setup(obd, len, buf, option);
+}
static int filter_cleanup(struct obd_device *obd)
{
INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head);
spin_lock_init(&exp->exp_filter_data.fed_lock);
- rc = filter_client_add(filter, fed, -1);
- if (rc)
- GOTO(out_fcd, rc);
+ if (obd->obd_flags & OBD_REPLAYABLE) {
+ rc = filter_client_add(filter, fed, -1);
+ if (rc)
+ GOTO(out_fcd, rc);
+ }
RETURN(rc);
spin_unlock(&fed->fed_lock);
ldlm_cancel_locks_for_export(exp);
- filter_client_free(exp);
+
+ if (exp->exp_obd->obd_flags & OBD_REPLAYABLE)
+ filter_client_free(exp);
rc = class_disconnect(conn);
int type = oa->o_mode & S_IFMT;
ENTRY;
- CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPX64" valid 0x%08x\n",
+ CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n",
inode->i_ino, inode, oa->o_id, valid);
/* Don't copy the inode number in place of the object ID */
obdo_from_inode(oa, inode, valid);
CERROR("invalid client "LPX64"\n", conn->addr);
RETURN(ERR_PTR(-EINVAL));
}
- dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode),
+ dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode,
+ oa->o_id),
oa->o_id, locked);
}
if (IS_ERR(dentry)) {
- CERROR("%s error looking up object: "LPX64"\n", what, oa->o_id);
+ CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
RETURN(dentry);
}
if (!dentry->d_inode) {
- CERROR("%s on non-existent object: "LPX64"\n", what, oa->o_id);
+ CERROR("%s on non-existent object: "LPU64"\n", what, oa->o_id);
f_dput(dentry);
LBUG();
RETURN(ERR_PTR(-ENOENT));
XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1);
if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
- CERROR("no handle for close of objid "LPX64"\n", oa->o_id);
+ CERROR("no handle for close of objid "LPU64"\n", oa->o_id);
RETURN(-EINVAL);
}
oa->o_id = filter_next_id(obd);
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- dir_dentry = filter_parent(obd, oa->o_mode);
+ dir_dentry = filter_parent(obd, S_IFREG, oa->o_id);
down(&dir_dentry->d_inode->i_sem);
new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 0);
if (IS_ERR(new))
GOTO(out, rc = PTR_ERR(new));
if (new->d_inode) {
+ char buf[32];
+
/* This would only happen if lastobjid was bad on disk */
- CERROR("objid O/%*s/"LPU64" already exists\n",
- dir_dentry->d_name.len, dir_dentry->d_name.name,
- oa->o_id);
+ /* filter_id() takes (buf, filter, id, mode): pass the object id
+ * before the type so the logged path names this object. */
+ CERROR("objid %s already exists\n",
+ filter_id(buf, filter, oa->o_id, S_IFREG));
LBUG();
GOTO(out, rc = -EEXIST);
}
XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1);
- CDEBUG(D_INODE, "destroying objid "LPX64"\n", oa->o_id);
+ CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
- dir_dentry = filter_parent(obd, oa->o_mode);
+ dir_dentry = filter_parent(obd, oa->o_mode, oa->o_id);
down(&dir_dentry->d_inode->i_sem);
object_dentry = filter_oa2dentry(conn, oa, 0);
fdd->fdd_flags |= FILTER_FLAG_DESTROY;
/* XXX put into PENDING directory in case of crash */
CDEBUG(D_INODE,
- "defer destroy of %dx open objid "LPX64"\n",
+ "defer destroy of %dx open objid "LPU64"\n",
atomic_read(&fdd->fdd_open_count), oa->o_id);
} else
CDEBUG(D_INODE,
- "repeat destroy of %dx open objid "LPX64"\n",
+ "repeat destroy of %dx open objid "LPU64"\n",
atomic_read(&fdd->fdd_open_count), oa->o_id);
GOTO(out_commit, rc = 0);
}
if (end != OBD_OBJECT_EOF)
CERROR("PUNCH not supported, only truncate works\n");
- CDEBUG(D_INODE, "calling truncate for object "LPX64", valid = %x, "
+ CDEBUG(D_INODE, "calling truncate for object "LPU64", valid = %x, "
"o_size = "LPD64"\n", oa->o_id, oa->o_valid, start);
oa->o_size = start;
error = filter_setattr(conn, oa, NULL, oti);
static struct page *
-lustre_get_page_read(struct inode *inode, struct niobuf_remote *rnb)
+lustre_get_page_read(struct inode *inode, struct niobuf_local *lnb)
{
- unsigned long index = rnb->offset >> PAGE_SHIFT;
+ unsigned long index = lnb->offset >> PAGE_SHIFT;
struct address_space *mapping = inode->i_mapping;
struct page *page;
int rc;
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
wait_on_page(page);
- kmap(page);
+ lnb->addr = kmap(page);
+ lnb->page = page;
if (!PageUptodate(page)) {
CERROR("page index %lu not uptodate\n", index);
GOTO(err_page, rc = -EIO);
}
struct page *filter_get_page_write(struct inode *inode,
- struct niobuf_remote *rnb,
struct niobuf_local *lnb, int *pglocked)
{
- unsigned long index = rnb->offset >> PAGE_SHIFT;
+ unsigned long index = lnb->offset >> PAGE_SHIFT;
struct address_space *mapping = inode->i_mapping;
-
struct page *page;
int rc;
addr = __get_free_pages(GFP_KERNEL, 0); /* locked page */
if (!addr) {
CERROR("no memory for a temp page\n");
- LBUG();
GOTO(err, rc = -ENOMEM);
}
- /* XXX debugging */
- memset((void *)addr, 0xBA, PAGE_SIZE);
+ POISON((void *)addr, 0xBA, PAGE_SIZE);
page = virt_to_page(addr);
kmap(page);
page->index = index;
+ lnb->addr = (void *)addr;
+ lnb->page = page;
lnb->flags |= N_LOCAL_TEMP_PAGE;
} else if (!IS_ERR(page)) {
(*pglocked)++;
kmap(page);
rc = mapping->a_ops->prepare_write(NULL, page,
- rnb->offset % PAGE_SIZE,
- rnb->len);
+ lnb->offset & ~PAGE_MASK,
+ lnb->len);
if (rc) {
- CERROR("page index %lu, rc = %d\n", index, rc);
if (rc != -ENOSPC)
- LBUG();
+ CERROR("page index %lu, rc = %d\n", index, rc);
GOTO(err_unlock, rc);
}
/* XXX not sure if we need this if we are overwriting page */
LBUG();
GOTO(err_unlock, rc = -EIO);
}
+ lnb->addr = page_address(page);
+ lnb->page = page;
}
+
return page;
err_unlock:
unsigned blocksize = head->b_size;
/* debugging: just seeing if this ever happens */
- CERROR("called filter_commit_write for ino %lu:%lu on err %d\n",
+ CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
+ "called for ino %lu:%lu on err %d\n",
lnb->page->mapping->host->i_ino, lnb->page->index, err);
/* Currently one buffer per page, but in the future... */
struct obd_ioobj *o;
struct niobuf_remote *rnb = nb;
struct niobuf_local *lnb = res;
- struct dentry *dir_dentry;
struct fsfilt_objinfo *fso;
int pglocked = 0;
int rc = 0;
RETURN(-ENOMEM);
push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
- dir_dentry = filter_parent(obd, S_IFREG);
for (i = 0, o = obj; i < objcount; i++, o++) {
struct filter_dentry_data *fdd;
LASSERT(o->ioo_bufcnt);
- dentry = filter_fid2dentry(obd, dir_dentry, o->ioo_id, 0);
+ dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG,
+ o->ioo_id),
+ o->ioo_id, 0);
if (IS_ERR(dentry))
GOTO(out_objinfo, rc = PTR_ERR(dentry));
if (!dentry->d_inode) {
CERROR("trying to BRW to non-existent file "LPU64"\n",
o->ioo_id);
+ f_dput(dentry);
GOTO(out_objinfo, rc = -ENOENT);
}
fdd = dentry->d_fsdata;
if (!fdd || !atomic_read(&fdd->fdd_open_count))
- CDEBUG(D_PAGE, "I/O to unopened object "LPX64"\n",
+ CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
o->ioo_id);
}
if (cmd & OBD_BRW_WRITE) {
-#warning "FIXME: we need to get inode->i_sem for each object here"
+#warning "FIXME: we need inode->i_sem for each object to protect vs truncate"
/* Even worse, we need to get locks on mulitple inodes (in
* order) or use the DLM to do the locking for us (and use
* the same locking in filter_setattr() for truncate. The
filter_start_transno(export);
*desc_private = fsfilt_brw_start(obd, objcount, fso,
niocount, nb);
- if (IS_ERR(*desc_private))
- GOTO(out_objinfo, rc = PTR_ERR(*desc_private));
+ if (IS_ERR(*desc_private)) {
+ rc = PTR_ERR(*desc_private);
+ CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+ "error starting transaction: rc = %d\n", rc);
+ *desc_private = NULL;
+ GOTO(out_objinfo, rc);
+ }
}
obd_kmap_get(niocount, 1);
else
lnb->dentry = dget(dentry);
+ /* lnb->offset is aligned, while rnb->offset isn't,
+ * and we need to copy the fields to lnb anyways.
+ */
+ memcpy(lnb, rnb, sizeof(*rnb));
if (cmd & OBD_BRW_WRITE) {
- page = filter_get_page_write(inode, rnb, lnb,
+ page = filter_get_page_write(inode, lnb,
&pglocked);
- XPROCFS_BUMP_MYCPU_IOSTAT (st_write_bytes,
- rnb->len);
+ XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes,
+ lnb->len);
} else {
- page = lustre_get_page_read(inode, rnb);
+ page = lustre_get_page_read(inode, lnb);
- XPROCFS_BUMP_MYCPU_IOSTAT (st_read_bytes,
- rnb->len);
+ XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes,
+ lnb->len);
}
if (IS_ERR(page)) {
rc = PTR_ERR(page);
+ CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+ "error on page @"LPU64"%u/%u: rc = %d\n",
+ lnb->offset, j, o->ioo_bufcnt, rc);
f_dput(dentry);
GOTO(out_pages, rc);
}
-
- lnb->addr = page_address(page);
- lnb->offset = rnb->offset;
- lnb->page = page;
- lnb->len = rnb->len;
}
}
out_pages:
while (lnb-- > res) {
- CERROR("%d error cleanup on brw\n", rc);
if (cmd & OBD_BRW_WRITE)
filter_commit_write(lnb, rc);
else
f_dput(lnb->dentry);
}
obd_kmap_put(niocount);
- goto out_err; /* dropped the dentry refs already (one per page) */
+ if (cmd & OBD_BRW_WRITE) {
+ filter_finish_transno(export, *desc_private, oti, rc);
+ fsfilt_commit(obd,
+ filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
+ *desc_private);
+ }
+ goto out; /* dropped the dentry refs already (one per page) */
out_objinfo:
for (i = 0; i < objcount && fso[i].fso_dentry; i++)
f_dput(fso[i].fso_dentry);
-out_err:
- if (cmd & OBD_BRW_WRITE) {
- filter_finish_transno(export, *desc_private, oti, rc);
- fsfilt_commit(obd, dir_dentry->d_inode, *desc_private);
- }
goto out;
}
RETURN(rc);
}
-static int filter_sync(struct obd_device *obd)
+/*
+ * o_syncfs method of the filter: look up the obd device behind @conn, bump
+ * the st_syncfs_reqs statistics counter and return whatever fsfilt_sync()
+ * returns for the device's fo_sb.
+ *
+ * NOTE(review): obd is dereferenced without a NULL check, unlike
+ * filter_san_preprw(); confirm class_conn2obd() cannot fail for callers
+ * of this method.
+ */
+static int filter_syncfs(struct lustre_handle *conn)
{
+ struct obd_device *obd;
+ ENTRY;
+
+ obd = class_conn2obd(conn);
+
+ XPROCFS_BUMP_MYCPU_IOSTAT (st_syncfs_reqs, 1);
+
RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
}
}
if (cmd & OBD_BRW_WRITE) {
+ /* We just want any dentry for the commit, for now */
+ struct dentry *dir_dentry = filter_parent(obd, S_IFREG, 0);
int err;
- struct dentry *dir_dentry = filter_parent(obd, S_IFREG);
rc = filter_finish_transno(export, desc_private, oti, rc);
err = fsfilt_commit(obd, dir_dentry->d_inode, desc_private);
rc = err;
if (obd_sync_filter) {
/* this can fail with ENOMEM, what should we do then? */
- filter_sync(obd);
+ filter_syncfs(conn);
}
/* XXX <adilger> LASSERT(last_rcvd == last_committed)*/
}
RETURN(ret);
}
+/*
+ * Prepare a SAN read or write: for every remote niobuf of every object,
+ * replace the byte offset in rnb->offset with a block number, so that the
+ * caller can hand the block list back to the client.
+ *
+ * @cmd:      OBD_BRW_READ or a command with OBD_BRW_WRITE set
+ * @conn:     connection handle identifying the filter device
+ * @objcount: number of entries in @obj
+ * @obj:      objects to operate on; obj[i].ioo_bufcnt niobufs belong to each
+ * @niocount: total number of niobufs (not used by this function)
+ * @nb:       niobufs for all objects, packed back to back; updated in place
+ *
+ * Returns 0 on success, -EINVAL for an invalid connection, -ENOENT if an
+ * object has no inode, the error from filter_fid2dentry(), or the error
+ * from fs_prep_san_write().  Processing stops at the first failure; niobufs
+ * translated before the failure keep their block numbers.
+ */
+static int filter_san_preprw(int cmd, struct lustre_handle *conn,
+                             int objcount, struct obd_ioobj *obj,
+                             int niocount, struct niobuf_remote *nb)
+{
+        struct obd_device *obd;
+        struct obd_ioobj *o = obj;
+        struct niobuf_remote *rnb = nb;
+        int rc = 0;
+        int i;
+        ENTRY;
+
+        if ((cmd & OBD_BRW_WRITE) != 0)
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1);
+        else
+                XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1);
+
+        obd = class_conn2obd(conn);
+        if (!obd) {
+                CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr);
+                RETURN(-EINVAL);
+        }
+
+        for (i = 0; i < objcount; i++, o++) {
+                struct dentry *dentry;
+                struct inode *inode;
+                int j;
+
+                dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG,
+                                                              o->ioo_id),
+                                           o->ioo_id, 0);
+                if (IS_ERR(dentry))
+                        GOTO(out, rc = PTR_ERR(dentry));
+                inode = dentry->d_inode;
+                if (!inode) {
+                        CERROR("trying to BRW to non-existent file "LPU64"\n",
+                               o->ioo_id);
+                        f_dput(dentry);
+                        GOTO(out, rc = -ENOENT);
+                }
+
+                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
+                        long block;
+
+                        block = rnb->offset >> PAGE_SHIFT;
+
+                        /* NOTE(review): exact comparison here, while the
+                         * statistics above test the OBD_BRW_WRITE bit;
+                         * confirm no other flag bits can be set in cmd. */
+                        if (cmd == OBD_BRW_READ) {
+                                block = inode->i_mapping->a_ops->bmap(
+                                        inode->i_mapping, block);
+                        } else {
+                                loff_t newsize = rnb->offset + rnb->len;
+                                /* fs_prep_san_write will also update inode
+                                 * size for us:
+                                 * (1) newly allocated block
+                                 * (2) existing block but size extended
+                                 */
+                                /* FIXME We could call fs_prep_san_write()
+                                 * only once for all the block allocations.
+                                 * Now call it once for each block, for
+                                 * simplicity. And if an error happens, we
+                                 * probably need to release the previously
+                                 * allocated blocks */
+                                rc = fs_prep_san_write(obd, inode, &block,
+                                                       1, newsize);
+                                if (rc)
+                                        break;
+                        }
+
+                        rnb->offset = block;
+                }
+                f_dput(dentry);
+
+                /* The break above only leaves the inner loop.  Stop here as
+                 * well: rnb no longer points at the next object's first
+                 * niobuf, and a later successful call would overwrite rc
+                 * and hide the failure from the caller. */
+                if (rc)
+                        GOTO(out, rc);
+        }
+out:
+        RETURN(rc);
+}
+
static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
{
struct obd_device *obd;
o_connect: filter_connect,
o_disconnect: filter_disconnect,
o_statfs: filter_statfs,
+ o_syncfs: filter_syncfs,
o_getattr: filter_getattr,
o_create: filter_create,
o_setattr: filter_setattr,
o_preprw: filter_preprw,
o_commitrw: filter_commitrw
#if 0
+ o_san_preprw: filter_san_preprw,
+ o_preallocate: filter_preallocate_inodes,
+ o_migrate: filter_migrate,
+ o_copy: filter_copy_data,
+ o_iterate: filter_iterate
+#endif
+};
+
+static struct obd_ops filter_sanobd_ops = {
+ o_owner: THIS_MODULE,
+ o_attach: filter_attach,
+ o_detach: filter_detach,
+ o_get_info: filter_get_info,
+ o_setup: filter_san_setup,
+ o_cleanup: filter_cleanup,
+ o_connect: filter_connect,
+ o_disconnect: filter_disconnect,
+ o_statfs: filter_statfs,
+ o_getattr: filter_getattr,
+ o_create: filter_create,
+ o_setattr: filter_setattr,
+ o_destroy: filter_destroy,
+ o_open: filter_open,
+ o_close: filter_close,
+ o_brw: filter_brw,
+ o_punch: filter_truncate,
+ o_preprw: filter_preprw,
+ o_commitrw: filter_commitrw,
+ o_san_preprw: filter_san_preprw,
+#if 0
o_preallocate: filter_preallocate_inodes,
o_migrate: filter_migrate,
o_copy: filter_copy_data,
static int __init obdfilter_init(void)
{
struct lprocfs_static_vars lvars;
+ int rc;
printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
filter_open_cache = kmem_cache_create("ll_filter_fdata",
sizeof(struct filter_dentry_data),
0, 0, NULL, NULL);
if (!filter_dentry_cache) {
- kmem_cache_destroy(filter_open_cache);
- RETURN(-ENOMEM);
+ rc = -ENOMEM;
+ goto err1;
}
xprocfs_init ("filter");
lprocfs_init_vars(&lvars);
- return class_register_type(&filter_obd_ops, lvars.module_vars,
- OBD_FILTER_DEVICENAME);
+
+ rc = class_register_type(&filter_obd_ops, lvars.module_vars,
+ OBD_FILTER_DEVICENAME);
+ if (rc)
+ goto err2;
+
+ rc = class_register_type(&filter_sanobd_ops, lvars.module_vars,
+ OBD_FILTER_SAN_DEVICENAME);
+ if (rc)
+ goto err3;
+
+ return 0;
+err3:
+ class_unregister_type(OBD_FILTER_DEVICENAME);
+err2:
+ kmem_cache_destroy(filter_dentry_cache);
+err1:
+ kmem_cache_destroy(filter_open_cache);
+ return rc;
}
static void __exit obdfilter_exit(void)
{
+ class_unregister_type(OBD_FILTER_SAN_DEVICENAME);
class_unregister_type(OBD_FILTER_DEVICENAME);
if (kmem_cache_destroy(filter_dentry_cache))
CERROR("couldn't free obdfilter dentry cache\n");
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/lprocfs_status.h>
#include <linux/obd.h>
+/*
+ * Fill @sfs by calling vfs_statfs() on the fo_sb of the filter device passed
+ * as the opaque @data pointer; returns vfs_statfs()'s result.  @data must be
+ * a non-NULL struct obd_device pointer (asserted).
+ */
static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
{
struct obd_device *dev = (struct obd_device *) data;
+ LASSERT(dev != NULL);
return vfs_statfs(dev->u.filter.fo_sb, sfs);
}
void *data)
{
struct obd_device *dev = (struct obd_device *)data;
+ LASSERT(dev != NULL);
return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
}
DEFS=
+
+if LIBLUSTRE
+lib_LIBRARIES = libosc.a
+LINX= obd_pack.c client.c
+libosc_a_SOURCES = osc_request.c $(LINX)
+else
MODULE = osc
modulefs_DATA = osc.o
EXTRA_PROGRAMS = osc
-
LINX= obd_pack.c client.c
osc_SOURCES = osc_request.c lproc_osc.c $(LINX)
+endif
obd_pack.c:
test -e obd_pack.c || ln -sf $(top_srcdir)/lib/obd_pack.c
*/
#define DEBUG_SUBSYSTEM S_CLASS
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_OSC
+#ifdef __KERNEL__
#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/lustre_dlm.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#include <linux/workqueue.h>
+#include <linux/smp_lock.h>
+#else
+#include <linux/locks.h>
#endif
+#else
+#include <liblustre.h>
+#endif
+
#include <linux/kp30.h>
#include <linux/lustre_mds.h> /* for mds_objid */
#include <linux/obd_ost.h>
#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
#include <linux/lprocfs_status.h>
+/* It is important that ood_fh remain the first item in this structure: that
+ * way, we don't have to re-pack the obdo's inline data before we send it to
+ * the server, we can just send the whole struct unaltered.
+ *
+ * The structure is copied into oa->o_inline after a successful open and is
+ * read back from oa->o_inline by osc_close(). */
+#define OSC_OBDO_DATA_MAGIC 0xD15EA5ED
+struct osc_obdo_data {
+ /* file handle copied from obdo_handle(oa) in the open reply */
+ struct lustre_handle ood_fh;
+ /* the open request, with an extra reference taken via
+ * ptlrpc_request_addref(); osc_close() clears its replay flag */
+ struct ptlrpc_request *ood_request;
+ /* set to OSC_OBDO_DATA_MAGIC on open; asserted by osc_close() */
+ __u32 ood_magic;
+};
+#include <linux/obd_lov.h> /* just for the startup assertion; is that wrong? */
+
+/*
+ * Send an OST_SYNCFS request on @imp and wait for the reply.
+ *
+ * @imp:       import to send the request on
+ * @rootfid:   not used by this function
+ * @level:     stored in the request's rq_level
+ * @msg_flags: OR-ed into the request message flags
+ *
+ * Returns the result of ptlrpc_queue_wait(), or -ENOMEM if the request
+ * could not be allocated.
+ *
+ * NOTE(review): on allocation failure the out: label passes a NULL req to
+ * ptlrpc_req_finished(); confirm that function accepts NULL.
+ */
+static int send_sync(struct obd_import *imp, struct ll_fid *rootfid,
+ int level, int msg_flags)
+{
+ struct ptlrpc_request *req;
+ struct mds_body *body;
+ int rc, size = sizeof(*body);
+ ENTRY;
+
+ req = ptlrpc_prep_req(imp, OST_SYNCFS, 1, &size, NULL);
+ if (!req)
+ GOTO(out, rc = -ENOMEM);
+
+ /* body is fetched but nothing is written to it before sending */
+ body = lustre_msg_buf(req->rq_reqmsg, 0);
+ req->rq_level = level;
+ req->rq_replen = lustre_msg_size(1, &size);
+
+ req->rq_reqmsg->flags |= msg_flags;
+ rc = ptlrpc_queue_wait(req);
+
+ if (!rc) {
+ CDEBUG(D_NET, "last_committed="LPU64
+ ", last_xid="LPU64"\n",
+ req->rq_repmsg->last_committed,
+ req->rq_repmsg->last_xid);
+ }
+
+ EXIT;
+ out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+/*
+ * Send a sync request on @imp at level LUSTRE_CONN_RECOVD with the
+ * MSG_LAST_REPLAY message flag set, and return send_sync()'s result.
+ */
+static int signal_completed_replay(struct obd_import *imp)
+{
+ /* left uninitialized: send_sync() never reads its rootfid argument */
+ struct ll_fid fid;
+
+ return send_sync(imp, &fid, LUSTRE_CONN_RECOVD, MSG_LAST_REPLAY);
+}
+
static int osc_attach(struct obd_device *dev, obd_count len, void *data)
{
struct lprocfs_static_vars lvars;
RETURN(lsm_size);
}
-inline void oti_from_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+inline void oti_from_request(struct obd_trans_info *oti,
+ struct ptlrpc_request *req)
{
if (oti && req->rq_repmsg)
oti->oti_transno = NTOH__u64(req->rq_repmsg->transno);
if (!request)
RETURN(-ENOMEM);
-#warning FIXME: request->rq_flags |= PTL_RPC_FL_REPLAY;
+ request->rq_flags |= PTL_RPC_FL_REPLAY;
body = lustre_msg_buf(request->rq_reqmsg, 0);
#warning FIXME: pack only valid fields instead of memcpy, endianness
memcpy(&body->oa, oa, sizeof(*oa));
if (rc)
GOTO(out, rc);
- body = lustre_msg_buf(request->rq_repmsg, 0);
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- if (oa)
+ if (oa) {
+ struct osc_obdo_data ood;
+ body = lustre_msg_buf(request->rq_repmsg, 0);
memcpy(oa, &body->oa, sizeof(*oa));
+ /* If the open succeeded, we better have a handle */
+ /* BlueArc OSTs don't send back (o_valid | FLHANDLE). sigh.
+ * Temporary workaround until fixed. -phil 24 Feb 03 */
+ //LASSERT(oa->o_valid & OBD_MD_FLHANDLE);
+ oa->o_valid |= OBD_MD_FLHANDLE;
+
+ memcpy(&ood.ood_fh, obdo_handle(oa), sizeof(ood.ood_fh));
+ ood.ood_request = ptlrpc_request_addref(request);
+ ood.ood_magic = OSC_OBDO_DATA_MAGIC;
+
+ /* Save this data in the request; it will be passed back to us
+ * in future obdos. This memcpy is guaranteed to be safe,
+ * because we check at compile-time that sizeof(ood) is smaller
+ * than oa->o_inline. */
+ memcpy(&oa->o_inline, &ood, sizeof(ood));
+ }
+
EXIT;
out:
ptlrpc_req_finished(request);
static int osc_close(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md, struct obd_trans_info *oti)
{
+ struct obd_import *import = class_conn2cliimp(conn);
struct ptlrpc_request *request;
struct ost_body *body;
+ struct osc_obdo_data *ood;
+ unsigned long flags;
int rc, size = sizeof(*body);
ENTRY;
- request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_CLOSE, 1, &size,
- NULL);
+ LASSERT(oa != NULL);
+ ood = (struct osc_obdo_data *)&oa->o_inline;
+ LASSERT(ood->ood_magic == OSC_OBDO_DATA_MAGIC);
+
+ request = ptlrpc_prep_req(import, OST_CLOSE, 1, &size, NULL);
if (!request)
RETURN(-ENOMEM);
request->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(request);
- if (rc)
+ if (rc) {
+ /* FIXME: Does this mean that the file is still open locally?
+ * If not, and I somehow suspect not, we need to cleanup
+ * below */
GOTO(out, rc);
+ }
+
+ spin_lock_irqsave(&import->imp_lock, flags);
+ ood->ood_request->rq_flags &= ~PTL_RPC_FL_REPLAY;
+ /* see comments in llite/file.c:ll_mdc_close() */
+ if (ood->ood_request->rq_transno) {
+ LBUG(); /* this can't happen yet */
+ if (!request->rq_transno) {
+ request->rq_transno = ood->ood_request->rq_transno;
+ ptlrpc_retain_replayable_request(request, import);
+ }
+ spin_unlock_irqrestore(&import->imp_lock, flags);
+ } else {
+ spin_unlock_irqrestore(&import->imp_lock, flags);
+ ptlrpc_req_finished(ood->ood_request);
+ }
body = lustre_msg_buf(request->rq_repmsg, 0);
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- if (oa)
- memcpy(oa, &body->oa, sizeof(*oa));
+ memcpy(oa, &body->oa, sizeof(*oa));
EXIT;
out:
struct list_head *tmp;
ENTRY;
- /* This feels wrong to me. */
list_for_each(tmp, &desc->bd_page_list) {
struct ptlrpc_bulk_page *bulk;
bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
EXIT;
}
+/*
+ * This is called when there was a bulk error return. However, we don't know
+ * whether the bulk completed or not. We cancel the portals bulk descriptors,
+ * so that if the OST decides to send them later we don't double free. Then
+ * remove this descriptor from the set so that the set callback doesn't wait
+ * forever for the last CB_PHASE_FINISH to be called, and finally dump all of
+ * the bulk descriptor references.
+ *
+ * @desc must already belong to a brw set (asserted).
+ */
+static void osc_ptl_ev_abort(struct ptlrpc_bulk_desc *desc)
+{
+ ENTRY;
+
+ LASSERT(desc->bd_brw_set != NULL);
+
+ /* step 1: cancel the portals bulk descriptors */
+ ptlrpc_abort_bulk(desc);
+ /* step 2: take the descriptor out of its set */
+ obd_brw_set_del(desc);
+ /* step 3: unmap the pages and drop the descriptor references */
+ unmap_and_decref_bulk_desc(desc);
+
+ EXIT;
+}
+
static int osc_brw_read(struct lustre_handle *conn, struct lov_stripe_md *lsm,
obd_count page_count, struct brw_page *pga,
struct obd_brw_set *set)
struct ptlrpc_bulk_desc *desc = NULL;
struct ost_body *body;
int rc, size[3] = {sizeof(*body)}, mapped = 0;
- unsigned long flags;
struct obd_ioobj *iooptr;
void *nioptr;
__u32 xid;
ENTRY;
+restart_bulk:
size[1] = sizeof(struct obd_ioobj);
size[2] = page_count * sizeof(struct niobuf_remote);
RETURN(-ENOMEM);
body = lustre_msg_buf(request->rq_reqmsg, 0);
+ body->oa.o_valid = HTON__u32(OBD_MD_FLCKSUM * CHECKSUM_BULK);
desc = ptlrpc_prep_bulk(connection);
if (!desc)
ost_pack_ioo(&iooptr, lsm, page_count);
/* end almost identical to brw_write case */
- spin_lock_irqsave(&imp->imp_lock, flags);
- xid = ++imp->imp_last_xid; /* single xid for all pages */
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ xid = ptlrpc_next_xid(); /* single xid for all pages */
obd_kmap_get(page_count, 0);
for (mapped = 0; mapped < page_count; mapped++) {
struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- if (bulk == NULL)
- GOTO(out_unmap, rc = -ENOMEM);
+ if (bulk == NULL) {
+ unmap_and_decref_bulk_desc(desc);
+ GOTO(out_req, rc = -ENOMEM);
+ }
bulk->bp_xid = xid; /* single xid for all pages */
* Register the bulk first, because the reply could arrive out of order,
* and we want to be ready for the bulk data.
*
- * One reference is released when brw_finish is complete, the other when
- * the caller removes us from the "set" list.
+ * One reference is released when osc_ptl_ev_hdlr() is called by
+ * portals, the other when the caller removes us from the "set" list.
*
* On error, we never do the brw_finish, so we handle all decrefs.
*/
OBD_FAIL_OSC_BRW_READ_BULK);
} else {
rc = ptlrpc_register_bulk_put(desc);
- if (rc)
- GOTO(out_unmap, rc);
+ if (rc) {
+ unmap_and_decref_bulk_desc(desc);
+ GOTO(out_req, rc);
+ }
obd_brw_set_add(set, desc);
}
+ request->rq_flags |= PTL_RPC_FL_NO_RESEND;
request->rq_replen = lustre_msg_size(1, size);
rc = ptlrpc_queue_wait(request);
- /*
- * XXX: If there is an error during the processing of the callback,
- * such as a timeout in a sleep that it performs, brw_finish
- * will never get called, and we'll leak the desc, fail to kunmap
- * things, cats will live with dogs. One solution would be to
- * export brw_finish as osc_brw_finish, so that the timeout case
- * and its kin could call it for proper cleanup. An alternative
- * would be for an error return from the callback to cause us to
- * clean up, but that doesn't help the truly async cases (like
- * LOV), which will immediately return from their PHASE_START
- * callback, before any such cleanup-requiring error condition can
- * be detected.
- */
+ /* XXX bug 937 here */
+ if (rc == -ETIMEDOUT && (request->rq_flags & PTL_RPC_FL_RESEND)) {
+ DEBUG_REQ(D_HA, request, "BULK TIMEOUT");
+ ptlrpc_req_finished(request);
+ goto restart_bulk;
+ }
+
+ if (rc) {
+ osc_ptl_ev_abort(desc);
+ GOTO(out_req, rc);
+ }
+
+#if CHECKSUM_BULK
+ body = lustre_msg_buf(request->rq_repmsg, 0);
+ if (body->oa.o_valid & NTOH__u32(OBD_MD_FLCKSUM)) {
+ static int cksum_counter;
+ __u64 server_cksum = NTOH__u64(body->oa.o_rdev);
+ __u64 cksum = 0;
+
+ for (mapped = 0; mapped < page_count; mapped++) {
+ char *ptr = kmap(pga[mapped].pg);
+ int off = pga[mapped].off & (PAGE_SIZE - 1);
+ int len = pga[mapped].count;
+
+ LASSERT(off + len <= PAGE_SIZE);
+ ost_checksum(&cksum, ptr + off, len);
+ kunmap(pga[mapped].pg);
+ }
+
+ cksum_counter++;
+ if (server_cksum != cksum) {
+ CERROR("Bad checksum: server "LPX64", client "LPX64
+ ", server NID "LPX64"\n", server_cksum, cksum,
+ imp->imp_connection->c_peer.peer_nid);
+ cksum_counter = 0;
+ } else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
+ CERROR("Checksum %u from "LPX64" OK: "LPX64"\n",
+ cksum_counter,
+ imp->imp_connection->c_peer.peer_nid, cksum);
+ } else {
+ static int cksum_missed;
+ cksum_missed++;
+ if ((cksum_missed & (-cksum_missed)) == cksum_missed)
+ CERROR("Request checksum %u from "LPX64", no reply\n",
+ cksum_missed,
+ imp->imp_connection->c_peer.peer_nid);
+ }
+#endif
+
+ EXIT;
out_req:
ptlrpc_req_finished(request);
- RETURN(rc);
-
- /* Clean up on error. */
-out_unmap:
- while (mapped-- > 0)
- kunmap(pga[mapped].pg);
- obd_kmap_put(page_count);
- ptlrpc_bulk_decref(desc);
- goto out_req;
+ return rc;
}
static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *lsm,
struct ptlrpc_bulk_desc *desc = NULL;
struct ost_body *body;
int rc, size[3] = {sizeof(*body)}, mapped = 0;
- unsigned long flags;
struct obd_ioobj *iooptr;
void *nioptr;
__u32 xid;
+#if CHECKSUM_BULK
+ __u64 cksum = 0;
+#endif
ENTRY;
+restart_bulk:
size[1] = sizeof(struct obd_ioobj);
size[2] = page_count * sizeof(struct niobuf_remote);
ost_pack_ioo(&iooptr, lsm, page_count);
/* end almost identical to brw_read case */
- spin_lock_irqsave(&imp->imp_lock, flags);
- xid = ++imp->imp_last_xid; /* single xid for all pages */
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ xid = ptlrpc_next_xid(); /* single xid for all pages */
obd_kmap_get(page_count, 0);
for (mapped = 0; mapped < page_count; mapped++) {
struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
- if (bulk == NULL)
- GOTO(out_unmap, rc = -ENOMEM);
+ if (bulk == NULL) {
+ unmap_and_decref_bulk_desc(desc);
+ GOTO(out_req, rc = -ENOMEM);
+ }
bulk->bp_xid = xid; /* single xid for all pages */
bulk->bp_buf = kmap(pga[mapped].pg);
bulk->bp_page = pga[mapped].pg;
- bulk->bp_buflen = PAGE_SIZE;
+ bulk->bp_buflen = pga[mapped].count;
ost_pack_niobuf(&nioptr, pga[mapped].off, pga[mapped].count,
pga[mapped].flag, bulk->bp_xid);
+ ost_checksum(&cksum, bulk->bp_buf, bulk->bp_buflen);
}
+#if CHECKSUM_BULK
+ body->oa.o_rdev = HTON__u64(cksum);
+ body->oa.o_valid |= HTON__u32(OBD_MD_FLCKSUM);
+#endif
/*
* Register the bulk first, because the reply could arrive out of
* order, and we want to be ready for the bulk data.
*/
if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_WRITE_BULK)) {
CERROR("obd_fail_loc=%x, skipping register_bulk\n",
- OBD_FAIL_OSC_BRW_WRITE_BULK);
+ OBD_FAIL_OSC_BRW_WRITE_BULK);
} else {
rc = ptlrpc_register_bulk_get(desc);
- if (rc)
- GOTO(out_unmap, rc);
+ if (rc) {
+ unmap_and_decref_bulk_desc(desc);
+ GOTO(out_req, rc);
+ }
obd_brw_set_add(set, desc);
}
+ request->rq_flags |= PTL_RPC_FL_NO_RESEND;
request->rq_replen = lustre_msg_size(1, size);
rc = ptlrpc_queue_wait(request);
- /*
- * XXX: If there is an error during the processing of the callback,
- * such as a timeout in a sleep that it performs, brw_finish
- * will never get called, and we'll leak the desc, fail to kunmap
- * things, cats will live with dogs. One solution would be to
- * export brw_finish as osc_brw_finish, so that the timeout case
- * and its kin could call it for proper cleanup. An alternative
- * would be for an error return from the callback to cause us to
- * clean up, but that doesn't help the truly async cases (like
- * LOV), which will immediately return from their PHASE_START
- * callback, before any such cleanup-requiring error condition can
- * be detected.
- */
+ /* XXX bug 937 here */
+ if (rc == -ETIMEDOUT && (request->rq_flags & PTL_RPC_FL_RESEND)) {
+ DEBUG_REQ(D_HA, request, "BULK TIMEOUT");
+ ptlrpc_req_finished(request);
+ goto restart_bulk;
+ }
+
+ if (rc) {
+ osc_ptl_ev_abort(desc);
+ GOTO(out_req, rc);
+ }
+
+ EXIT;
out_req:
ptlrpc_req_finished(request);
+ return rc;
+}
+
+/*
+ * Fallback for builds whose headers do not provide min_t(): the smaller of
+ * a and b after converting both to type.  The expansion is fully
+ * parenthesised so it can be used inside larger expressions such as
+ * "x > min_t(...)".  Unlike the kernel version, the arguments are evaluated
+ * more than once, so they must be free of side effects.
+ */
+#ifndef min_t
+#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))
+#endif
+
+#warning "FIXME: make values dynamic based on get_info at setup (bug 665)"
+#define OSC_BRW_MAX_SIZE 65536
+#define OSC_BRW_MAX_IOV min_t(int, PTL_MD_MAX_IOV, OSC_BRW_MAX_SIZE/PAGE_SIZE)
+
+/*
+ * Bulk read/write entry point: split @page_count pages of @pga into chunks
+ * of at most OSC_BRW_MAX_IOV pages and issue each chunk through
+ * osc_brw_write() (when OBD_BRW_WRITE is set in @cmd) or osc_brw_read().
+ *
+ * Returns 0 once every chunk has been issued, or the first non-zero result
+ * from a chunk; chunks issued before the failing one are not undone here.
+ */
+static int osc_brw(int cmd, struct lustre_handle *conn,
+ struct lov_stripe_md *md, obd_count page_count,
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
+{
+ ENTRY;
+
+ while (page_count) {
+ obd_count pages_per_brw;
+ int rc;
+
+ /* clamp this chunk to the per-request page limit */
+ if (page_count > OSC_BRW_MAX_IOV)
+ pages_per_brw = OSC_BRW_MAX_IOV;
+ else
+ pages_per_brw = page_count;
+
+ if (cmd & OBD_BRW_WRITE)
+ rc = osc_brw_write(conn, md, pages_per_brw, pga,
+ set, oti);
+ else
+ rc = osc_brw_read(conn, md, pages_per_brw, pga, set);
+
+ if (rc != 0)
+ RETURN(rc);
+
+ /* advance past the pages just submitted */
+ page_count -= pages_per_brw;
+ pga += pages_per_brw;
+ }
+ RETURN(0);
+}
+
+#ifdef __KERNEL__
+/* Note: caller will lock/unlock, and set uptodate on the pages */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/*
+ * SAN read for pre-2.5 kernels: send an OST_SAN_READ request describing
+ * @page_count pages of @pga, then read each page through a buffer head on
+ * the client's cl_sandev device, using the offset of the matching niobuf in
+ * the reply as the block number.  A reply offset of zero is treated as a
+ * hole and the page is zero-filled instead.  @set is not used.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, the
+ * error from ptlrpc_queue_wait(), -EINVAL if the reply's niobuf buffer is
+ * missing or has the wrong length, or -EIO if a buffer is not uptodate
+ * after the read.
+ *
+ * NOTE(review): the pages kmap()ed and the slots reserved with
+ * obd_kmap_get() below are only released on the out_unmap (error) path; the
+ * success path goes straight to out_req.  Confirm whether the caller is
+ * expected to release them.
+ */
+static int sanosc_brw_read(struct lustre_handle *conn,
+ struct lov_stripe_md *md,
+ obd_count page_count,
+ struct brw_page *pga,
+ struct obd_brw_set *set)
+{
+ struct ptlrpc_request *request = NULL;
+ struct ost_body *body;
+ struct niobuf_remote *remote, *nio_rep;
+ int rc, j, size[3] = {sizeof(*body)}, mapped = 0;
+ struct obd_ioobj *iooptr;
+ void *nioptr;
+ ENTRY;
+
+ /* request layout: ost_body, one obd_ioobj, page_count niobufs */
+ size[1] = sizeof(struct obd_ioobj);
+ size[2] = page_count * sizeof(*remote);
+
+ request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SAN_READ, 3,
+ size, NULL);
+ if (!request)
+ RETURN(-ENOMEM);
+
+ body = lustre_msg_buf(request->rq_reqmsg, 0);
+ iooptr = lustre_msg_buf(request->rq_reqmsg, 1);
+ nioptr = lustre_msg_buf(request->rq_reqmsg, 2);
+ ost_pack_ioo(&iooptr, md, page_count);
+
+ obd_kmap_get(page_count, 0);
+
+ for (mapped = 0; mapped < page_count; mapped++) {
+ LASSERT(PageLocked(pga[mapped].pg));
+
+ kmap(pga[mapped].pg);
+ ost_pack_niobuf(&nioptr, pga[mapped].off, pga[mapped].count,
+ pga[mapped].flag, 0);
+ }
+
+ /* reply layout: ost_body followed by page_count niobufs */
+ size[1] = page_count * sizeof(*remote);
+ request->rq_replen = lustre_msg_size(2, size);
+
+ rc = ptlrpc_queue_wait(request);
+ if (rc)
+ GOTO(out_unmap, rc);
+
+ nioptr = lustre_msg_buf(request->rq_repmsg, 1);
+ if (!nioptr)
+ GOTO(out_unmap, rc = -EINVAL);
+
+ if (request->rq_repmsg->buflens[1] != size[1]) {
+ CERROR("buffer length wrong (%d vs. %d)\n",
+ request->rq_repmsg->buflens[1], size[1]);
+ GOTO(out_unmap, rc = -EINVAL);
+ }
+
+ /* presumably converts each reply niobuf in place (TODO confirm); the
+ * result in remote is not used, the entries are re-read below */
+ for (j = 0; j < page_count; j++) {
+ ost_unpack_niobuf(&nioptr, &remote);
+ }
+
+ /* rewind to the first reply niobuf */
+ nioptr = lustre_msg_buf(request->rq_repmsg, 1);
+ nio_rep = (struct niobuf_remote*)nioptr;
+
+ /* actual read */
+ for (j = 0; j < page_count; j++) {
+ struct page *page = pga[j].pg;
+ struct buffer_head *bh;
+ kdev_t dev;
+
+ /* got san device associated */
+ LASSERT(class_conn2obd(conn));
+ dev = class_conn2obd(conn)->u.cli.cl_sandev;
+
+ /* hole */
+ if (!nio_rep[j].offset) {
+ CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n",
+ page->mapping->host->i_ino,
+ page->index);
+ memset(page_address(page), 0, PAGE_SIZE);
+ continue;
+ }
+
+ if (!page->buffers) {
+ /* first use of this page: attach one page-sized buffer
+ * and point it at the block returned by the server */
+ create_empty_buffers(page, dev, PAGE_SIZE);
+ bh = page->buffers;
+
+ clear_bit(BH_New, &bh->b_state);
+ set_bit(BH_Mapped, &bh->b_state);
+ bh->b_blocknr = (unsigned long)nio_rep[j].offset;
+
+ clear_bit(BH_Uptodate, &bh->b_state);
+
+ ll_rw_block(READ, 1, &bh);
+ } else {
+ bh = page->buffers;
+
+ /* if buffer already existed, it must be the
+ * one we mapped before, check it */
+ LASSERT(!test_bit(BH_New, &bh->b_state));
+ LASSERT(test_bit(BH_Mapped, &bh->b_state));
+ LASSERT(bh->b_blocknr ==
+ (unsigned long)nio_rep[j].offset);
+
+ /* wait for its I/O completion */
+ if (test_bit(BH_Lock, &bh->b_state))
+ wait_on_buffer(bh);
+
+ if (!test_bit(BH_Uptodate, &bh->b_state))
+ ll_rw_block(READ, 1, &bh);
+ }
+
+
+ /* the read must be synchronous: wait for it to complete */
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ /* I/O error */
+ rc = -EIO;
+ goto out_unmap;
+ }
+ }
+
+out_req:
+ ptlrpc_req_finished(request);
RETURN(rc);
+out_unmap:
/* Clean up on error. */
+ while (mapped-- > 0)
+ kunmap(pga[mapped].pg);
+
+ obd_kmap_put(page_count);
+
+ goto out_req;
+}
+
+/*
+ * SAN write for pre-2.5 kernels: send an OST_SAN_WRITE request describing
+ * @page_count pages of @pga, then write each page through a buffer head on
+ * the client's cl_sandev device, using the offset of the matching niobuf in
+ * the reply as the block number.  Each write is waited for before the next
+ * page is processed.  @set is not used.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, the
+ * error from ptlrpc_queue_wait(), -EINVAL if the reply's niobuf buffer is
+ * missing or has the wrong length, or -EIO if a buffer is not uptodate or
+ * is still dirty after the write.
+ *
+ * NOTE(review): the pages kmap()ed and the slots reserved with
+ * obd_kmap_get() below are only released on the out_unmap (error) path; the
+ * success path goes straight to out_req.  Confirm whether the caller is
+ * expected to release them.
+ */
+static int sanosc_brw_write(struct lustre_handle *conn,
+ struct lov_stripe_md *md,
+ obd_count page_count,
+ struct brw_page *pga,
+ struct obd_brw_set *set)
+{
+ struct ptlrpc_request *request = NULL;
+ struct ost_body *body;
+ struct niobuf_remote *remote, *nio_rep;
+ int rc, j, size[3] = {sizeof(*body)}, mapped = 0;
+ struct obd_ioobj *iooptr;
+ void *nioptr;
+ ENTRY;
+
+ /* request layout: ost_body, one obd_ioobj, page_count niobufs */
+ size[1] = sizeof(struct obd_ioobj);
+ size[2] = page_count * sizeof(*remote);
+
+ request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SAN_WRITE,
+ 3, size, NULL);
+ if (!request)
+ RETURN(-ENOMEM);
+
+ body = lustre_msg_buf(request->rq_reqmsg, 0);
+ iooptr = lustre_msg_buf(request->rq_reqmsg, 1);
+ nioptr = lustre_msg_buf(request->rq_reqmsg, 2);
+ ost_pack_ioo(&iooptr, md, page_count);
+
+ /* map pages, and pack request */
+ obd_kmap_get(page_count, 0);
+ for (mapped = 0; mapped < page_count; mapped++) {
+ LASSERT(PageLocked(pga[mapped].pg));
+
+ kmap(pga[mapped].pg);
+ ost_pack_niobuf(&nioptr, pga[mapped].off, pga[mapped].count,
+ pga[mapped].flag, 0);
+ }
+
+ /* reply layout: ost_body followed by page_count niobufs */
+ size[1] = page_count * sizeof(*remote);
+ request->rq_replen = lustre_msg_size(2, size);
+
+ rc = ptlrpc_queue_wait(request);
+ if (rc)
+ GOTO(out_unmap, rc);
+
+ nioptr = lustre_msg_buf(request->rq_repmsg, 1);
+ if (!nioptr)
+ GOTO(out_unmap, rc = -EINVAL);
+
+ if (request->rq_repmsg->buflens[1] != size[1]) {
+ CERROR("buffer length wrong (%d vs. %d)\n",
+ request->rq_repmsg->buflens[1], size[1]);
+ GOTO(out_unmap, rc = -EINVAL);
+ }
+
+ /* presumably converts each reply niobuf in place (TODO confirm); the
+ * result in remote is not used, the entries are re-read below */
+ for (j = 0; j < page_count; j++) {
+ ost_unpack_niobuf(&nioptr, &remote);
+ }
+
+ /* rewind to the first reply niobuf */
+ nioptr = lustre_msg_buf(request->rq_repmsg, 1);
+ nio_rep = (struct niobuf_remote*)nioptr;
+
+ /* actual write */
+ for (j = 0; j < page_count; j++) {
+ struct page *page = pga[j].pg;
+ struct buffer_head *bh;
+ kdev_t dev;
+
+ /* got san device associated */
+ LASSERT(class_conn2obd(conn));
+ dev = class_conn2obd(conn)->u.cli.cl_sandev;
+
+ if (!page->buffers) {
+ create_empty_buffers(page, dev, PAGE_SIZE);
+ } else {
+ /* an existing buffer must already be mapped to the
+ * block the server returned */
+ LASSERT(!test_bit(BH_New, &page->buffers->b_state));
+ LASSERT(test_bit(BH_Mapped, &page->buffers->b_state));
+ LASSERT(page->buffers->b_blocknr ==
+ (unsigned long)nio_rep[j].offset);
+ }
+ bh = page->buffers;
+
+ LASSERT(bh);
+
+ /* if the buffer is locked, wait for its I/O completion */
+ if (test_bit(BH_Lock, &bh->b_state))
+ wait_on_buffer(bh);
+
+ clear_bit(BH_New, &bh->b_state);
+ set_bit(BH_Mapped, &bh->b_state);
+
+ /* override the block nr */
+ bh->b_blocknr = (unsigned long)nio_rep[j].offset;
+
+ /* we are about to write it, so set it
+ * uptodate/dirty
+ * the page lock should guarantee no race condition here */
+ set_bit(BH_Uptodate, &bh->b_state);
+ set_bit(BH_Dirty, &bh->b_state);
+
+ ll_rw_block(WRITE, 1, &bh);
+
+ /* the write must be synchronous: wait for it to complete */
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh) || test_bit(BH_Dirty, &bh->b_state)) {
+ /* I/O error */
+ rc = -EIO;
+ goto out_unmap;
+ }
+ }
+
+out_req:
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+
out_unmap:
+ /* Clean up on error. */
while (mapped-- > 0)
kunmap(pga[mapped].pg);
+
obd_kmap_put(page_count);
- ptlrpc_bulk_decref(desc);
+
goto out_req;
}
+#else
+/*
+ * Placeholder used when LINUX_VERSION_CODE is 2.5.0 or later: the SAN read
+ * path is only implemented for older kernels, so this calls LBUG() and
+ * returns 0 without touching its arguments.
+ */
+static int sanosc_brw_read(struct lustre_handle *conn,
+ struct lov_stripe_md *md,
+ obd_count page_count,
+ struct brw_page *pga,
+ struct obd_brw_set *set)
+{
+ LBUG();
+ return 0;
+}
-static int osc_brw(int cmd, struct lustre_handle *conn,
- struct lov_stripe_md *md, obd_count page_count,
- struct brw_page *pga, struct obd_brw_set *set,
- struct obd_trans_info *oti)
+/*
+ * Placeholder used when LINUX_VERSION_CODE is 2.5.0 or later: the SAN write
+ * path is only implemented for older kernels, so this calls LBUG() and
+ * returns 0 without touching its arguments.
+ */
+static int sanosc_brw_write(struct lustre_handle *conn,
+ struct lov_stripe_md *md,
+ obd_count page_count,
+ struct brw_page *pga,
+ struct obd_brw_set *set)
+{
+ LBUG();
+ return 0;
+}
+#endif
+
+static int sanosc_brw(int cmd, struct lustre_handle *conn,
+ struct lov_stripe_md *md, obd_count page_count,
+ struct brw_page *pga, struct obd_brw_set *set,
+ struct obd_trans_info *oti)
{
ENTRY;
obd_count pages_per_brw;
int rc;
- if (page_count > PTL_MD_MAX_IOV)
- pages_per_brw = PTL_MD_MAX_IOV;
+ if (page_count > OSC_BRW_MAX_IOV)
+ pages_per_brw = OSC_BRW_MAX_IOV;
else
pages_per_brw = page_count;
if (cmd & OBD_BRW_WRITE)
- rc = osc_brw_write(conn, md, pages_per_brw, pga, set, oti);
+ rc = sanosc_brw_write(conn, md, pages_per_brw,
+ pga, set);
else
- rc = osc_brw_read(conn, md, pages_per_brw, pga, set);
+ rc = sanosc_brw_read(conn, md, pages_per_brw, pga, set);
if (rc != 0)
RETURN(rc);
}
RETURN(0);
}
+#endif
static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
struct lustre_handle *parent_lock,
GOTO(out, err = -EINVAL);
}
- if (data->ioc_inllen2 < sizeof(uuid.uuid)) {
+ if (data->ioc_inllen2 < sizeof(uuid)) {
OBD_FREE(buf, len);
GOTO(out, err = -EINVAL);
}
desc->ld_default_stripe_size = 0;
desc->ld_default_stripe_offset = 0;
desc->ld_pattern = 0;
- memcpy(desc->ld_uuid.uuid, obddev->obd_uuid.uuid, sizeof(uuid.uuid));
+ memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid));
- memcpy(data->ioc_inlbuf2, obddev->obd_uuid.uuid,
- sizeof(uuid.uuid));
+ memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid));
err = copy_to_user((void *)uarg, buf, len);
if (err)
fakeconn.addr = (__u64)(unsigned long)exp;
fakeconn.cookie = exp->exp_cookie;
- ioc_data.ioc_inlbuf1 = &imp->imp_obd->u.cli.cl_target_uuid;
+ ioc_data.ioc_inlbuf1 =
+ (char *)&imp->imp_obd->u.cli.cl_target_uuid;
ioc_data.ioc_offset = active;
rc = obd_iocontrol(IOC_LOV_SET_OSC_ACTIVE, &fakeconn,
sizeof ioc_data, &ioc_data, NULL);
- if (rc) {
- CERROR("disabling %s on LOV %p/%s: %d\n",
+ if (rc)
+ CERROR("error disabling %s on LOV %p/%s: %d\n",
imp->imp_obd->u.cli.cl_target_uuid.uuid,
notify_obd, notify_obd->obd_uuid.uuid, rc);
- }
} else {
CDEBUG(D_HA, "No exports for obd %p/%s, can't notify about "
"%p\n", notify_obd, notify_obd->obd_uuid.uuid,
{
int rc;
unsigned long flags;
+ int msg_flags;
struct ptlrpc_request *req;
+ struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
ENTRY;
+ CDEBUG(D_HA, "%s: entering phase: %d\n",
+ imp->imp_obd->obd_name, phase);
switch(phase) {
case PTLRPC_RECOVD_PHASE_PREPARE: {
- struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
- ldlm_namespace_cleanup(ns, 1 /* no network ops */);
- ptlrpc_abort_inflight(imp, 0);
- set_osc_active(imp, 0 /* inactive */);
+ if (imp->imp_flags & IMP_REPLAYABLE) {
+ CDEBUG(D_HA, "failover OST\n");
+ /* If we're a failover OSC/OST, just cancel unused
+ * locks to simplify lock replay.
+ */
+ ldlm_cli_cancel_unused(ns, NULL, LDLM_FL_LOCAL_ONLY);
+ } else {
+ CDEBUG(D_HA, "non-failover OST\n");
+ /* Non-failover OSTs (LLNL scenario) disable the OSC
+ * and invalidate local state.
+ */
+ ldlm_namespace_cleanup(ns, 1 /* no network ops */);
+ ptlrpc_abort_inflight(imp, 0);
+ set_osc_active(imp, 0 /* inactive */);
+ }
RETURN(0);
}
- case PTLRPC_RECOVD_PHASE_RECOVER:
+ case PTLRPC_RECOVD_PHASE_RECOVER: {
+ reconnect:
imp->imp_flags &= ~IMP_INVALID;
rc = ptlrpc_reconnect_import(imp, OST_CONNECT, &req);
- ptlrpc_req_finished(req);
+
+ msg_flags = req->rq_repmsg
+ ? lustre_msg_get_op_flags(req->rq_repmsg)
+ : 0;
+
+ if (rc == -EBUSY && (msg_flags & MSG_CONNECT_RECOVERING))
+ CERROR("reconnect denied by recovery; should retry\n");
+
if (rc) {
+ if (phase != PTLRPC_RECOVD_PHASE_NOTCONN) {
+ CERROR("can't reconnect, invalidating\n");
+ ldlm_namespace_cleanup(ns, 1);
+ ptlrpc_abort_inflight(imp, 0);
+ }
imp->imp_flags |= IMP_INVALID;
+ ptlrpc_req_finished(req);
RETURN(rc);
}
+ if (msg_flags & MSG_CONNECT_RECOVERING) {
+ /* Replay if they want it. */
+ DEBUG_REQ(D_HA, req, "OST wants replay");
+ rc = ptlrpc_replay(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+
+ rc = ldlm_replay_locks(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+
+ rc = signal_completed_replay(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+ } else if (msg_flags & MSG_CONNECT_RECONNECT) {
+ DEBUG_REQ(D_HA, req, "reconnecting to MDS\n");
+ /* Nothing else to do here. */
+ } else {
+ DEBUG_REQ(D_HA, req, "evicted: invalidating\n");
+ /* Otherwise, clean everything up. */
+ ldlm_namespace_cleanup(ns, 1);
+ ptlrpc_abort_inflight(imp, 0);
+ }
+
+ ptlrpc_req_finished(req);
+
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
+ imp->imp_flags &= ~IMP_INVALID;
spin_unlock_irqrestore(&imp->imp_lock, flags);
/* Is this the right place? Should we do this in _PREPARE
*/
ptlrpc_wake_delayed(imp);
+ rc = ptlrpc_resend(imp);
+ if (rc)
+ GOTO(check_rc, rc);
+
set_osc_active(imp, 1 /* active */);
RETURN(0);
+ check_rc:
+ /* If we get disconnected in the middle, recovery has probably
+ * failed. Reconnect and find out.
+ */
+ if (rc == -ENOTCONN)
+ goto reconnect;
+ RETURN(rc);
+ }
case PTLRPC_RECOVD_PHASE_NOTCONN:
osc_recover(imp, PTLRPC_RECOVD_PHASE_PREPARE);
RETURN(osc_recover(imp, PTLRPC_RECOVD_PHASE_RECOVER));
o_iocontrol: osc_iocontrol
};
-static int __init osc_init(void)
+struct obd_ops sanosc_obd_ops = {
+ o_owner: THIS_MODULE,
+ o_attach: osc_attach,
+ o_detach: osc_detach,
+ o_cleanup: client_obd_cleanup,
+ o_connect: osc_connect,
+ o_disconnect: client_obd_disconnect,
+ o_statfs: osc_statfs,
+ o_packmd: osc_packmd,
+ o_unpackmd: osc_unpackmd,
+ o_create: osc_create,
+ o_destroy: osc_destroy,
+ o_getattr: osc_getattr,
+ o_setattr: osc_setattr,
+ o_open: osc_open,
+ o_close: osc_close,
+#ifdef __KERNEL__
+ o_setup: client_sanobd_setup,
+ o_brw: sanosc_brw,
+#endif
+ o_punch: osc_punch,
+ o_enqueue: osc_enqueue,
+ o_cancel: osc_cancel,
+ o_cancel_unused: osc_cancel_unused,
+ o_iocontrol: osc_iocontrol,
+};
+
+int __init osc_init(void)
{
struct lprocfs_static_vars lvars;
+ int rc;
+ ENTRY;
+
+ LASSERT(sizeof(struct osc_obdo_data) <= FD_OSTDATA_SIZE);
lprocfs_init_vars(&lvars);
- RETURN(class_register_type(&osc_obd_ops, lvars.module_vars,
- LUSTRE_OSC_NAME));
+
+ rc = class_register_type(&osc_obd_ops, lvars.module_vars,
+ LUSTRE_OSC_NAME);
+ if (rc)
+ RETURN(rc);
+
+ rc = class_register_type(&sanosc_obd_ops, lvars.module_vars,
+ LUSTRE_SANOSC_NAME);
+ if (rc)
+ class_unregister_type(LUSTRE_OSC_NAME);
+
+ RETURN(rc);
}
static void __exit osc_exit(void)
{
+ class_unregister_type(LUSTRE_SANOSC_NAME);
class_unregister_type(LUSTRE_OSC_NAME);
}
+#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
MODULE_LICENSE("GPL");
module_init(osc_init);
module_exit(osc_exit);
+#endif
#include <linux/init.h>
#include <linux/lprocfs_status.h>
+inline void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+{
+ if (oti && req->rq_repmsg)
+ req->rq_repmsg->transno = HTON__u64(oti->oti_transno);
+ EXIT;
+}
static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
RETURN(0);
}
+static int ost_syncfs(struct ptlrpc_request *req)
+{
+ struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
+ struct obd_statfs *osfs;
+ int rc, size = sizeof(*osfs);
+ ENTRY;
+
+ rc = lustre_pack_msg(0, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc)
+ RETURN(rc);
+
+ rc = obd_syncfs(conn);
+ if (rc) {
+ CERROR("ost: syncfs failed: rc %d\n", rc);
+ req->rq_status = rc;
+ RETURN(rc);
+ }
+
+ RETURN(0);
+}
+
static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
void *desc_priv = NULL;
int cmd, i, j, objcount, niocount, size = sizeof(*body);
int rc = 0;
+#if CHECKSUM_BULK
+ __u64 cksum = 0;
+#endif
ENTRY;
body = lustre_msg_buf(req->rq_reqmsg, 0);
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
GOTO(out, req->rq_status = -EIO);
+ /* Hmm, we don't return anything in this reply buffer?
+ * We should be returning per-page status codes and also
+ * per-object size, blocks count, mtime, ctime. (bug 593) */
+ rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc)
+ GOTO(out, req->rq_status = rc);
+
for (i = 0; i < objcount; i++) {
ost_unpack_ioo(&tmp1, &ioo);
if (tmp2 + ioo->ioo_bufcnt > end2) {
bulk->bp_xid = remote_nb[i].xid;
bulk->bp_buf = local_nb[i].addr;
bulk->bp_buflen = remote_nb[i].len;
+ if (body->oa.o_valid & NTOH__u32(OBD_MD_FLCKSUM))
+ ost_checksum(&cksum, bulk->bp_buf, bulk->bp_buflen);
}
rc = ptlrpc_bulk_put(desc);
out_local:
OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
out:
- if (!rc)
- /* Hmm, we don't return anything in this reply buffer?
- * We should be returning per-page status codes and also
- * per-object size, blocks count, mtime, ctime. (bug 593) */
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
- &req->rq_repmsg);
if (rc)
ptlrpc_error(req->rq_svc, req);
- else
+ else {
+#if CHECKSUM_BULK
+ body = lustre_msg_buf(req->rq_repmsg, 0);
+ body->oa.o_rdev = HTON__u64(cksum);
+ body->oa.o_valid |= HTON__u32(OBD_MD_FLCKSUM);
+#endif
ptlrpc_reply(req->rq_svc, req);
+ }
+
RETURN(rc);
}
}
}
- OBD_ALLOC(local_nb, sizeof(*local_nb)* niocount);
+ OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
if (local_nb == NULL)
GOTO(out, rc = -ENOMEM);
remote_nb, local_nb, &desc_priv, oti);
if (req->rq_status)
- GOTO(out, rc = 0);
+ GOTO(out_local, rc = 0);
desc = ptlrpc_prep_bulk(req->rq_connection);
if (desc == NULL)
GOTO(out_bulk, rc);
}
+#if CHECKSUM_BULK
+ if ((body->oa.o_valid & NTOH__u32(OBD_MD_FLCKSUM))) {
+ static int cksum_counter;
+ __u64 client_cksum = NTOH__u64(body->oa.o_rdev);
+ __u64 cksum = 0;
+
+ for (i = 0; i < niocount; i++) {
+ char *ptr = kmap(local_nb[i].page);
+ int off = local_nb[i].offset & (PAGE_SIZE - 1);
+ int len = local_nb[i].len;
+
+ LASSERT(off + len <= PAGE_SIZE);
+ ost_checksum(&cksum, ptr + off, len);
+ kunmap(local_nb[i].page);
+ }
+
+ if (client_cksum != cksum) {
+ CERROR("Bad checksum: client "LPX64", server "LPX64
+ ", client NID "LPX64"\n", client_cksum, cksum,
+ req->rq_connection->c_peer.peer_nid);
+ cksum_counter = 1;
+ } else {
+ cksum_counter++;
+ if ((cksum_counter & (-cksum_counter)) == cksum_counter)
+ CERROR("Checksum %d from "LPX64": "LPX64" OK\n",
+ cksum_counter,
+ req->rq_connection->c_peer.peer_nid,
+ cksum);
+ }
+ }
+#endif
+
req->rq_status = obd_commitrw(cmd, conn, objcount, ioo, niocount,
local_nb, desc_priv, oti);
&req->rq_repmsg);
if (rc)
ptlrpc_error(req->rq_svc, req);
- else
+ else {
+ oti_to_request(oti, req);
rc = ptlrpc_reply(req->rq_svc, req);
+ }
RETURN(rc);
}
-inline void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
+static int ost_san_brw(struct ptlrpc_request *req, int alloc)
{
- if (oti && req->rq_repmsg)
- req->rq_repmsg->transno = HTON__u64(oti->oti_transno);
- EXIT;
+ struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
+ struct niobuf_remote *remote_nb, *res_nb;
+ struct obd_ioobj *ioo;
+ struct ost_body *body;
+ int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
+ void *tmp1, *tmp2, *end2;
+ ENTRY;
+
+ body = lustre_msg_buf(req->rq_reqmsg, 0);
+ tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+ tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+ end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
+ objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
+ niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
+
+ cmd = alloc ? OBD_BRW_WRITE : OBD_BRW_READ;
+
+ for (i = 0; i < objcount; i++) {
+ ost_unpack_ioo((void *)&tmp1, &ioo);
+ if (tmp2 + ioo->ioo_bufcnt > end2) {
+ rc = -EFAULT;
+ break;
+ }
+ for (j = 0; j < ioo->ioo_bufcnt; j++)
+ ost_unpack_niobuf((void *)&tmp2, &remote_nb);
+ }
+
+ size[1] = niocount * sizeof(*remote_nb);
+ rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc)
+ GOTO(out, rc);
+
+ /* The unpackers move tmp1 and tmp2, so reset them before using */
+ tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+ tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+
+ req->rq_status = obd_san_preprw(cmd, conn, objcount, tmp1,
+ niocount, tmp2);
+
+ if (req->rq_status) {
+ rc = 0;
+ goto out;
+ }
+
+ remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
+ res_nb = lustre_msg_buf(req->rq_reqmsg, 2);
+ for (i = 0; i < niocount; i++) {
+ /* this advances remote_nb */
+ ost_pack_niobuf((void **)&remote_nb,
+ res_nb[i].offset,
+ res_nb[i].len, /* 0 */
+ res_nb[i].flags, /* 0 */
+ res_nb[i].xid
+ );
+ }
+
+ rc = 0;
+
+out:
+ if (rc) {
+ OBD_FREE(req->rq_repmsg, req->rq_replen);
+ req->rq_repmsg = NULL;
+ ptlrpc_error(req->rq_svc, req);
+ } else
+ ptlrpc_reply(req->rq_svc, req);
+
+ return rc;
+}
+
+static int filter_recovery_request(struct ptlrpc_request *req,
+ struct obd_device *obd, int *process)
+{
+ switch (req->rq_reqmsg->opc) {
+ case OST_CONNECT: /* This will never get here, but for completeness. */
+ case OST_DISCONNECT:
+ *process = 1;
+ RETURN(0);
+
+ case OST_CLOSE:
+ case OST_CREATE:
+ case OST_DESTROY:
+ case OST_OPEN:
+ case OST_PUNCH:
+ case OST_SETATTR:
+ case OST_SYNCFS:
+ case OST_WRITE:
+ case LDLM_ENQUEUE:
+ *process = target_queue_recovery_request(req, obd);
+ RETURN(0);
+
+ default:
+ DEBUG_REQ(D_ERROR, req, "not permitted during recovery");
+ *process = 0;
+ /* XXX what should we set rq_status to here? */
+ RETURN(ptlrpc_error(req->rq_svc, req));
+ }
}
static int ost_handle(struct ptlrpc_request *req)
{
struct obd_trans_info trans_info = { 0, }, *oti = &trans_info;
- int rc;
+ int should_process, rc;
ENTRY;
rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
GOTO(out, rc);
}
- if (req->rq_reqmsg->opc != OST_CONNECT && req->rq_export == NULL) {
- CERROR("lustre_ost: operation %d on unconnected OST\n",
- req->rq_reqmsg->opc);
- req->rq_status = -ENOTCONN;
- GOTO(out, rc = -ENOTCONN);
- }
+ if (req->rq_reqmsg->opc != OST_CONNECT) {
+ struct obd_device *obd;
+
+ if (req->rq_export == NULL) {
+ CERROR("lustre_ost: operation %d on unconnected OST\n",
+ req->rq_reqmsg->opc);
+ req->rq_status = -ENOTCONN;
+ GOTO(out, rc = -ENOTCONN);
+ }
+
+ obd = req->rq_export->exp_obd;
+
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ if (obd->obd_flags & OBD_ABORT_RECOVERY)
+ target_abort_recovery(obd);
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+
+ if (obd->obd_flags & OBD_RECOVERING) {
+ rc = filter_recovery_request(req, obd, &should_process);
+ if (rc || !should_process)
+ RETURN(rc);
+ } else if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+#if 0
+/* need to store this reply somewhere... */
+ if (req->rq_xid == med->med_last_xid) {
+ DEBUG_REQ(D_HA, req, "resending reply");
+ OBD_ALLOC(req->rq_repmsg, med->med_last_replen);
+ req->rq_replen = med->med_last_replen;
+ memcpy(req->rq_repmsg, med->med_last_reply,
+ req->rq_replen);
+ ptlrpc_reply(req->rq_svc, req);
+ return 0;
+ }
+ DEBUG_REQ(D_HA, req, "no reply for resend, continuing");
+#endif
+ }
+
+ }
if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
GOTO(out, rc = -EINVAL);
case OST_CONNECT:
CDEBUG(D_INODE, "connect\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
- rc = target_handle_connect(req);
+ rc = target_handle_connect(req, ost_handle);
break;
case OST_DISCONNECT:
CDEBUG(D_INODE, "disconnect\n");
rc = ost_brw_read(req);
/* ost_brw sends its own replies */
RETURN(rc);
+ case OST_SAN_READ:
+ CDEBUG(D_INODE, "san read\n");
+ OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
+ rc = ost_san_brw(req, 0);
+ /* ost_san_brw sends its own replies */
+ RETURN(rc);
+ case OST_SAN_WRITE:
+ CDEBUG(D_INODE, "san write\n");
+ OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
+ rc = ost_san_brw(req, 1);
+ /* ost_san_brw sends its own replies */
+ RETURN(rc);
case OST_PUNCH:
CDEBUG(D_INODE, "punch\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0);
rc = ost_statfs(req);
break;
+ case OST_SYNCFS:
+ CDEBUG(D_INODE, "sync\n");
+ OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
+ rc = ost_syncfs(req);
+ break;
case LDLM_ENQUEUE:
CDEBUG(D_INODE, "enqueue\n");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
}
out:
- //req->rq_status = rc;
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
+ struct obd_device *obd = req->rq_export->exp_obd;
+
+ if (obd && (obd->obd_flags & OBD_RECOVERING)) {
+ DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
+ return target_queue_final_reply(req, rc);
+ }
+ /* Lost a race with recovery; let the error path DTRT. */
+ rc = req->rq_status = -ENOTCONN;
+ }
+
if (rc) {
CERROR("ost: processing error (opcode=%d): %d\n",
req->rq_reqmsg->opc, rc);
static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct ost_obd *ost = &obddev->u.ost;
- struct obd_uuid self = { "self" };
int err;
int i;
ENTRY;
ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
OST_BUFSIZE, OST_MAXREQSIZE,
OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
- &self, ost_handle, "ost");
+ ost_handle, "ost");
if (!ost->ost_service) {
CERROR("failed to start service\n");
GOTO(error_disc, err = -ENOMEM);
return lprocfs_obd_detach(dev);
}
-/* This is so similar to mds_connect that it makes my heart weep: we should
- * shuffle the UUID into obd_export proper and make this all happen in
- * target_handle_connect.
+/* I don't think this function is ever used, since nothing
+ * connects directly to this module.
*/
static int ost_connect(struct lustre_handle *conn,
struct obd_device *obd, struct obd_uuid *cluuid,
ptlrpc_recovery_cb_t recover)
{
struct obd_export *exp;
- struct ost_export_data *oed;
- struct list_head *p;
int rc;
ENTRY;
if (!conn || !obd || !cluuid)
RETURN(-EINVAL);
- /* lctl gets a backstage, all-access pass. */
- if (!strcmp(cluuid->uuid, "OBD_CLASS_UUID"))
- goto dont_check_exports;
-
- spin_lock(&obd->obd_dev_lock);
- list_for_each(p, &obd->obd_exports) {
- exp = list_entry(p, struct obd_export, exp_obd_chain);
- oed = &exp->exp_ost_data;
- if (!memcmp(cluuid->uuid, oed->oed_uuid.uuid,
- sizeof(oed->oed_uuid.uuid))) {
- spin_unlock(&obd->obd_dev_lock);
- LASSERT(exp->exp_obd == obd);
-
- RETURN(target_handle_reconnect(conn, exp, cluuid));
- }
- }
-
- dont_check_exports:
rc = class_connect(conn, obd, cluuid);
if (rc)
RETURN(rc);
exp = class_conn2export(conn);
LASSERT(exp);
- oed = &exp->exp_ost_data;
- memcpy(oed->oed_uuid.uuid, cluuid->uuid, sizeof(oed->oed_uuid.uuid));
-
RETURN(0);
}
* from client_obd_connect.. *shrug*
*/
INIT_LIST_HEAD(&imp->imp_chain);
- imp->imp_last_xid = 0;
imp->imp_max_transno = 0;
imp->imp_peer_committed_transno = 0;
imp->imp_level = LUSTRE_CONN_FULL;
RETURN(rc);
INIT_LIST_HEAD(&imp->imp_chain);
- imp->imp_last_xid = 0;
imp->imp_max_transno = 0;
- imp->imp_peer_last_xid = 0;
imp->imp_peer_committed_transno = 0;
imp->imp_level = LUSTRE_CONN_FULL;
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
struct buffer_head *bh;
- unsigned long flags;
unsigned int page_count;
int rc, rep_size, size[2];
__u32 xid;
desc->bd_portal = PTLBD_BULK_PORTAL;
desc->bd_ptl_ev_hdlr = NULL;
- spin_lock_irqsave(&imp->imp_lock, flags);
- xid = ++imp->imp_last_xid;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ xid = ptlrpc_next_xid();
for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_next, niob++ ) {
struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
static int ptlbd_sv_setup(struct obd_device *obddev, obd_count len, void *buf)
{
- struct obd_uuid self_uuid = { "self" };
struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
int rc;
ENTRY;
ptlbd->ptlbd_service =
ptlrpc_init_svc(PTLBD_NEVENTS, PTLBD_NBUFS, PTLBD_BUFSIZE,
PTLBD_MAXREQSIZE, PTLBD_REQUEST_PORTAL,
- PTLBD_REPLY_PORTAL, &self_uuid,
+ PTLBD_REPLY_PORTAL,
ptlbd_parse_req, "ptlbd_sv");
if (ptlbd->ptlbd_service == NULL)
DEFS=
+if LIBLUSTRE
+lib_LIBRARIES = libptlrpc.a
+libptlrpc_a_SOURCES = client.c niobuf.c pack_generic.c recovd.c recover.c connection.c rpc.c events.c # lproc_ptlrpc.c service.c
+else
MODULE = ptlrpc
modulefs_DATA = ptlrpc.o
EXTRA_PROGRAMS = ptlrpc
ptlrpc_SOURCES = recovd.c recover.c connection.c rpc.c events.c service.c client.c niobuf.c pack_generic.c lproc_ptlrpc.c
+endif
include $(top_srcdir)/Rules
*/
#define DEBUG_SUBSYSTEM S_RPC
+#ifndef __KERNEL__
+#include <errno.h>
+#include <signal.h>
+#include <liblustre.h>
+#endif
#include <linux/obd_support.h>
#include <linux/obd_class.h>
struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
{
struct ptlrpc_connection *c;
- struct lustre_peer peer;
+ struct ptlrpc_peer peer;
int err;
- err = kportal_uuid_to_peer(uuid->uuid, &peer);
+ err = ptlrpc_uuid_to_peer(uuid, &peer);
if (err != 0) {
CERROR("cannot find peer %s!\n", uuid->uuid);
return NULL;
void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,struct obd_uuid *uuid)
{
- struct lustre_peer peer;
+ struct ptlrpc_peer peer;
int err;
- err = kportal_uuid_to_peer(uuid->uuid, &peer);
+ err = ptlrpc_uuid_to_peer (uuid, &peer);
if (err != 0) {
CERROR("cannot find peer %s!\n", uuid->uuid);
return;
}
- memcpy(&conn->c_peer, &peer, sizeof(peer));
+ memcpy (&conn->c_peer, &peer, sizeof (peer));
return;
}
LASSERT(desc->bd_connection);
- /* If PtlMDUnlink succeeds, then it hasn't completed yet. If it
- * fails, the bulk finished _just_ in time (after the timeout
- * fired but before we got this far) and we'll let it live.
+ /* If PtlMDUnlink succeeds, then bulk I/O on the MD hasn't
+ * even started yet. XXX where do we kunmap the thing?
+ *
+ * If it fails with PTL_MD_BUSY, then the network is still
+ * reading/writing the buffers and we must wait for it to
+ * complete (which it will within finite time, most
+ * probably with failure; we really need portals error
+ * events to detect that).
+ *
+ * Otherwise (PTL_INV_MD) it completed after the bd_flags
+ * test above!
*/
- if (PtlMDUnlink(desc->bd_md_h) != 0) {
+ if (PtlMDUnlink(desc->bd_md_h) != PTL_OK) {
CERROR("Near-miss on OST %s -- need to adjust "
"obd_timeout?\n",
desc->bd_connection->c_remote_uuid.uuid);
return;
}
+ /* We must take it off the imp_replay_list first. Otherwise, we'll set
+ * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
+ if (request->rq_import) {
+ unsigned long flags = 0;
+ if (!locked)
+ spin_lock_irqsave(&request->rq_import->imp_lock, flags);
+ list_del_init(&request->rq_list);
+ if (!locked)
+ spin_unlock_irqrestore(&request->rq_import->imp_lock,
+ flags);
+ }
+
if (atomic_read(&request->rq_refcount) != 0) {
CERROR("freeing request %p (%d->%s:%d) with refcount %d\n",
request, request->rq_reqmsg->opc,
request->rq_connection->c_remote_uuid.uuid,
request->rq_import->imp_client->cli_request_portal,
atomic_read (&request->rq_refcount));
- /* LBUG(); */
+ LBUG();
}
if (request->rq_repmsg != NULL) {
request->rq_reqmsg = NULL;
}
- if (request->rq_import) {
- unsigned long flags = 0;
- if (!locked)
- spin_lock_irqsave(&request->rq_import->imp_lock, flags);
- list_del_init(&request->rq_list);
- if (!locked)
- spin_unlock_irqrestore(&request->rq_import->imp_lock,
- flags);
- }
-
ptlrpc_put_connection(request->rq_connection);
OBD_FREE(request, sizeof(*request));
EXIT;
}
if (req->rq_flags & PTL_RPC_FL_RESEND) {
- ENTRY;
DEBUG_REQ(D_ERROR, req, "RESEND:");
GOTO(out, rc = 1);
}
}
/* Abort this request and cleanup any resources associated with it. */
-static int ptlrpc_abort(struct ptlrpc_request *request)
+int ptlrpc_abort(struct ptlrpc_request *request)
{
/* First remove the ME for the reply; in theory, this means
* that we can tear down the buffer safely. */
LASSERT(spin_is_locked(&imp->imp_lock));
#endif
- CDEBUG(D_HA, "committing for last_committed "LPU64"\n",
- imp->imp_peer_committed_transno);
+ CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n",
+ imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
void ptlrpc_continue_req(struct ptlrpc_request *req)
{
- ENTRY;
DEBUG_REQ(D_HA, req, "continuing delayed request");
req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
wake_up(&req->rq_wait_for_rep);
- EXIT;
}
void ptlrpc_resend_req(struct ptlrpc_request *req)
{
- ENTRY;
DEBUG_REQ(D_HA, req, "resending");
req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
req->rq_flags |= PTL_RPC_FL_RESEND;
req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
wake_up(&req->rq_wait_for_rep);
- EXIT;
}
void ptlrpc_restart_req(struct ptlrpc_request *req)
{
- ENTRY;
DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
req->rq_status = -ERESTARTSYS;
req->rq_flags |= PTL_RPC_FL_RESTART;
req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
wake_up(&req->rq_wait_for_rep);
- EXIT;
}
static int expired_request(void *data)
init_waitqueue_head(&req->rq_wait_for_rep);
- spin_lock_irqsave(&imp->imp_lock, flags);
- req->rq_xid = HTON__u32(++imp->imp_last_xid);
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ req->rq_xid = HTON__u32(ptlrpc_next_xid());
/* for distributed debugging */
req->rq_reqmsg->status = HTON__u32(current->pid);
- CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%x:%d\n",
- NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
- conn->c_peer.peer_nid, NTOH__u32(req->rq_reqmsg->opc));
+ CDEBUG(D_RPCTRACE, "Sending RPC pid:xid:nid:opc %d:"LPU64":%s:"LPX64
+ ":%d\n", NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
+ conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+ NTOH__u32(req->rq_reqmsg->opc));
spin_lock_irqsave(&imp->imp_lock, flags);
lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_request,
interrupted_request, req);
}
+#ifdef __KERNEL__
l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
+#else
+ {
+ extern int reply_in_callback(ptl_event_t *ev);
+ ptl_event_t reply_ev;
+ PtlEQWait(req->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h, &reply_ev);
+ reply_in_callback(&reply_ev);
+ }
+#endif
+
DEBUG_REQ(D_NET, req, "-- done sleeping");
spin_lock_irqsave(&imp->imp_lock, flags);
/* Don't resend if we were interrupted. */
if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
PTL_RPC_FL_RESEND) {
+ if (req->rq_flags & PTL_RPC_FL_NO_RESEND) {
+ ptlrpc_abort(req); /* clean up reply buffers */
+ req->rq_flags &= ~PTL_RPC_FL_NO_RESEND;
+ GOTO(out, rc = -ETIMEDOUT);
+ }
req->rq_flags &= ~PTL_RPC_FL_RESEND;
lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
DEBUG_REQ(D_HA, req, "resending: ");
* ptlrpc_queue_wait must (and does) hold imp_lock while testing this
* flag and then putting requests on sending_list or delayed_list.
*/
- spin_lock_irqsave(&imp->imp_lock, flags);
- imp->imp_flags |= IMP_INVALID;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ if ((imp->imp_flags & IMP_REPLAYABLE) == 0) {
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ imp->imp_flags |= IMP_INVALID;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ }
list_for_each_safe(tmp, n, &imp->imp_sending_list) {
struct ptlrpc_request *req =
*/
#define DEBUG_SUBSYSTEM S_RPC
-
+#ifdef __KERNEL__
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/lustre_net.h>
+#else
+#include <liblustre.h>
+#endif
static spinlock_t conn_lock;
static struct list_head conn_list;
/* If UUID is NULL, c->c_remote_uuid must be all zeroes
* If UUID is non-NULL, c->c_remote_uuid must match. */
-static int match_connection_uuid(struct ptlrpc_connection *c, struct obd_uuid *uuid)
+static int match_connection_uuid(struct ptlrpc_connection *c,
+ struct obd_uuid *uuid)
{
struct obd_uuid zero_uuid;
memset(&zero_uuid, 0, sizeof(zero_uuid));
if (uuid)
- return memcmp(c->c_remote_uuid.uuid, uuid->uuid,
+ return memcmp(c->c_remote_uuid.uuid, uuid->uuid,
sizeof(uuid->uuid));
return memcmp(c->c_remote_uuid.uuid, &zero_uuid, sizeof(zero_uuid));
}
-struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
+struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer,
struct obd_uuid *uuid)
{
struct list_head *tmp, *pos;
struct ptlrpc_connection *c;
ENTRY;
- CDEBUG(D_INFO, "peer is %08x %08lx %08lx\n",
- peer->peer_nid, peer->peer_ni.nal_idx, peer->peer_ni.handle_idx);
+ CDEBUG(D_INFO, "peer is "LPX64" on %s\n",
+ peer->peer_nid, peer->peer_ni->pni_name);
spin_lock(&conn_lock);
list_for_each(tmp, &conn_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
+ if (peer->peer_nid == c->c_peer.peer_nid &&
+ peer->peer_ni == c->c_peer.peer_ni &&
!match_connection_uuid(c, uuid)) {
ptlrpc_connection_addref(c);
GOTO(out, c);
list_for_each_safe(tmp, pos, &conn_unused_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
+ if (peer->peer_nid == c->c_peer.peer_nid &&
+ peer->peer_ni == c->c_peer.peer_ni &&
!match_connection_uuid(c, uuid)) {
ptlrpc_connection_addref(c);
list_del(&c->c_link);
INIT_LIST_HEAD(&c->c_recovd_data.rd_managed_chain);
INIT_LIST_HEAD(&c->c_delayed_head);
atomic_set(&c->c_refcount, 0);
- ptlrpc_connection_addref(c);
+ memcpy(&c->c_peer, peer, sizeof(c->c_peer));
spin_lock_init(&c->c_lock);
- memcpy(&c->c_peer, peer, sizeof(c->c_peer));
+ ptlrpc_connection_addref(c);
+
list_add(&c->c_link, &conn_list);
EXIT;
RETURN(0);
}
- CDEBUG(D_INFO, "connection=%p refcount %d\n",
- c, atomic_read(&c->c_refcount) - 1);
+ CDEBUG (D_INFO, "connection=%p refcount %d to "LPX64" on %s\n",
+ c, atomic_read(&c->c_refcount), c->c_peer.peer_nid,
+ c->c_peer.peer_ni->pni_name);
+
if (atomic_dec_and_test(&c->c_refcount)) {
recovd_conn_unmanage(c);
spin_lock(&conn_lock);
struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
{
ENTRY;
- CDEBUG(D_INFO, "connection=%p refcount %d\n",
- c, atomic_read(&c->c_refcount) + 1);
atomic_inc(&c->c_refcount);
+ CDEBUG (D_INFO, "connection=%p refcount %d to "LPX64" on %s\n",
+ c, atomic_read(&c->c_refcount), c->c_peer.peer_nid,
+ c->c_peer.peer_ni->pni_name);
RETURN(c);
}
}
list_for_each_safe(tmp, pos, &conn_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
- CERROR("Connection %p/%s has refcount %d (nid=%lu)\n",
+ CERROR("Connection %p/%s has refcount %d (nid="LPX64" on %s)\n",
c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount),
- (unsigned long)c->c_peer.peer_nid);
+ c->c_peer.peer_nid, c->c_peer.peer_ni->pni_name);
list_del(&c->c_link);
OBD_FREE(c, sizeof(*c));
}
#define DEBUG_SUBSYSTEM S_RPC
+#ifdef __KERNEL__
#include <linux/module.h>
-#include <linux/obd_support.h>
+#else
+#include <liblustre.h>
+#endif
+#include <linux/obd_class.h>
#include <linux/lustre_net.h>
-ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq,
- bulk_put_source_eq, bulk_put_sink_eq,
- bulk_get_source_eq, bulk_get_sink_eq;
-static const ptl_handle_ni_t *socknal_nip = NULL, *toenal_nip = NULL,
- *qswnal_nip = NULL, *gmnal_nip = NULL;
+struct ptlrpc_ni ptlrpc_interfaces[NAL_MAX_NR];
+int ptlrpc_ninterfaces;
/*
* Free the packet when it has gone out
if (ev->type == PTL_EVENT_SENT) {
OBD_FREE(ev->mem_desc.start, ev->mem_desc.length);
+ } else if (ev->type == PTL_EVENT_ACK) {
+ struct ptlrpc_request *req = ev->mem_desc.user_ptr;
+ if (req->rq_flags & PTL_RPC_FL_WANT_ACK) {
+ req->rq_flags &= ~PTL_RPC_FL_WANT_ACK;
+ wake_up(&req->rq_wait_for_rep);
+ } else {
+ DEBUG_REQ(D_ERROR, req,
+ "ack received for reply, not wanted");
+ }
} else {
- // XXX make sure we understand all events, including ACK's
+ // XXX make sure we understand all events
CERROR("Unknown event %d\n", ev->type);
LBUG();
}
/*
* Wake up the thread waiting for the reply once it comes in.
*/
-static int reply_in_callback(ptl_event_t *ev)
+int reply_in_callback(ptl_event_t *ev)
{
struct ptlrpc_request *req = ev->mem_desc.user_ptr;
ENTRY;
int request_in_callback(ptl_event_t *ev)
{
struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr;
- struct ptlrpc_service *service = rqbd->rqbd_service;
+ struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni;
+ struct ptlrpc_service *service = srv_ni->sni_service;
/* requests always contiguous */
LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
/* we only enable puts */
LASSERT(ev->type == PTL_EVENT_PUT);
- LASSERT(atomic_read(&service->srv_nrqbds_receiving) > 0);
+ LASSERT(atomic_read(&srv_ni->sni_nrqbds_receiving) > 0);
LASSERT(atomic_read(&rqbd->rqbd_refcount) > 0);
if (ev->rlength != ev->mlength)
/* we're off the air */
/* we'll probably start dropping packets in portals soon */
- if (atomic_dec_and_test(&service->srv_nrqbds_receiving))
+ if (atomic_dec_and_test(&srv_ni->sni_nrqbds_receiving))
CERROR("All request buffers busy\n");
} else {
/* +1 ref for service thread */
RETURN(0);
}
-int ptlrpc_init_portals(void)
+int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
{
- int rc;
- ptl_handle_ni_t ni;
-
- /* Use the qswnal if it's there */
- if ((qswnal_nip = inter_module_get("kqswnal_ni")) != NULL)
- ni = *qswnal_nip;
- else if ((gmnal_nip = inter_module_get("kgmnal_ni")) != NULL)
- ni = *gmnal_nip;
- else if ((socknal_nip = inter_module_get("ksocknal_ni")) != NULL)
- ni = *socknal_nip;
- else if ((toenal_nip = inter_module_get("ktoenal_ni")) != NULL)
- ni = *toenal_nip;
- else {
- CERROR("get_ni failed: is a NAL module loaded?\n");
- return -EIO;
+ struct ptlrpc_ni *pni;
+ struct lustre_peer lpeer;
+ int i;
+ int rc = lustre_uuid_to_peer (uuid->uuid, &lpeer);
+
+ if (rc != 0)
+ RETURN (rc);
+
+ for (i = 0; i < ptlrpc_ninterfaces; i++) {
+ pni = &ptlrpc_interfaces[i];
+
+ if (!memcmp (&lpeer.peer_ni, &pni->pni_ni_h,
+ sizeof (lpeer.peer_ni))) {
+ peer->peer_nid = lpeer.peer_nid;
+ peer->peer_ni = pni;
+ return (0);
+ }
}
+
+ CERROR ("Can't find ptlrpc interface for "LPX64" ni handle %08lx %08lx\n",
+ lpeer.peer_nid, lpeer.peer_ni.nal_idx, lpeer.peer_ni.handle_idx);
+ return (-ENOENT);
+}
- rc = PtlEQAlloc(ni, 1024, request_out_callback, &request_out_eq);
- if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
+void ptlrpc_ni_fini (struct ptlrpc_ni *pni) /* tear down one interface: free its seven event queues, then release the NI module */
+{
+ PtlEQFree(pni->pni_request_out_eq_h); /* PtlEQFree() results are not checked; ptlrpc_ni_init's failure path also lands here with some handles still invalidated */
+ PtlEQFree(pni->pni_reply_out_eq_h);
+ PtlEQFree(pni->pni_reply_in_eq_h);
+ PtlEQFree(pni->pni_bulk_put_source_eq_h);
+ PtlEQFree(pni->pni_bulk_put_sink_eq_h);
+ PtlEQFree(pni->pni_bulk_get_source_eq_h);
+ PtlEQFree(pni->pni_bulk_get_sink_eq_h);
+
+ inter_module_put(pni->pni_name); /* pni_name is the name ptlrpc_ni_init passed to inter_module_get() */
+}
- rc = PtlEQAlloc(ni, 1024, reply_out_callback, &reply_out_eq);
- if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
+int ptlrpc_ni_init (char *name, struct ptlrpc_ni *pni)
+{
+ int rc;
+ ptl_handle_ni_t *nip;
- rc = PtlEQAlloc(ni, 1024, reply_in_callback, &reply_in_eq);
+ nip = (ptl_handle_ni_t *)inter_module_get (name);
+ if (nip == NULL) {
+ CDEBUG (D_NET, "Network interface %s not loaded\n", name);
+ return (-ENOENT);
+ }
+
+ CDEBUG (D_NET, "init %s: nal_idx %ld\n", name, nip->nal_idx);
+
+ pni->pni_name = name;
+ pni->pni_ni_h = *nip;
+
+ ptl_set_inv_handle (&pni->pni_request_out_eq_h);
+ ptl_set_inv_handle (&pni->pni_reply_out_eq_h);
+ ptl_set_inv_handle (&pni->pni_reply_in_eq_h);
+ ptl_set_inv_handle (&pni->pni_bulk_put_source_eq_h);
+ ptl_set_inv_handle (&pni->pni_bulk_put_sink_eq_h);
+ ptl_set_inv_handle (&pni->pni_bulk_get_source_eq_h);
+ ptl_set_inv_handle (&pni->pni_bulk_get_sink_eq_h);
+
+ /* NB We never actually PtlEQGet() out of these event queues since
+ * we're only interested in the event callback, so we can just let
+ * them wrap. Their sizes aren't a big deal, apart from providing
+ * a little history for debugging... */
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, request_out_callback,
+ &pni->pni_request_out_eq_h);
if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 1024, bulk_put_source_callback,
- &bulk_put_source_eq);
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_out_callback,
+ &pni->pni_reply_out_eq_h);
if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 1024, bulk_put_sink_callback, &bulk_put_sink_eq);
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_in_callback,
+ &pni->pni_reply_in_eq_h);
if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 1024, bulk_get_source_callback,
- &bulk_get_source_eq);
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_source_callback,
+ &pni->pni_bulk_put_source_eq_h);
if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 1024, bulk_get_sink_callback, &bulk_get_sink_eq);
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_sink_callback,
+ &pni->pni_bulk_put_sink_eq_h);
if (rc != PTL_OK)
- CERROR("PtlEQAlloc failed: %d\n", rc);
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_source_callback,
+ &pni->pni_bulk_get_source_eq_h);
+ if (rc != PTL_OK)
+ GOTO (fail, rc = -ENOMEM);
+
+ rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_sink_callback,
+ &pni->pni_bulk_get_sink_eq_h);
+ if (rc != PTL_OK)
+ GOTO (fail, rc = -ENOMEM);
+
+ return (0);
+ fail:
+ CERROR ("Failed to initialise network interface %s: %d\n",
+ name, rc);
+
+ /* OK to do complete teardown since we invalidated the handles above... */
+ ptlrpc_ni_fini (pni);
+ return (rc);
+}
- return rc;
+int ptlrpc_init_portals(void) /* returns 0 if at least one interface initialised, -EIO if none did */
+{
+ /* Add new portals network interface names here.
+ * Order is irrelevant!  Each name that initialises successfully
+ * takes the next free ptlrpc_interfaces[] slot; names that fail
+ * are simply skipped. */
+ char *ni_names[] = { "kqswnal_ni",
+ "kgmnal_ni",
+ "ksocknal_ni",
+ "ktoenal_ni",
+ "tcpnal_ni",
+ NULL };
+ int rc;
+ int i;
+
+ LASSERT (ptlrpc_ninterfaces == 0);
+
+ for (i = 0; ni_names[i] != NULL; i++) {
+ LASSERT (ptlrpc_ninterfaces <
+ sizeof (ptlrpc_interfaces)/sizeof (ptlrpc_interfaces[0]));
+
+ rc = ptlrpc_ni_init (ni_names[i],
+ &ptlrpc_interfaces[ptlrpc_ninterfaces]);
+ if (rc == 0) /* on failure the count is not advanced, so the same slot is offered to the next name */
+ ptlrpc_ninterfaces++;
+ }
+
+ if (ptlrpc_ninterfaces == 0) { /* not a single interface came up */
+ CERROR("network initialisation failed: is a NAL module loaded?\n");
+ return -EIO;
+ }
+ return 0;
}
void ptlrpc_exit_portals(void)
{
- PtlEQFree(request_out_eq);
- PtlEQFree(reply_out_eq);
- PtlEQFree(reply_in_eq);
- PtlEQFree(bulk_put_source_eq);
- PtlEQFree(bulk_put_sink_eq);
- PtlEQFree(bulk_get_source_eq);
- PtlEQFree(bulk_get_sink_eq);
-
- if (qswnal_nip != NULL)
- inter_module_put("kqswnal_ni");
- if (socknal_nip != NULL)
- inter_module_put("ksocknal_ni");
- if (gmnal_nip != NULL)
- inter_module_put("kgmnal_ni");
- if (toenal_nip != NULL)
- inter_module_put("ktoenal_ni");
+ while (ptlrpc_ninterfaces > 0)
+ ptlrpc_ni_fini (&ptlrpc_interfaces[--ptlrpc_ninterfaces]);
}
*/
#define DEBUG_SUBSYSTEM S_RPC
-
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <portals/lib-types.h>
+#endif
#include <linux/obd_support.h>
#include <linux/lustre_net.h>
#include <linux/lustre_lib.h>
#include <linux/obd.h>
-extern ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq,
- bulk_put_source_eq, bulk_put_sink_eq,
- bulk_get_source_eq, bulk_get_sink_eq;
-
static int ptl_send_buf(struct ptlrpc_request *request,
struct ptlrpc_connection *conn, int portal)
{
int rc;
ptl_process_id_t remote_id;
ptl_handle_md_t md_h;
+ ptl_ack_req_t ack_req;
LASSERT(conn);
+ CDEBUG (D_INFO, "conn=%p ni %s nid "LPX64" on %s\n",
+ conn, conn->c_peer.peer_ni->pni_name,
+ conn->c_peer.peer_nid, conn->c_peer.peer_ni->pni_name);
request->rq_req_md.user_ptr = request;
request->rq_reqmsg->type = HTON__u32(request->rq_type);
request->rq_req_md.start = request->rq_reqmsg;
request->rq_req_md.length = request->rq_reqlen;
- request->rq_req_md.eventq = request_out_eq;
+ request->rq_req_md.eventq = conn->c_peer.peer_ni->pni_request_out_eq_h;
break;
case PTL_RPC_MSG_ERR:
case PTL_RPC_MSG_REPLY:
request->rq_repmsg->type = HTON__u32(request->rq_type);
request->rq_req_md.start = request->rq_repmsg;
request->rq_req_md.length = request->rq_replen;
- request->rq_req_md.eventq = reply_out_eq;
+ request->rq_req_md.eventq = conn->c_peer.peer_ni->pni_reply_out_eq_h;
break;
default:
LBUG();
return -1; /* notreached */
}
- request->rq_req_md.threshold = 1;
+ if (request->rq_flags & PTL_RPC_FL_WANT_ACK) {
+ request->rq_req_md.threshold = 2; /* SENT and ACK */
+ ack_req = PTL_ACK_REQ;
+ } else {
+ request->rq_req_md.threshold = 1;
+ ack_req = PTL_NOACK_REQ;
+ }
request->rq_req_md.options = PTL_MD_OP_PUT;
request->rq_req_md.user_ptr = request;
- rc = PtlMDBind(conn->c_peer.peer_ni, request->rq_req_md, &md_h);
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_ACK | OBD_FAIL_ONCE)) {
+ request->rq_req_md.options |= PTL_MD_ACK_DISABLE;
+ obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
+ }
+
+ rc = PtlMDBind(conn->c_peer.peer_ni->pni_ni_h, request->rq_req_md, &md_h);
if (rc != 0) {
CERROR("PtlMDBind failed: %d\n", rc);
LBUG();
if (!portal)
LBUG();
- rc = PtlPut(md_h, PTL_NOACK_REQ, remote_id, portal, 0, request->rq_xid,
- 0, 0);
+ rc = PtlPut(md_h, ack_req, remote_id, portal, 0, request->rq_xid, 0, 0);
if (rc != PTL_OK) {
CERROR("PtlPut("LPU64", %d, "LPD64") failed: %d\n",
remote_id.nid, portal, request->rq_xid, rc);
int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *desc)
{
int rc;
+ struct ptlrpc_peer *peer;
struct list_head *tmp, *next;
ptl_process_id_t remote_id;
__u32 xid = 0;
if (iov == NULL)
RETURN (-ENOMEM);
+ peer = &desc->bd_connection->c_peer;
+
desc->bd_md.start = iov;
desc->bd_md.niov = 0;
desc->bd_md.length = 0;
- desc->bd_md.eventq = bulk_put_source_eq;
+ desc->bd_md.eventq = peer->peer_ni->pni_bulk_put_source_eq_h;
desc->bd_md.threshold = 2; /* SENT and ACK */
desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
desc->bd_md.user_ptr = desc;
LASSERT(desc->bd_md.niov == desc->bd_page_count);
LASSERT(desc->bd_md.niov != 0);
- rc = PtlMDBind(desc->bd_connection->c_peer.peer_ni, desc->bd_md,
+ rc = PtlMDBind(peer->peer_ni->pni_ni_h, desc->bd_md,
&desc->bd_md_h);
ptlrpc_put_bulk_iov (desc, iov); /*move down to reduce latency to send*/
RETURN(rc);
}
- remote_id.nid = desc->bd_connection->c_peer.peer_nid;
+ remote_id.nid = peer->peer_nid;
remote_id.pid = 0;
- CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d nid "LPX64" pid "
- "%d xid %d\n", desc->bd_md.niov, desc->bd_md.length,
- desc->bd_portal, remote_id.nid, remote_id.pid, xid);
+ CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d on %s "
+ "nid "LPX64" pid %d xid %d\n",
+ desc->bd_md.niov, desc->bd_md.length,
+ desc->bd_portal, peer->peer_ni->pni_name,
+ remote_id.nid, remote_id.pid, xid);
rc = PtlPut(desc->bd_md_h, PTL_ACK_REQ, remote_id,
desc->bd_portal, 0, xid, 0, 0);
int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *desc)
{
int rc;
+ struct ptlrpc_peer *peer;
struct list_head *tmp, *next;
ptl_process_id_t remote_id;
__u32 xid = 0;
if (iov == NULL)
RETURN (-ENOMEM);
+ peer = &desc->bd_connection->c_peer;
+
desc->bd_md.start = iov;
desc->bd_md.niov = 0;
desc->bd_md.length = 0;
- desc->bd_md.eventq = bulk_get_sink_eq;
+ desc->bd_md.eventq = peer->peer_ni->pni_bulk_get_sink_eq_h;
desc->bd_md.threshold = 2; /* SENT and REPLY */
desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_IOV;
desc->bd_md.user_ptr = desc;
iov[desc->bd_md.niov].iov_base = bulk->bp_buf;
iov[desc->bd_md.niov].iov_len = bulk->bp_buflen;
if (iov[desc->bd_md.niov].iov_len <= 0) {
- CERROR("bad bp_buflen[%d] @ %p: %d\n", desc->bd_md.niov,
- bulk->bp_buf, bulk->bp_buflen);
- CERROR("desc: xid %u, pages %d, ptl %d, ref %d\n",
- xid, desc->bd_page_count, desc->bd_portal,
+ CERROR("bad bulk %p bp_buflen[%d] @ %p: %d\n", bulk,
+ desc->bd_md.niov, bulk->bp_buf, bulk->bp_buflen);
+ CERROR("desc %p: xid %u, pages %d, ptl %d, ref %d\n",
+ desc, xid, desc->bd_page_count, desc->bd_portal,
atomic_read(&desc->bd_refcount));
LBUG();
}
LASSERT(desc->bd_md.niov == desc->bd_page_count);
LASSERT(desc->bd_md.niov != 0);
- rc = PtlMDBind(desc->bd_connection->c_peer.peer_ni, desc->bd_md,
+ rc = PtlMDBind(peer->peer_ni->pni_ni_h, desc->bd_md,
&desc->bd_md_h);
ptlrpc_put_bulk_iov (desc, iov); /*move down to reduce latency to send*/
remote_id.nid = desc->bd_connection->c_peer.peer_nid;
remote_id.pid = 0;
- CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d nid "LPX64" pid "
- "%d xid %d\n", desc->bd_md.niov, desc->bd_md.length,
- desc->bd_portal, remote_id.nid, remote_id.pid, xid);
+ CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d on %s "
+ "nid "LPX64" pid %d xid %d\n",
+ desc->bd_md.niov, desc->bd_md.length,
+ desc->bd_portal, peer->peer_ni->pni_name,
+ remote_id.nid, remote_id.pid, xid);
rc = PtlGet(desc->bd_md_h, remote_id, desc->bd_portal, 0, xid, 0);
if (rc != PTL_OK) {
static int ptlrpc_register_bulk_shared(struct ptlrpc_bulk_desc *desc)
{
+ struct ptlrpc_peer *peer;
struct list_head *tmp, *next;
int rc;
__u32 xid = 0;
if (iov == NULL)
return (-ENOMEM);
+ peer = &desc->bd_connection->c_peer;
+
desc->bd_md.start = iov;
desc->bd_md.niov = 0;
desc->bd_md.length = 0;
source_id.nid = desc->bd_connection->c_peer.peer_nid;
source_id.pid = PTL_PID_ANY;
- rc = PtlMEAttach(desc->bd_connection->c_peer.peer_ni,
+ rc = PtlMEAttach(peer->peer_ni->pni_ni_h,
desc->bd_portal, source_id, xid, 0,
PTL_UNLINK, PTL_INS_AFTER, &desc->bd_me_h);
ptlrpc_put_bulk_iov (desc, iov);
CDEBUG(D_NET, "Setup bulk sink buffers: %u pages %u bytes, xid %u, "
- "portal %u\n", desc->bd_md.niov, desc->bd_md.length,
- xid, desc->bd_portal);
+ "portal %u on %s\n", desc->bd_md.niov, desc->bd_md.length,
+ xid, desc->bd_portal, peer->peer_ni->pni_name);
RETURN(0);
int ptlrpc_register_bulk_get(struct ptlrpc_bulk_desc *desc)
{
desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_IOV;
- desc->bd_md.eventq = bulk_get_source_eq;
+ desc->bd_md.eventq =
+ desc->bd_connection->c_peer.peer_ni->pni_bulk_get_source_eq_h;
return ptlrpc_register_bulk_shared(desc);
}
int ptlrpc_register_bulk_put(struct ptlrpc_bulk_desc *desc)
{
desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_IOV;
- desc->bd_md.eventq = bulk_put_sink_eq;
+ desc->bd_md.eventq =
+ desc->bd_connection->c_peer.peer_ni->pni_bulk_put_sink_eq_h;
return ptlrpc_register_bulk_shared(desc);
}
list_add(&desc->bd_set_chain, &set->brw_desc_head);
}
+void obd_brw_set_del(struct ptlrpc_bulk_desc *desc) /* detach desc from the brw set it references through bd_brw_set */
+{
+ atomic_dec(&desc->bd_brw_set->brw_refcount); /* the set loses one reference */
+ list_del_init(&desc->bd_set_chain); /* take desc off the list it joined via bd_set_chain */
+ ptlrpc_bulk_decref(desc); /* NOTE(review): presumably balances a reference taken when desc was added to the set -- confirm against obd_brw_set_add */
+}
+
struct obd_brw_set *obd_brw_set_new(void)
{
struct obd_brw_set *set;
struct list_head *tmp, *next;
ENTRY;
- if (!list_empty(&set->brw_desc_head)) {
- EXIT;
- return;
- }
-
list_for_each_safe(tmp, next, &set->brw_desc_head) {
struct ptlrpc_bulk_desc *desc =
list_entry(tmp, struct ptlrpc_bulk_desc, bd_set_chain);
}
}
- rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
+ rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni->pni_ni_h,
request->rq_reply_portal,/* XXX FIXME bug 625069 */
source_id, request->rq_xid, 0, PTL_UNLINK,
PTL_INS_AFTER, &request->rq_reply_me_h);
request->rq_reply_md.threshold = 1;
request->rq_reply_md.options = PTL_MD_OP_PUT;
request->rq_reply_md.user_ptr = request;
- request->rq_reply_md.eventq = reply_in_eq;
+ request->rq_reply_md.eventq =
+ request->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h;
rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md,
PTL_UNLINK, NULL);
}
CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
- ", portal %u\n",
+ ", portal %u on %s\n",
request->rq_replen, request->rq_xid,
- request->rq_reply_portal);
+ request->rq_reply_portal,
+ request->rq_connection->c_peer.peer_ni->pni_name);
}
/* Clear any flags that may be present from previous sends,
- * except for REPLAY. */
- request->rq_flags &= PTL_RPC_FL_REPLAY;
+ * except for REPLAY, NO_RESEND and WANT_ACK. */
+ request->rq_flags &= (PTL_RPC_FL_REPLAY | PTL_RPC_FL_NO_RESEND |
+ PTL_RPC_FL_WANT_ACK);
rc = ptl_send_buf(request, request->rq_connection,
request->rq_request_portal);
RETURN(rc);
void ptlrpc_link_svc_me(struct ptlrpc_request_buffer_desc *rqbd)
{
- struct ptlrpc_service *service = rqbd->rqbd_service;
+ struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni;
+ struct ptlrpc_service *service = srv_ni->sni_service;
static ptl_process_id_t match_id = {PTL_NID_ANY, PTL_PID_ANY};
int rc;
ptl_md_t dummy;
LASSERT(atomic_read(&rqbd->rqbd_refcount) == 0);
+ CDEBUG(D_NET, "PtlMEAttach: portal %d on %s h %lx.%lx\n",
+ service->srv_req_portal, srv_ni->sni_ni->pni_name,
+ srv_ni->sni_ni->pni_ni_h.nal_idx,
+ srv_ni->sni_ni->pni_ni_h.handle_idx);
+
/* Attach the leading ME on which we build the ring */
- rc = PtlMEAttach(service->srv_self.peer_ni, service->srv_req_portal,
+ rc = PtlMEAttach(srv_ni->sni_ni->pni_ni_h, service->srv_req_portal,
match_id, 0, ~0,
PTL_UNLINK, PTL_INS_AFTER, &rqbd->rqbd_me_h);
if (rc != PTL_OK) {
dummy.threshold = PTL_MD_THRESH_INF;
dummy.options = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | PTL_MD_AUTO_UNLINK;
dummy.user_ptr = rqbd;
- dummy.eventq = service->srv_eq_h;
+ dummy.eventq = srv_ni->sni_eq_h;
- atomic_inc(&service->srv_nrqbds_receiving);
+ atomic_inc(&srv_ni->sni_nrqbds_receiving);
atomic_set(&rqbd->rqbd_refcount, 1); /* 1 ref for portals */
rc = PtlMDAttach(rqbd->rqbd_me_h, dummy, PTL_UNLINK, &md_h);
#warning proper cleanup required
PtlMEUnlink (rqbd->rqbd_me_h);
atomic_set(&rqbd->rqbd_refcount, 0);
- atomic_dec(&service->srv_nrqbds_receiving);
+ atomic_dec(&srv_ni->sni_nrqbds_receiving);
}
}
*/
#define DEBUG_SUBSYSTEM S_RPC
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
#include <linux/obd_support.h>
#include <linux/lustre_net.h>
*/
#define DEBUG_SUBSYSTEM S_RPC
-
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#else
#include <linux/lustre_lite.h>
+#endif
+
#include <linux/lustre_ha.h>
#include <linux/obd_support.h>
return;
}
- CERROR("connection %p to %s (%08x %08lx %08lx) failed\n", conn,
+ CERROR("connection %p to %s nid "LPX64" on %s failed\n", conn,
conn->c_remote_uuid.uuid, conn->c_peer.peer_nid,
- conn->c_peer.peer_ni.nal_idx, conn->c_peer.peer_ni.handle_idx);
+ conn->c_peer.peer_ni->pni_name);
list_del(&rd->rd_managed_chain);
list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
if (rd->rd_phase != RD_IDLE) {
RETURN(0);
}
+#ifdef __KERNEL__
static int recovd_main(void *arg)
{
struct recovd_obd *recovd = (struct recovd_obd *)arg;
int recovd_setup(struct recovd_obd *recovd)
{
- int rc;
+ int rc = 0; /* initialize for Liblustre */
ENTRY;
RETURN(0);
}
+#else
+int recovd_setup(struct recovd_obd *recovd)
+{
+ return 0;
+}
+#endif
int recovd_cleanup(struct recovd_obd *recovd)
{
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_RPC
+#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
-
-#define DEBUG_SUBSYSTEM S_RPC
+#else
+#include <liblustre.h>
+#endif
#include <linux/lustre_ha.h>
#include <linux/lustre_net.h>
ENTRY;
spin_lock_irqsave(&imp->imp_lock, flags);
- list_for_each(tmp, &imp->imp_sending_list) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
- DEBUG_REQ(D_HA, req, "SENDING: ");
- }
-
list_for_each_safe(tmp, pos, &imp->imp_sending_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
break;
case RESTART:
- DEBUG_REQ(D_HA, req, "RESTART:");
ptlrpc_restart_req(req);
break;
case RESEND_IGNORE:
- DEBUG_REQ(D_HA, req, "RESEND_IGNORE:");
rc = ptlrpc_replay_req(req);
if (rc) {
DEBUG_REQ(D_ERROR, req, "error %d resending:",
break;
case RESEND:
- DEBUG_REQ(D_HA, req, "RESEND:");
ptlrpc_resend_req(req);
break;
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/module.h>
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/init.h>
+#else
+# include <liblustre.h>
+#endif
+#include <linux/obd.h>
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_ha.h>
#include <linux/lustre_net.h>
-#include <linux/init.h>
#include <linux/lprocfs_status.h>
extern int ptlrpc_init_portals(void);
extern void ptlrpc_exit_portals(void);
+static __u32 ptlrpc_last_xid = 0; /* most recently issued XID; only touched under ptlrpc_last_xid_lock */
+static spinlock_t ptlrpc_last_xid_lock = SPIN_LOCK_UNLOCKED;
+
+__u32 ptlrpc_next_xid(void) /* returns the next XID from the single file-wide counter */
+{
+ __u32 tmp;
+ spin_lock(&ptlrpc_last_xid_lock);
+ tmp = ++ptlrpc_last_xid; /* pre-increment: the first value returned is 1; the unsigned counter wraps to 0 on overflow */
+ spin_unlock(&ptlrpc_last_xid_lock);
+ return tmp;
+}
int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf)
{
LASSERT(conn->c_recovd_data.rd_recovd == recovd);
#warning check buffer overflow in next line
- if (!strcmp(conn->c_remote_uuid.uuid, data->ioc_inlbuf1))
+ if (!strcmp(conn->c_remote_uuid.uuid,
+ data->ioc_inlbuf1))
break;
conn = NULL;
}
int connmgr_attach(struct obd_device *dev, obd_count len, void *data)
{
struct lprocfs_static_vars lvars;
+ int rc = 0;
lprocfs_init_vars(&lvars);
- return lprocfs_obd_attach(dev, lvars.obd_vars);
+ rc = lprocfs_obd_attach(dev, lvars.obd_vars);
+ return rc;
}
int conmgr_detach(struct obd_device *dev)
o_disconnect: class_disconnect
};
-static int __init ptlrpc_init(void)
+
+
+__init int ptlrpc_init(void)
{
struct lprocfs_static_vars lvars;
int rc;
ptlrpc_cleanup_connection();
}
+/* rpc.c */
+EXPORT_SYMBOL(ptlrpc_next_xid);
+
/* recovd.c */
EXPORT_SYMBOL(ptlrpc_recovd);
EXPORT_SYMBOL(recovd_conn_fail);
EXPORT_SYMBOL(obd_brw_set_free);
EXPORT_SYMBOL(obd_brw_set_new);
EXPORT_SYMBOL(obd_brw_set_add);
+EXPORT_SYMBOL(obd_brw_set_del);
/* client.c */
EXPORT_SYMBOL(ptlrpc_init_client);
EXPORT_SYMBOL(ptlrpc_restart_req);
EXPORT_SYMBOL(ptlrpc_prep_req);
EXPORT_SYMBOL(ptlrpc_free_req);
+EXPORT_SYMBOL(ptlrpc_abort);
EXPORT_SYMBOL(ptlrpc_req_finished);
EXPORT_SYMBOL(ptlrpc_request_addref);
EXPORT_SYMBOL(ptlrpc_prep_bulk);
EXPORT_SYMBOL(ptlrpc_resend);
EXPORT_SYMBOL(ptlrpc_wake_delayed);
+#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Request Processor");
MODULE_LICENSE("GPL");
module_init(ptlrpc_init);
module_exit(ptlrpc_exit);
+#endif
*/
#define DEBUG_SUBSYSTEM S_RPC
-
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <linux/kp30.h>
+#endif
#include <linux/obd_support.h>
#include <linux/obd_class.h>
#include <linux/lustre_net.h>
static int ptlrpc_check_event(struct ptlrpc_service *svc,
struct ptlrpc_thread *thread, ptl_event_t *event)
{
+ struct ptlrpc_srv_ni *srv_ni;
+ int i;
+ int idx;
int rc;
ENTRY;
GOTO(out, rc = 1);
LASSERT ((thread->t_flags & SVC_EVENT) == 0);
- LASSERT (ptl_is_valid_handle (&svc->srv_eq_h));
+ LASSERT (ptlrpc_ninterfaces > 0);
- rc = PtlEQGet(svc->srv_eq_h, event);
- switch (rc)
- {
- case PTL_OK:
- thread->t_flags |= SVC_EVENT;
- GOTO(out, rc = 1);
+ for (i = 0; i < ptlrpc_ninterfaces; i++) {
+ idx = (svc->srv_interface_rover + i) % ptlrpc_ninterfaces;
+ srv_ni = &svc->srv_interfaces[idx];
- case PTL_EQ_EMPTY:
- GOTO(out, rc = 0);
+ LASSERT (ptl_is_valid_handle (&srv_ni->sni_eq_h));
- default:
- CERROR("BUG: PtlEQGet returned %d\n", rc);
- LBUG();
+ rc = PtlEQGet(srv_ni->sni_eq_h, event);
+ switch (rc)
+ {
+ case PTL_OK:
+ /* next time start with the next interface */
+ svc->srv_interface_rover = (idx+1) % ptlrpc_ninterfaces;
+ thread->t_flags |= SVC_EVENT;
+ GOTO(out, rc = 1);
+
+ case PTL_EQ_EMPTY:
+ continue;
+
+ default:
+ CERROR("BUG: PtlEQGet returned %d\n", rc);
+ LBUG();
+ }
}
+ rc = 0;
out:
spin_unlock(&svc->srv_lock);
return rc;
ptlrpc_init_svc(__u32 nevents, __u32 nbufs,
__u32 bufsize, __u32 max_req_size,
int req_portal, int rep_portal,
- struct obd_uuid *uuid, svc_handler_t handler, char *name)
+ svc_handler_t handler, char *name)
{
- int err;
- int rc, i;
+ int ssize;
+ int rc;
+ int i;
+ int j;
struct ptlrpc_service *service;
+ struct ptlrpc_srv_ni *srv_ni;
ENTRY;
- OBD_ALLOC(service, sizeof(*service));
- if (!service)
+ LASSERT (ptlrpc_ninterfaces > 0);
+
+ ssize = offsetof (struct ptlrpc_service,
+ srv_interfaces[ptlrpc_ninterfaces]);
+ OBD_ALLOC(service, ssize);
+ if (service == NULL)
RETURN(NULL);
service->srv_name = name;
service->srv_max_req_size = max_req_size;
service->srv_buf_size = bufsize;
- INIT_LIST_HEAD(&service->srv_rqbds);
- service->srv_nrqbds = 0;
- atomic_set(&service->srv_nrqbds_receiving, 0);
service->srv_rep_portal = rep_portal;
service->srv_req_portal = req_portal;
service->srv_handler = handler;
-
- err = kportal_uuid_to_peer(uuid->uuid, &service->srv_self);
- if (err) {
- CERROR("%s: cannot get peer for uuid '%s'\n", name,
- uuid->uuid);
- OBD_FREE(service, sizeof(*service));
- RETURN(NULL);
- }
-
- rc = PtlEQAlloc(service->srv_self.peer_ni, nevents,
- request_in_callback, &(service->srv_eq_h));
-
- if (rc != PTL_OK) {
- CERROR("%s: PtlEQAlloc failed: %d\n", name, rc);
- OBD_FREE(service, sizeof(*service));
- RETURN(NULL);
+ service->srv_interface_rover = 0;
+
+ /* First initialise enough for early teardown */
+ for (i = 0; i < ptlrpc_ninterfaces; i++) {
+ srv_ni = &service->srv_interfaces[i];
+
+ srv_ni->sni_service = service;
+ srv_ni->sni_ni = &ptlrpc_interfaces[i];
+ ptl_set_inv_handle (&srv_ni->sni_eq_h);
+ INIT_LIST_HEAD(&srv_ni->sni_rqbds);
+ srv_ni->sni_nrqbds = 0;
+ atomic_set(&srv_ni->sni_nrqbds_receiving, 0);
}
- for (i = 0; i < nbufs; i++) {
- struct ptlrpc_request_buffer_desc *rqbd;
-
- OBD_ALLOC(rqbd, sizeof(*rqbd));
- if (rqbd == NULL)
- GOTO(failed, NULL);
-
- rqbd->rqbd_service = service;
- ptl_set_inv_handle(&rqbd->rqbd_me_h);
- atomic_set(&rqbd->rqbd_refcount, 0);
- OBD_ALLOC(rqbd->rqbd_buffer, service->srv_buf_size);
- if (rqbd->rqbd_buffer == NULL) {
- OBD_FREE(rqbd, sizeof(*rqbd));
- GOTO(failed, NULL);
+ /* Now allocate the event queue and request buffers, assuming all
+ * interfaces require the same level of buffering. */
+ for (i = 0; i < ptlrpc_ninterfaces; i++) {
+ srv_ni = &service->srv_interfaces[i];
+ CDEBUG (D_NET, "%s: initialising interface %s\n", name,
+ srv_ni->sni_ni->pni_name);
+
+ rc = PtlEQAlloc(srv_ni->sni_ni->pni_ni_h, nevents,
+ request_in_callback, &(srv_ni->sni_eq_h));
+ if (rc != PTL_OK) {
+ CERROR("%s.%d: PtlEQAlloc on %s failed: %d\n",
+ name, i, srv_ni->sni_ni->pni_name, rc);
+ GOTO (failed, NULL);
}
- list_add(&rqbd->rqbd_list, &service->srv_rqbds);
- service->srv_nrqbds++;
- ptlrpc_link_svc_me(rqbd);
+ for (j = 0; j < nbufs; j++) {
+ struct ptlrpc_request_buffer_desc *rqbd;
+
+ OBD_ALLOC(rqbd, sizeof(*rqbd));
+ if (rqbd == NULL) {
+ CERROR ("%s.%d: Can't allocate request "
+ "descriptor %d on %s\n",
+ name, i, srv_ni->sni_nrqbds,
+ srv_ni->sni_ni->pni_name);
+ GOTO(failed, NULL);
+ }
+
+ rqbd->rqbd_srv_ni = srv_ni;
+ ptl_set_inv_handle(&rqbd->rqbd_me_h);
+ atomic_set(&rqbd->rqbd_refcount, 0);
+
+ OBD_ALLOC(rqbd->rqbd_buffer, service->srv_buf_size);
+ if (rqbd->rqbd_buffer == NULL) {
+ CERROR ("%s.%d: Can't allocate request "
+ "buffer %d on %s\n",
+ name, i, srv_ni->sni_nrqbds,
+ srv_ni->sni_ni->pni_name);
+ OBD_FREE(rqbd, sizeof(*rqbd));
+ GOTO(failed, NULL);
+ }
+ list_add(&rqbd->rqbd_list, &srv_ni->sni_rqbds);
+ srv_ni->sni_nrqbds++;
+
+ ptlrpc_link_svc_me(rqbd);
+ }
}
- CDEBUG(D_NET, "Starting service listening on portal %d (eq: %lu)\n",
- service->srv_req_portal, service->srv_eq_h.handle_idx);
+ CDEBUG(D_NET, "%s: Started on %d interfaces, listening on portal %d\n",
+ service->srv_name, ptlrpc_ninterfaces, service->srv_req_portal);
RETURN(service);
failed:
LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0);
LASSERT ((event->mem_desc.options & PTL_MD_IOV) == 0);
- LASSERT (rqbd->rqbd_service == svc);
+ LASSERT (rqbd->rqbd_srv_ni->sni_service == svc);
LASSERT (rqbd->rqbd_buffer == event->mem_desc.start);
LASSERT (event->offset + event->mlength <= svc->srv_buf_size);
memset(request, 0, sizeof(*request));
+ INIT_LIST_HEAD(&request->rq_list);
request->rq_svc = svc;
request->rq_obd = obddev;
request->rq_xid = event->match_bits;
goto out;
}
- CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPU64":"LPX64":%d\n",
- NTOH__u32(request->rq_reqmsg->status),
- request->rq_xid,
- event->initiator.nid,
- NTOH__u32(request->rq_reqmsg->opc));
+ CDEBUG(D_RPCTRACE, "Handling RPC ni:pid:xid:nid:opc %d:%d:"LPU64":"
+ LPX64":%d\n", rqbd->rqbd_srv_ni - &svc->srv_interfaces[0],
+ NTOH__u32(request->rq_reqmsg->status), request->rq_xid,
+ event->initiator.nid, NTOH__u32(request->rq_reqmsg->opc));
if (NTOH__u32(request->rq_reqmsg->type) != PTL_RPC_MSG_REQUEST) {
CERROR("wrong packet type received (type=%u)\n",
event->mem_desc.start, event->offset);
request->rq_peer.peer_nid = event->initiator.nid;
- /* FIXME: this NI should be the incoming NI.
- * We don't know how to find that from here. */
- request->rq_peer.peer_ni = svc->srv_self.peer_ni;
+ request->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni;
request->rq_export = class_conn2export((struct lustre_handle *)
request->rq_reqmsg);
#endif
#ifdef __arch_um__
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
+#endif
#else
strcpy(current->comm, data->name);
#endif
int ptlrpc_unregister_service(struct ptlrpc_service *service)
{
+ int i;
int rc;
+ struct ptlrpc_srv_ni *srv_ni;
LASSERT (list_empty (&service->srv_threads));
* freeing them.
*/
- while (!list_empty (&service->srv_rqbds)) {
- struct ptlrpc_request_buffer_desc *rqbd =
- list_entry (service->srv_rqbds.next,
- struct ptlrpc_request_buffer_desc,
- rqbd_list);
-
- list_del (&rqbd->rqbd_list);
-
- LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0);
- /* refcount could be anything; it's possible for the
- * buffers to continued to get filled after all the server
- * threads exited. But we know they _have_ exited.
- */
-
- (void) PtlMEUnlink(rqbd->rqbd_me_h);
- /* The callback handler could have unlinked this ME already
- * (we're racing with her) but it's safe to ensure it _has_
- * been unlinked.
- */
-
- OBD_FREE (rqbd->rqbd_buffer, service->srv_buf_size);
- OBD_FREE (rqbd, sizeof (*rqbd));
- service->srv_nrqbds--;
- }
+ for (i = 0; i < ptlrpc_ninterfaces; i++) {
+ srv_ni = &service->srv_interfaces[i];
+ CDEBUG (D_NET, "%s: tearing down interface %s\n",
+ service->srv_name, srv_ni->sni_ni->pni_name);
+
+ while (!list_empty (&srv_ni->sni_rqbds)) {
+ struct ptlrpc_request_buffer_desc *rqbd =
+ list_entry (srv_ni->sni_rqbds.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+
+ list_del (&rqbd->rqbd_list);
+
+ LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0);
+ /* refcount could be anything; it's possible for
+ * the buffers to continued to get filled after all
+ * the server threads exited. But we know they
+ * _have_ exited.
+ */
+
+ (void) PtlMEUnlink(rqbd->rqbd_me_h);
+ /* The callback handler could have unlinked this ME
+ * already (we're racing with her) but it's safe to
+ * ensure it _has_ been unlinked.
+ */
+
+ OBD_FREE (rqbd->rqbd_buffer, service->srv_buf_size);
+ OBD_FREE (rqbd, sizeof (*rqbd));
+ srv_ni->sni_nrqbds--;
+ }
- LASSERT (service->srv_nrqbds == 0);
+ LASSERT (srv_ni->sni_nrqbds == 0);
- rc = PtlEQFree(service->srv_eq_h);
- if (rc)
- CERROR("PtlEQFree failed: %d\n", rc);
+ if (ptl_is_valid_handle (&srv_ni->sni_eq_h)) {
+ rc = PtlEQFree(srv_ni->sni_eq_h);
+ if (rc)
+ CERROR("%s.%d: PtlEQFree failed on %s: %d\n",
+ service->srv_name, i,
+ srv_ni->sni_ni->pni_name, rc);
+ }
+ }
- OBD_FREE(service, sizeof(*service));
- if (rc)
- LBUG();
- return rc;
+ OBD_FREE(service,
+ offsetof (struct ptlrpc_service,
+ srv_interfaces[ptlrpc_ninterfaces]));
+ return 0;
}
%define linuxdir @LINUX@
%define portalsdir @PORTALS@
%define portalslibdir @PORTALSLIB@
-Release: 0301070810ltutor3
+Release: 0302240920chaos
Summary: Lustre Lite File System
Name: lustre-lite
%description -n lustre-ldap
Configures openldap server for LDAP Lustre config database
+
+%package -n liblustre
+Summary: Lustre Lib
+Group: Development/Kernel
+
+%description -n liblustre
+Lustre lib binary package.
+
+
%prep
%setup -qn lustre-%{version}
+%setup -c -n lustre-%{version}-lib
%build
rm -rf $RPM_BUILD_ROOT
# Set an explicit path to our Linux tree, if we can.
+cd $RPM_BUILD_DIR/lustre-%{version}
./configure --with-linux='%{linuxdir}' --with-portals='%{portalsdir}' --with-portalslib='%{portalslibdir}'
make
+cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
+./configure --with-lib --with-portals='%{portalsdir}' --with-portalslib='%{portalslibdir}'
+make
%install
+cd $RPM_BUILD_DIR/lustre-%{version}
+make install prefix=$RPM_BUILD_ROOT
+
+cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
make install prefix=$RPM_BUILD_ROOT
+
# Create the pristine source directory.
+cd $RPM_BUILD_DIR/lustre-%{version}
mkdir -p $RPM_BUILD_ROOT/usr/src
rm -f lustre-source
ln -s $RPM_BUILD_ROOT/usr/src lustre-source
%files -n lustre-source
%attr(-, root, root) /usr/src/lustre-%{version}
+%files -n liblustre
+%attr(-, root, root) /lib/lustre
+%attr(-, root, root) /lib/lustre/liblov.a
+%attr(-, root, root) /lib/lustre/liblustreclass.a
+%attr(-, root, root) /lib/lustre/libptlrpc.a
+%attr(-, root, root) /lib/lustre/libobdecho.a
+%attr(-, root, root) /lib/lustre/libldlm.a
+%attr(-, root, root) /lib/lustre/libosc.a
+%attr(-, root, root) /usr/sbin/lctl
+%attr(-, root, root) /usr/sbin/lfind
+%attr(-, root, root) /usr/sbin/lstripe
+%attr(-, root, root) /usr/sbin/obdio
+%attr(-, root, root) /usr/sbin/obdbarrier
+%attr(-, root, root) /usr/sbin/obdstat
+%attr(-, root, root) /usr/sbin/lload
+%attr(-, root, root) /usr/sbin/lconf
+%attr(-, root, root) /usr/sbin/lmc
+%attr(-, root, root) /usr/sbin/llanalyze
+
+
%files -n lustre-ldap
%attr(-, root, root) /etc/openldap/slapd-lustre.conf
%attr(-, root, root) /etc/openldap/schema/lustre.schema
writeme
mcreate
munlink
+mlink
tchmod
toexcl
fsx
wantedi
createtest
open_delay
+statone
client-echo.cfg elan-server.cfg net-client.cfg obdecho.cfg \
client-mount.cfg ldlm.cfg net-local.cfg obdfilter.cfg \
client-mount2.cfg lustre.cfg net-server.cfg sanity.sh \
- rundbench \
+ rundbench mcreate \
elan-client.cfg mds.cfg trivial.sh
pkgexampledir = '${exec_prefix}/usr/lib/$(PACKAGE)/examples'
pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
runtests runvmstat snaprun.sh tbox.sh common.sh
noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany multifstat createtest
+noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
# noinst_PROGRAMS += ldaptest
-noinst_PROGRAMS += checkstat wantedi
+noinst_PROGRAMS += checkstat wantedi statone
sbin_PROGRAMS = mcreate mkdirmany
# ldaptest_SOURCES = ldaptest.c
testreq_SOURCES = testreq.c
mcreate_SOURCES = mcreate.c
munlink_SOURCES = munlink.c
+mlink_SOURCES = mlink.c
truncate_SOURCES = truncate.c
directio_SOURCES = directio.c
openunlink_SOURCES = openunlink.c
stat_SOURCES = stat.c
createmany_SOURCES = createmany.c
statmany_SOURCES = statmany.c
+statone_SOURCES = statone.c
mkdirmany_SOURCES = mkdirmany.c
multifstat_SOURCES = multifstat.c
checkstat_SOURCES = checkstat.c
--- /dev/null
+#!/bin/sh
+set -e
+
+#
+# Runs create.pl and rename.pl on two mountpoints with increasing load, varying
+# debug levels. Assumes that the node is already setup with llmount2.sh
+#
+# Both perl helpers are looked up relative to the directory holding this
+# script, so the script does not depend on the caller's working directory.
+#
+
+SRCDIR="`dirname $0`"
+CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
+
+# Turn client debugging on by writing -1 to /proc/sys/portals/debug.
+debug_client_on()
+{
+ echo -1 > /proc/sys/portals/debug
+}
+
+# Turn client debugging off by writing 0 to /proc/sys/portals/debug.
+debug_client_off()
+{
+ echo 0 > /proc/sys/portals/debug
+}
+
+# Mount point prefix handed to the perl helpers; override via the environment.
+MNT=${MNT:-/mnt/lustre}
+
+# --- single instance of each helper ---
+debug_client_on
+echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE -- $MNT 2 10
+echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --silent -- $MNT 2 100
+echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE --mcreate=0 -- $MNT 2 10
+echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --mcreate=0 --silent -- $MNT 2 100
+echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
+perl $RENAME --count=2 $MNT 10
+echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
+perl $RENAME --count=2 --silent $MNT 100
+
+debug_client_off
+echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --silent -- $MNT 2 1000
+echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --silent --mcreate=0 -- $MNT 2 1000
+echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $RENAME --count=2 --silent $MNT 1000
+
+# --- two concurrent instances; each batch is started in the background
+# --- and collected with "wait" before the next batch begins
+debug_client_on
+echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --silent -- $MNT 2 100 &
+perl $CREATE --silent -- $MNT 2 100 &
+wait
+echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
+perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
+wait
+echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
+perl $RENAME --count=2 --silent $MNT 1000 &
+perl $RENAME --count=2 --silent $MNT 1000 &
+wait
+
+debug_client_off
+echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --silent -- $MNT 2 2000 &
+perl $CREATE --silent -- $MNT 2 2000 &
+wait
+echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
+perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
+wait
+echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $RENAME --count=2 --silent $MNT 2000 &
+perl $RENAME --count=2 --silent $MNT 2000 &
+wait
+
+# --- four concurrent instances ---
+debug_client_on
+echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+ perl $CREATE --silent -- $MNT 2 100 &
+done
+wait
+echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
+for i in `seq 1 4`; do
+ perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
+done
+wait
+echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
+for i in `seq 1 4`; do
+ perl $RENAME --count=2 --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl $CREATE --silent -- $MNT 2 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
+done
+wait
+echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
+for i in `seq 1 4`; do
+ perl $RENAME --count=2 --silent $MNT 2000 &
+done
+wait
+
+# --- eight concurrent instances ---
+debug_client_on
+echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+ perl $CREATE --silent -- $MNT 2 500 &
+done
+wait
+echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
+for i in `seq 1 8`; do
+ perl $CREATE --silent --mcreate=0 -- $MNT 2 500 &
+done
+wait
+echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
+for i in `seq 1 8`; do
+ perl $RENAME --count=2 --silent $MNT 2000 &
+done
+wait
+
+debug_client_off
+echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl $CREATE --silent -- $MNT 2 2000 &
+done
+wait
+echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
+done
+wait
+echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
+for i in `seq 1 8`; do
+ perl $RENAME --count=2 --silent $MNT 2000 &
+done
+wait
# load, varying debug levels
#
-SRCDIR="`dirname $0`/"
-. $SRCDIR/common.sh
+SRCDIR="`dirname $0`"
+CREATE=$SRCDIR/create.pl
+
+debug_client_on()
+{
+ echo -1 > /proc/sys/portals/debug
+}
+
+debug_client_off()
+{
+ echo 0 > /proc/sys/portals/debug
+}
MNT=${MNT:-/mnt/lustre}
debug_client_on
echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl create.pl -- $MNT -1 10
+perl $CREATE -- $MNT -1 10
echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl create.pl --silent -- $MNT -1 100
+perl $CREATE --silent -- $MNT -1 100
echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
-perl create.pl --mcreate=0 -- $MNT -1 10
+perl $CREATE --mcreate=0 -- $MNT -1 10
echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
-perl create.pl --mcreate=0 --silent -- $MNT -1 100
+perl $CREATE --mcreate=0 --silent -- $MNT -1 100
echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
perl rename.pl $MNT 10
echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
debug_client_off
echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl create.pl --silent -- $MNT -1 1000
+perl $CREATE --silent -- $MNT -1 1000
echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
-perl create.pl --silent --mcreate=0 -- $MNT -1 1000
+perl $CREATE --silent --mcreate=0 -- $MNT -1 1000
echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
perl rename.pl --silent $MNT 1000
debug_client_on
echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
-perl create.pl --silent -- $MNT -1 100 &
-perl create.pl --silent -- $MNT -1 100 &
+perl $CREATE --silent -- $MNT -1 100 &
+perl $CREATE --silent -- $MNT -1 100 &
wait
echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
-perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
-perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
+perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
wait
echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
perl rename.pl --silent $MNT 1000 &
debug_client_off
echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl create.pl --silent -- $MNT -1 2000 &
-perl create.pl --silent -- $MNT -1 2000 &
+perl $CREATE --silent -- $MNT -1 2000 &
+perl $CREATE --silent -- $MNT -1 2000 &
wait
echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
-perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
-perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
wait
echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
perl rename.pl --silent $MNT 2000 &
debug_client_on
echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
for i in `seq 1 4`; do
- perl create.pl --silent -- $MNT -1 100 &
+ perl $CREATE --silent -- $MNT -1 100 &
done
wait
echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
for i in `seq 1 4`; do
- perl create.pl --silent --mcreate=0 -- $MNT -1 100 &
+ perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
done
wait
echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
debug_client_off
echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
for i in `seq 1 4`; do
- perl create.pl --silent -- $MNT -1 2000 &
+ perl $CREATE --silent -- $MNT -1 2000 &
done
wait
echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
for i in `seq 1 4`; do
- perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+ perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
done
wait
echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
debug_client_on
echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
for i in `seq 1 8`; do
- perl create.pl --silent -- $MNT -1 500 &
+ perl $CREATE --silent -- $MNT -1 500 &
done
wait
echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
for i in `seq 1 8`; do
- perl create.pl --silent --mcreate=0 -- $MNT -1 500 &
+ perl $CREATE --silent --mcreate=0 -- $MNT -1 500 &
done
wait
echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
debug_client_off
echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
for i in `seq 1 8`; do
- perl create.pl --silent -- $MNT -1 2000 &
+ perl $CREATE --silent -- $MNT -1 2000 &
done
wait
echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
for i in `seq 1 8`; do
- perl create.pl --silent --mcreate=0 -- $MNT -1 2000 &
+ perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
done
wait
echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
perl rename.pl --silent $MNT 2000 &
done
wait
+sh rundbench 1
+sh rundbench 2
+sh rundbench 4
+sh rundbench 8
+sh rundbench 16
+sh rundbench 32
# the CVS HEAD are allowed.
set -vxe
+[ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
[ "$CONFIGS" ] || CONFIGS="local lov"
-[ "$THREADS" ] || THREADS=1
+[ "$MAX_THREADS" ] || MAX_THREADS=50
+if [ -z "$THREADS" ]; then
+ KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
+ THREADS=`expr $KB / 16384`
+ [ $THREADS -gt $MAX_THREADS ] && THREADS=$MAX_THREADS
+fi
[ "$SIZE" ] || SIZE=20480
[ "$RSIZE" ] || RSIZE=64
[ "$UID" ] || UID=1000
if [ "$DBENCH" != "no" ]; then
mount | grep $MNT || sh llmount.sh
+ SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
+ DB_THREADS=`expr $SPACE / 50000`
+ [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
+
$DEBUG_OFF
sh rundbench 1
sh llmountcleanup.sh
sh llrmount.sh
- if [ $THREADS -gt 1 ]; then
+ if [ $DB_THREADS -gt 1 ]; then
$DEBUG_OFF
- sh rundbench $THREADS
+ sh rundbench $DB_THREADS
sh llmountcleanup.sh
sh llrmount.sh
fi
fi
if [ "$IOZONE_DIR" != "no" ]; then
mount | grep $MNT || sh llmount.sh
+ SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
+ IOZ_THREADS=`expr $SPACE / $SIZE`
+ [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
+
$DEBUG_OFF
iozone -I $IOZONE_OPTS $IOZONE_FILE.odir
IOZVER=`iozone -v | awk '/Revision:/ { print $3 }' | tr -d '.'`
sh llmountcleanup.sh
sh llrmount.sh
- if [ "$THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then
+ if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then
$DEBUG_OFF
THREAD=1
IOZONE_FILE="-F "
- SIZE=`expr $SIZE / $THREADS`
- while [ $THREAD -le $THREADS ]; do
+ while [ $THREAD -le $IOZ_THREADS ]; do
IOZONE_FILE="$IOZONE_FILE $MNT/iozone.$THREAD"
THREAD=`expr $THREAD + 1`
done
- iozone -I $IOZONE_OPTS -t $THREADS $IOZONE_FILE
+ iozone -I $IOZONE_OPTS -t $IOZ_THREADS $IOZONE_FILE
sh llmountcleanup.sh
sh llrmount.sh
elif [ $IOZVER -lt 3145 ]; then
mount | grep $MNT && sh llmountcleanup.sh
done
-[ "$SANITYN" != "no" ] && NAME=mount2 sh sanityN.sh
-
+if [ "$SANITYN" != "no" ]; then
+ export NAME=mount2
+ mount | grep $MNT || sh llmount.sh
+ sh sanityN.sh
+ mount | grep $MNT && sh llmountcleanup.sh
+fi
LMC_REAL="../../lustre/utils/lmc -m $config"
TCPBUF=1048576
-OST=ba-ost-1
+OST=${OST:-ba-ost-1}
CLIENT=client
UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt}
-h2ip () {
+h2tcp () {
echo "${1}"
}
BATCH=/tmp/lmc-batch.$$
# Client node
${LMC} --add net --node $CLIENT --tcpbuf $TCPBUF --nid '*' --nettype tcp
-OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
-[ "$OBD_UUID" ] && OBD_UUID="--obduuid=$OBD_UUID" || echo "$OST: no UUID"
+OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
+[ "$OST_UUID" ] && OST_UUID="--ostuuid=$OST_UUID" || echo "$OST: no UUID"
# server node
${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp
-${LMC} --add ost --node $OST --obd obd1 --obdtype=obdecho -obduuid $OBD_UUID
+${LMC} --add ost --node $OST --ost ost1 --obdtype=obdecho $OST_UUID
# osc on client
-${LMC} --add echo_client --node $CLIENT --obd obd1
+${LMC} --add echo_client --node $CLIENT --ost ost1
$LMC_REAL --batch $BATCH
rm -f $BATCH
UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt}
-h2ip () {
+h2tcp () {
echo "${1}"
}
BATCH=/tmp/lmc-batch.$$
${LMC} --add net --node $MDS --tcpbuf $TCPBUF --nid $MDS --nettype tcp
${LMC} --add mds --node $MDS --mds mds1 --dev /tmp/mds1 --size 50000
-OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
-[ "$OBD_UUID" ] && OBD_UUID="--obduuid $OBD_UUID" || echo "$OST: no UUID"
+OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
+[ "$OST_UUID" ] && OST_UUID="--ostuuid $OST_UUID" || echo "$OST: no UUID"
# server node
${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp
-${LMC} --add ost --node $OST --obd obd1 $OBD_UUID --dev bluearc
+${LMC} --add ost --node $OST --ost ost1 $OST_UUID --dev bluearc
# mount point on the MDS/client
-${LMC} --add mtpt --node $MDS --path /mnt/lustre --mds mds1 --lov obd1
+${LMC} --add mtpt --node $MDS --path /mnt/lustre --mds mds1 --lov ost1
# other clients
${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp
-${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov obd1
+${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov ost1
$LMC_REAL --batch $BATCH
rm -f $BATCH
--- /dev/null
+#!/bin/sh
+# Repeatedly builds the portals and lustre trees inside $DIR (default: the
+# Lustre mount point $MNT). The outer loop keeps going for as long as
+# "date" succeeds; because of "set -e" the first failing command (cp or
+# make) aborts the script. "-v" and "-x" echo and trace every command.
+set -evx
+
+MNT=${MNT:-/mnt/lustre}
+DIR=${DIR:-$MNT}
+# Source root that holds the portals/ and lustre/ trees; defaults to two
+# directories above this script.
+SRC=${SRC:-`dirname $0`/../..}
+while date; do
+ for i in portals lustre; do
+ TGT=$DIR/$i
+ # copy the tree only when the target directory does not exist yet
+ [ -d $TGT ] || cp -av $SRC/$i/ $TGT
+ make -C $TGT clean
+ make -C $TGT -j2
+ make -C $TGT clean
+ done
+done
#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/mman.h>
// not correctly in the headers yet!!
#ifndef O_DIRECT
-#define O_DIRECT 040000 /* direct disk access hint */
+#define O_DIRECT 040000 /* direct disk access hint */
#endif
-#define BLOCKSIZE 4096
-
+/*
+ * Usage: directio file nr_blocks
+ *
+ * Opens (creating if needed) the file with O_DIRECT, then writes nr_blocks
+ * blocks of st_blksize bytes from an anonymous mapping, seeks back to offset
+ * 0 and reads the same amount back. Every failure prints a message and
+ * returns 1.
+ */
int main(int argc, char **argv)
{
int fd;
char *buf;
- int pages;
+ int blocks;
+ struct stat st;
int rc;
if (argc != 3) {
- printf("Usage: %s file nr_pages\n", argv[0]);
+ printf("Usage: %s file nr_blocks\n", argv[0]);
return 1;
}
- pages = strtoul(argv[2], 0, 0);
- printf("directio on %s for %d pages \n", argv[1], pages);
+ blocks = strtoul(argv[2], 0, 0);
+ fd = open(argv[1], O_DIRECT | O_RDWR | O_CREAT, 0644);
+ if (fd == -1) {
+ printf("Cannot open %s: %s\n", argv[1], strerror(errno));
+ return 1;
+ }
- buf = mmap(0, pages * BLOCKSIZE, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANON, 0, 0);
- if (!buf) {
- printf("No memory %s\n", strerror(errno));
+ /* the I/O size is derived from the file's preferred block size */
+ if (fstat(fd, &st) < 0) {
+ printf("Cannot stat %s: %s\n", argv[1], strerror(errno));
return 1;
}
- fd = open(argv[1], O_DIRECT | O_RDWR | O_CREAT);
- if (fd == -1) {
- printf("Cannot open %s: %s\n", argv[1], strerror(errno));
+ /* st_blksize's exact type is platform-defined; cast to match %lu */
+ printf("directio on %s for %dx%lu blocks \n", argv[1], blocks,
+ (unsigned long)st.st_blksize);
+
+ /* anonymous mapping: no backing file, so pass fd -1 for portability */
+ buf = mmap(0, blocks * st.st_blksize, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ /* mmap() reports failure with MAP_FAILED, never with NULL */
+ if (buf == MAP_FAILED) {
+ printf("No memory %s\n", strerror(errno));
return 1;
}
- rc = read(fd, buf, pages * BLOCKSIZE);
- if (rc != pages * BLOCKSIZE) {
- printf("Read error: %s, rc %d\n", strerror(errno), rc);
+ rc = write(fd, buf, blocks * st.st_blksize);
+ if (rc != blocks * st.st_blksize) {
+ printf("Write error %s (rc = %d)\n", strerror(errno), rc);
return 1;
}
- if ( lseek(fd, 0, SEEK_SET) != 0 ) {
+ if (lseek(fd, 0, SEEK_SET) != 0) {
printf("Cannot seek %s\n", strerror(errno));
return 1;
}
- rc = write(fd, buf, pages * BLOCKSIZE);
- if (rc != pages * BLOCKSIZE) {
- printf("Write error %s\n", strerror(errno));
+ rc = read(fd, buf, blocks * st.st_blksize);
+ if (rc != blocks * st.st_blksize) {
+ printf("Read error: %s (rc = %d)\n", strerror(errno), rc);
return 1;
}
#!/bin/sh
TMP=${TMP:-/tmp}
+LCMD=$TMP/lkcd-cmds-`hostname`
+echo "Storing LKCD module info in $LCMD"
cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
- MOD="../$M"
+ DIR=`dirname $M`
+ DIR=`cd $PWD/../$DIR; pwd`
+ MOD="$DIR/`basename $M`"
MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
- MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+ MODNAME=`basename $M | sed -e 's/\.o$//'`
nm $MOD > $MAP
- echo namelist -a $PWD/$MOD
- echo symtab -a $PWD/$MAP $MODNAME
+ echo namelist -a $MOD | tee -a $LCMD
+ echo symtab -a $MAP $MODNAME | tee -a $LCMD
done
lustre_opt="--lustre=$LUSTRE"
fi
+if [ "$1" = "-v" ]; then
+ verbose="-v"
+fi
+
[ -x $LCONF ] || chmod a+rx $LCONF
sh $mkconfig $config || exit 1
-${LCONF} $portals_opt $lustre_opt --reformat --gdb $config || exit 2
+${LCONF} $portals_opt $lustre_opt --reformat --gdb $verbose $config || exit 2
+
#!/bin/bash
config=${1:-local.xml}
${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11
# configure mds server
-${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --node localhost --mds mds1 $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} --add ost --node localhost --ost obd1 --dev $OSTDEV --size $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost obd1 $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30
# create client config
${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --ost obd1 || exit 40
TCPBUF=1048576
-h2ip () {
+h2tcp () {
echo "${1}"
}
BATCH=/tmp/lmc-batch.$$
LUSTRE_QUERY=/usr/local/cfs/lustre-failover/lustre-query
GW_NODE=mcr21
CLIENT_ELAN=`hostname | sed s/[^0-9]*//;`
-OST_BA=ba50
-OST_UUID=10400010-5dec-11c2-0b5f-00301700041a
+OST=${OST:-ba50}
+UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt}
+OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
+[ "$OST_UUID" ] && OST_UUID="--ostuuid=$OST_UUID" || echo "$OST: no UUID"
MDS_DEVICE=/dev/sda3
MDS_SIZE=500000
TCPBUF=1048576
echo $1 | sed 's/[^0-9]*//g'
}
-h2ip () {
+h2tcp () {
echo "${1}"
}
# create client node
$LMC -o $CONFIG --add net --node client --nid '*' --nettype elan
-$LMC -m $CONFIG --add net --router --node mcr21 --tcpbuf $TCPBUF --nid `h2ip $GW_NODE` --nettype tcp
+$LMC -m $CONFIG --add net --router --node mcr21 --tcpbuf $TCPBUF --nid `h2tcp $GW_NODE` --nettype tcp
$LMC -m $CONFIG --add net --router --node mcr21 --nid `h2elan $GW_NODE` --nettype elan
$LMC -m $CONFIG --add route --node $GW_NODE --nettype elan --gw `h2elan $GW_NODE` --lo $CLIENT_ELAN
done
# create OST node entry
-$LMC -m $CONFIG --add net --node $OST_BA --tcpbuf $TCPBUF --nid $OST_BA --nettype tcp
-$LMC -m $CONFIG --add ost --node $OST_BA --obd obd_$OST_BA --obduuid $OST_UUID --dev bluearc
-$LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2ip $GW_NODE` --lo $OST_BA
+$LMC -m $CONFIG --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp
+$LMC -m $CONFIG --add ost --node $OST --ost ost_$OST $OST_UUID --dev bluearc
+$LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2tcp $GW_NODE` --lo $OST
# mount
-$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov obd_$OST_BA
+$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov ost_$OST
echo $1 | sed 's/[^0-9]*//g'
}
-h2ip () {
+h2tcp () {
echo "${1}"
}
do
gwnode=$BASE`gw2node $gw`
echo "Router: $gwnode"
- ${LMC} --add net --router --node $gwnode --tcpbuf $TCPBUF --nid `h2ip $gwnode` --nettype tcp || exit 1
+ ${LMC} --add net --router --node $gwnode --tcpbuf $TCPBUF --nid `h2tcp $gwnode` --nettype tcp || exit 1
${LMC} --add net --node $gwnode --nid `h2elan $gwnode` --nettype elan || exit 1
${LMC} --add route --node $gwnode --nettype elan --gw `h2elan $gwnode` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2
do
OST=${OSTBASE}$server
echo "server: $OST"
- OBD_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
- [ "$OBD_UUID" ] && OBD_UUID="--obduuid $OBD_UUID" || echo "$OST: no UUID"
+ # Look the OST up in $UUIDLIST; on a match OST_UUID becomes the complete
+ # "--ostuuid <uuid>" option, otherwise it stays empty and a note is printed.
+ OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST`
+ [ "$OST_UUID" ] && OST_UUID="--ostuuid $OST_UUID" || echo "$OST: no UUID"
# server node
${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp || exit 1
# the device on the server
- ${LMC} --add ost --lov lov1 --node $OST $OBD_UUID --dev bluearc || exit 3
+ # use the renamed OST_UUID variable; the old OBD_UUID is no longer set here
+ ${LMC} --add ost --lov lov1 --node $OST $OST_UUID --dev bluearc || exit 3
# route to server
- ${LMC} --add route --node $gwnode --nettype tcp --gw `h2ip $gwnode` --lo $OST || exit 2
+ ${LMC} --add route --node $gwnode --nettype tcp --gw `h2tcp $gwnode` --lo $OST || exit 2
let server=$server+1
let i=$i+1
done
echo $1 | sed 's/[^0-9]*//g'
}
-h2ip () {
+h2tcp () {
echo "${1}"
}
# Client node
${LMC} --add net --node client --nid '*' --nettype elan || exit 1
# Router node
-${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2ip $ROUTER` --nettype tcp || exit 1
+${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2tcp $ROUTER` --nettype tcp || exit 1
${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1
${LMC} -m $config --add route --node $ROUTER --nettype elan --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2
# server node
${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1
# route to server
- ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2ip $ROUTER` --lo $s || exit 2
+ ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2tcp $ROUTER` --lo $s || exit 2
# the device on the server
${LMC} --add ost --node $s --obd obd_$s --obdtype=obdecho || exit 3
# attach to the device on the client (this would normally be a mount)
echo $1 | sed 's/[^0-9]*//g'
}
-h2ip () {
+h2tcp () {
echo "${1}"
}
# Client node
${LMC} --add net --node client --nid '*' --nettype elan || exit 1
# Router node
-${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2ip $ROUTER` --nettype tcp || exit 1
+${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2tcp $ROUTER` --nettype tcp || exit 1
${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1
${LMC} --add route --node $ROUTER --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` --nettype elan || exit 2
# server node
${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1
# route to server
- ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2ip $ROUTER` --lo $s || exit 2
+ ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2tcp $ROUTER` --lo $s || exit 2
# the device on the server
#${LMC} --format --lov lov1 --node $s --ost bluearc || exit 3
${LMC} --add ost --lov lov1 --node $s --dev bluearc --format || exit 3
--- /dev/null
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * Usage: mlink file link
+ *
+ * Creates a hard link named argv[2] for the existing file argv[1] with
+ * link(2). Returns 0 on success, 1 on a usage error, and the errno value
+ * set by link() when the call fails.
+ */
+int main(int argc, char ** argv)
+{
+ int rc;
+
+ if (argc < 3) {
+ printf("Usage: %s file link\n", argv[0]);
+ return 1;
+ }
+
+ rc = link(argv[1], argv[2]);
+ if (rc) {
+ /* capture errno first: printf() below is allowed to change it */
+ rc = errno;
+ printf("link(%s, %s) error: %s\n", argv[1], argv[2],
+ strerror(rc));
+ return rc;
+ }
+ return 0;
+}
config=${1:-mount2.xml}
-LMC="${LMC:-../utils/lmc} -m $config"
+SRCDIR=`dirname $0`
+PATH=$SRCDIR:$SRCDIR/../utils:$PATH
+LMC="${LMC:-lmc} -m $config"
TMP=${TMP:-/tmp}
MDSDEV=${MDSDEV:-$TMP/mds1}
OSTDEV=${OSTDEV:-$TMP/ost1}
OSTSIZE=${OSTSIZE:-200000}
-kver=`uname -r | cut -d "." -f 1,2`
-
-case $kver in
- 2.4) FSTYPE="--fstype=extN" ;;
- 2.5) FSTYPE="--fstype=ext3" ;;
- *) echo "Kernel version $kver not supported"
- exit 1
- ;;
-esac
-
-
rm -f $config
# create nodes
${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} --add ost --node localhost --obd obd1 --dev $OSTDEV --size $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size $OSTSIZE || exit 30
# create client config
-${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --obd obd1 || exit 40
-${LMC} --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --obd obd1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
+${LMC} --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --ost ost1 || exit 40
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
+#include <liblustre.h>
+#include <linux/obd.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_lite.h>
#include <linux/obd_lov.h>
--- /dev/null
+#!/bin/sh
+
+set -ex
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+PATH=$PATH:$LUSTRE/utils:$LUSTRE/tests
+
+. $LUSTRE/../ltest/functional/llite/common/common.sh
+
+PDSH='pdsh -S -w'
+
+# XXX I wish all this stuff was in some default-config.sh somewhere
+MDSNODE=${MDSNODE:-mdev6}
+OSTNODE=${OSTNODE:-mdev7}
+CLIENT=${CLIENTNODE:-mdev8}
+NETWORKTYPE=${NETWORKTYPE:-tcp}
+MOUNTPT=${MOUNTPT:-/mnt/lustre}
+CONFIG=recovery-small.xml
+MDSDEV=/tmp/mds
+OSTDEV=/tmp/ost
+MDSSIZE=100000
+OSTSIZE=100000
+
+do_mds() {
+ $PDSH $MDSNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_client() {
+ $PDSH $CLIENT "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+do_ost() {
+ $PDSH $OSTNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@"
+}
+
+drop_request() {
+ do_mds "echo 0x121 > /proc/sys/lustre/fail_loc"
+ do_client "$1 & sleep ${TIMEOUT:-5}; sleep 2; kill \$!"
+ do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+}
+
+make_config() {
+ rm -f $CONFIG
+ for NODE in $CLIENT $MDSNODE $OSTNODE; do
+ lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
+ --nettype $NETWORKTYPE || exit 4
+ done
+ lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
+ --size $MDSSIZE || exit 5
+ lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
+ --size $OSTSIZE || exit 6
+ lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
+ --ost ost1 || exit 7
+}
+
+start_mds() {
+ do_mds "lconf $@ $CONFIG"
+}
+
+shutdown_mds() {
+ do_mds "lconf $@ --cleanup $CONFIG"
+}
+
+start_ost() {
+ do_ost "lconf $@ $CONFIG"
+}
+
+shutdown_ost() {
+ do_ost "lconf $@ --cleanup $CONFIG"
+}
+
+mount_client() {
+ do_client "lconf $@ $CONFIG"
+}
+
+unmount_client() {
+ do_client "lconf $@ --cleanup $CONFIG"
+}
+
+setup() {
+ make_config
+ start_mds ${REFORMAT:---reformat}
+ start_ost ${REFORMAT:---reformat}
+ mount_client --timeout=${TIMEOUT:-5} --recovery_upcall=/bin/true
+}
+
+cleanup() {
+ do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+ unmount_client $@ || true
+ shutdown_mds $@ || true
+ shutdown_ost $@ || true
+}
+
+wait_for_timeout() {
+ # wait to make sure we enter recovery
+ # it'd be better if the upcall notified us somehow, I think
+ sleep $(( ${TIMEOUT:-5} + 2 ))
+}
+
+try_to_cleanup() {
+ kill -INT $!
+ unmount_client --force
+ mount_client --timeout=${TIMEOUT:-5} --recovery_upcall=/bin/true
+}
+
+if [ ! -z "$ONLY" ]; then
+ eval "$ONLY"
+ exit $?
+fi
+
+setup
+drop_request "mcreate /mnt/lustre/1" & wait_for_timeout
+try_to_cleanup
+
+drop_request "tchmod 111 /mnt/lustre/2" & wait_for_timeout
+try_to_cleanup
+
+drop_request "statone /mnt/lustre/2" & wait_for_timeout
+try_to_cleanup
+
+do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf"
+drop_request "cat /mnt/lustre/resolv.conf > /dev/null" & wait_for_timeout
+try_to_cleanup
+
+drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" & wait_for_timeout
+try_to_cleanup
+
+drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" & wait_for_timeout
+try_to_cleanup
+
+drop_request "munlink /mnt/lustre/link1" & wait_for_timeout
+try_to_cleanup
+
+cleanup
PDSH='pdsh -S -w'
# XXX I wish all this stuff was in some default-config.sh somewhere
-MDSNODE=${MDSNODE:-dev2}
-OSTNODE=${OSTNODE:-dev3}
-CLIENT=${CLIENTNODE:-dev4}
+MDSNODE=${MDSNODE:-mdev6}
+OSTNODE=${OSTNODE:-mdev7}
+CLIENT=${CLIENTNODE:-mdev8}
NETWORKTYPE=${NETWORKTYPE:-tcp}
MOUNTPT=${MOUNTPT:-/mnt/lustre}
CONFIG=recovery-small.xml
setup() {
make_config
- start_mds --reformat
- start_ost --reformat
+ start_mds ${REFORMAT:---reformat}
+ start_ost ${REFORMAT:---reformat}
# XXX we should write our own upcall, when we move this somewhere better.
- mount_client --timeout=10 \
+ mount_client --timeout=${TIMEOUT:-5} \
--recovery_upcall=$PWD/../../ltest/functional/llite/09/client-upcall.sh
}
cleanup() {
- unmount_client || true
- shutdown_mds || true
- shutdown_ost || true
+ do_mds "echo 0 > /proc/sys/lustre/fail_loc"
+ unmount_client $@ || true
+ shutdown_mds $@ || true
+ shutdown_ost $@ || true
}
replay() {
- if [ $# -gt 1 ]; then
- do_client "$1"
- shift
- fi
do_mds "sync"
do_mds 'echo -e "device \$mds1\\nprobe\\nnotransno\\nreadonly" | lctl'
do_client "$1" &
shutdown_mds -f
start_mds
wait
- do_client "ls $MOUNPT" # trigger failover, if we haven't already
+ do_client "df -h $MOUNTPT" # trigger failover, if we haven't already
}
if [ ! -z "$ONLY" ]; then
setup
drop_request "mcreate /mnt/lustre/1"
drop_reply "mcreate /mnt/lustre/2"
-replay "mcreate /mnt/lustre/3"
+# replay "mcreate /mnt/lustre/3"
+
+drop_request "tchmod 111 /mnt/lustre/2"
+drop_reply "tchmod 666 /mnt/lustre/2"
+# replay "tchmod 444 /mnt/lustre/2"
+
+drop_request "statone /mnt/lustre/2"
+drop_reply "statone /mnt/lustre/2"
+# replay "statone /mnt/lustre/2"
+
+do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf"
+drop_request "cat /mnt/lustre/resolv.conf > /dev/null"
+drop_reply "cat /mnt/lustre/resolv.conf > /dev/null"
+
+drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed"
+drop_reply "mv /mnt/lustre/renamed /mnt/lustre/renamed-again"
+
+drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1"
+drop_reply "mlink /mnt/lustre/renamed-again /mnt/lustre/link2"
+
+drop_request "munlink /mnt/lustre/link1"
+drop_reply "munlink /mnt/lustre/link2"
+
+
cleanup
#!/bin/sh
[ -z "$SIZE" ] && SIZE=5g
-[ -z "$LOOPS" ] && LOOPS=9999
+[ -z "$COUNT" ] && COUNT=100
[ -z "$VERIFY" ] && VERIFY="-+d"
[ -z "$ODIR" ] && ODIR="-I"
[ -z "$REC" ] && REC=64
[ -z "$FILE" ] && FILE=/mnt/lustre/iozone.$$
[ $1 ] && SIZE=$1
-COUNT=0
+LOOP=0
rm -f endiozone
echo 0 > /proc/sys/portals/debug
while date; do
- echo "Test #$COUNT"
+ LOOP=`expr $LOOP + 1`
+ echo "Test #$LOOP"
iozone $VERIFY $ODIR -r $REC -i 0 -i 1 -f $FILE -s $SIZE 2>&1 || exit $?
- COUNT=`expr $COUNT + 1`
- [ -f endiozone -o $COUNT -ge $LOOPS ] && rm -f endiozone && exit 0
+ [ -f endiozone -o $LOOP -ge $COUNT ] && rm -f endiozone && exit 0
done | tee /tmp/iozone.log
exit $RC
}
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+log() {
+ echo "$*"
+ lctl mark "$*"
+}
+
+export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH
ERROR=
SRC=/etc
[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf
[ "$MCREATE" ] || MCREATE=$SRCDIR/../tests/mcreate
+
[ "$MKDIRMANY" ] || MKDIRMANY=$SRCDIR/../tests/mkdirmany
while [ "$1" ]; do
USED=`expr $USED + 16` # Some space for the status file
# let's start slowly here...
-echo "touching $OSCMT"
+log "touching $OSCMT"
touch $OSCMT || fail "can't touch $OSCMT" 2
HOSTS=$OSCMT/hosts.$$
# this will cause the following cp to trigger bug #620096
-echo "create an empty file $HOSTS"
-$MCREATE $HOSTS
+log "create an empty file $HOSTS"
+mcreate $HOSTS
-echo "copying /etc/hosts to $HOSTS"
+log "copying /etc/hosts to $HOSTS"
cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
-echo "comparing /etc/hosts and $HOSTS"
+log "comparing /etc/hosts and $HOSTS"
diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
-echo "renaming $HOSTS to $HOSTS.ren"
+log "renaming $HOSTS to $HOSTS.ren"
mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
-echo "copying /etc/hosts to $HOSTS again"
+log "copying /etc/hosts to $HOSTS again"
cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
-echo "truncating $HOSTS"
+log "truncating $HOSTS"
> $HOSTS || fail "can't truncate $HOSTS" 8
-echo "removing $HOSTS"
+log "removing $HOSTS"
rm $HOSTS || fail "can't remove $HOSTS" 9
DST=$OSCMT/runtest.$$
# let's start slowly here...
-echo "creating $DST"
+log "creating $DST"
mkdir $DST || fail "can't mkdir $DST" 10
# ok, that hopefully worked, so let's do a little more, with files that
# haven't changed in the last day (hopefully they don't change during test)
FILES=`find $SRC -type f -mtime +1 -ctime +1 | head -$COUNT`
-echo "copying files from $SRC to $DST$SRC"
+log "copying files from $SRC to $DST$SRC"
tar cf - $FILES | tar xvf - -C $DST || fail "copying $SRC" 11
-echo "comparing newly copied files"
+log "comparing newly copied files"
for f in $FILES; do
- [ $V ] && echo "verifying $DST/$f"
+ [ $V ] && log "verifying $DST/$f"
diff -q $f $DST/$f || ERROR=11
done
sh llmountcleanup.sh || exit 19
sh llrmount.sh || exit 20
-echo "comparing previously copied files"
+log "comparing previously copied files"
for f in $FILES; do
- [ $V ] && echo "verifying $DST/$f"
+ [ $V ] && log "verifying $DST/$f"
diff -q $f $DST/$f || ERROR=22
done
sh llmountcleanup.sh || exit 19
sh llrmount.sh || exit 20
-echo "renaming $HOSTS.ren to $HOSTS"
+log "renaming $HOSTS.ren to $HOSTS"
mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
-echo "truncating $HOSTS"
+log "truncating $HOSTS"
> $HOSTS || fail "can't truncate $HOSTS" 34
-echo "removing $HOSTS"
+log "removing $HOSTS"
rm $HOSTS || fail "can't remove $HOSTS again" 36
-echo "removing $DST"
+log "removing $DST"
rm -r $V $DST || fail "can't remove $DST" 37
# mkdirmany test (bug 589)
-echo "running mkdirmany $OSCMT/base$$ 100"
+log "running mkdirmany $OSCMT/base$$ 100"
$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed"
-echo "removing mkdirmany directories"
+log "removing mkdirmany directories"
rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+log "done"
+
NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
if [ $NOWUSED -gt $USED ]; then
echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
}
START=${START:-start}
-error () {
+log() {
+ echo "$*"
+ lctl mark "$*"
+}
+
+error() {
echo FAIL
exit 1
}
mount | grep $MOUNT || sh llmount.sh
-echo '== touch .../f ; rm .../f ======================== test 0'
+log '== touch .../f ; rm .../f ======================== test 0'
touch $DIR/f
$CHECKSTAT -t file $DIR/f || error
rm $DIR/f
$CLEAN
$START
-echo '== mkdir .../d1; mkdir .../d1/d2 ================= test 1'
+log '== mkdir .../d1; mkdir .../d1/d2 ================= test 1'
mkdir $DIR/d1
mkdir $DIR/d1/d2
$CHECKSTAT -t dir $DIR/d1/d2 || error
$CLEAN
$START
-echo '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b'
+log '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b'
rmdir $DIR/d1/d2
rmdir $DIR/d1
$CHECKSTAT -a $DIR/d1 || error
$CLEAN
$START
-echo '== mkdir .../d2; touch .../d2/f ================== test 2'
+log '== mkdir .../d2; touch .../d2/f ================== test 2'
mkdir $DIR/d2
touch $DIR/d2/f
$CHECKSTAT -t file $DIR/d2/f || error
$CLEAN
$START
-echo '== rm -r .../d2; touch .../d2/f ================== test 2b'
+log '== rm -r .../d2; touch .../d2/f ================== test 2b'
rm -r $DIR/d2
$CHECKSTAT -a $DIR/d2 || error
pass
$CLEAN
$START
-echo '== mkdir .../d3 ================================== test 3'
+log '== mkdir .../d3 ================================== test 3'
mkdir $DIR/d3
$CHECKSTAT -t dir $DIR/d3 || error
pass
$CLEAN
$START
-echo '== touch .../d3/f ================================ test 3b'
+log '== touch .../d3/f ================================ test 3b'
touch $DIR/d3/f
$CHECKSTAT -t file $DIR/d3/f || error
pass
$CLEAN
$START
-echo '== rm -r .../d3 ================================== test 3c'
+log '== rm -r .../d3 ================================== test 3c'
rm -r $DIR/d3
$CHECKSTAT -a $DIR/d3 || error
pass
$CLEAN
$START
-echo '== mkdir .../d4 ================================== test 4'
+log '== mkdir .../d4 ================================== test 4'
mkdir $DIR/d4
$CHECKSTAT -t dir $DIR/d4 || error
pass
$CLEAN
$START
-echo '== mkdir .../d4/d2 =============================== test 4b'
+log '== mkdir .../d4/d2 =============================== test 4b'
mkdir $DIR/d4/d2
$CHECKSTAT -t dir $DIR/d4/d2 || error
pass
$CLEAN
$START
-echo '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5'
+log '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5'
mkdir $DIR/d5
mkdir $DIR/d5/d2
chmod 0707 $DIR/d5/d2
$CLEAN
$START
-echo '== touch .../f6; chmod .../f6 ==================== test 6'
+log '== touch .../f6; chmod .../f6 ==================== test 6'
touch $DIR/f6
chmod 0666 $DIR/f6
$CHECKSTAT -t file -p 0666 $DIR/f6 || error
$CLEAN
$START
-echo '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7'
+log '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7'
mkdir $DIR/d7
$MCREATE $DIR/d7/f
chmod 0666 $DIR/d7/f
$CLEAN
$START
-echo '== mkdir .../d7; mcreate .../d7/f2; chmod .../d7/f2 = test 7b'
+log '== mkdir .../d7; mcreate .../d7/f2; echo foo > .../d7/f2 = test 7b'
$MCREATE $DIR/d7/f2
-echo -n foo > $DIR/d7/f2
+log -n foo > $DIR/d7/f2
[ "`cat $DIR/d7/f2`" = "foo" ] || error
$CHECKSTAT -t file -s 3 $DIR/d7/f2 || error
pass
$CLEAN
$START
-echo '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8'
+log '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8'
mkdir $DIR/d8
touch $DIR/d8/f
chmod 0666 $DIR/d8/f
$START
-echo '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9'
+log '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9'
mkdir $DIR/d9
mkdir $DIR/d9/d2
mkdir $DIR/d9/d2/d3
$START
-echo '== mkdir .../d10 .../d10/d2; touch .../d10/d2/f = test 10'
+log '== mkdir .../d10 .../d10/d2; touch .../d10/d2/f = test 10'
mkdir $DIR/d10
mkdir $DIR/d10/d2
touch $DIR/d10/d2/f
$CLEAN
$START
-echo '== mkdir .../d11 d11/d2; chmod .../d11/d2 ======= test 11'
+log '== mkdir .../d11 d11/d2; chmod .../d11/d2 ======= test 11'
mkdir $DIR/d11
mkdir $DIR/d11/d2
chmod 0666 $DIR/d11/d2
$CLEAN
$START
-echo '== mkdir .../d12; touch .../d12/f; chmod .../d12/f == test 12'
+log '== mkdir .../d12; touch .../d12/f; chmod .../d12/f == test 12'
mkdir $DIR/d12
touch $DIR/d12/f
chmod 0666 $DIR/d12/f
$CLEAN
$START
-echo '== mkdir .../d13; creat .../d13/f; .../d13/f; > .../d13/f == test 13'
+log '== mkdir .../d13; creat .../d13/f; .../d13/f; > .../d13/f == test 13'
mkdir $DIR/d13
dd if=/dev/zero of=$DIR/d13/f count=10
> $DIR/d13/f
$CLEAN
$START
-echo '================================================== test 14'
+log '================================================== test 14'
mkdir $DIR/d14
touch $DIR/d14/f
rm $DIR/d14/f
$CLEAN
$START
-echo '================================================== test 15'
+log '================================================== test 15'
mkdir $DIR/d15
touch $DIR/d15/f
mv $DIR/d15/f $DIR/d15/f2
$CLEAN
$START
-echo '================================================== test 16'
+log '================================================== test 16'
mkdir $DIR/d16
touch $DIR/d16/f
rm -rf $DIR/d16/f
$CLEAN
$START
-echo '== symlinks: create, remove (dangling and real) == test 17'
+log '== symlinks: create, remove (dangling and real) == test 17'
mkdir $DIR/d17
touch $DIR/d17/f
ln -s $DIR/d17/f $DIR/d17/l-exist
$CLEAN
$START
-echo "== touch .../f ; ls ... ========================= test 18"
+log "== touch .../f ; ls ... ========================= test 18"
touch $DIR/f
ls $DIR || error
pass
$CLEAN
$START
-echo "== touch .../f ; ls -l ... ====================== test 19"
+log "== touch .../f ; ls -l ... ====================== test 19"
touch $DIR/f
ls -l $DIR
rm $DIR/f
$CLEAN
$START
-echo "== touch .../f ; ls -l ... ====================== test 20"
+log "== touch .../f ; ls -l ... ====================== test 20"
touch $DIR/f
rm $DIR/f
-echo "1 done"
+log "1 done"
touch $DIR/f
rm $DIR/f
-echo "2 done"
+log "2 done"
touch $DIR/f
rm $DIR/f
-echo "3 done"
+log "3 done"
$CHECKSTAT -a $DIR/f || error
pass
$CLEAN
$START
-echo '== write to dangling link ======================== test 21'
+log '== write to dangling link ======================== test 21'
mkdir $DIR/d21
[ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle
ln -s dangle $DIR/d21/link
$CLEAN
$START
-echo '== unpack tar archive as non-root user =========== test 22'
+log '== unpack tar archive as non-root user =========== test 22'
mkdir $DIR/d22
which sudo && chown 4711 $DIR/d22
SUDO=`which sudo 2> /dev/null` && SUDO="$SUDO -u #4711" || SUDO=""
$CLEAN
$START
-echo '== O_CREAT|O_EXCL in subdir ====================== test 23'
+log '== O_CREAT|O_EXCL in subdir ====================== test 23'
mkdir $DIR/d23
$TOEXCL $DIR/d23/f23
$TOEXCL -e $DIR/d23/f23 || error
echo '== rename sanity ================================= test24'
echo '-- same directory rename'
-echo '-- test 24-R1: touch a ; rename a b'
+log '-- test 24-R1: touch a ; rename a b'
mkdir $DIR/R1
touch $DIR/R1/f
mv $DIR/R1/f $DIR/R1/g
$CLEAN
$START
-echo '-- test 24-R2: touch a b ; rename a b;'
+log '-- test 24-R2: touch a b ; rename a b;'
mkdir $DIR/R2
touch $DIR/R2/{f,g}
mv $DIR/R2/f $DIR/R2/g
$CLEAN
$START
-echo '-- test 24-R3: mkdir a ; rename a b;'
+log '-- test 24-R3: mkdir a ; rename a b;'
mkdir $DIR/R3
mkdir $DIR/R3/f
mv $DIR/R3/f $DIR/R3/g
$CLEAN
$START
-echo '-- test 24-R4: mkdir a b ; rename a b;'
+log '-- test 24-R4: mkdir a b ; rename a b;'
mkdir $DIR/R4
mkdir $DIR/R4/{f,g}
perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";"
$START
echo '-- cross directory renames --'
-echo '-- test 24-R5: touch a ; rename a b'
+log '-- test 24-R5: touch a ; rename a b'
mkdir $DIR/R5{a,b}
touch $DIR/R5a/f
mv $DIR/R5a/f $DIR/R5b/g
$CLEAN
$START
-echo '-- test 24-R6: touch a ; rename a b'
+log '-- test 24-R6: touch a ; rename a b'
mkdir $DIR/R6{a,b}
touch $DIR/R6a/f $DIR/R6b/g
mv $DIR/R6a/f $DIR/R6b/g
$CLEAN
$START
-echo '-- test 24-R7: touch a ; rename a b'
+log '-- test 24-R7: touch a ; rename a b'
mkdir $DIR/R7{a,b}
mkdir $DIR/R7a/f
mv $DIR/R7a/f $DIR/R7b/g
$CLEAN
$START
-echo '-- test 24-R8: touch a ; rename a b'
+log '-- test 24-R8: touch a ; rename a b'
mkdir $DIR/R8{a,b}
mkdir $DIR/R8a/f $DIR/R8b/g
perl -e "rename \"$DIR/R8a/f\", \"$DIR/R8b/g\";"
$START
echo "-- rename error cases"
-echo "-- test 24-R9 target error: touch f ; mkdir a ; rename f a"
+log "-- test 24-R9 target error: touch f ; mkdir a ; rename f a"
mkdir $DIR/R9
mkdir $DIR/R9/a
touch $DIR/R9/f
$CLEAN
$START
-echo "--test 24-R10 source does not exist"
+log "--test 24-R10 source does not exist"
mkdir $DIR/R10
perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\""
$CHECKSTAT -t dir $DIR/R10 || error
$START
echo '== symlink sanity ================================ test25'
-echo "--test 25.1 create file in symlinked directory"
+log "--test 25.1 create file in symlinked directory"
mkdir $DIR/d25
ln -s d25 $DIR/s25
touch $DIR/s25/foo
$CLEAN
$START
-echo "--test 25.2 lookup file in symlinked directory"
+log "--test 25.2 lookup file in symlinked directory"
$CHECKSTAT -t file $DIR/s25/foo
pass
$CLEAN
$START
-echo "--test 26 multiple component symlink"
+log "--test 26 multiple component symlink"
mkdir $DIR/d26
mkdir $DIR/d26/d26-2
ln -s d26/d26-2 $DIR/s26
$CLEAN
$START
-echo "--test 26.1 multiple component symlink at the end of a lookup"
+log "--test 26.1 multiple component symlink at the end of a lookup"
ln -s d26/d26-2/foo $DIR/s26-2
touch $DIR/s26-2
pass
$CLEAN
$START
-echo "--test 26.2 a chain of symlinks"
+log "--test 26.2 a chain of symlinks"
mkdir $DIR/d26.2
touch $DIR/d26.2/foo
ln -s d26.2 $DIR/s26.2-1
$START
# recursive symlinks (bug 439)
-echo "--test 26.3 create multiple component recursive symlink"
+log "--test 26.3 create multiple component recursive symlink"
ln -s d26-3/foo $DIR/d26-3
pass
$CLEAN
$START
-echo "--test 26.3 unlink multiple component recursive symlink"
+log "--test 26.3 unlink multiple component recursive symlink"
rm $DIR/d26-3
pass
$CLEAN
$START
echo '== stripe sanity ================================= test27'
-echo "--test 27.1 create one stripe"
+log "--test 27.1 create one stripe"
mkdir $DIR/d27
$LSTRIPE $DIR/d27/f0 8192 0 1
$CHECKSTAT -t file $DIR/d27/f0
-echo "--test 27.2 write to one stripe file"
+log "--test 27.2 write to one stripe file"
cp /etc/hosts $DIR/d27/f0
pass
-echo "--test 27.3 create two stripe file f01"
+log "--test 27.3 create two stripe file f01"
$LSTRIPE $DIR/d27/f01 8192 0 2
-echo "--test 27.4 write to two stripe file file f01"
+log "--test 27.4 write to two stripe file file f01"
dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
pass
-echo "--test 27.5 create file with default settings"
+log "--test 27.5 create file with default settings"
$LSTRIPE $DIR/d27/fdef 0 -1 0
$CHECKSTAT -t file $DIR/d27/fdef
#dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4
-echo "--test 27.6 lstripe existing file (should return error)"
+log "--test 27.6 lstripe existing file (should return error)"
$LSTRIPE $DIR/d27/f12 8192 1 2
! $LSTRIPE $DIR/d27/f12 8192 1 2
$CHECKSTAT -t file $DIR/d27/f12
pass
-echo "--test 27.7 lstripe with bad stripe size (should return error on LOV)"
+log "--test 27.7 lstripe with bad stripe size (should return error on LOV)"
$LSTRIPE $DIR/d27/fbad 100 1 2 || /bin/true
dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
pass
$CLEAN
$START
-echo "--test 27.8 lfind "
+log "--test 27.8 lfind "
$LFIND $DIR/d27
pass
$CLEAN
$START
-echo '== create/mknod/mkdir with bad file types ======== test28'
+log '== create/mknod/mkdir with bad file types ======== test28'
mkdir $DIR/d28
$CREATETEST $DIR/d28/ct || error
pass
-echo '== IT_GETATTR regression ======================== test29'
-mkdir $MOUNT/d29
-touch $MOUNT/d29/foo
-ls -l $MOUNT/d29
-MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_MNT_localhost_mds1}
+log '== IT_GETATTR regression ======================== test29'
+mkdir $DIR/d29
+touch $DIR/d29/foo
+ls -l $DIR/d29
+MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_*}
LOCKCOUNTORIG=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count`
-ls -l $MOUNT/d29
+ls -l $DIR/d29
LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count`
if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ] || [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then
$CLEAN
$START
-echo '== cleanup ============================================='
-rm -r $DIR/[Rdfs][1-9]*
+log '== run binary from Lustre (execve) =============== test30'
+cp `which ls` $DIR
+$DIR/ls /
+$CLEAN
+$START
+
+log '== open-unlink file ============================== test31'
+./openunlink $DIR/f31 $DIR/f31 || error
+pass
+
+log '== cleanup ============================================='
+rm -r $DIR/[Rdfs][1-9]* $DIR/ls
echo '======================= finished ======================='
exit
#if 0
#include <linux/extN_fs.h>
#endif
+#include <liblustre.h>
#include <linux/lustre_lib.h>
#include <linux/obd.h>
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <liblustre.h>
+#include <linux/lustre_lib.h>
+#include <linux/obd.h>
+
+/*
+ * Issue an IOC_MDC_LOOKUP ioctl for the path in argv[1].  The ioctl is sent
+ * on a file descriptor for the path's parent directory, with the final path
+ * component packed as the first inline buffer.
+ *
+ * Returns 0 on success, 1 on a usage error, an over-long path or a packing
+ * failure, or the errno of a failing open()/ioctl().
+ */
+int main(int argc, char **argv)
+{
+        struct obd_ioctl_data data;
+        char rawbuf[8192], parent[4096], *buf = rawbuf, *base, *name, *t;
+        int max = sizeof(rawbuf), fd, len, rc;
+
+        if (argc != 2) {
+                printf("usage: %s filename\n", argv[0]);
+                return 1;
+        }
+
+        base = argv[1];
+        t = strrchr(base, '/');
+        if (!t) {
+                /* no directory component: look up in the current directory */
+                strcpy(parent, ".");
+                name = base;
+        } else {
+                len = t - base;
+                /* a file directly under "/" has "/" itself as its parent */
+                if (len == 0)
+                        len = 1;
+                if (len >= (int)sizeof(parent)) {
+                        printf("parent path of %s is too long\n", base);
+                        return 1;
+                }
+                /* copy the directory part and terminate it explicitly;
+                 * parent[] is an uninitialized stack buffer */
+                memcpy(parent, base, len);
+                parent[len] = '\0';
+                name = t + 1;
+        }
+
+        fd = open(parent, O_RDONLY);
+        if (fd < 0) {
+                /* save errno before printf(), which may change it */
+                rc = errno;
+                printf("open(%s) error: %s\n", parent, strerror(rc));
+                exit(rc);
+        }
+
+        memset(&data, 0, sizeof(data));
+        data.ioc_version = OBD_IOCTL_VERSION;
+        data.ioc_len = sizeof(data);
+        data.ioc_inlbuf1 = name;
+        /* the packed length includes the terminating NUL */
+        data.ioc_inllen1 = strlen(data.ioc_inlbuf1) + 1;
+
+        if (obd_ioctl_pack(&data, &buf, max)) {
+                printf("ioctl_pack failed.\n");
+                exit(1);
+        }
+
+        rc = ioctl(fd, IOC_MDC_LOOKUP, buf);
+        if (rc < 0) {
+                rc = errno;
+                printf("ioctl(%s/%s) error: %s\n", parent,
+                       data.ioc_inlbuf1, strerror(rc));
+                exit(rc);
+        }
+
+        return 0;
+}
#include <sys/types.h>
#include <sys/stat.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
}
mode = strtoul(argv[1], NULL, 8);
- return chmod(argv[2], mode);
+ return chmod(argv[2], mode) ? errno : 0;
}
#include <getopt.h>
#undef _GNU_SOURCE
+#include <liblustre.h>
#include <linux/lustre_mds.h>
static void usage(char *argv0, int status)
# create nodes
echo -n "adding NET for:"
-for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | sort -u`; do
+for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
echo -n " $NODE"
- ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype elan || exit 1
+ ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
done
# configure mds server
#include <time.h>
#include <limits.h>
#include <sys/ioctl.h>
-#include <linux/lustre_lib.h>
+#include <liblustre.h>
#include <linux/obd.h>
+#include <linux/lustre_lib.h>
static int usage(char *prog, FILE *out)
{
obdstat
obdio
obdbarrier
+lload
CFLAGS:=-g -O2 -I$(top_srcdir)/utils -I$(PORTALS)/include -I$(srcdir)/../include -Wall -L$(PORTALSLIB)
KFLAGS:=
CPPFLAGS = $(HAVE_LIBREADLINE)
-obdctl_LDADD := $(LIBREADLINE)
lctl_LDADD := $(LIBREADLINE) -lptlctl
-sbin_PROGRAMS = lctl lfind lstripe obdctl obdio obdbarrier obdstat
+lload_LDADD := -lptlctl
+sbin_PROGRAMS = lctl lfind lstripe obdio obdbarrier obdstat lload
sbin_SCRIPTS = lconf lmc llanalyze
-obdctl_SOURCES = parser.c obdctl.c obd.c parser.h obdctl.h
-lctl_SOURCES = parser.c obd.c lctl.c parser.h
+lctl_SOURCES = parser.c obd.c lctl.c parser.h obdctl.h
+lload_SOURCES = lload.c
obdio_SOURCES = obdio.c obdiolib.c obdiolib.h
obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h
lfind_SOURCES = lfind.c
from fcntl import F_GETFL, F_SETFL
# Global parameters
-TCP_ACCEPTOR = ''
MAXTCPBUF = 1048576
DEFAULT_TCPBUF = 1048576
#
self._ldapurl = ''
self._config_name = ''
self._select = {}
+ self._lctl_dump = ''
def verbose(self, flag = None):
if flag: self._verbose = flag
return self._select[srv]
return None
+ def lctl_dump(self, val = None):
+ if val: self._lctl_dump = val
+ return self._lctl_dump
+
config = Config()
# ============================================================
+# handle daemons, like the acceptor
+class DaemonHandler:
+ """ Manage starting and stopping a daemon. Assumes daemon manages
+ it's own pid file. """
+
+ def __init__(self, cmd):
+ self.command = cmd
+ self.path =""
+
+ def start(self):
+ if self.running():
+ log(self.command, "already running.")
+ if not self.path:
+ self.path = find_prog(self.command)
+ if not self.path:
+ panic(self.command, "not found.")
+ ret, out = runcmd(self.path +' '+ self.command_line())
+ if ret:
+ raise CommandError(self.path, out, ret)
+
+ def stop(self):
+ if self.running():
+ pid = self.read_pidfile()
+ try:
+ log ("killing process", pid)
+ os.kill(pid, 15)
+ #time.sleep(1) # let daemon die
+ except OSError, e:
+ log("unable to kill", self.command, e)
+ if self.running():
+ log("unable to kill", self.command)
+
+ def running(self):
+ pid = self.read_pidfile()
+ if pid:
+ try:
+ os.kill(pid, 0)
+ except OSError:
+ self.clean_pidfile()
+ else:
+ return 1
+ return 0
+
+ def read_pidfile(self):
+ try:
+ fp = open(self.pidfile(), 'r')
+ pid = int(fp.read())
+ fp.close()
+ return pid
+ except IOError:
+ return 0
+
+ def clean_pidfile(self):
+ """ Remove a stale pidfile """
+ log("removing stale pidfile:", self.pidfile())
+ try:
+ os.unlink(self.pidfile())
+ except OSError, e:
+ log(self.pidfile(), e)
+
+class AcceptorHandler(DaemonHandler):
+ def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg):
+ DaemonHandler.__init__(self, "acceptor")
+ self.port = port
+ self.flags = ''
+ self.send_mem = send_mem
+ self.recv_mem = recv_mem
+
+ if net_type == 'toe':
+ self.flags = self.flags + ' -N 4'
+ if irq_aff:
+ self.flags = self.flags + ' -i'
+ if nid_xchg:
+ self.flags = self.flags + ' -x'
+
+ def pidfile(self):
+ return "/var/run/%s-%d.pid" % (self.command, self.port)
+
+ def command_line(self):
+ return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port)))
+
+acceptors = {}
+
+# start the acceptors
+def run_acceptors():
+ for port in acceptors.keys():
+ daemon = acceptors[port]
+ if not daemon.running():
+ daemon.start()
+
+def stop_acceptor(port):
+ if acceptors.has_key(port):
+ daemon = acceptors[port]
+ if daemon.running():
+ daemon.stop()
+
+
+# ============================================================
# handle lctl interface
class LCTLInterface:
"""
Initialize close by finding the lctl binary.
"""
self.lctl = find_prog(cmd)
+ self.save_file = ''
if not self.lctl:
if config.noexec():
debug('! lctl not found')
else:
raise CommandError('lctl', "unable to find lctl binary.")
+ def use_save_file(self, file):
+ self.save_file = file
+
def set_nonblock(self, fd):
fl = fcntl.fcntl(fd, F_GETFL)
fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
should modify command line to accept multiple commands, or
create complex command line options
"""
- debug("+", self.lctl, cmds)
+ cmd_line = self.lctl
+ if self.save_file:
+ cmds = '\n dump ' + self.save_file + cmds
+
+ debug("+", cmd_line, cmds)
if config.noexec(): return (0, [])
- child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
+ child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
child.tochild.write(cmds + "\n")
child.tochild.close()
cmds = """
network %s
mynid %s
- add_uuid self %s
- quit""" % (net, nid, nid)
- else:
- cmds = """
- network %s
- add_uuid self %s
- quit""" % (net, nid)
-
- self.run(cmds)
+ quit """ % (net, nid)
+ self.run(cmds)
# create a new connection
- def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
- if net in ('tcp', 'toe'):
- cmds = """
+ def connect(self, srv):
+ cmds = "\n add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type)
+ if srv.net_type in ('tcp', 'toe') and not config.lctl_dump():
+ flags = ''
+ if srv.irq_affinity:
+ flags = flags + 'i'
+ if srv.nid_exchange:
+ flags = flags + 'x'
+ cmds = """%s
network %s
- add_uuid %s %s
send_mem %d
recv_mem %d
- connect %s %d
- quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
- else:
- cmds = """
- network %s
- add_uuid %s %s
- connect %s %d
- quit""" % (net, servuuid, nid, nid, port, )
-
+ connect %s %d %s""" % (cmds, srv.net_type,
+ srv.send_mem,
+ srv.recv_mem,
+ srv.hostaddr, srv.port, flags )
+
+ cmds = cmds + "\n quit"
self.run(cmds)
# add a route to a range
cmds = """
network %s
add_route %s %s %s
- quit """ % (net, gw, lo, hi)
+ quit """ % (net,
+ gw, lo, hi)
self.run(cmds)
def add_route_host(self, net, uuid, gw, tgt):
cmds = """
network %s
- add_uuid %s %s
+ add_uuid %s %s %s
add_route %s %s
- quit """ % (net, uuid, tgt, gw, tgt)
+ quit """ % (net,
+ uuid, tgt, net,
+ gw, tgt)
self.run(cmds)
# add a route to a range
cmds = """
ignore_errors
network %s
- del_uuid self
disconnect
quit""" % (net)
self.run(cmds)
# Run a command and return the output and status.
# stderr is sent to /dev/null, could use popen3 to
# save it if necessary
-def run(*args):
- cmd = string.join(map(str,args))
+def runcmd(cmd):
debug ("+", cmd)
if config.noexec(): return (0, [])
f = os.popen(cmd + ' 2>&1')
ret = 0
return (ret, out)
+def run(*args):
+ cmd = string.join(map(str,args))
+ return runcmd(cmd)
+
# Run a command in the background.
def run_daemon(*args):
cmd = string.join(map(str,args))
cmdpath = os.path.dirname(sys.argv[0])
syspath.insert(0, cmdpath);
if config.portals_dir():
- syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
+ syspath.insert(0, os.path.join(config.portals_dir()+'/linux/utils/'))
for d in syspath:
prog = os.path.join(d,cmd)
if os.access(prog, os.X_OK):
ip = string.split(addr, ':')[1]
return ip
+def get_local_nid(net_type, wildcard):
+ """Return the local nid. First look for an elan interface,
+ then use the local address. """
+ local = ""
+ if os.access('/proc/elan/device0/position', os.R_OK):
+ local = get_local_address('elan', '*')
+ else:
+ local = get_local_address(net_type, wildcard)
+ return local
+
def get_local_address(net_type, wildcard):
"""Return the local address for the network type."""
local = ""
"""Return true if a device exists for the uuid"""
# expect this format:
# 1 UP ldlm ldlm ldlm_UUID 2
+ if config.lctl_dump():
+ return 0
try:
out = lctl.device_list()
for s in out:
except CommandError, e:
e.dump()
return 0
+
+def is_network_prepared():
+ """If the PTLRPC device exists, then assume that all networking
+ has been configured.  Returns 1 when a device with UUID RPCDEV_UUID
+ is listed by lctl, 0 otherwise."""
+ # an lctl dump file is configured: always report "not prepared"
+ if config.lctl_dump():
+ return 0
+ try:
+ out = lctl.device_list()
+ for s in out:
+ # the fifth whitespace-separated field is compared against the UUID
+ if 'RPCDEV_UUID' == string.split(s)[4]:
+ return 1
+ except CommandError, e:
+ # a failing device listing is dumped and treated as "not prepared"
+ e.dump()
+ return 0
+
+
def fs_is_mounted(path):
"""Return true if path is a mounted lustre filesystem"""
msg = string.join(map(str,args))
print self.module_name + ":", self.name, self.uuid, msg
- def lookup_server(self, srv_uuid):
- """ Lookup a server's network information """
- net = self.db.get_ost_net(srv_uuid)
- if not net:
- panic ("Unable to find a server for:", srv_uuid)
- self._server = Network(net)
-
- def get_server(self):
- return self._server
-
def cleanup(self):
""" default cleanup, used for most modules """
self.info()
- srv = self.get_server()
- if srv and local_net(srv):
- try:
- lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- log(self.module_name, "disconnect failed: ", self.name)
- e.dump()
- cleanup_error(e.rc)
try:
lctl.cleanup(self.name, self.uuid)
except CommandError, e:
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
-
+
def add_portals_module(self, dev_dir, modname):
"""Append a module to list of modules to load."""
self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
log('! unable to unload module:', mod)
logall(out)
-
class Network(Module):
def __init__(self,db):
Module.__init__(self, 'NETWORK', db)
self.net_type = self.db.get_val('nettype')
self.nid = self.db.get_val('nid', '*')
self.port = self.db.get_val_int('port', 0)
- self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
- self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
+ self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
+ self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
+ self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
+ self.nid_exchange = self.db.get_val_int('nidexchange', 0)
+
if '*' in self.nid:
- self.nid = get_local_address(self.net_type, self.nid)
+ self.nid = get_local_nid(self.net_type, self.nid)
if not self.nid:
panic("unable to set nid for", self.net_type, self.nid)
debug("nid:", self.nid)
+
+ # hostaddr defaults to the nid; a wildcard value is resolved to the
+ # local address for this network type, and failure to resolve it
+ # is fatal.
+ self.hostaddr = self.db.get_val('hostaddr', self.nid)
+ if '*' in self.hostaddr:
+ self.hostaddr = get_local_address(self.net_type, self.hostaddr)
+ if not self.hostaddr:
+ panic("unable to set hostaddr for", self.net_type, self.hostaddr)
+ debug("hostaddr:", self.hostaddr)
+ # debug ( "hostaddr ", self.hostaddr, "net_type", self.net_type)
+
self.add_portals_module("linux/oslib", 'portals')
if node_needs_router():
self.add_portals_module("linux/router", 'kptlrouter')
if self.net_type == 'gm':
self.add_portals_module("/linux/gmnal", 'kgmnal')
self.add_lustre_module('obdclass', 'obdclass')
- self.add_lustre_module('ptlrpc', 'ptlrpc')
def prepare(self):
+ if is_network_prepared():
+ return
+ self.info(self.net_type, self.nid, self.port)
+ lctl.network(self.net_type, self.nid)
+
+ def cleanup(self):
self.info(self.net_type, self.nid, self.port)
if self.net_type in ('tcp', 'toe'):
- nal_id = '' # default is socknal
- if self.net_type == 'toe':
- nal_id = '-N 4'
- ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
- if ret:
- raise CommandError(TCP_ACCEPTOR, out, ret)
+ stop_acceptor(self.port)
+ try:
+ lctl.disconnectAll(self.net_type)
+ except CommandError, e:
+ print "disconnectAll failed: ", self.name
+ e.dump()
+ cleanup_error(e.rc)
+
+class Router(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'ROUTER', db)
+ def prepare(self):
+ if is_network_prepared():
+ return
+ self.info()
for net_type, gw, lo, hi in self.db.get_route_tbl():
lctl.add_route(net_type, gw, lo, hi)
- if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
- srvdb = self.db.nid2server(lo)
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
+
if not srvdb:
panic("no server for nid", lo)
else:
srv = Network(srvdb)
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
-
-
- lctl.network(self.net_type, self.nid)
- if not is_prepared("RPCDEV_UUID"):
- lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
-
+ lctl.connect(srv)
def cleanup(self):
- self.info(self.net_type, self.nid, self.port)
for net_type, gw, lo, hi in self.db.get_route_tbl():
- if self.net_type in ('tcp', 'toe') and hi == '':
- srvdb = self.db.nid2server(lo)
+ if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '':
+ srvdb = self.db.nid2server(lo, net_type)
if not srvdb:
panic("no server for nid", lo)
else:
e.dump()
cleanup_error(e.rc)
try:
- lctl.del_route(self.net_type, self.nid, lo, hi)
+ lctl.del_route(net_type, gw, lo, hi)
except CommandError, e:
print "del_route failed: ", self.name
e.dump()
cleanup_error(e.rc)
-
- try:
- if is_prepared("RPCDEV_UUID"):
- lctl.cleanup("RPCDEV", "RPCDEV_UUID")
- except CommandError, e:
- print "cleanup failed: RPCDEV"
- e.dump()
- cleanup_error(e.rc)
- try:
- lctl.disconnectAll(self.net_type)
- except CommandError, e:
- print "disconnectAll failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
- if self.net_type in ('tcp', 'toe'):
- # yikes, this ugly! need to save pid in /var/something
- run("killall acceptor")
class LDLM(Module):
def __init__(self,db):
if is_prepared(self.uuid):
return
self.info()
- lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
- setup ="")
+ lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+ def cleanup(self):
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
+
+class PTLRPC(Module):
+ """Service module for the 'ptlrpc' device type; registers the
+ 'ptlrpc' lustre module through add_lustre_module()."""
+ def __init__(self,db):
+ Module.__init__(self, 'PTLRPC', db)
+ self.add_lustre_module('ptlrpc', 'ptlrpc')
+ def prepare(self):
+ """Attach the ptlrpc device with lctl.newdev, unless
+ is_prepared(self.uuid) already reports it."""
+ if is_prepared(self.uuid):
+ return
+ self.info()
+ lctl.newdev(attach="ptlrpc %s %s" % (self.name, self.uuid))
+ def cleanup(self):
+ """Run the default Module.cleanup only when the device is
+ currently prepared."""
+ if is_prepared(self.uuid):
+ Module.cleanup(self)
class LOV(Module):
def __init__(self,db):
self.devlist = self.db.get_refs('obd')
self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
self.osclist = []
+ self.mdc_uudi = ''
for obd_uuid in self.devlist:
obd = self.db.lookup(obd_uuid)
osc = get_osc(obd, self.name)
class MDSDEV(Module):
def __init__(self,db):
Module.__init__(self, 'MDSDEV', db)
- self.devname = self.db.get_val('devpath','')
+ self.devpath = self.db.get_val('devpath','')
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
# overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
- self.uuid = self.db.get_first_ref('target')
- mds = self.db.lookup(self.uuid)
+ target_uuid = self.db.get_first_ref('target')
+ mds = self.db.lookup(target_uuid)
self.name = mds.getName()
self.lovconfig_uuids = mds.get_refs('lovconfig')
# FIXME: if fstype not set, then determine based on kernel version
self.format = self.db.get_val('autoformat', "no")
+
+ active_uuid = mds.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
self.add_lustre_module('mds', 'mds')
if self.fstype:
self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
+
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.devname, self.fstype, self.format)
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.devpath, self.fstype, self.format)
+ run_acceptors()
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
if not is_prepared('MDT_UUID'):
lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
setup ="")
cleanup_error(e.rc)
if is_prepared(self.uuid):
Module.cleanup(self)
- clean_loop(self.devname)
+ clean_loop(self.devpath)
class OSD(Module):
def __init__(self, db):
Module.__init__(self, 'OSD', db)
self.osdtype = self.db.get_val('osdtype')
- self.devname = self.db.get_val('devpath', '')
+ self.devpath = self.db.get_val('devpath', '')
self.size = self.db.get_val_int('devsize', 0)
self.fstype = self.db.get_val('fstype', '')
- self.uuid = self.db.get_first_ref('target')
- ost = self.db.lookup(self.uuid)
+ target_uuid = self.db.get_first_ref('target')
+ ost = self.db.lookup(target_uuid)
self.name = ost.getName()
# FIXME: if fstype not set, then determine based on kernel version
self.format = self.db.get_val('autoformat', 'yes')
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
+
+ active_uuid = ost.get_active_target()
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ else:
+ self.active = 0
+ self.target_dev_uuid = self.uuid
+ self.uuid = target_uuid
+ # modules
self.add_lustre_module('ost', 'ost')
self.add_lustre_module(self.osdtype, self.osdtype)
if self.fstype:
self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
+ def load_module(self):
+ if self.active:
+ Module.load_module(self)
+
# need to check /proc/mounts and /etc/mtab before
# formatting anything.
# FIXME: check if device is already formatted.
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.osdtype, self.devname, self.size, self.fstype, self.format)
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format)
+ run_acceptors()
if self.osdtype == 'obdecho':
blkdev = ''
else:
- blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
+ blkdev = block_dev(self.devpath, self.size, self.fstype, self.format)
lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
if not is_prepared('OSS_UUID'):
if is_prepared(self.uuid):
Module.cleanup(self)
if not self.osdtype == 'obdecho':
- clean_loop(self.devname)
+ clean_loop(self.devpath)
# Generic client module, used by OSC and MDC
class Client(Module):
- def __init__(self, db, module, owner, target_name, target_uuid):
- self.target_name = target_name
- self.target_uuid = target_uuid
- self.db = db
- node_name = config.select(target_name)
- if node_name:
- self.tgt_dev_uuid = self.db.get_target_device(node_name, target_uuid)
- else:
- self.tgt_dev_uuid = db.get_first_ref('active')
+ def __init__(self, tgtdb, module, owner):
+ self.target_name = tgtdb.getName()
+ self.target_uuid = tgtdb.getUUID()
+ self.db = tgtdb
+
+ self.tgt_dev_uuid = tgtdb.get_active_target()
if not self.tgt_dev_uuid:
- panic("No target device found for target:", target_name)
+ panic("No target device found for target:", self.target_name)
+
self.kmodule_list = []
self._server = None
self._connected = 0
self.module = module
self.module_name = string.upper(module)
- self.name = '%s_%s_%s' % (self.module_name, owner, target_name)
- self.uuid = '%05x_%s_%05x' % (int(random.random() * 1048576), self.name,
- int(random.random() * 1048576))
+ self.name = '%s_%s_%s' % (self.module_name, owner, self.target_name)
+ self.uuid = '%05x%05x_%.14s_%05x%05x' % (int(random.random() * 1048576),
+ int(random.random() * 1048576),self.name,
+ int(random.random() * 1048576),
+ int(random.random() * 1048576))
self.uuid = self.uuid[0:36]
self.lookup_server(self.tgt_dev_uuid)
self.add_lustre_module(module, module)
+ def lookup_server(self, srv_uuid):
+ """ Lookup a server's network information """
+ self._server_nets = self.db.get_ost_net(srv_uuid)
+ if len(self._server_nets) == 0:
+ panic ("Unable to find a server for:", srv_uuid)
+
+ def get_servers(self):
+ return self._server_nets
+
def prepare(self, ignore_connect_failure = 0):
if is_prepared(self.uuid):
return
self.info(self.target_uuid)
- srv = self.get_server()
try:
- if local_net(srv):
- #debug("LOCAL NET")
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
+ srv = local_net(self.get_servers())
+ if srv:
+ lctl.connect(srv)
else:
- #debug("NOT LOCAL NET")
- r = find_route(srv)
- if r:
+ srv, r = find_route(self.get_servers())
+ if srv:
lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
else:
- panic ("no route to", srv.nid)
+ panic ("no route to", self.target_uuid)
except CommandError:
if (ignore_connect_failure == 0):
pass
- lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
+ if srv:
+ lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
setup ="%s %s" %(self.target_uuid, srv.uuid))
def cleanup(self):
- srv = self.get_server()
- if local_net(srv):
- Module.cleanup(self)
+ Module.cleanup(self)
+ srv = local_net(self.get_servers())
+ if srv:
+ try:
+ lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+ except CommandError, e:
+ log(self.module_name, "disconnect failed: ", self.name)
+ e.dump()
+ cleanup_error(e.rc)
else:
- self.info(self.targt_uuid)
- r = find_route(srv)
- if r:
+ self.info(self.target_uuid)
+ srv, r = find_route(self.get_servers())
+ if srv:
try:
lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
except CommandError, e:
print "del_route failed: ", self.name
e.dump()
cleanup_error(e.rc)
- Module.cleanup(self)
class MDC(Client):
- def __init__(self, db, owner, target_name, target_uuid):
- Client.__init__(self, db, 'mdc', owner, target_name, target_uuid)
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'mdc', owner)
class OSC(Client):
- def __init__(self, db, owner, target_name, target_uuid):
- Client.__init__(self, db, 'osc', owner, target_name, target_uuid)
+ def __init__(self, db, owner):
+ Client.__init__(self, db, 'osc', owner)
class COBD(Module):
mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
else:
mdc_uuid = self.vosc.get_mdc_uuid()
+ if not mdc_uuid:
+ panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
self.info(self.path, self.mds_uuid, self.obd_uuid)
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
(self.vosc.get_uuid(), mdc_uuid, self.path)
uuids = self._get_all_refs()
return uuids
- def get_ost_net(self, uuid):
- ost = self.lookup(uuid)
- uuid = ost.get_first_ref('network')
- if not uuid:
- return None
- return ost.lookup(uuid)
-
- def nid2server(self, nid):
+ def get_ost_net(self, osd_uuid):
+ """Return a list of Network objects, one for each network uuid
+ reported by get_networks() on the node named by the osd's 'node'
+ ref. Returns an empty list when osd_uuid is empty; panics when
+ the node cannot be looked up."""
+ srv_list = []
+ if not osd_uuid:
+ return srv_list
+ osd = self.lookup(osd_uuid)
+ node_uuid = osd.get_first_ref('node')
+ node = self.lookup(node_uuid)
+ if not node:
+ panic("unable to find node for osd_uuid:", osd_uuid,
+ " node_ref:", node_uuid)
+ for net_uuid in node.get_networks():
+ db = node.lookup(net_uuid)
+ srv_list.append(Network(db))
+ return srv_list
+
+ def nid2server(self, nid, net_type):
netlist = self.lookup_class('network')
for net_db in netlist:
- if net_db.get_val('nid') == nid:
+ if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type:
return net_db
return None
type = self.get_class()
ret=0;
if type in ('network',):
- ret = 10
+ ret = 5
+ elif type in ('routetbl',):
+ ret = 6
+ elif type in ('ptlrpc',):
+ ret = 7
elif type in ('device', 'ldlm'):
ret = 20
elif type in ('osd', 'mdd', 'cobd'):
# Find the target_device for target on a node
# node->profiles->device_refs->target
- def get_target_device(self, node_name, target_uuid):
+ def get_target_device(self, target_uuid, node_name):
node_db = self.lookup_name(node_name)
if not node_db:
return None
return ref[1]
return None
+ def get_active_target(self):
+ """Return the target device uuid chosen for this target.
+ When config.select() yields a node name for the target's name, the
+ device is looked up with get_target_device(); otherwise the first
+ 'active' ref is returned."""
+ target_uuid = self.getUUID()
+ target_name = self.getName()
+ node_name = config.select(target_name)
+ if node_name:
+ tgt_dev_uuid = self.get_target_device(target_uuid, node_name)
+ else:
+ tgt_dev_uuid = self.get_first_ref('active')
+ return tgt_dev_uuid
+
+
# get all network uuids for this node
def get_networks(self):
ret = []
for prof_uuid in prof_list:
prof_db = self.lookup(prof_uuid)
net_list = prof_db.get_refs('network')
- debug("get_networks():", prof_uuid, net_list)
+ #debug("get_networks():", prof_uuid, net_list)
for net_uuid in net_list:
ret.append(net_uuid)
return ret
for t in tbl:
routes = t.getElementsByTagName('route')
for r in routes:
- lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r, 'hi')
- res.append((type, gw, lo, hi))
+ net_type = self.xmlattr(r, 'type')
+ if type != net_type:
+ lo = self.xmlattr(r, 'lo')
+ hi = self.xmlattr(r, 'hi')
+ res.append((type, gw, lo, hi))
return res
def get_route_tbl(self):
ret = []
- tbls = self.dom_node.getElementsByTagName('routetbl')
- for tbl in tbls:
- for r in tbl.getElementsByTagName('route'):
- net_type = self.xmlattr(r, 'type')
- gw = self.xmlattr(r, 'gw')
- lo = self.xmlattr(r, 'lo')
- hi = self.xmlattr(r, 'hi')
- ret.append((net_type, gw, lo, hi))
+ for r in self.dom_node.getElementsByTagName('route'):
+ net_type = self.xmlattr(r, 'type')
+ gw = self.xmlattr(r, 'gw')
+ lo = self.xmlattr(r, 'lo')
+ hi = self.xmlattr(r, 'hi')
+ ret.append((net_type, gw, lo, hi))
return ret
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
def get_osc(ost_db, owner):
- osc = OSC(ost_db, owner, ost_db.getName(), ost_db.getUUID())
+ osc = OSC(ost_db, owner)
return osc
def get_mdc(db, owner, mds_uuid):
mds_db = db.lookup(mds_uuid);
if not mds_db:
panic("no mds:", mds_uuid)
- mdc = MDC(mds_db, owner, mds_db.getName(), mds_uuid)
+ mdc = MDC(mds_db, owner)
return mdc
def prepare_mdc(db, owner, mds_uuid):
def add_local_interfaces(node_db):
global local_node
- debug("add_local")
for netuuid in node_db.get_networks():
net = node_db.lookup(netuuid)
+ srv = Network(net)
debug("add_local", netuuid)
- local_node.append((net.get_val('nettype'), net.get_val('nid')))
+ local_node.append((srv.net_type, srv.nid))
+ if acceptors.has_key(srv.port):
+ panic("duplicate port:", srv.port)
+ if srv.net_type in ('tcp', 'toe'):
+ acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
+ srv.send_mem, srv.recv_mem,
+ srv.irq_affinity,
+ srv.nid_exchange)
def node_needs_router():
return router_flag
#debug("init_route_config: gw is", gw)
if not gw:
continue
- for netuuid in node_db.get_networks():
- db = node_db.lookup(netuuid)
- #debug("init_route_config: tbl: ", db.get_route_tbl())
- if local_type != db.get_val('nettype'):
- for route in db.get_routes(local_type, gw):
- routes.append(route)
- #debug("init_route_config routes:", routes)
+ for route in node_db.get_routes(local_type, gw):
+ routes.append(route)
+ debug("init_route_config routes:", routes)
+
+def local_net(srv_list):
+ """Return the first srv in srv_list whose net_type equals the
+ type (element 0) of an entry in the global local_node list, or
+ None when nothing matches. Local interfaces are the outer loop,
+ so their order takes precedence over the order of srv_list."""
+ global local_node
+ for iface in local_node:
+ for srv in srv_list:
+ #debug("local_net a:", srv.net_type, "b:", iface[0])
+ if srv.net_type == iface[0]:
+ return srv
+ return None
-def local_net(net):
+def local_net_type(net_type):
global local_node
for iface in local_node:
- #debug("local_net a:", net.net_type, "b:", iface[0])
- if net.net_type == iface[0]:
+ if net_type == iface[0]:
return 1
return 0
-def find_route(net):
+def find_route(srv_list):
+ """Return (srv, route) for the first srv in srv_list whose
+ hostaddr equals element 2 of an entry in the global routes list;
+ return (None, None) when no route matches."""
global local_node, routes
frm_type = local_node[0][0]
- to_type = net.net_type
- to = net.nid
- debug ('looking for route to', to_type,to)
- for r in routes:
- #debug("find_route: ", r)
- if r[2] == to:
- return r
- return None
+ for srv in srv_list:
+ #debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type)
+ to_type = srv.net_type
+ to = srv.hostaddr
+ #debug ('looking for route to', to_type, to)
+ for r in routes:
+ #debug("find_route: ", r)
+ if r[2] == to:
+ return srv, r
+ return None,None
############################################################
n = None
if type == 'ldlm':
n = LDLM(db)
+ elif type == 'ptlrpc':
+ n = PTLRPC(db)
elif type == 'lov':
n = LOV(db)
elif type == 'network':
n = Network(db)
+ elif type == 'routetbl':
+ n = Router(db)
elif type == 'osd':
n = OSD(db)
elif type == 'cobd':
operation(services)
def doSetup(services):
+ if config.nosetup():
+ return
for s in services:
n = newService(s[1])
n.prepare()
def doModules(services):
+ if config.nomod():
+ return
for s in services:
n = newService(s[1])
n.load_module()
def doCleanup(services):
+ if config.nosetup():
+ return
services.reverse()
for s in services:
n = newService(s[1])
n.cleanup()
def doUnloadModules(services):
+ if config.nomod():
+ return
services.reverse()
for s in services:
n = newService(s[1])
recovery_upcall = node_db.get_val('recovery_upcall', '')
timeout = node_db.get_val_int('timeout', 0)
+ add_local_interfaces(node_db)
if not router_flag:
- add_local_interfaces(node_db)
init_route_config(lustreDB)
# Two step process: (1) load modules, (2) setup lustre
if config.force():
# the command line can override this value
timeout = 5
+ # ugly hack, only need to run lctl commands for --dump
+ if config.lctl_dump():
+ for_each_profile(node_db, prof_list, doCleanup)
+ return
+
sys_set_timeout(timeout)
sys_set_recovery_upcall(recovery_upcall)
for_each_profile(node_db, prof_list, doUnloadModules)
else:
+ # ugly hack, only need to run lctl commands for --dump
+ if config.lctl_dump():
+ for_each_profile(node_db, prof_list, doSetup)
+ return
+
for_each_profile(node_db, prof_list, doModules)
sys_set_debug_path()
"help", "node=", "nomod", "nosetup",
"dump=", "force", "minlevel=", "maxlevel=",
"timeout=", "recovery_upcall=",
- "ldapurl=", "config=", "select="]
+ "ldapurl=", "config=", "select=", "lctl_dump="]
opts = []
args = []
config.verbose(1)
if o in ("-n", "--noexec"):
config.noexec(1)
- config.verbose(1)
if o == "--portals":
config.portals_dir(a)
if o == "--lustre":
config.config_name(a)
if o == "--select":
config.init_select(a)
+ if o == "--lctl_dump":
+ config.lctl_dump(a)
return args
# Shutdown does steps in reverse
#
def main():
- global TCP_ACCEPTOR, lctl, MAXTCPBUF
+ global lctl, MAXTCPBUF
host = socket.gethostname()
setupModulePath(sys.argv[0])
- TCP_ACCEPTOR = find_prog('acceptor')
- if not TCP_ACCEPTOR:
- if config.noexec():
- TCP_ACCEPTOR = 'acceptor'
- debug('! acceptor not found')
- else:
- panic('acceptor not found')
-
lctl = LCTLInterface('lctl')
-
- sys_make_devices()
- sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
- sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+ if config.lctl_dump():
+ lctl.use_save_file(config.lctl_dump())
+ else:
+ sys_make_devices()
+ sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+ sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
doHost(db, node_list)
if first_cleanup_error:
sys.exit(first_cleanup_error)
-
+
#include <stdlib.h>
#include <stdio.h>
+#include <portals/api-support.h>
#include <portals/ptlctl.h>
#include "obdctl.h"
#include "parser.h"
{"ignore_errors", jt_opt_ignore_errors, 0,
"ignore errors that occur during script processing\n"
"ignore_errors"},
+ {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
/* Network configuration commands */
{"==== network config ====", jt_noop, 0, "network config"},
"The nid defaults to hostname for tcp networks and is automatically "
"setup for elan/myrinet networks.\n"
"usage: mynid [nid]"},
- {"add_uuid", jt_ptl_add_uuid, 0, "associate a UUID with a nid\n"
- "usage: add_uuid <uuid> <nid>"},
- {"close_uuid", jt_ptl_close_uuid, 0, "disconnect a UUID\n"
+ {"add_uuid", jt_obd_add_uuid, 0, "associate a UUID with a nid\n"
+ "usage: add_uuid <uuid> <nid> <net_type>"},
+ {"close_uuid", jt_obd_close_uuid, 0, "disconnect a UUID\n"
"usage: close_uuid <uuid>)"},
- {"del_uuid", jt_ptl_del_uuid, 0, "delete a UUID association\n"
+ {"del_uuid", jt_obd_del_uuid, 0, "delete a UUID association\n"
"usage: del_uuid <uuid>"},
{"add_route", jt_ptl_add_route, 0,
"add an entry to the routing table\n"
{"setattr", jt_obd_setattr, 0,
"set mode attribute for OST object <objid>\n"
"usage: setattr <objid> <mode>"},
- {"create", jt_obd_create, 0,
+ {"create", jt_obd_create, 0,
"create <num> OST objects (with <mode>)\n"
"usage: create [num [mode [verbose]]]"},
{"destroy", jt_obd_destroy, 0,
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
+
+
+#include <liblustre.h>
+#include <linux/obd.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_lite.h>
#include <linux/obd_lov.h>
--add net
--node node_name
- --nid addr
+ --nid nid
--nettype tcp|elan|toe|gm
+ --hostaddr addr
--port port
--tcpbuf size
+ --irq_affinity 0|1
+ --nid_exchange 0|1
--router
--add mds
ldlm_name = 'ldlm'
ldlm_uuid = 'ldlm_UUID'
+
+ptlrpc_name = 'RPCDEV'
+ptlrpc_uuid = 'RPCDEV_UUID'
+
def new_lustre(dom):
"""Create a new empty lustre document"""
# adding ldlm here is a bit of a hack, but one is enough.
str = """<lustre>
<ldlm name="%s" uuid="%s"/>
- </lustre>""" % (ldlm_name, ldlm_uuid)
+ <ptlrpc name="%s" uuid="%s"/>
+ </lustre>""" % (ldlm_name, ldlm_uuid,
+ ptlrpc_name, ptlrpc_uuid)
return dom.parseString(str)
names = {}
node.appendChild(new)
return new
- def network(self, name, uuid, hostname, net, port=0, tcpbuf=0):
+ def network(self, name, uuid, nid, net, hostaddr="", port=0, tcpbuf=0, irq_aff=0, nid_xchg=0):
"""create <network> node"""
network = self.newService("network", name, uuid)
network.setAttribute("nettype", net);
- self.addElement(network, "nid", hostname)
+ self.addElement(network, "nid", nid)
+ if hostaddr:
+ self.addElement(network, "hostaddr", hostaddr)
if port:
self.addElement(network, "port", "%d" %(port))
if tcpbuf:
self.addElement(network, "sendmem", "%d" %(tcpbuf))
self.addElement(network, "recvmem", "%d" %(tcpbuf))
+ if irq_aff:
+ self.addElement(network, "irqaffinity", "%d" %(irq_aff))
+ if nid_xchg:
+ self.addElement(network, "nidexchange", "%d" %(nid_xchg))
return network
+ def routetbl(self, name, uuid):
+ """create <routetbl> node"""
+ rtbl = self.newService("routetbl", name, uuid)
+ return rtbl
+
def route(self, net_type, gw, lo, hi):
""" create one entry for the route table """
ref = self.doc.createElement('route')
ldlm = self.newService("ldlm", name, uuid)
return ldlm
- def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, net_uuid, dev_size=0):
+ def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, node_uuid, dev_size=0):
osd = self.newService("osd", name, uuid)
osd.setAttribute('osdtype', osdtype)
osd.appendChild(self.ref("target", ost_uuid))
- osd.appendChild(self.ref("network", net_uuid))
+ osd.appendChild(self.ref("node", node_uuid))
if fs:
self.addElement(osd, "fstype", fs)
if devname:
mds.appendChild(self.ref("active",mdd_uuid))
return mds
- def mdsdev(self, name, uuid, fs, devname, format, net_uuid, node_uuid,
+ def mdsdev(self, name, uuid, fs, devname, format, node_uuid,
mds_uuid, dev_size=0 ):
mdd = self.newService("mdsdev", name, uuid)
self.addElement(mdd, "fstype", fs)
self.addElement(mdd, "autoformat", format)
if dev_size:
self.addElement(mdd, "devsize", "%s" % (dev_size))
- mdd.appendChild(self.ref("network", net_uuid))
+ mdd.appendChild(self.ref("node", node_uuid))
mdd.appendChild(self.ref("target", mds_uuid))
return mdd
lustre.appendChild(profile)
node_add_profile(gen, node, 'ldlm', ldlm_uuid)
+ node_add_profile(gen, node, 'ptlrpc', ptlrpc_uuid)
if has_option(options, 'router'):
node.setAttribute('router', '1')
if has_option(options, 'timeout'):
node_name = get_option(options, 'node')
nid = get_option(options, 'nid')
+ hostaddr = get_option(options, 'hostaddr', '')
net_type = get_option(options, 'nettype')
if net_type in ('tcp', 'toe'):
port = get_option_int(options, 'port', DEFAULT_PORT)
tcpbuf = get_option_int(options, 'tcpbuf', 0)
+ irq_aff = get_option_int(options, 'irq_affinity', 0)
+ nid_xchg = get_option_int(options, 'nid_exchange', 0)
elif net_type in ('elan', 'gm'):
port = 0
tcpbuf = 0
+ irq_aff = 0
+ nid_xchg = 0
else:
print "Unknown net_type: ", net_type
sys.exit(2)
node = ret
net_name = new_name('NET_'+ node_name +'_'+ net_type)
net_uuid = new_uuid(net_name)
- node.appendChild(gen.network(net_name, net_uuid, nid, net_type, port, tcpbuf))
+ node.appendChild(gen.network(net_name, net_uuid, nid, net_type, hostaddr, port, tcpbuf, irq_aff, nid_xchg))
node_add_profile(gen, node, "network", net_uuid)
if not node:
error (node_name, " not found.")
- netlist = node.getElementsByTagName('network')
- net = netlist[0]
- rlist = net.getElementsByTagName('routetbl')
+ rlist = node.getElementsByTagName('routetbl')
if len(rlist) > 0:
rtbl = rlist[0]
else:
- rtbl = gen.addElement(net, 'routetbl')
+ rtbl_name = new_name("RTBL_" + node_name)
+ rtbl_uuid = new_uuid(rtbl_name)
+ rtbl = gen.routetbl(rtbl_name, rtbl_uuid)
+ node.appendChild(rtbl)
+ node_add_profile(gen, node, "routetbl", rtbl_uuid)
rtbl.appendChild(gen.route(net_type, gw, lo, hi))
error("NODE: ", node_name, "not found")
mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, get_format_flag(options),
- net_uuid, node_uuid, mds_uuid, dev_size=size)
+ node_uuid, mds_uuid, dev_size=size)
lustre.appendChild(mdd)
lovname = get_option(options, 'lov', '')
osdtype = get_option(options, 'osdtype', 'obdfilter', deprecated_tag="obdtype")
+ node_uuid = name2uuid(lustre, node_name)
+
if osdtype == 'obdecho':
fstype = ''
devname = ''
error('add_ost:', '"'+lovname+'"', "lov element not found.")
lov_add_obd(gen, lov, ost_uuid)
- net_uuid = get_net_uuid(lustre, node_name)
- if not net_uuid:
- error("NODE: No net network interface for", node_name, "found")
-
osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid,
- net_uuid, size)
+ node_uuid, size)
node = findByName(lustre, node_name, "node")
"dev=", "size=", "obd=", "ost=", "obdtype=", "osdtype=", "obduuid=", "in=",
"ostuuid=", "path=", "help", "batch=", "lov=", "gw=", "lo=", "hi=",
"osc=", "real_obd=", "cache_obd=", "fstype=",
- "timeout=", "recovery_upcall="]
+ "timeout=", "recovery_upcall=", "nid_exchange=", "irq_affinity=",
+ "hostaddr=",]
opts = []
args = []
options = {}
# network options
if o == "--nid":
options['nid'] = a
+ if o == "--hostaddr":
+ options['hostaddr'] = a
if o == "--nettype":
options['nettype'] = a
if o == "--net":
options['mtpt'] = 1
if o == "--route":
options['route'] = 1
+ if o == "--nid_exchange":
+ options['nid_exchange'] = a
+ if o == "--irq_affinity":
+ options['irq_affinity'] = a
# ost options
if o == "--dev":
#include <stdarg.h>
#include <signal.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
#include <linux/lustre_lib.h>
#include <linux/lustre_idl.h>
#include <linux/lustre_dlm.h>
-#include <linux/obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
#include <linux/obd.h> /* for struct lov_stripe_md */
+#include <linux/obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
#include <linux/lustre_build_version.h>
#include <unistd.h>
#undef __KERNEL__
#include "obdctl.h"
+#include <portals/ptlctl.h>
#include "parser.h"
#include <stdio.h>
struct timeval prev_time;
#endif
-int fd = -1;
uint64_t conn_addr = -1;
uint64_t conn_cookie;
char rawbuf[8192];
struct lov_stripe_md lsm;
} lsm_buffer;
-static int getfd(char *func);
static char *cmdname(char *func);
-#define IOCINIT(data) \
+#define IOC_INIT(data) \
do { \
memset(&data, 0, sizeof(data)); \
- data.ioc_version = OBD_IOCTL_VERSION; \
data.ioc_addr = conn_addr; \
data.ioc_cookie = conn_cookie; \
- data.ioc_len = sizeof(data); \
- if (fd < 0) { \
- fprintf(stderr, "No device open, use device\n"); \
- return 1; \
- } \
} while (0)
#define IOC_PACK(func, data) \
do { \
+ memset(buf, 0, sizeof(rawbuf)); \
if (obd_ioctl_pack(&data, &buf, max)) { \
fprintf(stderr, "error: %s: invalid ioctl\n", \
cmdname(func)); \
struct obd_ioctl_data data;
int rc;
- if (getfd(func))
- return -1;
-
- IOCINIT(data);
+ IOC_INIT(data);
data.ioc_inllen1 = strlen(name) + 1;
data.ioc_inlbuf1 = name;
IOC_PACK(func, data);
- rc = ioctl(fd, OBD_IOC_NAME2DEV, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NAME2DEV, buf);
if (rc < 0) {
fprintf(stderr, "error: %s: %s - %s\n", cmdname(func),
name, strerror(rc = errno));
*p = 0;
space--;
-
+
nob = snprintf(p, space, LPX64, lsm->lsm_object_id);
p += nob;
space -= nob;
-
+
if (lsm->lsm_stripe_count != 0) {
- nob = snprintf (p, space, "=%u#%u@%d",
+ nob = snprintf (p, space, "=%u#%u@%d",
lsm->lsm_stripe_size,
lsm->lsm_stripe_count,
lsm->lsm_stripe_offset);
fprintf (stderr, "lsm_string() overflowed buffer\n");
abort ();
}
-
+
return (buffer);
}
static void
-reset_lsmb (union lsm_buffer *lsmb)
+reset_lsmb (union lsm_buffer *lsmb)
{
memset (lsmb->space, 0, sizeof (lsmb->space));
lsmb->lsm.lsm_magic = LOV_MAGIC;
-
}
-static int
+static int
parse_lsm (union lsm_buffer *lsmb, char *string)
{
struct lov_stripe_md *lsm = &lsmb->lsm;
char *end;
int i;
-
+
/*
- * object_id[=size#count[@offset][:id]*]
+ * object_id[=size#count[@offset][:id]*]
*/
reset_lsmb (lsmb);
-
+
lsm->lsm_object_id = strtoull (string, &end, 0);
if (end == string)
return (-1);
string = end;
-
+
if (*string == 0)
return (0);
if (*string != '=')
return (-1);
string++;
-
+
lsm->lsm_stripe_size = strtoul (string, &end, 0);
if (end == string)
return (-1);
string = end;
-
+
if (*string != '#')
return (-1);
string++;
-
+
lsm->lsm_stripe_count = strtoul (string, &end, 0);
if (end == string)
return (-1);
return (-1);
string = end;
}
-
- if (*string == 0) /* don't have to specify obj ids */
+
+ if (*string == 0) /* don't have to specify obj ids */
return (0);
-
+
for (i = 0; i < lsm->lsm_stripe_count; i++) {
if (*string != ':')
return (-1);
if (*string != 0)
return (-1);
-
+
return (0);
}
return func;
}
-static int getfd(char *func)
-{
- if (fd == -1)
- fd = open("/dev/obd", O_RDWR);
- if (fd == -1) {
- fprintf(stderr, "error: %s: opening /dev/obd: %s\n"
- "hint: lustre kernel modules may not be loaded.\n",
- cmdname(func), strerror(errno));
- return -1;
- }
- return 0;
-}
-
#define difftime(a, b) \
((double)(a)->tv_sec - (b)->tv_sec + \
((double)((a)->tv_usec - (b)->tv_usec) / 1000000))
if (conn_addr == -1)
return 0;
- IOCINIT(data);
+ IOC_INIT(data);
- rc = ioctl(fd, OBD_IOC_DISCONNECT, &data);
+ IOC_PACK(func, data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DISCONNECT, buf);
if (rc < 0) {
- fprintf(stderr, "error: %s: OPD_IOC_DISCONNECT %s\n",
+ fprintf(stderr, "error: %s: OPD_IOC_DISCONNECT %s\n",
cmdname(func),strerror(errno));
} else {
if (verbose)
prev_time = this_time;
}
-#define SHMEM_SETUP() shmem_setup()
-#define SHMEM_RESET() shmem_reset()
-#define SHMEM_BUMP() shmem_bump()
-#define SHMEM_SNAP(n) shmem_snap(n)
+#define SHMEM_SETUP() shmem_setup()
+#define SHMEM_RESET() shmem_reset()
+#define SHMEM_BUMP() shmem_bump()
+#define SHMEM_SNAP(n) shmem_snap(n)
#else
#define SHMEM_SETUP()
#define SHMEM_RESET()
data.ioc_dev = dev;
- if (getfd(func))
- return -1;
-
IOC_PACK(func, data);
- return ioctl(fd, OBD_IOC_DEVICE, buf);
+ return l_ioctl(OBD_DEV_ID, OBD_IOC_DEVICE, buf);
}
int jt_obd_device(int argc, char **argv)
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
do_disconnect(argv[0], 1);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_CONNECT, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CONNECT, buf);
+ IOC_UNPACK(argv[0], data);
if (rc < 0)
fprintf(stderr, "error: %s: OBD_IOC_CONNECT %s\n",
cmdname(argv[0]), strerror(rc = errno));
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_DETACH, buf);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DETACH, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
char force = 'F';
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1 && argc != 2)
return CMD_HELP;
}
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_CLEANUP, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CLEANUP, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_NO_TRANSNO, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NO_TRANSNO, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_SET_READONLY, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SET_READONLY, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
int rc;
struct obd_ioctl_data data;
- if (getfd(argv[0]))
- return -1;
-
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, OBD_IOC_NEWDEV, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NEWDEV, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
else {
+ IOC_UNPACK(argv[0], data);
printf("Current device set to %d\n", data.ioc_dev);
}
char buf[8192];
struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
- if (getfd(argv[0]))
- return -1;
+ if (argc != 1)
+ return CMD_HELP;
memset(buf, 0, sizeof(buf));
data->ioc_version = OBD_IOCTL_VERSION;
data->ioc_addr = conn_addr;
data->ioc_cookie = conn_addr;
- data->ioc_len = sizeof(buf);
data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+ data->ioc_len = obd_ioctl_packlen(data);
- if (argc != 1)
- return CMD_HELP;
-
- rc = ioctl(fd, OBD_GET_VERSION, data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_GET_VERSION, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
else {
printf("Lustre version: %s\n", data->ioc_bulk);
}
-
- printf("lctl version: %s\n",BUILD_VERSION);
+
+ printf("lctl version: %s\n", BUILD_VERSION);
return rc;
}
char buf[8192];
struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
- if (getfd(argv[0]))
- return -1;
+ if (argc != 1)
+ return CMD_HELP;
memset(buf, 0, sizeof(buf));
data->ioc_version = OBD_IOCTL_VERSION;
data->ioc_addr = conn_addr;
data->ioc_cookie = conn_addr;
- data->ioc_len = sizeof(buf);
data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+ data->ioc_len = obd_ioctl_packlen(data);
- if (argc != 1)
- return CMD_HELP;
-
- rc = ioctl(fd, OBD_IOC_LIST, data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LIST, data);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 2 && argc != 3 && argc != 4)
return CMD_HELP;
}
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_ATTACH, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ATTACH, buf);
if (rc < 0)
- fprintf(stderr, "error: %s: OBD_IOC_ATTACH %s\n",
+ fprintf(stderr, "error: %s: OBD_IOC_ATTACH %s\n",
cmdname(argv[0]), strerror(rc = errno));
else if (argc == 3) {
char name[1024];
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
- if (argc > 3)
+ if (argc > 4)
return CMD_HELP;
data.ioc_dev = -1;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
}
- if (argc == 3) {
+ if (argc > 2) {
data.ioc_inllen2 = strlen(argv[2]) + 1;
data.ioc_inlbuf2 = argv[2];
}
+ if (argc > 3) {
+ data.ioc_inllen3 = strlen(argv[3]) + 1;
+ data.ioc_inlbuf3 = argv[3];
+ }
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_SETUP, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SETUP, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
__u64 id;
int rc;
char *end;
-
+
if (argc != 2)
return (CMD_HELP);
}
memset (&lsm_buffer, 0, sizeof (lsm_buffer));
-
- IOCINIT (data);
+
+ IOC_INIT (data);
data.ioc_obdo1.o_id = id;
data.ioc_obdo1.o_mode = S_IFREG | 0644;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
data.ioc_plen1 = sizeof (lsm_buffer);
IOC_PACK(argv[0], data);
- rc = ioctl(fd, ECHO_IOC_GET_STRIPE, buf);
+ rc = l_ioctl(OBD_DEV_ID, ECHO_IOC_GET_STRIPE, buf);
IOC_UNPACK(argv[0], data);
if (rc != 0) {
- fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
cmdname (argv[0]), rc, strerror (errno));
return (rc);
}
-
+
printf ("%s\n", lsm_string (&lsm_buffer.lsm));
-
+
return (rc);
}
}
}
- for (i = 0; i < count; i++)
- {
- IOCINIT (data);
+ for (i = 0; i < count; i++) {
+ IOC_INIT (data);
data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id + i;
data.ioc_obdo1.o_mode = S_IFREG | 0644;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
data.ioc_pbuf1 = (char *)&lsm_buffer;
data.ioc_plen1 = sizeof (lsm_buffer);
-
+
IOC_PACK (argv[0], data);
- rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+ rc = l_ioctl (OBD_DEV_ID, ECHO_IOC_SET_STRIPE, buf);
IOC_UNPACK (argv[0], data);
-
+
if (rc != 0) {
- fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
cmdname (argv[0]), rc, strerror (errno));
return (rc);
}
}
-
+
return (0);
}
if (argc != 2)
return CMD_HELP;
-
+
id = strtoll (argv[1], &end, 0);
if (*end == 0) {
fprintf (stderr, "error: %s: invalid object id '%s'\n",
cmdname (argv[0]), argv[1]);
return CMD_HELP;
}
-
- IOCINIT (data);
+
+ IOC_INIT (data);
data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id;
data.ioc_obdo1.o_mode = S_IFREG | 0644;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
-
+
IOC_PACK (argv[0], data);
- rc = ioctl (fd, ECHO_IOC_SET_STRIPE, buf);
+ rc = l_ioctl (OBD_DEV_ID, ECHO_IOC_SET_STRIPE, buf);
IOC_UNPACK (argv[0], data);
-
+
if (rc != 0)
- fprintf (stderr, "Error: %s: rc %d(%s)\n",
+ fprintf (stderr, "Error: %s: rc %d(%s)\n",
cmdname (argv[0]), rc, strerror (errno));
return (0);
int verbose = 1, mode = 0100644, rc = 0, i;
char *end;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc < 2 || argc > 5)
return CMD_HELP;
}
base_id = lsm_buffer.lsm.lsm_object_id;
}
-
+
printf("%s: "LPD64" objects\n", cmdname(argv[0]), count);
gettimeofday(&next_time, NULL);
next_time.tv_sec -= verbose;
data.ioc_pbuf1 = (char *)&lsm_buffer;
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_CREATE, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CREATE, buf);
IOC_UNPACK(argv[0], data);
SHMEM_BUMP();
if (rc < 0) {
if (be_verbose(verbose, &next_time, i, &next_count, count))
printf("%s: #%d is object id "LPX64"\n",
- cmdname(argv[0]), i, data.ioc_obdo1.o_id);
+ cmdname(argv[0]), i, data.ioc_obdo1.o_id);
}
return rc;
}
char *end;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 2)
return CMD_HELP;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_SETATTR, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SETATTR, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
char *end;
int rc = 0, i;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc < 2 || argc > 4)
return CMD_HELP;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_DESTROY, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DESTROY, buf);
IOC_UNPACK(argv[0], data);
SHMEM_BUMP();
if (rc < 0) {
if (argc != 2)
return CMD_HELP;
- IOCINIT(data);
+ IOC_INIT(data);
data.ioc_obdo1.o_id = strtoull(argv[1], &end, 0);
if (*end) {
fprintf(stderr, "error: %s: invalid objid '%s'\n",
printf("%s: object id "LPX64"\n", cmdname(argv[0]),data.ioc_obdo1.o_id);
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_GETATTR, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, buf);
IOC_UNPACK(argv[0], data);
if (rc) {
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
if (argc < 2 && argc > 4)
return CMD_HELP;
- IOCINIT(data);
+ IOC_INIT(data);
count = strtoull(argv[1], &end, 0);
if (*end) {
fprintf(stderr, "error: %s: invalid iteration count '%s'\n",
data.ioc_obdo1.o_id = objid;
data.ioc_obdo1.o_mode = S_IFREG;
data.ioc_obdo1.o_valid = 0xffffffff;
- rc = ioctl(fd, OBD_IOC_GETATTR, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, &data);
SHMEM_BUMP();
if (rc < 0) {
fprintf(stderr, "error: %s: #"LPD64" - %d:%s\n",
len = pages * PAGE_SIZE;
- IOCINIT(data);
+ IOC_INIT(data);
data.ioc_obdo1.o_id = objid;
data.ioc_obdo1.o_mode = S_IFREG;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
pages, objid, data.ioc_offset, ctime(&start.tv_sec));
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_OPEN, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_OPEN, buf);
IOC_UNPACK(argv[0], data);
if (rc) {
fprintf(stderr, "error: brw_open: %s\n", strerror(rc = errno));
rw = write ? OBD_IOC_BRW_WRITE : OBD_IOC_BRW_READ;
for (i = 1, next_count = verbose; i <= count; i++) {
- rc = ioctl(fd, rw, buf);
+ rc = l_ioctl(OBD_DEV_ID, rw, buf);
SHMEM_BUMP();
if (rc) {
fprintf(stderr, "error: %s: #%d - %s on %s\n",
i, pages, diff, (double)i * pages / diff,
ctime(&end.tv_sec));
}
- rw = ioctl(fd, OBD_IOC_CLOSE, buf);
+ rw = l_ioctl(OBD_DEV_ID, OBD_IOC_CLOSE, buf);
if (rw) {
fprintf(stderr, "error: brw_close: %s\n", strerror(rw = errno));
if (!rc)
int rc, i;
char *end;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc <= 6)
return CMD_HELP;
rc = -EINVAL;
goto out;
}
- rc = ioctl(fd, OBD_IOC_LOV_SET_CONFIG, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LOV_SET_CONFIG, buf);
if (rc)
fprintf(stderr, "error: %s: ioctl error: %s\n",
cmdname(argv[0]), strerror(rc = errno));
struct lov_desc desc;
struct obd_uuid *uuidarray;
char *path;
- int rc, tmpfd;
+ int rc, fd;
- /* FIXME: ug. IOCINIT checks fd. */
- tmpfd = fd;
- fd = 1;
- IOCINIT(data);
- fd = tmpfd;
+ IOC_INIT(data);
if (argc != 2)
return CMD_HELP;
path = argv[1];
- tmpfd = open(path, O_RDONLY);
- if (tmpfd < 0) {
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
fprintf(stderr, "open \"%s\" failed: %s\n", path,
strerror(errno));
return -1;
rc = -EINVAL;
goto out;
}
- rc = ioctl(tmpfd, OBD_IOC_LOV_GET_CONFIG, buf);
+ rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
if (rc == -ENOSPC) {
free(uuidarray);
goto repeat;
}
out:
free(uuidarray);
- close(tmpfd);
+ close(fd);
return rc;
}
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, IOC_LDLM_TEST, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_TEST, buf);
if (rc)
fprintf(stderr, "error: %s: test failed: %s\n",
cmdname(argv[0]), strerror(rc = errno));
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, IOC_LDLM_DUMP, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_DUMP, buf);
if (rc)
fprintf(stderr, "error: %s failed: %s\n",
cmdname(argv[0]), strerror(rc = errno));
char argstring[200];
int i, count = sizeof(argstring) - 1;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc > 5)
return CMD_HELP;
}
IOC_PACK(argv[0], data);
- rc = ioctl(fd, IOC_LDLM_REGRESS_START, buf);
+ rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_START, buf);
if (rc)
fprintf(stderr, "error: %s: test failed: %s\n",
cmdname(argv[0]), strerror(rc = errno));
{
int rc;
struct obd_ioctl_data data;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 1)
return CMD_HELP;
- rc = ioctl(fd, IOC_LDLM_REGRESS_STOP, &data);
+ IOC_PACK(argv[0], data);
+ rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_STOP, buf);
if (rc)
fprintf(stderr, "error: %s: test failed: %s\n",
struct obd_ioctl_data data;
int rc;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc != 3)
return CMD_HELP;
data.ioc_offset = atoi(argv[2]);
IOC_PACK(argv[0], data);
- rc = ioctl(fd, IOC_LOV_SET_OSC_ACTIVE, buf);
+ rc = l_ioctl(OBD_DEV_ID, IOC_LOV_SET_OSC_ACTIVE, buf);
if (rc)
fprintf(stderr, "error: %s: failed: %s\n",
cmdname(argv[0]), strerror(rc = errno));
int rc;
struct obd_ioctl_data data;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc < 2 || argc > 3)
return CMD_HELP;
}
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_RECOVD_NEWCONN, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_RECOVD_NEWCONN, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
int rc;
struct obd_ioctl_data data;
- IOCINIT(data);
+ IOC_INIT(data);
if (argc < 2)
return CMD_HELP;
data.ioc_inlbuf1 = argv[1];
IOC_PACK(argv[0], data);
- rc = ioctl(fd, OBD_IOC_RECOVD_FAILCONN, buf);
+ rc = l_ioctl(OBD_DEV_ID, OBD_IOC_RECOVD_FAILCONN, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]),
strerror(rc = errno));
-
+
return rc;
}
{
struct obd_ioctl_data data;
char *parent, *child;
- int rc, tmpfd, verbose = 1;
+ int rc, fd, verbose = 1;
if (argc < 3 || argc > 4)
return CMD_HELP;
if (argc == 4)
verbose = get_verbose(argv[0], argv[3]);
- /* FIXME: ug. IOCINIT checks fd. */
- tmpfd = fd;
- fd = 1;
- IOCINIT(data);
- fd = tmpfd;
+ IOC_INIT(data);
data.ioc_inllen1 = strlen(child) + 1;
data.ioc_inlbuf1 = child;
IOC_PACK(argv[0], data);
- tmpfd = open(parent, O_RDONLY);
- if (tmpfd < 0) {
+ fd = open(parent, O_RDONLY);
+ if (fd < 0) {
fprintf(stderr, "open \"%s\" failed: %s\n", parent,
strerror(errno));
return -1;
}
- rc = ioctl(tmpfd, IOC_MDC_LOOKUP, buf);
+ rc = ioctl(fd, IOC_MDC_LOOKUP, buf);
if (rc < 0) {
fprintf(stderr, "error: %s: ioctl error: %s\n",
cmdname(argv[0]), strerror(rc = errno));
}
- close(tmpfd);
+ close(fd);
if (verbose) {
IOC_UNPACK(argv[0], data);
return rc;
}
+/*
+ * Register a UUID -> (NID, NAL) association through the OBD control device.
+ *
+ * func: command name, used only for diagnostics emitted by IOC_PACK
+ * uuid: NUL-terminated UUID string, sent as inline buffer 1
+ * nid:  network id stored in ioc_nid
+ * nal:  NAL number stored in ioc_nal
+ *
+ * Returns 0 on success and -1 if the ioctl reports failure.
+ * NOTE(review): IOC_PACK appears to return from this function on a packing
+ * error; its full definition is not visible here - confirm.
+ */
+static
+int do_add_uuid(char * func, char *uuid, ptl_nid_t nid, int nal)
+{
+        char tmp[64];   /* scratch space for ptl_nid2str() */
+        int rc;
+        struct obd_ioctl_data data;
+
+        IOC_INIT(data);
+        data.ioc_nid = nid;
+        data.ioc_inllen1 = strlen(uuid) + 1;    /* include the trailing NUL */
+        data.ioc_inlbuf1 = uuid;
+        data.ioc_nal = nal;
+
+        IOC_PACK(func, data);
+        rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ADD_UUID, buf);
+        if (rc) {
+                /* name the ioctl that was actually issued */
+                fprintf(stderr, "OBD_IOC_ADD_UUID failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        printf ("Added uuid %s: %s\n", uuid, ptl_nid2str (tmp, nid));
+        return 0;
+}
+
+/*
+ * Command handler: <cmd> <uuid> <nid> <nal>
+ *
+ * Parses the NID (argv[2]) and the NAL name (argv[3]), then hands the
+ * UUID (argv[1]) to do_add_uuid().  Returns CMD_HELP when the argument
+ * count is wrong, -1 when either argument cannot be parsed, otherwise
+ * whatever do_add_uuid() returns.
+ */
+int jt_obd_add_uuid(int argc, char **argv)
+{
+        ptl_nid_t peer_nid = 0;
+        int nal_type;
+
+        if (argc != 4)
+                return CMD_HELP;
+
+        /* the NID is validated first, then the NAL name */
+        if (ptl_parse_nid(&peer_nid, argv[2]) != 0) {
+                fprintf(stderr, "Can't parse NID %s\n", argv[2]);
+                return -1;
+        }
+
+        nal_type = ptl_name2nal(argv[3]);
+        if (!nal_type) {
+                fprintf(stderr, "Can't parse NAL %s\n", argv[3]);
+                return -1;
+        }
+
+        return do_add_uuid(argv[0], argv[1], peer_nid, nal_type);
+}
+
+/*
+ * Command handler: <cmd> <uuid> <net-type>
+ *
+ * Sends OBD_IOC_CLOSE_UUID for the given UUID (argv[1]) on the NAL named
+ * by argv[2].  Prints a usage line and returns 0 when the argument count
+ * is wrong; returns -1 if the NAL name cannot be parsed or the ioctl
+ * fails, and 0 on success.
+ */
+int jt_obd_close_uuid(int argc, char **argv)
+{
+        int rc, nal;
+        struct obd_ioctl_data data;
+
+        if (argc != 3) {
+                /* both the uuid and the NAL name are required */
+                fprintf(stderr, "usage: %s <uuid> <net-type>\n", argv[0]);
+                return 0;
+        }
+
+        nal = ptl_name2nal(argv[2]);
+
+        if (nal == 0) {
+                fprintf (stderr, "Can't parse NAL %s\n", argv[2]);
+                return -1;
+        }
+
+        IOC_INIT(data);
+        data.ioc_inllen1 = strlen(argv[1]) + 1; /* include the trailing NUL */
+        data.ioc_inlbuf1 = argv[1];
+        data.ioc_nal = nal;
+
+        IOC_PACK(argv[0], data);
+        rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CLOSE_UUID, buf);
+        if (rc) {
+                /* name the ioctl that was actually issued */
+                fprintf(stderr, "OBD_IOC_CLOSE_UUID failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+        return 0;
+}
+
+
+/*
+ * Command handler: <cmd> <uuid>
+ *
+ * Sends OBD_IOC_DEL_UUID for the given UUID.  When the argument is the
+ * literal "_all_" no UUID is packed into the request.
+ * NOTE(review): presumably an empty request asks the kernel to drop every
+ * UUID - confirm against the OBD_IOC_DEL_UUID handler.
+ *
+ * Prints a usage line and returns 0 when the argument count is wrong;
+ * returns -1 if the ioctl fails and 0 on success.
+ */
+int jt_obd_del_uuid(int argc, char **argv)
+{
+        int rc;
+        struct obd_ioctl_data data;
+
+        if (argc != 2) {
+                fprintf(stderr, "usage: %s <uuid>\n", argv[0]);
+                return 0;
+        }
+
+        IOC_INIT(data);
+
+        /* strcmp() != 0: a specific uuid was named, so send it inline */
+        if (strcmp (argv[1], "_all_")) {
+                data.ioc_inllen1 = strlen(argv[1]) + 1;
+                data.ioc_inlbuf1 = argv[1];
+        }
+
+        IOC_PACK(argv[0], data);
+        rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DEL_UUID, buf);
+        if (rc) {
+                /* name the ioctl that was actually issued */
+                fprintf(stderr, "OBD_IOC_DEL_UUID failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+        return 0;
+}
+
static void signal_server(int sig)
{
if (sig == SIGINT) {
int obd_initialize(int argc, char **argv)
{
SHMEM_SETUP();
+ register_ioc_dev(OBD_DEV_ID, OBD_DEV_PATH);
+
return 0;
}
#include <errno.h>
#include <string.h>
+#include <liblustre.h>
#include "obdiolib.h"
int
#define _OBDCTL_H_
int do_disconnect(char *func, int verbose);
-int obd_initialize(int argc, char **argv);
+ int obd_initialize(int argc, char **argv);
void obd_cleanup(int argc, char **argv);
int jt_opt_device(int argc, char **argv);
int jt_obd_failconn(int argc, char **argv);
int jt_obd_mdc_lookup(int argc, char **argv);
int jt_get_version(int argc, char **argv);
+int jt_obd_add_uuid(int argc, char **argv);
+int jt_obd_close_uuid(int argc, char **argv);
+int jt_obd_del_uuid(int argc, char **argv);
#endif
#include <errno.h>
#include <string.h>
+#include <liblustre.h>
#include "obdiolib.h"
int
#include <sys/types.h>
#include <sys/stat.h>
+#include <liblustre.h>
#include "obdiolib.h"
void
printf ("\n");
}
+ fflush(stdout);
last = timenow();
}