From cc669f5de2e3017bea33065b6b085691b042f7bb Mon Sep 17 00:00:00 2001 From: pschwan Date: Sat, 14 Dec 2002 19:40:17 +0000 Subject: [PATCH] land b_md onto HEAD. the highlights: - fstat didn't correctly synchronize attributes (399) - server must handle lock cancellation during blocking AST prep (487) - bulk descriptors were free()d too soon (511) - fix paths in lconf, which would load incorrect modules (451, 507) - fix confusing lconf 'host not found' error message (386) --- lustre/ChangeLog | 5 + lustre/configure.in | 14 +- lustre/extN/htree-ext3-2.4.18.diff | 8 +- lustre/include/linux/lustre_export.h | 1 + lustre/include/linux/lustre_fsfilt.h | 151 +++++ lustre/include/linux/lustre_idl.h | 1 - lustre/include/linux/lustre_lib.h | 15 +- lustre/include/linux/lustre_mds.h | 39 +- lustre/include/linux/lustre_net.h | 5 +- lustre/include/linux/obd.h | 80 +-- lustre/include/linux/obd_support.h | 18 +- lustre/ldlm/ldlm_lock.c | 2 - lustre/ldlm/ldlm_lockd.c | 33 +- lustre/ldlm/ldlm_resource.c | 41 +- lustre/lib/simple.c | 46 +- lustre/llite/file.c | 24 +- lustre/llite/namei.c | 2 +- lustre/llite/rw.c | 30 +- lustre/lov/lov_pack.c | 23 +- lustre/mdc/mdc_request.c | 5 +- lustre/mds/Makefile.am | 11 +- lustre/mds/handler.c | 95 +-- lustre/mds/lproc_mds.c | 14 +- lustre/mds/mds_ext2.c | 145 ---- lustre/mds/mds_extN.c | 356 ---------- lustre/mds/mds_fs.c | 157 +---- lustre/mds/mds_reint.c | 57 +- lustre/obdclass/Makefile.am | 14 +- lustre/obdclass/class_obd.c | 2 +- lustre/obdclass/fsfilt.c | 110 +++ lustre/{mds/mds_ext3.c => obdclass/fsfilt_ext3.c} | 192 +++--- lustre/obdclass/fsfilt_extN.c | 449 +++++++++++++ lustre/obdclass/genops.c | 33 +- lustre/obdecho/echo_client.c | 4 +- lustre/obdfilter/filter.c | 780 ++++++++-------------- lustre/ost/ost_handler.c | 8 +- lustre/ptlrpc/client.c | 13 +- lustre/ptlrpc/events.c | 64 +- lustre/ptlrpc/niobuf.c | 2 +- lustre/scripts/lustre.spec.in | 2 +- lustre/tests/.cvsignore | 1 + lustre/tests/Makefile.am | 5 +- lustre/tests/checkstat.c | 
315 +++++++++ lustre/tests/common.sh | 14 +- lustre/tests/fsx.c | 23 +- lustre/tests/llmodules.sh | 4 +- lustre/tests/runslabinfo | 2 +- lustre/tests/sanity.sh | 356 ++++++---- lustre/tests/toexcl.c | 69 +- lustre/utils/.cvsignore | 1 + lustre/utils/{lconf => lconf.in} | 65 +- lustre/utils/llanalyze | 14 +- lustre/utils/obd.c | 5 +- 53 files changed, 2138 insertions(+), 1787 deletions(-) create mode 100644 lustre/include/linux/lustre_fsfilt.h delete mode 100644 lustre/mds/mds_ext2.c delete mode 100644 lustre/mds/mds_extN.c create mode 100644 lustre/obdclass/fsfilt.c rename lustre/{mds/mds_ext3.c => obdclass/fsfilt_ext3.c} (55%) create mode 100644 lustre/obdclass/fsfilt_extN.c create mode 100644 lustre/tests/checkstat.c rename lustre/utils/{lconf => lconf.in} (96%) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index bbf73c2..8495e29 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -9,6 +9,11 @@ TBA - stop dereferencing request after dropping refcount (457) - don't LASSERT(spin_is_locked) on non-SMP (455) - fixes for many rename() bugs + - fstat didn't correctly synchronize attributes (399) + - server must handle lock cancellation during blocking AST prep (487) + - bulk descriptors were free()d too soon (511) + - fix paths in lconf, which would load incorrect modules (451, 507) + - fix confusing lconf 'host not found' error message (386) 2002-12-02 Andreas Dilger * version v0_5_18 diff --git a/lustre/configure.in b/lustre/configure.in index c40124e..47c3d35 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -62,7 +62,15 @@ portalsdir_def='$(top_srcdir)/../portals' AC_ARG_WITH(portals, [ --with-portals=[path] set path to Portals source (default=../portals)], enable_portalsdir=$withval) AC_ARG_ENABLE(portalsdir, [ --enable-portalsdir=[path] (deprecated) set path to Portals source (default=$(top_srcdir)/../portals)],, enable_portalsdir=$portalsdir_def) PORTALS=$enable_portalsdir + +if test $PORTALS = $portalsdir_def; then + PORTALSLOC='../portals' 
+else + PORTALSLOC=$PORTALS +fi + AC_SUBST(PORTALS) +AC_SUBST(PORTALSLOC) portalslib_def=$enable_portalsdir/linux/utils AC_ARG_WITH(portalslib, [ --with-portalslib=[path] set path to Portals library (default=../portals/linux/utils)], enable_portalslib=$withval) @@ -120,6 +128,6 @@ AC_SUBST(demodir) AC_OUTPUT(Makefile lib/Makefile ldlm/Makefile obdecho/Makefile ptlrpc/Makefile \ lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \ - utils/Makefile tests/Makefile obdfilter/Makefile obdclass/Makefile \ - llite/Makefile doc/Makefile scripts/Makefile \ - scripts/lustre.spec extN/Makefile) + utils/Makefile utils/lconf tests/Makefile obdfilter/Makefile \ + obdclass/Makefile llite/Makefile doc/Makefile scripts/Makefile \ + scripts/lustre.spec extN/Makefile, chmod +x utils/lconf) diff --git a/lustre/extN/htree-ext3-2.4.18.diff b/lustre/extN/htree-ext3-2.4.18.diff index de8bc8a..9eba30c 100644 --- a/lustre/extN/htree-ext3-2.4.18.diff +++ b/lustre/extN/htree-ext3-2.4.18.diff @@ -48,7 +48,7 @@ /* * define how far ahead to read directories while searching them. 
-@@ -38,6 +42,433 @@ +@@ -38,6 +42,437 @@ #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) @@ -81,7 +81,6 @@ + +#define dxtrace_on(command) command +#define dxtrace_off(command) -+#define dxtrace dxtrace_off + +struct fake_dirent +{ @@ -247,6 +246,8 @@ +/* + * Debug + */ ++#ifdef DX_DEBUG ++#define dxtrace dxtrace_on +static void dx_show_index (char * label, struct dx_entry *entries) +{ + int i, n = dx_get_count (entries); @@ -318,6 +319,9 @@ + names, space/bcount,(space/bcount)*100/blocksize); + return (struct stats) { names, space, bcount}; +} ++#else ++#define dxtrace dxtrace_off ++#endif + +/* + * Probe for a directory leaf block to search diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index dc2c0b5..ba9555c 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -14,6 +14,7 @@ #include #include +#include #include struct lov_export_data { diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h new file mode 100644 index 0000000..eeae647 --- /dev/null +++ b/lustre/include/linux/lustre_fsfilt.h @@ -0,0 +1,151 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Filesystem interface helper. + * + */ + +#ifndef _LUSTRE_FSFILT_H +#define _LUSTRE_FSFILT_H + +#ifdef __KERNEL__ + +#include +#include + +typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error); + +struct fsfilt_objinfo { + struct dentry *fso_dentry; + int fso_bufcnt; +}; + +struct fsfilt_operations { + struct list_head fs_list; + struct module *fs_owner; + char *fs_type; + void *(* fs_start)(struct inode *inode, int op); + void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_remote *nb); + int (* fs_commit)(struct inode *inode, void *handle); + int (* fs_setattr)(struct dentry *dentry, void *handle, + struct iattr *iattr); + int (* fs_set_md)(struct inode *inode, void *handle, void *md, + int size); + int (* fs_get_md)(struct inode *inode, void *md, int size); + ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, + loff_t *offset); + int (* fs_journal_data)(struct file *file); + int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd, + void *handle, fsfilt_cb_t cb_func); + int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); +}; + +extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); +extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops); +extern struct fsfilt_operations *fsfilt_get_ops(char *type); +extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); + +#define FSFILT_OP_UNLINK 1 +#define FSFILT_OP_RMDIR 2 +#define FSFILT_OP_RENAME 3 +#define FSFILT_OP_CREATE 4 +#define FSFILT_OP_MKDIR 5 +#define FSFILT_OP_SYMLINK 6 +#define FSFILT_OP_MKNOD 7 +#define FSFILT_OP_SETATTR 8 +#define FSFILT_OP_LINK 9 + +static inline void *fsfilt_start(struct obd_device *obd, + struct inode *inode, int op) +{ + return 
obd->obd_fsops->fs_start(inode, op); +} + +static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_remote *nb) +{ + return obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb); +} + +static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, + void *handle) +{ + return obd->obd_fsops->fs_commit(inode, handle); +} + +static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, + void *handle, struct iattr *iattr) +{ + int rc; + /* + * NOTE: we probably don't need to take i_sem here when changing + * ATTR_SIZE because the MDS never needs to truncate a file. + * The ext2/ext3 code never truncates a directory, and files + * stored on the MDS are entirely sparse (no data blocks). + * If we do need to get it, we can do it here. + */ + lock_kernel(); + rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr); + unlock_kernel(); + + return rc; +} + +static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, + void *handle, void *md, int size) +{ + return obd->obd_fsops->fs_set_md(inode, handle, md, size); +} + +static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode, + void *md, int size) +{ + return obd->obd_fsops->fs_get_md(inode, md, size); +} + +static inline ssize_t fsfilt_readpage(struct obd_device *obd, + struct file *file, char *buf, + size_t count, loff_t *offset) +{ + return obd->obd_fsops->fs_readpage(file, buf, count, offset); +} + +static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file) +{ + return obd->obd_fsops->fs_journal_data(file); +} + +static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, + void *handle, fsfilt_cb_t cb_func) +{ + return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func); +} + +static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs, + struct obd_statfs *osfs) +{ + return 
obd->obd_fsops->fs_statfs(fs, osfs); +} + +#endif /* __KERNEL__ */ + +#endif diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index ea75f08..8d6536f 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -284,7 +284,6 @@ struct obd_statfs { struct obd_ioobj { obd_id ioo_id; obd_gr ioo_gr; - /* struct lustre_handle ioo_handle; XXX in the future */ __u32 ioo_type; __u32 ioo_bufcnt; }; diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index da5cc81..0372504 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -30,10 +30,23 @@ #else # include #endif -#include /* XXX just for LASSERT! */ +#include #include +#include /* XXX just for LASSERT! */ #include +#ifndef LPU64 +#if BITS_PER_LONG > 32 +#define LPU64 "%lu" +#define LPD64 "%ld" +#define LPX64 "%#lx" +#else +#define LPU64 "%Lu" +#define LPD64 "%Ld" +#define LPX64 "%#Lx" +#endif +#endif + #ifdef __KERNEL__ /* l_net.c */ struct ptlrpc_request; diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 0260ac8..558c10b 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -153,6 +153,10 @@ int mds_reint(struct ptlrpc_request *req, int offset); int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req, int offset, struct mds_body *body, struct inode *inode); +/* mds/mds_fs.c */ +int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt); +void mds_fs_cleanup(struct obd_device *obddev); + /* mdc/mdc_request.c */ int mdc_enqueue(struct lustre_handle *conn, int lock_type, struct lookup_intent *it, int lock_mode, struct inode *dir, @@ -198,41 +202,6 @@ int mds_client_add(struct mds_obd *mds, struct mds_export_data *med, int cl_off); int mds_client_free(struct obd_export *exp); -/* mds/mds_fs.c */ -struct mds_fs_operations { - struct module *fs_owner; - void *(* fs_start)(struct inode *inode, int op); - int (* 
fs_commit)(struct inode *inode, void *handle); - int (* fs_setattr)(struct dentry *dentry, void *handle, - struct iattr *iattr); - int (* fs_set_md)(struct inode *inode, void *handle, - struct lov_mds_md *md, int size); - int (* fs_get_md)(struct inode *inode, struct lov_mds_md *md, - int size); - ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, - loff_t *offset); - void (* fs_delete_inode)(struct inode *inode); - void (* cl_delete_inode)(struct inode *inode); - int (* fs_journal_data)(struct file *file); - int (* fs_set_last_rcvd)(struct mds_obd *mds, void *handle); - int (* fs_statfs)(struct super_block *sb, struct statfs *sfs); -}; - -extern int mds_register_fs_type(struct mds_fs_operations *op, const char *name); -extern void mds_unregister_fs_type(const char *name); -extern int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt); -extern void mds_fs_cleanup(struct obd_device *obddev); - -#define MDS_FSOP_UNLINK 1 -#define MDS_FSOP_RMDIR 2 -#define MDS_FSOP_RENAME 3 -#define MDS_FSOP_CREATE 4 -#define MDS_FSOP_MKDIR 5 -#define MDS_FSOP_SYMLINK 6 -#define MDS_FSOP_MKNOD 7 -#define MDS_FSOP_SETATTR 8 -#define MDS_FSOP_LINK 9 - #endif /* __KERNEL__ */ /* ioctls for trying requests */ diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index e539d8e..fb060d0 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -344,12 +344,11 @@ void *lustre_msg_buf(struct lustre_msg *m, int n); static inline void ptlrpc_bulk_decref(struct ptlrpc_bulk_desc *desc) { + CDEBUG(D_PAGE, "%p -> %d\n", desc, atomic_read(&desc->bd_refcount) - 1); + if (atomic_dec_and_test(&desc->bd_refcount)) { CDEBUG(D_PAGE, "Released last ref on %p, freeing\n", desc); ptlrpc_free_bulk(desc); - } else { - CDEBUG(D_PAGE, "%p -> %d\n", desc, - atomic_read(&desc->bd_refcount)); } } diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index cb72b5b..9612846 100644 --- a/lustre/include/linux/obd.h 
+++ b/lustre/include/linux/obd.h @@ -123,15 +123,12 @@ struct client_obd { struct mds_obd { struct ptlrpc_service *mds_service; - char *mds_fstype; struct super_block *mds_sb; - struct super_operations *mds_sop; struct vfsmount *mds_vfsmnt; struct obd_run_ctxt mds_ctxt; struct file_operations *mds_fop; struct inode_operations *mds_iop; struct address_space_operations *mds_aops; - struct mds_fs_operations *mds_fsops; int mds_max_mdsize; struct file *mds_rcvd_filp; @@ -250,6 +247,7 @@ struct obd_device { struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ spinlock_t obd_dev_lock; + struct fsfilt_operations *obd_fsops; union { struct ext2_obd ext2; struct filter_obd filter; @@ -257,7 +255,6 @@ struct obd_device { struct client_obd cli; struct ost_obd ost; struct echo_client_obd echo_client;; - // struct osc_obd osc; struct ldlm_obd ldlm; struct echo_obd echo; struct recovd_obd recovd; @@ -269,7 +266,7 @@ struct obd_device { } u; /* Fields used by LProcFS */ unsigned int cntr_mem_size; - void* counters; + void *counters; }; struct obd_ops { @@ -342,79 +339,6 @@ struct obd_ops { __u32 mode, struct lustre_handle *); int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *, int local_only); - }; - -static inline void *mds_fs_start(struct mds_obd *mds, struct inode *inode, - int op) -{ - return mds->mds_fsops->fs_start(inode, op); -} - -static inline int mds_fs_commit(struct mds_obd *mds, struct inode *inode, - void *handle) -{ - return mds->mds_fsops->fs_commit(inode, handle); -} - -static inline int mds_fs_setattr(struct mds_obd *mds, struct dentry *dentry, - void *handle, struct iattr *iattr) -{ - int rc; - /* - * NOTE: we probably don't need to take i_sem here when changing - * ATTR_SIZE because the MDS never needs to truncate a file. - * The ext2/ext3 code never truncates a directory, and files - * stored on the MDS are entirely sparse (no data blocks). 
- * If we do need to get it, we can do it here. - */ - lock_kernel(); - rc = mds->mds_fsops->fs_setattr(dentry, handle, iattr); - unlock_kernel(); - - return rc; -} - -static inline int mds_fs_set_md(struct mds_obd *mds, struct inode *inode, - void *handle, struct lov_mds_md *md, - int size) -{ - return mds->mds_fsops->fs_set_md(inode, handle, md, size); -} - -static inline int mds_fs_get_md(struct mds_obd *mds, struct inode *inode, - struct lov_mds_md *md, int size) -{ - return mds->mds_fsops->fs_get_md(inode, md, size); -} - -static inline ssize_t mds_fs_readpage(struct mds_obd *mds, struct file *file, - char *buf, size_t count, loff_t *offset) -{ - return mds->mds_fsops->fs_readpage(file, buf, count, offset); -} - -/* Set up callback to update mds->mds_last_committed with the current - * value of mds->mds_last_recieved when this transaction is on disk. - */ -static inline int mds_fs_set_last_rcvd(struct mds_obd *mds, void *handle) -{ - return mds->mds_fsops->fs_set_last_rcvd(mds, handle); -} - -/* Enable data journaling on the given file */ -static inline ssize_t mds_fs_journal_data(struct mds_obd *mds, - struct file *file) -{ - return mds->mds_fsops->fs_journal_data(file); -} - -static inline int mds_fs_statfs(struct mds_obd *mds, struct statfs *sfs) -{ - if (mds->mds_fsops->fs_statfs) - return mds->mds_fsops->fs_statfs(mds->mds_sb, sfs); - - return vfs_statfs(mds->mds_sb, sfs); -} #endif /* __KERNEL */ #endif /* __OBD_H */ diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index b95b266..f6c2770 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -30,7 +30,7 @@ #include /* global variables */ -extern unsigned long obd_memory; +extern atomic_t obd_memory; extern unsigned long obd_fail_loc; extern unsigned long obd_timeout; extern char obd_recovery_upcall[128]; @@ -153,23 +153,23 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev) #define OBD_ALLOC(ptr, size) \ do { \ void *lptr; \ - 
long s = (size); \ + int s = (size); \ (ptr) = lptr = kmalloc(s, GFP_KERNEL); \ if (lptr == NULL) { \ CERROR("kmalloc of '" #ptr "' (%ld bytes) failed " \ "at %s:%d\n", s, __FILE__, __LINE__); \ } else { \ memset(lptr, 0, s); \ - obd_memory += s; \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at " \ - "%p (tot %ld).\n", s, lptr, obd_memory); \ + atomic_add(s, &obd_memory); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p " \ + "(tot %d)\n", s, lptr, atomic_read(&obd_memory));\ } \ } while (0) #ifdef CONFIG_DEBUG_SLAB #define POISON(lptr, s) do {} while (0) #else -#define POISON(lptr, s) memset(lptr, 0xb6, s) +#define POISON(lptr, s) memset(lptr, 0x5a, s) #endif #define OBD_FREE(ptr, size) \ @@ -179,9 +179,9 @@ do { \ LASSERT(lptr); \ POISON(lptr, s); \ kfree(lptr); \ - obd_memory -= s; \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %ld).\n", \ - s, lptr, obd_memory); \ + atomic_sub(s, &obd_memory); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + s, lptr, atomic_read(&obd_memory)); \ (ptr) = (void *)0xdeadbeef; \ } while (0) diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 83a6661..9742eec 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -175,8 +175,6 @@ void ldlm_lock_put(struct ldlm_lock *lock) lock->l_resource = NULL; lock->l_random = DEAD_HANDLE_MAGIC; - if (lock->l_export && lock->l_export->exp_connection) - ptlrpc_put_connection(lock->l_export->exp_connection); memset(lock, 0x5a, sizeof(*lock)); kmem_cache_free(ldlm_lock_slab, lock); CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 0).\n", diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index bd2dd09..cbf67d0 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -142,6 +142,15 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock, RETURN(0); } + LASSERT(lock); + + l_lock(&lock->l_resource->lr_namespace->ns_lock); + if (lock->l_destroyed) { + /* What's the point? 
*/ + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + RETURN(0); + } + req = ptlrpc_prep_req(&lock->l_export->exp_ldlm_data.led_import, LDLM_BL_CALLBACK, 1, &size, NULL); if (!req) @@ -156,6 +165,8 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock, req->rq_replen = 0; /* no reply needed */ ldlm_add_waiting_lock(lock); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + (void)ptl_send_rpc(req); /* not waiting for reply */ @@ -247,6 +258,13 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req) sizeof(lock->l_remote_handle)); LDLM_DEBUG(lock, "server-side enqueue handler, new lock created"); + LASSERT(req->rq_export); + lock->l_export = req->rq_export; + l_lock(&lock->l_resource->lr_namespace->ns_lock); + list_add(&lock->l_export_chain, + &lock->l_export->exp_ldlm_data.led_held_locks); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + err = ldlm_lock_enqueue(lock, cookie, cookielen, &flags, ldlm_server_completion_ast, ldlm_server_blocking_ast); @@ -266,14 +284,6 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req) dlm_rep->lock_mode = lock->l_req_mode; } - lock->l_export = req->rq_export; - if (lock->l_export) { - l_lock(&lock->l_resource->lr_namespace->ns_lock); - list_add(&lock->l_export_chain, - &lock->l_export->exp_ldlm_data.led_held_locks); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - } - EXIT; out: if (lock) @@ -481,11 +491,16 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) } if (req->rq_export == NULL) { + struct ldlm_request *dlm_req; + CERROR("operation %d with bad export (ptl req %d/rep %d)\n", req->rq_reqmsg->opc, req->rq_request_portal, req->rq_reply_portal); CERROR("--> export addr: "LPX64", cookie: "LPX64"\n", req->rq_reqmsg->addr, req->rq_reqmsg->cookie); + dlm_req = lustre_msg_buf(req->rq_reqmsg, 0); + CERROR("--> lock addr: "LPX64", cookie: "LPX64"\n", + dlm_req->lock_handle1.addr,dlm_req->lock_handle1.cookie); CERROR("--> ignoring this error as a temporary workaround! 
" "beware!\n"); //RETURN(-ENOTCONN); @@ -554,7 +569,7 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) } -static int ldlm_iocontrol(long cmd, struct lustre_handle *conn, int len, +static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, void *karg, void *uarg) { struct obd_device *obddev = class_conn2obd(conn); diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 64ec591..8bb5aa3 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -68,23 +68,19 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) ENTRY; OBD_ALLOC(ns, sizeof(*ns)); - if (!ns) { - LBUG(); - GOTO(out, NULL); - } + if (!ns) + RETURN(NULL); ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE); - if (!ns->ns_hash) { - LBUG(); - GOTO(out, ns); - } - obd_memory += sizeof(*ns->ns_hash) * RES_HASH_SIZE; + if (!ns->ns_hash) + GOTO(out_ns, NULL); + + atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); OBD_ALLOC(ns->ns_name, strlen(name) + 1); - if (!ns->ns_name) { - LBUG(); - GOTO(out, ns); - } + if (!ns->ns_name) + GOTO(out_hash, NULL); + strcpy(ns->ns_name, name); INIT_LIST_HEAD(&ns->ns_root_list); @@ -109,16 +105,12 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) ldlm_proc_namespace(ns); RETURN(ns); - out: - if (ns && ns->ns_hash) { - memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); - vfree(ns->ns_hash); - obd_memory -= sizeof(*ns->ns_hash) * RES_HASH_SIZE; - } - if (ns && ns->ns_name) - OBD_FREE(ns->ns_name, strlen(name) + 1); - if (ns) - OBD_FREE(ns, sizeof(*ns)); +out_hash: + memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); + vfree(ns->ns_hash); + atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); +out_ns: + OBD_FREE(ns, sizeof(*ns)); return NULL; } @@ -212,7 +204,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns) memset(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); vfree(ns->ns_hash /* , 
sizeof(*ns->ns_hash) * RES_HASH_SIZE */); - obd_memory -= sizeof(*ns->ns_hash) * RES_HASH_SIZE; + atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1); OBD_FREE(ns, sizeof(*ns)); @@ -411,7 +403,6 @@ int ldlm_resource_putref(struct ldlm_resource *res) ENTRY; CDEBUG(D_INFO, "putref res: %p count: %d\n", res, atomic_read(&res->lr_refcount)); - out: LASSERT(atomic_read(&res->lr_refcount) >= 0); } diff --git a/lustre/lib/simple.c b/lustre/lib/simple.c index 4b423d4..cb4ccda 100644 --- a/lustre/lib/simple.c +++ b/lustre/lib/simple.c @@ -139,25 +139,28 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) down(&dir->d_inode->i_sem); dchild = lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) - GOTO(out, PTR_ERR(dchild)); + GOTO(out_up, dchild); if (dchild->d_inode) { if ((dchild->d_inode->i_mode & S_IFMT) != S_IFREG) - GOTO(out, err = -EEXIST); + GOTO(out_err, err = -EEXIST); - GOTO(out, dchild); + GOTO(out_up, dchild); } err = vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG); - EXIT; -out: - up(&dir->d_inode->i_sem); - if (err) { - dput(dchild); - RETURN(ERR_PTR(err)); - } + if (err) + GOTO(out_err, err); + up(&dir->d_inode->i_sem); RETURN(dchild); + +out_err: + dput(dchild); + dchild = ERR_PTR(err); +out_up: + up(&dir->d_inode->i_sem); + return dchild; } /* utility to make a directory */ @@ -172,25 +175,28 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) down(&dir->d_inode->i_sem); dchild = lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) - GOTO(out, PTR_ERR(dchild)); + GOTO(out_up, dchild); if (dchild->d_inode) { if (!S_ISDIR(dchild->d_inode->i_mode)) - GOTO(out, err = -ENOTDIR); + GOTO(out_err, err = -ENOTDIR); - GOTO(out, dchild); + GOTO(out_up, dchild); } err = vfs_mkdir(dir->d_inode, dchild, mode); - EXIT; -out: - up(&dir->d_inode->i_sem); - if (err) { - dput(dchild); - RETURN(ERR_PTR(err)); - } + if (err) + GOTO(out_err, err); + 
up(&dir->d_inode->i_sem); RETURN(dchild); + +out_err: + dput(dchild); + dchild = ERR_PTR(err); +out_up: + up(&dir->d_inode->i_sem); + return dchild; } /* diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 3d343a0..b265ffc 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -804,25 +804,25 @@ static int ll_inode_revalidate(struct dentry *dentry) struct lov_stripe_md *lsm; ENTRY; - if (!inode) { + if (!inode) { CERROR("REPORT THIS LINE TO PETER\n"); RETURN(0); } - - if (! ll_have_md_lock(dentry)) { - struct ptlrpc_request *req; + + if (!ll_have_md_lock(dentry)) { + struct ptlrpc_request *req = NULL; struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); - struct mds_body *body; - int rc, datalen, valid; + struct mds_body *body; + unsigned long valid = 0; + int datalen = 0; + int rc; if (S_ISREG(inode->i_mode)) { datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL); valid |= OBD_MD_FLEASIZE; } - rc = mdc_getattr(&sbi->ll_mdc_conn, - inode->i_ino, - inode->i_mode, valid, - datalen, &req); + rc = mdc_getattr(&sbi->ll_mdc_conn, inode->i_ino, + inode->i_mode, valid, datalen, &req); if (rc) { CERROR("failure %d inode "LPX64"\n", rc, inode->i_ino); ptlrpc_req_finished(req); @@ -832,9 +832,7 @@ static int ll_inode_revalidate(struct dentry *dentry) body = lustre_msg_buf(req->rq_repmsg, 0); ll_update_inode(inode, body); ptlrpc_req_finished(req); - } - - + } lsm = ll_i2info(inode)->lli_smd; if (!lsm) /* object not yet allocated, don't validate size */ diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 00a6aac..54a81a4 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -377,7 +377,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de, RETURN(rc); drop_req: - ptlrpc_free_req(request); + ptlrpc_req_finished(request); drop_lock: #warning FIXME: must release lock here RETURN(rc); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index ee1631e..c572590 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -241,23 +241,27 @@ 
static int ll_prepare_write(struct file *file, struct page *page, unsigned from, return rc; } -/* returns the page unlocked, but with a reference */ -static int ll_writepage(struct page *page) -{ - struct inode *inode = page->mapping->host; - int err; - ENTRY; +/* Write a page from kupdated or kswapd. + * + * We unlock the page even in the face of an error, otherwise dirty + * pages could OOM the system if they cannot be written. Also, there + * is nobody to return an error code to from here - the application + * may not even be running anymore. + * + * Returns the page unlocked, but with a reference. + */ +static int ll_writepage(struct page *page) { + struct inode *inode = page->mapping->host; int err; ENTRY; - if (!PageLocked(page)) - LBUG(); + LASSERT(PageLocked(page)); + /* XXX need to make sure we have LDLM lock on this page */ err = ll_brw(OBD_BRW_WRITE, inode, page, 1); - if ( !err ) { - //SetPageUptodate(page); - set_page_clean(page); - } else { + if (err) CERROR("ll_brw failure %d\n", err); - } + else + set_page_clean(page); + unlock_page(page); RETURN(err); } diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index d28a6c9..247015c 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -59,8 +59,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, int i; ENTRY; - if (lsm) + if (lsm) { + if (lsm->lsm_magic != LOV_MAGIC) { + CERROR("bad mem LOV MAGIC: %#08x != %#08x\n", + lsm->lsm_magic, LOV_MAGIC); + RETURN(-EINVAL); + } stripe_count = lsm->lsm_stripe_count; + } /* XXX LOV STACKING call into osc for sizes */ lmm_size = lov_mds_md_size(ost_count); @@ -90,6 +96,7 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, /* XXX endianness */ lmm->lmm_magic = (lsm->lsm_magic); lmm->lmm_object_id = (lsm->lsm_object_id); + LASSERT(lsm->lsm_object_id); lmm->lmm_stripe_size = (lsm->lsm_stripe_size); lmm->lmm_stripe_pattern = (lsm->lsm_stripe_pattern); lmm->lmm_stripe_offset = (lsm->lsm_stripe_offset); @@ 
-97,9 +104,11 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, /* Only fill in the object ids which we are actually using. * Assumes lmm_objects is otherwise zero-filled. */ - for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) + for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) { /* XXX call down to osc_packmd() to do the packing */ + LASSERT(loi->loi_id); lmm->lmm_objects[loi->loi_ost_idx].l_object_id = (loi->loi_id); + } RETURN(lmm_size); } @@ -118,9 +127,15 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, int i; ENTRY; - if (lmm) + if (lmm) { /* endianness */ + if (lmm->lmm_magic != LOV_MAGIC) { + CERROR("bad wire LOV MAGIC: %#08x != %#08x\n", + lmm->lmm_magic, LOV_MAGIC); + RETURN(-EINVAL); + } stripe_count = (lmm->lmm_stripe_count); + } if (!stripe_count) stripe_count = lov->desc.ld_default_stripe_count; @@ -156,6 +171,7 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, ost_offset = lsm->lsm_stripe_offset = (lmm->lmm_stripe_offset); lsm->lsm_magic = (lmm->lmm_magic); lsm->lsm_object_id = (lmm->lmm_object_id); + LASSERT(lsm->lsm_object_id); lsm->lsm_stripe_size = (lmm->lmm_stripe_size); lsm->lsm_stripe_pattern = (lmm->lmm_stripe_pattern); @@ -171,6 +187,7 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, loi->loi_ost_idx = ost_offset; loi++; } + LASSERT(loi - lsm->lsm_oinfo == stripe_count); RETURN(lsm_size); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index f8a5f0f..daeccf1 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -203,7 +203,7 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, ll_invalidate_inode_pages(inode); } - if ( inode != inode->i_sb->s_root->d_inode ) { + if (inode != inode->i_sb->s_root->d_inode) { /* XXX should this igrab move up 12 lines? 
*/ LASSERT(igrab(inode) == inode); d_delete_aliases(inode); @@ -569,6 +569,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, bulk->bp_buflen = PAGE_SIZE; bulk->bp_buf = addr; bulk->bp_xid = req->rq_xid; + desc->bd_ptl_ev_hdlr = NULL; desc->bd_portal = MDS_BULK_PORTAL; rc = ptlrpc_register_bulk(desc); @@ -591,7 +592,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, EXIT; out2: - ptlrpc_free_bulk(desc); + ptlrpc_bulk_decref(desc); out: *request = req; return rc; diff --git a/lustre/mds/Makefile.am b/lustre/mds/Makefile.am index 3332d0b..6a0855e 100644 --- a/lustre/mds/Makefile.am +++ b/lustre/mds/Makefile.am @@ -5,15 +5,10 @@ DEFS= -if LINUX25 -FSMOD = mds_ext3 -else -FSMOD = mds_extN -endif - MODULE = mds -modulefs_DATA = mds.o $(FSMOD).o -EXTRA_PROGRAMS = mds $(FSMOD) + +modulefs_DATA = mds.o +EXTRA_PROGRAMS = mds LINX= mds_updates.c simple.c ll_pack.c target.c diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 393b793..8eab05f 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -40,6 +40,8 @@ #include #endif #include +#include +#include #include static kmem_cache_t *mds_file_cache; @@ -73,12 +75,11 @@ static int mds_bulk_timeout(void *data) static int mds_sendpage(struct ptlrpc_request *req, struct file *file, __u64 offset) { - int rc = 0; - struct mds_obd *mds = mds_req2mds(req); struct ptlrpc_bulk_desc *desc; struct ptlrpc_bulk_page *bulk; struct l_wait_info lwi; char *buf; + int rc = 0; ENTRY; desc = ptlrpc_prep_bulk(req->rq_connection); @@ -93,7 +94,8 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, if (buf == NULL) GOTO(cleanup_bulk, rc = -ENOMEM); - rc = mds_fs_readpage(mds, file, buf, PAGE_SIZE, (loff_t *)&offset); + rc = fsfilt_readpage(req->rq_export->exp_obd, file, buf, PAGE_SIZE, + (loff_t *)&offset); if (rc != PAGE_SIZE) GOTO(cleanup_buf, rc = -EIO); @@ -101,6 +103,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file 
*file, bulk->bp_xid = req->rq_xid; bulk->bp_buf = buf; bulk->bp_buflen = PAGE_SIZE; + desc->bd_ptl_ev_hdlr = NULL; desc->bd_portal = MDS_BULK_PORTAL; rc = ptlrpc_send_bulk(desc); @@ -127,7 +130,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, cleanup_buf: OBD_FREE(buf, PAGE_SIZE); cleanup_bulk: - ptlrpc_free_bulk(desc); + ptlrpc_bulk_decref(desc); out: return rc; } @@ -596,8 +599,9 @@ int mds_pack_md(struct mds_obd *mds, struct ptlrpc_request *req, * discarded right after unpacking, and the LOV can figure out the * size itself from the ost count. */ - if ((rc = mds_fs_get_md(mds, inode, lmm, lmm_size)) < 0) { - CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino, rc); + if ((rc = fsfilt_get_md(req->rq_export->exp_obd, inode, + lmm, lmm_size)) < 0) { + CDEBUG(D_INFO, "No md for ino %lu: rc = %d\n", inode->i_ino,rc); } else if (rc > 0) { body->valid |= OBD_MD_FLEASIZE; rc = 0; @@ -623,7 +627,7 @@ static int mds_getattr_internal(struct mds_obd *mds, struct dentry *dentry, mds_pack_inode2fid(&body->fid1, inode); mds_pack_inode2body(body, inode); - if (S_ISREG(inode->i_mode)) { + if (S_ISREG(inode->i_mode) /* && reqbody->valid & OBD_MD_FLEASIZE */) { rc = mds_pack_md(mds, req, reply_off + 1, body, inode); } else if (S_ISLNK(inode->i_mode) && reqbody->valid & OBD_MD_LINKNAME) { char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1); @@ -656,7 +660,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req) __u64 res_id[3] = {0, 0, 0}; ENTRY; - LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds")); + LASSERT(!strcmp(obd->obd_type->typ_name, "mds")); if (req->rq_reqmsg->bufcount <= offset + 1) { LBUG(); @@ -748,7 +752,7 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) inode = de->d_inode; if (S_ISREG(body->fid1.f_type)) { - int rc = mds_fs_get_md(mds, inode, NULL, 0); + int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); CDEBUG(D_INODE, "got %d bytes MD data for inode 
%lu\n", rc, inode->i_ino); if (rc < 0) { @@ -786,18 +790,18 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) req->rq_status = mds_getattr_internal(mds, de, req, body, 0); + EXIT; out: l_dput(de); out_pop: pop_ctxt(&saved, &mds->mds_ctxt, &uc); - RETURN(rc); + return rc; } static int mds_statfs(struct ptlrpc_request *req) { - struct mds_obd *mds = mds_req2mds(req); + struct obd_device *obd = req->rq_export->exp_obd; struct obd_statfs *osfs; - struct statfs sfs; int rc, size = sizeof(*osfs); ENTRY; @@ -807,24 +811,24 @@ static int mds_statfs(struct ptlrpc_request *req) GOTO(out, rc); } - rc = mds_fs_statfs(mds, &sfs); + osfs = lustre_msg_buf(req->rq_repmsg, 0); + rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs); if (rc) { CERROR("mds: statfs failed: rc %d\n", rc); GOTO(out, rc); } - osfs = lustre_msg_buf(req->rq_repmsg, 0); - memset(osfs, 0, size); - statfs_pack(osfs, &sfs); obd_statfs_pack(osfs, osfs); + EXIT; out: req->rq_status = rc; - RETURN(0); + return 0; } static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle) { struct mds_file_data *mfd = NULL; + ENTRY; if (!handle || !handle->addr) RETURN(NULL); @@ -836,12 +840,13 @@ static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle) if (mfd->mfd_servercookie != handle->cookie) RETURN(NULL); - return mfd; + RETURN(mfd); } static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req, int offset, struct mds_body *body, struct inode *inode) { + struct obd_device *obd = req->rq_export->exp_obd; struct lov_mds_md *lmm = lustre_msg_buf(req->rq_reqmsg, offset); int lmm_size = req->rq_reqmsg->buflens[offset]; struct obd_run_ctxt saved; @@ -866,17 +871,17 @@ static int mds_store_md(struct mds_obd *mds, struct ptlrpc_request *req, uc.ouc_cap = body->capability; push_ctxt(&saved, &mds->mds_ctxt, &uc); mds_start_transno(mds); - handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR); + handle = fsfilt_start(obd, inode,FSFILT_OP_SETATTR); if (IS_ERR(handle)) { rc = 
PTR_ERR(handle); mds_finish_transno(mds, handle, req, rc); GOTO(out_ea, rc); } - rc = mds_fs_set_md(mds, inode, handle, lmm, lmm_size); + rc = fsfilt_set_md(obd, inode,handle,lmm,lmm_size); rc = mds_finish_transno(mds, handle, req, rc); - rc2 = mds_fs_commit(mds, inode, handle); + rc2 = fsfilt_commit(obd, inode, handle); if (rc2 && !rc) rc = rc2; out_ea: @@ -1096,7 +1101,7 @@ int mds_handle(struct ptlrpc_request *req); static int check_for_next_transno(struct mds_obd *mds) { struct ptlrpc_request *req; - req = list_entry(mds->mds_recovery_queue.next, + req = list_entry(mds->mds_recovery_queue.next, struct ptlrpc_request, rq_list); return req->rq_reqmsg->transno == mds->mds_next_recovery_transno; } @@ -1104,10 +1109,10 @@ static int check_for_next_transno(struct mds_obd *mds) static void process_recovery_queue(struct mds_obd *mds) { struct ptlrpc_request *req; - + for (;;) { spin_lock(&mds->mds_processing_task_lock); - req = list_entry(mds->mds_recovery_queue.next, + req = list_entry(mds->mds_recovery_queue.next, struct ptlrpc_request, rq_list); if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) { @@ -1121,7 +1126,7 @@ static void process_recovery_queue(struct mds_obd *mds) DEBUG_REQ(D_HA, req, ""); mds_handle(req); - + if (list_empty(&mds->mds_recovery_queue)) break; } @@ -1148,7 +1153,7 @@ static int queue_recovery_request(struct ptlrpc_request *req, /* XXX O(n^2) */ list_for_each(tmp, &mds->mds_recovery_queue) { - struct ptlrpc_request *reqiter = + struct ptlrpc_request *reqiter = list_entry(tmp, struct ptlrpc_request, rq_list); if (reqiter->rq_reqmsg->transno > transno) { list_add_tail(&req->rq_list, &reqiter->rq_list); @@ -1180,7 +1185,7 @@ static int queue_recovery_request(struct ptlrpc_request *req, return 0; } -static int filter_recovery_request(struct ptlrpc_request *req, +static int filter_recovery_request(struct ptlrpc_request *req, struct mds_obd *mds, int *process) { switch (req->rq_reqmsg->opc) { @@ -1189,13 +1194,13 @@ static int 
filter_recovery_request(struct ptlrpc_request *req, case MDS_OPEN: *process = 1; RETURN(0); - + case MDS_GETSTATUS: /* used in unmounting */ case MDS_REINT: case LDLM_ENQUEUE: *process = queue_recovery_request(req, mds); RETURN(0); - + default: DEBUG_REQ(D_ERROR, req, "not permitted during recovery"); *process = 0; @@ -1324,13 +1329,13 @@ int mds_handle(struct ptlrpc_request *req) case MDS_REINT: { int size = sizeof(struct mds_body); - int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0), + int opc = *(u32 *)lustre_msg_buf(req->rq_reqmsg, 0), realopc = opc & REINT_OPCODE_MASK; - + DEBUG_REQ(D_INODE, req, "reint (%s%s)", reint_names[realopc], opc & REINT_REPLAYING ? "|REPLAYING" : ""); - + OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, @@ -1400,7 +1405,7 @@ int mds_handle(struct ptlrpc_request *req) DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); return mds_queue_final_reply(req, rc); } - + /* MDS_CONNECT / EALREADY (note: not -EALREADY!) 
isn't an error */ if (rc && (req->rq_reqmsg->opc != MDS_CONNECT || rc != EALREADY)) { @@ -1488,16 +1493,18 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2) GOTO(err_dec, rc = -EINVAL); - mds->mds_fstype = strdup(data->ioc_inlbuf2); + obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); + if (IS_ERR(obddev->obd_fsops)) + GOTO(err_dec, rc = PTR_ERR(obddev->obd_fsops)); - mnt = do_kern_mount(mds->mds_fstype, 0, data->ioc_inlbuf1, NULL); + mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL); if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); CERROR("do_kern_mount failed: rc = %d\n", rc); - GOTO(err_kfree, rc); + GOTO(err_ops, rc); } - CERROR("%s: mnt is %p\n", data->ioc_inlbuf1, mnt); + CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt); mds->mds_sb = mnt->mnt_root->d_inode->i_sb; if (!mds->mds_sb) GOTO(err_put, rc = -ENODEV); @@ -1524,7 +1531,7 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) mds->mds_processing_task = 0; INIT_LIST_HEAD(&mds->mds_recovery_queue); INIT_LIST_HEAD(&mds->mds_delayed_reply_queue); - + RETURN(0); err_fs: @@ -1534,8 +1541,8 @@ err_put: mntput(mds->mds_vfsmnt); mds->mds_sb = 0; lock_kernel(); -err_kfree: - kfree(mds->mds_fstype); +err_ops: + fsfilt_put_ops(obddev->obd_fsops); err_dec: MOD_DEC_USE_COUNT; RETURN(rc); @@ -1567,7 +1574,6 @@ static int mds_cleanup(struct obd_device *obddev) unlock_kernel(); mntput(mds->mds_vfsmnt); mds->mds_sb = 0; - kfree(mds->mds_fstype); ldlm_namespace_free(obddev->obd_namespace); @@ -1576,6 +1582,7 @@ static int mds_cleanup(struct obd_device *obddev) dev_clear_rdonly(2); #endif mds_fs_cleanup(obddev); + fsfilt_put_ops(obddev->obd_fsops); MOD_DEC_USE_COUNT; RETURN(0); @@ -1806,7 +1813,6 @@ static struct obd_ops mdt_obd_ops = { static int __init mds_init(void) { - mds_file_cache = kmem_cache_create("ll_mds_file_data", sizeof(struct mds_file_data), 0, 0, NULL, NULL); @@ -1816,20 +1822,17 @@ static int 
__init mds_init(void) class_register_type(&mds_obd_ops, status_class_var, LUSTRE_MDS_NAME); class_register_type(&mdt_obd_ops, 0, LUSTRE_MDT_NAME); ldlm_register_intent(ldlm_intent_policy); - return 0; + return 0; } static void __exit mds_exit(void) { - - ldlm_unregister_intent(); class_unregister_type(LUSTRE_MDS_NAME); class_unregister_type(LUSTRE_MDT_NAME); if (kmem_cache_destroy(mds_file_cache)) CERROR("couldn't free MDS file cache\n"); - } MODULE_AUTHOR("Cluster File Systems "); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 0fc96bd..7028603 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -22,6 +22,7 @@ #define DEBUG_SUBSYSTEM S_CLASS #include +#include #include int rd_uuid(char* page, char **start, off_t off, int count, int *eof, @@ -103,15 +104,12 @@ int rd_kbfree(char* page, char **start, off_t off, int count, int *eof, } -int rd_fstype(char* page, char **start, off_t off, int count, int *eof, +int rd_fstype(char *page, char **start, off_t off, int count, int *eof, void *data) -{ - struct obd_device* temp = (struct obd_device*)data; - struct mds_obd *mds = &temp->u.mds; - int len = 0; - len += snprintf(page, count, "%s\n", mds->mds_fstype); - return len; - +{ + struct obd_device *obd = (struct obd_device *)data; + + return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type); } int rd_filestotal(char* page, char **start, off_t off, int count, int *eof, diff --git a/lustre/mds/mds_ext2.c b/lustre/mds/mds_ext2.c deleted file mode 100644 index ef1d8e5..0000000 --- a/lustre/mds/mds_ext2.c +++ /dev/null @@ -1,145 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_null.c - * - * Lustre Metadata Server (mds) journal abstraction routines - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * author: Andreas Dilger - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - * - */ - -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include - -static void *mds_ext2_start(struct inode *inode, int nblocks) -{ - return (void *)1; -} - -static int mds_ext2_stop(struct inode *inode, void *handle) -{ - return 0; -} - -static int mds_ext2_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) -{ - struct inode *inode = dentry->d_inode; - - lock_kernel(); - - /* a _really_ horrible hack to avoid removing the data stored - in the block pointers; this data is the object id - this will go into an extended attribute at some point. - */ - if (iattr->ia_valid & ATTR_SIZE) { - /* ATTR_SIZE would invoke truncate: clear it */ - iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; - - /* make sure _something_ gets set - so new inode - goes to disk (probably won't work over XFS */ - if (!iattr->ia_valid & ATTR_MODE) { - iattr->ia_valid |= ATTR_MODE; - iattr->ia_mode = inode->i_mode; - } - } - - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); - else - rc = inode_setattr(inode, iattr); - - unlock_kernel(); - - return rc; -} - -/* - * FIXME: nasty hack - store the object id in the first two - * direct block spots. This should be done with EAs... 
- */ -static int mds_ext2_set_objid(struct inode *inode, void *handle, obd_id id) -{ - (__u64)(inode->u.ext2_i.i_data[0]) = cpu_to_le64(id); - return 0; -} - -static int mds_ext2_get_objid(struct inode *inode, obd_id *id) -{ - *id = le64_to_cpu(inode->u.ext2_i.i_data[0]); - - return 0; -} - -static ssize_t mds_ext2_readpage(struct file *file, char *buf, size_t count, - loff_t *offset) -{ - if (S_ISREG(file->f_dentry->d_inode->i_mode)) - return file->f_op->read(file, buf, count, offset); - else - return generic_file_read(file, buf, count, offset); -} - -static struct mds_fs_operations mds_ext2_fs_ops; - -static void mds_ext2_delete_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode)) - mds_ext2_set_objid(inode, NULL, 0); - - mds_ext2_fs_ops.cl_delete_inode(inode); -} - -static int mds_ext2_set_last_rcvd(struct mds_obd *mds, void *handle) -{ - /* Bail for ext2 - can't tell when it is on disk anyways, sync? */ - mds->mds_last_committed = mds->mds_last_rcvd; - - return 0; -} - -static int mds_ext2_journal_data(struct file *filp) -{ - return 0; -} - -static struct mds_fs_operations mds_ext2_fs_ops = { - fs_owner: THIS_MODULE, - fs_start: mds_ext2_start, - fs_commit: mds_ext2_stop, - fs_setattr: mds_ext2_setattr, - fs_set_objid: mds_ext2_set_objid, - fs_get_objid: mds_ext2_get_objid, - fs_readpage: mds_ext2_readpage, - fs_delete_inode: mds_ext2_delete_inode, - cl_delete_inode: clear_inode, - fs_journal_data: mds_ext2_journal_data, - fs_set_last_rcvd: mds_ext2_set_last_rcvd, -}; - -static int __init mds_ext2_init(void) -{ - return mds_register_fs_type(&mds_ext2_fs_ops, "ext2"); -} - -static void __exit mds_ext2_exit(void) -{ - mds_unregister_fs_type("ext2"); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
"); -MODULE_DESCRIPTION("Lustre MDS ext2 Filesystem Helper v0.1"); -MODULE_LICENSE("GPL"); - -module_init(mds_ext2_init); -module_exit(mds_ext2_exit); diff --git a/lustre/mds/mds_extN.c b/lustre/mds/mds_extN.c deleted file mode 100644 index 4091f8d..0000000 --- a/lustre/mds/mds_extN.c +++ /dev/null @@ -1,356 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/mds/mds_extN.c - * Lustre Metadata Server (mds) journal abstraction routines - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Andreas Dilger - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct mds_fs_operations mds_extN_fs_ops; -static kmem_cache_t *mcb_cache; -static int mcb_cache_count; - -struct mds_cb_data { - struct journal_callback cb_jcb; - struct mds_obd *cb_mds; - __u64 cb_last_rcvd; -}; - -#define EXTN_XATTR_INDEX_LUSTRE 5 -#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" - -/* - * We don't currently need any additional blocks for rmdir and - * unlink transactions because we are storing the OST oa_id inside - * the inode (which we will be changing anyways as part of this - * transaction). 
- */ -static void *mds_extN_start(struct inode *inode, int op) -{ - /* For updates to the last recieved file */ - int nblocks = EXTN_DATA_TRANS_BLOCKS; - void *handle; - - switch(op) { - case MDS_FSOP_RMDIR: - case MDS_FSOP_UNLINK: - nblocks += EXTN_DELETE_TRANS_BLOCKS; - break; - case MDS_FSOP_RENAME: - /* We may be modifying two directories */ - nblocks += EXTN_DATA_TRANS_BLOCKS; - case MDS_FSOP_SYMLINK: - /* Possible new block + block bitmap + GDT for long symlink */ - nblocks += 3; - case MDS_FSOP_CREATE: - case MDS_FSOP_MKDIR: - case MDS_FSOP_MKNOD: - /* New inode + block bitmap + GDT for new file */ - nblocks += 3; - case MDS_FSOP_LINK: - /* Change parent directory */ - nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; - break; - case MDS_FSOP_SETATTR: - /* Setattr on inode */ - nblocks += 1; - break; - default: CERROR("unknown transaction start op %d\n", op); - LBUG(); - } - - lock_kernel(); - handle = journal_start(EXTN_JOURNAL(inode), nblocks); - unlock_kernel(); - - return handle; -} - -static int mds_extN_commit(struct inode *inode, void *handle) -{ - int rc; - - lock_kernel(); - rc = journal_stop((handle_t *)handle); - unlock_kernel(); - - return rc; -} - -static int mds_extN_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) -{ - struct inode *inode = dentry->d_inode; - int rc; - - lock_kernel(); - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); - else - rc = inode_setattr(inode, iattr); - - unlock_kernel(); - - return rc; -} - -static int mds_extN_set_md(struct inode *inode, void *handle, - struct lov_mds_md *lmm, int lmm_size) -{ - int rc; - - down(&inode->i_sem); - lock_kernel(); - rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); - unlock_kernel(); - up(&inode->i_sem); - - if (rc) { - CERROR("error adding objectid "LPX64" to inode %lu: rc = %d\n", - lmm->lmm_object_id, inode->i_ino, rc); - if (rc != -ENOSPC) LBUG(); - } - return rc; -} - 
-static int mds_extN_get_md(struct inode *inode, struct lov_mds_md *lmm,int size) -{ - int rc; - - down(&inode->i_sem); - lock_kernel(); - rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, size); - unlock_kernel(); - up(&inode->i_sem); - - /* This gives us the MD size */ - if (lmm == NULL) - return (rc == -ENODATA) ? 0 : rc; - - if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %s from MDS inode %lu: " - "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); - memset(lmm, 0, size); - return (rc == -ENODATA) ? 0 : rc; - } - - /* This field is byteswapped because it appears in the - * catalogue. All others are opaque to the MDS */ - lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id); - - return rc; -} - -static ssize_t mds_extN_readpage(struct file *file, char *buf, size_t count, - loff_t *offset) -{ - struct inode *inode = file->f_dentry->d_inode; - int rc = 0; - - if (S_ISREG(inode->i_mode)) - rc = file->f_op->read(file, buf, count, offset); - else { - struct buffer_head *bh; - - /* FIXME: this assumes the blocksize == count, but the calling - * function will detect this as an error for now */ - bh = extN_bread(NULL, inode, - *offset >> inode->i_sb->s_blocksize_bits, - 0, &rc); - - if (bh) { - memcpy(buf, bh->b_data, inode->i_blksize); - brelse(bh); - rc = inode->i_blksize; - } - } - - return rc; -} - -static void mds_extN_delete_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode)) { - void *handle = mds_extN_start(inode, MDS_FSOP_UNLINK); - - if (IS_ERR(handle)) { - CERROR("unable to start transaction"); - EXIT; - return; - } - if (mds_extN_set_md(inode, handle, NULL, 0)) - CERROR("error clearing objid on %lu\n", inode->i_ino); - - if (mds_extN_fs_ops.cl_delete_inode) - mds_extN_fs_ops.cl_delete_inode(inode); - - if (mds_extN_commit(inode, handle)) - CERROR("error closing handle on %lu\n", inode->i_ino); - } else - mds_extN_fs_ops.cl_delete_inode(inode); -} - -static void mds_extN_callback_status(struct journal_callback 
*jcb, int error) -{ - struct mds_cb_data *mcb = (struct mds_cb_data *)jcb; - - CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n", - mcb->cb_last_rcvd, error); - if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed) - mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd; - - kmem_cache_free(mcb_cache, mcb); - --mcb_cache_count; -} - -static int mds_extN_set_last_rcvd(struct mds_obd *mds, void *handle) -{ - struct mds_cb_data *mcb; - - mcb = kmem_cache_alloc(mcb_cache, GFP_NOFS); - if (!mcb) - RETURN(-ENOMEM); - - ++mcb_cache_count; - mcb->cb_mds = mds; - mcb->cb_last_rcvd = mds->mds_last_rcvd; - -#ifdef HAVE_JOURNAL_CALLBACK_STATUS - CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", - mcb->cb_last_rcvd); - lock_kernel(); - /* Note that an "incompatible pointer" warning here is OK for now */ - journal_callback_set(handle, mds_extN_callback_status, - (struct journal_callback *)mcb); - unlock_kernel(); -#else -#warning "no journal callback kernel patch, faking it..." - { - static long next = 0; - - if (time_after(jiffies, next)) { - CERROR("no journal callback kernel patch, faking it...\n"); - next = jiffies + 300 * HZ; - } - - mds_extN_callback_status((struct journal_callback *)mcb, 0); -#endif - - return 0; -} - -static int mds_extN_journal_data(struct file *filp) -{ - struct inode *inode = filp->f_dentry->d_inode; - - EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL; - - return 0; -} - -/* - * We need to hack the return value for the free inode counts because - * the current EA code requires one filesystem block per inode with EAs, - * so it is possible to run out of blocks before we run out of inodes. - * - * This can be removed when the extN EA code is fixed. 
- */ -static int mds_extN_statfs(struct super_block *sb, struct statfs *sfs) -{ - int rc = vfs_statfs(sb, sfs); - - if (!rc && sfs->f_bfree < sfs->f_ffree) - sfs->f_ffree = sfs->f_bfree; - - return rc; -} - -static struct mds_fs_operations mds_extN_fs_ops = { - fs_owner: THIS_MODULE, - fs_start: mds_extN_start, - fs_commit: mds_extN_commit, - fs_setattr: mds_extN_setattr, - fs_set_md: mds_extN_set_md, - fs_get_md: mds_extN_get_md, - fs_readpage: mds_extN_readpage, - fs_delete_inode: mds_extN_delete_inode, - cl_delete_inode: clear_inode, - fs_journal_data: mds_extN_journal_data, - fs_set_last_rcvd: mds_extN_set_last_rcvd, - fs_statfs: mds_extN_statfs, -}; - -static int __init mds_extN_init(void) -{ - int rc; - - //rc = extN_xattr_register(); - mcb_cache = kmem_cache_create("mds_extN_mcb", - sizeof(struct mds_cb_data), 0, - 0, NULL, NULL); - if (!mcb_cache) { - CERROR("error allocating MDS journal callback cache\n"); - GOTO(out, rc = -ENOMEM); - } - - rc = mds_register_fs_type(&mds_extN_fs_ops, "extN"); - - if (rc) - kmem_cache_destroy(mcb_cache); -out: - return rc; -} - -static void __exit mds_extN_exit(void) -{ - int rc; - - mds_unregister_fs_type("extN"); - rc = kmem_cache_destroy(mcb_cache); - - if (rc || mcb_cache_count) { - CERROR("can't free MDS callback cache: count %d, rc = %d\n", - mcb_cache_count, rc); - } - - //rc = extN_xattr_unregister(); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
"); -MODULE_DESCRIPTION("Lustre MDS extN Filesystem Helper v0.1"); -MODULE_LICENSE("GPL"); - -module_init(mds_extN_init); -module_exit(mds_extN_exit); diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 163a45f..83201aa 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -23,14 +23,7 @@ #include #include #include - -LIST_HEAD(mds_fs_types); - -struct mds_fs_type { - struct list_head mft_list; - struct mds_fs_operations *mft_ops; - char *mft_name; -}; +#include /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define MDS_MAX_CLIENTS (PAGE_SIZE * 8) @@ -322,7 +315,7 @@ static int mds_fs_prep(struct obd_device *obddev) GOTO(err_pop, rc = -ENOENT); } - rc = mds_fs_journal_data(mds, f); + rc = fsfilt_journal_data(obddev, f); if (rc) { CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_filp, rc); @@ -347,113 +340,10 @@ err_filp: goto err_pop; } -static struct mds_fs_operations *mds_search_fs_type(const char *name) -{ - struct list_head *p; - struct mds_fs_type *type; - - /* lock mds_fs_types list */ - list_for_each(p, &mds_fs_types) { - type = list_entry(p, struct mds_fs_type, mft_list); - if (!strcmp(type->mft_name, name)) { - /* unlock mds_fs_types list */ - return type->mft_ops; - } - } - /* unlock mds_fs_types list */ - return NULL; -} - -int mds_register_fs_type(struct mds_fs_operations *ops, const char *name) -{ - struct mds_fs_operations *found; - struct mds_fs_type *type; - - if ((found = mds_search_fs_type(name))) { - if (found != ops) { - CERROR("different operations for type %s\n", name); - RETURN(-EEXIST); - } - return 0; - } - OBD_ALLOC(type, sizeof(*type)); - if (!type) - RETURN(-ENOMEM); - - INIT_LIST_HEAD(&type->mft_list); - type->mft_ops = ops; - type->mft_name = strdup(name); - if (!type->mft_name) { - OBD_FREE(type, sizeof(*type)); - RETURN(-ENOMEM); - } - MOD_INC_USE_COUNT; - list_add(&type->mft_list, &mds_fs_types); - - return 0; -} - -void mds_unregister_fs_type(const char *name) -{ 
- struct list_head *p; - - /* lock mds_fs_types list */ - list_for_each(p, &mds_fs_types) { - struct mds_fs_type *type; - - type = list_entry(p, struct mds_fs_type, mft_list); - if (!strcmp(type->mft_name, name)) { - list_del(p); - kfree(type->mft_name); - OBD_FREE(type, sizeof(*type)); - MOD_DEC_USE_COUNT; - break; - } - } - /* unlock mds_fs_types list */ -} - -struct mds_fs_operations *mds_fs_get_ops(char *fstype) -{ - struct mds_fs_operations *fs_ops; - - if (!(fs_ops = mds_search_fs_type(fstype))) { - char name[32]; - int rc; - - snprintf(name, sizeof(name) - 1, "mds_%s", fstype); - name[sizeof(name) - 1] = '\0'; - - if ((rc = request_module(name))) { - fs_ops = mds_search_fs_type(fstype); - CDEBUG(D_INFO, "Loaded module '%s'\n", name); - if (!fs_ops) - rc = -ENOENT; - } - - if (rc) { - CERROR("Can't find MDS fs interface '%s'\n", name); - RETURN(ERR_PTR(rc)); - } - } - __MOD_INC_USE_COUNT(fs_ops->fs_owner); - - return fs_ops; -} - -void mds_fs_put_ops(struct mds_fs_operations *fs_ops) -{ - __MOD_DEC_USE_COUNT(fs_ops->fs_owner); -} - int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt) { struct mds_obd *mds = &obddev->u.mds; - int rc; - - mds->mds_fsops = mds_fs_get_ops(mds->mds_fstype); - if (IS_ERR(mds->mds_fsops)) - RETURN(PTR_ERR(mds->mds_fsops)); + ENTRY; mds->mds_vfsmnt = mnt; @@ -462,40 +352,7 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt) mds->mds_ctxt.pwd = mnt->mnt_root; mds->mds_ctxt.fs = get_ds(); - /* - * Replace the client filesystem delete_inode method with our own, - * so that we can clear the object ID before the inode is deleted. - * The fs_delete_inode method will call cl_delete_inode for us. - * We need to do this for the MDS superblock only, hence we install - * a modified copy of the original superblock method table. 
- * - * We still assume that there is only a single MDS client filesystem - * type, as we don't have access to the mds struct in delete_inode - * and store the client delete_inode method in a global table. This - * will only become a problem if/when multiple MDSs are running on a - * single host with different underlying filesystems. - */ - OBD_ALLOC(mds->mds_sop, sizeof(*mds->mds_sop)); - if (!mds->mds_sop) - GOTO(out_dec, rc = -ENOMEM); - - memcpy(mds->mds_sop, mds->mds_sb->s_op, sizeof(*mds->mds_sop)); - mds->mds_fsops->cl_delete_inode = mds->mds_sop->delete_inode; - mds->mds_sop->delete_inode = mds->mds_fsops->fs_delete_inode; - mds->mds_sb->s_op = mds->mds_sop; - - rc = mds_fs_prep(obddev); - - if (rc) - GOTO(out_free, rc); - - return 0; - -out_free: - OBD_FREE(mds->mds_sop, sizeof(*mds->mds_sop)); -out_dec: - mds_fs_put_ops(mds->mds_fsops); - return rc; + RETURN(mds_fs_prep(obddev)); } void mds_fs_cleanup(struct obd_device *obddev) @@ -504,10 +361,4 @@ void mds_fs_cleanup(struct obd_device *obddev) class_disconnect_all(obddev); /* this cleans up client info too */ mds_server_free_data(mds); - - OBD_FREE(mds->mds_sop, sizeof(*mds->mds_sop)); - mds_fs_put_ops(mds->mds_fsops); } - -EXPORT_SYMBOL(mds_register_fs_type); -EXPORT_SYMBOL(mds_unregister_fs_type); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index ca34445..689e424 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -35,10 +35,20 @@ #include #include #include -#include +#include extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req); +static void mds_last_rcvd_cb(struct obd_device *obd, __u64 last_rcvd, int error) +{ + struct mds_obd *mds = &obd->u.mds; + + CDEBUG(D_HA, "got callback for last_rcvd "LPD64": rc = %d\n", + last_rcvd, error); + if (!error && last_rcvd > mds->mds_last_committed) + mds->mds_last_committed = last_rcvd; +} + void mds_start_transno(struct mds_obd *mds) { ENTRY; @@ -57,13 +67,11 @@ int mds_finish_transno(struct mds_obd *mds, void 
*handle, /* Propagate error code. */ if (rc) - goto out; + GOTO(out, rc); /* we don't allocate new transnos for replayed requests */ - if (req->rq_level == LUSTRE_CONN_RECOVD) { - rc = 0; - goto out; - } + if (req->rq_level == LUSTRE_CONN_RECOVD) + GOTO(out, rc = 0); off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE; @@ -73,7 +81,8 @@ int mds_finish_transno(struct mds_obd *mds, void *handle, mcd->mcd_mount_count = cpu_to_le64(mds->mds_mount_count); mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mds_fs_set_last_rcvd(mds, handle); + fsfilt_set_last_rcvd(req->rq_export->exp_obd, last_rcvd, handle, + mds_last_rcvd_cb); written = lustre_fwrite(mds->mds_rcvd_filp, (char *)mcd, sizeof(*mcd), &off); CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = " @@ -87,8 +96,8 @@ int mds_finish_transno(struct mds_obd *mds, void *handle, rc = 0; - out: EXIT; + out: up(&mds->mds_transno_sem); return rc; } @@ -144,14 +153,14 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, to_kdev_t(inode->i_sb->s_dev)); mds_start_transno(mds); - handle = mds_fs_start(mds, inode, MDS_FSOP_SETATTR); + handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR); if (IS_ERR(handle)) { rc = PTR_ERR(handle); (void)mds_finish_transno(mds, handle, req, rc); GOTO(out_setattr_de, rc); } - rc = mds_fs_setattr(mds, de, handle, &rec->ur_iattr); + rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr); if (offset) { body = lustre_msg_buf(req->rq_repmsg, 1); @@ -161,7 +170,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, rc = mds_finish_transno(mds, handle, req, rc); - err = mds_fs_commit(mds, de->d_inode, handle); + err = fsfilt_commit(obd, de->d_inode, handle); if (err) { CERROR("error on commit: err = %d\n", err); if (!rc) @@ -264,7 +273,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, switch (type) { case S_IFREG:{ - handle = mds_fs_start(mds, dir, MDS_FSOP_CREATE); + handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE); if 
(IS_ERR(handle)) GOTO(out_transno_dchild, rc = PTR_ERR(handle)); rc = vfs_create(dir, dchild, rec->ur_mode); @@ -272,7 +281,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, break; } case S_IFDIR:{ - handle = mds_fs_start(mds, dir, MDS_FSOP_MKDIR); + handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR); if (IS_ERR(handle)) GOTO(out_transno_dchild, rc = PTR_ERR(handle)); rc = vfs_mkdir(dir, dchild, rec->ur_mode); @@ -280,7 +289,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, break; } case S_IFLNK:{ - handle = mds_fs_start(mds, dir, MDS_FSOP_SYMLINK); + handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK); if (IS_ERR(handle)) GOTO(out_transno_dchild, rc = PTR_ERR(handle)); rc = vfs_symlink(dir, dchild, rec->ur_tgt); @@ -292,7 +301,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, case S_IFIFO: case S_IFSOCK:{ int rdev = rec->ur_rdev; - handle = mds_fs_start(mds, dir, MDS_FSOP_MKNOD); + handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD); if (IS_ERR(handle)) GOTO(out_transno_dchild, rc = PTR_ERR(handle)); rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); @@ -331,7 +340,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, CDEBUG(D_INODE, "created ino %lu\n", inode->i_ino); } - rc = mds_fs_setattr(mds, dchild, handle, &iattr); + rc = fsfilt_setattr(obd, dchild, handle, &iattr); if (rc) { CERROR("error on setattr: rc = %d\n", rc); /* XXX should we abort here in case of error? */ @@ -350,7 +359,7 @@ out_create_commit: if (rc) GOTO(out_create_unlink, rc); } - err = mds_fs_commit(mds, dir, handle); + err = fsfilt_commit(obd, dir, handle); if (err) { CERROR("error on commit: err = %d\n", err); if (!rc) @@ -466,7 +475,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, mds_start_transno(mds); switch (rec->ur_mode /* & S_IFMT ? 
*/) { case S_IFDIR: - handle = mds_fs_start(mds, dir, MDS_FSOP_RMDIR); + handle = fsfilt_start(obd, dir, FSFILT_OP_RMDIR); if (IS_ERR(handle)) GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle)); rc = vfs_rmdir(dir, dchild); @@ -481,7 +490,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - handle = mds_fs_start(mds, dir, MDS_FSOP_UNLINK); + handle = fsfilt_start(obd, dir, FSFILT_OP_UNLINK); if (IS_ERR(handle)) GOTO(out_unlink_cancel_transno, rc = PTR_ERR(handle)); rc = vfs_unlink(dir, dchild); @@ -494,7 +503,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, } rc = mds_finish_transno(mds, handle, req, rc); - err = mds_fs_commit(mds, dir, handle); + err = fsfilt_commit(obd, dir, handle); if (err) { CERROR("error on commit: err = %d\n", err); if (!rc) @@ -626,7 +635,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, to_kdev_t(de_src->d_inode->i_sb->s_dev)); mds_start_transno(mds); - handle = mds_fs_start(mds, de_tgt_dir->d_inode, MDS_FSOP_LINK); + handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK); if (IS_ERR(handle)) { rc = PTR_ERR(handle); mds_finish_transno(mds, handle, req, rc); @@ -638,7 +647,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, CERROR("link error %d\n", rc); rc = mds_finish_transno(mds, handle, req, rc); - err = mds_fs_commit(mds, de_tgt_dir->d_inode, handle); + err = fsfilt_commit(obd, de_tgt_dir->d_inode, handle); if (err) { CERROR("error on commit: err = %d\n", err); if (!rc) @@ -760,7 +769,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, to_kdev_t(de_srcdir->d_inode->i_sb->s_dev)); mds_start_transno(mds); - handle = mds_fs_start(mds, de_tgtdir->d_inode, MDS_FSOP_RENAME); + handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME); if (IS_ERR(handle)) { rc = PTR_ERR(handle); mds_finish_transno(mds, handle, req, rc); @@ -774,7 +783,7 @@ static int 
mds_reint_rename(struct mds_update_record *rec, int offset, rc = mds_finish_transno(mds, handle, req, rc); - err = mds_fs_commit(mds, de_tgtdir->d_inode, handle); + err = fsfilt_commit(obd, de_tgtdir->d_inode, handle); if (err) { CERROR("error on commit: err = %d\n", err); if (!rc) diff --git a/lustre/obdclass/Makefile.am b/lustre/obdclass/Makefile.am index d7df0bb..ed2b321 100644 --- a/lustre/obdclass/Makefile.am +++ b/lustre/obdclass/Makefile.am @@ -2,9 +2,19 @@ # lustre_build_version, or 'make -j2' breaks! DEFS= MODULE = obdclass -modulefs_DATA = lustre_build_version obdclass.o -EXTRA_PROGRAMS = obdclass + +if LINUX25 +FSMOD = fsfilt_ext3 +else +FSMOD = fsfilt_extN +endif + +modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o +EXTRA_PROGRAMS = obdclass $(FSMOD) + obdclass_SOURCES = debug.c genops.c class_obd.c sysctl.c uuid.c lprocfs_status.c +obdclass_SOURCES += fsfilt.c + include $(top_srcdir)/Rules lustre_build_version: perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) > tmpver diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 420a3fb..03faf17 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -51,7 +51,7 @@ struct semaphore obd_conf_sem; /* serialize configuration commands */ struct obd_device obd_dev[MAX_OBD_DEVICES]; struct list_head obd_types; -unsigned long obd_memory; +atomic_t obd_memory; /* The following are visible and mutable through /proc/sys/lustre/. 
*/ unsigned long obd_fail_loc; diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c new file mode 100644 index 0000000..97a84df --- /dev/null +++ b/lustre/obdclass/fsfilt.c @@ -0,0 +1,110 @@ +#define EXPORT_SYMTAB +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LIST_HEAD(fsfilt_types); + +static struct fsfilt_operations *fsfilt_search_type(const char *type) +{ + struct fsfilt_operations *found; + struct list_head *p; + + list_for_each(p, &fsfilt_types) { + found = list_entry(p, struct fsfilt_operations, fs_list); + if (!strcmp(found->fs_type, type)) { + return found; + } + } + return NULL; +} + +int fsfilt_register_ops(struct fsfilt_operations *fs_ops) +{ + struct fsfilt_operations *found; + + /* lock fsfilt_types list */ + if ((found = fsfilt_search_type(fs_ops->fs_type))) { + if (found != fs_ops) { + CERROR("different operations for type %s\n", + fs_ops->fs_type); + /* unlock fsfilt_types list */ + RETURN(-EEXIST); + } + } else { + MOD_INC_USE_COUNT; + list_add(&fs_ops->fs_list, &fsfilt_types); + } + + /* unlock fsfilt_types list */ + return 0; +} + +void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops) +{ + struct list_head *p; + + /* lock fsfilt_types list */ + list_for_each(p, &fsfilt_types) { + struct fsfilt_operations *found; + + found = list_entry(p, typeof(*found), fs_list); + if (found == fs_ops) { + list_del(p); + MOD_DEC_USE_COUNT; + break; + } + } + /* unlock fsfilt_types list */ +} + +struct fsfilt_operations *fsfilt_get_ops(char *type) +{ + struct fsfilt_operations *fs_ops; + + /* lock fsfilt_types list */ + if (!(fs_ops = fsfilt_search_type(type))) { + char name[32]; + int rc; + + snprintf(name, sizeof(name) - 1, "fsfilt_%s", type); + name[sizeof(name) - 1] = '\0'; + + if (!(rc = request_module(name))) { /* 0: module loaded */ + fs_ops = fsfilt_search_type(type); + CDEBUG(D_INFO, "Loaded module '%s'\n", name); + if (!fs_ops) + rc = -ENOENT; + } + + if (rc) { +
CERROR("Can't find fsfilt_%s interface\n", type); + RETURN(ERR_PTR(rc)); + /* unlock fsfilt_types list */ + } + } + __MOD_INC_USE_COUNT(fs_ops->fs_owner); + /* unlock fsfilt_types list */ + + return fs_ops; +} + +void fsfilt_put_ops(struct fsfilt_operations *fs_ops) +{ + __MOD_DEC_USE_COUNT(fs_ops->fs_owner); +} + + +EXPORT_SYMBOL(fsfilt_register_ops); +EXPORT_SYMBOL(fsfilt_unregister_ops); +EXPORT_SYMBOL(fsfilt_get_ops); +EXPORT_SYMBOL(fsfilt_put_ops); diff --git a/lustre/mds/mds_ext3.c b/lustre/obdclass/fsfilt_ext3.c similarity index 55% rename from lustre/mds/mds_ext3.c rename to lustre/obdclass/fsfilt_ext3.c index 7dede30..3878315 100644 --- a/lustre/mds/mds_ext3.c +++ b/lustre/obdclass/fsfilt_ext3.c @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/mds/mds_ext3.c - * Lustre Metadata Server (mds) journal abstraction routines + * lustre/lib/fsfilt_ext3.c + * Lustre filesystem abstraction routines * * Copyright (C) 2002 Cluster File Systems, Inc. * Author: Andreas Dilger @@ -23,7 +23,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_FILTER #include #include @@ -31,20 +31,20 @@ #include #include #include -#include <../fs/ext3/xattr.h> +#include #include -#include +#include #include #include -static struct mds_fs_operations mds_ext3_fs_ops; -static kmem_cache_t *mcb_cache; -static int mcb_cache_count; +static kmem_cache_t *fcb_cache; +static int fcb_cache_count; -struct mds_cb_data { - struct journal_callback cb_jcb; - struct mds_obd *cb_mds; - __u64 cb_last_rcvd; +struct fsfilt_cb_data { + struct journal_callback cb_jcb; /* data private to jbd */ + fsfilt_cb_t cb_func; /* MDS/OBD completion function */ + struct obd_device *cb_obd; /* MDS/OBD completion device */ + __u64 cb_last_rcvd; /* MDS/OST last committed operation */ }; #define EXT3_XATTR_INDEX_LUSTRE 5 @@ -56,33 +56,33 @@ struct mds_cb_data { * the inode (which we will be changing anyways as part of this * transaction). */ -static void *mds_ext3_start(struct inode *inode, int op) +static void *fsfilt_ext3_start(struct inode *inode, int op) { /* For updates to the last recieved file */ int nblocks = EXT3_DATA_TRANS_BLOCKS; void *handle; switch(op) { - case MDS_FSOP_RMDIR: - case MDS_FSOP_UNLINK: + case FSFILT_OP_RMDIR: + case FSFILT_OP_UNLINK: nblocks += EXT3_DELETE_TRANS_BLOCKS; break; - case MDS_FSOP_RENAME: + case FSFILT_OP_RENAME: /* We may be modifying two directories */ nblocks += EXT3_DATA_TRANS_BLOCKS; - case MDS_FSOP_SYMLINK: + case FSFILT_OP_SYMLINK: /* Possible new block + block bitmap + GDT for long symlink */ nblocks += 3; - case MDS_FSOP_CREATE: - case MDS_FSOP_MKDIR: - case MDS_FSOP_MKNOD: + case FSFILT_OP_CREATE: + case FSFILT_OP_MKDIR: + case FSFILT_OP_MKNOD: /* New inode + block bitmap + GDT for new file */ nblocks += 3; - case MDS_FSOP_LINK: + case FSFILT_OP_LINK: /* Change parent directory */ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; break; - case MDS_FSOP_SETATTR: + case FSFILT_OP_SETATTR: /* Setattr on inode */ nblocks += 1; break; @@ 
-97,7 +97,7 @@ static void *mds_ext3_start(struct inode *inode, int op) return handle; } -static int mds_ext3_commit(struct inode *inode, void *handle) +static int fsfilt_ext3_commit(struct inode *inode, void *handle) { int rc; @@ -108,8 +108,8 @@ static int mds_ext3_commit(struct inode *inode, void *handle) return rc; } -static int mds_ext3_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) +static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, + struct iattr *iattr) { struct inode *inode = dentry->d_inode; int rc; @@ -125,8 +125,8 @@ static int mds_ext3_setattr(struct dentry *dentry, void *handle, return rc; } -static int mds_ext3_set_md(struct inode *inode, void *handle, - struct lov_mds_md *lmm, int lmm_size) +static int fsfilt_ext3_set_md(struct inode *inode, void *handle, + void *lmm, int lmm_size) { int rc; @@ -138,14 +138,14 @@ static int mds_ext3_set_md(struct inode *inode, void *handle, up(&inode->i_sem); if (rc) { - CERROR("error adding objectid "LPX64" to inode %lu: %d\n", - lmm->lmm_object_id, inode->i_ino, rc); + CERROR("error adding MD data to inode %lu: rc = %d\n", + inode->i_ino, rc); if (rc != -ENOSPC) LBUG(); } return rc; } -static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm,int size) +static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int size) { int rc; @@ -161,21 +161,17 @@ static int mds_ext3_get_md(struct inode *inode, struct lov_mds_md *lmm,int size) return (rc == -ENODATA) ? 0 : rc; if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %s from MDS inode %lu: " + CDEBUG(D_INFO, "error getting EA %s from inode %lu: " "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); memset(lmm, 0, size); return (rc == -ENODATA) ? 0 : rc; } - /* This field is byteswapped because it appears in the - * catalogue. 
All others are opaque to the MDS */ - lmm->lmm_object_id = le64_to_cpu(lmm->lmm_object_id); - return rc; } -static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count, - loff_t *offset) +static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, + loff_t *offset) { struct inode *inode = file->f_dentry->d_inode; int rc = 0; @@ -201,64 +197,39 @@ static ssize_t mds_ext3_readpage(struct file *file, char *buf, size_t count, return rc; } -static void mds_ext3_delete_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode)) { - void *handle = mds_ext3_start(inode, MDS_FSOP_UNLINK); - - if (IS_ERR(handle)) { - CERROR("unable to start transaction"); - EXIT; - return; - } - if (mds_ext3_set_md(inode, handle, NULL, 0)) - CERROR("error clearing objid on %lu\n", inode->i_ino); - - if (mds_ext3_fs_ops.cl_delete_inode) - mds_ext3_fs_ops.cl_delete_inode(inode); - - if (mds_ext3_commit(inode, handle)) - CERROR("error closing handle on %lu\n", inode->i_ino); - } else - mds_ext3_fs_ops.cl_delete_inode(inode); -} - -static void mds_ext3_callback_status(struct journal_callback *jcb, int error) +static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) { - struct mds_cb_data *mcb = (struct mds_cb_data *)jcb; + struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - CDEBUG(D_EXT2, "got callback for last_rcvd "LPD64": rc = %d\n", - mcb->cb_last_rcvd, error); - if (!error && mcb->cb_last_rcvd > mcb->cb_mds->mds_last_committed) - mcb->cb_mds->mds_last_committed = mcb->cb_last_rcvd; + fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); - kmem_cache_free(mcb_cache, mcb); - --mcb_cache_count; + kmem_cache_free(fcb_cache, fcb); + --fcb_cache_count; } -static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle) +static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, + void *handle, fsfilt_cb_t cb_func) { - struct mds_cb_data *mcb; +#ifdef HAVE_JOURNAL_CALLBACK_STATUS + struct fsfilt_cb_data *fcb; - mcb 
= kmem_cache_alloc(mcb_cache, GFP_NOFS); - if (!mcb) + fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS); + if (!fcb) RETURN(-ENOMEM); - ++mcb_cache_count; - mcb->cb_mds = mds; - mcb->cb_last_rcvd = mds->mds_last_rcvd; + ++fcb_cache_count; + fcb->cb_func = cb_func; + fcb->cb_obd = obd; + fcb->cb_last_rcvd = last_rcvd; -#ifdef HAVE_JOURNAL_CALLBACK_STATUS - CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", - mcb->cb_last_rcvd); + CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); lock_kernel(); /* Note that an "incompatible pointer" warning here is OK for now */ - journal_callback_set(handle, mds_ext3_callback_status, - (struct journal_callback *)mcb); + journal_callback_set(handle, fsfilt_ext3_cb_func, + (struct journal_callback *)fcb); unlock_kernel(); #else #warning "no journal callback kernel patch, faking it..." - { static long next = 0; if (time_after(jiffies, next)) { @@ -266,13 +237,13 @@ static int mds_ext3_set_last_rcvd(struct mds_obd *mds, void *handle) next = jiffies + 300 * HZ; } - mds_ext3_callback_status((struct journal_callback *)mcb, 0); + cb_func(obd, last_rcvd, 0); #endif return 0; } -static int mds_ext3_journal_data(struct file *filp) +static int fsfilt_ext3_journal_data(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; @@ -288,7 +259,7 @@ static int mds_ext3_journal_data(struct file *filp) * * This can be removed when the ext3 EA code is fixed. 
*/ -static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs) +static int fsfilt_ext3_statfs(struct super_block *sb, struct statfs *sfs) { int rc = vfs_statfs(sb, sfs); @@ -298,60 +269,59 @@ static int mds_ext3_statfs(struct super_block *sb, struct statfs *sfs) return rc; } -static struct mds_fs_operations mds_ext3_fs_ops = { +static struct fsfilt_operations fsfilt_ext3_ops = { + fs_type: "ext3", fs_owner: THIS_MODULE, - fs_start: mds_ext3_start, - fs_commit: mds_ext3_commit, - fs_setattr: mds_ext3_setattr, - fs_set_md: mds_ext3_set_md, - fs_get_md: mds_ext3_get_md, - fs_readpage: mds_ext3_readpage, - fs_delete_inode: mds_ext3_delete_inode, - cl_delete_inode: clear_inode, - fs_journal_data: mds_ext3_journal_data, - fs_set_last_rcvd: mds_ext3_set_last_rcvd, - fs_statfs: mds_ext3_statfs, + fs_start: fsfilt_ext3_start, + fs_commit: fsfilt_ext3_commit, + fs_setattr: fsfilt_ext3_setattr, + fs_set_md: fsfilt_ext3_set_md, + fs_get_md: fsfilt_ext3_get_md, + fs_readpage: fsfilt_ext3_readpage, + fs_journal_data: fsfilt_ext3_journal_data, + fs_set_last_rcvd: fsfilt_ext3_set_last_rcvd, + fs_statfs: fsfilt_ext3_statfs, }; -static int __init mds_ext3_init(void) +static int __init fsfilt_ext3_init(void) { int rc; //rc = ext3_xattr_register(); - mcb_cache = kmem_cache_create("mds_ext3_mcb", - sizeof(struct mds_cb_data), 0, + fcb_cache = kmem_cache_create("fsfilt_ext3_fcb", + sizeof(struct fsfilt_cb_data), 0, 0, NULL, NULL); - if (!mcb_cache) { - CERROR("error allocating MDS journal callback cache\n"); + if (!fcb_cache) { + CERROR("error allocating fsfilt journal callback cache\n"); GOTO(out, rc = -ENOMEM); } - rc = mds_register_fs_type(&mds_ext3_fs_ops, "ext3"); + rc = fsfilt_register_ops(&fsfilt_ext3_ops); if (rc) - kmem_cache_destroy(mcb_cache); + kmem_cache_destroy(fcb_cache); out: return rc; } -static void __exit mds_ext3_exit(void) +static void __exit fsfilt_ext3_exit(void) { int rc; - mds_unregister_fs_type("ext3"); - rc = kmem_cache_destroy(mcb_cache); +
fsfilt_unregister_ops(&fsfilt_ext3_ops); + rc = kmem_cache_destroy(fcb_cache); - if (rc || mcb_cache_count) { - CERROR("can't free MDS callback cache: count %d, rc = %d\n", - mcb_cache_count, rc); + if (rc || fcb_cache_count) { + CERROR("can't free fsfilt callback cache: count %d, rc = %d\n", + fcb_cache_count, rc); } //rc = ext3_xattr_unregister(); } MODULE_AUTHOR("Cluster File Systems, Inc. "); -MODULE_DESCRIPTION("Lustre MDS ext3 Filesystem Helper v0.1"); +MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1"); MODULE_LICENSE("GPL"); -module_init(mds_ext3_init); -module_exit(mds_ext3_exit); +module_init(fsfilt_ext3_init); +module_exit(fsfilt_ext3_exit); diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c new file mode 100644 index 0000000..9b5a1f9 --- /dev/null +++ b/lustre/obdclass/fsfilt_extN.c @@ -0,0 +1,449 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/lib/fsfilt_extN.c + * Lustre filesystem abstraction routines + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Andreas Dilger + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static kmem_cache_t *fcb_cache; +static int fcb_cache_count; + +struct fsfilt_cb_data { + struct journal_callback cb_jcb; /* data private to jbd */ + fsfilt_cb_t cb_func; /* MDS/OBD completion function */ + struct obd_device *cb_obd; /* MDS/OBD completion device */ + __u64 cb_last_rcvd; /* MDS/OST last committed operation */ +}; + +#define EXTN_XATTR_INDEX_LUSTRE 5 +#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" + +/* + * We don't currently need any additional blocks for rmdir and + * unlink transactions because we are storing the OST oa_id inside + * the inode (which we will be changing anyways as part of this + * transaction). + */ +static void *fsfilt_extN_start(struct inode *inode, int op) +{ + /* For updates to the last recieved file */ + int nblocks = EXTN_DATA_TRANS_BLOCKS; + void *handle; + + switch(op) { + case FSFILT_OP_RMDIR: + case FSFILT_OP_UNLINK: + nblocks += EXTN_DELETE_TRANS_BLOCKS; + break; + case FSFILT_OP_RENAME: + /* modify additional directory */ + nblocks += EXTN_DATA_TRANS_BLOCKS; + /* no break */ + case FSFILT_OP_SYMLINK: + /* additional block + block bitmap + GDT for long symlink */ + nblocks += 3; + /* no break */ + case FSFILT_OP_CREATE: + case FSFILT_OP_MKDIR: + case FSFILT_OP_MKNOD: + /* modify one inode + block bitmap + GDT */ + nblocks += 3; + /* no break */ + case FSFILT_OP_LINK: + /* modify parent directory */ + nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; + break; + case FSFILT_OP_SETATTR: + /* Setattr on inode */ + nblocks += 1; + break; + default: CERROR("unknown transaction start op %d\n", op); + LBUG(); + } + + LASSERT(!current->journal_info); + lock_kernel(); + handle = journal_start(EXTN_JOURNAL(inode), nblocks); + unlock_kernel(); + + return handle; +} + +/* + * Calculate the number of buffer credits needed to write multiple pages 
in + * a single extN transaction. No, this shouldn't be here, but as yet extN + * doesn't have a nice API for calculating this sort of thing in advance. + * + * See comment above extN_writepage_trans_blocks for details. We assume + * no data journaling is being done, but it does allow for all of the pages + * being non-contiguous. If we are guaranteed contiguous pages we could + * reduce the number of (d)indirect blocks a lot. + * + * With N blocks per page and P pages, for each inode we have at most: + * N*P indirect + * min(N*P, blocksize/4 + 1) dindirect blocks + * niocount tindirect + * + * For the entire filesystem, we have at most: + * min(sum(nindir + P), ngroups) bitmap blocks (from the above) + * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) + * objcount inode blocks + * 1 superblock + * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files + */ +static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) +{ + struct super_block *sb = fso->fso_dentry->d_inode->i_sb; + int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); + int addrpp = EXTN_ADDR_PER_BLOCK(sb) * blockpp; + int nbitmaps = 0; + int ngdblocks = 0; + int needed = objcount + 1; + int i; + + for (i = 0; i < objcount; i++, fso++) { + int nblocks = fso->fso_bufcnt * blockpp; + int ndindirect = min(nblocks, addrpp + 1); + int nindir = nblocks + ndindirect + 1; + + nbitmaps += nindir + nblocks; + ngdblocks += nindir + nblocks; + + needed += nindir; + } + + /* Assumes extN and extN have same sb_info layout at the start. */ + if (nbitmaps > EXTN_SB(sb)->s_groups_count) + nbitmaps = EXTN_SB(sb)->s_groups_count; + if (ngdblocks > EXTN_SB(sb)->s_gdb_count) + ngdblocks = EXTN_SB(sb)->s_gdb_count; + + needed += nbitmaps + ngdblocks; + +#ifdef CONFIG_QUOTA + /* We assume that there will be 1 bit set in s_dquot.flags for each + * quota file that is active. This is at least true for now. 
+ */ + needed += hweight32(sb_any_quota_enabled(sb)) * + EXTN_SINGLEDATA_TRANS_BLOCKS; +#endif + + return needed; +} + +/* We have to start a huge journal transaction here to hold all of the + * metadata for the pages being written here. This is necessitated by + * the fact that we do lots of prepare_write operations before we do + * any of the matching commit_write operations, so even if we split + * up to use "smaller" transactions none of them could complete until + * all of them were opened. By having a single journal transaction, + * we eliminate duplicate reservations for common blocks like the + * superblock and group descriptors or bitmaps. + * + * We will start the transaction here, but each prepare_write will + * add a refcount to the transaction, and each commit_write will + * remove a refcount. The transaction will be closed when all of + * the pages have been written. + */ +static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_remote *nb) +{ + journal_t *journal; + handle_t *handle; + int needed; + ENTRY; + + LASSERT(!current->journal_info); + journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; + needed = fsfilt_extN_credits_needed(objcount, fso); + + /* The number of blocks we could _possibly_ dirty can be very large. + * We reduce our request if it is absurd (and we couldn't get that + * many credits for a single handle anyways). + * + * At some point we have to limit the size of I/Os sent at one time, + * increase the size of the journal, or we have to calculate the + * actual journal requirements more carefully by checking all of + * the blocks instead of being maximally pessimistic. It remains to + * be seen if this is a real problem or not.
+ */ + if (needed > journal->j_max_transaction_buffers) { + CERROR("want too many journal credits (%d) using %d instead\n", + needed, journal->j_max_transaction_buffers); + needed = journal->j_max_transaction_buffers; + } + + lock_kernel(); + handle = journal_start(journal, needed); + unlock_kernel(); + if (IS_ERR(handle)) + CERROR("can't get handle for %d credits: rc = %ld\n", needed, + PTR_ERR(handle)); + + RETURN(handle); +} + +static int fsfilt_extN_commit(struct inode *inode, void *handle) +{ + int rc; + + lock_kernel(); + rc = journal_stop((handle_t *)handle); + unlock_kernel(); + + return rc; +} + +static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, + struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int rc; + + lock_kernel(); + if (inode->i_op->setattr) + rc = inode->i_op->setattr(dentry, iattr); + else + rc = inode_setattr(inode, iattr); + + unlock_kernel(); + + return rc; +} + +static int fsfilt_extN_set_md(struct inode *inode, void *handle, + void *lmm, int lmm_size) +{ + int rc; + + down(&inode->i_sem); + lock_kernel(); + rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE, + XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); + unlock_kernel(); + up(&inode->i_sem); + + if (rc) { + CERROR("error adding MD data to inode %lu: rc = %d\n", + inode->i_ino, rc); + if (rc != -ENOSPC) LBUG(); + } + return rc; +} + +static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int size) +{ + int rc; + + down(&inode->i_sem); + lock_kernel(); + rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE, + XATTR_LUSTRE_MDS_OBJID, lmm, size); + unlock_kernel(); + up(&inode->i_sem); + + /* This gives us the MD size */ + if (lmm == NULL) + return (rc == -ENODATA) ? 0 : rc; + + if (rc < 0) { + CDEBUG(D_INFO, "error getting EA %s from inode %lu: " + "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); + memset(lmm, 0, size); + return (rc == -ENODATA) ? 
0 : rc; + } + + return rc; +} + +static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count, + loff_t *offset) +{ + struct inode *inode = file->f_dentry->d_inode; + int rc = 0; + + if (S_ISREG(inode->i_mode)) + rc = file->f_op->read(file, buf, count, offset); + else { + struct buffer_head *bh; + + /* FIXME: this assumes the blocksize == count, but the calling + * function will detect this as an error for now */ + bh = extN_bread(NULL, inode, + *offset >> inode->i_sb->s_blocksize_bits, + 0, &rc); + + if (bh) { + memcpy(buf, bh->b_data, inode->i_blksize); + brelse(bh); + rc = inode->i_blksize; + } + } + + return rc; +} + +static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error) +{ + struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; + + fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); + + kmem_cache_free(fcb_cache, fcb); + --fcb_cache_count; +} + +static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, + void *handle, fsfilt_cb_t cb_func) +{ +#ifdef HAVE_JOURNAL_CALLBACK_STATUS + struct fsfilt_cb_data *fcb; + + fcb = kmem_cache_alloc(fcb_cache, GFP_NOFS); + if (!fcb) + RETURN(-ENOMEM); + + ++fcb_cache_count; + fcb->cb_func = cb_func; + fcb->cb_obd = obd; + fcb->cb_last_rcvd = last_rcvd; + + CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); + lock_kernel(); + /* Note that an "incompatible pointer" warning here is OK for now */ + journal_callback_set(handle, fsfilt_extN_cb_func, + (struct journal_callback *)fcb); + unlock_kernel(); +#else +#warning "no journal callback kernel patch, faking it..." 
+ static long next = 0; + + if (time_after(jiffies, next)) { + CERROR("no journal callback kernel patch, faking it...\n"); + next = jiffies + 300 * HZ; + } + + cb_func(obd, last_rcvd, 0); +#endif + + return 0; +} + +static int fsfilt_extN_journal_data(struct file *filp) +{ + struct inode *inode = filp->f_dentry->d_inode; + + EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL; + + return 0; +} + +/* + * We need to hack the return value for the free inode counts because + * the current EA code requires one filesystem block per inode with EAs, + * so it is possible to run out of blocks before we run out of inodes. + * + * This can be removed when the extN EA code is fixed. + */ +static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs) +{ + struct statfs sfs; + int rc = vfs_statfs(sb, &sfs); + + if (!rc && sfs.f_bfree < sfs.f_ffree) + sfs.f_ffree = sfs.f_bfree; + + statfs_pack(osfs, &sfs); + return rc; +} + +static struct fsfilt_operations fsfilt_extN_ops = { + fs_type: "extN", + fs_owner: THIS_MODULE, + fs_start: fsfilt_extN_start, + fs_brw_start: fsfilt_extN_brw_start, + fs_commit: fsfilt_extN_commit, + fs_setattr: fsfilt_extN_setattr, + fs_set_md: fsfilt_extN_set_md, + fs_get_md: fsfilt_extN_get_md, + fs_readpage: fsfilt_extN_readpage, + fs_journal_data: fsfilt_extN_journal_data, + fs_set_last_rcvd: fsfilt_extN_set_last_rcvd, + fs_statfs: fsfilt_extN_statfs, +}; + +static int __init fsfilt_extN_init(void) +{ + int rc; + + //rc = extN_xattr_register(); + fcb_cache = kmem_cache_create("fsfilt_extN_fcb", + sizeof(struct fsfilt_cb_data), 0, + 0, NULL, NULL); + if (!fcb_cache) { + CERROR("error allocating fsfilt journal callback cache\n"); + GOTO(out, rc = -ENOMEM); + } + + rc = fsfilt_register_ops(&fsfilt_extN_ops); + + if (rc) + kmem_cache_destroy(fcb_cache); +out: + return rc; +} + +static void __exit fsfilt_extN_exit(void) +{ + int rc; + + fsfilt_unregister_ops(&fsfilt_extN_ops); + rc = kmem_cache_destroy(fcb_cache); + + if (rc || fcb_cache_count) { 
+ CERROR("can't free fsfilt callback cache: count %d, rc = %d\n", + fcb_cache_count, rc); + } + + //rc = extN_xattr_unregister(); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre extN Filesystem Helper v0.1"); +MODULE_LICENSE("GPL"); + +module_init(fsfilt_extN_init); +module_exit(fsfilt_extN_exit); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 926991a..8a0ed36 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -84,25 +84,45 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, ENTRY; + LASSERT (strnlen (nm, 1024) < 1024); /* sanity check */ + if (class_search_type(nm)) { CDEBUG(D_IOCTL, "Type %s already registered\n", nm); RETURN(-EEXIST); } + rc = -ENOMEM; OBD_ALLOC(type, sizeof(*type)); + if (type == NULL) + RETURN(rc); + OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops)); OBD_ALLOC(type->typ_name, strlen(nm) + 1); - if (!type) - RETURN(-ENOMEM); - INIT_LIST_HEAD(&type->typ_chain); + if (type->typ_ops == NULL || + type->typ_name == NULL) + GOTO (failed, rc); + + *(type->typ_ops) = *ops; + strcpy(type->typ_name, nm); + list_add(&type->typ_chain, &obd_types); + + rc = lprocfs_reg_class(type, vars, type); + if (rc != 0) { + list_del (&type->typ_chain); + GOTO (failed, rc); + } + CDEBUG(D_INFO, "MOD_INC_USE for register_type: count = %d\n", atomic_read(&(THIS_MODULE)->uc.usecount)); MOD_INC_USE_COUNT; - list_add(&type->typ_chain, &obd_types); - memcpy(type->typ_ops, ops, sizeof(*type->typ_ops)); - strcpy(type->typ_name, nm); - rc = lprocfs_reg_class(type, vars, type); + RETURN (0); + failed: /* release only the buffers that were allocated, then the type itself */ + if (type->typ_name != NULL) + OBD_FREE (type->typ_name, strlen (nm) + 1); + if (type->typ_ops != NULL) + OBD_FREE (type->typ_ops, sizeof (*type->typ_ops)); + OBD_FREE (type, sizeof (*type)); RETURN(rc); } diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 4ccc043..3d2f222 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -57,8 +57,10 @@ static int 
echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn, int l case OBD_IOC_CREATE: { struct lov_stripe_md *tmp_lsm = NULL; rc = obd_create(&ec->conn, &data->ioc_obdo1, &tmp_lsm); - if (lsm) + if (lsm && tmp_lsm ) { memcpy(lsm, tmp_lsm, sizeof(*tmp_lsm)); + data->ioc_conn2 = 1; + } GOTO(out, rc); } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 31047b5..a370e56 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -23,24 +23,25 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* + * Invariant: get O/R i_sem for lookup, if needed, before any journal ops + * (which need to get journal_lock, may block if journal full). + */ + #define EXPORT_SYMTAB #define DEBUG_SUBSYSTEM S_FILTER +#include #include -#include +#include // XXX kill me soon #include #include #include #include #include -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif -#include #include #include -#include +#include #include extern struct lprocfs_vars status_class_var[]; @@ -49,9 +50,6 @@ extern struct lprocfs_vars status_var_nm_1[]; static kmem_cache_t *filter_open_cache; static kmem_cache_t *filter_dentry_cache; -#define FILTER_ROOTINO 2 -#define FILTER_ROOTINO_STR __stringify(FILTER_ROOTINO) - #define S_SHIFT 12 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = { [0] NULL, @@ -96,13 +94,14 @@ struct dentry_operations filter_dops = { .d_release = filter_drelease, }; +#define LAST_RCVD "last_rcvd" + /* setup the object store with correct subdirectories */ static int filter_prep(struct obd_device *obd) { struct obd_run_ctxt saved; struct filter_obd *filter = &obd->u.filter; struct dentry *dentry; - struct dentry *root; struct file *file; struct inode *inode; int rc = 0; @@ -118,58 +117,33 @@ static int filter_prep(struct obd_device *obd) GOTO(out, rc); } filter->fo_dentry_O = dentry; - dentry = simple_mkdir(current->fs->pwd, "P", 0700); - CDEBUG(D_INODE, "got/created P: %p\n", dentry); - if (IS_ERR(dentry)) 
{ - rc = PTR_ERR(dentry); - CERROR("cannot open/create P: rc = %d\n", rc); - GOTO(out_O, rc); - } - f_dput(dentry); - dentry = simple_mkdir(current->fs->pwd, "D", 0700); - CDEBUG(D_INODE, "got/created D: %p\n", dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("cannot open/create D: rc = %d\n", rc); - GOTO(out_O, rc); - } - - root = simple_mknod(dentry, FILTER_ROOTINO_STR, S_IFREG | 0755); - f_dput(dentry); - if (IS_ERR(root)) { - rc = PTR_ERR(root); - CERROR("OBD filter: cannot open/create root %d: rc = %d\n", - FILTER_ROOTINO, rc); - GOTO(out_O, rc); - } - f_dput(root); /* * Create directories and/or get dentries for each object type. * This saves us from having to do multiple lookups for each one. */ for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) { - char *type = obd_type_by_mode[mode]; + char *name = obd_type_by_mode[mode]; - if (!type) { + if (!name) { filter->fo_dentry_O_mode[mode] = NULL; continue; } - dentry = simple_mkdir(filter->fo_dentry_O, type, 0700); - CDEBUG(D_INODE, "got/created O/%s: %p\n", type, dentry); + dentry = simple_mkdir(filter->fo_dentry_O, name, 0700); + CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); - CERROR("cannot create O/%s: rc = %d\n", type, rc); + CERROR("cannot create O/%s: rc = %d\n", name, rc); GOTO(out_O_mode, rc); } filter->fo_dentry_O_mode[mode] = dentry; } - file = filp_open("D/status", O_RDWR | O_CREAT, 0700); + file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700); if ( !file || IS_ERR(file) ) { rc = PTR_ERR(file); - CERROR("OBD filter: cannot open/create status %s: rc = %d\n", - "D/status", rc); + CERROR("OBD filter: cannot open/create %s: rc = %d\n", + LAST_RCVD, rc); GOTO(out_O_mode, rc); } @@ -210,7 +184,7 @@ static int filter_prep(struct obd_device *obd) return(rc); -out_O_mode: + out_O_mode: while (mode-- > 0) { struct dentry *dentry = filter->fo_dentry_O_mode[mode]; if (dentry) { @@ -218,7 +192,6 @@ out_O_mode: filter->fo_dentry_O_mode[mode] = 
NULL; } } -out_O: f_dput(filter->fo_dentry_O); filter->fo_dentry_O = NULL; goto out; @@ -235,9 +208,9 @@ static void filter_post(struct obd_device *obd) int mode; push_ctxt(&saved, &filter->fo_ctxt, NULL); - file = filp_open("D/status", O_RDWR | O_CREAT, 0700); + file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700); if (IS_ERR(file)) { - CERROR("OBD filter: cannot create status file\n"); + CERROR("OBD filter: cannot create %s\n", LAST_RCVD); goto out; } @@ -273,7 +246,6 @@ static __u64 filter_next_id(struct obd_device *obd) id = ++obd->u.filter.fo_lastobjid; spin_unlock(&obd->u.filter.fo_objidlock); - /* FIXME: write the lastobjid to disk here */ return id; } @@ -281,7 +253,7 @@ static __u64 filter_next_id(struct obd_device *obd) /* parent i_sem is already held if needed for exclusivity */ static struct dentry *filter_fid2dentry(struct obd_device *obd, struct dentry *dparent, - __u64 id, __u32 type, int locked) + __u64 id, int locked) { struct super_block *sb = obd->u.filter.fo_sb; struct dentry *dchild; @@ -295,20 +267,14 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, } if (id == 0) { - CERROR("fatal: invalid object #0\n"); + CERROR("fatal: invalid object id 0\n"); LBUG(); RETURN(ERR_PTR(-ESTALE)); } - if (!(type & S_IFMT)) { - CERROR("OBD %s, object "LPU64" has bad type: %o\n", - __FUNCTION__, id, type); - RETURN(ERR_PTR(-EINVAL)); - } - len = sprintf(name, LPU64, id); - CDEBUG(D_INODE, "opening object O/%s/%s\n", obd_mode_to_type(type), - name); + CDEBUG(D_INODE, "opening object O/%*s/%s\n", + dparent->d_name.len, dparent->d_name.name, name); if (!locked) down(&dparent->d_inode->i_sem); dchild = lookup_one_len(name, dparent, len); @@ -319,8 +285,8 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, RETURN(dchild); } - CDEBUG(D_INODE, "got child obj O/%s/%s: %p, count = %d\n", - obd_mode_to_type(type), name, dchild, + CDEBUG(D_INODE, "got child obj O/%*s/%s: %p, count = %d\n", + dparent->d_name.len, dparent->d_name.name, name, 
dchild, atomic_read(&dchild->d_count)); LASSERT(atomic_read(&dchild->d_count) > 0); @@ -333,6 +299,7 @@ static inline struct dentry *filter_parent(struct obd_device *obd, { struct filter_obd *filter = &obd->u.filter; + LASSERT((mode & S_IFMT) == S_IFREG); /* only regular files for now */ return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -453,7 +420,6 @@ static int filter_destroy_internal(struct obd_device *obd, push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); rc = vfs_unlink(dir_dentry->d_inode, object_dentry); - /* XXX unlink from PENDING directory now too */ pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); if (rc) @@ -483,7 +449,10 @@ static int filter_close_internal(struct obd_device *obd, struct dentry *dir_dentry = filter_parent(obd, S_IFREG); down(&dir_dentry->d_inode->i_sem); + /* XXX start transaction */ + /* XXX unlink from PENDING directory now too */ rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry); + /* XXX finish transaction */ if (rc2 && !rc) rc = rc2; up(&dir_dentry->d_inode->i_sem); @@ -496,126 +465,70 @@ static int filter_close_internal(struct obd_device *obd, } /* obd methods */ -static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, - obd_uuid_t cluuid, struct recovd_obd *recovd, - ptlrpc_recovery_cb_t recover) -{ - struct obd_export *exp; - int rc; - - ENTRY; - MOD_INC_USE_COUNT; - rc = class_connect(conn, obd, cluuid); - if (rc) - GOTO(out_dec, rc); - exp = class_conn2export(conn); - LASSERT(exp); - - INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head); - spin_lock_init(&exp->exp_filter_data.fed_lock); -out: - RETURN(rc); - -out_dec: - MOD_DEC_USE_COUNT; - goto out; -} - -static int filter_disconnect(struct lustre_handle *conn) -{ - struct obd_export *exp = class_conn2export(conn); - struct filter_export_data *fed; - int rc; - ENTRY; - - LASSERT(exp); - fed = &exp->exp_filter_data; - spin_lock(&fed->fed_lock); - while (!list_empty(&fed->fed_open_head)) { - struct filter_file_data *ffd; - - ffd 
= list_entry(fed->fed_open_head.next, typeof(*ffd), - ffd_export_list); - list_del(&ffd->ffd_export_list); - spin_unlock(&fed->fed_lock); - - CERROR("force closing file %*s on disconnect\n", - ffd->ffd_file->f_dentry->d_name.len, - ffd->ffd_file->f_dentry->d_name.name); - - filter_close_internal(exp->exp_obd, ffd); - spin_lock(&fed->fed_lock); - } - spin_unlock(&fed->fed_lock); - - ldlm_cancel_locks_for_export(exp); - rc = class_disconnect(conn); - if (!rc) - MOD_DEC_USE_COUNT; - - /* XXX cleanup preallocated inodes */ - RETURN(rc); -} - /* mount the file system (secretly) */ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) { struct obd_ioctl_data* data = buf; struct filter_obd *filter; struct vfsmount *mnt; - int err = 0; + int rc = 0; ENTRY; + MOD_INC_USE_COUNT; if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2) - RETURN(-EINVAL); + GOTO(err_dec, rc = -EINVAL); + + obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); + if (IS_ERR(obd->obd_fsops)) + GOTO(err_dec, rc = PTR_ERR(obd->obd_fsops)); - MOD_INC_USE_COUNT; mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL); - err = PTR_ERR(mnt); + rc = PTR_ERR(mnt); if (IS_ERR(mnt)) - GOTO(err_dec, err); + GOTO(err_ops, rc); filter = &obd->u.filter;; filter->fo_vfsmnt = mnt; filter->fo_fstype = strdup(data->ioc_inlbuf2); filter->fo_sb = mnt->mnt_root->d_inode->i_sb; - CERROR("%s: mnt is %p\n", data->ioc_inlbuf1, filter->fo_vfsmnt); - /* XXX is this even possible if do_kern_mount succeeded? 
*/ - if (!filter->fo_sb) - GOTO(err_kfree, err = -ENODEV); + CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt); OBD_SET_CTXT_MAGIC(&filter->fo_ctxt); filter->fo_ctxt.pwdmnt = mnt; filter->fo_ctxt.pwd = mnt->mnt_root; filter->fo_ctxt.fs = get_ds(); - err = filter_prep(obd); - if (err) - GOTO(err_kfree, err); + rc = filter_prep(obd); + if (rc) + GOTO(err_kfree, rc); + spin_lock_init(&filter->fo_fddlock); spin_lock_init(&filter->fo_objidlock); INIT_LIST_HEAD(&filter->fo_export_list); obd->obd_namespace = ldlm_namespace_new("filter-tgt", LDLM_NAMESPACE_SERVER); - if (obd->obd_namespace == NULL) - LBUG(); + if (!obd->obd_namespace) + GOTO(err_post, rc = -ENOMEM); ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); RETURN(0); +err_post: + filter_post(obd); err_kfree: kfree(filter->fo_fstype); unlock_kernel(); mntput(filter->fo_vfsmnt); filter->fo_sb = 0; lock_kernel(); - +err_ops: + fsfilt_put_ops(obd->obd_fsops); err_dec: MOD_DEC_USE_COUNT; - return err; + return rc; } @@ -646,6 +559,7 @@ static int filter_cleanup(struct obd_device *obd) mntput(obd->u.filter.fo_vfsmnt); obd->u.filter.fo_sb = 0; kfree(obd->u.filter.fo_fstype); + fsfilt_put_ops(obd->obd_fsops); lock_kernel(); @@ -653,6 +567,76 @@ static int filter_cleanup(struct obd_device *obd) RETURN(0); } +int filter_attach(struct obd_device *dev, obd_count len, void *data) +{ + return lprocfs_reg_obd(dev, status_var_nm_1, dev); +} + +int filter_detach(struct obd_device *dev) +{ + return lprocfs_dereg_obd(dev); +} + +static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, + obd_uuid_t cluuid, struct recovd_obd *recovd, + ptlrpc_recovery_cb_t recover) +{ + struct obd_export *exp; + int rc; + + ENTRY; + MOD_INC_USE_COUNT; + rc = class_connect(conn, obd, cluuid); + if (rc) + GOTO(out_dec, rc); + exp = class_conn2export(conn); + LASSERT(exp); + + INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head); + 
spin_lock_init(&exp->exp_filter_data.fed_lock); +out: + RETURN(rc); + +out_dec: + MOD_DEC_USE_COUNT; + goto out; +} + +static int filter_disconnect(struct lustre_handle *conn) +{ + struct obd_export *exp = class_conn2export(conn); + struct filter_export_data *fed; + int rc; + ENTRY; + + LASSERT(exp); + fed = &exp->exp_filter_data; + spin_lock(&fed->fed_lock); + while (!list_empty(&fed->fed_open_head)) { + struct filter_file_data *ffd; + + ffd = list_entry(fed->fed_open_head.next, typeof(*ffd), + ffd_export_list); + list_del(&ffd->ffd_export_list); + spin_unlock(&fed->fed_lock); + + CERROR("force closing file %*s on disconnect\n", + ffd->ffd_file->f_dentry->d_name.len, + ffd->ffd_file->f_dentry->d_name.name); + + filter_close_internal(exp->exp_obd, ffd); + spin_lock(&fed->fed_lock); + } + spin_unlock(&fed->fed_lock); + + ldlm_cancel_locks_for_export(exp); + rc = class_disconnect(conn); + if (!rc) + MOD_DEC_USE_COUNT; + + /* XXX cleanup preallocated inodes */ + RETURN(rc); +} static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid) { @@ -714,7 +698,7 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn, RETURN(ERR_PTR(-EINVAL)); } dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode), - oa->o_id, oa->o_mode, locked); + oa->o_id, locked); } if (IS_ERR(dentry)) { @@ -751,6 +735,7 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa, RETURN(rc); } +/* this is called from filter_truncate() until we have filter_punch() */ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md) { @@ -771,21 +756,26 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG; inode = dentry->d_inode; + push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); lock_kernel(); if (iattr.ia_valid & ATTR_SIZE) down(&inode->i_sem); - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); + + /* XXX start transaction */ if 
(inode->i_op->setattr) rc = inode->i_op->setattr(dentry, &iattr); else rc = inode_setattr(inode, &iattr); - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); + /* XXX update last_rcvd, finish transaction */ + if (iattr.ia_valid & ATTR_SIZE) { up(&inode->i_sem); oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME; obdo_from_inode(oa, inode, oa->o_valid); } + unlock_kernel(); + pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); f_dput(dentry); RETURN(rc); @@ -865,10 +855,11 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md **ea) { struct obd_device *obd = class_conn2obd(conn); - char name[64]; struct obd_run_ctxt saved; + struct dentry *dir_dentry; struct dentry *new; struct iattr; + int rc; ENTRY; if (!obd) { @@ -876,31 +867,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, return -EINVAL; } - if (!(oa->o_mode & S_IFMT)) { - CERROR("OBD %s, object "LPU64" has bad type: %o\n", - __FUNCTION__, oa->o_id, oa->o_mode); - return -ENOENT; - } - oa->o_id = filter_next_id(obd); - //filter_id(name, oa->o_id, oa->o_mode); - sprintf(name, LPU64, oa->o_id); push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - new = simple_mknod(filter_parent(obd, oa->o_mode), name, oa->o_mode); - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - if (IS_ERR(new)) { - CERROR("Error mknod obj %s, err %ld\n", name, PTR_ERR(new)); - return -ENOENT; + dir_dentry = filter_parent(obd, oa->o_mode); + down(&dir_dentry->d_inode->i_sem); + new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 1); + if (IS_ERR(new)) + GOTO(out, rc = PTR_ERR(new)); + + if (new->d_inode) { + /* This would only happen if lastobjid was bad on disk */ + CERROR("objid O/%*s/"LPU64" already exists\n", + dir_dentry->d_name.len, dir_dentry->d_name.name, + oa->o_id); + LBUG(); + GOTO(out, rc = -EEXIST); } + /* XXX start transaction */ + rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode); + if (rc) + GOTO(out_put, rc); + /* XXX update last_rcvd+lastobjid on 
disk, finish transaction */ + /* Set flags for fields we have set in the inode struct */ oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME; filter_from_inode(oa, new->d_inode, oa->o_valid); - f_dput(new); - return 0; + EXIT; +out_put: + f_dput(new); +out: + up(&dir_dentry->d_inode->i_sem); + pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); + return rc; } static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, @@ -927,6 +929,7 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, GOTO(out, rc = -ENOENT); fdd = object_dentry->d_fsdata; + /* XXX start transaction */ if (fdd && atomic_read(&fdd->fdd_open_count)) { if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) { fdd->fdd_flags |= FILTER_FLAG_DESTROY; @@ -943,6 +946,7 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, rc = filter_destroy_internal(obd, dir_dentry, object_dentry); out_dput: + /* XXX update last_rcvd on disk, finish transaction */ f_dput(object_dentry); EXIT; @@ -951,7 +955,7 @@ out: return rc; } -/* NB count and offset are used for punch, but not truncate */ +/* NB start and end are used for punch, but not truncate */ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *lsm, obd_off start, obd_off end) @@ -969,266 +973,6 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa, RETURN(error); } -static int filter_pgcache_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_brw_set *set) -{ - struct obd_export *export = class_conn2export(conn); - struct obd_run_ctxt saved; - struct super_block *sb; - int pnum; /* index to pages (bufs) */ - unsigned long retval; - int error; - struct file *file; - int pg; - ENTRY; - - if (!export) { - CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr); - RETURN(-EINVAL); - } - - sb = export->exp_obd->u.filter.fo_sb; - 
push_ctxt(&saved, &export->exp_obd->u.filter.fo_ctxt, NULL); - pnum = 0; /* pnum indexes buf 0..num_pages */ - - file = filter_obj_open(export, lsm->lsm_object_id, S_IFREG); - if (IS_ERR(file)) - GOTO(out, retval = PTR_ERR(file)); - - /* count doubles as retval */ - for (pg = 0; pg < oa_bufs; pg++) { - CDEBUG(D_INODE, "OP %d inode %lu pgno: (%d) "LPU64 - ") off count ("LPU64",%d)\n", - cmd, file->f_dentry->d_inode->i_ino, pnum, - pga[pnum].off >> PAGE_CACHE_SHIFT, pga[pnum].off, - (int)pga[pnum].count); - if (cmd & OBD_BRW_WRITE) { - loff_t off; - char *buffer; - off = pga[pnum].off; - buffer = kmap(pga[pnum].pg); - retval = file->f_op->write(file, buffer, - pga[pnum].count, - &off); - kunmap(pga[pnum].pg); - CDEBUG(D_INODE, "retval %ld\n", retval); - } else { - loff_t off = pga[pnum].off; - char *buffer = kmap(pga[pnum].pg); - - if (off >= file->f_dentry->d_inode->i_size) { - memset(buffer, 0, pga[pnum].count); - retval = pga[pnum].count; - } else { - retval = file->f_op->read(file, buffer, - pga[pnum].count, &off); - } - kunmap(pga[pnum].pg); - - if (retval != pga[pnum].count) { - filp_close(file, 0); - GOTO(out, retval = -EIO); - } - CDEBUG(D_INODE, "retval %ld\n", retval); - } - pnum++; - } - /* sizes and blocks are set by generic_file_write */ - /* ctimes/mtimes will follow with a setattr call */ - filp_close(file, 0); - - /* XXX: do something with callback if it is set? */ - - EXIT; -out: - pop_ctxt(&saved, &export->exp_obd->u.filter.fo_ctxt, NULL); - error = (retval >= 0) ? 0 : retval; - return error; -} - -/* - * Calculate the number of buffer credits needed to write multiple pages in - * a single ext3/extN transaction. No, this shouldn't be here, but as yet - * ext3 doesn't have a nice API for calculating this sort of thing in advance. - * - * See comment above ext3_writepage_trans_blocks for details. We assume - * no data journaling is being done, but it does allow for all of the pages - * being non-contiguous. 
If we are guaranteed contiguous pages we could - * reduce the number of (d)indirect blocks a lot. - * - * With N blocks per page and P pages, for each inode we have at most: - * N*P indirect - * min(N*P, blocksize/4 + 1) dindirect blocks - * 1 tindirect - * - * For the entire filesystem, we have at most: - * min(sum(nindir + P), ngroups) bitmap blocks (from the above) - * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) - * 1 inode block - * 1 superblock - * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files - */ -static int ext3_credits_needed(struct super_block *sb, int objcount, - struct obd_ioobj *obj) -{ - struct obd_ioobj *o = obj; - int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); - int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp; - int nbitmaps = 0; - int ngdblocks = 0; - int needed = objcount + 1; - int i; - - for (i = 0; i < objcount; i++, o++) { - int nblocks = o->ioo_bufcnt * blockpp; - int ndindirect = min(nblocks, addrpp + 1); - int nindir = nblocks + ndindirect + 1; - - nbitmaps += nindir + nblocks; - ngdblocks += nindir + nblocks; - - needed += nindir; - } - - /* Assumes ext3 and extN have same sb_info layout at the start. */ - if (nbitmaps > EXT3_SB(sb)->s_groups_count) - nbitmaps = EXT3_SB(sb)->s_groups_count; - if (ngdblocks > EXT3_SB(sb)->s_gdb_count) - ngdblocks = EXT3_SB(sb)->s_gdb_count; - - needed += nbitmaps + ngdblocks; - -#ifdef CONFIG_QUOTA - /* We assume that there will be 1 bit set in s_dquot.flags for each - * quota file that is active. This is at least true for now. - */ - needed += hweight32(sb_any_quota_enabled(sb)) * - EXT3_SINGLEDATA_TRANS_BLOCKS; -#endif - - return needed; -} - -/* We have to start a huge journal transaction here to hold all of the - * metadata for the pages being written here. 
This is necessitated by - * the fact that we do lots of prepare_write operations before we do - * any of the matching commit_write operations, so even if we split - * up to use "smaller" transactions none of them could complete until - * all of them were opened. By having a single journal transaction, - * we eliminate duplicate reservations for common blocks like the - * superblock and group descriptors or bitmaps. - * - * We will start the transaction here, but each prepare_write will - * add a refcount to the transaction, and each commit_write will - * remove a refcount. The transaction will be closed when all of - * the pages have been written. - */ -static void *ext3_filter_journal_start(struct filter_obd *filter, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb) -{ - journal_t *journal = NULL; - handle_t *handle = NULL; - int needed; - - /* It appears that some kernels have different values for - * EXT*_MAX_GROUP_LOADED (either 8 or 32), so we cannot - * assume anything after s_inode_bitmap_number is the same. - */ - if (!strcmp(filter->fo_fstype, "ext3")) - journal = EXT3_SB(filter->fo_sb)->s_journal; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - else if (!strcmp(filter->fo_fstype, "extN")) - journal = EXTN_SB(filter->fo_sb)->s_journal; -#endif - needed = ext3_credits_needed(filter->fo_sb, objcount, obj); - - /* The number of blocks we could _possibly_ dirty can very large. - * We reduce our request if it is absurd (and we couldn't get that - * many credits for a single handle anyways). - * - * At some point we have to limit the size of I/Os sent at one time, - * increase the size of the journal, or we have to calculate the - * actual journal requirements more carefully by checking all of - * the blocks instead of being maximally pessimistic. It remains to - * be seen if this is a real problem or not. 
- */ - if (needed > journal->j_max_transaction_buffers) { - CERROR("want too many journal credits (%d) using %d instead\n", - needed, journal->j_max_transaction_buffers); - needed = journal->j_max_transaction_buffers; - } - - lock_kernel(); - handle = journal_start(journal, needed); - unlock_kernel(); - if (IS_ERR(handle)) - CERROR("can't get handle for %d credits: rc = %ld\n", needed, - PTR_ERR(handle)); - - return(handle); -} - -static void *filter_journal_start(void **journal_save, - struct filter_obd *filter, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb) -{ - void *handle = NULL; - - /* This may not be necessary - we probably never have a - * transaction started when we enter here, so we can - * remove the saving of the journal state entirely. - * For now leave it in just to see if it ever happens. - */ - *journal_save = current->journal_info; - if (*journal_save) { - CERROR("Already have handle %p???\n", *journal_save); - LBUG(); - current->journal_info = NULL; - } - - if (!strcmp(filter->fo_fstype, "ext3") || - !strcmp(filter->fo_fstype, "extN")) - handle = ext3_filter_journal_start(filter, objcount, obj, - niocount, nb); - return handle; -} - -static int ext3_filter_journal_stop(void *handle) -{ - int rc; - - /* We got a refcount on the handle for each call to prepare_write, - * so we can drop the "parent" handle here to avoid the need for - * osc to call back into filterobd to close the handle. The - * remaining references will be dropped in commit_write. 
- */ - lock_kernel(); - rc = journal_stop((handle_t *)handle); - unlock_kernel(); - - return rc; -} - -static int filter_journal_stop(void *journal_save, struct filter_obd *filter, - void *handle) -{ - int rc = 0; - - if (!strcmp(filter->fo_fstype, "ext3") || - !strcmp(filter->fo_fstype, "extN")) - rc = ext3_filter_journal_stop(handle); - - if (rc) - CERROR("error on journal stop: rc = %d\n", rc); - - current->journal_info = journal_save; - - return rc; -} - static inline void lustre_put_page(struct page *page) { kunmap(page); @@ -1440,57 +1184,73 @@ static int filter_preprw(int cmd, struct lustre_handle *conn, { struct obd_run_ctxt saved; struct obd_device *obd; - struct obd_ioobj *o = obj; + struct obd_ioobj *o; struct niobuf_remote *rnb = nb; struct niobuf_local *lnb = res; - void *journal_save = NULL; + struct dentry *dir_dentry; + struct fsfilt_objinfo *fso; int pglocked = 0; int rc = 0; int i; ENTRY; + memset(res, 0, niocount * sizeof(*res)); + obd = class_conn2obd(conn); if (!obd) { CDEBUG(D_IOCTL, "invalid client "LPX64"\n", conn->addr); RETURN(-EINVAL); } - memset(res, 0, sizeof(*res) * niocount); - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); + LASSERT(objcount < 16); // theoretically we support multi-obj BRW - if (cmd & OBD_BRW_WRITE) { - *desc_private = filter_journal_start(&journal_save, - &obd->u.filter, - objcount, obj, niocount, - nb); - if (IS_ERR(*desc_private)) - GOTO(out_ctxt, rc = PTR_ERR(*desc_private)); - } + OBD_ALLOC(fso, objcount * sizeof(*fso)); + if (!fso) + RETURN(-ENOMEM); - obd_kmap_get(niocount, 1); + push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); + dir_dentry = filter_parent(obd, S_IFREG); - for (i = 0; i < objcount; i++, o++) { + for (i = 0, o = obj; i < objcount; i++, o++) { struct filter_dentry_data *fdd; struct dentry *dentry; - struct inode *inode; - int j; - dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG), - o->ioo_id, S_IFREG, 0); + LASSERT(o->ioo_bufcnt); - if (!(fdd = dentry->d_fsdata) || 
!atomic_read(&fdd->fdd_open_count)) - CERROR("I/O to unopened object "LPX64"\n", o->ioo_id); + dentry = filter_fid2dentry(obd, dir_dentry, o->ioo_id, 0); if (IS_ERR(dentry)) - GOTO(out_clean, rc = PTR_ERR(dentry)); - inode = dentry->d_inode; - if (!inode) { + GOTO(out_objinfo, rc = PTR_ERR(dentry)); + + fso[i].fso_dentry = dentry; + fso[i].fso_bufcnt = o->ioo_bufcnt; + + if (!dentry->d_inode) { CERROR("trying to BRW to non-existent file "LPU64"\n", o->ioo_id); - f_dput(dentry); - GOTO(out_clean, rc = -ENOENT); + GOTO(out_objinfo, rc = -ENOENT); } + fdd = dentry->d_fsdata; + if (!fdd || !atomic_read(&fdd->fdd_open_count)) + CDEBUG(D_PAGE, "I/O to unopened object "LPX64"\n", + o->ioo_id); + } + + if (cmd & OBD_BRW_WRITE) { + *desc_private = fsfilt_brw_start(obd, objcount, fso, + niocount, nb); + if (IS_ERR(*desc_private)) + GOTO(out_objinfo, rc = PTR_ERR(*desc_private)); + } + + obd_kmap_get(niocount, 1); + + for (i = 0, o = obj; i < objcount; i++, o++) { + struct dentry *dentry = fso[i].fso_dentry; /* dentry looked up for object i above */ + struct inode *inode = dentry->d_inode; + int j; + for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) { struct page *page; @@ -1506,8 +1266,11 @@ static int filter_preprw(int cmd, struct lustre_handle *conn, page = lustre_get_page_read(inode, rnb); if (IS_ERR(page)) { - f_dput(dentry); - GOTO(out_clean, rc = PTR_ERR(page)); + if (cmd & OBD_BRW_WRITE) + fsfilt_commit(obd, dir_dentry->d_inode, + *desc_private); + + GOTO(out_pages, rc = PTR_ERR(page)); } lnb->addr = page_address(page); @@ -1517,27 +1280,34 @@ static int filter_preprw(int cmd, struct lustre_handle *conn, } } -out_stop: if (cmd & OBD_BRW_WRITE) { - int err = filter_journal_stop(journal_save, &obd->u.filter, - *desc_private); - if (!rc) - rc = err; + int err = fsfilt_commit(obd, dir_dentry->d_inode, + *desc_private); + if (err) + GOTO(out_pages, rc = err); } -out_ctxt: + + EXIT; +out: + OBD_FREE(fso, objcount * sizeof(*fso)); + current->journal_info = NULL; pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - 
RETURN(rc); -out_clean: + return rc; + +out_pages: while (lnb-- > res) { CERROR("error cleanup on brw\n"); - f_dput(lnb->dentry); if (cmd & OBD_BRW_WRITE) filter_commit_write(lnb->page, 0, PAGE_SIZE, rc); else lustre_put_page(lnb->page); } obd_kmap_put(niocount); - goto out_stop; +out_objinfo: + for (i = 0; i < objcount && fso[i].fso_dentry; i++) + f_dput(fso[i].fso_dentry); + + goto out; } static int filter_write_locked_page(struct niobuf_local *lnb) @@ -1588,21 +1358,19 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn, struct obd_ioobj *o; struct niobuf_local *r; struct obd_device *obd = class_conn2obd(conn); - void *journal_save; int found_locked = 0; int rc = 0; int i; ENTRY; push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - lock_kernel(); - journal_save = current->journal_info; - LASSERT(!journal_save); + LASSERT(!current->journal_info); current->journal_info = private; - unlock_kernel(); + for (i = 0, o = obj, r = res; i < objcount; i++, o++) { int j; + for (j = 0 ; j < o->ioo_bufcnt ; j++, r++) { struct page *page = r->page; @@ -1627,9 +1395,6 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn, f_dput(r->dentry); } } - lock_kernel(); - current->journal_info = journal_save; - unlock_kernel(); if (!found_locked) goto out_ctxt; @@ -1650,22 +1415,76 @@ static int filter_commitrw(int cmd, struct lustre_handle *conn, } out_ctxt: + LASSERT(!current->journal_info); + current->journal_info = NULL; + pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); RETURN(rc); } -static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs) +static int filter_brw(int cmd, struct lustre_handle *conn, + struct lov_stripe_md *lsm, obd_count oa_bufs, + struct brw_page *pga, struct obd_brw_set *set) { - struct obd_device *obd = class_conn2obd(conn); - struct statfs sfs; - int rc; + struct obd_ioobj ioo; + struct niobuf_local *lnb; + struct niobuf_remote *rnb; + obd_count i; + void *desc_private; + int ret = 0; + ENTRY; + + OBD_ALLOC(lnb, 
oa_bufs * sizeof(struct niobuf_local)); + OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote)); + + if ( lnb == NULL || rnb == NULL ) + GOTO(out, ret = -ENOMEM); + + for ( i = 0 ; i < oa_bufs ; i++ ) { + rnb[i].offset = pga[i].off; + rnb[i].len = pga[i].count; + } + + ioo.ioo_id = lsm->lsm_object_id; + ioo.ioo_gr = 0; + ioo.ioo_type = S_IFREG; + ioo.ioo_bufcnt = oa_bufs; + + ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb, + &desc_private); + if ( ret != 0 ) + GOTO(out, ret); + + for ( i = 0; i < oa_bufs ; i++ ) { + void *virt = kmap(pga[i].pg); + obd_off off = pga[i].off & ~PAGE_MASK; + + if ( cmd & OBD_BRW_WRITE ) + memcpy(lnb[i].addr + off, virt + off, pga[i].count); + else + memcpy(virt + off, lnb[i].addr + off, pga[i].count); + + kunmap(pga[i].pg); + } + ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private); + +out: + if ( lnb ) + OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local)); + if ( rnb ) + OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote)); + RETURN(ret); +} + +static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs) +{ + struct obd_device *obd; ENTRY; - rc = vfs_statfs(obd->u.filter.fo_sb, &sfs); - if (!rc) - statfs_pack(osfs, &sfs); - return rc; + obd = class_conn2obd(conn); + + RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs)); } static int filter_get_info(struct lustre_handle *conn, obd_count keylen, @@ -1694,13 +1513,6 @@ static int filter_get_info(struct lustre_handle *conn, obd_count keylen, RETURN(0); } - if ( keylen == strlen("root_ino") && - memcmp(key, "root_ino", keylen) == 0 ){ - *vallen = sizeof(obd_id); - *val = (void *)(obd_id)FILTER_ROOTINO; - RETURN(0); - } - CDEBUG(D_IOCTL, "invalid key\n"); RETURN(-EINVAL); } @@ -1794,15 +1606,7 @@ int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst, RETURN(err); } -int filter_attach(struct obd_device *dev, obd_count len, void *data) -{ - return lprocfs_reg_obd(dev, status_var_nm_1, dev); -} -int filter_detach(struct obd_device
*dev) -{ - return lprocfs_dereg_obd(dev); -} static struct obd_ops filter_obd_ops = { o_attach: filter_attach, o_detach: filter_detach, @@ -1818,7 +1622,7 @@ static struct obd_ops filter_obd_ops = { o_destroy: filter_destroy, o_open: filter_open, o_close: filter_close, - o_brw: filter_pgcache_brw, + o_brw: filter_brw, o_punch: filter_truncate, o_preprw: filter_preprw, o_commitrw: filter_commitrw diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 2d47fcd..3dea05f 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -270,6 +270,7 @@ static int ost_brw_read(struct ptlrpc_request *req) desc = ptlrpc_prep_bulk(req->rq_connection); if (desc == NULL) GOTO(out_local, rc = -ENOMEM); + desc->bd_ptl_ev_hdlr = NULL; desc->bd_portal = OST_BULK_PORTAL; for (i = 0; i < niocount; i++) { @@ -287,7 +288,8 @@ static int ost_brw_read(struct ptlrpc_request *req) GOTO(out_bulk, rc); lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, desc); - rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_SENT, &lwi); + rc = l_wait_event(desc->bd_waitq, desc->bd_flags & PTL_BULK_FL_SENT, + &lwi); if (rc) { LASSERT(rc == -ETIMEDOUT); GOTO(out_bulk, rc); @@ -299,7 +301,7 @@ static int ost_brw_read(struct ptlrpc_request *req) rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); out_bulk: - ptlrpc_free_bulk(desc); + ptlrpc_bulk_decref(desc); out_local: OBD_FREE(local_nb, sizeof(*local_nb) * niocount); out: @@ -418,7 +420,7 @@ static int ost_brw_write(struct ptlrpc_request *req) rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb, desc->bd_desc_private); - ptlrpc_free_bulk(desc); + ptlrpc_bulk_decref(desc); EXIT; out_free: OBD_FREE(local_nb, niocount * sizeof(*local_nb)); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index af371d8..069fd2a 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -131,6 +131,10 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) 
LASSERT(list_empty(&desc->bd_set_chain)); + if (atomic_read(&desc->bd_refcount) != 0) + CERROR("freeing desc %p with refcount %d!\n", desc, + atomic_read(&desc->bd_refcount)); + list_for_each_safe(tmp, next, &desc->bd_page_list) { struct ptlrpc_bulk_page *bulk; bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); @@ -253,11 +257,14 @@ int ll_brw_sync_wait(struct obd_brw_set *set, int phase) struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, int count, int *lengths, char **bufs) { - struct ptlrpc_connection *conn = imp->imp_connection; + struct ptlrpc_connection *conn; struct ptlrpc_request *request; int rc; ENTRY; + LASSERT(imp); + conn = imp->imp_connection; + OBD_ALLOC(request, sizeof(*request)); if (!request) { CERROR("request allocation out of memory\n"); @@ -726,8 +733,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) imp->imp_max_transno = req->rq_transno; } else if (req->rq_transno != 0 && imp->imp_level == LUSTRE_CONN_FULL) { - CERROR("got transno "LPD64" after "LPD64": recovery " - "may not work\n", req->rq_transno, + CDEBUG(D_HA, "got transno "LPD64" after "LPD64 + ": recovery may not work\n", req->rq_transno, imp->imp_max_transno); } diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 0bdf0d8..c260f5d 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -170,6 +170,8 @@ static int bulk_source_callback(ptl_event_t *ev) LASSERT(ev->mem_desc.niov == desc->bd_page_count); if (atomic_dec_and_test(&desc->bd_source_callback_count)) { + void (*event_handler)(struct ptlrpc_bulk_desc *); + list_for_each_safe(tmp, next, &desc->bd_page_list) { bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); @@ -177,10 +179,18 @@ static int bulk_source_callback(ptl_event_t *ev) if (bulk->bp_cb != NULL) bulk->bp_cb(bulk); } + + /* We need to make a note of whether there's an event handler + * before we call wake_up, because if there is no event handler, + * 'desc' might be freed before we're scheduled again. 
*/ + event_handler = desc->bd_ptl_ev_hdlr; + desc->bd_flags |= PTL_BULK_FL_SENT; wake_up(&desc->bd_waitq); - if (desc->bd_ptl_ev_hdlr != NULL) - desc->bd_ptl_ev_hdlr(desc); + if (event_handler) { + LASSERT(desc->bd_ptl_ev_hdlr == event_handler); + event_handler(desc); + } } RETURN(0); @@ -193,35 +203,39 @@ static int bulk_sink_callback(ptl_event_t *ev) struct list_head *tmp; struct list_head *next; ptl_size_t total = 0; + void (*event_handler)(struct ptlrpc_bulk_desc *); ENTRY; - if (ev->type == PTL_EVENT_PUT) { - /* put with zero offset */ - LASSERT(ev->offset == 0); - /* used iovs */ - LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0); - /* 1 fragment for each page always */ - LASSERT(ev->mem_desc.niov == desc->bd_page_count); - - list_for_each_safe (tmp, next, &desc->bd_page_list) { - bulk = list_entry(tmp, struct ptlrpc_bulk_page, - bp_link); + LASSERT(ev->type == PTL_EVENT_PUT); + + /* put with zero offset */ + LASSERT(ev->offset == 0); + /* used iovs */ + LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0); + /* 1 fragment for each page always */ + LASSERT(ev->mem_desc.niov == desc->bd_page_count); - total += bulk->bp_buflen; + list_for_each_safe (tmp, next, &desc->bd_page_list) { + bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - if (bulk->bp_cb != NULL) - bulk->bp_cb(bulk); - } + total += bulk->bp_buflen; + + if (bulk->bp_cb != NULL) + bulk->bp_cb(bulk); + } - LASSERT(ev->mem_desc.length == total); + LASSERT(ev->mem_desc.length == total); - desc->bd_flags |= PTL_BULK_FL_RCVD; - wake_up(&desc->bd_waitq); - if (desc->bd_ptl_ev_hdlr != NULL) - desc->bd_ptl_ev_hdlr(desc); - } else { - CERROR("Unexpected event type!\n"); - LBUG(); + /* We need to make a note of whether there's an event handler + * before we call wake_up, because if there is no event + * handler, 'desc' might be freed before we're scheduled again. 
*/ + event_handler = desc->bd_ptl_ev_hdlr; + + desc->bd_flags |= PTL_BULK_FL_RCVD; + wake_up(&desc->bd_waitq); + if (event_handler) { + LASSERT(desc->bd_ptl_ev_hdlr == event_handler); + event_handler(desc); } RETURN(1); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index c2c2b32..bfd11bc 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -280,7 +280,7 @@ int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc) void obd_brw_set_add(struct obd_brw_set *set, struct ptlrpc_bulk_desc *desc) { - atomic_inc(&desc->bd_refcount); + ptlrpc_bulk_addref(desc); atomic_inc(&set->brw_refcount); desc->bd_brw_set = set; list_add(&desc->bd_set_chain, &set->brw_desc_head); diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index 2b5b786..dd1e33c 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -92,7 +92,7 @@ make distdir distdir=lustre-source/lustre-%{version} %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/llite.o %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mdc.o %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds_extN.o +%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_extN.o %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdclass.o %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdecho.o %attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdfilter.o diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 5563923..e34f984 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -27,3 +27,4 @@ lovstripe *.xml stat setuid +multifstat diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 77c3039..12b7d52 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -24,7 +24,9 @@ noinst_SCRIPTS += fs.sh intent-test.sh intent-test2.sh leak_finder.pl \ runtests runvmstat 
snaprun.sh tbox.sh common.sh noinst_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy -noinst_PROGRAMS += lovstripe stat createmany mkdirmany multifstat # ldaptest +noinst_PROGRAMS += lovstripe stat createmany mkdirmany multifstat +# noinst_PROGRAMS += ldaptest +noinst_PROGRAMS += checkstat # ldaptest_SOURCES = ldaptest.c tchmod_SOURCES = tchmod.c @@ -46,5 +48,6 @@ stat_SOURCES = stat.c createmany_SOURCES = createmany.c mkdirmany_SOURCES = mkdirmany.c multifstat_SOURCES = multifstat.c +checkstat_SOURCES = checkstat.c include $(top_srcdir)/Rules diff --git a/lustre/tests/checkstat.c b/lustre/tests/checkstat.c new file mode 100644 index 0000000..ed97bd6 --- /dev/null +++ b/lustre/tests/checkstat.c @@ -0,0 +1,315 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void +usage (char *argv0, int help) +{ + char *progname = strrchr(argv0, '/'); + + if (progname == NULL) + progname = argv0; + + fprintf (help ? 
stdout : stderr, + "Usage: %s [flags] file[s]\n", + progname); + + if (!help) + { + fprintf (stderr, " or try '-h' for help\n"); + exit (1); + } + + printf ("Check given files have...\n"); + printf (" -p permission file must have required permissions\n"); + printf (" -t dir|file|link file must be of the specified type\n"); + printf (" -l link_name file must be a link to the given name\n"); + printf (" -s size file must have the given size\n"); + printf (" -u user file must be owned by given user\n"); + printf (" -g group file must be owned by given group\n"); + printf (" -f follow symlinks\n"); + printf (" -a file must be absent\n"); + printf (" -v increase verbosity\n"); + printf (" -h print help\n"); + printf (" Exit status is 0 on success, 1 on failure\n"); +} + +int +main (int argc, char **argv) +{ + int c; + struct stat64 buf; + int perms = -1; + uid_t uid = (uid_t)-1; + gid_t gid = (gid_t)-1; + char *type = NULL; + long absent = 0; + char *checklink = NULL; + int verbose = 0; + long long size = -1; + int follow = 0; + char *term; + + while ((c = getopt (argc, argv, "p:t:l:s:u:g:avfh")) != -1) + switch (c) + { + case 'p': + perms = (int)strtol (optarg, &term, 0); + if (term == optarg) + { + fprintf (stderr, "Can't parse permission %s\n", optarg); + return (1); + } + break; + + case 'l': + checklink = optarg; + break; + + case 's': + size = strtoll (optarg, &term, 0); + if (term == optarg) + { + fprintf (stderr, "Can't parse size %s\n", optarg); + return (1); + } + break; + + case 'u': + if (*optarg == '#') + { + uid = (uid_t)strtol (optarg + 1, &term, 0); + if (term == optarg + 1) + { + fprintf (stderr, "Can't parse numeric uid %s\n", optarg); + return (1); + } + } else { + struct passwd *pw = getpwnam (optarg); + + if (pw == NULL) + { + fprintf (stderr, "Can't find user %s\n", optarg); + return (1); + } + uid = pw->pw_uid; + } + break; + + case 'g': + if (*optarg == '#') + { + gid = (gid_t)strtol (optarg + 1, &term, 0); + if (term == optarg + 1) + { + fprintf 
(stderr, "Can't parse numeric gid %s\n", optarg); + return (1); + } + } else { + struct group *gr = getgrnam (optarg); + + if (gr == NULL) + { + fprintf (stderr, "Can't find group %s\n", optarg); + return (1); + } + gid = gr->gr_gid; + } + break; + + case 't': + type = optarg; + break; + + case 'a': + absent = 1; + break; + + case 'v': + verbose++; + break; + + case 'f': + follow++; + break; + + case 'h': + usage (argv[0], 1); + return (0); + + default: + usage (argv[0], 0); + } + + if (optind == argc) + usage (argv[0], 0); + + do + { + char *fname = argv[optind]; + int rc = follow ? stat64 (fname, &buf) : lstat64 (fname, &buf); + + if (rc != 0) + { + if (!(absent && errno == ENOENT)) + { + if (verbose) + printf ("Can't %sstat %s: %s\n", + follow ? "" : "l", + fname, strerror (errno)); + return (1); + } + + if (verbose) + printf ("%s: absent OK\n", fname); + continue; + } + + if (absent) + { + if (verbose) + printf ("%s exists\n", fname); + return (1); + } + + if (type != NULL) + { + if (!strcmp (type, "d") || + !strcmp (type, "dir")) + { + if (!S_ISDIR (buf.st_mode)) + { + if (verbose) + printf ("%s is not a directory\n", + fname); + return (1); + } + } + else if (!strcmp (type, "f") || + !strcmp (type, "file")) + { + if (!S_ISREG (buf.st_mode)) + { + if (verbose) + printf ("%s is not a regular file\n", + fname); + return (1); + } + } + else if (!strcmp (type, "l") || + !strcmp (type, "link")) + { + if (!S_ISLNK (buf.st_mode)) + { + if (verbose) + printf ("%s is not a link\n", + fname); + return (1); + } + } + else + { + fprintf (stderr, "Can't parse file type %s\n", type); + return (1); + } + + if (verbose) + printf ("%s has type %s OK\n", fname, type); + } + + if (perms != -1) + { + if ((buf.st_mode & ~S_IFMT) != perms) + { + if (verbose) + printf ("%s has perms 0%o, not 0%o\n", + fname, (buf.st_mode & ~S_IFMT), perms); + return (1); + } + + if (verbose) + printf ("%s has perms 0%o OK\n", + fname, perms); + } + + if (size != -1) + { + if (buf.st_size != size) +
{ + if (verbose) + printf ("%s has size %Ld, not %Ld\n", + fname, (long long)buf.st_size, size); + return (1); + } + + if (verbose) + printf ("%s has size %Ld OK\n", fname, size); + } + + if (checklink != NULL) + { + static char lname[4<<10]; + + rc = readlink (fname, lname, sizeof (lname) - 1); + + if (rc < 0) + { + if (verbose) + printf ("%s: can't read link: %s\n", + fname, strerror (errno)); + return (1); + } + + lname[rc] = 0; + if (strcmp (checklink, lname)) + { + if (verbose) + printf ("%s is a link to %s and not %s\n", + fname, lname, checklink); + return (1); + } + + if (verbose) + printf ("%s links to %s OK\n", fname, checklink); + } + + if (uid != (uid_t)-1) + { + if (buf.st_uid != uid) + { + if (verbose) + printf ("%s is owned by user #%ld and not #%ld\n", + fname, (long)buf.st_uid, (long)uid); + return (1); + } + + if (verbose) + printf ("%s is owned by user #%ld OK\n", + fname, (long)uid); + } + + if (gid != (gid_t)-1) + { + if (buf.st_gid != gid) + { + if (verbose) + printf ("%s is owned by group #%ld and not #%ld\n", + fname, (long)buf.st_gid, (long)gid); + return (1); + } + + if (verbose) + printf ("%s is owned by group #%ld OK\n", + fname, (long)gid); + } + + } while (++optind < argc); + + return (0); +} diff --git a/lustre/tests/common.sh b/lustre/tests/common.sh index 34e3b83..6a4429e 100644 --- a/lustre/tests/common.sh +++ b/lustre/tests/common.sh @@ -270,10 +270,10 @@ setup_lustre() { do_insmod $LUSTRE/extN/extN.o || \ echo "info: can't load extN.o module, not fatal if using ext3" do_insmod $LUSTRE/mds/mds.o || exit -1 - #do_insmod $LUSTRE/mds/mds_ext2.o || exit -1 - #do_insmod $LUSTRE/mds/mds_ext3.o || exit -1 - do_insmod $LUSTRE/mds/mds_extN.o || \ - echo "info: can't load mds_extN.o module, needs extN.o" + #do_insmod $LUSTRE/obdclass/fsfilt_ext2.o || exit -1 + #do_insmod $LUSTRE/obdclass/fsfilt_ext3.o || exit -1 + do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \ + echo "info: can't load fsfilt_extN.o module, needs extN.o" do_insmod 
$LUSTRE/obdecho/obdecho.o || exit -1 #do_insmod $LUSTRE/obdext2/obdext2.o || exit -1 do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1 @@ -571,9 +571,9 @@ cleanup_lustre() { do_rmmod mdc do_rmmod osc - do_rmmod mds_extN - do_rmmod mds_ext3 - do_rmmod mds_ext2 + do_rmmod fsfilt_extN + do_rmmod fsfilt_ext3 + do_rmmod fsfilt_ext2 do_rmmod mds do_rmmod ost do_rmmod obdecho diff --git a/lustre/tests/fsx.c b/lustre/tests/fsx.c index aba33d5..859f40a 100644 --- a/lustre/tests/fsx.c +++ b/lustre/tests/fsx.c @@ -532,6 +532,13 @@ domapread(unsigned offset, unsigned size) prterr("domapread: mmap"); report_failure(190); } + if (!quiet && (debug > 1 && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend))))) { + gettimeofday(&t, NULL); + prt(" %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec); + } memcpy(temp_buf, p + pg_offset, size); if (!quiet && (debug > 1 && (monitorstart == -1 || @@ -683,6 +690,13 @@ domapwrite(unsigned offset, unsigned size) prterr("domapwrite: ftruncate"); exit(201); } + if (!quiet && (debug > 1 && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend))))) { + gettimeofday(&t, NULL); + prt(" %lu.%06lu truncate done\n", t.tv_sec, t.tv_usec); + } } pg_offset = offset & page_mask; map_size = pg_offset + size; @@ -693,6 +707,13 @@ domapwrite(unsigned offset, unsigned size) prterr("domapwrite: mmap"); report_failure(202); } + if (!quiet && (debug > 1 && + (monitorstart == -1 || + (offset + size > monitorstart && + (monitorend == -1 || offset <= monitorend))))) { + gettimeofday(&t, NULL); + prt(" %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec); + } memcpy(p + pg_offset, good_buf + offset, size); if (!quiet && (debug > 1 && (monitorstart == -1 || @@ -821,7 +842,7 @@ docloseopen(void) } if (!quiet && debug > 1) { gettimeofday(&t, NULL); - prt(" %lu.%06lu opendone\n", t.tv_sec, t.tv_usec); + prt(" %lu.%06lu open done\n", t.tv_sec, t.tv_usec); } } diff --git 
a/lustre/tests/llmodules.sh b/lustre/tests/llmodules.sh index 649e96d..a39b73c 100644 --- a/lustre/tests/llmodules.sh +++ b/lustre/tests/llmodules.sh @@ -25,8 +25,8 @@ do_insmod $LUSTRE/ldlm/ldlm.o || exit -1 do_insmod $LUSTRE/extN/extN.o || \ echo "info: can't load extN.o module, not fatal if using ext3" do_insmod $LUSTRE/mds/mds.o || exit -1 -do_insmod $LUSTRE/mds/mds_extN.o || \ - echo "info: can't load mds_extN.o module, needs extN.o" +do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \ + echo "info: can't load fsfilt_extN.o module, needs extN.o" do_insmod $LUSTRE/obdecho/obdecho.o || exit -1 do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1 do_insmod $LUSTRE/ost/ost.o || exit -1 diff --git a/lustre/tests/runslabinfo b/lustre/tests/runslabinfo index 48d6602..eba407d 100755 --- a/lustre/tests/runslabinfo +++ b/lustre/tests/runslabinfo @@ -1,5 +1,5 @@ #!/bin/sh while sleep 1 ; do - egrep "ll_|ldlm|filp|dentry|inode|portals|size-[0-9]* " /proc/slabinfo echo '-----------------------' + egrep "ll_|ldlm|filp|dentry|inode|portals|size-[0-9]* " /proc/slabinfo done diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index cf305a4..cbd17ca 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1,17 +1,21 @@ #!/bin/bash +set -e + +CHECKSTAT=${CHECKSTAT:-"./checkstat -v"} +MOUNT=${MOUNT:-/mnt/lustre} export NAME=$NAME clean() { echo -n "cleanup..." sh llmountcleanup.sh > /dev/null } -CLEAN=clean +CLEAN=${CLEAN:-clean} start() { echo -n "mounting..." sh llrmount.sh > /dev/null echo -n "mounted" } -START=start +START=${START:-start} error () { echo FAIL @@ -23,283 +27,373 @@ pass() { } echo '== touch .../f ; rm .../f ======================== test 0' -touch /mnt/lustre/f -[ -f /mnt/lustre/f ] || error -rm /mnt/lustre/f -[ ! 
-f /mnt/lustre/f ] || error +touch $MOUNT/f +$CHECKSTAT -t file $MOUNT/f || error +rm $MOUNT/f +$CHECKSTAT -a $MOUNT/f || error pass $CLEAN $START echo '== mkdir .../d1; mkdir .../d1/d2 ================= test 1' -mkdir /mnt/lustre/d1 -mkdir /mnt/lustre/d1/d2 -[ -d /mnt/lustre/d1/d2 ] || error +mkdir $MOUNT/d1 +mkdir $MOUNT/d1/d2 +$CHECKSTAT -t dir $MOUNT/d1/d2 || error pass $CLEAN $START echo '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b' -rmdir /mnt/lustre/d1/d2 -rmdir /mnt/lustre/d1 -[ ! -d /mnt/lustre/d1 ] || error +rmdir $MOUNT/d1/d2 +rmdir $MOUNT/d1 +$CHECKSTAT -a $MOUNT/d1 || error pass $CLEAN $START echo '== mkdir .../d2; touch .../d2/f ================== test 2' -mkdir /mnt/lustre/d2 -touch /mnt/lustre/d2/f +mkdir $MOUNT/d2 +touch $MOUNT/d2/f +$CHECKSTAT -t file $MOUNT/d2/f || error +pass $CLEAN $START echo '== rm -r .../d2; touch .../d2/f ================== test 2b' -rm -r /mnt/lustre/d2 +rm -r $MOUNT/d2 +$CHECKSTAT -a $MOUNT/d2 || error +pass $CLEAN $START echo '== mkdir .../d3 ================================== test 3' -mkdir /mnt/lustre/d3 +mkdir $MOUNT/d3 +$CHECKSTAT -t dir $MOUNT/d3 || error +pass $CLEAN $START echo '== touch .../d3/f ================================ test 3b' -touch /mnt/lustre/d3/f +touch $MOUNT/d3/f +$CHECKSTAT -t file $MOUNT/d3/f || error +pass $CLEAN $START echo '== rm -r .../d3 ================================== test 3c' -rm -r /mnt/lustre/d3 +rm -r $MOUNT/d3 +$CHECKSTAT -a $MOUNT/d3 || error +pass $CLEAN $START echo '== mkdir .../d4 ================================== test 4' -mkdir /mnt/lustre/d4 +mkdir $MOUNT/d4 +$CHECKSTAT -t dir $MOUNT/d4 || error +pass $CLEAN $START echo '== mkdir .../d4/d2 =============================== test 4b' -mkdir /mnt/lustre/d4/d2 +mkdir $MOUNT/d4/d2 +$CHECKSTAT -t dir $MOUNT/d4/d2 || error +pass $CLEAN $START echo '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5' -mkdir /mnt/lustre/d5 -mkdir /mnt/lustre/d5/d2 -chmod 0666 /mnt/lustre/d5/d2 +mkdir $MOUNT/d5 +mkdir $MOUNT/d5/d2 
+chmod 0666 $MOUNT/d5/d2 +$CHECKSTAT -t dir -p 0666 $MOUNT/d5/d2 || error +pass $CLEAN $START echo '== touch .../f6; chmod .../f6 ==================== test 6' -touch /mnt/lustre/f6 -chmod 0666 /mnt/lustre/f6 +touch $MOUNT/f6 +chmod 0666 $MOUNT/f6 +$CHECKSTAT -t file -p 0666 $MOUNT/f6 || error +pass $CLEAN $START echo '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7' -mkdir /mnt/lustre/d7 -./mcreate /mnt/lustre/d7/f -chmod 0666 /mnt/lustre/d7/f +mkdir $MOUNT/d7 +./mcreate $MOUNT/d7/f +chmod 0666 $MOUNT/d7/f +$CHECKSTAT -t file -p 0666 $MOUNT/d7/f || error +pass $CLEAN $START echo '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8' -mkdir /mnt/lustre/d8 -touch /mnt/lustre/d8/f -chmod 0666 /mnt/lustre/d8/f +mkdir $MOUNT/d8 +touch $MOUNT/d8/f +chmod 0666 $MOUNT/d8/f +$CHECKSTAT -t file -p 0666 $MOUNT/d8/f || error +pass $CLEAN $START -echo '== mkdir .../d9; mkdir .../d9/d2; mkdir .../d9/d2/d3 == test 9' -mkdir /mnt/lustre/d9 -mkdir /mnt/lustre/d9/d2 -mkdir /mnt/lustre/d9/d2/d3 +echo '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9' +mkdir $MOUNT/d9 +mkdir $MOUNT/d9/d2 +mkdir $MOUNT/d9/d2/d3 +$CHECKSTAT -t dir $MOUNT/d9/d2/d3 || error +pass $CLEAN $START echo '== mkdir .../d10; mkdir .../d10/d2; touch .../d10/d2/f = test 10' -mkdir /mnt/lustre/d10 -mkdir /mnt/lustre/d10/d2 -touch /mnt/lustre/d10/d2/f +mkdir $MOUNT/d10 +mkdir $MOUNT/d10/d2 +touch $MOUNT/d10/d2/f +$CHECKSTAT -t file $MOUNT/d10/d2/f || error +pass $CLEAN $START -echo '=================================================== test 11' -mkdir /mnt/lustre/d11 -mkdir /mnt/lustre/d11/d2 -chmod 0666 /mnt/lustre/d11/d2 -chmod 0555 /mnt/lustre/d11/d2 +echo '== mkdir .../d11 d11/d2; chmod .../d11/d2 .../d11/d2 == test 11' +mkdir $MOUNT/d11 +mkdir $MOUNT/d11/d2 +chmod 0666 $MOUNT/d11/d2 +chmod 0555 $MOUNT/d11/d2 +$CHECKSTAT -t dir -p 0555 $MOUNT/d11/d2 || error +pass $CLEAN $START -echo '=================================================== test 12' -mkdir /mnt/lustre/d12 -touch 
/mnt/lustre/d12/f -chmod 0666 /mnt/lustre/d12/f -chmod 0555 /mnt/lustre/d12/f +echo '== mkdir .../d12; touch .../d12/f; chmod .../d12/f d12/f == test 12' +mkdir $MOUNT/d12 +touch $MOUNT/d12/f +chmod 0666 $MOUNT/d12/f +chmod 0555 $MOUNT/d12/f +$CHECKSTAT -t file -p 0555 $MOUNT/d12/f || error +pass $CLEAN $START -echo '=================================================== test 13' -mkdir /mnt/lustre/d13 -cp /etc/passwd /mnt/lustre/d13/f -> /mnt/lustre/d13/f +echo '== mkdir .../d13; cp /etc/passwd .../d13/f; > .../d13/f == test 13' +mkdir $MOUNT/d13 +cp /etc/hosts $MOUNT/d13/f +> $MOUNT/d13/f +$CHECKSTAT -t file -s 0 $MOUNT/d13/f || error +pass $CLEAN $START -echo '=================================================== test 14' -mkdir /mnt/lustre/d14 -touch /mnt/lustre/d14/f -rm /mnt/lustre/d14/f +echo '================================================== test 14' +mkdir $MOUNT/d14 +touch $MOUNT/d14/f +rm $MOUNT/d14/f +$CHECKSTAT -a $MOUNT/d14/f || error +pass $CLEAN $START -echo '=================================================== test 15' -mkdir /mnt/lustre/d15 -touch /mnt/lustre/d15/f -mv /mnt/lustre/d15/f /mnt/lustre/d15/f2 +echo '================================================== test 15' +mkdir $MOUNT/d15 +touch $MOUNT/d15/f +mv $MOUNT/d15/f $MOUNT/d15/f2 +$CHECKSTAT -t file $MOUNT/d15/f2 || error +pass $CLEAN $START -echo '=================================================== test 16' -mkdir /mnt/lustre/d16 -touch /mnt/lustre/d16/f -rm -rf /mnt/lustre/d16/f +echo '================================================== test 16' +mkdir $MOUNT/d16 +touch $MOUNT/d16/f +rm -rf $MOUNT/d16/f +$CHECKSTAT -a $MOUNT/d16/f || error +pass $CLEAN $START -echo '== symlinks: create, remove (dangling and real) === test 17' -mkdir /mnt/lustre/d17 -touch /mnt/lustre/d17/f -ln -s /mnt/lustre/d17/f /mnt/lustre/d17/l-exist -ln -s no-such-file /mnt/lustre/d17/l-dangle -ls -l /mnt/lustre/d17 -rm -f /mnt/lustre/l-dangle -rm -f /mnt/lustre/l-exist +echo '== symlinks: create, remove (dangling and 
real) == test 17' +mkdir $MOUNT/d17 +touch $MOUNT/d17/f +ln -s $MOUNT/d17/f $MOUNT/d17/l-exist +ln -s no-such-file $MOUNT/d17/l-dangle +ls -l $MOUNT/d17 +$CHECKSTAT -l $MOUNT/d17/f $MOUNT/d17/l-exist || error +$CHECKSTAT -f -t f $MOUNT/d17/l-exist || error +$CHECKSTAT -l no-such-file $MOUNT/d17/l-dangle || error +$CHECKSTAT -fa $MOUNT/d17/l-dangle || error +rm -f $MOUNT/d17/l-dangle +rm -f $MOUNT/d17/l-exist +$CHECKSTAT -a $MOUNT/d17/l-dangle || error +$CHECKSTAT -a $MOUNT/d17/l-exist || error +pass $CLEAN $START -echo '== touch /mnt/lustre/f ; ls /mnt/lustre ========== test 18' -touch /mnt/lustre/f -ls /mnt/lustre +echo "== touch $MOUNT/f ; ls $MOUNT ==================== test 18" +touch $MOUNT/f +ls $MOUNT || error +pass $CLEAN $START -echo '== touch /mnt/lustre/f ; ls -l /mnt/lustre ======= test 19' -touch /mnt/lustre/f -ls -l /mnt/lustre -rm /mnt/lustre/f +echo "== touch $MOUNT/f ; ls -l $MOUNT ================= test 19" +touch $MOUNT/f +ls -l $MOUNT +rm $MOUNT/f +$CHECKSTAT -a $MOUNT/f || error +pass $CLEAN $START -echo '== touch /mnt/lustre/f ; ls -l /mnt/lustre ======= test 20' -touch /mnt/lustre/f -rm /mnt/lustre/f +echo "== touch $MOUNT/f ; ls -l $MOUNT ================= test 20" +touch $MOUNT/f +rm $MOUNT/f echo "1 done" -touch /mnt/lustre/f -rm /mnt/lustre/f +touch $MOUNT/f +rm $MOUNT/f echo "2 done" -touch /mnt/lustre/f -rm /mnt/lustre/f +touch $MOUNT/f +rm $MOUNT/f echo "3 done" +$CHECKSTAT -a $MOUNT/f || error +pass $CLEAN $START -echo '== write to dangling link ======================= test 21' -mkdir /mnt/lustre/d21 -ln -s dangle /mnt/lustre/d21/link -echo foo >> /mnt/lustre/d21/link -cat /mnt/lustre/d21/dangle +echo '== write to dangling link ======================== test 21' +mkdir $MOUNT/d21 +[ -f $MOUNT/d21/dangle ] && rm -f $MOUNT/d21/dangle +ln -s dangle $MOUNT/d21/link +echo foo >> $MOUNT/d21/link +cat $MOUNT/d21/dangle +$CHECKSTAT -t link $MOUNT/d21/link || error +$CHECKSTAT -f -t file $MOUNT/d21/link || error +pass $CLEAN $START -echo '== unpack tar archive
as nonroot user =========== test 22' -mkdir /mnt/lustre/d22 -chown 4711 /mnt/lustre/d22 -sudo -u \#4711 tar cf - /etc/hosts /etc/sysconfig/network | sudo -u \#4711 tar xfC - /mnt/lustre/d22 -ls -lR /mnt/lustre/d22/etc +echo '== unpack tar archive as non-root user =========== test 22' +mkdir $MOUNT/d22 +which sudo && chown 4711 $MOUNT/d22 +SUDO=`which sudo 2> /dev/null` && SUDO="$SUDO -u #4711" || SUDO="" +$SUDO tar cf - /etc/hosts /etc/sysconfig/network | $SUDO tar xfC - $MOUNT/d22 +ls -lR $MOUNT/d22/etc +$CHECKSTAT -t dir $MOUNT/d22/etc || error +[ -z "$SUDO" ] || $CHECKSTAT -u \#4711 $MOUNT/d22/etc || error +pass $CLEAN $START -echo '== O_CREAT|O_EXCL in subdir ===================== test 23' -mkdir /mnt/lustre/d23 -./toexcl /mnt/lustre/d23/f23 -./toexcl /mnt/lustre/d23/f23 +echo '== O_CREAT|O_EXCL in subdir ====================== test 23' +mkdir $MOUNT/d23 +./toexcl $MOUNT/d23/f23 +./toexcl -e $MOUNT/d23/f23 || error +pass $CLEAN $START -echo '== rename sanity ============================= test24' +echo '== rename sanity ================================= test24' echo '-- same directory rename' echo '-- test 24-R1: touch a ; rename a b' -mkdir /mnt/lustre/R1 -touch /mnt/lustre/R1/f -mv /mnt/lustre/R1/f /mnt/lustre/R1/g +mkdir $MOUNT/R1 +touch $MOUNT/R1/f +mv $MOUNT/R1/f $MOUNT/R1/g +$CHECKSTAT -t file $MOUNT/R1/g || error +pass $CLEAN $START echo '-- test 24-R2: touch a b ; rename a b;' -mkdir /mnt/lustre/R2 -touch /mnt/lustre/R2/{f,g} -mv /mnt/lustre/R2/f /mnt/lustre/R2/g +mkdir $MOUNT/R2 +touch $MOUNT/R2/{f,g} +mv $MOUNT/R2/f $MOUNT/R2/g +$CHECKSTAT -a $MOUNT/R2/f || error +$CHECKSTAT -t file $MOUNT/R2/g || error +pass $CLEAN $START echo '-- test 24-R3: mkdir a ; rename a b;' -mkdir /mnt/lustre/R3 -mkdir /mnt/lustre/R3/f -mv /mnt/lustre/R3/f /mnt/lustre/R3/g +mkdir $MOUNT/R3 +mkdir $MOUNT/R3/f +mv $MOUNT/R3/f $MOUNT/R3/g +$CHECKSTAT -a $MOUNT/R3/f || error +$CHECKSTAT -t dir $MOUNT/R3/g || error +pass $CLEAN $START echo '-- test 24-R4: mkdir a b ; rename a b;' 
-mkdir /mnt/lustre/R4 -mkdir /mnt/lustre/R4/{f,g} -perl -e 'rename "/mnt/lustre/R3/f", "/mnt/lustre/R3/g";' +mkdir $MOUNT/R4 +mkdir $MOUNT/R4/{f,g} +perl -e "rename \"$MOUNT/R4/f\", \"$MOUNT/R4/g\";" +$CHECKSTAT -a $MOUNT/R4/f || error +$CHECKSTAT -t dir $MOUNT/R4/g || error +pass $CLEAN $START echo '-- cross directory renames --' echo '-- test 24-R5: touch a ; rename a b' -mkdir /mnt/lustre/R5{a,b} -touch /mnt/lustre/R5a/f -mv /mnt/lustre/R5a/f /mnt/lustre/R5b/g +mkdir $MOUNT/R5{a,b} +touch $MOUNT/R5a/f +mv $MOUNT/R5a/f $MOUNT/R5b/g +$CHECKSTAT -a $MOUNT/R5a/f || error +$CHECKSTAT -t file $MOUNT/R5b/g || error +pass $CLEAN $START echo '-- test 24-R6: touch a ; rename a b' -mkdir /mnt/lustre/R6{a,b} -touch /mnt/lustre/R6a/f /mnt/lustre/R6b/g -mv /mnt/lustre/R6a/f /mnt/lustre/R6b/g +mkdir $MOUNT/R6{a,b} +touch $MOUNT/R6a/f $MOUNT/R6b/g +mv $MOUNT/R6a/f $MOUNT/R6b/g +$CHECKSTAT -a $MOUNT/R6a/f || error +$CHECKSTAT -t file $MOUNT/R6b/g || error +pass $CLEAN $START echo '-- test 24-R7: touch a ; rename a b' -mkdir /mnt/lustre/R7{a,b} -mkdir /mnt/lustre/R7a/f -mv /mnt/lustre/R7a/f /mnt/lustre/R7b/g +mkdir $MOUNT/R7{a,b} +mkdir $MOUNT/R7a/f +mv $MOUNT/R7a/f $MOUNT/R7b/g +$CHECKSTAT -a $MOUNT/R7a/f || error +$CHECKSTAT -t dir $MOUNT/R7b/g || error +pass $CLEAN $START echo '-- test 24-R8: touch a ; rename a b' -mkdir /mnt/lustre/R8{a,b} -mkdir /mnt/lustre/R8a/f /mnt/lustre/R8b/g -perl -e 'rename "/mnt/lustre/R8a/f", "/mnt/lustre/R8b/g";' +mkdir $MOUNT/R8{a,b} +mkdir $MOUNT/R8a/f $MOUNT/R8b/g +perl -e "rename \"$MOUNT/R8a/f\", \"$MOUNT/R8b/g\";" +$CHECKSTAT -a $MOUNT/R8a/f || error +$CHECKSTAT -t dir $MOUNT/R8b/g || error +pass $CLEAN $START echo "-- rename error cases" echo "-- test 24-R9 target error: touch f ; mkdir a ; rename f a" -mkdir /mnt/lustre/R9 -mkdir /mnt/lustre/R9/a -touch /mnt/lustre/R9/f -perl -e 'rename "/mnt/lustre/R9/f", "/mnt/lustre/R9/a";' +mkdir $MOUNT/R9 +mkdir $MOUNT/R9/a +touch $MOUNT/R9/f +perl -e "rename \"$MOUNT/R9/f\", \"$MOUNT/R9/a\";" 
+$CHECKSTAT -t file $MOUNT/R9/f || error +$CHECKSTAT -t dir $MOUNT/R9/a || error +$CHECKSTAT -a file $MOUNT/R9/a/f || error +pass $CLEAN $START echo "--test 24-R10 source does not exist" -mkdir /mnt/lustre/R10 -mv /mnt/lustre/R10/f /mnt/lustre/R10/g +mkdir $MOUNT/R10 +perl -e "rename \"$MOUNT/R10/f\", \"$MOUNT/R10/g\"" +$CHECKSTAT -t dir $MOUNT/R10 || error +$CHECKSTAT -a $MOUNT/R10/f || error +$CHECKSTAT -a $MOUNT/R10/g || error +pass $CLEAN $START diff --git a/lustre/tests/toexcl.c b/lustre/tests/toexcl.c index da13217..7f099e8 100644 --- a/lustre/tests/toexcl.c +++ b/lustre/tests/toexcl.c @@ -5,20 +5,73 @@ #include #include #include +#include + +void +usage (char *argv0, int help) +{ + char *progname = strrchr(argv0, '/'); + + if (progname == NULL) + progname = argv0; + + fprintf (help ? stdout : stderr, + "Usage: %s [-e] file\n", progname); + + if (!help) + { + fprintf (stderr, " or try '-h' for help\n"); + exit (1); + } + + printf ("Create the given file with O_EXCL...\n"); + printf (" -e expect EEXIST\n"); + printf (" -h print help"); + printf (" Exit status is 0 on success, 1 on failure\n"); +} int main(int argc, char **argv) { int rc; - - if (argc != 2) { - printf("usage: %s name\n", argv[0]); + int want_eexist = 0; + + while ((rc = getopt (argc, argv, "eh")) != -1) + switch (rc) + { + case 'e': + want_eexist = 1; + break; + case 'h': + usage (argv[1], 1); + return (0); + default: + usage (argv[0], 0); + } + + if (optind != argc - 1) { + usage (argv[0], 0); return 1; } - rc = open(argv[1], O_CREAT|O_EXCL, 0644); + rc = open(argv[optind], O_CREAT|O_EXCL, 0644); if (rc == -1) - printf("open failed: %s\n", strerror(errno)); - else - printf("open success.\n"); - return 0; + { + if (want_eexist && errno == EEXIST) + { + printf("open failed: %s (expected)\n", strerror(errno)); + return (0); + } + printf("open failed: %s\n", strerror(errno)); + return (1); + } else { + if (want_eexist) + { + printf("open success (expecting EEXIST).\n"); + return (1); + } + 
printf("open success.\n"); + return (0); + } + + return ((rc == 0) ? 0 : 1); } diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore index 7695706..de7b425 100644 --- a/lustre/utils/.cvsignore +++ b/lustre/utils/.cvsignore @@ -11,3 +11,4 @@ obdctl lctl lfind lstripe +lconf diff --git a/lustre/utils/lconf b/lustre/utils/lconf.in similarity index 96% rename from lustre/utils/lconf rename to lustre/utils/lconf.in index d460503..d7ca788 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf.in @@ -37,6 +37,7 @@ DEFAULT_TCPBUF = 1048576 # Maximum number of devices to search for. # (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 +PORTALS_DIR = '@PORTALSLOC@' first_cleanup_error = 0 def cleanup_error(rc): @@ -470,9 +471,10 @@ def find_prog(cmd): syspath = string.split(os.environ['PATH'], ':') cmdpath = os.path.dirname(sys.argv[0]) syspath.insert(0, cmdpath); - syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/')) + syspath.insert(0, os.path.join(cmdpath, PORTALS_DIR+'/linux/utils/')) for d in syspath: prog = os.path.join(d,cmd) + debug(prog) if os.access(prog, os.X_OK): return prog return '' @@ -489,9 +491,10 @@ def do_find_file(base, mod): if module: return module -def find_module(src_dir, dev_dir, modname): +def find_module(dev_dir, modname): mod = '%s.o' % (modname) - module = src_dir +'/'+ dev_dir +'/'+ mod + + module = dev_dir +'/'+ mod try: if os.access(module, os.R_OK): return module @@ -738,7 +741,7 @@ class Module: continue log ('loading module:', mod) if config.src_dir(): - module = find_module(config.src_dir(),dev_dir, mod) + module = find_module(dev_dir, mod) if not module: panic('module not found:', mod) (rc, out) = run('/sbin/insmod', module) @@ -782,19 +785,19 @@ class Network(Module): panic("unable to set nid for", self.net_type, self.nid) debug("nid:", self.nid) - self.add_module('portals/linux/oslib/', 'portals') + self.add_module(PORTALS_DIR+"/linux/oslib", 'portals') if node_needs_router(): - 
self.add_module('portals/linux/router', 'kptlrouter') + self.add_module(PORTALS_DIR+"/linux/router", 'kptlrouter') if self.net_type == 'tcp': - self.add_module('portals/linux/socknal', 'ksocknal') + self.add_module(PORTALS_DIR+"/linux/socknal", 'ksocknal') if self.net_type == 'toe': - self.add_module('portals/linux/toenal', 'ktoenal') + self.add_module(PORTALS_DIR+"/linux/toenal", 'ktoenal') if self.net_type == 'elan': - self.add_module('portals/linux/rqswnal', 'kqswnal') + self.add_module(PORTALS_DIR+"/linux/rqswnal", 'kqswnal') if self.net_type == 'gm': - self.add_module('portals/linux/gmnal', 'kgmnal') - self.add_module('lustre/obdclass', 'obdclass') - self.add_module('lustre/ptlrpc', 'ptlrpc') + self.add_module(PORTALS_DIR+"/linux/gmnal", 'kgmnal') + self.add_module(config.src_dir()+'obdclass', 'obdclass') + self.add_module(config.src_dir()+'ptlrpc', 'ptlrpc') def prepare(self): self.info(self.net_type, self.nid, self.port) @@ -868,7 +871,7 @@ class Network(Module): class LDLM(Module): def __init__(self,dom_node): Module.__init__(self, 'LDLM', dom_node) - self.add_module('lustre/ldlm', 'ldlm') + self.add_module(config.src_dir()+'ldlm', 'ldlm') def prepare(self): if is_prepared(self.uuid): return @@ -890,8 +893,8 @@ class LOV(Module): self.pattern = get_attr_int(dev_node, 'pattern', 0) self.devlist = get_all_refs(dev_node, 'osc') self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist)) - self.add_module('lustre/mdc', 'mdc') - self.add_module('lustre/lov', 'lov') + self.add_module(config.src_dir()+'mdc', 'mdc') + self.add_module(config.src_dir()+'lov', 'lov') def prepare(self): if is_prepared(self.uuid): @@ -979,9 +982,9 @@ class MDS(Module): # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', "no") if self.fstype == 'extN': - self.add_module('lustre/extN', 'extN') - self.add_module('lustre/mds', 'mds') - self.add_module('lustre/mds', 'mds_%s' % (self.fstype)) + 
self.add_module(config.src_dir()+'extN', 'extN') + self.add_module(config.src_dir()+'mds', 'mds') + self.add_module(config.src_dir()+'obdclass', 'fsfilt_%s'%(self.fstype)) def prepare(self): if is_prepared(self.uuid): @@ -1023,7 +1026,7 @@ class MDC(Module): int(random.random() * 1048576)) self.lookup_server(self.mds.uuid) - self.add_module('lustre/mdc', 'mdc') + self.add_module(config.src_dir()+'mdc', 'mdc') def prepare(self): if is_prepared(self.uuid): @@ -1043,8 +1046,9 @@ class OBD(Module): # FIXME: if fstype not set, then determine based on kernel version self.format = get_text(dom_node, 'autoformat', 'yes') if self.fstype == 'extN': - self.add_module('lustre/extN', 'extN') - self.add_module('lustre/' + self.obdtype, self.obdtype) + self.add_module(config.src_dir()+'extN', 'extN') + self.add_module(config.src_dir()+'' + self.obdtype, self.obdtype) + self.add_module(config.src_dir()+'obdclass' , 'fsfilt_%s' % (self.fstype)) # need to check /proc/mounts and /etc/mtab before # formatting anything. 
@@ -1070,7 +1074,7 @@ class OST(Module): def __init__(self,dom_node): Module.__init__(self, 'OST', dom_node) self.obd_uuid = get_first_ref(dom_node, 'obd') - self.add_module('lustre/ost', 'ost') + self.add_module(config.src_dir()+'ost', 'ost') def prepare(self): if is_prepared(self.uuid): @@ -1104,7 +1108,7 @@ class OSC(Module): self.obd_uuid = get_first_ref(dom_node, 'obd') self.ost_uuid = get_first_ref(dom_node, 'ost') self.lookup_server(self.ost_uuid) - self.add_module('lustre/osc', 'osc') + self.add_module(config.src_dir()+'osc', 'osc') def prepare(self, ignore_connect_failure = 0): if is_prepared(self.uuid): @@ -1182,8 +1186,8 @@ class Mountpoint(Module): self.path = get_text(dom_node, 'path') self.mds_uuid = get_first_ref(dom_node, 'mds') self.lov_uuid = get_first_ref(dom_node, 'osc') - self.add_module('lustre/mdc', 'mdc') - self.add_module('lustre/llite', 'llite') + self.add_module(config.src_dir()+'mdc', 'mdc') + self.add_module(config.src_dir()+'llite', 'llite') l = lookup(self.dom_node.parentNode, self.lov_uuid) self.osc = VOSC(l) @@ -1566,9 +1570,8 @@ def doHost(lustreNode, hosts): dom_node = getByName(lustreNode, h, 'node') if dom_node: break - if not dom_node: - print 'No host entry found.' 
+ print 'lconf: No host entry found in '+sys.argv[1] return if not get_attr(dom_node, 'router'): @@ -1661,9 +1664,12 @@ def fetch(url): return data def setupModulePath(cmd): + global PORTALS_DIR base = os.path.dirname(cmd) if os.access(base+"/Makefile", os.R_OK): - config.src_dir(base + "/../../") + config.src_dir(base + "/../") + if PORTALS_DIR[0] != '/': + PORTALS_DIR= config.src_dir()+PORTALS_DIR def sys_set_debug_path(): debug("debug path: ", config.debug_path()) @@ -1720,6 +1726,8 @@ def sanitise_path(): # def main(): global TCP_ACCEPTOR, lctl, MAXTCPBUF + setupModulePath(sys.argv[0]) + host = socket.gethostname() # the PRNG is normally seeded with time(), which is not so good for starting @@ -1769,7 +1777,6 @@ def main(): lctl = LCTLInterface('lctl') - setupModulePath(sys.argv[0]) sys_make_devices() sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) diff --git a/lustre/utils/llanalyze b/lustre/utils/llanalyze index d2c9273..56e58c8 100644 --- a/lustre/utils/llanalyze +++ b/lustre/utils/llanalyze @@ -66,7 +66,16 @@ sub extractpid if ($line =~ m/\(\) ([0-9]*)\+[0-9]*\):/) { return $1; } - if ($line =~ m/\(\) ([0-9]*) | [0-9]*\+[0-9]*\):/) { + if ($line =~ m/\(\) ([0-9]*) \| [0-9]*\+[0-9]*\):/) { + return $1; + } +} + +sub extracthostpid +{ + $line = shift; +# print "$_\n"; + if ($line =~ m/\(\) [0-9]* \| ([0-9]*)\+[0-9]*\):/) { return $1; } } @@ -213,6 +222,7 @@ sub unmatched_intents { while () { $linepid = extractpid($_); + $linehpid = extracthostpid($_); $linemask = getmask($_); $linesubsys = getsubsys($_); @@ -230,7 +240,7 @@ while () { study_lock($_); } - if ( !$pid || $linepid == $pid) { + if ( !$pid || $linepid == $pid || $linehpid == $pid) { next if ($rpctrace && $linemask != $masks->{RPCTRACE}); next if ($trace && $linemask != $masks->{TRACE}); diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 4a3b336..ba22a9e 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ 
-810,7 +810,8 @@ int jt_obd_setup(int argc, char **argv) /* The ioctl API has been extended to provide the LOV stripe metadata to the * caller when applicable. This utility, however, only saves the LSM for the - * latest CREATE. */ + * latest CREATE. It only saves the LSM when the ioctl indicates that it + * is valid by overloading 'ioc_conn2' as a boolean. */ int jt_obd_create(int argc, char **argv) { struct obd_ioctl_data data; @@ -880,7 +881,7 @@ int jt_obd_create(int argc, char **argv) break; } - lsm_valid = 1; + lsm_valid = data.ioc_conn2; if (be_verbose(verbose, &next_time, i, &next_count, count)) printf("%s: #%d is object id "LPX64"\n", -- 1.8.3.1