From: pschwan Date: Thu, 26 Jun 2003 14:34:02 +0000 (+0000) Subject: merge b_devel into HEAD (20030626 merge tag) for 0.7.1 X-Git-Tag: v1_7_100~3567 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=067c526a4e5592095a3335478ec2580535ed2be4 merge b_devel into HEAD (20030626 merge tag) for 0.7.1 --- diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 7cb00cf..7a4e05c 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -106,7 +106,7 @@ case ${host_cpu} in ia64 ) AC_MSG_RESULT($host_cpu) - KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' + KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' KCPPFLAGS='-D__KERNEL__ -DMODULE' MOD_LINK=elf64_ia64 ;; diff --git a/lnet/include/config.h.in b/lnet/include/config.h.in index b05d0c4..3aa6909 100644 --- a/lnet/include/config.h.in +++ b/lnet/include/config.h.in @@ -1,11 +1,58 @@ -/* ../include/config.h.in. Generated automatically from configure.in by autoheader. */ +/* portals/include/config.h.in. Generated from configure.in by autoheader. */ -/* Define if you have the readline library (-lreadline). */ -#undef HAVE_LIBREADLINE +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. 
*/ +#undef HAVE_UNISTD_H + +/* IOCTL Buffer Size */ +#undef OBD_MAX_IOCTL_BUFFER /* Name of package */ #undef PACKAGE +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of a `unsigned long long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG_LONG + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + /* Version number of package */ #undef VERSION - diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 6d7f3f3..ee3b9fc 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -262,28 +262,27 @@ do { \ #define PORTAL_ALLOC(ptr, size) \ do { \ - long s = size; \ LASSERT (!in_interrupt()); \ - if (s > PORTAL_VMALLOC_SIZE) \ - (ptr) = vmalloc(s); \ + if ((size) > PORTAL_VMALLOC_SIZE) \ + (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc(s, GFP_NOFS); \ + (ptr) = kmalloc((size), GFP_NOFS); \ if ((ptr) == NULL) \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ - " '" #ptr "' = %ld)\n", __FILE__, __LINE__, s); \ + CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ + #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ else { \ - portal_kmem_inc((ptr), s); \ - memset((ptr), 0, s); \ + portal_kmem_inc((ptr), (size)); \ + memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) #define PORTAL_FREE(ptr, size) \ do { \ - long s = (size); \ + int s = (size); \ if ((ptr) == NULL) { \ - 
CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \ + CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ "%s:%d\n", s, __FILE__, __LINE__); \ break; \ } \ @@ -292,39 +291,38 @@ do { \ else \ kfree(ptr); \ portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + s, (ptr), atomic_read(&portal_kmemory)); \ } while (0) #define PORTAL_SLAB_ALLOC(ptr, slab, size) \ do { \ - long s = (size); \ - LASSERT (!in_interrupt()); \ + LASSERT(!in_interrupt()); \ (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \ if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ } else { \ - portal_kmem_inc((ptr), s); \ - memset((ptr), 0, s); \ + portal_kmem_inc((ptr), (size)); \ + memset((ptr), 0, (size)); \ } \ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + (int)(size), (ptr), atomic_read(&portal_kmemory)); \ } while (0) #define PORTAL_SLAB_FREE(ptr, slab, size) \ do { \ - long s = (size); \ + int s = (size); \ if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \ + CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ "%s:%d\n", s, __FILE__, __LINE__); \ break; \ } \ memset((ptr), 0x5a, s); \ kmem_cache_free((slab), ptr); \ portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ s, (ptr), atomic_read (&portal_kmemory)); \ } while (0) diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 88ab74f..85e585b 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -104,7 +104,7 @@ typedef unsigned long kqsw_csum_t; #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ #define 
KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 128 /* # reserved transmit messages if can't block */ +#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ #define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ #define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 3b47a25..c03d592 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -617,6 +617,7 @@ kqswnal_sendmsg (nal_cb_t *nal, if (ktx == NULL) { kqswnal_cerror_hdr (hdr); lib_finalize (&kqswnal_lib, private, cookie); + return (-1); } memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ diff --git a/lustre/configure.in b/lustre/configure.in index 0850115..8e12135 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -15,7 +15,7 @@ AC_ARG_ENABLE(extN, [ --enable-extN use extN instead of ext3 for lustre backend AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) AC_ARG_WITH(obd-buffer-size, [ --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192) -AC_SUBST(OBD_BUFFER_SIZE) +AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size]) sinclude(portals/build.m4) sinclude(portals/archdep.m4) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 017d5b6..202a761 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -57,6 +57,14 @@ typedef unsigned short umode_t; #endif +/* This is because lprocfs_status.h gets included here indirectly. It would + * be much better to just avoid lprocfs being included into liblustre entirely + * but that requires more header surgery than I can handle right now. 
+ */ +#ifndef smp_processor_id +#define smp_processor_id() 0 +#endif + /* always adopt 2.5 definitions */ #define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) #define LINUX_VERSION_CODE (2*200+5*10+0) diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index 5ce5e98..fb96bde 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -25,10 +25,14 @@ #define _LPROCFS_SNMP_H #ifdef __KERNEL__ +#include #include #include +#include #endif +#include + #ifndef LPROCFS #ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */ #define LPROCFS @@ -47,7 +51,7 @@ struct lprocfs_static_vars { struct lprocfs_vars *obd_vars; }; -/* Lprocfs counters are can be configured using the enum bit masks below. +/* An lprocfs counter can be configured using the enum bit masks below. * * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already * protects this counter from concurrent updates. If not specified, @@ -69,98 +73,105 @@ struct lprocfs_static_vars { */ enum { - LPROCFS_CNTR_EXTERNALLOCK = 1, - LPROCFS_CNTR_AVGMINMAX = 2, - LPROCFS_CNTR_STDDEV = 4, + LPROCFS_CNTR_EXTERNALLOCK = 0x0001, + LPROCFS_CNTR_AVGMINMAX = 0x0002, + LPROCFS_CNTR_STDDEV = 0x0004, + + /* counter data type */ + LPROCFS_TYPE_REGS = 0x0100, + LPROCFS_TYPE_BYTES = 0x0200, + LPROCFS_TYPE_PAGES = 0x0400, + LPROCFS_TYPE_CYCLE = 0x0800, +}; + +struct lprocfs_atomic { + atomic_t la_entry; + atomic_t la_exit; }; struct lprocfs_counter { - union { - spinlock_t internal; /* when there is no external lock */ - spinlock_t *external; /* external lock, when available */ - } l; - unsigned int config; - __u64 count; - __u64 sum; - __u64 min; - __u64 max; - __u64 sumsquare; - const char *name; /* must be static */ - const char *units; /* must be static */ + struct lprocfs_atomic lc_cntl; /* may need to move to per set */ + unsigned int lc_config; + __u64 lc_count; + __u64 lc_sum; + __u64 lc_min; + __u64 lc_max; + __u64 lc_sumsquare; + const char 
*lc_name; /* must be static */ + const char *lc_units; /* must be static */ }; +struct lprocfs_percpu { + struct lprocfs_counter lp_cntr[0]; +}; -struct lprocfs_counters { - unsigned int num; - unsigned int padto8byteboundary; - struct lprocfs_counter cntr[0]; + +struct lprocfs_stats { + unsigned int ls_num; /* # of counters */ + unsigned int ls_percpu_size; + struct lprocfs_percpu *ls_percpu[0]; }; /* class_obd.c */ extern struct proc_dir_entry *proc_lustre_root; + +/* lproc_lov.c */ +extern struct file_operations ll_proc_target_fops; struct obd_device; #ifdef LPROCFS -/* Two optimized LPROCFS counter increment macros are provided: - * LPROCFS_COUNTER_INCR(cntr, value) - use for multi-valued counters - * LPROCFS_COUNTER_INCBY1(cntr) - optimized for by-one counters +/* Two optimized LPROCFS counter increment functions are provided: + * lprocfs_counter_incr(stats, idx) - optimized for by-one counters + * lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters * Counter data layout allows config flag, counter lock and the * count itself to reside within a single cache line. 
*/ -#define LPROCFS_COUNTER_INCR(cntr, value) \ - do { \ - struct lprocfs_counter *c = (cntr); \ - LASSERT(c != NULL); \ - if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK)) \ - spin_lock(&c->l.internal); \ - c->count++; \ - if (c->config & LPROCFS_CNTR_AVGMINMAX) { \ - __u64 val = (__u64) (value); \ - c->sum += val; \ - if (c->config & LPROCFS_CNTR_STDDEV) \ - c->sumsquare += (val*val); \ - if (val < c->min) c->min = val; \ - if (val > c->max) c->max = val; \ - } \ - if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK)) \ - spin_unlock(&c->l.internal); \ - } while (0) - -#define LPROCFS_COUNTER_INCBY1(cntr) \ - do { \ - struct lprocfs_counter *c = (cntr); \ - LASSERT(c != NULL); \ - if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK)) \ - spin_lock(&c->l.internal); \ - c->count++; \ - if (!(c->config & LPROCFS_CNTR_EXTERNALLOCK)) \ - spin_unlock(&c->l.internal); \ - } while (0) - -#define LPROCFS_COUNTER_INIT(cntr, conf, lck, nam, un) \ - do { \ - struct lprocfs_counter *c = (cntr); \ - LASSERT(c != NULL); \ - memset(c, 0, sizeof(struct lprocfs_counter)); \ - if (conf & LPROCFS_CNTR_EXTERNALLOCK) c->l.external = (lck); \ - else spin_lock_init(&c->l.internal); \ - c->config = conf; \ - c->min = (~(__u64)0); \ - c->name = (nam); \ - c->units = (un); \ - } while (0) - -extern struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num); -extern void lprocfs_free_counters(struct lprocfs_counters* cntrs); -extern int lprocfs_alloc_obd_counters(struct obd_device *obddev, - unsigned int num_private_counters); -extern void lprocfs_free_obd_counters(struct obd_device *obddev); -extern int lprocfs_register_counters(struct proc_dir_entry *root, - const char* name, - struct lprocfs_counters *cntrs); +static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, + long amount) +{ + struct lprocfs_counter *percpu_cntr; + + LASSERT(stats != NULL); + percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); + atomic_inc(&percpu_cntr->lc_cntl.la_entry); + 
percpu_cntr->lc_count++; + + if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { + percpu_cntr->lc_sum += amount; + if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV) + percpu_cntr->lc_sumsquare += (__u64)amount * amount; + if (amount < percpu_cntr->lc_min) + percpu_cntr->lc_min = amount; + if (amount > percpu_cntr->lc_max) + percpu_cntr->lc_max = amount; + } + atomic_inc(&percpu_cntr->lc_cntl.la_exit); +} + +static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) +{ + struct lprocfs_counter *percpu_cntr; + + LASSERT(stats != NULL); + percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); + atomic_inc(&percpu_cntr->lc_cntl.la_entry); + percpu_cntr->lc_count++; + atomic_inc(&percpu_cntr->lc_cntl.la_exit); +} + +extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num); +extern void lprocfs_free_stats(struct lprocfs_stats *stats); +extern int lprocfs_alloc_obd_stats(struct obd_device *obddev, + unsigned int num_private_stats); +extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index, + unsigned conf, const char *name, + const char *units); +extern void lprocfs_free_obd_stats(struct obd_device *obddev); +extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, + struct lprocfs_stats *stats); #define LPROCFS_INIT_MULTI_VARS(array, size) \ void lprocfs_init_multi_vars(unsigned int idx, \ @@ -195,6 +206,9 @@ extern struct proc_dir_entry *lprocfs_register(const char *name, extern void lprocfs_remove(struct proc_dir_entry *root); +extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root, + const char *name); + extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list); extern int lprocfs_obd_detach(struct obd_device *dev); @@ -248,23 +262,28 @@ int fct_name(char *page, char **start, off_t off, \ #else /* LPROCFS is not defined */ -#define LPROCFS_COUNTER_INCR(cntr, value) -#define LPROCFS_COUNTER_INCBY1(cntr) -#define LPROCFS_COUNTER_INIT(cntr, conf, 
lock, nam, un) +static inline void lprocfs_counter_add(struct lprocfs_stats *stats, + int index, long amount) { return; } +static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, + int index) { return; } +static inline void lprocfs_counter_init(struct lprocfs_stats *stats, + int index, unsigned conf, + const char *name, const char *units) +{ return; } -static inline struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num) +static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num) { return NULL; } -static inline void lprocfs_free_counters(struct lprocfs_counters* cntrs) +static inline void lprocfs_free_stats(struct lprocfs_stats *stats) { return; } -static inline int lprocfs_register_counters(struct proc_dir_entry *root, - const char* name, - struct lprocfs_counters *cntrs) +static inline int lprocfs_register_stats(struct proc_dir_entry *root, + const char *name, + struct lprocfs_stats *stats) { return 0; } -static inline int lprocfs_alloc_obd_counters(struct obd_device *obddev, - unsigned int num_private_counters) +static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, + unsigned int num_private_stats) { return 0; } -static inline void lprocfs_free_obd_counters(struct obd_device *obddev) +static inline void lprocfs_free_obd_stats(struct obd_device *obddev) { return; } static inline struct proc_dir_entry * @@ -279,6 +298,8 @@ static inline int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var, void *data) { return 0; } static inline void lprocfs_remove(struct proc_dir_entry *root) {}; +static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, + const char *name) {return 0;} struct obd_device; static inline int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list) { return 0; } diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index c2a54b9..2db4196 100644 --- a/lustre/include/linux/lustre_dlm.h +++ 
b/lustre/include/linux/lustre_dlm.h @@ -63,6 +63,7 @@ typedef enum { #define LDLM_FL_CANCELING (1 << 13) /* lock cancel has already been sent */ #define LDLM_FL_LOCAL (1 << 14) // a local lock (ie, no srv/cli split) #define LDLM_FL_WARN (1 << 15) /* see ldlm_cli_cancel_unused */ +#define LDLM_FL_MATCH_DATA (1 << 16) /* see ldlm_lock_match */ /* The blocking callback is overloaded to perform two functions. These flags * indicate which operation should be performed. */ @@ -371,7 +372,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen); int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, __u32 type, void *cookie, int cookielen, ldlm_mode_t mode, - struct lustre_handle *); + void *data, struct lustre_handle *); struct ldlm_lock * ldlm_lock_create(struct ldlm_namespace *ns, struct lustre_handle *parent_lock_handle, struct ldlm_res_id, @@ -440,19 +441,6 @@ int ldlm_cli_enqueue(struct lustre_handle *conn, ldlm_blocking_callback callback, void *data, struct lustre_handle *lockh); -int ldlm_match_or_enqueue(struct lustre_handle *connh, - struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id, - __u32 type, - void *cookie, int cookielen, - ldlm_mode_t mode, - int *flags, - ldlm_completion_callback completion, - ldlm_blocking_callback callback, - void *data, - struct lustre_handle *lockh); int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, void *data, __u32 data_len); int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags); diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index f736d4b..fc00fe1 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -46,7 +46,7 @@ struct fsfilt_operations { int niocount, struct niobuf_remote *nb); int (* fs_commit)(struct inode 
*inode, void *handle,int force_sync); int (* fs_setattr)(struct dentry *dentry, void *handle, - struct iattr *iattr); + struct iattr *iattr, int do_trunc); int (* fs_set_md)(struct inode *inode, void *handle, void *md, int size); int (* fs_get_md)(struct inode *inode, void *md, int size); @@ -79,8 +79,11 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, int op) { + unsigned long now = jiffies; void *handle = obd->obd_fsops->fs_start(inode, op); - CDEBUG(D_HA, "starting handle %p\n", handle); + CDEBUG(D_HA, "started handle %p\n", handle); + if (time_after(jiffies, now + 15*HZ)) + CERROR("long journal start time %lus\n", (jiffies - now) / HZ); return handle; } @@ -88,32 +91,33 @@ static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, struct fsfilt_objinfo *fso, int niocount, struct niobuf_remote *nb) { + unsigned long now = jiffies; void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb); - CDEBUG(D_HA, "starting handle %p\n", handle); + CDEBUG(D_HA, "started handle %p\n", handle); + if (time_after(jiffies, now + 15*HZ)) + CERROR("long journal start time %lus\n", (jiffies - now) / HZ); return handle; } static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, void *handle, int force_sync) { + unsigned long now = jiffies; + int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); CDEBUG(D_HA, "committing handle %p\n", handle); - return obd->obd_fsops->fs_commit(inode, handle, force_sync); + if (time_after(jiffies, now + 15*HZ)) + CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + return rc; } static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, - void *handle, struct iattr *iattr) + void *handle, struct iattr *iattr,int do_trunc) { + unsigned long now = jiffies; int rc; - /* - * NOTE: we probably don't need to take i_sem here when changing - * ATTR_SIZE because the MDS never 
needs to truncate a file. - * The ext2/ext3 code never truncates a directory, and files - * stored on the MDS are entirely sparse (no data blocks). - * If we do need to get it, we can do it here. - */ - lock_kernel(); - rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr); - unlock_kernel(); + rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc); + if (time_after(jiffies, now + 15*HZ)) + CERROR("long setattr time %lus\n", (jiffies - now) / HZ); return rc; } diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index b3acada..b49fd16 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -216,27 +216,29 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) */ /* opcodes */ -#define OST_REPLY 0 /* reply ? */ -#define OST_GETATTR 1 -#define OST_SETATTR 2 -#define OST_READ 3 -#define OST_WRITE 4 -#define OST_CREATE 5 -#define OST_DESTROY 6 -#define OST_GET_INFO 7 -#define OST_CONNECT 8 -#define OST_DISCONNECT 9 -#define OST_PUNCH 10 -#define OST_OPEN 11 -#define OST_CLOSE 12 -#define OST_STATFS 13 -#define OST_SAN_READ 14 -#define OST_SAN_WRITE 15 -#define OST_SYNCFS 16 +typedef enum { + OST_REPLY = 0, /* reply ? 
*/ + OST_GETATTR = 1, + OST_SETATTR = 2, + OST_READ = 3, + OST_WRITE = 4, + OST_CREATE = 5, + OST_DESTROY = 6, + OST_GET_INFO = 7, + OST_CONNECT = 8, + OST_DISCONNECT = 9, + OST_PUNCH = 10, + OST_OPEN = 11, + OST_CLOSE = 12, + OST_STATFS = 13, + OST_SAN_READ = 14, + OST_SAN_WRITE = 15, + OST_SYNCFS = 16, + OST_LAST_OPC +} ost_cmd_t; +#define OST_FIRST_OPC OST_REPLY /* When adding OST RPC opcodes, please update * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ -#define OST_LAST_OPC (OST_SYNCFS+1) -#define OST_FIRST_OPC OST_REPLY typedef uint64_t obd_id; @@ -385,20 +387,23 @@ extern void lustre_swab_ost_body (struct ost_body *b); */ /* opcodes */ -#define MDS_GETATTR 33 -#define MDS_GETATTR_NAME 34 -#define MDS_CLOSE 35 -#define MDS_REINT 36 -#define MDS_READPAGE 37 -#define MDS_CONNECT 38 -#define MDS_DISCONNECT 39 -#define MDS_GETSTATUS 40 -#define MDS_STATFS 41 -#define MDS_GETLOVINFO 42 +typedef enum { + MDS_GETATTR = 33, + MDS_GETATTR_NAME = 34, + MDS_CLOSE = 35, + MDS_REINT = 36, + MDS_READPAGE = 37, + MDS_CONNECT = 38, + MDS_DISCONNECT = 39, + MDS_GETSTATUS = 40, + MDS_STATFS = 41, + MDS_GETLOVINFO = 42, + MDS_LAST_OPC +} mds_cmd_t; +#define MDS_FIRST_OPC MDS_GETATTR /* When adding MDS RPC opcodes, please update * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ -#define MDS_LAST_OPC (MDS_GETLOVINFO+1) -#define MDS_FIRST_OPC MDS_GETATTR + /* * Do not exceed 63 */ @@ -586,15 +591,17 @@ extern void lustre_swab_lov_desc (struct lov_desc *ld); * LDLM requests: */ /* opcodes -- MUST be distinct from OST/MDS opcodes */ -#define LDLM_ENQUEUE 101 -#define LDLM_CONVERT 102 -#define LDLM_CANCEL 103 -#define LDLM_BL_CALLBACK 104 -#define LDLM_CP_CALLBACK 105 +typedef enum { + LDLM_ENQUEUE = 101, + LDLM_CONVERT = 102, + LDLM_CANCEL = 103, + LDLM_BL_CALLBACK = 104, + LDLM_CP_CALLBACK = 105, + LDLM_LAST_OPC +} ldlm_cmd_t; +#define LDLM_FIRST_OPC LDLM_ENQUEUE /* When adding LDLM RPC opcodes, please update * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ 
-#define LDLM_LAST_OPC (LDLM_CP_CALLBACK+1) -#define LDLM_FIRST_OPC LDLM_ENQUEUE #define RES_NAME_SIZE 3 #define RES_VERSION_SIZE 4 @@ -679,11 +686,11 @@ typedef enum { PTLBD_FLUSH = 203, PTLBD_CONNECT = 204, PTLBD_DISCONNECT = 205, + PTLBD_LAST_OPC } ptlbd_cmd_t; +#define PTLBD_FIRST_OPC PTLBD_QUERY /* When adding PTLBD RPC opcodes, please update * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ -#define PTLBD_LAST_OPC (PTLBD_FLUSH+1) -#define PTLBD_FIRST_OPC PTLBD_QUERY struct ptlbd_op { __u16 op_cmd; diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index c43cf95..57e9620 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -25,6 +25,8 @@ #ifndef _LUSTRE_LIB_H #define _LUSTRE_LIB_H +#include + #ifndef __KERNEL__ # include # include @@ -195,7 +197,7 @@ static inline int ll_fid2str(char *str, __u64 id, __u32 generation) /* * OBD IOCTLS */ -#define OBD_IOCTL_VERSION 0x00010002 +#define OBD_IOCTL_VERSION 0x00010003 struct obd_ioctl_data { uint32_t ioc_len; @@ -222,13 +224,15 @@ struct obd_ioctl_data { uint32_t ioc_plen2; char *ioc_pbuf2; - /* two inline buffers */ + /* inline buffers for various arguments */ uint32_t ioc_inllen1; char *ioc_inlbuf1; uint32_t ioc_inllen2; char *ioc_inlbuf2; uint32_t ioc_inllen3; char *ioc_inlbuf3; + uint32_t ioc_inllen4; + char *ioc_inlbuf4; char ioc_bulk[0]; }; @@ -244,6 +248,7 @@ static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) len += size_round(data->ioc_inllen1); len += size_round(data->ioc_inllen2); len += size_round(data->ioc_inllen3); + len += size_round(data->ioc_inllen4); return len; } @@ -262,11 +267,14 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) printk("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); return 1; } - if (data->ioc_inllen3 > (1<<30)) { printk("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); return 1; } + if (data->ioc_inllen4 > (1<<30)) { + printk("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); + 
return 1; + } if (data->ioc_inlbuf1 && !data->ioc_inllen1) { printk("OBD ioctl: inlbuf1 pointer but 0 length\n"); return 1; @@ -279,6 +287,10 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) printk("OBD ioctl: inlbuf3 pointer but 0 length\n"); return 1; } + if (data->ioc_inlbuf4 && !data->ioc_inllen4) { + printk("OBD ioctl: inlbuf4 pointer but 0 length\n"); + return 1; + } if (data->ioc_pbuf1 && !data->ioc_plen1) { printk("OBD ioctl: pbuf1 pointer but 0 length\n"); return 1; @@ -287,20 +299,6 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) printk("OBD ioctl: pbuf2 pointer but 0 length\n"); return 1; } - /* - if (data->ioc_inllen1 && !data->ioc_inlbuf1) { - printk("OBD ioctl: inllen1 set but NULL pointer\n"); - return 1; - } - if (data->ioc_inllen2 && !data->ioc_inlbuf2) { - printk("OBD ioctl: inllen2 set but NULL pointer\n"); - return 1; - } - if (data->ioc_inllen3 && !data->ioc_inlbuf3) { - printk("OBD ioctl: inllen3 set but NULL pointer\n"); - return 1; - } - */ if (data->ioc_plen1 && !data->ioc_pbuf1) { printk("OBD ioctl: plen1 set but NULL pointer\n"); return 1; @@ -314,24 +312,6 @@ static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) obd_ioctl_packlen(data), data->ioc_len); return 1; } -#if 0 - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - printk("OBD ioctl: inlbuf1 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[size_round(data->ioc_inllen1) + data->ioc_inllen2 - 1] != '\0') { - printk("OBD ioctl: inlbuf2 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen3 && - data->ioc_bulk[size_round(data->ioc_inllen1) + size_round(data->ioc_inllen2) - + data->ioc_inllen3 - 1] != '\0') { - printk("OBD ioctl: inlbuf3 not 0 terminated\n"); - return 1; - } -#endif return 0; } @@ -361,6 +341,8 @@ static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); if (data->ioc_inlbuf3) 
LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); + if (data->ioc_inlbuf4) + LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); if (obd_ioctl_is_invalid(overlay)) return 1; @@ -381,6 +363,7 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, overlay->ioc_inlbuf1 = data->ioc_inlbuf1; overlay->ioc_inlbuf2 = data->ioc_inlbuf2; overlay->ioc_inlbuf3 = data->ioc_inlbuf3; + overlay->ioc_inlbuf4 = data->ioc_inlbuf4; memcpy(data, pbuf, sizeof(*data)); @@ -391,6 +374,8 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr); if (data->ioc_inlbuf3) LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr); + if (data->ioc_inlbuf4) + LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr); return 0; } @@ -398,8 +383,6 @@ static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, #include -#define OBD_MAX_IOCTL_BUFFER 8192 - /* buffer MUST be at least the size of obd_ioctl_hdr */ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) { @@ -467,6 +450,13 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) size_round(data->ioc_inllen2); } + if (data->ioc_inllen4) { + data->ioc_inlbuf4 = &data->ioc_bulk[0] + + size_round(data->ioc_inllen1) + + size_round(data->ioc_inllen2) + + size_round(data->ioc_inllen3) ; + } + EXIT; return 0; } diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 35d4994..17ea754 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -98,32 +98,6 @@ struct ll_read_extent { int ll_check_dirty( struct super_block *sb ); int ll_batch_writepage( struct inode *inode, struct page *page ); -struct file_io_stats { - spinlock_t fis_lock; - __u64 fis_dirty_pages; - __u64 fis_dirty_hits; - __u64 fis_dirty_misses; - __u64 fis_forced_pages; - __u64 fis_writepage_pages; - __u64 fis_wb_ok; - __u64 fis_wb_fail; - __u64 fis_wb_from_writepage; - __u64 fis_wb_from_pressure; -}; - 
-#define IO_STAT_ADD(FIS, STAT, VAL) do { \ - struct file_io_stats *_fis_ = (FIS); \ - spin_lock(&_fis_->fis_lock); \ - _fis_->fis_##STAT += VAL; \ - spin_unlock(&_fis_->fis_lock); \ -} while (0) - -#define INODE_IO_STAT_ADD(INODE, STAT, VAL) \ - IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL) - -#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL) \ - INODE_IO_STAT_ADD((PAGE)->mapping, STAT, VAL) - /* interpet return codes from intent lookup */ #define LL_LOOKUP_POSITIVE 1 #define LL_LOOKUP_NEGATIVE 2 @@ -155,7 +129,7 @@ struct ll_sb_info { struct list_head ll_orphan_dentry_list; /*please don't ask -p*/ - struct file_io_stats ll_iostats; + struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ }; static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb) @@ -306,6 +280,40 @@ do { \ #define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0") +enum { + LPROC_LL_DIRTY_PAGES = 0, + LPROC_LL_DIRTY_HITS, + LPROC_LL_DIRTY_MISSES, + LPROC_LL_WB_WRITEPAGE, + LPROC_LL_WB_PRESSURE, + LPROC_LL_WB_OK, + LPROC_LL_WB_FAIL, + LPROC_LL_READ_BYTES, + LPROC_LL_WRITE_BYTES, + LPROC_LL_BRW_READ, + LPROC_LL_BRW_WRITE, + LPROC_LL_IOCTL, + LPROC_LL_OPEN, + LPROC_LL_RELEASE, + LPROC_LL_MAP, + LPROC_LL_LLSEEK, + LPROC_LL_FSYNC, + LPROC_LL_SETATTR_RAW, + LPROC_LL_SETATTR, + LPROC_LL_TRUNC, + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + LPROC_LL_GETATTR, +#else + LPROC_LL_REVALIDATE, +#endif + LPROC_LL_STAFS, + LPROC_LL_ALLOC_INODE, + + LPROC_LL_DIRECT_READ, + LPROC_LL_DIRECT_WRITE, + LPROC_LL_FILE_OPCODES +}; /* dcache.c */ int ll_have_md_lock(struct dentry *de); @@ -318,8 +326,8 @@ extern struct file_operations ll_file_operations; extern struct inode_operations ll_file_inode_operations; extern struct inode_operations ll_special_inode_operations; struct ldlm_lock; -int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, - int flag); +int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, + void *data, int flag); int 
ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, struct ldlm_extent *extent, struct lustre_handle *lockh); diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index ed5db88..f71802c 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -412,8 +412,8 @@ struct ptlrpc_service { struct list_head srv_threads; int (*srv_handler)(struct ptlrpc_request *req); char *srv_name; /* only statically allocated strings here; we don't clean them */ - struct proc_dir_entry *svc_procroot; - struct lprocfs_counters *svc_counters; + struct proc_dir_entry *svc_procroot; + struct lprocfs_stats *svc_stats; int srv_interface_rover; struct ptlrpc_srv_ni srv_interfaces[0]; diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index fe53974..2fb2c5c 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -74,25 +74,26 @@ struct brw_page { struct ost_server_data; struct filter_obd { - char *fo_fstype; - struct super_block *fo_sb; - struct vfsmount *fo_vfsmnt; - struct obd_run_ctxt fo_ctxt; - struct dentry *fo_dentry_O; - struct dentry *fo_dentry_O_mode[16]; - struct dentry **fo_dentry_O_sub; - spinlock_t fo_objidlock; /* protects fo_lastobjid increment */ - spinlock_t fo_translock; /* protects fsd_last_rcvd increment */ - struct file *fo_rcvd_filp; + const char *fo_fstype; + char *fo_nspath; + struct super_block *fo_sb; + struct vfsmount *fo_vfsmnt; + struct obd_run_ctxt fo_ctxt; + struct dentry *fo_dentry_O; + struct dentry *fo_dentry_O_mode[16]; + struct dentry **fo_dentry_O_sub; + spinlock_t fo_objidlock; /* protect fo_lastobjid increment */ + spinlock_t fo_translock; /* protect fsd_last_rcvd increment */ + struct file *fo_rcvd_filp; struct filter_server_data *fo_fsd; - unsigned long *fo_last_rcvd_slots; + unsigned long *fo_last_rcvd_slots; struct file_operations *fo_fop; struct inode_operations *fo_iop; struct 
address_space_operations *fo_aops; - struct list_head fo_export_list; - spinlock_t fo_fddlock; /* protects setting dentry->d_fsdata */ - int fo_subdir_count; + struct list_head fo_export_list; + spinlock_t fo_fddlock; /* protect setting dentry->d_fsdata */ + int fo_subdir_count; }; struct mds_server_data; @@ -223,6 +224,7 @@ struct niobuf_local { __u32 rc; struct page *page; struct dentry *dentry; + unsigned long start; }; /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ @@ -287,8 +289,8 @@ struct obd_device { struct ptlbd_obd ptlbd; } u; /* Fields used by LProcFS */ - unsigned int cntr_base; - void *counters; + unsigned int obd_cntr_base; + struct lprocfs_stats *obd_stats; }; struct obd_ops { @@ -364,11 +366,11 @@ struct obd_ops { int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md, struct lustre_handle *parent_lock, __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *cb, void *data, int datalen, + int *flags, void *cb, void *data, struct lustre_handle *lockh); int (*o_match)(struct lustre_handle *conn, struct lov_stripe_md *md, __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, struct lustre_handle *lockh); + int *flags, void *data, struct lustre_handle *lockh); int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md, __u32 mode, struct lustre_handle *); int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *, diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 64b0a68..e93032a 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -210,17 +210,17 @@ do { \ offsetof(struct obd_ops, o_iocontrol)) \ / sizeof(((struct obd_ops *)(0))->o_iocontrol)) -#define OBD_COUNTER_INCREMENT(obd, op) \ - if ((obd)->counters != NULL) { \ - struct lprocfs_counters* cntrs = obd->counters; \ - unsigned int coffset; \ - coffset = (obd)->cntr_base + OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < cntrs->num); \ - 
LPROCFS_COUNTER_INCBY1(&cntrs->cntr[coffset]); \ +#define OBD_COUNTER_INCREMENT(obd, op) \ + if ((obd)->obd_stats != NULL) { \ + unsigned int coffset; \ + coffset = (unsigned int)(obd)->obd_cntr_base + \ + OBD_COUNTER_OFFSET(op); \ + LASSERT(coffset < obd->obd_stats->ls_num); \ + lprocfs_counter_incr(obd->obd_stats, coffset); \ } #else -#define OBD_COUNTER_OFFSET(op) -#define OBD_COUNTER_INCREMENT(obd, op) +#define OBD_COUNTER_OFFSET(op) +#define OBD_COUNTER_INCREMENT(obd, op) #endif #define OBD_CHECK_OP(obd, op) \ @@ -230,7 +230,6 @@ do { \ obd->obd_minor); \ RETURN(-EOPNOTSUPP); \ } \ - OBD_COUNTER_INCREMENT(obd, op); \ } while (0) static inline int obd_get_info(struct lustre_handle *conn, __u32 keylen, @@ -242,6 +241,7 @@ static inline int obd_get_info(struct lustre_handle *conn, __u32 keylen, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, get_info); + OBD_COUNTER_INCREMENT(exp->exp_obd, get_info); rc = OBP(exp->exp_obd, get_info)(conn, keylen, key, vallen, val); class_export_put(exp); @@ -257,6 +257,7 @@ static inline int obd_set_info(struct lustre_handle *conn, obd_count keylen, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, set_info); + OBD_COUNTER_INCREMENT(exp->exp_obd, set_info); rc = OBP(exp->exp_obd, set_info)(conn, keylen, key, vallen, val); class_export_put(exp); @@ -269,6 +270,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data) ENTRY; OBD_CHECK_OP(obd, setup); + OBD_COUNTER_INCREMENT(obd, setup); rc = OBP(obd, setup)(obd, datalen, data); RETURN(rc); @@ -281,6 +283,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover) OBD_CHECK_DEV_STOPPING(obd); OBD_CHECK_OP(obd, cleanup); + OBD_COUNTER_INCREMENT(obd, cleanup); rc = OBP(obd, cleanup)(obd, force, failover); RETURN(rc); @@ -303,6 +306,7 @@ static inline int obd_packmd(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, packmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, packmd); rc = 
OBP(exp->exp_obd, packmd)(conn, disk_tgt, mem_src); class_export_put(exp); @@ -350,6 +354,7 @@ static inline int obd_unpackmd(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, unpackmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd); rc = OBP(exp->exp_obd, unpackmd)(conn, mem_tgt, disk_src, disk_len); class_export_put(exp); @@ -390,6 +395,7 @@ static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, create); + OBD_COUNTER_INCREMENT(exp->exp_obd, create); rc = OBP(exp->exp_obd, create)(conn, obdo, ea, oti); class_export_put(exp); @@ -406,6 +412,7 @@ static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, destroy); + OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea, oti); class_export_put(exp); @@ -421,14 +428,15 @@ static inline int obd_getattr(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, getattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); rc = OBP(exp->exp_obd, getattr)(conn, obdo, ea); class_export_put(exp); RETURN(rc); } -static inline int obd_getattr_async(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea, +static inline int obd_getattr_async(struct lustre_handle *conn, + struct obdo *obdo, struct lov_stripe_md *ea, struct ptlrpc_request_set *set) { struct obd_export *exp; @@ -437,6 +445,7 @@ static inline int obd_getattr_async(struct lustre_handle *conn, struct obdo *obd OBD_CHECK_SETUP(conn, exp); OBD_CHECK_OP(exp->exp_obd, getattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); rc = OBP(exp->exp_obd, getattr_async)(conn, obdo, ea, set); class_export_put(exp); @@ -453,6 +462,7 @@ static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, close); + 
OBD_COUNTER_INCREMENT(exp->exp_obd, close); rc = OBP(exp->exp_obd, close)(conn, obdo, ea, oti); class_export_put(exp); @@ -469,6 +479,7 @@ static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, open); + OBD_COUNTER_INCREMENT(exp->exp_obd, open); rc = OBP(exp->exp_obd, open)(conn, obdo, ea, oti, och); class_export_put(exp); @@ -485,6 +496,7 @@ static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, setattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, setattr); rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea, oti); class_export_put(exp); @@ -499,6 +511,7 @@ static inline int obd_connect(struct lustre_handle *conn, OBD_CHECK_DEV_ACTIVE(obd); OBD_CHECK_OP(obd, connect); + OBD_COUNTER_INCREMENT(obd, connect); rc = OBP(obd, connect)(conn, obd, cluuid); RETURN(rc); @@ -512,6 +525,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover) OBD_CHECK_SETUP(conn, exp); OBD_CHECK_OP(exp->exp_obd, disconnect); + OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect); rc = OBP(exp->exp_obd, disconnect)(conn, failover); class_export_put(exp); @@ -534,6 +548,7 @@ static inline int obd_statfs(struct lustre_handle *conn,struct obd_statfs *osfs) OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, statfs); + OBD_COUNTER_INCREMENT(exp->exp_obd, statfs); rc = OBP(exp->exp_obd, statfs)(conn, osfs); class_export_put(exp); @@ -546,6 +561,7 @@ static inline int obd_syncfs(struct obd_export *exp) ENTRY; OBD_CHECK_OP(exp->exp_obd, syncfs); + OBD_COUNTER_INCREMENT(exp->exp_obd, syncfs); rc = OBP(exp->exp_obd, syncfs)(exp); RETURN(rc); @@ -561,6 +577,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, punch); + OBD_COUNTER_INCREMENT(exp->exp_obd, punch); rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end, oti); class_export_put(exp); @@ -577,6 
+594,7 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, brw); + OBD_COUNTER_INCREMENT(exp->exp_obd, brw); if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) { CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, " @@ -601,6 +619,7 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, brw_async); + OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async); if (!(cmd & OBD_BRW_RWMASK)) { CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n"); @@ -622,6 +641,7 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, ENTRY; OBD_CHECK_OP(exp->exp_obd, preprw); + OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); rc = OBP(exp->exp_obd, preprw)(cmd, exp, objcount, obj, niocount, remote, local, desc_private, oti); @@ -637,6 +657,7 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp, ENTRY; OBD_CHECK_OP(exp->exp_obd, commitrw); + OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw); rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount, local, desc_private, oti); @@ -652,6 +673,7 @@ static inline int obd_iocontrol(unsigned int cmd, struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, iocontrol); + OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol); rc = OBP(exp->exp_obd, iocontrol)(cmd, conn, len, karg, uarg); class_export_put(exp); @@ -663,7 +685,7 @@ static inline int obd_enqueue(struct lustre_handle *conn, struct lustre_handle *parent_lock, __u32 type, void *cookie, int cookielen, __u32 mode, int *flags, void *cb, void *data, - int datalen, struct lustre_handle *lockh) + struct lustre_handle *lockh) { struct obd_export *exp; int rc; @@ -671,19 +693,19 @@ static inline int obd_enqueue(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, enqueue); + OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue); rc = OBP(exp->exp_obd, enqueue)(conn, ea, parent_lock, 
type, cookie, cookielen, mode, flags, cb, - data, datalen, lockh); + data, lockh); class_export_put(exp); RETURN(rc); } static inline int obd_match(struct lustre_handle *conn, - struct lov_stripe_md *ea, - __u32 type, void *cookie, int cookielen, - __u32 mode, int *flags, - struct lustre_handle *lockh) + struct lov_stripe_md *ea, __u32 type, void *cookie, + int cookielen, __u32 mode, int *flags, void *data, + struct lustre_handle *lockh) { struct obd_export *exp; int rc; @@ -691,9 +713,10 @@ static inline int obd_match(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, match); + OBD_COUNTER_INCREMENT(exp->exp_obd, match); rc = OBP(exp->exp_obd, match)(conn, ea, type, cookie, cookielen, mode, - flags, lockh); + flags, data, lockh); class_export_put(exp); RETURN(rc); } @@ -709,6 +732,7 @@ static inline int obd_cancel(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, cancel); + OBD_COUNTER_INCREMENT(exp->exp_obd, cancel); rc = OBP(exp->exp_obd, cancel)(conn, ea, mode, lockh); class_export_put(exp); @@ -725,6 +749,7 @@ static inline int obd_cancel_unused(struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, cancel_unused); + OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); rc = OBP(exp->exp_obd, cancel_unused)(conn, ea, flags, opaque); class_export_put(exp); @@ -740,6 +765,7 @@ static inline int obd_san_preprw(int cmd, struct lustre_handle *conn, OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, preprw); + OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj, niocount, remote); diff --git a/lustre/include/linux/obd_filter.h b/lustre/include/linux/obd_filter.h index 74bb784..3d8188a 100644 --- a/lustre/include/linux/obd_filter.h +++ b/lustre/include/linux/obd_filter.h @@ -91,11 +91,13 @@ struct filter_file_data { }; struct filter_dentry_data { - obd_id fdd_objid; - atomic_t fdd_open_count; - int fdd_flags; + obd_id 
fdd_objid; + __u32 fdd_magic; + atomic_t fdd_open_count; + int fdd_flags; }; +#define FILTER_DENTRY_MAGIC 0x9efba101 #define FILTER_FLAG_DESTROY 0x0001 /* destroy dentry on last file close */ diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 69a47dc..aecef05 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -96,6 +96,7 @@ extern unsigned long obd_sync_filter; #define OBD_FAIL_OST_ALL_REPLY_NET 0x211 #define OBD_FAIL_OST_ALL_REQUESTS_NET 0x212 #define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 +#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -146,6 +147,19 @@ do { \ } \ } while(0) +#define OBD_FAIL_TIMEOUT(id, secs) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + CERROR("obd_fail_timeout id %x sleeping for %ld secs\n", \ + (id), (secs)); \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((secs) * HZ); \ + set_current_state(TASK_RUNNING); \ + CERROR("obd_fail_timeout id %x awake\n", \ + (id)); \ + } \ +} while(0) + #define fixme() CDEBUG(D_OTHER, "FIXME\n"); #ifdef __KERNEL__ @@ -194,47 +208,39 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev) #define LTIME_S(time) (time) #endif /* __KERNEL__ */ -#define OBD_ALLOC(ptr, size) \ -do { \ - void *lptr; \ - int s = (size); \ - (ptr) = lptr = kmalloc(s, GFP_KERNEL); \ - if (lptr == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed " \ - "at %s:%d\n", s, __FILE__, __LINE__); \ - } else { \ - int obd_curmem; \ - memset(lptr, 0, s); \ - atomic_add(s, &obd_memory); \ - obd_curmem = atomic_read(&obd_memory); \ - if (obd_curmem > obd_memmax) \ - obd_memmax = obd_curmem; \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p " \ - "(tot %d)\n", s, lptr, obd_curmem); \ - } \ +#define OBD_ALLOC(ptr, size) \ +do { \ + (ptr) = kmalloc(size, GFP_KERNEL); \ + if ((ptr) == NULL) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), 
__FILE__, __LINE__); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ } while (0) #ifdef __arch_um__ # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) #else -# define OBD_VMALLOC(ptr, size) \ -do { \ - void *lptr; \ - int s = (size); \ - (ptr) = lptr = vmalloc(s); \ - if (lptr == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed " \ - "at %s:%d\n", s, __FILE__, __LINE__); \ - } else { \ - int obd_curmem; \ - memset(lptr, 0, s); \ - atomic_add(s, &obd_memory); \ - obd_curmem = atomic_read(&obd_memory); \ - if (obd_curmem > obd_memmax) \ - obd_memmax = obd_curmem; \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p " \ - "(tot %d)\n", s, lptr, obd_curmem); \ - } \ +# define OBD_VMALLOC(ptr, size) \ +do { \ + (ptr) = vmalloc(size); \ + if ((ptr) == NULL) { \ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ } while (0) #endif @@ -244,68 +250,58 @@ do { \ #define POISON(lptr, c, s) memset(lptr, c, s) #endif -#define OBD_FREE(ptr, size) \ -do { \ - void *lptr = (ptr); \ - int s = (size); \ - LASSERT(lptr); \ - POISON(lptr, 0x5a, s); \ - kfree(lptr); \ - atomic_sub(s, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, lptr, atomic_read(&obd_memory)); \ - (ptr) = (void *)0xdeadbeef; \ +#define OBD_FREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, 
atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + kfree(ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #ifdef __arch_um__ # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) #else -# define OBD_VFREE(ptr, size) \ -do { \ - void *lptr = (ptr); \ - int s = (size); \ - LASSERT(lptr); \ - POISON(lptr, 0x5a, s); \ - vfree(lptr); \ - atomic_sub(s, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, lptr, atomic_read(&obd_memory)); \ - (ptr) = (void *)0xdeadbeef; \ +# define OBD_VFREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + vfree(ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #endif -#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ -do { \ - long s = (size); \ - void *lptr; \ - LASSERT (!in_interrupt()); \ - (ptr) = lptr = kmem_cache_alloc((slab), type); \ - if (lptr == NULL) { \ - CERROR("slab-alloc of '" #ptr "' (%ld bytes) failed " \ - "at %s:%d\n", s, __FILE__, __LINE__); \ - } else { \ - int obd_curmem; \ - memset(lptr, 0, s); \ - atomic_add(s, &obd_memory); \ - obd_curmem = atomic_read(&obd_memory); \ - if (obd_curmem > obd_memmax) \ - obd_memmax = obd_curmem; \ - CDEBUG(D_MALLOC, "slab-alloced '" #ptr "': %ld at %p " \ - "(tot %d)\n", s, lptr, obd_curmem); \ - } \ +#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ +do { \ + LASSERT (!in_interrupt()); \ + (ptr) = kmem_cache_alloc(slab, type); \ + if ((ptr) == NULL) { \ + CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ } while (0) -#define OBD_SLAB_FREE(ptr, 
slab, size) \ -do { \ - long s = (size); \ - void *lptr = (ptr); \ - LASSERT(lptr); \ - POISON(lptr, 0x5a, s); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %ld at %p (tot %d).\n", \ - s, lptr, atomic_read(&obd_memory)); \ - kmem_cache_free((slab), lptr); \ - atomic_sub(s, &obd_memory); \ - (ptr) = (void *)0xdeadbeef; \ +#define OBD_SLAB_FREE(ptr, slab, size) \ +do { \ + LASSERT(ptr); \ + CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + atomic_sub(size, &obd_memory); \ + POISON(ptr, 0x5a, size); \ + kmem_cache_free(slab, ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #endif diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch index 6b9a348..e01feca 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch @@ -1,8 +1,11 @@ - 0 files changed + fs/ext3/super.c | 229 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 2 + include/linux/ext3_fs_sb.h | 10 + + 3 files changed, 241 insertions(+) ---- linux-2.4.18-chaos52/fs/ext3/super.c~ext3-delete_thread-2.4.18 2003-06-01 03:24:13.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/super.c 2003-06-03 17:01:49.000000000 +0800 -@@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe +--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jun 18 11:59:14 2003 +@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe } } @@ -36,22 +39,24 @@ + + INIT_LIST_HEAD(&sbi->s_delete_list); + wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("EXT3-fs: delete thread on %s started\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); + + /* main loop */ + for (;;) { -+ sleep_on(&sbi->s_delete_thread_queue); ++ 
wait_event_interruptible(sbi->s_delete_thread_queue, ++ !list_empty(&sbi->s_delete_list) || ++ !test_opt(sb, ASYNCDEL)); + ext3_debug("%s woken up: %lu inodes, %lu blocks\n", + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); + + spin_lock(&sbi->s_delete_lock); + if (list_empty(&sbi->s_delete_list)) { ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + memset(&sbi->s_delete_list, 0, + sizeof(sbi->s_delete_list)); + spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("ext3 delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s exiting\n", ++ kdevname(sb->s_dev)); + wake_up(&sbi->s_delete_waiter_queue); + break; + } @@ -73,12 +78,13 @@ + sbi->s_delete_blocks -= blocks; + sbi->s_delete_inodes--; + } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) ++ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { + ext3_warning(sb, __FUNCTION__, + "%lu blocks, %lu inodes on list?\n", + sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ sbi->s_delete_blocks = 0; ++ sbi->s_delete_inodes = 0; ++ } + spin_unlock(&sbi->s_delete_lock); + wake_up(&sbi->s_delete_waiter_queue); + } @@ -92,11 +98,11 @@ + int rc; + + spin_lock_init(&sbi->s_delete_lock); -+ memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list)); + init_waitqueue_head(&sbi->s_delete_thread_queue); + init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ ++ if (!test_opt(sb, ASYNCDEL)) ++ return; + + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); + if (rc < 0) @@ -108,6 +114,10 @@ + +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) +{ ++ if (sbi->s_delete_list.next == 0) /* thread never started */ ++ return; ++ ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + wake_up(&sbi->s_delete_thread_queue); + wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); +} @@ -135,8 +145,8 @@ + clear_inode(old_inode); + return; + } 
-+ -+ if (!test_opt (old_inode->i_sb, ASYNCDEL)) { ++ ++ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { + ext3_delete_inode(old_inode); + return; + } @@ -148,7 +158,8 @@ + return; + } + -+ if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) { ++ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || ++ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); + ext3_delete_inode(old_inode); @@ -174,7 +185,7 @@ + } + if (!new_inode) { + up(&sbi->s_orphan_lock); -+ ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n", ++ ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); + ext3_delete_inode(old_inode); + return; @@ -194,8 +205,6 @@ + + clear_inode(old_inode); + -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); + spin_lock(&sbi->s_delete_lock); + J_ASSERT(list_empty(&new_inode->i_dentry)); + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); @@ -203,6 +212,9 @@ + sbi->s_delete_inodes++; + spin_unlock(&sbi->s_delete_lock); + ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ + wake_up(&sbi->s_delete_thread_queue); +} +#else @@ -213,7 +225,7 @@ void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +609,7 @@ void ext3_put_super (struct super_block +@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -221,7 +233,7 @@ ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +658,11 @@ static struct super_operations ext3_sops +@@ -451,7 +664,11 @@ static struct super_operations ext3_sops write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. 
Don't need */ @@ -232,21 +244,23 @@ +#endif put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -514,6 +723,12 @@ static int parse_options (char * options + write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ +@@ -511,6 +728,14 @@ static int parse_options (char * options this_char = strtok (NULL, ",")) { if ((value = strchr (this_char, '=')) != NULL) *value++ = 0; +#ifdef EXT3_DELETE_THREAD + if (!strcmp(this_char, "asyncdel")) + set_opt(*mount_options, ASYNCDEL); ++ else if (!strcmp(this_char, "noasyncdel")) ++ clear_opt(*mount_options, ASYNCDEL); + else +#endif + if (!strcmp (this_char, "bsddf")) clear_opt (*mount_options, MINIX_DF); else if (!strcmp (this_char, "nouid32")) { -@@ -1209,6 +1424,7 @@ struct super_block * ext3_read_super (st +@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -254,8 +268,18 @@ /* * akpm: core read_super() calls in here with the superblock locked. 
* That deadlocks, because orphan cleanup needs to lock the superblock ---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 2003-06-01 03:24:11.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-06-03 17:03:28.000000000 +0800 +@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s + if (!parse_options(data, &tmp, sbi, &tmp, 1)) + return -EINVAL; + ++ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) ++ ext3_stop_delete_thread(sbi); ++ + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + +--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Tue Jun 17 12:36:56 2003 @@ -190,6 +190,7 @@ struct ext3_group_desc */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -272,8 +296,8 @@ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H ---- linux-2.4.18-chaos52/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 2003-06-01 03:24:13.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs_sb.h 2003-06-03 16:59:24.000000000 +0800 +--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 32 diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch index be2723c..34c5158 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch @@ -1,7 +1,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c --- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400 +++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400 -@@ -398,6 +398,210 @@ 
static void dump_orphan_list(struct supe +@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe } } @@ -35,22 +35,24 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + + INIT_LIST_HEAD(&sbi->s_delete_list); + wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("EXT3-fs: delete thread on %s started\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); + + /* main loop */ + for (;;) { -+ sleep_on(&sbi->s_delete_thread_queue); ++ wait_event_interruptible(sbi->s_delete_thread_queue, ++ !list_empty(&sbi->s_delete_list) || ++ !test_opt(sb, ASYNCDEL)); + ext3_debug("%s woken up: %lu inodes, %lu blocks\n", + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); + + spin_lock(&sbi->s_delete_lock); + if (list_empty(&sbi->s_delete_list)) { ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + memset(&sbi->s_delete_list, 0, + sizeof(sbi->s_delete_list)); + spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("ext3 delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s exiting\n", ++ kdevname(sb->s_dev)); + wake_up(&sbi->s_delete_waiter_queue); + break; + } @@ -72,12 +74,13 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + sbi->s_delete_blocks -= blocks; + sbi->s_delete_inodes--; + } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) ++ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { + ext3_warning(sb, __FUNCTION__, + "%lu blocks, %lu inodes on list?\n", + sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ sbi->s_delete_blocks = 0; ++ sbi->s_delete_inodes = 0; ++ } + spin_unlock(&sbi->s_delete_lock); + wake_up(&sbi->s_delete_waiter_queue); + } @@ -91,11 +94,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + int rc; + + spin_lock_init(&sbi->s_delete_lock); -+ memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list)); + init_waitqueue_head(&sbi->s_delete_thread_queue); + 
init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ ++ if (!test_opt(sb, ASYNCDEL)) ++ return; + + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); + if (rc < 0) @@ -107,6 +110,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) +{ ++ if (sbi->s_delete_list.next == 0) /* thread never started */ ++ return; ++ ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + wake_up(&sbi->s_delete_thread_queue); + wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); +} @@ -134,8 +141,8 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + clear_inode(old_inode); + return; + } -+ -+ if (!test_opt (old_inode->i_sb, ASYNCDEL)) { ++ ++ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { + ext3_delete_inode(old_inode); + return; + } @@ -147,7 +154,8 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + return; + } + -+ if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) { ++ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || ++ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); + ext3_delete_inode(old_inode); @@ -173,7 +181,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + } + if (!new_inode) { + up(&sbi->s_orphan_lock); -+ ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n", ++ ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); + ext3_delete_inode(old_inode); + return; @@ -193,8 +201,6 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + + clear_inode(old_inode); + -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); + spin_lock(&sbi->s_delete_lock); + J_ASSERT(list_empty(&new_inode->i_dentry)); + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); @@ -202,6 +208,9 @@ diff -puNr origin/fs/ext3/super.c 
linux/fs/ext3/super.c + sbi->s_delete_inodes++; + spin_unlock(&sbi->s_delete_lock); + ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ + wake_up(&sbi->s_delete_thread_queue); +} +#else @@ -232,13 +241,15 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ -@@ -514,6 +725,11 @@ static int parse_options (char * options +@@ -514,6 +725,13 @@ static int parse_options (char * options this_char = strtok (NULL, ",")) { if ((value = strchr (this_char, '=')) != NULL) *value++ = 0; +#ifdef EXT3_DELETE_THREAD + if (!strcmp(this_char, "asyncdel")) + set_opt(*mount_options, ASYNCDEL); ++ else if (!strcmp(this_char, "noasyncdel")) ++ clear_opt(*mount_options, ASYNCDEL); + else +#endif #ifdef CONFIG_EXT3_FS_XATTR_USER @@ -252,6 +263,16 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c /* * akpm: core read_super() calls in here with the superblock locked. 
* That deadlocks, because orphan cleanup needs to lock the superblock +@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s + if (!parse_options(data, &tmp, sbi, &tmp, 1)) + return -EINVAL; + ++ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) ++ ext3_stop_delete_thread(sbi); ++ + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h --- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400 +++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400 diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch index d8dbdfb..6eabe85 100644 --- a/lustre/kernel_patches/patches/iopen-2.4.18.patch +++ b/lustre/kernel_patches/patches/iopen-2.4.18.patch @@ -407,7 +407,7 @@ #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ +#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ +#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ + #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/ldlm/Makefile.am b/lustre/ldlm/Makefile.am index 1ceb276..b4e4cea 100644 --- a/lustre/ldlm/Makefile.am +++ b/lustre/ldlm/Makefile.am @@ -6,7 +6,7 @@ DEFS= LDLMSOURCES= l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lib.c \ -ldlm_extent.c ldlm_request.c ldlm_lockd.c +ldlm_extent.c ldlm_request.c ldlm_lockd.c ldlm_internal.h if LIBLUSTRE lib_LIBRARIES = libldlm.a diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 62272fa..c5f8873 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -612,7 +612,8 @@ void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen) * comment above ldlm_lock_match */ static struct ldlm_lock *search_queue(struct list_head *queue, 
ldlm_mode_t mode, struct ldlm_extent *extent, - struct ldlm_lock *old_lock, int flags) + struct ldlm_lock *old_lock, void *data, + int flags) { struct ldlm_lock *lock; struct list_head *tmp; @@ -651,6 +652,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode, !(lock->l_flags & LDLM_FL_LOCAL)) continue; + if ((flags & LDLM_FL_MATCH_DATA) && lock->l_data != data) + continue; + ldlm_lock_addref_internal(lock, mode); return lock; } @@ -672,13 +676,16 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode, * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked * to be canceled can still be matched as long as they still have reader * or writer refernces + * If 'flags' contains LDLM_FL_MATCH_DATA, then only match a lock if the opaque + * data is the same. * * Returns 1 if it finds an already-existing lock that is compatible; in this * case, lockh is filled in with a addref()ed lock */ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *res_id, __u32 type, void *cookie, - int cookielen, ldlm_mode_t mode,struct lustre_handle *lockh) + int cookielen, ldlm_mode_t mode, void *data, + struct lustre_handle *lockh) { struct ldlm_resource *res; struct ldlm_lock *lock, *old_lock = NULL; @@ -703,15 +710,18 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, l_lock(&ns->ns_lock); - lock = search_queue(&res->lr_granted, mode, cookie, old_lock, flags); + lock = search_queue(&res->lr_granted, mode, cookie, old_lock, data, + flags); if (lock != NULL) GOTO(out, rc = 1); if (flags & LDLM_FL_BLOCK_GRANTED) GOTO(out, rc = 0); - lock = search_queue(&res->lr_converting, mode, cookie, old_lock, flags); + lock = search_queue(&res->lr_converting, mode, cookie, old_lock, data, + flags); if (lock != NULL) GOTO(out, rc = 1); - lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, flags); + lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, data, + flags); if (lock != 
NULL) GOTO(out, rc = 1); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 9d2857e7..3f46618 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1080,7 +1080,6 @@ EXPORT_SYMBOL(ldlm_cli_convert); EXPORT_SYMBOL(ldlm_cli_enqueue); EXPORT_SYMBOL(ldlm_cli_cancel); EXPORT_SYMBOL(ldlm_cli_cancel_unused); -EXPORT_SYMBOL(ldlm_match_or_enqueue); EXPORT_SYMBOL(ldlm_replay_locks); EXPORT_SYMBOL(ldlm_resource_foreach); EXPORT_SYMBOL(ldlm_namespace_foreach); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index e5d9c24..008adab 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -60,6 +60,7 @@ int ldlm_expired_completion_wait(void *data) int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) { + /* XXX ALLOCATE - 160 bytes */ struct lock_wait_data lwd; unsigned long irqflags; struct obd_device *obd; @@ -373,43 +374,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh, return rc; } -int ldlm_match_or_enqueue(struct lustre_handle *connh, - struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id res_id, - __u32 type, - void *cookie, int cookielen, - ldlm_mode_t mode, - int *flags, - ldlm_completion_callback completion, - ldlm_blocking_callback blocking, - void *data, - struct lustre_handle *lockh) -{ - int rc; - ENTRY; - if (connh == NULL) { - /* Just to make sure that I understand things --phil */ - LASSERT(*flags & LDLM_FL_LOCAL_ONLY); - } - - LDLM_DEBUG_NOLOCK("resource "LPU64"/"LPU64, res_id.name[0], - res_id.name[1]); - rc = ldlm_lock_match(ns, *flags, &res_id, type, cookie, cookielen, mode, - lockh); - if (rc == 0) { - rc = ldlm_cli_enqueue(connh, req, ns, parent_lock_handle, - res_id, type, cookie, cookielen, mode, - flags, completion, blocking, data, - lockh); - if (rc != ELDLM_OK) - CERROR("ldlm_cli_enqueue: err: %d\n", rc); - RETURN(rc); - } - RETURN(0); -} - int ldlm_cli_replay_enqueue(struct ldlm_lock
*lock) { struct lustre_handle lockh; @@ -666,16 +630,20 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); + if (opaque != NULL && lock->l_data != opaque) { + LDLM_ERROR(lock, "data %p doesn't match opaque %p res" + LPU64":"LPU64, lock->l_data, opaque, + res_id.name[0], res_id.name[1]); + //LBUG(); + continue; + } + if (lock->l_readers || lock->l_writers) { if (flags & LDLM_FL_WARN) { LDLM_ERROR(lock, "lock in use"); - LBUG(); + //LBUG(); } - } - if (opaque != NULL && lock->l_data != opaque) { - LDLM_ERROR(lock, "data %p doesn't match opaque %p", - lock->l_data, opaque); - LBUG(); + continue; } /* See CBPENDING comment in ldlm_cancel_lru */ diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am index ddb9657..b6fc501 100644 --- a/lustre/llite/Makefile.am +++ b/lustre/llite/Makefile.am @@ -11,6 +11,6 @@ EXTRA_PROGRAMS = llite llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c llite_SOURCES += file.c dir.c sysctl.c symlink.c -llite_SOURCES += namei.c lproc_llite.c +llite_SOURCES += namei.c lproc_llite.c llite_internal.h include $(top_srcdir)/Rules diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 0c9fcf7..20924fc 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -167,6 +167,7 @@ int ll_have_md_lock(struct dentry *de) struct lustre_handle lockh; struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obddev; + int flags; ENTRY; if (!de->d_inode) @@ -178,14 +179,15 @@ int ll_have_md_lock(struct dentry *de) CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); - if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, - &res_id, LDLM_PLAIN, NULL, 0, LCK_PR, &lockh)) { + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; + if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, + NULL, 0, LCK_PR, de->d_inode, &lockh)) { ldlm_lock_decref(&lockh, LCK_PR); RETURN(1); } - 
if (ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, - &res_id, LDLM_PLAIN, NULL, 0, LCK_PW, &lockh)) { + if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, + NULL, 0, LCK_PW, de->d_inode, &lockh)) { ldlm_lock_decref(&lockh, LCK_PW); RETURN(1); } @@ -217,9 +219,11 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) struct ldlm_res_id res_id = { .name = {inode->i_ino, (__u64)inode->i_generation} }; struct lustre_handle lockh; - rc = ldlm_lock_match(obddev->obd_namespace, - LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_PLAIN, NULL, 0, LCK_PR, &lockh); + int flags; + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; + rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id, + LDLM_PLAIN, NULL, 0, LCK_PR, inode, + &lockh); if (rc) { de->d_flags &= ~DCACHE_LUSTRE_INVALID; if (it && it->it_op == IT_GETATTR) { @@ -232,9 +236,9 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) } RETURN(1); } - rc = ldlm_lock_match(obddev->obd_namespace, - LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_PLAIN, NULL, 0, LCK_PW, &lockh); + rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id, + LDLM_PLAIN, NULL, 0, LCK_PW, inode, + &lockh); if (rc) { de->d_flags &= ~DCACHE_LUSTRE_INVALID; if (it && it->it_op == IT_GETATTR) { diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 8759598..2d5954d 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -35,7 +35,7 @@ #include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #include // for wait_on_buffer -#else +#else #include // for wait_on_buffer #endif @@ -117,7 +117,7 @@ static int ll_dir_readpage(struct file *file, struct page *page) body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); LASSERT (body != NULL); /* checked by mdc_readpage() */ LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */ - + inode->i_size = body->size; } ptlrpc_req_finished(request); @@ -770,6 +770,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file 
*file, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino, inode->i_generation, inode, cmd); + if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */ + return -ENOTTY; + switch(cmd) { case IOC_MDC_LOOKUP: { struct ptlrpc_request *request = NULL; @@ -803,9 +806,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, } body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_getattr_name() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr_name() */ - + LASSERT(body != NULL); /* checked by mdc_getattr_name */ + LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */ + /* surely there's a better way -phik */ data->ioc_obdo1.o_mode = body->mode; data->ioc_obdo1.o_uid = body->uid; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 3429b28..bd3fa7d 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -132,6 +132,7 @@ int ll_file_release(struct inode *inode, struct file *file) if (inode->i_sb->s_root == file->f_dentry) RETURN(0); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE); fd = (struct ll_file_data *)file->private_data; if (!fd) /* no process opened the file after an mcreate */ RETURN(rc = 0); @@ -345,6 +346,7 @@ int ll_file_open(struct inode *inode, struct file *file) if (inode->i_sb->s_root == file->f_dentry) RETURN(0); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); LL_GET_INTENT(file->f_dentry, it); rc = ll_it_open_error(IT_OPEN_OPEN, it); if (rc) @@ -495,8 +497,8 @@ int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, inode->i_ino, extent->start, extent->end); rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent, - sizeof(extent), mode, &flags, ll_lock_callback, - inode, sizeof(*inode), lockh); + sizeof(extent), mode, &flags, ll_extent_lock_callback, + inode, lockh); RETURN(rc); } @@ -506,15 +508,13 @@ int ll_extent_lock_no_validate(struct ll_file_data *fd, struct 
inode *inode, * the OST is returning the file size with each lock acquisition. */ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, - int mode, struct ldlm_extent *extent, - struct lustre_handle *lockh) + struct lov_stripe_md *lsm, int mode, + struct ldlm_extent *extent, struct lustre_handle *lockh) { struct ll_inode_info *lli = ll_i2info(inode); struct ldlm_extent size_lock; struct lustre_handle match_lockh = {0}; - int flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED; - int rc, matched; + int flags, rc, matched; ENTRY; rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); @@ -534,9 +534,10 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, size_lock.end = OBD_OBJECT_EOF; /* XXX I bet we should be checking the lock ignore flags.. */ + flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT, - &size_lock, sizeof(size_lock), LCK_PR, &flags, - &match_lockh); + &size_lock, sizeof(size_lock), LCK_PR, &flags, + inode, &match_lockh); /* hey, alright, we hold a size lock that covers the size we * just found, its not going to change for a while.. */ @@ -756,8 +757,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, EXIT; } -int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, - void *data, int flag) +int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, + void *data, int flag) { struct inode *inode = data; struct ll_inode_info *lli = ll_i2info(inode); @@ -811,6 +812,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, if (count == 0) RETURN(0); + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, + count); /* grab a -> eof extent to push extending writes out of node's caches * so we can see them at the getattr after lock acquisition. 
this will * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt @@ -916,6 +919,8 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) out: /* XXX errors? */ + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, + retval); ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); RETURN(retval); } @@ -983,6 +988,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */ return -ENOTTY; + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); switch(cmd) { case LL_IOC_GETFLAGS: /* Get the current value of the file flags */ @@ -1034,6 +1040,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) inode->i_generation, inode, offset + ((origin==2) ? inode->i_size : file->f_pos)); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK); if (origin == 2) { /* SEEK_END */ ldlm_error_t err; struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; @@ -1071,6 +1078,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_FSYNC); /* * filemap_fdata{sync,wait} are also called at PW lock cancelation so * we know that they can only find data to writeback here if we are @@ -1096,6 +1104,9 @@ int ll_inode_revalidate(struct dentry *dentry) } CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", inode->i_ino, inode->i_generation, inode, dentry->d_name.name); +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE); +#endif /* this is very tricky. 
it is unsafe to call ll_have_md_lock when we have a referenced lock: because it may cause an RPC @@ -1160,7 +1171,7 @@ int ll_inode_revalidate(struct dentry *dentry) ptlrpc_req_finished(req); RETURN(rc); } - LASSERT(rc >= sizeof (*lsm)); + LASSERT(rc >= sizeof(*lsm)); } ll_update_inode(inode, body, lsm); @@ -1201,6 +1212,7 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de, int res = 0; struct inode *inode = de->d_inode; + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); res = ll_inode_revalidate(de); if (res) return res; diff --git a/lustre/llite/iod.c b/lustre/llite/iod.c index f88ed87..836a9aa 100644 --- a/lustre/llite/iod.c +++ b/lustre/llite/iod.c @@ -174,10 +174,11 @@ static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp) */ if (rc) { CERROR("error from obd_brw_async: rc = %d\n", rc); - INODE_IO_STAT_ADD(inode, wb_fail, llwp->npgs); - } else { - INODE_IO_STAT_ADD(inode, wb_ok, llwp->npgs); - } + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_WB_FAIL, llwp->npgs); + } else + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_WB_OK, (llwp->npgs)); for (i = 0 ; i < llwp->npgs ; i++) { struct page *page = llwp->pga[i].pg; @@ -314,11 +315,12 @@ int ll_check_dirty(struct super_block *sb) llwp.npgs = 0; ll_get_dirty_pages(inode, &llwp); if (llwp.npgs) { - INODE_IO_STAT_ADD(inode, wb_from_pressure, - llwp.npgs); - ll_writeback(inode, &llwp); - rc += llwp.npgs; - making_progress = 1; + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_WB_PRESSURE, + llwp.npgs); + ll_writeback(inode, &llwp); + rc += llwp.npgs; + making_progress = 1; } } while (llwp.npgs && should_writeback()); @@ -384,7 +386,8 @@ int ll_batch_writepage(struct inode *inode, struct page *page) ll_get_dirty_pages(inode, &llwp); if (llwp.npgs) { - INODE_IO_STAT_ADD(inode, wb_from_writepage, llwp.npgs); + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_WB_WRITEPAGE, llwp.npgs); ll_writeback(inode, &llwp); } @@ -461,7 
+464,8 @@ static inline void lldo_dirty_add(struct inode *inode, long val) { lldo->do_num_dirty += val; - INODE_IO_STAT_ADD(inode, dirty_pages, val); + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRTY_PAGES, + val); } void ll_record_dirty(struct inode *inode, unsigned long offset) @@ -624,76 +628,3 @@ void ll_lldo_init(struct ll_dirty_offsets *lldo) lldo->do_num_dirty = 0; lldo->do_root.rb_node = NULL; } - -/* seq file export of some page cache tracking stats */ -static int ll_pgcache_seq_show(struct seq_file *seq, void *v) -{ - struct timeval now; - struct ll_sb_info *sbi = seq->private; - do_gettimeofday(&now); - - seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n", - now.tv_sec, now.tv_usec); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - seq_printf(seq, "VM_under_pressure: %s\n", - should_writeback() ? "yes" : "no"); -#endif - seq_printf(seq, "dirty_pages: "LPU64"\n", - sbi->ll_iostats.fis_dirty_pages); - seq_printf(seq, "dirty_page_hits: "LPU64"\n", - sbi->ll_iostats.fis_dirty_hits); - seq_printf(seq, "dirty_page_misses: "LPU64"\n", - sbi->ll_iostats.fis_dirty_misses); - seq_printf(seq, "writeback_from_writepage: "LPU64"\n", - sbi->ll_iostats.fis_wb_from_writepage); - seq_printf(seq, "writeback_from_pressure: "LPU64"\n", - sbi->ll_iostats.fis_wb_from_pressure); - seq_printf(seq, "writeback_ok_pages: "LPU64"\n", - sbi->ll_iostats.fis_wb_ok); - seq_printf(seq, "writeback_failed_pages: "LPU64"\n", - sbi->ll_iostats.fis_wb_fail); - return 0; -} - -static void *ll_pgcache_seq_start(struct seq_file *p, loff_t *pos) -{ - if (*pos == 0) - return (void *)1; - return NULL; -} -static void *ll_pgcache_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - ++*pos; - return NULL; -} -static void ll_pgcache_seq_stop(struct seq_file *p, void *v) -{ -} - -struct seq_operations ll_pgcache_seq_sops = { - .start = ll_pgcache_seq_start, - .stop = ll_pgcache_seq_stop, - .next = ll_pgcache_seq_next, - .show = ll_pgcache_seq_show, -}; - -static int 
ll_pgcache_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = inode->u.generic_ip; - struct seq_file *seq; - int rc; - - rc = seq_open(file, &ll_pgcache_seq_sops); - if (rc) - return rc; - seq = file->private_data; - seq->private = dp->data; - return 0; -} - -struct file_operations ll_pgcache_seq_fops = { - .open = ll_pgcache_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 59cec1f..14eac3f 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -36,6 +36,7 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, { return 0; } +void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){} #else #define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct) \ @@ -100,16 +101,69 @@ struct lprocfs_vars lprocfs_obd_vars[] = { }; #define MAX_STRING_SIZE 128 + +struct llite_file_opcode { + __u32 opcode; + __u32 type; + const char *opname; +} llite_opcode_table[LPROC_LL_FILE_OPCODES] = { + /* file operation */ + { LPROC_LL_DIRTY_PAGES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "dirty_pages" }, + { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" }, + { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" }, + { LPROC_LL_WB_WRITEPAGE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "writeback_from_writepage" }, + { LPROC_LL_WB_PRESSURE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "writeback_from_pressure" }, + { LPROC_LL_WB_OK, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "writeback_ok_pages" }, + { LPROC_LL_WB_FAIL, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "writeback_failed_pages" }, + { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES, + "read_bytes" }, + { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES, + "write_bytes" }, + { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "brw_read" }, + { LPROC_LL_BRW_WRITE, 
LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "brw_write" }, + + { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" }, + { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" }, + { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" }, + { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" }, + { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" }, + { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" }, + /* inode operation */ + { LPROC_LL_SETATTR_RAW, LPROCFS_TYPE_REGS, "setattr_raw" }, + { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" }, + { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "punch" }, +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" }, +#else + { LPROC_LL_REVALIDATE, LPROCFS_TYPE_REGS, "getattr" }, +#endif + /* special inode operation */ + { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" }, + { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" }, + { LPROC_LL_DIRECT_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "direct_read" }, + { LPROC_LL_DIRECT_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, + "direct_write" }, + +}; + int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc) { struct lprocfs_vars lvars[2]; struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; - struct proc_dir_entry *entry; char name[MAX_STRING_SIZE + 1]; struct obd_uuid uuid; - int err; + int err, id; + struct lprocfs_stats *svc_stats = NULL; ENTRY; memset(lvars, 0, sizeof(lvars)); @@ -131,17 +185,41 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, sbi->ll_proc_root = NULL; RETURN(err); } + + svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES); + if (svc_stats == NULL) { + err = -ENOMEM; + goto out; + } + /* do counter init */ + for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) { + __u32 type = llite_opcode_table[id].type; + void *ptr = NULL; + if (type & LPROCFS_TYPE_REGS) + ptr = "regs"; + else { + if (type & LPROCFS_TYPE_BYTES) + ptr = "bytes"; + else { + if (type & LPROCFS_TYPE_PAGES) + ptr = 
"pages"; + } + } + lprocfs_counter_init(svc_stats, llite_opcode_table[id].opcode, + (type & LPROCFS_CNTR_AVGMINMAX), + llite_opcode_table[id].opname, ptr); + } + err = lprocfs_register_stats(sbi->ll_proc_root, "stats", svc_stats); + if (err) + goto out; + else + sbi->ll_stats = svc_stats; + /* need place to keep svc_stats */ + /* Static configuration info */ err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb); if (err) - RETURN(err); - - /* llite page cache stats */ - entry = create_proc_entry("pgcache", 0444, sbi->ll_proc_root); - if (entry == NULL) - RETURN(-ENOMEM); - entry->proc_fops = &ll_pgcache_seq_fops; - entry->data = sbi; + goto out; /* MDC info */ strncpy(uuid.uuid, mdc, sizeof(uuid.uuid)); @@ -156,13 +234,13 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, lvars[0].read_fptr = lprocfs_rd_name; err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); if (err) - RETURN(err); + goto out; snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name); lvars[0].read_fptr = lprocfs_rd_uuid; err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); - if (err < 0) - RETURN(err); + if (err) + goto out; /* OSC */ strncpy(uuid.uuid, osc, sizeof(uuid.uuid)); @@ -177,14 +255,32 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, lvars[0].read_fptr = lprocfs_rd_name; err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); if (err) - RETURN(err); + goto out; snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name); lvars[0].read_fptr = lprocfs_rd_uuid; err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); - +out: + if (err) { + if (svc_stats) + lprocfs_free_stats(svc_stats); + if (sbi->ll_proc_root) + lprocfs_remove(sbi->ll_proc_root); + } RETURN(err); } +void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) +{ + if (sbi->ll_proc_root) { + struct proc_dir_entry *file_stats = + lprocfs_srch(sbi->ll_proc_root, "stats"); + + if (file_stats) { + lprocfs_free_stats(sbi->ll_stats); + lprocfs_remove(file_stats); + } 
+ } +} #undef MAX_STRING_SIZE #endif /* LPROCFS */ diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 5e37d55..c14fd61 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -217,9 +217,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct inode *inode = lock->l_data; LASSERT(inode != NULL); - //if (inode->i_state & I_FREEING) - // break; - if (S_ISDIR(inode->i_mode)) { CDEBUG(D_INODE, "invalidating inode %lu\n", inode->i_ino); @@ -227,6 +224,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, ll_invalidate_inode_pages(inode); } +#warning FIXME: we should probably free this inode if there are no aliases if (inode->i_sb->s_root && inode != inode->i_sb->s_root->d_inode) d_unhash_aliases(inode); @@ -375,7 +373,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de, /*We were called from revalidate2: did we find the same inode?*/ if (inode && (ino != inode->i_ino || - mds_body->fid1.generation != inode->i_generation)) { + mds_body->fid1.generation != inode->i_generation)) { it->it_disposition |= IT_ENQ_COMPLETE; RETURN(-ESTALE); } diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index cd1fa90..af90d66 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -118,6 +118,12 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags) pg.flag = flags; + if (cmd == OBD_BRW_WRITE) + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_BRW_WRITE, pg.count); + else + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_BRW_READ, pg.count); rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL); if (rc) CERROR("error from obd_brw: rc = %d\n", rc); @@ -446,9 +452,11 @@ static int ll_commit_write(struct file *file, struct page *page, /* mark the page dirty, put it on mapping->dirty, * mark the inode PAGES_DIRTY, put it on sb->dirty */ if (!PageDirty(page)) - INODE_IO_STAT_ADD(inode, dirty_misses, 1); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRTY_MISSES); else - INODE_IO_STAT_ADD(inode, 
dirty_hits, 1); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRTY_HITS); size = (((obd_off)page->index) << PAGE_SHIFT) + to; if (size > inode->i_size) @@ -531,6 +539,12 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, } } + if (rw == WRITE) + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRECT_WRITE, iobuf->length); + else + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRECT_READ, iobuf->length); rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set, NULL); diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 66563c7..aef3c06 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -43,6 +43,7 @@ struct super_operations ll_super_operations; /* /proc/lustre/llite root that tracks llite mount points */ struct proc_dir_entry *proc_lustre_fs_root = NULL; /* lproc_llite.c */ +extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc); @@ -141,7 +142,6 @@ static struct super_block *ll_read_super(struct super_block *sb, INIT_LIST_HEAD(&sbi->ll_conn_chain); INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); generate_random_uuid(uuid); - spin_lock_init(&sbi->ll_iostats.fis_lock); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); sb->u.generic_sbp = sbi; @@ -266,6 +266,7 @@ out_osc: out_mdc: obd_disconnect(&sbi->ll_mdc_conn, 0); out_free: + lprocfs_unregister_mountpoint(sbi); OBD_FREE(sbi, sizeof(*sbi)); goto out_dev; @@ -293,6 +294,7 @@ static void ll_put_super(struct super_block *sb) if (!obd->obd_no_recov) mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); + lprocfs_unregister_mountpoint(sbi); if (sbi->ll_proc_root) { lprocfs_remove(sbi->ll_proc_root); sbi->ll_proc_root = NULL; @@ -585,6 +587,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr) CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name); if (rc) return 
rc; + lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR); return ll_inode_setattr(de->d_inode, attr, 1); } @@ -597,6 +600,7 @@ static int ll_statfs(struct super_block *sb, struct statfs *sfs) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:\n"); + lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS); memset(sfs, 0, sizeof(*sfs)); rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); statfs_unpack(sfs, &osfs); diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 680c47f..e942736 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -271,6 +271,7 @@ out_osc: out_mdc: obd_disconnect(&sbi->ll_mdc_conn, 0); out_free: + lprocfs_unregister_mountpoint(sbi); OBD_FREE(sbi, sizeof(*sbi)); goto out_dev; @@ -286,6 +287,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); + LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR); if ((attr->ia_valid & ATTR_SIZE)) { /* writeback uses inode->i_size to determine how far out * its cached pages go. 
ll_truncate gets a PW lock, canceling @@ -368,6 +370,7 @@ static void ll_put_super(struct super_block *sb) */ mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); + lprocfs_unregister_mountpoint(sbi); if (sbi->ll_proc_root) { lprocfs_remove(sbi->ll_proc_root); sbi->ll_proc_root = NULL; @@ -562,6 +565,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr) if (rc) return rc; + LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR); return ll_inode_setattr(de->d_inode, attr, 1); } @@ -573,6 +577,7 @@ static int ll_statfs(struct super_block *sb, struct statfs *sfs) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:\n"); + LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS); memset(sfs, 0, sizeof(*sfs)); rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); statfs_unpack(sfs, &osfs); @@ -745,6 +750,7 @@ static kmem_cache_t *ll_inode_cachep; static struct inode *ll_alloc_inode(struct super_block *sb) { struct ll_inode_info *lli; + LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE); OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli); if (lli == NULL) return NULL; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 1a4f6c4..87c3fb9 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -43,6 +43,7 @@ #include #include #include +#include #include struct lov_file_handles { @@ -169,9 +170,22 @@ static void lov_llh_destroy(struct lov_lock_handles *llh) int lov_attach(struct obd_device *dev, obd_count len, void *data) { struct lprocfs_static_vars lvars; + struct proc_dir_entry *entry; + int rc; lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); + rc = lprocfs_obd_attach(dev, lvars.obd_vars); + if (rc) + return rc; + + entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry); + if (entry == NULL) + RETURN(-ENOMEM); + entry->proc_fops = &ll_proc_target_fops; + entry->data = dev; + + return rc; + } int lov_detach(struct obd_device *dev) @@ -645,9 +659,9 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, "err %d\n", err); 
err = -EIO; } - if (!rc) - rc = err; } + if (!rc) + rc = err; continue; } loi->loi_id = tmp->o_id; @@ -663,13 +677,15 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, ++loi; /* If we have allocated enough objects, we are OK */ - if (obj_alloc == lsm->lsm_stripe_count) { - rc = 0; - GOTO(out_done, rc); - } + if (obj_alloc == lsm->lsm_stripe_count) + GOTO(out_done, rc = 0); } if (*ea != NULL) { + CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", + lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc); + if (rc == 0) + rc = -EFBIG; GOTO(out_cleanup, rc); } else { struct lov_stripe_md *lsm_new; @@ -687,6 +703,8 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, /* XXX LOV STACKING call into osc for sizes */ OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count)); lsm = lsm_new; + + rc = 0; } out_done: *ea = lsm; @@ -1700,7 +1718,7 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn, static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm, struct lustre_handle *parent_lock, __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *cb, void *data, int datalen, + int *flags, void *cb, void *data, struct lustre_handle *lockh) { struct obd_export *export = class_conn2export(conn); @@ -1764,7 +1782,7 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm, *flags = 0; rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd, parent_lock, type, &sub_ext, sizeof(sub_ext), - mode, flags, cb, data, datalen, lov_lockhp); + mode, flags, cb, data, lov_lockhp); // XXX add a lock debug statement here if (rc != ELDLM_OK) { @@ -1812,8 +1830,8 @@ static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm, } static int lov_match(struct lustre_handle *conn, struct lov_stripe_md *lsm, - __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, struct lustre_handle *lockh) + __u32 type, void *cookie, int cookielen, __u32 mode, + int 
*flags, void *data, struct lustre_handle *lockh) { struct obd_export *export = class_conn2export(conn); struct lov_lock_handles *lov_lockh = NULL; @@ -1874,7 +1892,7 @@ static int lov_match(struct lustre_handle *conn, struct lov_stripe_md *lsm, /* XXX submd is not fully initialized here */ rc = obd_match(&(lov->tgts[loi->loi_ost_idx].conn), &submd, type, &sub_ext, sizeof(sub_ext), mode, - &lov_flags, lov_lockhp); + &lov_flags, data, lov_lockhp); if (rc != 1) break; } diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 630148a..e0b3adb 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -27,6 +27,7 @@ #endif #include #include +#include #ifndef LPROCFS struct lprocfs_vars lprocfs_module_vars[] = { {0} }; @@ -113,42 +114,70 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, "%u\n", desc->ld_active_tgt_count); } -int rd_target(char *page, char **start, off_t off, int count, int *eof, - void *data) +int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *dev = (struct obd_device*) data; - int len = 0, i; struct lov_obd *lov; - struct lov_tgt_desc *tgts; - + LASSERT(dev != NULL); lov = &dev->u.lov; - tgts = lov->tgts; - LASSERT(tgts != NULL); + *eof = 1; + return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid); +} - for (i = 0; i < lov->desc.ld_tgt_count; i++, tgts++) { - int cur; - cur = snprintf(&page[len], count, "%d: %s %sACTIVE\n", - i, tgts->uuid.uuid, tgts->active ? "" : "IN"); - len += cur; - count -= cur; - } +static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos) +{ + struct obd_device *dev = p->private; + struct lov_obd *lov = &dev->u.lov; + + return (*pos >= lov->desc.ld_tgt_count) ? 
NULL : &(lov->tgts[*pos]); - *eof = 1; - return len; } +static void ll_tgt_seq_stop(struct seq_file *p, void *v) +{ -int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data) +} + +static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) { - struct obd_device *dev = (struct obd_device*) data; - struct lov_obd *lov; + struct obd_device *dev = p->private; + struct lov_obd *lov = &dev->u.lov; - LASSERT(dev != NULL); - lov = &dev->u.lov; - *eof = 1; - return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid); + ++*pos; + return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]); +} + +static int ll_tgt_seq_show(struct seq_file *p, void *v) +{ + struct lov_tgt_desc *tgt = v; + struct obd_device *dev = p->private; + struct lov_obd *lov = &dev->u.lov; + int idx = tgt - &(lov->tgts[0]); + return seq_printf(p, "%d: %s %sACTIVE\n", idx+1, tgt->uuid.uuid, + tgt->active ? "" : "IN"); } +struct seq_operations ll_tgt_sops = { + .start = ll_tgt_seq_start, + .stop = ll_tgt_seq_stop, + .next = ll_tgt_seq_next, + .show = ll_tgt_seq_show, +}; + +static int ll_target_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = inode->u.generic_ip; + struct seq_file *seq; + int rc = seq_open(file, &ll_tgt_sops); + + if (rc) + return rc; + + seq = file->private_data; + seq->private = dp->data; + + return 0; +} struct lprocfs_vars lprocfs_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "stripesize", rd_stripesize, 0, 0 }, @@ -163,7 +192,6 @@ struct lprocfs_vars lprocfs_obd_vars[] = { { "blocksize", rd_blksize, 0, 0 }, { "kbytestotal", rd_kbytestotal, 0, 0 }, { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "target_obd", rd_target, 0, 0 }, { "target_mdc", rd_mdc, 0, 0 }, { 0 } }; @@ -173,5 +201,12 @@ struct lprocfs_vars lprocfs_module_vars[] = { { 0 } }; +struct file_operations ll_proc_target_fops = { + .open = ll_target_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + #endif /* 
LPROCFS */ LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index dfcd7af..dc90885 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -130,14 +130,14 @@ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, CERROR ("rcp failed\n"); GOTO (failed, rc); } - + desc = lustre_swab_repbuf (req, 0, sizeof (*desc), lustre_swab_lov_desc); if (desc == NULL) { CERROR ("Can't unpack lov_desc\n"); GOTO (failed, rc = -EPROTO); } - + LASSERT_REPSWAB (req, 1); /* array of uuids byte-sex insensitive; just verify they are all * there and terminated */ @@ -150,7 +150,7 @@ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, for (i = 0; i < desc->ld_tgt_count; i++) { int uid_len = strnlen (uuids[i].uuid, sizeof (uuids[i].uuid)); - + if (uid_len == sizeof (uuids[i].uuid)) { CERROR ("Unterminated uuid %d:%*s\n", i, (int)sizeof (uuids[i].uuid), uuids[i].uuid); @@ -169,7 +169,7 @@ int mdc_getattr_common (struct lustre_handle *conn, { struct mds_body *body; void *eadata; - int rc; + int rc; int size[2] = {sizeof(*body), 0}; int bufcount = 1; ENTRY; @@ -188,7 +188,7 @@ int mdc_getattr_common (struct lustre_handle *conn, mdc_put_rpc_lock(&mdc_rpc_lock, NULL); if (rc != 0) RETURN (rc); - + body = lustre_swab_repbuf (req, 0, sizeof (*body), lustre_swab_mds_body); if (body == NULL) { @@ -210,7 +210,7 @@ int mdc_getattr_common (struct lustre_handle *conn, RETURN (0); } - + int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid, unsigned long valid, unsigned int ea_size, struct ptlrpc_request **request) @@ -289,7 +289,7 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, LASSERT (rec != NULL); LASSERT (body != NULL); - + memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64, rec->cr_replayfid.generation, rec->cr_replayfid.id); @@ -451,8 +451,10 @@ int 
mdc_enqueue(struct lustre_handle *conn, LDLM_DEBUG(lock, "matching against this"); memcpy(&lockh2, lockh, sizeof(lockh2)); - if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL, - LDLM_PLAIN, NULL, 0, LCK_NL, &lockh2)) { + if (ldlm_lock_match(NULL, + LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA, + NULL, LDLM_PLAIN, NULL, 0, LCK_NL, cb_data, + &lockh2)) { /* We already have a lock; cancel the new one */ ldlm_lock_decref_and_cancel(lockh, lock_mode); memcpy(lockh, &lockh2, sizeof(lockh2)); @@ -463,7 +465,7 @@ int mdc_enqueue(struct lustre_handle *conn, dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); LASSERT (dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ LASSERT_REPSWABBED (req, 0); /* swabbed by ldlm_cli_enqueue() */ - + it->it_disposition = (int) dlm_rep->lock_policy_res1; it->it_status = (int) dlm_rep->lock_policy_res2; it->it_lock_mode = lock_mode; @@ -485,7 +487,7 @@ int mdc_enqueue(struct lustre_handle *conn, /* The eadata is opaque; just check that it is * there. 
Eventually, obd_unpackmd() will check * the contents */ - eadata = lustre_swab_repbuf (req, 2, body->eadatasize, + eadata = lustre_swab_repbuf (req, 2, body->eadatasize, NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); @@ -493,7 +495,7 @@ int mdc_enqueue(struct lustre_handle *conn, } } } - + RETURN(rc); } @@ -507,7 +509,7 @@ static void mdc_replay_open(struct ptlrpc_request *req) body = lustre_swab_repbuf (req, 1, sizeof (*body), lustre_swab_mds_body); LASSERT (body != NULL); - + memcpy(&old, file_fh, sizeof(old)); CDEBUG(D_HA, "updating handle from "LPD64" to "LPD64"\n", file_fh->cookie, body->handle.cookie); @@ -675,7 +677,7 @@ static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs) CERROR ("Can't unpack obd_statfs\n"); GOTO (out, rc = -EPROTO); } - + memcpy (osfs, msfs, sizeof (*msfs)); EXIT; out: diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 259a6bc..3c2aa89e 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1477,6 +1477,18 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) if (IS_ERR(obddev->obd_fsops)) RETURN(rc = PTR_ERR(obddev->obd_fsops)); + + if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) { + if (*data->ioc_inlbuf3 == '/') { + CERROR("mds namespace mount: %s\n", + data->ioc_inlbuf3); +// mds->mds_nspath = strdup(ioc->inlbuf4); + } else { + CERROR("namespace mount must be absolute path: '%s'\n", + data->ioc_inlbuf3); + } + } + if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 823a7a6..50949dd 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -290,7 +290,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, if (rc) GOTO(cleanup, rc); - rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr); + rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0); if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) { @@ -494,7 +494,7 @@ static int 
mds_reint_create(struct mds_update_record *rec, int offset, inode->i_ino, inode->i_generation); } - rc = fsfilt_setattr(obd, dchild, handle, &iattr); + rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0); if (rc) { CERROR("error on setattr: rc = %d\n", rc); /* XXX should we abort here in case of error? */ @@ -715,9 +715,15 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, * (bug 72) */ switch (rec->ur_mode & S_IFMT) { case S_IFDIR: + /* Drop any lingering child directories before we start our + * transaction, to avoid doing multiple inode dirty/delete + * in our compound transaction (bug 1321). + */ + shrink_dcache_parent(dchild); handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); + cleanup_phase = 4; rc = vfs_rmdir(dir_inode, dchild); break; case S_IFREG: @@ -740,21 +746,24 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); + cleanup_phase = 4; rc = vfs_unlink(dir_inode, dchild); break; default: - CERROR("bad file type %o unlinking %s\n", rec->ur_mode, rec->ur_name); + CERROR("bad file type %o unlinking %s\n", rec->ur_mode, + rec->ur_name); LBUG(); GOTO(cleanup, rc = -EINVAL); } cleanup: - rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0); - if (rc && body) { - /* Don't unlink the OST objects if the MDS unlink failed */ - body->valid = 0; - } switch(cleanup_phase) { + case 4: + rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0); + if (rc && body) { + /* Don't unlink the OST objects if the MDS unlink failed */ + body->valid = 0; + } case 3: /* child lock */ if (rc != 0 || return_lock == 0) ldlm_lock_decref(child_lockh, LCK_EX); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 1e180a8..9619861 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -73,6 +73,9 @@ int obd_memmax; 
/* Root for /proc/lustre */ struct proc_dir_entry *proc_lustre_root = NULL; +int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data); +struct lprocfs_vars lprocfs_version[] = {{"version", obd_proc_read_version, NULL, NULL },{NULL,NULL,NULL,NULL}}; +int proc_version; /* The following are visible and mutable through /proc/sys/lustre/. */ unsigned long obd_fail_loc; @@ -244,7 +247,8 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, case OBD_IOC_DEVICE: { CDEBUG(D_IOCTL, "\n"); if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) { - CERROR("OBD ioctl: DEVICE insufficient devices\n"); + CERROR("OBD ioctl: DEVICE invalid device %d\n", + data->ioc_dev); GOTO(out, err = -EINVAL); } CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev); @@ -851,12 +855,23 @@ int init_obdclass(void) proc_lustre_root = proc_mkdir("lustre", proc_root_fs); if (!proc_lustre_root) printk(KERN_ERR "error registering /proc/fs/lustre\n"); + proc_version = lprocfs_add_vars(proc_lustre_root,lprocfs_version,NULL); #else proc_lustre_root = NULL; + proc_version = -1; #endif return 0; } +#ifdef LPROCFS +int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { + *eof = 1; + return snprintf(page, count, "%s\n", BUILD_VERSION); +} +#else +int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } +#endif + #ifdef __KERNEL__ static void __exit cleanup_obdclass(void) #else diff --git a/lustre/obdclass/fsfilt_ext3.c b/lustre/obdclass/fsfilt_ext3.c index a02f1f5..5f6322f 100644 --- a/lustre/obdclass/fsfilt_ext3.c +++ b/lustre/obdclass/fsfilt_ext3.c @@ -124,7 +124,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op) * objcount inode blocks * 1 superblock * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files - * + * * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update. 
*/ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) @@ -155,7 +155,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) ngdblocks = EXT3_SB(sb)->s_gdb_count; needed += nbitmaps + ngdblocks; - + /* last_rcvd update */ needed += EXT3_DATA_TRANS_BLOCKS; @@ -238,7 +238,7 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync) } static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) + struct iattr *iattr, int do_trunc) { struct inode *inode = dentry->d_inode; int rc; @@ -251,11 +251,7 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, * zero all the time (which doesn't invoke block truncate at unlink * time), so we assert we never change the MDS file size from zero. */ - if (iattr->ia_valid & ATTR_SIZE) { - CERROR("hmm, setting %*s file size to %lld\n", - dentry->d_name.len, dentry->d_name.name, iattr->ia_size); - LASSERT(iattr->ia_size == 0); -#if 0 + if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; inode->i_size = iattr->ia_size; @@ -267,7 +263,6 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, iattr->ia_valid |= ATTR_MODE; iattr->ia_mode = inode->i_mode; } -#endif } if (inode->i_op->setattr) rc = inode->i_op->setattr(dentry, iattr); diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c index ddec807..1fba0f4 100644 --- a/lustre/obdclass/fsfilt_extN.c +++ b/lustre/obdclass/fsfilt_extN.c @@ -238,7 +238,7 @@ static int fsfilt_extN_commit(struct inode *inode, void *h, int force_sync) } static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) + struct iattr *iattr, int do_trunc) { struct inode *inode = dentry->d_inode; int rc; @@ -251,11 +251,7 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, * zero all the time (which doesn't invoke block truncate at unlink 
* time), so we assert we never change the MDS file size from zero. */ - if (iattr->ia_valid & ATTR_SIZE) { - CERROR("hmm, setting %*s file size to %lld\n", - dentry->d_name.len, dentry->d_name.name, iattr->ia_size); - LASSERT(iattr->ia_size == 0); -#if 0 + if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; inode->i_size = iattr->ia_size; @@ -267,7 +263,6 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, iattr->ia_valid |= ATTR_MODE; iattr->ia_mode = inode->i_mode; } -#endif } if (inode->i_op->setattr) rc = inode->i_op->setattr(dentry, iattr); diff --git a/lustre/obdclass/fsfilt_reiserfs.c b/lustre/obdclass/fsfilt_reiserfs.c index 2aba0f1..ccefb92 100644 --- a/lustre/obdclass/fsfilt_reiserfs.c +++ b/lustre/obdclass/fsfilt_reiserfs.c @@ -71,7 +71,7 @@ static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, } static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr) + struct iattr *iattr, int do_trunc) { struct inode *inode = dentry->d_inode; int rc; @@ -84,11 +84,7 @@ static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle, * zero all the time (which doesn't invoke block truncate at unlink * time), so we assert we never change the MDS file size from zero. 
*/ - if (iattr->ia_valid & ATTR_SIZE) { - CERROR("hmm, setting %*s file size to %llu\n", - dentry->d_name.len, dentry->d_name.name, iattr->ia_size); - LASSERT(iattr->ia_size == 0); -#if 0 + if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; inode->i_size = iattr->ia_size; @@ -100,7 +96,6 @@ static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle, iattr->ia_valid |= ATTR_MODE; iattr->ia_mode = inode->i_mode; } -#endif } if (inode->i_op->setattr) rc = inode->i_op->setattr(dentry, iattr); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 2984e9c..8ec50d8 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -320,80 +320,83 @@ int lprocfs_obd_detach(struct obd_device *dev) return 0; } -struct lprocfs_counters* lprocfs_alloc_counters(unsigned int num) +struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num) { - struct lprocfs_counters* cntrs; - int csize; + struct lprocfs_stats *stats; + struct lprocfs_percpu *percpu; + unsigned int percpusize; + unsigned int i; + if (num == 0) return NULL; - csize = offsetof(struct lprocfs_counters, cntr[num]); - OBD_ALLOC(cntrs, csize); - if (cntrs != NULL) { - cntrs->num = num; + OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); + if (stats == NULL) + return NULL; + + percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num])); + stats->ls_percpu_size = smp_num_cpus * percpusize; + OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size); + if (stats->ls_percpu[0] == NULL) { + OBD_FREE(stats, offsetof(typeof(*stats), + ls_percpu[smp_num_cpus])); + return NULL; } - return cntrs; + + stats->ls_num = num; + for (i = 1; i < smp_num_cpus; i++) + stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) + + percpusize; + + return stats; } -void lprocfs_free_counters(struct lprocfs_counters* cntrs) +void lprocfs_free_stats(struct lprocfs_stats *stats) { - 
if (cntrs != NULL) { - int csize = offsetof(struct lprocfs_counters, cntr[cntrs->num]); OBD_FREE(cntrs, csize); - } + if (stats->ls_num == 0) + return; + + OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size); + OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); } /* Reset counter under lock */ int lprocfs_counter_write(struct file *file, const char *buffer, unsigned long count, void *data) { - struct lprocfs_counters *cntrs = (struct lprocfs_counters*) data; - unsigned int i; - LASSERT(cntrs != NULL); - - for (i = 0; i < cntrs->num; i++) { - struct lprocfs_counter *cntr = &(cntrs->cntr[i]); - spinlock_t *lock = (cntr->config & LPROCFS_CNTR_EXTERNALLOCK) ? - cntr->l.external : &cntr->l.internal; - - spin_lock(lock); - cntr->count = 0; - cntr->sum = 0; - cntr->min = (~(__u64)0); - cntr->max = 0; - cntr->sumsquare = 0; - spin_unlock(lock); - } + /* not supported */ return 0; } -static void *lprocfs_counters_seq_start(struct seq_file *p, loff_t *pos) +static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos) { - struct lprocfs_counters *cntrs = p->private; - return (*pos >= cntrs->num) ? NULL : (void*) &cntrs->cntr[*pos]; + struct lprocfs_stats *stats = p->private; + /* return 1st cpu location */ + return (*pos >= stats->ls_num) ? NULL : + &(stats->ls_percpu[0]->lp_cntr[*pos]); } -static void lprocfs_counters_seq_stop(struct seq_file *p, void *v) +static void lprocfs_stats_seq_stop(struct seq_file *p, void *v) { } -static void *lprocfs_counters_seq_next(struct seq_file *p, void *v, - loff_t *pos) +static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) { - struct lprocfs_counters *cntrs = p->private; + struct lprocfs_stats *stats = p->private; ++*pos; - return (*pos >= cntrs->num) ? NULL : (void*) &(cntrs->cntr[*pos]); + return (*pos >= stats->ls_num) ? 
NULL : + &(stats->ls_percpu[0]->lp_cntr[*pos]); } /* seq file export of one lprocfs counter */ -static int lprocfs_counters_seq_show(struct seq_file *p, void *v) +static int lprocfs_stats_seq_show(struct seq_file *p, void *v) { - struct lprocfs_counters *cntrs = p->private; + struct lprocfs_stats *stats = p->private; struct lprocfs_counter *cntr = v; - spinlock_t *lock; - struct lprocfs_counter c; - int rc = 0; + struct lprocfs_counter t, ret = { .lc_min = ~(__u64)0 }; + int i, idx, rc; - if (cntr == &(cntrs->cntr[0])) { + if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) { struct timeval now; do_gettimeofday(&now); rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n", @@ -401,31 +404,42 @@ static int lprocfs_counters_seq_show(struct seq_file *p, void *v) if (rc < 0) return rc; } + idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0]; + + for (i = 0; i < smp_num_cpus; i++) { + struct lprocfs_counter *percpu_cntr = + &(stats->ls_percpu[i])->lp_cntr[idx]; + int centry; + do { + centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); + t.lc_count = percpu_cntr->lc_count; + t.lc_sum = percpu_cntr->lc_sum; + t.lc_min = percpu_cntr->lc_min; + t.lc_max = percpu_cntr->lc_max; + t.lc_sumsquare = percpu_cntr->lc_sumsquare; + } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) && + centry != atomic_read(&percpu_cntr->lc_cntl.la_exit)); + ret.lc_count += t.lc_count; + ret.lc_sum += t.lc_sum; + if (t.lc_min < ret.lc_min) + ret.lc_min = t.lc_min; + if (t.lc_max > ret.lc_max) + ret.lc_max = t.lc_max; + ret.lc_sumsquare += t.lc_sumsquare; + } - /* Take a snapshot of the counter under lock */ - lock = (cntr->config & LPROCFS_CNTR_EXTERNALLOCK) ? 
- cntr->l.external : &cntr->l.internal; - spin_lock(lock); - - c.count = cntr->count; - c.sum = cntr->sum; - c.min = cntr->min; - c.max = cntr->max; - c.sumsquare = cntr->sumsquare; - - spin_unlock(lock); - - rc = seq_printf(p, "%-25s "LPU64" samples [%s]", cntr->name, c.count, - cntr->units); + rc = seq_printf(p, "%-25s "LPU64" samples [%s]", cntr->lc_name, + ret.lc_count, cntr->lc_units); if (rc < 0) goto out; - if ((cntr->config & LPROCFS_CNTR_AVGMINMAX) && (c.count > 0)) { - rc = seq_printf(p, " "LPU64" "LPU64" "LPU64, c.min,c.max,c.sum); + if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) { + rc = seq_printf(p, " "LPU64" "LPU64" "LPU64, + ret.lc_min, ret.lc_max, ret.lc_sum); if (rc < 0) goto out; - if (cntr->config & LPROCFS_CNTR_STDDEV) - rc = seq_printf(p, " "LPU64, c.sumsquare); + if (cntr->lc_config & LPROCFS_CNTR_STDDEV) + rc = seq_printf(p, " "LPU64, ret.lc_sumsquare); if (rc < 0) goto out; } @@ -434,20 +448,20 @@ static int lprocfs_counters_seq_show(struct seq_file *p, void *v) return (rc < 0) ? 
rc : 0; } -struct seq_operations lprocfs_counters_seq_sops = { - .start = lprocfs_counters_seq_start, - .stop = lprocfs_counters_seq_stop, - .next = lprocfs_counters_seq_next, - .show = lprocfs_counters_seq_show, +struct seq_operations lprocfs_stats_seq_sops = { + .start = lprocfs_stats_seq_start, + .stop = lprocfs_stats_seq_stop, + .next = lprocfs_stats_seq_next, + .show = lprocfs_stats_seq_show, }; -static int lprocfs_counters_seq_open(struct inode *inode, struct file *file) +static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *dp = inode->u.generic_ip; struct seq_file *seq; int rc; - rc = seq_open(file, &lprocfs_counters_seq_sops); + rc = seq_open(file, &lprocfs_stats_seq_sops); if (rc) return rc; seq = file->private_data; @@ -455,15 +469,15 @@ static int lprocfs_counters_seq_open(struct inode *inode, struct file *file) return 0; } -struct file_operations lprocfs_counters_seq_fops = { - .open = lprocfs_counters_seq_open, +struct file_operations lprocfs_stats_seq_fops = { + .open = lprocfs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -int lprocfs_register_counters(struct proc_dir_entry *root, const char* name, - struct lprocfs_counters *cntrs) +int lprocfs_register_stats(struct proc_dir_entry *root, const char* name, + struct lprocfs_stats *stats) { struct proc_dir_entry *entry; LASSERT(root != NULL); @@ -471,112 +485,130 @@ int lprocfs_register_counters(struct proc_dir_entry *root, const char* name, entry = create_proc_entry(name, 0444, root); if (entry == NULL) return -ENOMEM; - entry->proc_fops = &lprocfs_counters_seq_fops; - entry->data = (void*) cntrs; + entry->proc_fops = &lprocfs_stats_seq_fops; + entry->data = (void *)stats; entry->write_proc = lprocfs_counter_write; return 0; } -#define LPROCFS_OBD_OP_INIT(base, cntrs, op) \ +void lprocfs_counter_init(struct lprocfs_stats *stats, int index, + unsigned conf, const char *name, const char *units) +{ + struct lprocfs_counter 
*c; + int i; + + LASSERT(stats != NULL); + for (i = 0; i < smp_num_cpus; i++) { + c = &(stats->ls_percpu[i]->lp_cntr[index]); + c->lc_config = conf; + c->lc_min = ~(__u64)0; + c->lc_name = name; + c->lc_units = units; + } +} +EXPORT_SYMBOL(lprocfs_counter_init); + +#define LPROCFS_OBD_OP_INIT(base, stats, op) \ do { \ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < cntrs->num); \ - LPROCFS_COUNTER_INIT(&cntrs->cntr[coffset], 0, NULL, #op, "reqs"); \ + LASSERT(coffset < stats->ls_num); \ + lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \ } while (0) - -int lprocfs_alloc_obd_counters(struct obd_device *obddev, - unsigned int num_private_counters) +int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) { - struct lprocfs_counters* obdops_cntrs; - unsigned int num_counters; + struct lprocfs_stats *stats; + unsigned int num_stats; int rc, i; - LASSERT(obddev->counters == NULL); - LASSERT(obddev->obd_proc_entry != NULL); - LASSERT(obddev->cntr_base == 0); + LASSERT(obd->obd_stats == NULL); + LASSERT(obd->obd_proc_entry != NULL); + LASSERT(obd->obd_cntr_base == 0); - num_counters = 1 + OBD_COUNTER_OFFSET(san_preprw)+num_private_counters; - obdops_cntrs = lprocfs_alloc_counters(num_counters); - if (!obdops_cntrs) + num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) + + num_private_stats; + stats = lprocfs_alloc_stats(num_stats); + if (!stats) return -ENOMEM; - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, iocontrol); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, get_info); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, set_info); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, attach); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, detach); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, setup); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cleanup); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, connect); - 
LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, disconnect); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, statfs); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, syncfs); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, packmd); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, unpackmd); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, preallocate); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, create); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, destroy); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, setattr); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, getattr); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, getattr_async); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, open); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, close); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, brw); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, brw_async); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, punch); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, sync); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, migrate); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, copy); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, iterate); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, preprw); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, commitrw); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, enqueue); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, match); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cancel); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, cancel_unused); - LPROCFS_OBD_OP_INIT(num_private_counters, obdops_cntrs, san_preprw); - - for (i = num_private_counters; i < num_counters; i++) { + LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol); + LPROCFS_OBD_OP_INIT(num_private_stats, 
stats, get_info); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, syncfs); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, create); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, open); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, close); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, match); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export); + + 
for (i = num_private_stats; i < num_stats; i++) { /* If this assertion failed, it is likely that an obd * operation was added to struct obd_ops in * , and that the corresponding line item * LPROCFS_OBD_OP_INIT(.., .., opname) * is missing from the list above. */ - LASSERT(obdops_cntrs->cntr[i].name != NULL); + LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL); } - rc = lprocfs_register_counters(obddev->obd_proc_entry, "obd_stats", - obdops_cntrs); + rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats); if (rc < 0) { - lprocfs_free_counters(obdops_cntrs); + lprocfs_free_stats(stats); } else { - obddev->counters = obdops_cntrs; - obddev->cntr_base = num_private_counters; + obd->obd_stats = stats; + obd->obd_cntr_base = num_private_stats; } return rc; } -void lprocfs_free_obd_counters(struct obd_device *obddev) +void lprocfs_free_obd_stats(struct obd_device *obd) { - struct lprocfs_counters* obdops_cntrs = obddev->counters; - if (obdops_cntrs != NULL) { - obddev->counters = NULL; - lprocfs_free_counters(obdops_cntrs); + struct lprocfs_stats *stats = obd->obd_stats; + + if (stats != NULL) { + obd->obd_stats = NULL; + lprocfs_free_stats(stats); } } #endif /* LPROCFS*/ EXPORT_SYMBOL(lprocfs_register); +EXPORT_SYMBOL(lprocfs_srch); EXPORT_SYMBOL(lprocfs_remove); EXPORT_SYMBOL(lprocfs_add_vars); EXPORT_SYMBOL(lprocfs_obd_attach); EXPORT_SYMBOL(lprocfs_obd_detach); -EXPORT_SYMBOL(lprocfs_alloc_counters); -EXPORT_SYMBOL(lprocfs_free_counters); -EXPORT_SYMBOL(lprocfs_register_counters); -EXPORT_SYMBOL(lprocfs_alloc_obd_counters); -EXPORT_SYMBOL(lprocfs_free_obd_counters); +EXPORT_SYMBOL(lprocfs_alloc_stats); +EXPORT_SYMBOL(lprocfs_free_stats); +EXPORT_SYMBOL(lprocfs_register_stats); +EXPORT_SYMBOL(lprocfs_alloc_obd_stats); +EXPORT_SYMBOL(lprocfs_free_obd_stats); EXPORT_SYMBOL(lprocfs_rd_u64); EXPORT_SYMBOL(lprocfs_rd_uuid); diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 1eaa282..603a166 100644 --- a/lustre/obdecho/echo.c +++ 
b/lustre/obdecho/echo.c @@ -52,145 +52,12 @@ #define ECHO_OBJECT0_NPAGES 16 static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES]; -/* should be generic per-obd stats... */ -struct xprocfs_io_stat { - __u64 st_read_bytes; - __u64 st_read_reqs; - __u64 st_write_bytes; - __u64 st_write_reqs; - __u64 st_getattr_reqs; - __u64 st_setattr_reqs; - __u64 st_create_reqs; - __u64 st_destroy_reqs; - __u64 st_statfs_reqs; - __u64 st_syncfs_reqs; - __u64 st_open_reqs; - __u64 st_close_reqs; - __u64 st_punch_reqs; +enum { + LPROC_ECHO_READ_BYTES = 1, + LPROC_ECHO_WRITE_BYTES = 2, + LPROC_ECHO_LAST = LPROC_ECHO_WRITE_BYTES +1 }; -static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS]; -static struct proc_dir_entry *xprocfs_dir; - -#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count) \ -do { \ - xprocfs_iostats[smp_processor_id()].field += (count); \ -} while (0) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define DECLARE_XPROCFS_SUM_STAT(field) \ -static long long \ -xprocfs_sum_##field (void) \ -{ \ - long long stat = 0; \ - int i; \ - \ - for (i = 0; i < smp_num_cpus; i++) \ - stat += xprocfs_iostats[i].field; \ - return (stat); \ -} - -DECLARE_XPROCFS_SUM_STAT (st_read_bytes) -DECLARE_XPROCFS_SUM_STAT (st_read_reqs) -DECLARE_XPROCFS_SUM_STAT (st_write_bytes) -DECLARE_XPROCFS_SUM_STAT (st_write_reqs) -DECLARE_XPROCFS_SUM_STAT (st_getattr_reqs) -DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs) -DECLARE_XPROCFS_SUM_STAT (st_create_reqs) -DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs) -DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs) -DECLARE_XPROCFS_SUM_STAT (st_syncfs_reqs) -DECLARE_XPROCFS_SUM_STAT (st_open_reqs) -DECLARE_XPROCFS_SUM_STAT (st_close_reqs) -DECLARE_XPROCFS_SUM_STAT (st_punch_reqs) -#endif - -static int -xprocfs_rd_stat (char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - long long (*fn)(void) = (long long(*)(void))data; - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = snprintf (page, count, "%Ld\n", fn()); - *start = page; - 
return (len); -} - - -static void -xprocfs_add_stat(char *name, long long (*fn)(void)) -{ - struct proc_dir_entry *entry; - - entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir); - if (entry == NULL) { - CERROR ("Can't add procfs stat %s\n", name); - return; - } - - entry->data = fn; - entry->read_proc = xprocfs_rd_stat; - entry->write_proc = NULL; -} - -static void -xprocfs_init (char *name) -{ - char dirname[64]; - - snprintf (dirname, sizeof (dirname), "sys/%s", name); - - xprocfs_dir = proc_mkdir (dirname, NULL); - if (xprocfs_dir == NULL) { - CERROR ("Can't make procfs dir %s\n", dirname); - return; - } - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes); - xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs); - xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes); - xprocfs_add_stat ("write_reqs", xprocfs_sum_st_write_reqs); - xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs); - xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs); - xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs); - xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs); - xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs); - xprocfs_add_stat ("syncfs_reqs", xprocfs_sum_st_syncfs_reqs); - xprocfs_add_stat ("open_reqs", xprocfs_sum_st_open_reqs); - xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs); - xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs); -#endif -} - -void xprocfs_fini (void) -{ - if (xprocfs_dir == NULL) - return; - - remove_proc_entry ("read_bytes", xprocfs_dir); - remove_proc_entry ("read_reqs", xprocfs_dir); - remove_proc_entry ("write_bytes", xprocfs_dir); - remove_proc_entry ("write_reqs", xprocfs_dir); - remove_proc_entry ("getattr_reqs", xprocfs_dir); - remove_proc_entry ("setattr_reqs", xprocfs_dir); - remove_proc_entry ("create_reqs", xprocfs_dir); - remove_proc_entry ("destroy_reqs", xprocfs_dir); - 
remove_proc_entry ("statfs_reqs", xprocfs_dir); - remove_proc_entry ("syncfs_reqs", xprocfs_dir); - remove_proc_entry ("open_reqs", xprocfs_dir); - remove_proc_entry ("close_reqs", xprocfs_dir); - remove_proc_entry ("punch_reqs", xprocfs_dir); - - remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent); - xprocfs_dir = NULL; -} - static int echo_connect(struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid) { @@ -224,8 +91,6 @@ int echo_create(struct lustre_handle *conn, struct obdo *oa, { struct obd_device *obd = class_conn2obd(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); return -EINVAL; @@ -253,8 +118,6 @@ int echo_destroy(struct lustre_handle *conn, struct obdo *oa, { struct obd_device *obd = class_conn2obd(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); RETURN(-EINVAL); @@ -282,8 +145,6 @@ static int echo_open(struct lustre_handle *conn, struct obdo *oa, struct lustre_handle *fh = obdo_handle (oa); struct obd_device *obd = class_conn2obd (conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); return (-EINVAL); @@ -306,8 +167,6 @@ static int echo_close(struct lustre_handle *conn, struct obdo *oa, struct lustre_handle *fh = obdo_handle (oa); struct obd_device *obd = class_conn2obd(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); return (-EINVAL); @@ -332,8 +191,6 @@ static int echo_getattr(struct lustre_handle *conn, struct obdo *oa, struct obd_device *obd = class_conn2obd(conn); obd_id id = oa->o_id; - XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); RETURN(-EINVAL); @@ -355,8 +212,6 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa, { struct 
obd_device *obd = class_conn2obd(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1); - if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); RETURN(-EINVAL); @@ -384,15 +239,11 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount, { struct obd_device *obd; struct niobuf_local *r = res; + int tot_bytes = 0; int rc = 0; int i; ENTRY; - if ((cmd & OBD_BRW_WRITE) != 0) - XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1); - else - XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1); - obd = export->exp_obd; if (obd == NULL) RETURN(-EINVAL); @@ -428,6 +279,8 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount, } } + tot_bytes += r->len; + atomic_inc(&obd->u.echo.eo_prep); r->offset = nb->offset; @@ -437,9 +290,8 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount, CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n", r->page, r->offset, r->len); - if (cmd == OBD_BRW_READ) { + if (cmd & OBD_BRW_READ) { r->rc = r->len; - XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes,r->len); if (verify) { page_debug_setup(kmap (r->page), r->len, r->offset,obj->ioo_id); @@ -447,8 +299,6 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount, } r->rc = r->len; } else { - XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes, - r->len); if (verify) { page_debug_setup(kmap (r->page), r->len, 0xecc0ecc0ecc0ecc0, @@ -458,6 +308,13 @@ int echo_preprw(int cmd, struct obd_export *export, int objcount, } } } + if (cmd & OBD_BRW_READ) + lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES, + tot_bytes); + else + lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES, + tot_bytes); + CDEBUG(D_PAGE, "%d pages allocated after prep\n", atomic_read(&obd->u.echo.eo_prep)); @@ -518,14 +375,12 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount, struct page *page = r->page; void *addr; - kmap (page); - - if (!page || !(addr = page_address(page)) || + if (!page || !(addr = kmap(page)) || !kern_addr_valid(addr)) { CERROR("bad page objid 
"LPU64":%p, buf %d/%d\n", obj->ioo_id, page, j, obj->ioo_bufcnt); - kunmap (page); + kunmap(page); GOTO(commitrw_cleanup, rc = -EFAULT); } @@ -593,16 +448,29 @@ static int echo_cleanup(struct obd_device *obddev, int force, int failover) RETURN(0); } -int echo_attach(struct obd_device *dev, obd_count len, void *data) +int echo_attach(struct obd_device *obd, obd_count len, void *data) { struct lprocfs_static_vars lvars; + int rc; lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); + rc = lprocfs_obd_attach(obd, lvars.obd_vars); + if (rc != 0) + return rc; + rc = lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST); + if (rc != 0) + return rc; + + lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES, + LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes"); + lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES, + LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes"); + return rc; } int echo_detach(struct obd_device *dev) { + lprocfs_free_obd_stats(dev); return lprocfs_obd_detach(dev); } @@ -673,8 +541,6 @@ static int __init obdecho_init(void) lprocfs_init_vars(&lvars); - xprocfs_init ("echo"); - rc = echo_object0_pages_init (); if (rc != 0) goto failed_0; @@ -692,8 +558,6 @@ static int __init obdecho_init(void) failed_1: echo_object0_pages_fini (); failed_0: - xprocfs_fini (); - RETURN(rc); } @@ -702,7 +566,6 @@ static void __exit obdecho_exit(void) echo_client_cleanup(); class_unregister_type(OBD_ECHO_DEVICENAME); echo_object0_pages_fini (); - xprocfs_fini (); } MODULE_AUTHOR("Cluster File Systems, Inc. "); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 31f7334..1d8233b 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -750,10 +750,10 @@ echo_enqueue (struct obd_export *exp, struct obdo *oa, ecl->ecl_extent.end = (nob == 0) ? 
((obd_off) -1) : (offset + nob - 1); flags = 0; - rc = obd_enqueue (&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT, - &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode, - &flags, echo_ldlm_callback, eco, sizeof (*eco), - &ecl->ecl_lock_handle); + rc = obd_enqueue(&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT, + &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode, + &flags, echo_ldlm_callback, eco, + &ecl->ecl_lock_handle); if (rc != 0) goto failed_1; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 21d05ef..e6c223c 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -54,152 +54,11 @@ #endif enum { - LPROC_FILTER_READS = 0, - LPROC_FILTER_READ_BYTES = 1, - LPROC_FILTER_WRITES = 2, - LPROC_FILTER_WRITE_BYTES = 3, - LPROC_FILTER_LAST = LPROC_FILTER_WRITE_BYTES +1 + LPROC_FILTER_READ_BYTES = 0, + LPROC_FILTER_WRITE_BYTES = 1, + LPROC_FILTER_LAST, }; -/* should be generic per-obd stats... */ -struct xprocfs_io_stat { - __u64 st_read_bytes; - __u64 st_read_reqs; - __u64 st_write_bytes; - __u64 st_write_reqs; - __u64 st_getattr_reqs; - __u64 st_setattr_reqs; - __u64 st_create_reqs; - __u64 st_destroy_reqs; - __u64 st_statfs_reqs; - __u64 st_syncfs_reqs; - __u64 st_open_reqs; - __u64 st_close_reqs; - __u64 st_punch_reqs; -}; - -static struct xprocfs_io_stat xprocfs_iostats[NR_CPUS]; -static struct proc_dir_entry *xprocfs_dir; - -#define XPROCFS_BUMP_MYCPU_IOSTAT(field, count) \ -do { \ - xprocfs_iostats[smp_processor_id()].field += (count); \ -} while (0) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define DECLARE_XPROCFS_SUM_STAT(field) \ -static long long \ -xprocfs_sum_##field (void) \ -{ \ - long long stat = 0; \ - int i; \ - \ - for (i = 0; i < smp_num_cpus; i++) \ - stat += xprocfs_iostats[i].field; \ - return (stat); \ -} - -DECLARE_XPROCFS_SUM_STAT (st_read_bytes) -DECLARE_XPROCFS_SUM_STAT (st_read_reqs) -DECLARE_XPROCFS_SUM_STAT (st_write_bytes) -DECLARE_XPROCFS_SUM_STAT (st_write_reqs) -DECLARE_XPROCFS_SUM_STAT 
(st_getattr_reqs) -DECLARE_XPROCFS_SUM_STAT (st_setattr_reqs) -DECLARE_XPROCFS_SUM_STAT (st_create_reqs) -DECLARE_XPROCFS_SUM_STAT (st_destroy_reqs) -DECLARE_XPROCFS_SUM_STAT (st_statfs_reqs) -DECLARE_XPROCFS_SUM_STAT (st_syncfs_reqs) -DECLARE_XPROCFS_SUM_STAT (st_open_reqs) -DECLARE_XPROCFS_SUM_STAT (st_close_reqs) -DECLARE_XPROCFS_SUM_STAT (st_punch_reqs) -#endif - -static int -xprocfs_rd_stat (char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - long long (*fn)(void) = (long long(*)(void))data; - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = snprintf (page, count, "%Ld\n", fn()); - *start = page; - return (len); -} - - -static void -xprocfs_add_stat(char *name, long long (*fn)(void)) -{ - struct proc_dir_entry *entry; - - entry = create_proc_entry (name, S_IFREG|S_IRUGO, xprocfs_dir); - if (entry == NULL) { - CERROR ("Can't add procfs stat %s\n", name); - return; - } - - entry->data = fn; - entry->read_proc = xprocfs_rd_stat; - entry->write_proc = NULL; -} - -static void -xprocfs_init (char *name) -{ - char dirname[64]; - - snprintf (dirname, sizeof (dirname), "sys/%s", name); - - xprocfs_dir = proc_mkdir (dirname, NULL); - if (xprocfs_dir == NULL) { - CERROR ("Can't make procfs dir %s\n", dirname); - return; - } - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - xprocfs_add_stat ("read_bytes", xprocfs_sum_st_read_bytes); - xprocfs_add_stat ("read_reqs", xprocfs_sum_st_read_reqs); - xprocfs_add_stat ("write_bytes", xprocfs_sum_st_write_bytes); - xprocfs_add_stat ("write_reqs", xprocfs_sum_st_write_reqs); - xprocfs_add_stat ("getattr_reqs", xprocfs_sum_st_getattr_reqs); - xprocfs_add_stat ("setattr_reqs", xprocfs_sum_st_setattr_reqs); - xprocfs_add_stat ("create_reqs", xprocfs_sum_st_create_reqs); - xprocfs_add_stat ("destroy_reqs", xprocfs_sum_st_destroy_reqs); - xprocfs_add_stat ("statfs_reqs", xprocfs_sum_st_statfs_reqs); - xprocfs_add_stat ("syncfs_reqs", xprocfs_sum_st_syncfs_reqs); - xprocfs_add_stat ("open_reqs", 
xprocfs_sum_st_open_reqs); - xprocfs_add_stat ("close_reqs", xprocfs_sum_st_close_reqs); - xprocfs_add_stat ("punch_reqs", xprocfs_sum_st_punch_reqs); -#endif -} - -void xprocfs_fini (void) -{ - if (xprocfs_dir == NULL) - return; - - remove_proc_entry ("read_bytes", xprocfs_dir); - remove_proc_entry ("read_reqs", xprocfs_dir); - remove_proc_entry ("write_bytes", xprocfs_dir); - remove_proc_entry ("write_reqs", xprocfs_dir); - remove_proc_entry ("getattr_reqs", xprocfs_dir); - remove_proc_entry ("setattr_reqs", xprocfs_dir); - remove_proc_entry ("create_reqs", xprocfs_dir); - remove_proc_entry ("destroy_reqs", xprocfs_dir); - remove_proc_entry ("statfs_reqs", xprocfs_dir); - remove_proc_entry ("syncfs_reqs", xprocfs_dir); - remove_proc_entry ("open_reqs", xprocfs_dir); - remove_proc_entry ("close_reqs", xprocfs_dir); - remove_proc_entry ("punch_reqs", xprocfs_dir); - - remove_proc_entry (xprocfs_dir->name, xprocfs_dir->parent); - xprocfs_dir = NULL; -} - #define S_SHIFT 12 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = { [0] NULL, @@ -337,19 +196,6 @@ int filter_finish_transno(struct obd_export *export, void *handle, RETURN(written); } -/* write the pathname into the string */ -static char *filter_id(char *buf, struct filter_obd *filter, obd_id id, - obd_mode mode) -{ - if (!S_ISREG(mode) || filter->fo_subdir_count == 0) - sprintf(buf, "O/%s/"LPU64, obd_mode_to_type(mode), id); - else - sprintf(buf, "O/%s/d%d/"LPU64, obd_mode_to_type(mode), - (int)id & (filter->fo_subdir_count - 1), id); - - return buf; -} - static inline void f_dput(struct dentry *dentry) { /* Can't go inside filter_ddelete because it can block */ @@ -935,64 +781,19 @@ static void filter_post(struct obd_device *obd) } -static __u64 filter_next_id(struct obd_device *obd) +static __u64 filter_next_id(struct filter_obd *filter) { obd_id id; - LASSERT(obd->u.filter.fo_fsd != NULL); + LASSERT(filter->fo_fsd != NULL); - spin_lock(&obd->u.filter.fo_objidlock); - id = 
le64_to_cpu(obd->u.filter.fo_fsd->fsd_last_objid); - obd->u.filter.fo_fsd->fsd_last_objid = cpu_to_le64(id + 1); - spin_unlock(&obd->u.filter.fo_objidlock); + spin_lock(&filter->fo_objidlock); + id = le64_to_cpu(filter->fo_fsd->fsd_last_objid); + filter->fo_fsd->fsd_last_objid = cpu_to_le64(id + 1); + spin_unlock(&filter->fo_objidlock); return id; } -/* how to get files, dentries, inodes from object id's */ -/* parent i_sem is already held if needed for exclusivity */ -static struct dentry *filter_fid2dentry(struct obd_device *obd, - struct dentry *dparent, - __u64 id, int lockit) -{ - struct super_block *sb = obd->u.filter.fo_sb; - struct dentry *dchild; - char name[32]; - int len; - ENTRY; - - if (!sb || !sb->s_dev) { - CERROR("fatal: device not initialized.\n"); - RETURN(ERR_PTR(-ENXIO)); - } - - if (id == 0) { - CERROR("fatal: invalid object id 0\n"); - LBUG(); - RETURN(ERR_PTR(-ESTALE)); - } - - len = sprintf(name, LPU64, id); - CDEBUG(D_INODE, "looking up object O/%*s/%s\n", - dparent->d_name.len, dparent->d_name.name, name); - if (lockit) - down(&dparent->d_inode->i_sem); - dchild = lookup_one_len(name, dparent, len); - if (lockit) - up(&dparent->d_inode->i_sem); - if (IS_ERR(dchild)) { - CERROR("child lookup error %ld\n", PTR_ERR(dchild)); - RETURN(dchild); - } - - CDEBUG(D_INODE, "got child obj O/%*s/%s: %p, count = %d\n", - dparent->d_name.len, dparent->d_name.name, name, dchild, - atomic_read(&dchild->d_count)); - - LASSERT(atomic_read(&dchild->d_count) > 0); - - RETURN(dchild); -} - /* direct cut-n-paste of mds_blocking_ast() */ int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) @@ -1038,7 +839,7 @@ int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } static int filter_lock_dentry(struct obd_device *obd, struct dentry *de, - int lock_mode, struct lustre_handle *lockh) + ldlm_mode_t lock_mode,struct lustre_handle *lockh) { struct ldlm_res_id res_id = { .name = {0} }; int flags = 0, 
rc; @@ -1054,6 +855,14 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de, RETURN(rc == ELDLM_OK ? 0 : -ENOLCK); /* XXX translate ldlm code */ } +static void filter_parent_unlock(struct dentry *dparent, + struct lustre_handle *lockh, + ldlm_mode_t lock_mode) +{ + ldlm_lock_decref(lockh, lock_mode); +} + +/* We never dget the object parent, so DON'T dput it either */ static inline struct dentry *filter_parent(struct obd_device *obd, obd_mode mode, obd_id objid) { @@ -1066,11 +875,13 @@ static inline struct dentry *filter_parent(struct obd_device *obd, return filter->fo_dentry_O_sub[objid & (filter->fo_subdir_count - 1)]; } +/* We never dget the object parent, so DON'T dput it either */ static inline struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode, obd_id objid, - int lock_mode, + ldlm_mode_t lock_mode, struct lustre_handle *lockh) { + unsigned long now = jiffies; struct dentry *de = filter_parent(obd, mode, objid); int rc; @@ -1078,17 +889,75 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd, return de; rc = filter_lock_dentry(obd, de, lock_mode, lockh); + if (time_after(jiffies, now + 15*HZ)) + CERROR("slow parent lock %lus\n", (jiffies - now) / HZ); return rc ? ERR_PTR(rc) : de; } +/* How to get files, dentries, inodes from object id's. + * + * If dir_dentry is passed, the caller has already locked the parent + * appropriately for this operation (normally a write lock). If + * dir_dentry is NULL, we do a read lock while we do the lookup to + * avoid races with create/destroy and such changing the directory + * internal to the filesystem code. 
+ */ +static struct dentry *filter_fid2dentry(struct obd_device *obd, + struct dentry *dir_dentry, + obd_mode mode, obd_id id) +{ + struct super_block *sb = obd->u.filter.fo_sb; + struct lustre_handle lockh; + struct dentry *dparent = dir_dentry; + struct dentry *dchild; + char name[32]; + int len; + ENTRY; + + if (!sb || !sb->s_dev) { + CERROR("device not initialized.\n"); + RETURN(ERR_PTR(-ENXIO)); + } + + if (id == 0) { + CERROR("fatal: invalid object id 0\n"); + LBUG(); + RETURN(ERR_PTR(-ESTALE)); + } + + len = sprintf(name, LPU64, id); + if (!dir_dentry) { + dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh); + if (IS_ERR(dparent)) + RETURN(dparent); + } + CDEBUG(D_INODE, "looking up object O/%*s/%s\n", + dparent->d_name.len, dparent->d_name.name, name); + dchild = ll_lookup_one_len(name, dparent, len); + if (!dir_dentry) + filter_parent_unlock(dparent, &lockh, LCK_PR); + if (IS_ERR(dchild)) { + CERROR("child lookup error %ld\n", PTR_ERR(dchild)); + RETURN(dchild); + } + + CDEBUG(D_INODE, "got child objid %s: %p, count = %d\n", + name, dchild, atomic_read(&dchild->d_count)); + + LASSERT(atomic_read(&dchild->d_count) > 0); + + RETURN(dchild); +} + static struct file *filter_obj_open(struct obd_export *export, - __u64 id, __u32 type, int parent_mode, + __u64 id, __u32 type, + ldlm_mode_t parent_mode, struct lustre_handle *parent_lockh) { struct obd_device *obd = export->exp_obd; struct filter_obd *filter = &obd->u.filter; struct super_block *sb = filter->fo_sb; - struct dentry *dchild = NULL, *parent; + struct dentry *dchild = NULL, *dparent = NULL; struct filter_export_data *fed = &export->exp_filter_data; struct filter_dentry_data *fdd = NULL; struct filter_file_data *ffd = NULL; @@ -1133,21 +1002,26 @@ static struct file *filter_obj_open(struct obd_export *export, cleanup_phase = 2; - parent = filter_parent_lock(obd, type, id, parent_mode, parent_lockh); - if (IS_ERR(parent)) - GOTO(cleanup, file = (void *)parent); + dparent = filter_parent_lock(obd, 
type, id, parent_mode, parent_lockh); + if (IS_ERR(dparent)) + GOTO(cleanup, file = (void *)dparent); cleanup_phase = 3; len = snprintf(name, sizeof(name), LPU64, id); - dchild = lookup_one_len(name, parent, len); + dchild = ll_lookup_one_len(name, dparent, len); if (IS_ERR(dchild)) GOTO(cleanup, file = (void *)dchild); - LASSERT(dchild->d_inode); cleanup_phase = 4; - /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */ + if (dchild->d_inode == NULL) { + CERROR("opening non-existent object %s - O_CREAT?\n", name); + file = ERR_PTR(-ENOENT); + GOTO(cleanup, file); + } + + /* dentry_open does a dput(dchild) and mntput(mnt) on error */ mntget(filter->fo_vfsmnt); file = dentry_open(dchild, filter->fo_vfsmnt, O_RDWR | O_LARGEFILE); if (IS_ERR(file)) { @@ -1161,12 +1035,14 @@ static struct file *filter_obj_open(struct obd_export *export, spin_unlock(&filter->fo_fddlock); OBD_FREE(fdd, sizeof *fdd); fdd = dchild->d_fsdata; + LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); /* should only happen during client recovery */ if (fdd->fdd_flags & FILTER_FLAG_DESTROY) CDEBUG(D_INODE,"opening destroyed object "LPU64"\n",id); atomic_inc(&fdd->fdd_open_count); } else { atomic_set(&fdd->fdd_open_count, 1); + fdd->fdd_magic = FILTER_DENTRY_MAGIC; fdd->fdd_flags = 0; fdd->fdd_objid = id; /* If this is racy, then we can use {cmp}xchg and atomic_add */ @@ -1192,10 +1068,10 @@ cleanup: switch (cleanup_phase) { case 4: if (IS_ERR(file)) - l_dput(dchild); + f_dput(dchild); case 3: if (IS_ERR(file)) - ldlm_lock_decref(parent_lockh, parent_mode); + filter_parent_unlock(dparent, parent_lockh,parent_mode); case 2: if (IS_ERR(file)) OBD_FREE(fdd, sizeof *fdd); @@ -1209,29 +1085,28 @@ cleanup: RETURN(file); } -/* Caller must hold i_sem on dir_dentry->d_inode */ -/* Caller must push us into kernel context */ +/* Caller must hold LCK_PW on parent and push us into kernel context. + * Caller is also required to ensure that dchild->d_inode exists. 
+ */ static int filter_destroy_internal(struct obd_device *obd, - struct dentry *dir_dentry, - struct dentry *object_dentry) + struct dentry *dparent, + struct dentry *dchild) { - struct inode *inode = object_dentry->d_inode; + struct inode *inode = dchild->d_inode; int rc; ENTRY; if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { CERROR("destroying objid %*s nlink = %d, count = %d\n", - object_dentry->d_name.len, - object_dentry->d_name.name, + dchild->d_name.len, dchild->d_name.name, inode->i_nlink, atomic_read(&inode->i_count)); } - rc = vfs_unlink(dir_dentry->d_inode, object_dentry); + rc = vfs_unlink(dparent->d_inode, dchild); if (rc) CERROR("error unlinking objid %*s: rc %d\n", - object_dentry->d_name.len, - object_dentry->d_name.name, rc); + dchild->d_name.len, dchild->d_name.name, rc); RETURN(rc); } @@ -1239,24 +1114,25 @@ static int filter_destroy_internal(struct obd_device *obd, /* If closing because we are failing this device, then don't do the unlink on close. */ -static int filter_close_internal(struct obd_export *export, +static int filter_close_internal(struct obd_export *exp, struct filter_file_data *ffd, struct obd_trans_info *oti, int failover) { - struct obd_device *obd = export->exp_obd; + struct obd_device *obd = exp->exp_obd; struct filter_obd *filter = &obd->u.filter; struct file *filp = ffd->ffd_file; - struct dentry *object_dentry = dget(filp->f_dentry); - struct filter_dentry_data *fdd = object_dentry->d_fsdata; + struct dentry *dchild = dget(filp->f_dentry); + struct filter_dentry_data *fdd = dchild->d_fsdata; struct lustre_handle parent_lockh; int rc, rc2, cleanup_phase = 0; - struct dentry *dir_dentry; + struct dentry *dparent; struct obd_run_ctxt saved; ENTRY; LASSERT(filp->private_data == ffd); LASSERT(fdd); + LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); rc = filp_close(filp, 0); @@ -1267,23 +1143,24 @@ static int filter_close_internal(struct obd_export *export, push_ctxt(&saved, &filter->fo_ctxt, NULL); cleanup_phase 
= 1; - dir_dentry = filter_parent_lock(obd, S_IFREG, fdd->fdd_objid, - LCK_PW, &parent_lockh); - if (IS_ERR(dir_dentry)) - GOTO(cleanup, rc = PTR_ERR(dir_dentry)); + LASSERT(fdd->fdd_objid > 0); + dparent = filter_parent_lock(obd, S_IFREG, fdd->fdd_objid, + LCK_PW, &parent_lockh); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); cleanup_phase = 2; - handle = fsfilt_start(obd, dir_dentry->d_inode, + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); /* XXX unlink from PENDING directory now too */ - rc2 = filter_destroy_internal(obd, dir_dentry, object_dentry); + rc2 = filter_destroy_internal(obd, dparent, dchild); if (rc2 && !rc) rc = rc2; - rc = filter_finish_transno(export, handle, oti, rc); - rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0); + rc = filter_finish_transno(exp, handle, oti, rc); + rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) @@ -1295,7 +1172,7 @@ cleanup: switch(cleanup_phase) { case 2: if (rc || oti == NULL) { - ldlm_lock_decref(&parent_lockh, LCK_PW); + filter_parent_unlock(dparent, &parent_lockh, LCK_PW); } else { memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, sizeof(parent_lockh)); @@ -1304,7 +1181,7 @@ cleanup: case 1: pop_ctxt(&saved, &filter->fo_ctxt, NULL); case 0: - f_dput(object_dentry); + f_dput(dchild); filter_ffd_destroy(ffd); break; default: @@ -1321,7 +1198,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, char *option) { struct obd_ioctl_data* data = buf; - struct filter_obd *filter; + struct filter_obd *filter = &obd->u.filter; + struct vfsmount *mnt; int rc = 0; ENTRY; @@ -1345,16 +1223,28 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, CERROR("%s: configured for recovery and sync write\n", obd->obd_name); } else { - CERROR("unrecognised flag '%c'\n", - *data->ioc_inlbuf3); + if 
(*data->ioc_inlbuf3 != 'n') { + CERROR("unrecognised flag '%c'\n", + *data->ioc_inlbuf3); + } + } + } + + if (data->ioc_inllen4 > 0 && data->ioc_inlbuf4) { + if (*data->ioc_inlbuf4 == '/') { + CERROR("filter namespace mount: %s\n", + data->ioc_inlbuf4); + filter->fo_nspath = strdup(data->ioc_inlbuf4); + } else { + CERROR("namespace mount must be absolute path: '%s'\n", + data->ioc_inlbuf4); } } - filter = &obd->u.filter; filter->fo_vfsmnt = mnt; - filter->fo_fstype = strdup(data->ioc_inlbuf2); - filter->fo_sb = mnt->mnt_root->d_inode->i_sb; - CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt); + filter->fo_sb = mnt->mnt_sb; + filter->fo_fstype = mnt->mnt_sb->s_type->name; + CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt); OBD_SET_CTXT_MAGIC(&filter->fo_ctxt); filter->fo_ctxt.pwdmnt = mnt; @@ -1363,15 +1253,15 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, rc = filter_prep(obd); if (rc) - GOTO(err_kfree, rc); + GOTO(err_mntput, rc); spin_lock_init(&filter->fo_translock); spin_lock_init(&filter->fo_fddlock); spin_lock_init(&filter->fo_objidlock); INIT_LIST_HEAD(&filter->fo_export_list); - obd->obd_namespace = - ldlm_namespace_new("filter-tgt", LDLM_NAMESPACE_SERVER); + obd->obd_namespace = ldlm_namespace_new("filter-tgt", + LDLM_NAMESPACE_SERVER); if (!obd->obd_namespace) GOTO(err_post, rc = -ENOMEM); @@ -1382,10 +1272,9 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, err_post: filter_post(obd); -err_kfree: - kfree(filter->fo_fstype); +err_mntput: unlock_kernel(); - mntput(filter->fo_vfsmnt); + mntput(mnt); filter->fo_sb = 0; lock_kernel(); err_ops: @@ -1445,7 +1334,7 @@ static int filter_cleanup(struct obd_device *obd, int force, int failover) ldlm_namespace_free(obd->obd_namespace); sb = obd->u.filter.fo_sb; - if (!obd->u.filter.fo_sb) + if (!sb) RETURN(0); filter_post(obd); @@ -1462,48 +1351,37 @@ static int filter_cleanup(struct obd_device *obd, int force, int failover) 
obd->u.filter.fo_sb = 0; /* destroy_buffers(obd->u.filter.fo_sb->s_dev);*/ - kfree(obd->u.filter.fo_fstype); fsfilt_put_ops(obd->obd_fsops); - lock_kernel(); RETURN(0); } -int filter_attach(struct obd_device *dev, obd_count len, void *data) +int filter_attach(struct obd_device *obd, obd_count len, void *data) { struct lprocfs_static_vars lvars; - struct lprocfs_counters* cntrs; int rc; lprocfs_init_vars(&lvars); - rc = lprocfs_obd_attach(dev, lvars.obd_vars); + rc = lprocfs_obd_attach(obd, lvars.obd_vars); if (rc != 0) return rc; - rc = lprocfs_alloc_obd_counters(dev, LPROC_FILTER_LAST); + rc = lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST); if (rc != 0) return rc; - /* Init obdfilter private counters here */ - cntrs = dev->counters; - LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_READS], - 0, NULL, "read", "reqs"); - LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_READ_BYTES], - LPROCFS_CNTR_AVGMINMAX, - NULL, "read_bytes", "bytes"); - LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_WRITES], - 0, NULL, "write", "reqs"); - - LPROCFS_COUNTER_INIT(&cntrs->cntr[LPROC_FILTER_WRITE_BYTES], - LPROCFS_CNTR_AVGMINMAX, - NULL, "write_bytes", "bytes"); + /* Init obdfilter private stats here */ + lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES, + LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes"); + lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, + LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes"); return rc; } int filter_detach(struct obd_device *dev) { - lprocfs_free_obd_counters(dev); + lprocfs_free_obd_stats(dev); return lprocfs_obd_detach(dev); } @@ -1531,8 +1409,8 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, fed = &exp->exp_filter_data; class_export_put(exp); - INIT_LIST_HEAD(&exp->exp_filter_data.fed_open_head); - spin_lock_init(&exp->exp_filter_data.fed_lock); + INIT_LIST_HEAD(&fed->fed_open_head); + spin_lock_init(&fed->fed_lock); if (!obd->obd_replayable) RETURN(0); @@ -1635,47 +1513,53 @@ static void 
filter_from_inode(struct obdo *oa, struct inode *inode, int valid) } static struct dentry *__filter_oa2dentry(struct lustre_handle *conn, - struct obdo *oa, int locked,char *what) + struct obdo *oa, char *what) { - struct dentry *dentry = NULL; + struct dentry *dchild = NULL; if (oa->o_valid & OBD_MD_FLHANDLE) { struct lustre_handle *ost_handle = obdo_handle(oa); struct filter_file_data *ffd = filter_handle2ffd(ost_handle); if (ffd != NULL) { - dentry = dget(ffd->ffd_file->f_dentry); + struct filter_dentry_data *fdd; + dchild = dget(ffd->ffd_file->f_dentry); + fdd = dchild->d_fsdata; + LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); filter_ffd_put(ffd); + + CDEBUG(D_INODE, + "got child objid %*s: %p, count = %d\n", + dchild->d_name.len, dchild->d_name.name, + dchild, atomic_read(&dchild->d_count)); } } - if (!dentry) { + if (!dchild) { struct obd_device *obd = class_conn2obd(conn); + if (!obd) { CERROR("invalid client cookie "LPX64"\n", conn->cookie); RETURN(ERR_PTR(-EINVAL)); } - dentry = filter_fid2dentry(obd, filter_parent(obd, oa->o_mode, - oa->o_id), - oa->o_id, locked); + dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id); } - if (IS_ERR(dentry)) { + if (IS_ERR(dchild)) { CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id); - RETURN(dentry); + RETURN(dchild); } - if (!dentry->d_inode) { + if (!dchild->d_inode) { CERROR("%s on non-existent object: "LPU64"\n", what, oa->o_id); - f_dput(dentry); + f_dput(dchild); RETURN(ERR_PTR(-ENOENT)); } - return dentry; + return dchild; } -#define filter_oa2dentry(conn, oa, locked) __filter_oa2dentry(conn, oa, locked,\ - __FUNCTION__) +#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__) static int filter_getattr(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md) @@ -1684,9 +1568,7 @@ static int filter_getattr(struct lustre_handle *conn, struct obdo *oa, int rc = 0; ENTRY; - XPROCFS_BUMP_MYCPU_IOSTAT (st_getattr_reqs, 1); - - dentry = filter_oa2dentry(conn, 
oa, 1); + dentry = filter_oa2dentry(conn, oa); if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); @@ -1711,9 +1593,7 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, int rc, rc2; ENTRY; - XPROCFS_BUMP_MYCPU_IOSTAT (st_setattr_reqs, 1); - - dentry = filter_oa2dentry(conn, oa, 0); + dentry = filter_oa2dentry(conn, oa); if (IS_ERR(dentry)) GOTO(out_exp, rc = PTR_ERR(dentry)); @@ -1731,10 +1611,7 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, if (IS_ERR(handle)) GOTO(out_unlock, rc = PTR_ERR(handle)); - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, &iattr); - else - rc = inode_setattr(inode, &iattr); + rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1); rc = filter_finish_transno(export, handle, oti, rc); rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0); if (rc2) { @@ -1763,7 +1640,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, struct obd_client_handle *och) { - struct obd_export *export; + struct obd_export *export = NULL; struct lustre_handle *handle; struct filter_file_data *ffd; struct file *filp; @@ -1778,8 +1655,6 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa, GOTO(out, rc = -EINVAL); } - XPROCFS_BUMP_MYCPU_IOSTAT (st_open_reqs, 1); - filp = filter_obj_open(export, oa->o_id, oa->o_mode, LCK_PR, &parent_lockh); if (IS_ERR(filp)) @@ -1816,8 +1691,6 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa, GOTO(out, rc = -EINVAL); } - XPROCFS_BUMP_MYCPU_IOSTAT (st_close_reqs, 1); - if (!(oa->o_valid & OBD_MD_FLHANDLE)) { CERROR("no handle for close of objid "LPU64"\n", oa->o_id); GOTO(out, rc = -EINVAL); @@ -1846,13 +1719,13 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa, static int filter_create(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct obd_export *export; + struct obd_export *exp; struct 
obd_device *obd = class_conn2obd(conn); struct filter_obd *filter = &obd->u.filter; struct obd_run_ctxt saved; - struct dentry *dir_dentry; struct lustre_handle parent_lockh; - struct dentry *new = NULL; + struct dentry *dparent; + struct dentry *dchild = NULL; struct iattr; void *handle; int err, rc, cleanup_phase; @@ -1863,53 +1736,49 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, RETURN(-EINVAL); } - export = class_conn2export(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_create_reqs, 1); - - oa->o_id = filter_next_id(obd); + exp = class_conn2export(conn); push_ctxt(&saved, &filter->fo_ctxt, NULL); retry: + oa->o_id = filter_next_id(filter); + cleanup_phase = 0; - dir_dentry = filter_parent_lock(obd, S_IFREG, oa->o_id, LCK_PW, - &parent_lockh); - if (IS_ERR(dir_dentry)) - GOTO(cleanup, rc = PTR_ERR(dir_dentry)); + dparent = filter_parent_lock(obd, S_IFREG, oa->o_id, LCK_PW, + &parent_lockh); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); cleanup_phase = 1; - new = filter_fid2dentry(obd, dir_dentry, oa->o_id, 0); - if (IS_ERR(new)) - GOTO(cleanup, rc = PTR_ERR(new)); - if (new->d_inode) { - char buf[32]; - + dchild = filter_fid2dentry(obd, dparent, S_IFREG, oa->o_id); + if (IS_ERR(dchild)) + GOTO(cleanup, rc = PTR_ERR(dchild)); + if (dchild->d_inode) { /* This would only happen if lastobjid was bad on disk */ - CERROR("Serious error: objid %s already exists; is this " + CERROR("Serious error: objid %*s already exists; is this " "filesystem corrupt? 
I will try to work around it.\n", - filter_id(buf, filter, oa->o_id, oa->o_mode)); - f_dput(new); - ldlm_lock_decref(&parent_lockh, LCK_PW); - oa->o_id = filter_next_id(obd); + dchild->d_name.len, dchild->d_name.name); + f_dput(dchild); + filter_parent_unlock(dparent, &parent_lockh, LCK_PW); goto retry; } cleanup_phase = 2; - handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_CREATE); + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_create(dir_dentry->d_inode, new, oa->o_mode); + rc = vfs_create(dparent->d_inode, dchild, oa->o_mode); if (rc) CERROR("create failed rc = %d\n", rc); - rc = filter_finish_transno(export, handle, oti, rc); + rc = filter_finish_transno(exp, handle, oti, rc); err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd); if (err) { CERROR("unable to write lastobjid but file created\n"); if (!rc) rc = err; } - err = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0); + err = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (err) { CERROR("error on commit, err = %d\n", err); if (!rc) @@ -1922,16 +1791,16 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, /* Set flags for fields we have set in the inode struct */ oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME; - filter_from_inode(oa, new->d_inode, oa->o_valid); + filter_from_inode(oa, dchild->d_inode, oa->o_valid); EXIT; cleanup: switch(cleanup_phase) { case 2: - f_dput(new); + f_dput(dchild); case 1: /* locked parent dentry */ if (rc || oti == NULL) { - ldlm_lock_decref(&parent_lockh, LCK_PW); + filter_parent_unlock(dparent, &parent_lockh, LCK_PW); } else { memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, sizeof(parent_lockh)); @@ -1939,7 +1808,7 @@ cleanup: } case 0: pop_ctxt(&saved, &filter->fo_ctxt, NULL); - class_export_put(export); + class_export_put(exp); break; default: CERROR("invalid 
cleanup_phase %d\n", cleanup_phase); @@ -1952,10 +1821,10 @@ cleanup: static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti) { - struct obd_export *export; + struct obd_export *exp; struct obd_device *obd = class_conn2obd(conn); struct filter_obd *filter = &obd->u.filter; - struct dentry *dir_dentry, *object_dentry = NULL; + struct dentry *dparent, *dchild = NULL; struct filter_dentry_data *fdd; struct obd_run_ctxt saved; void *handle = NULL; @@ -1968,30 +1837,35 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, RETURN(-EINVAL); } - export = class_conn2export(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_destroy_reqs, 1); + exp = class_conn2export(conn); CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id); push_ctxt(&saved, &filter->fo_ctxt, NULL); - dir_dentry = filter_parent_lock(obd, oa->o_mode, oa->o_id, - LCK_PW, &parent_lockh); - if (IS_ERR(dir_dentry)) - GOTO(cleanup, rc = PTR_ERR(dir_dentry)); + dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id, + LCK_PW, &parent_lockh); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); cleanup_phase = 1; - object_dentry = filter_oa2dentry(conn, oa, 0); - if (IS_ERR(object_dentry)) + dchild = filter_fid2dentry(obd, dparent, S_IFREG, oa->o_id); + if (IS_ERR(dchild)) GOTO(cleanup, rc = -ENOENT); cleanup_phase = 2; - handle = fsfilt_start(obd, dir_dentry->d_inode, FSFILT_OP_UNLINK); + if (!dchild->d_inode) { + CERROR("destroying non-existent object "LPU64"\n", oa->o_id); + GOTO(cleanup, rc = -ENOENT); + } + + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); cleanup_phase = 3; - fdd = object_dentry->d_fsdata; + fdd = dchild->d_fsdata; if (fdd && atomic_read(&fdd->fdd_open_count)) { + LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) { fdd->fdd_flags |= FILTER_FLAG_DESTROY; /* XXX put into PENDING directory in
case of crash */ @@ -2005,23 +1879,23 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, GOTO(cleanup, rc = 0); } - rc = filter_destroy_internal(obd, dir_dentry, object_dentry); + rc = filter_destroy_internal(obd, dparent, dchild); cleanup: switch(cleanup_phase) { case 3: - rc = filter_finish_transno(export, handle, oti, rc); - rc2 = fsfilt_commit(obd, dir_dentry->d_inode, handle, 0); + rc = filter_finish_transno(exp, handle, oti, rc); + rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) rc = rc2; } case 2: - f_dput(object_dentry); + f_dput(dchild); case 1: if (rc || oti == NULL) { - ldlm_lock_decref(&parent_lockh, LCK_PW); + filter_parent_unlock(dparent, &parent_lockh, LCK_PW); } else { memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, sizeof(parent_lockh)); @@ -2029,7 +1903,7 @@ cleanup: } case 0: pop_ctxt(&saved, &filter->fo_ctxt, NULL); - class_export_put(export); + class_export_put(exp); break; default: CERROR("invalid cleanup_phase %d\n", cleanup_phase); @@ -2048,8 +1922,6 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa, int error; ENTRY; - XPROCFS_BUMP_MYCPU_IOSTAT (st_punch_reqs, 1); - if (end != OBD_OBJECT_EOF) CERROR("PUNCH not supported, only truncate: end = "LPX64"\n", end); @@ -2199,7 +2071,7 @@ static int lustre_commit_write(struct niobuf_local *lnb) LASSERT(to <= PAGE_SIZE); err = page->mapping->a_ops->commit_write(NULL, page, from, to); if (!err && IS_SYNC(inode)) - waitfor_one_page(page); + err = waitfor_one_page(page); //SetPageUptodate(page); // the client commit_write will do this SetPageReferenced(page); @@ -2225,15 +2097,12 @@ int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb, /* This page is currently locked, so get a temporary page instead. 
*/ if (!page) { - unsigned long addr; CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index); - addr = __get_free_pages(GFP_KERNEL, 0); /* locked page */ - if (!addr) { + page = alloc_pages(GFP_KERNEL, 0); /* locked page */ + if (!page) { CERROR("no memory for a temp page\n"); GOTO(err, rc = -ENOMEM); } - POISON((void *)addr, 0xBA, PAGE_SIZE); - page = virt_to_page(addr); page->index = index; lnb->page = page; lnb->flags |= N_LOCAL_TEMP_PAGE; @@ -2305,7 +2174,7 @@ static int filter_commit_write(struct niobuf_local *lnb, int err) return lustre_commit_write(lnb); } -static int filter_preprw(int cmd, struct obd_export *export, +static int filter_preprw(int cmd, struct obd_export *exp, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, void **desc_private, @@ -2319,28 +2188,16 @@ static int filter_preprw(int cmd, struct obd_export *export, struct fsfilt_objinfo *fso; struct dentry *dentry; struct inode *inode; - struct lprocfs_counters *cntrs; - int pglocked = 0, rc = 0, i, j; - + int pglocked = 0, rc = 0, i, j, tot_bytes = 0; + unsigned long now = jiffies; ENTRY; - if ((cmd & OBD_BRW_WRITE) != 0) - XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1); - else - XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1); - memset(res, 0, niocount * sizeof(*res)); - obd = export->exp_obd; + obd = exp->exp_obd; if (obd == NULL) RETURN(-EINVAL); - cntrs = obd->counters; - if ((cmd & OBD_BRW_WRITE) != 0) - LPROCFS_COUNTER_INCBY1(&cntrs->cntr[LPROC_FILTER_WRITES]); - else - LPROCFS_COUNTER_INCBY1(&cntrs->cntr[LPROC_FILTER_READS]); - // theoretically we support multi-obj BRW RPCs, but until then... 
LASSERT(objcount == 1); @@ -2355,9 +2212,7 @@ static int filter_preprw(int cmd, struct obd_export *export, LASSERT(o->ioo_bufcnt); - dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG, - o->ioo_id), - o->ioo_id, 0); + dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); if (IS_ERR(dentry)) GOTO(out_objinfo, rc = PTR_ERR(dentry)); @@ -2368,6 +2223,7 @@ static int filter_preprw(int cmd, struct obd_export *export, if (!dentry->d_inode) { CERROR("trying to BRW to non-existent file "LPU64"\n", o->ioo_id); + f_dput(dentry); GOTO(out_objinfo, rc = -ENOENT); } @@ -2394,6 +2250,9 @@ static int filter_preprw(int cmd, struct obd_export *export, o->ioo_id); } + if (time_after(jiffies, now + 15*HZ)) + CERROR("slow prep setup %lus\n", (jiffies - now) / HZ); + if (cmd & OBD_BRW_WRITE) { *desc_private = fsfilt_brw_start(obd, objcount, fso, niocount, nb); @@ -2419,36 +2278,34 @@ static int filter_preprw(int cmd, struct obd_export *export, lnb->offset = rnb->offset; lnb->len = rnb->len; lnb->flags = rnb->flags; + lnb->start = jiffies; if (cmd & OBD_BRW_WRITE) { rc = filter_get_page_write(inode,lnb,&pglocked); - - XPROCFS_BUMP_MYCPU_IOSTAT(st_write_bytes, - lnb->len); - LPROCFS_COUNTER_INCR(&cntrs->cntr[LPROC_FILTER_WRITE_BYTES], lnb->len); + if (rc) + up(&dentry->d_inode->i_sem); } else if (inode->i_size <= rnb->offset) { /* If there's no more data, abort early. * lnb->page == NULL and lnb->rc == 0, so it's * easy to detect later. */ - f_dput(lnb->dentry); + f_dput(dentry); lnb->dentry = NULL; break; } else { rc = filter_start_page_read(inode, lnb); - - XPROCFS_BUMP_MYCPU_IOSTAT(st_read_bytes, - lnb->len); - LPROCFS_COUNTER_INCR(&cntrs->cntr[LPROC_FILTER_READ_BYTES], lnb->len); } if (rc) { CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, - "error on page @"LPU64"%u/%u: rc = %d\n", - lnb->offset, j, o->ioo_bufcnt, rc); + "page err %u@"LPU64" %u/%u %p: rc %d\n", + lnb->len, lnb->offset, j, o->ioo_bufcnt, + dentry, rc); f_dput(dentry); GOTO(out_pages, rc); } + tot_bytes += lnb->len; + if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) { /* Likewise with a partial read */ break; @@ -2456,15 +2313,29 @@ static int filter_preprw(int cmd, struct obd_export *exp, } } - while ((cmd & OBD_BRW_READ) && lnb-- > res) { - rc = filter_finish_page_read(lnb); - if (rc) { - CERROR("error on page %u@"LPU64": rc = %d\n", - lnb->len, lnb->offset, rc); - f_dput(lnb->dentry); - GOTO(out_pages, rc); + if (time_after(jiffies, now + 15*HZ)) + CERROR("slow prep get page %lus\n", (jiffies - now) / HZ); + + if (cmd & OBD_BRW_READ) { + lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, + tot_bytes); + while (lnb-- > res) { + rc = filter_finish_page_read(lnb); + if (rc) { + CERROR("error page %u@"LPU64" %u %p: rc %d\n", + lnb->len, lnb->offset, (unsigned)(lnb - res), + lnb->dentry, rc); + f_dput(lnb->dentry); + GOTO(out_pages, rc); + } } - } + } else + lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, + tot_bytes); + + if (time_after(jiffies, now + 15*HZ)) + CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ); + EXIT; out: OBD_FREE(fso, objcount * sizeof(*fso)); @@ -2483,7 +2354,7 @@ out_pages: f_dput(lnb->dentry); } if (cmd & OBD_BRW_WRITE) { - filter_finish_transno(export, *desc_private, oti, rc); + filter_finish_transno(exp, *desc_private, oti, rc); fsfilt_commit(obd, filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode, *desc_private, 0); @@ -2553,12 +2424,10 @@ static int filter_syncfs(struct obd_export *exp) struct obd_device *obd = exp->exp_obd; ENTRY; - XPROCFS_BUMP_MYCPU_IOSTAT (st_syncfs_reqs, 1); - RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb)); } -static int filter_commitrw(int cmd, struct obd_export *export, +static int filter_commitrw(int cmd, struct obd_export *exp, int objcount,
struct obd_ioobj *obj, int niocount, struct niobuf_local *res, void *desc_private, struct obd_trans_info *oti) @@ -2566,8 +2435,9 @@ static int filter_commitrw(int cmd, struct obd_export *export, struct obd_run_ctxt saved; struct obd_ioobj *o; struct niobuf_local *lnb; - struct obd_device *obd = export->exp_obd; + struct obd_device *obd = exp->exp_obd; int found_locked = 0, rc = 0, i; + unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ ENTRY; push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); @@ -2586,11 +2456,16 @@ static int filter_commitrw(int cmd, struct obd_export *export, if (lnb->page == NULL) { continue; } + if (lnb->flags & N_LOCAL_TEMP_PAGE) { found_locked++; continue; } + if (time_after(jiffies, lnb->start + 15*HZ)) + CERROR("slow commitrw %lus\n", + (jiffies - lnb->start) / HZ); + if (cmd & OBD_BRW_WRITE) { int err = filter_commit_write(lnb, 0); @@ -2601,6 +2476,9 @@ static int filter_commitrw(int cmd, struct obd_export *export, } f_dput(lnb->dentry); + if (time_after(jiffies, lnb->start + 15*HZ)) + CERROR("slow commit_write %lus\n", + (jiffies - lnb->start) / HZ); } } @@ -2612,27 +2490,37 @@ static int filter_commitrw(int cmd, struct obd_export *export, if (!(lnb->flags & N_LOCAL_TEMP_PAGE)) continue; + if (time_after(jiffies, lnb->start + 15*HZ)) + CERROR("slow commitrw locked %lus\n", + (jiffies - lnb->start) / HZ); + err = filter_write_locked_page(lnb); if (!rc) rc = err; f_dput(lnb->dentry); found_locked--; + + if (time_after(jiffies, lnb->start + 15*HZ)) + CERROR("slow commit_write locked %lus\n", + (jiffies - lnb->start) / HZ); } } if (cmd & OBD_BRW_WRITE) { /* We just want any dentry for the commit, for now */ - struct dentry *dir_dentry = filter_parent(obd, S_IFREG, 0); + struct dentry *dparent = filter_parent(obd, S_IFREG, 0); int err; - rc = filter_finish_transno(export, desc_private, oti, rc); - err = fsfilt_commit(obd, dir_dentry->d_inode, desc_private, + rc = filter_finish_transno(exp, desc_private, oti, rc); + err = 
fsfilt_commit(obd, dparent->d_inode, desc_private, obd_sync_filter); if (err) rc = err; if (obd_sync_filter) LASSERT(oti->oti_transno <= obd->obd_last_committed); + if (time_after(jiffies, now + 15*HZ)) + CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ); } LASSERT(!current->journal_info); @@ -2717,11 +2605,6 @@ static int filter_san_preprw(int cmd, struct lustre_handle *conn, int i; ENTRY; - if ((cmd & OBD_BRW_WRITE) != 0) - XPROCFS_BUMP_MYCPU_IOSTAT (st_write_reqs, 1); - else - XPROCFS_BUMP_MYCPU_IOSTAT (st_read_reqs, 1); - obd = class_conn2obd(conn); if (!obd) { CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", @@ -2735,9 +2618,7 @@ static int filter_san_preprw(int cmd, struct lustre_handle *conn, int (*fs_bmap)(struct address_space *, long); int j; - dentry = filter_fid2dentry(obd, filter_parent(obd, S_IFREG, - o->ioo_id), - o->ioo_id, 0); + dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); if (IS_ERR(dentry)) GOTO(out, rc = PTR_ERR(dentry)); inode = dentry->d_inode; @@ -2790,8 +2671,6 @@ static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs) obd = class_conn2obd(conn); - XPROCFS_BUMP_MYCPU_IOSTAT (st_statfs_reqs, 1); - RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs)); } @@ -2897,6 +2776,24 @@ int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst, RETURN(err); } +int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn, + int len, void *karg, void *uarg) +{ + struct obd_device *obd = class_conn2obd(conn); + + switch (cmd) { + case OBD_IOC_ABORT_RECOVERY: + CERROR("aborting recovery for device %s\n", obd->obd_name); + target_abort_recovery(obd); + RETURN(0); + + default: + RETURN(-EINVAL); + } + RETURN(0); +} + + static struct obd_ops filter_obd_ops = { o_owner: THIS_MODULE, o_attach: filter_attach, @@ -2919,6 +2816,7 @@ static struct obd_ops filter_obd_ops = { o_preprw: filter_preprw, o_commitrw: filter_commitrw, o_destroy_export: filter_destroy_export, + o_iocontrol: filter_iocontrol, #if 
0 o_san_preprw: filter_san_preprw, o_preallocate: filter_preallocate_inodes, @@ -2949,7 +2847,8 @@ static struct obd_ops filter_sanobd_ops = { o_preprw: filter_preprw, o_commitrw: filter_commitrw, o_san_preprw: filter_san_preprw, - o_destroy_export: filter_destroy_export + o_destroy_export: filter_destroy_export, + o_iocontrol: filter_iocontrol, #if 0 o_preallocate: filter_preallocate_inodes, o_migrate: filter_migrate, @@ -2966,7 +2865,6 @@ static int __init obdfilter_init(void) printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n"); - xprocfs_init ("filter"); lprocfs_init_vars(&lvars); rc = class_register_type(&filter_obd_ops, lvars.module_vars, @@ -2985,7 +2883,6 @@ static void __exit obdfilter_exit(void) { class_unregister_type(OBD_FILTER_SAN_DEVICENAME); class_unregister_type(OBD_FILTER_DEVICENAME); - xprocfs_fini (); } MODULE_AUTHOR("Cluster File Systems, Inc. "); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 89203e5..1319dbd 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -56,14 +56,14 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof, } int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count, - int *eof, void *data) + int *eof, void *data) { struct obd_device* obd = (struct obd_device *)data; LASSERT(obd != NULL); LASSERT(obd->u.filter.fo_vfsmnt->mnt_devname); *eof = 1; - return snprintf(page, count, "%s\n", + return snprintf(page, count, "%s\n", obd->u.filter.fo_vfsmnt->mnt_devname); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 2289c74..9e46952 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1398,7 +1398,7 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn, static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, struct lustre_handle *parent_lock, __u32 type, void *extentp, int extent_len, __u32 mode, - int *flags, void 
*callback, void *data, int datalen, + int *flags, void *callback, void *data, struct lustre_handle *lockh) { struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; @@ -1413,8 +1413,8 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, extent->end |= ~PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type, extent, - sizeof(extent), mode, lockh); + rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id, + type, extent, sizeof(extent), mode, data, lockh); if (rc == 1) /* We already have a lock, and it's referenced */ RETURN(ELDLM_OK); @@ -1432,8 +1432,9 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, * locks out from other users right now, too. */ if (mode == LCK_PR) { - rc = ldlm_lock_match(obddev->obd_namespace, 0, &res_id, type, - extent, sizeof(extent), LCK_PW, lockh); + rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, + &res_id, type, extent, sizeof(extent), + LCK_PW, data, lockh); if (rc == 1) { /* FIXME: This is not incredibly elegant, but it might * be more elegant than adding another parameter to @@ -1453,7 +1454,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, __u32 type, void *extentp, int extent_len, __u32 mode, - int *flags, struct lustre_handle *lockh) + int *flags, void *data, struct lustre_handle *lockh) { struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; struct obd_device *obddev = class_conn2obd(connh); @@ -1468,7 +1469,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, /* Next, search for already existing extent locks that will cover us */ rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type, - extent, sizeof(extent), mode, lockh); + extent, sizeof(extent), mode, data, lockh); if (rc) 
RETURN(rc); @@ -1478,7 +1479,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, if (mode == LCK_PR) { rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type, extent, sizeof(extent), LCK_PW, - lockh); + data, lockh); if (rc == 1) { /* FIXME: This is not incredibly elegant, but it might * be more elegant than adding another parameter to diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index f14d82f..7569a7a 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -411,8 +411,7 @@ static int ost_brw_read(struct ptlrpc_request *req) if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK)) GOTO(out, rc = -EIO); - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { CERROR ("Missing/short ost_body\n"); GOTO (out, rc = -EFAULT); @@ -499,8 +498,10 @@ static int ost_brw_read(struct ptlrpc_request *req) CERROR ("timeout waiting for bulk PUT\n"); ptlrpc_abort_bulk (desc); } - } - comms_error = rc != 0; + } else { + CERROR("ptlrpc_bulk_put failed RC: %d\n", rc); + } + comms_error = rc != 0; } /* Must commit after prep above in all cases */ @@ -565,6 +566,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK)) GOTO(out, rc = -EIO); + /* pause before transaction has been started */ + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, + obd_timeout +1); + swab = lustre_msg_swabbed (req->rq_reqmsg); body = lustre_swab_reqbuf (req, 0, sizeof (*body), lustre_swab_ost_body); @@ -649,8 +654,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) CERROR ("timeout waiting for bulk GET\n"); ptlrpc_abort_bulk (desc); } - } - comms_error = rc != 0; + } else { + CERROR("ptlrpc_bulk_get failed RC: %d\n", rc); + } + comms_error = rc != 0; } #if CHECKSUM_BULK diff --git 
a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 7cb00cf..7a4e05c 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -106,7 +106,7 @@ case ${host_cpu} in ia64 ) AC_MSG_RESULT($host_cpu) - KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' + KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' KCPPFLAGS='-D__KERNEL__ -DMODULE' MOD_LINK=elf64_ia64 ;; diff --git a/lustre/portals/include/config.h.in b/lustre/portals/include/config.h.in index b05d0c4..3aa6909 100644 --- a/lustre/portals/include/config.h.in +++ b/lustre/portals/include/config.h.in @@ -1,11 +1,58 @@ -/* ../include/config.h.in. Generated automatically from configure.in by autoheader. */ +/* portals/include/config.h.in. Generated from configure.in by autoheader. */ -/* Define if you have the readline library (-lreadline). */ -#undef HAVE_LIBREADLINE +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* IOCTL Buffer Size */ +#undef OBD_MAX_IOCTL_BUFFER /* Name of package */ #undef PACKAGE +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. 
*/ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of a `unsigned long long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG_LONG + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + /* Version number of package */ #undef VERSION - diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 6d7f3f3..ee3b9fc 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -262,28 +262,27 @@ do { \ #define PORTAL_ALLOC(ptr, size) \ do { \ - long s = size; \ LASSERT (!in_interrupt()); \ - if (s > PORTAL_VMALLOC_SIZE) \ - (ptr) = vmalloc(s); \ + if ((size) > PORTAL_VMALLOC_SIZE) \ + (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc(s, GFP_NOFS); \ + (ptr) = kmalloc((size), GFP_NOFS); \ if ((ptr) == NULL) \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ - " '" #ptr "' = %ld)\n", __FILE__, __LINE__, s); \ + CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ + #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ else { \ - portal_kmem_inc((ptr), s); \ - memset((ptr), 0, s); \ + portal_kmem_inc((ptr), (size)); \ + memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) #define PORTAL_FREE(ptr, size) \ do { \ - long s = (size); \ + int s = (size); \ if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \ + CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ "%s:%d\n", s, __FILE__, __LINE__); \ break; \ } \ @@ -292,39 +291,38 @@ do { \ else \ kfree(ptr); \ portal_kmem_dec((ptr), 
s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + s, (ptr), atomic_read(&portal_kmemory)); \ } while (0) #define PORTAL_SLAB_ALLOC(ptr, slab, size) \ do { \ - long s = (size); \ - LASSERT (!in_interrupt()); \ + LASSERT(!in_interrupt()); \ (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \ if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ } else { \ - portal_kmem_inc((ptr), s); \ - memset((ptr), 0, s); \ + portal_kmem_inc((ptr), (size)); \ + memset((ptr), 0, (size)); \ } \ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ + (int)(size), (ptr), atomic_read(&portal_kmemory)); \ } while (0) #define PORTAL_SLAB_FREE(ptr, slab, size) \ do { \ - long s = (size); \ + int s = (size); \ if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \ + CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ "%s:%d\n", s, __FILE__, __LINE__); \ break; \ } \ memset((ptr), 0x5a, s); \ kmem_cache_free((slab), ptr); \ portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ s, (ptr), atomic_read (&portal_kmemory)); \ } while (0) diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h index 88ab74f..85e585b 100644 --- a/lustre/portals/knals/qswnal/qswnal.h +++ b/lustre/portals/knals/qswnal/qswnal.h @@ -104,7 +104,7 @@ typedef unsigned long kqsw_csum_t; #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ #define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 128 /* # reserved transmit messages if can't block */ +#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if 
can't block */ #define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ #define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index 3b47a25..c03d592 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -617,6 +617,7 @@ kqswnal_sendmsg (nal_cb_t *nal, if (ktx == NULL) { kqswnal_cerror_hdr (hdr); lib_finalize (&kqswnal_lib, private, cookie); + return (-1); } memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 94a068d..c79329c 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -113,11 +113,11 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc; LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE); - + desc = new_bulk(); if (desc == NULL) RETURN(NULL); - + /* Is this sampled at the right place? Do we want to get the import * generation just before we send? Should it match the generation of * the request? 
*/ @@ -143,7 +143,7 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc; LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK); - + desc = new_bulk(); if (desc == NULL) RETURN(NULL); @@ -191,7 +191,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) LASSERT (desc != NULL); LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */ LASSERT (!desc->bd_network_rw); /* network hands off or */ - + list_for_each_safe(tmp, next, &desc->bd_page_list) { struct ptlrpc_bulk_page *bulk; bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); @@ -213,7 +213,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) { LASSERT (bulk != NULL); - + list_del(&bulk->bp_link); bulk->bp_desc->bd_page_count--; OBD_FREE(bulk, sizeof(*bulk)); @@ -247,7 +247,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, request->rq_type = PTL_RPC_MSG_REQUEST; request->rq_import = class_import_get(imp); request->rq_phase = RQ_PHASE_NEW; - + /* XXX FIXME bug 249 */ request->rq_request_portal = imp->imp_client->cli_request_portal; request->rq_reply_portal = imp->imp_client->cli_reply_portal; @@ -290,7 +290,7 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) ENTRY; /* Requests on the set should either all be completed, or all be new */ - expected_phase = (set->set_remaining == 0) ? + expected_phase = (set->set_remaining == 0) ? 
RQ_PHASE_COMPLETE : RQ_PHASE_NEW; list_for_each (tmp, &set->set_requests) { struct ptlrpc_request *req = @@ -299,9 +299,9 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) LASSERT (req->rq_phase == expected_phase); n++; } - + LASSERT (set->set_remaining == 0 || set->set_remaining == n); - + list_for_each_safe(tmp, next, &set->set_requests) { struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); @@ -310,12 +310,13 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) LASSERT (req->rq_phase == expected_phase); if (req->rq_phase == RQ_PHASE_NEW) { - + if (req->rq_interpret_reply != NULL) { - int (*interpreter)(struct ptlrpc_request *, void *, int) = + int (*interpreter)(struct ptlrpc_request *, + void *, int) = req->rq_interpret_reply; - - /* higher level (i.e. LOV) failed; + + /* higher level (i.e. LOV) failed; * let the sub reqs clean up */ req->rq_status = -EBADR; interpreter(req, &req->rq_async_args, req->rq_status); @@ -401,8 +402,7 @@ static int ptlrpc_check_status(struct ptlrpc_request *req) } #warning this needs to change after robert fixes eviction handling -static int -after_reply(struct ptlrpc_request *req, int *restartp) +static int after_reply(struct ptlrpc_request *req, int *restartp) { unsigned long flags; struct obd_import *imp = req->rq_import; @@ -414,7 +414,7 @@ after_reply(struct ptlrpc_request *req, int *restartp) if (restartp != NULL) *restartp = 0; - + /* NB Until this point, the whole of the incoming message, * including buflens, status etc is in the sender's byte order. */ @@ -441,8 +441,8 @@ after_reply(struct ptlrpc_request *req, int *restartp) rc = ptlrpc_check_status(req); /* Either we've been evicted, or the server has failed for - * some reason. Try to reconnect, and if that fails, punt to - * upcall */ + * some reason. Try to reconnect, and if that fails, punt to the + * upcall. 
*/ if (rc == -ENOTCONN) { if (req->rq_level < LUSTRE_CONN_FULL || req->rq_no_recov || imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) { @@ -451,7 +451,7 @@ after_reply(struct ptlrpc_request *req, int *restartp) rc = ptlrpc_request_handle_eviction(req); if (rc) - CERROR("can't reconnect to %s@%s: %d\n", + CERROR("can't reconnect to %s@%s: %d\n", imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid, rc); else @@ -486,7 +486,7 @@ after_reply(struct ptlrpc_request *req, int *restartp) /* Replay-enabled imports return commit-status information. */ if (req->rq_repmsg->last_committed) { - if (req->rq_repmsg->last_committed < + if (req->rq_repmsg->last_committed < imp->imp_peer_committed_transno) { CERROR("%s went back in time (transno "LPD64 " was committed, server claims "LPD64 @@ -501,7 +501,7 @@ after_reply(struct ptlrpc_request *req, int *restartp) ptlrpc_free_committed(imp); spin_unlock_irqrestore(&imp->imp_lock, flags); } - + RETURN(rc); } @@ -520,50 +520,57 @@ static int check_set(struct ptlrpc_request_set *set) struct obd_import *imp = req->rq_import; int rc = 0; - LASSERT (req->rq_phase == RQ_PHASE_RPC || - req->rq_phase == RQ_PHASE_BULK || - req->rq_phase == RQ_PHASE_COMPLETE); + if (!(req->rq_phase == RQ_PHASE_RPC || + req->rq_phase == RQ_PHASE_BULK || + req->rq_phase == RQ_PHASE_INTERPRET || + req->rq_phase == RQ_PHASE_COMPLETE)) { + DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); + LBUG(); + } if (req->rq_phase == RQ_PHASE_COMPLETE) continue; + if (req->rq_phase == RQ_PHASE_INTERPRET) + GOTO (interpret, req->rq_status); + if (req->rq_err) { ptlrpc_unregister_reply(req); if (req->rq_status == 0) req->rq_status = -EIO; req->rq_phase = RQ_PHASE_INTERPRET; - + spin_lock_irqsave(&imp->imp_lock, flags); list_del_init(&req->rq_list); spin_unlock_irqrestore(&imp->imp_lock, flags); GOTO (interpret, req->rq_status); - } - + } + if (req->rq_intr) { /* NB could be on delayed list */ ptlrpc_unregister_reply(req); req->rq_status = -EINTR; 
req->rq_phase = RQ_PHASE_INTERPRET; - + spin_lock_irqsave(&imp->imp_lock, flags); list_del_init(&req->rq_list); spin_unlock_irqrestore(&imp->imp_lock, flags); GOTO (interpret, req->rq_status); } - + if (req->rq_phase == RQ_PHASE_RPC) { int do_restart = 0; if (req->rq_waiting || req->rq_resend) { spin_lock_irqsave(&imp->imp_lock, flags); - + if (req->rq_level > imp->imp_level) { spin_unlock_irqrestore(&imp->imp_lock, flags); continue; } - + list_del(&req->rq_list); list_add_tail(&req->rq_list, &imp->imp_sending_list); @@ -578,19 +585,19 @@ static int check_set(struct ptlrpc_request_set *set) spin_unlock_irqrestore(&req->rq_lock, flags); ptlrpc_unregister_reply(req); - if (req->rq_bulk) + if (req->rq_bulk) ptlrpc_unregister_bulk(req); } - + rc = ptl_send_rpc(req); if (rc) { req->rq_status = rc; req->rq_phase = RQ_PHASE_INTERPRET; GOTO (interpret, req->rq_status); } - + } - + /* Ensure the network callback returned */ spin_lock_irqsave (&req->rq_lock, flags); if (!req->rq_replied) { @@ -598,18 +605,25 @@ static int check_set(struct ptlrpc_request_set *set) continue; } spin_unlock_irqrestore (&req->rq_lock, flags); - + spin_lock_irqsave(&imp->imp_lock, flags); list_del_init(&req->rq_list); spin_unlock_irqrestore(&imp->imp_lock, flags); req->rq_status = after_reply(req, &do_restart); if (do_restart) { + spin_lock_irqsave (&req->rq_lock, flags); req->rq_resend = 1; /* ugh */ + spin_unlock_irqrestore (&req->rq_lock, flags); continue; } - - if (req->rq_bulk == NULL) { + + /* If there is no bulk associated with this request, + * then we're done and should let the interpreter + * process the reply. Similarly if the RPC returned + * an error, and therefore the bulk will never arrive. 
+ */ + if (req->rq_bulk == NULL || req->rq_status != 0) { req->rq_phase = RQ_PHASE_INTERPRET; GOTO (interpret, req->rq_status); } @@ -620,20 +634,20 @@ static int check_set(struct ptlrpc_request_set *set) LASSERT (req->rq_phase == RQ_PHASE_BULK); if (!ptlrpc_bulk_complete (req->rq_bulk)) continue; - + req->rq_phase = RQ_PHASE_INTERPRET; - + interpret: LASSERT (req->rq_phase == RQ_PHASE_INTERPRET); LASSERT (!req->rq_receiving_reply); if (req->rq_bulk != NULL) ptlrpc_unregister_bulk (req); - + if (req->rq_interpret_reply != NULL) { - int (*interpreter)(struct ptlrpc_request *, void *, int) = + int (*interpreter)(struct ptlrpc_request *,void *,int) = req->rq_interpret_reply; - req->rq_status = interpreter(req, &req->rq_async_args, + req->rq_status = interpreter(req, &req->rq_async_args, req->rq_status); } @@ -694,7 +708,6 @@ static int expired_set(void *data) ENTRY; LASSERT (set != NULL); - CERROR("EXPIRED SET %p\n", set); /* A timeout expired; see which reqs it applies to... */ list_for_each (tmp, &set->set_requests) { @@ -705,7 +718,7 @@ static int expired_set(void *data) if (!((req->rq_phase == RQ_PHASE_RPC && !req->rq_waiting) || (req->rq_phase == RQ_PHASE_BULK))) continue; - + if (req->rq_timedout || /* already dealt with */ req->rq_sent + req->rq_timeout > now) /* not expired */ continue; @@ -736,7 +749,7 @@ static void interrupted_set(void *data) if (req->rq_phase != RQ_PHASE_RPC) continue; - + spin_lock_irqsave (&req->rq_lock, flags); req->rq_intr = 1; spin_unlock_irqrestore (&req->rq_lock, flags); @@ -756,13 +769,14 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) int timeout; ENTRY; + LASSERT(!list_empty(&set->set_requests)); list_for_each(tmp, &set->set_requests) { req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); LASSERT (req->rq_level == LUSTRE_CONN_FULL); LASSERT (req->rq_phase == RQ_PHASE_NEW); req->rq_phase = RQ_PHASE_RPC; - + imp = req->rq_import; spin_lock_irqsave(&imp->imp_lock, flags); @@ -827,7 +841,7 @@ int ptlrpc_set_wait(struct 
ptlrpc_request_set *set) if (req->rq_timedout) /* already timed out */ continue; - + deadline = req->rq_sent + req->rq_timeout; if (deadline <= now) /* actually expired already */ timeout = 1; /* ASAP */ @@ -839,10 +853,10 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * req times out */ CDEBUG(D_HA, "set %p going to sleep for %d seconds\n", set, timeout); - lwi = LWI_TIMEOUT_INTR(timeout * HZ, + lwi = LWI_TIMEOUT_INTR(timeout ? timeout * HZ : 1, expired_set, interrupted_set, set); rc = l_wait_event(set->set_waitq, check_set(set), &lwi); - + LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); /* -EINTR => all requests have been flagged rq_intr so next @@ -864,13 +878,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) if (req->rq_status != 0) rc = req->rq_status; } - + if (set->set_interpret != NULL) { - int (*interpreter)(struct ptlrpc_request_set *set, void *, int) = + int (*interpreter)(struct ptlrpc_request_set *set,void *,int) = set->set_interpret; rc = interpreter (set, &set->set_args, rc); } - + RETURN(rc); } @@ -883,7 +897,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) } LASSERT (!request->rq_receiving_reply); - + /* We must take it off the imp_replay_list first. Otherwise, we'll set * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ if (request->rq_import != NULL) { @@ -967,7 +981,7 @@ static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request) } /* Disengage the client's reply buffer from the network - * NB does _NOT_ unregister any client-side bulk. + * NB does _NOT_ unregister any client-side bulk. * IDEMPOTENT, but _not_ safe against concurrent callers. * The request owner (i.e. the thread doing the I/O) must call... 
*/ @@ -989,7 +1003,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) LASSERT (!request->rq_replied); /* callback hasn't completed */ spin_unlock_irqrestore (&request->rq_lock, flags); - + rc = PtlMDUnlink (request->rq_reply_md_h); switch (rc) { default: @@ -1005,24 +1019,24 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) request->rq_repmsg = NULL; EXIT; return; - + case PTL_MD_INUSE: /* callback in progress */ for (;;) { /* Network access will complete in finite time but * the timeout lets us CERROR for visibility */ - struct l_wait_info lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL); - + struct l_wait_info lwi = LWI_TIMEOUT(10*HZ, NULL, NULL); + rc = l_wait_event (request->rq_wait_for_rep, request->rq_replied, &lwi); LASSERT (rc == 0 || rc == -ETIMEDOUT); if (rc == 0) { spin_lock_irqsave (&request->rq_lock, flags); - /* Ensure the callback has completed scheduling me - * and taken its hands off the request */ - spin_unlock_irqrestore (&request->rq_lock, flags); + /* Ensure the callback has completed scheduling + * me and taken its hands off the request */ + spin_unlock_irqrestore(&request->rq_lock,flags); break; } - + CERROR ("Unexpectedly long timeout: req %p\n", request); } /* fall through */ @@ -1091,7 +1105,7 @@ void ptlrpc_cleanup_client(struct obd_import *imp) void ptlrpc_resend_req(struct ptlrpc_request *req) { unsigned long flags; - + DEBUG_REQ(D_HA, req, "resending"); req->rq_reqmsg->handle.cookie = 0; ptlrpc_put_connection(req->rq_connection); @@ -1138,7 +1152,7 @@ static int expired_request(void *data) static void interrupted_request(void *data) { unsigned long flags; - + struct ptlrpc_request *req = data; DEBUG_REQ(D_HA, req, "request interrupted"); spin_lock_irqsave (&req->rq_lock, flags); @@ -1206,7 +1220,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) LASSERT (req->rq_set == NULL); LASSERT (!req->rq_receiving_reply); - + /* for distributed debugging */ req->rq_reqmsg->status = current->pid; LASSERT(imp->imp_obd != 
NULL); @@ -1219,7 +1233,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) /* Mark phase here for a little debug help */ req->rq_phase = RQ_PHASE_RPC; - + restart: /* * If the import has been invalidated (such as by an OST failure), the @@ -1265,7 +1279,7 @@ restart: spin_unlock_irqrestore(&imp->imp_lock, flags); GOTO (out, rc); } - + CERROR("process %d resumed\n", current->pid); } @@ -1287,7 +1301,7 @@ restart: } DEBUG_REQ(D_ERROR, req, "send failed (%d); recovering", rc); - + ptlrpc_fail_import(imp, req->rq_import_generation); /* If we've been told to not wait, we're done. */ @@ -1340,7 +1354,7 @@ restart: * (ensuring the reply callback has returned), sees that * req->rq_receiving_reply is clear and returns. */ ptlrpc_unregister_reply (req); - + if (req->rq_err) GOTO(out, rc = -EIO); @@ -1360,7 +1374,7 @@ restart: if (req->rq_bulk != NULL) ptlrpc_unregister_bulk (req); - + DEBUG_REQ(D_HA, req, "resending: "); goto restart; } @@ -1376,7 +1390,7 @@ restart: if (req->rq_timedout) { /* non-recoverable timeout */ GOTO(out, rc = -ETIMEDOUT); } - + if (!req->rq_replied) { /* How can this be? -eeb */ DEBUG_REQ(D_ERROR, req, "!rq_replied: "); @@ -1396,9 +1410,10 @@ restart: out: if (req->rq_bulk != NULL) { if (rc >= 0) { /* success so far */ - lwi = LWI_TIMEOUT (timeout, NULL, NULL); - brc = l_wait_event (req->rq_wait_for_rep, - ptlrpc_bulk_complete (req->rq_bulk), &lwi); + lwi = LWI_TIMEOUT(timeout, NULL, NULL); + brc = l_wait_event(req->rq_wait_for_rep, + ptlrpc_bulk_complete(req->rq_bulk), + &lwi); if (brc != 0) { LASSERT (brc == -ETIMEDOUT); CERROR ("Timed out waiting for bulk\n"); @@ -1412,7 +1427,7 @@ restart: ptlrpc_unregister_bulk (req); } } - + LASSERT (!req->rq_receiving_reply); req->rq_phase = RQ_PHASE_INTERPRET; RETURN (rc); @@ -1427,10 +1442,10 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) /* I don't touch rq_phase here, so the debug log can show what * state it was left in */ - + /* Not handling automatic bulk replay yet (or ever?) 
*/ LASSERT (req->rq_bulk == NULL); - + DEBUG_REQ(D_NET, req, "about to replay"); /* Update request's state, since we might have a new connection. */ diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index cc9982c..07be1af 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -29,7 +29,7 @@ #include "ptlrpc_internal.h" -struct ll_rpc_opcode { +struct ll_rpc_opcode { __u32 opcode; const char *opname; } ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = { @@ -69,6 +69,8 @@ struct ll_rpc_opcode { { PTLBD_READ, "ptlbd_read" }, { PTLBD_WRITE, "ptlbd_write" }, { PTLBD_FLUSH, "ptlbd_flush" }, + { PTLBD_CONNECT, "ptlbd_connect" }, + { PTLBD_DISCONNECT, "ptlbd_disconnect" }, { OBD_PING, "obd_ping" } }; @@ -77,8 +79,8 @@ const char* ll_opcode2str(__u32 opcode) /* When one of the assertions below fail, chances are that: * 1) A new opcode was added in lustre_idl.h, but was * is missing from the table above. - * or 2) The opcode space was renumbered or rearranged, - * and the opcode_offset() function in + * or 2) The opcode space was renumbered or rearranged, + * and the opcode_offset() function in * ptlrpc_internals.h needs to be modified. 
*/ __u32 offset = opcode_offset(opcode); @@ -96,51 +98,50 @@ void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) { return; } void ptlrpc_lprocfs_register_service(struct obd_device *obddev, struct ptlrpc_service *svc) { - struct proc_dir_entry *svc_procroot; - struct lprocfs_counters *svc_cntrs; + struct proc_dir_entry *svc_procroot; + struct lprocfs_stats *svc_stats; int i, rc; - unsigned int svc_counter_config = LPROCFS_CNTR_EXTERNALLOCK | + unsigned int svc_counter_config = LPROCFS_CNTR_EXTERNALLOCK | LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV; LASSERT(svc->svc_procroot == NULL); - LASSERT(svc->svc_counters == NULL); + LASSERT(svc->svc_stats == NULL); svc_procroot = lprocfs_register(svc->srv_name, obddev->obd_proc_entry, NULL, NULL); - if (svc_procroot == NULL) + if (svc_procroot == NULL) return; - - svc_cntrs = - lprocfs_alloc_counters(PTLRPC_LAST_CNTR+LUSTRE_MAX_OPCODES); - if (svc_cntrs == NULL) { + + svc_stats = lprocfs_alloc_stats(PTLRPC_LAST_CNTR + LUSTRE_MAX_OPCODES); + if (svc_stats == NULL) { lprocfs_remove(svc_procroot); return; } - - LPROCFS_COUNTER_INIT(&svc_cntrs->cntr[PTLRPC_REQWAIT_CNTR], - svc_counter_config, &svc->srv_lock, - "req_waittime", "cycles"); - LPROCFS_COUNTER_INIT(&svc_cntrs->cntr[PTLRPC_SVCEQDEPTH_CNTR], - svc_counter_config, &svc->srv_lock, - "svc_eqdepth", "reqs"); + + lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR, + svc_counter_config, "req_waittime", "cycles"); + /* Wait for b_eq branch + lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR, + svc_counter_config, "svc_eqdepth", "reqs"); + */ /* no stddev on idletime */ - LPROCFS_COUNTER_INIT(&svc_cntrs->cntr[PTLRPC_SVCIDLETIME_CNTR], - (LPROCFS_CNTR_EXTERNALLOCK | LPROCFS_CNTR_AVGMINMAX), - &svc->srv_lock, "svc_idletime", "cycles"); - for (i=0; i < LUSTRE_MAX_OPCODES; i++) { + lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR, + (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX), + "svc_idletime", "cycles"); + for (i = 0; i < LUSTRE_MAX_OPCODES; i++) 
{ __u32 opcode = ll_rpc_opcode_table[i].opcode; - LPROCFS_COUNTER_INIT(&svc_cntrs->cntr[PTLRPC_LAST_CNTR+i], - svc_counter_config, &svc->srv_lock, - ll_opcode2str(opcode), "cycles"); + lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i, + svc_counter_config, ll_opcode2str(opcode), + "cycles"); } - rc = lprocfs_register_counters(svc_procroot, "service_stats", - svc_cntrs); + + rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats); if (rc < 0) { lprocfs_remove(svc_procroot); - lprocfs_free_counters(svc_cntrs); + lprocfs_free_stats(svc_stats); } else { svc->svc_procroot = svc_procroot; - svc->svc_counters = svc_cntrs; + svc->svc_stats = svc_stats; } } @@ -150,9 +151,9 @@ void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) lprocfs_remove(svc->svc_procroot); svc->svc_procroot = NULL; } - if (svc->svc_counters) { - lprocfs_free_counters(svc->svc_counters); - svc->svc_counters = NULL; + if (svc->svc_stats) { + lprocfs_free_stats(svc->svc_stats); + svc->svc_stats = NULL; } } #endif /* LPROCFS */ diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 7100707..575ed07 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -56,23 +56,23 @@ static inline int opcode_offset(__u32 opc) { (OST_LAST_OPC - OST_FIRST_OPC)); } else if (opc < LDLM_LAST_OPC) { /* LDLM Opcode */ - return (opc - LDLM_FIRST_OPC + - (MDS_LAST_OPC - MDS_FIRST_OPC) + + return (opc - LDLM_FIRST_OPC + + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); } else if (opc < PTLBD_LAST_OPC) { /* Portals Block Device */ - return (opc - PTLBD_FIRST_OPC + + return (opc - PTLBD_FIRST_OPC + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); } else if (opc == OBD_PING) { /* OBD Ping */ - return (opc - OBD_PING + + return (opc - OBD_PING + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); 
- } else { + } else { /* Unknown Opcode */ return -1; } @@ -84,10 +84,10 @@ static inline int opcode_offset(__u32 opc) { + (OST_LAST_OPC - OST_FIRST_OPC)) enum { - PTLRPC_REQWAIT_CNTR = 0, - PTLRPC_SVCEQDEPTH_CNTR = 1, - PTLRPC_SVCIDLETIME_CNTR = 2, - PTLRPC_LAST_CNTR = 3 + PTLRPC_REQWAIT_CNTR = 0, + PTLRPC_SVCIDLETIME_CNTR = 1, + //PTLRPC_SVCEQDEPTH_CNTR, + PTLRPC_LAST_CNTR }; #endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index f9475b0..f2a1089 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -79,12 +79,11 @@ static int ptlrpc_check_event(struct ptlrpc_service *svc, return rc; } -struct ptlrpc_service * -ptlrpc_init_svc(__u32 nevents, __u32 nbufs, - __u32 bufsize, __u32 max_req_size, - int req_portal, int rep_portal, - svc_handler_t handler, char *name, - struct obd_device *obddev) +struct ptlrpc_service * ptlrpc_init_svc(__u32 nevents, __u32 nbufs, + __u32 bufsize, __u32 max_req_size, + int req_portal, int rep_portal, + svc_handler_t handler, char *name, + struct obd_device *obddev) { int i, j, ssize, rc; struct ptlrpc_service *service; @@ -300,8 +299,8 @@ static int ptlrpc_main(void *arg) ptl_event_t *event; int rc = 0; unsigned long flags; - cycles_t workdone_time; - cycles_t svc_workcycles; + cycles_t workdone_time = -1; + cycles_t svc_workcycles = -1; ENTRY; lock_kernel(); @@ -331,7 +330,6 @@ static int ptlrpc_main(void *arg) /* Record that the thread is running */ thread->t_flags = SVC_RUNNING; - svc_workcycles = workdone_time = 0; wake_up(&thread->t_ctl_waitq); /* XXX maintain a list of all managed devices: insert here */ @@ -353,34 +351,42 @@ static int ptlrpc_main(void *arg) if (thread->t_flags & SVC_EVENT) { cycles_t workstart_time; + spin_lock(&svc->srv_lock); thread->t_flags &= ~SVC_EVENT; /* Update Service Statistics */ workstart_time = get_cycles(); - if (workdone_time && (svc->svc_counters != NULL)) { + if (workdone_time != -1 && svc->svc_stats != NULL) { /* Stats for req(n) are 
updated just before * req(n+1) is executed. This avoids need to * reacquire svc->srv_lock after * call to handling_request(). */ - int opc_offset; + int opc; + /* req_waittime */ - LPROCFS_COUNTER_INCR(&svc->svc_counters->cntr[PTLRPC_REQWAIT_CNTR], - (workstart_time - - event->arrival_time)); + lprocfs_counter_add(svc->svc_stats, + PTLRPC_REQWAIT_CNTR, + (workstart_time - + event->arrival_time)); /* svc_eqdepth */ - LPROCFS_COUNTER_INCR(&svc->svc_counters->cntr[PTLRPC_SVCEQDEPTH_CNTR], - 0); /* Wait for b_eq branch */ + /* Wait for b_eq branch + lprocfs_counter_add(svc->svc_stats, + PTLRPC_SVCEQDEPTH_CNTR, + 0); + */ /* svc_idletime */ - LPROCFS_COUNTER_INCR(&svc->svc_counters->cntr[PTLRPC_SVCIDLETIME_CNTR], - (workstart_time - - workdone_time)); + lprocfs_counter_add(svc->svc_stats, + PTLRPC_SVCIDLETIME_CNTR, + (workstart_time - + workdone_time)); /* previous request */ - opc_offset = - opcode_offset(request->rq_reqmsg->opc); - if (opc_offset >= 0) { - LASSERT(opc_offset < LUSTRE_MAX_OPCODES); - LPROCFS_COUNTER_INCR(&svc->svc_counters->cntr[PTLRPC_LAST_CNTR+opc_offset], svc_workcycles); + opc = opcode_offset(request->rq_reqmsg->opc); + if (opc > 0) { + LASSERT(opc < LUSTRE_MAX_OPCODES); + lprocfs_counter_add(svc->svc_stats, opc, + PTLRPC_LAST_CNTR + + svc_workcycles); } } spin_unlock(&svc->srv_lock); diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index e874f5d..0d2d836 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -29,8 +29,10 @@ for NAME in $CONFIGS; do sh runtests fi - [ "$SANITY" != "no" ] && sh sanity.sh - [ "$SANITY" != "no" ] && START=" " CLEAN=" " sh sanity.sh + #[ "$SANITY" != "no" ] && sh sanity.sh + if [ "$SANITY" != "no" ]; then + START=: CLEAN=: sh sanity.sh + fi if [ "$DBENCH" != "no" ]; then mount | grep $MNT || sh llmount.sh diff --git a/lustre/tests/crash-mod.sh b/lustre/tests/crash-mod.sh new file mode 100644 index 0000000..d5ce473 --- /dev/null +++ 
b/lustre/tests/crash-mod.sh @@ -0,0 +1,11 @@ +#!/bin/sh +TMP=${TMP:-/tmp} +BASEDIR=${1:-`dirname $0`/..} +LCMD=$TMP/crash-mod-`hostname` +echo "Storing crash module info in $LCMD" +cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do + MOD="$BASEDIR/$M" + MODNAME=`basename $MOD .o` + + echo mod -s $MODNAME $MOD | tee -a $LCMD +done diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 2bd47ae..7278dfa 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -20,10 +20,10 @@ ${LMC} --add node --node localhost || exit 10 ${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 # configure mds server -${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 +${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 # configure ost -${LMC} --add ost --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30 +${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30 # create client config ${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --ost ost1 || exit 40 diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c index ab5cbdb..7d8cc6b 100644 --- a/lustre/tests/openfile.c +++ b/lustre/tests/openfile.c @@ -49,7 +49,7 @@ void Usage_and_abort(void) int main(int argc, char** argv) { - int i; + int fd; int flags=0; mode_t mode=0; char* fname=NULL; @@ -59,49 +59,40 @@ int main(int argc, char** argv) char c; char* cloned_flags; - if(argc == 1) { + if (argc == 1) Usage_and_abort(); - } while ((c = getopt (argc, argv, "f:m:")) != -1) { switch (c) { case 'f': { char *tmp; - cloned_flags = (char*)malloc(strlen(optarg)); - if (cloned_flags==NULL) { + cloned_flags = (char *)malloc(strlen(optarg)); + if (cloned_flags == NULL) { fprintf(stderr, "Insufficient memory.\n"); exit(-1); } strncpy(cloned_flags, optarg, 
strlen(optarg)); - tmp = strtok(optarg, ":"); - while (tmp) { + for (tmp = strtok(optarg, ":|"); tmp; + tmp = strtok(NULL, ":|")) { int i = 0; #ifdef DEBUG printf("flags = %s\n",tmp); #endif flag_set = 1; - while (flag_table[i].flag != -1) { - int r; - r = strncasecmp(tmp, (flag_table[i].string), - strlen((flag_table[i].string)) ); - - if (r == 0) + for (i = 0; flag_table[i].flag != -1; i++) { + if (!strcmp(tmp, flag_table[i].string)){ + flags |= flag_table[i].flag; break; - i++; + } } - if (flag_table[i].flag != -1) { - flags |= flag_table[i].flag; - } else { + if (flag_table[i].flag == -1) { fprintf(stderr, "No such flag: %s\n", tmp); exit(-1); } - - tmp = strtok(NULL, ":"); - } #ifdef DEBUG printf("flags = %x\n", flags); @@ -112,7 +103,7 @@ int main(int argc, char** argv) #ifdef DEBUG printf("mode = %s\n", optarg); #endif - mode = strtol (optarg, NULL, 8); + mode = strtol(optarg, NULL, 8); mode_set = 1; #ifdef DEBUG printf("mode = %o\n", mode); @@ -139,24 +130,24 @@ int main(int argc, char** argv) if (mode_set) - i = open(fname, flags, mode); + fd = open(fname, flags, mode); else - i = open(fname, flags); + fd = open(fname, flags); - if (i != -1) { + if (fd != -1) { fprintf(stderr, "Succeed in opening file \"%s\"(flags=%s", fname, cloned_flags); if (mode_set) fprintf(stderr, ", mode=%o", mode); fprintf(stderr, ")\n"); - close (i); + close(fd); } else { fprintf(stderr, "Error in opening file \"%s\"(flags=%s", fname, cloned_flags); if (mode_set) fprintf(stderr, ", mode=%o", mode); - fprintf(stderr, ") %s\n", strerror(errno)); + fprintf(stderr, ") %d: %s\n", errno, strerror(errno)); } - return(i); + return errno; } diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 42a1e18..0707f1b 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -31,7 +31,7 @@ MDSSIZE=${MDSSIZE:-100000} OSTSIZE=${OSTSIZE:-100000} UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh} FSTYPE=${FSTYPE:-ext3} - +TIMEOUT=${TIMEOUT:-5} 
do_mds() { $PDSH $MDSNODE "PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; cd $RPWD; $@" || exit $? } @@ -58,6 +58,13 @@ drop_reply() { do_mds "echo 0 > /proc/sys/lustre/fail_loc" } +pause_bulk() { +#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 + do_ost "echo 0x214 > /proc/sys/lustre/fail_loc" + do_client "$1" + do_client "sync" + do_ost "echo 0 > /proc/sys/lustre/fail_loc" +} make_config() { rm -f $CONFIG for NODE in $CLIENT $MDSNODE $OSTNODE; do @@ -98,9 +105,9 @@ unmount_client() { setup() { start_mds ${REFORMAT} - start_ost ${REFORMAT} + start_ost --timeout=$(($TIMEOUT*2)) ${REFORMAT} # XXX we should write our own upcall, when we move this somewhere better. - mount_client --timeout=${TIMEOUT:-5} \ + mount_client --timeout=${TIMEOUT} \ --lustre_upcall=$UPCALL } diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 8731699..11e888b 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -11,11 +11,10 @@ #define DEBUG 0 -void -Usage_and_abort() +void Usage_and_abort(void) { - fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]" \ - " command_to_be_run \n"); + fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]" + " command_to_be_run \n"); exit(-1); } @@ -27,8 +26,7 @@ Usage_and_abort() // ROOT runs "runas" for free // Other users run "runas" requires chmod 6755 "command_to_be_run" -int -main(int argc, char**argv) +int main(int argc, char **argv) { char **my_argv; int status; @@ -38,9 +36,8 @@ main(int argc, char**argv) uid_t user_id; gid_t grp_id; - if(argc == 1) { + if (argc == 1) Usage_and_abort(); - } // get UID and GID while ((c = getopt (argc, argv, "+u:g:h")) != -1) { @@ -48,58 +45,56 @@ main(int argc, char**argv) case 'u': user_id = (uid_t)atoi(optarg); uid_is_set = 1; - if(!gid_is_set) { - grp_id = user_id; - } - break; - - case 'g': - grp_id = (gid_t)atoi(optarg); - gid_is_set = 1; - break; - - case 'h': - Usage_and_abort (); - break; - - default: - // fprintf(stderr, "Bad parameters.\n"); - // Usage_and_abort (); - } + if (!gid_is_set) + 
grp_id = user_id; + break; + + case 'g': + grp_id = (gid_t)atoi(optarg); + gid_is_set = 1; + break; + + case 'h': + Usage_and_abort(); + break; + + default: + //fprintf(stderr, "Bad parameters.\n"); + //Usage_and_abort (); + } } - if (!uid_is_set){ - Usage_and_abort (); - } - + if (!uid_is_set) + Usage_and_abort(); - if(optind == argc) { + if (optind == argc) { fprintf(stderr, "Bad parameters.\n"); Usage_and_abort(); } // assemble the command my_argv = (char**)malloc(sizeof(char*)*(argc+1-optind)); - if(my_argv == NULL) { - fprintf(stderr, "Error in allocating memory. (%s)\n", strerror(errno)); - exit(-1); - } - - for(i=optind; i< argc; i++) { + if (my_argv == NULL) { + fprintf(stderr, "Error in allocating memory. (%s)\n", + strerror(errno)); + exit(-1); + } + + for (i = optind; i < argc; i++) { my_argv[i-optind] = argv[i]; -// printf("%s\n",my_argv[i-optind]); + //printf("%s\n",my_argv[i-optind]); } - my_argv[i-optind]=NULL; + my_argv[i-optind] = NULL; #if DEBUG - system("whoami"); + system("whoami"); #endif // set GID - status = setregid(grp_id, grp_id ); - if( status == -1) { + status = setregid(grp_id, grp_id); + if (status == -1) { fprintf(stderr, "Cannot change grp_ID to %d, errno=%d (%s)\n", - grp_id, errno, strerror(errno) ); + grp_id, errno, strerror(errno) ); exit(-1); } @@ -112,10 +107,9 @@ main(int argc, char**argv) } - fprintf(stderr, "running as USER(%d), Grp (%d): ", - user_id, grp_id ); + fprintf(stderr, "running as USER(%d), Grp (%d): ", user_id, grp_id ); - for(i=0; i> $1 + LINE="`date +%s`: $LINE" + echo $LINE + [ "$1" ] && echo $LINE >> $1 done diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 84572bf..a694ed7 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1,6 +1,14 @@ #!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# e.g. 
ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31" set -e +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"} # bugs 1365 and 1360 respectively + SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH @@ -12,16 +20,25 @@ MCREATE=${MCREATE:-mcreate} TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} -RUNAS_ID=${RUNAS_ID:-500} -RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} +if [ $UID -ne 0 ]; then + RUNAS_ID="$UID" + RUNAS="" +else + RUNAS_ID=${RUNAS_ID:-500} + RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} +fi MOUNT=${MOUNT:-/mnt/lustre} DIR=${DIR:-$MOUNT} export NAME=$NAME + +SAVE_PWD=$PWD + clean() { echo -n "cln.." sh llmountcleanup.sh > /dev/null || exit 20 } + CLEAN=${CLEAN:-clean} start() { echo -n "mnt.." @@ -32,7 +49,40 @@ START=${START:-start} log() { echo "$*" - lctl mark "$*" || /bin/true + lctl mark "$*" || true +} + +run_one() { + if ! mount | grep -q $MOUNT; then + $START + fi + log "== test $1: $2" + test_$1 || error + pass + cd $SAVE_PWD + $CLEAN +} + +run_test() { + for O in $ONLY; do + if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then + echo "" + run_one $1 "$2" + return $? + else + echo -n "." + fi + done + for X in $EXCEPT $ALWAYS_EXCEPT; do + if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then + echo "skipping excluded test $1" + return 0 + fi + done + if [ -z "$ONLY" ]; then + run_one $1 "$2" + return $? + fi } error() { @@ -44,804 +94,754 @@ pass() { echo PASS } -mount | grep $MOUNT || sh llmount.sh +if ! 
mount | grep $MOUNT; then + sh llmount.sh + I_MOUNTED=yes +fi echo preparing for tests involving mounts EXT2_DEV=/tmp/SANITY.LOOP -dd if=/dev/zero of=$EXT2_DEV bs=1k count=1000 -#losetup /dev/loop0 || losetup /dev/loop0 /tmp/SANITY.LOOP -#mke2fs -c /dev/loop0 100 -#losetup -d /dev/loop0 -mke2fs -F /tmp/SANITY.LOOP - - -log '== touch .../f ; rm .../f ======================== test 0' -touch $DIR/f -$CHECKSTAT -t file $DIR/f || error -rm $DIR/f -$CHECKSTAT -a $DIR/f || error -pass -$CLEAN -$START - -log '== mkdir .../d1; mkdir .../d1/d2 ================= test 1' -mkdir $DIR/d1 -mkdir $DIR/d1/d2 -$CHECKSTAT -t dir $DIR/d1/d2 || error -pass -$CLEAN -$START - -log '== rmdir .../d1/d2; rmdir .../d1 ================= test 1b' -rmdir $DIR/d1/d2 -rmdir $DIR/d1 -$CHECKSTAT -a $DIR/d1 || error -pass -$CLEAN -$START - -log '== mkdir .../d2; touch .../d2/f ================== test 2' -mkdir $DIR/d2 -touch $DIR/d2/f -$CHECKSTAT -t file $DIR/d2/f || error -pass -$CLEAN -$START - -log '== rm -r .../d2; touch .../d2/f ================== test 2b' -rm -r $DIR/d2 -$CHECKSTAT -a $DIR/d2 || error -pass -$CLEAN -$START - -log '== mkdir .../d3 ================================== test 3' -mkdir $DIR/d3 -$CHECKSTAT -t dir $DIR/d3 || error -pass -$CLEAN -$START -log '== touch .../d3/f ================================ test 3b' -touch $DIR/d3/f -$CHECKSTAT -t file $DIR/d3/f || error -pass -$CLEAN -$START -log '== rm -r .../d3 ================================== test 3c' -rm -r $DIR/d3 -$CHECKSTAT -a $DIR/d3 || error -pass -$CLEAN -$START - -log '== mkdir .../d4 ================================== test 4' -mkdir $DIR/d4 -$CHECKSTAT -t dir $DIR/d4 || error -pass -$CLEAN -$START -log '== mkdir .../d4/d2 =============================== test 4b' -mkdir $DIR/d4/d2 -$CHECKSTAT -t dir $DIR/d4/d2 || error -pass -$CLEAN -$START - -log '== mkdir .../d5; mkdir .../d5/d2; chmod .../d5/d2 = test 5' -mkdir $DIR/d5 -mkdir $DIR/d5/d2 -chmod 0707 $DIR/d5/d2 -$CHECKSTAT -t dir -p 0707 $DIR/d5/d2 || error -pass 
-$CLEAN -$START - -log '== touch .../f6; chmod .../f6 ==================== test 6' -touch $DIR/f6 -chmod 0666 $DIR/f6 -$CHECKSTAT -t file -p 0666 $DIR/f6 || error -pass -$CLEAN -$START - -log '== mkdir .../d7; mcreate .../d7/f; chmod .../d7/f = test 7' -mkdir $DIR/d7 -$MCREATE $DIR/d7/f -chmod 0666 $DIR/d7/f -$CHECKSTAT -t file -p 0666 $DIR/d7/f || error -pass -$CLEAN -$START - -log '== mkdir .../d7; mcreate .../d7/f2; echo foo > .../d7/f2 = test 7b' -$MCREATE $DIR/d7/f2 -echo -n foo > $DIR/d7/f2 -[ "`cat $DIR/d7/f2`" = "foo" ] || error -$CHECKSTAT -t file -s 3 $DIR/d7/f2 || error -pass -$CLEAN -$START - -log '== mkdir .../d8; touch .../d8/f; chmod .../d8/f == test 8' -mkdir $DIR/d8 -touch $DIR/d8/f -chmod 0666 $DIR/d8/f -$CHECKSTAT -t file -p 0666 $DIR/d8/f || error -pass -$CLEAN -$START - - -log '== mkdir .../d9 .../d9/d2 .../d9/d2/d3 =========== test 9' -mkdir $DIR/d9 -mkdir $DIR/d9/d2 -mkdir $DIR/d9/d2/d3 -$CHECKSTAT -t dir $DIR/d9/d2/d3 || error -pass -$CLEAN -$START - - -log '== mkdir .../d10 .../d10/d2; touch .../d10/d2/f = test 10' -mkdir $DIR/d10 -mkdir $DIR/d10/d2 -touch $DIR/d10/d2/f -$CHECKSTAT -t file $DIR/d10/d2/f || error -pass -$CLEAN -$START - -log '== mkdir .../d11 d11/d2; chmod .../d11/d2 ======= test 11' -mkdir $DIR/d11 -mkdir $DIR/d11/d2 -chmod 0666 $DIR/d11/d2 -chmod 0705 $DIR/d11/d2 -$CHECKSTAT -t dir -p 0705 $DIR/d11/d2 || error -pass -$CLEAN -$START - -log '== mkdir .../d12; touch .../d12/f; chmod .../d12/f == test 12' -mkdir $DIR/d12 -touch $DIR/d12/f -chmod 0666 $DIR/d12/f -chmod 0654 $DIR/d12/f -$CHECKSTAT -t file -p 0654 $DIR/d12/f || error -pass -$CLEAN -$START - -log '== mkdir .../d13; creat .../d13/f; .../d13/f; > .../d13/f == test 13' -mkdir $DIR/d13 -dd if=/dev/zero of=$DIR/d13/f count=10 -> $DIR/d13/f -$CHECKSTAT -t file -s 0 $DIR/d13/f || error -pass -$CLEAN -$START - -log '================================================== test 14' -mkdir $DIR/d14 -touch $DIR/d14/f -rm $DIR/d14/f -$CHECKSTAT -a $DIR/d14/f || error -pass -$CLEAN 
-$START - -log '================================================== test 15' -mkdir $DIR/d15 -touch $DIR/d15/f -mv $DIR/d15/f $DIR/d15/f2 -$CHECKSTAT -t file $DIR/d15/f2 || error -pass -$CLEAN -$START - -log '================================================== test 16' -mkdir $DIR/d16 -touch $DIR/d16/f -rm -rf $DIR/d16/f -$CHECKSTAT -a $DIR/d16/f || error -pass -$CLEAN -$START - -log '== symlinks: create, remove (dangling and real) == test 17' -mkdir $DIR/d17 -touch $DIR/d17/f -ln -s $DIR/d17/f $DIR/d17/l-exist -ln -s no-such-file $DIR/d17/l-dangle -ls -l $DIR/d17 -$CHECKSTAT -l $DIR/d17/f $DIR/d17/l-exist || error -$CHECKSTAT -f -t f $DIR/d17/l-exist || error -$CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error -$CHECKSTAT -fa $DIR/d17/l-dangle || error -rm -f $DIR/l-dangle -rm -f $DIR/l-exist -$CHECKSTAT -a $DIR/l-dangle || error -$CHECKSTAT -a $DIR/l-exist || error -pass -$CLEAN -$START - -log "== touch .../f ; ls ... ========================= test 18" -touch $DIR/f -ls $DIR || error -pass -$CLEAN -$START - -log "== touch .../f ; ls -l ... ====================== test 19" -touch $DIR/f -ls -l $DIR -rm $DIR/f -$CHECKSTAT -a $DIR/f || error -pass -$CLEAN -$START - -log "== touch .../f ; ls -l ... 
====================== test 20" -touch $DIR/f -rm $DIR/f -log "1 done" -touch $DIR/f -rm $DIR/f -log "2 done" -touch $DIR/f -rm $DIR/f -log "3 done" -$CHECKSTAT -a $DIR/f || error -pass -$CLEAN -$START - -log '== write to dangling link ======================== test 21' -mkdir $DIR/d21 -[ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle -ln -s dangle $DIR/d21/link -echo foo >> $DIR/d21/link -cat $DIR/d21/dangle -$CHECKSTAT -t link $DIR/d21/link || error -$CHECKSTAT -f -t file $DIR/d21/link || error -pass -$CLEAN -$START - -log '== unpack tar archive as non-root user =========== test 22' -mkdir $DIR/d22 -[ $UID -ne 0 ] && RUNAS_ID="$UID" -[ $UID -ne 0 ] && RUNAS="" -chown $RUNAS_ID $DIR/d22 -# Tar gets pissy if it can't access $PWD *sigh* -(cd /tmp ; $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | $RUNAS tar xfC - $DIR/d22) -ls -lR $DIR/d22/etc -$CHECKSTAT -t dir $DIR/d22/etc || error -$CHECKSTAT -u \#$RUNAS_ID $DIR/d22/etc || error -pass -$CLEAN -$START - - -log '== O_CREAT|O_EXCL in subdir ====================== test 23' -mkdir $DIR/d23 -$TOEXCL $DIR/d23/f23 -$TOEXCL -e $DIR/d23/f23 || error -pass -$CLEAN -$START - -echo '== rename sanity ================================= test24' -echo '-- same directory rename' -log '-- test 24-R1: touch a ; rename a b' -mkdir $DIR/R1 -touch $DIR/R1/f -mv $DIR/R1/f $DIR/R1/g -$CHECKSTAT -t file $DIR/R1/g || error -pass -$CLEAN -$START - -log '-- test 24-R2: touch a b ; rename a b;' -mkdir $DIR/R2 -touch $DIR/R2/{f,g} -mv $DIR/R2/f $DIR/R2/g -$CHECKSTAT -a $DIR/R2/f || error -$CHECKSTAT -t file $DIR/R2/g || error -pass -$CLEAN -$START - -log '-- test 24-R3: mkdir a ; rename a b;' -mkdir $DIR/R3 -mkdir $DIR/R3/f -mv $DIR/R3/f $DIR/R3/g -$CHECKSTAT -a $DIR/R3/f || error -$CHECKSTAT -t dir $DIR/R3/g || error -pass -$CLEAN -$START - -log '-- test 24-R4: mkdir a b ; rename a b;' -mkdir $DIR/R4 -mkdir $DIR/R4/{f,g} -perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";" -$CHECKSTAT -a $DIR/R4/f || error -$CHECKSTAT -t dir $DIR/R4/g || error 
-pass -$CLEAN -$START - -echo '-- cross directory renames --' -log '-- test 24-R5: touch a ; rename a b' -mkdir $DIR/R5{a,b} -touch $DIR/R5a/f -mv $DIR/R5a/f $DIR/R5b/g -$CHECKSTAT -a $DIR/R5a/f || error -$CHECKSTAT -t file $DIR/R5b/g || error -pass -$CLEAN -$START - -log '-- test 24-R6: touch a ; rename a b' -mkdir $DIR/R6{a,b} -touch $DIR/R6a/f $DIR/R6b/g -mv $DIR/R6a/f $DIR/R6b/g -$CHECKSTAT -a $DIR/R6a/f || error -$CHECKSTAT -t file $DIR/R6b/g || error -pass -$CLEAN -$START - -log '-- test 24-R7: touch a ; rename a b' -mkdir $DIR/R7{a,b} -mkdir $DIR/R7a/f -mv $DIR/R7a/f $DIR/R7b/g -$CHECKSTAT -a $DIR/R7a/f || error -$CHECKSTAT -t dir $DIR/R7b/g || error -pass -$CLEAN -$START - -log '-- test 24-R8: touch a ; rename a b' -mkdir $DIR/R8{a,b} -mkdir $DIR/R8a/f $DIR/R8b/g -perl -e "rename \"$DIR/R8a/f\", \"$DIR/R8b/g\";" -$CHECKSTAT -a $DIR/R8a/f || error -$CHECKSTAT -t dir $DIR/R8b/g || error -pass -$CLEAN -$START - -echo "-- rename error cases" -log "-- test 24-R9 target error: touch f ; mkdir a ; rename f a" -mkdir $DIR/R9 -mkdir $DIR/R9/a -touch $DIR/R9/f -perl -e "rename \"$DIR/R9/f\", \"$DIR/R9/a\";" -$CHECKSTAT -t file $DIR/R9/f || error -$CHECKSTAT -t dir $DIR/R9/a || error -$CHECKSTAT -a file $DIR/R9/a/f || error -pass -$CLEAN -$START - -log "--test 24-R10 source does not exist" -mkdir $DIR/R10 -perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\"" -$CHECKSTAT -t dir $DIR/R10 || error -$CHECKSTAT -a $DIR/R10/f || error -$CHECKSTAT -a $DIR/R10/g || error -pass -$CLEAN -$START - -echo '== symlink sanity ================================ test25' -log "--test 25.1 create file in symlinked directory" -mkdir $DIR/d25 -ln -s d25 $DIR/s25 -touch $DIR/s25/foo -pass -$CLEAN -$START - -log "--test 25.2 lookup file in symlinked directory" -$CHECKSTAT -t file $DIR/s25/foo -pass -$CLEAN -$START - -log "--test 26 multiple component symlink" -mkdir $DIR/d26 -mkdir $DIR/d26/d26-2 -ln -s d26/d26-2 $DIR/s26 -touch $DIR/s26/foo -pass -$CLEAN -$START - -log "--test 26.1 multiple 
component symlink at the end of a lookup" -ln -s d26/d26-2/foo $DIR/s26-2 -touch $DIR/s26-2 -pass -$CLEAN -$START - -log "--test 26.2 a chain of symlinks" -mkdir $DIR/d26.2 -touch $DIR/d26.2/foo -ln -s d26.2 $DIR/s26.2-1 -ln -s s26.2-1 $DIR/s26.2-2 -ln -s s26.2-2 $DIR/s26.2-3 -chmod 0666 $DIR/s26.2-3/foo -pass -$CLEAN -$START +dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null +mke2fs -F $EXT2_DEV > /dev/null + +test_0() { + touch $DIR/f + $CHECKSTAT -t file $DIR/f || error + rm $DIR/f + $CHECKSTAT -a $DIR/f || error +} +run_test 0 "touch .../f ; rm .../f =============================" + +test_1a() { + mkdir $DIR/d1 + mkdir $DIR/d1/d2 + $CHECKSTAT -t dir $DIR/d1/d2 || error +} +run_test 1a "mkdir .../d1; mkdir .../d1/d2 =====================" + +test_1b() { + rmdir $DIR/d1/d2 + rmdir $DIR/d1 + $CHECKSTAT -a $DIR/d1 || error +} +run_test 1b "rmdir .../d1/d2; rmdir .../d1 =====================" + +test_2a() { + mkdir $DIR/d2 + touch $DIR/d2/f + $CHECKSTAT -t file $DIR/d2/f || error +} +run_test 2a "mkdir .../d2; touch .../d2/f ======================" + +test_2b() { + rm -r $DIR/d2 + $CHECKSTAT -a $DIR/d2 || error +} +run_test 2b "rm -r .../d2; touch .../d2/f ======================" + +test_3a() { + mkdir $DIR/d3 + $CHECKSTAT -t dir $DIR/d3 || error +} +run_test 3a "mkdir .../d3 ======================================" + +test_3b() { + touch $DIR/d3/f + $CHECKSTAT -t file $DIR/d3/f || error +} +run_test 3b "touch .../d3/f ====================================" + +test_3c() { + rm -r $DIR/d3 + $CHECKSTAT -a $DIR/d3 || error +} +run_test 3c "rm -r .../d3 ======================================" + +test_4a() { + mkdir $DIR/d4 + $CHECKSTAT -t dir $DIR/d4 || error +} +run_test 4a "mkdir .../d4 ======================================" + +test_4b() { + mkdir $DIR/d4/d2 + $CHECKSTAT -t dir $DIR/d4/d2 || error +} +run_test 4b "mkdir .../d4/d2 ===================================" + +test_5() { + mkdir $DIR/d5 + mkdir $DIR/d5/d2 + chmod 0707 $DIR/d5/d2 + $CHECKSTAT -t 
dir -p 0707 $DIR/d5/d2 || error +} +run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============" + +test_6() { + touch $DIR/f6 + chmod 0666 $DIR/f6 + $CHECKSTAT -t file -p 0666 $DIR/f6 || error +} +run_test 6 "touch .../f6; chmod .../f6 =========================" + +test_7a() { + mkdir $DIR/d7 + $MCREATE $DIR/d7/f + chmod 0666 $DIR/d7/f + $CHECKSTAT -t file -p 0666 $DIR/d7/f || error +} +run_test 7a "mkdir .../d7; mcreate .../d7/f; chmod .../d7/f ====" + +test_7b() { + $MCREATE $DIR/d7/f2 + echo -n foo > $DIR/d7/f2 + [ "`cat $DIR/d7/f2`" = "foo" ] || error + $CHECKSTAT -t file -s 3 $DIR/d7/f2 || error +} +run_test 7b "mkdir .../d7; mcreate d7/f2; echo foo > d7/f2 =====" + +test_8() { + mkdir $DIR/d8 + touch $DIR/d8/f + chmod 0666 $DIR/d8/f + $CHECKSTAT -t file -p 0666 $DIR/d8/f || error +} +run_test 8 "mkdir .../d8; touch .../d8/f; chmod .../d8/f =======" + +test_9() { + mkdir $DIR/d9 + mkdir $DIR/d9/d2 + mkdir $DIR/d9/d2/d3 + $CHECKSTAT -t dir $DIR/d9/d2/d3 || error +} +run_test 9 "mkdir .../d9 .../d9/d2 .../d9/d2/d3 ================" + +test_10() { + mkdir $DIR/d10 + mkdir $DIR/d10/d2 + touch $DIR/d10/d2/f + $CHECKSTAT -t file $DIR/d10/d2/f || error +} +run_test 10 "mkdir .../d10 .../d10/d2; touch .../d10/d2/f ======" + +test_11() { + mkdir $DIR/d11 + mkdir $DIR/d11/d2 + chmod 0666 $DIR/d11/d2 + chmod 0705 $DIR/d11/d2 + $CHECKSTAT -t dir -p 0705 $DIR/d11/d2 || error +} +run_test 11 "mkdir .../d11 d11/d2; chmod .../d11/d2 ============" + +test_12() { + mkdir $DIR/d12 + touch $DIR/d12/f + chmod 0666 $DIR/d12/f + chmod 0654 $DIR/d12/f + $CHECKSTAT -t file -p 0654 $DIR/d12/f || error +} +run_test 12 "touch .../d12/f; chmod .../d12/f .../d12/f ========" + +test_13() { + mkdir $DIR/d13 + dd if=/dev/zero of=$DIR/d13/f count=10 + > $DIR/d13/f + $CHECKSTAT -t file -s 0 $DIR/d13/f || error +} +run_test 13 "creat .../d13/f; dd .../d13/f; > .../d13/f ========" + +test_14() { + mkdir $DIR/d14 + touch $DIR/d14/f + rm $DIR/d14/f + $CHECKSTAT -a $DIR/d14/f || error +} 
+run_test 14 "touch .../d14/f; rm .../d14/f; rm .../d14/f =======" + +test_15() { + mkdir $DIR/d15 + touch $DIR/d15/f + mv $DIR/d15/f $DIR/d15/f2 + $CHECKSTAT -t file $DIR/d15/f2 || error +} +run_test 15 "touch .../d15/f; mv .../d15/f .../d15/f2 ==========" + +test_16() { + mkdir $DIR/d16 + touch $DIR/d16/f + rm -rf $DIR/d16/f + $CHECKSTAT -a $DIR/d16/f || error +} +run_test 16 "touch .../d16/f; rm -rf .../d16/f =================" + +test_17a() { + mkdir $DIR/d17 + touch $DIR/d17/f + ln -s $DIR/d17/f $DIR/d17/l-exist + ls -l $DIR/d17 + $CHECKSTAT -l $DIR/d17/f $DIR/d17/l-exist || error + $CHECKSTAT -f -t f $DIR/d17/l-exist || error + rm -f $DIR/l-exist + $CHECKSTAT -a $DIR/l-exist || error +} +run_test 17a "symlinks: create, remove (real) ==================" + +test_17b() { + ln -s no-such-file $DIR/d17/l-dangle + ls -l $DIR/d17 + $CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error + $CHECKSTAT -fa $DIR/d17/l-dangle || error + rm -f $DIR/l-dangle + $CHECKSTAT -a $DIR/l-dangle || error +} +run_test 17b "symlinks: create, remove (dangling) ==============" + +test_18() { + touch $DIR/f + ls $DIR || error +} +run_test 18 "touch .../f ; ls ... ==============================" + +test_19() { + touch $DIR/f + ls -l $DIR + rm $DIR/f + $CHECKSTAT -a $DIR/f || error +} +run_test 19 "touch .../f ; ls -l ... ===========================" + +test_20() { + touch $DIR/f + rm $DIR/f + log "1 done" + touch $DIR/f + rm $DIR/f + log "2 done" + touch $DIR/f + rm $DIR/f + log "3 done" + $CHECKSTAT -a $DIR/f || error +} +run_test 20 "touch .../f ; ls -l ... 
===========================" + +test_21() { + mkdir $DIR/d21 + [ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle + ln -s dangle $DIR/d21/link + echo foo >> $DIR/d21/link + cat $DIR/d21/dangle + $CHECKSTAT -t link $DIR/d21/link || error + $CHECKSTAT -f -t file $DIR/d21/link || error +} +run_test 21 "write to dangling link ============================" + +test_22() { + mkdir $DIR/d22 + chown $RUNAS_ID $DIR/d22 + # Tar gets pissy if it can't access $PWD *sigh* + (cd /tmp; + $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \ + $RUNAS tar xfC - $DIR/d22) + ls -lR $DIR/d22/etc + $CHECKSTAT -t dir $DIR/d22/etc || error + $CHECKSTAT -u \#$RUNAS_ID $DIR/d22/etc || error +} +run_test 22 "unpack tar archive as non-root user ===============" + +test_23() { + mkdir $DIR/d23 + $TOEXCL $DIR/d23/f23 + $TOEXCL -e $DIR/d23/f23 || error +} +run_test 23 "O_CREAT|O_EXCL in subdir ==========================" + +test_24a() { + echo '============ rename sanity =================================' + echo '-- same directory rename' + mkdir $DIR/R1 + touch $DIR/R1/f + mv $DIR/R1/f $DIR/R1/g + $CHECKSTAT -t file $DIR/R1/g || error +} +run_test 24a "touch .../R1/f; rename .../R1/f .../R1/g =========" + +test_24b() { + mkdir $DIR/R2 + touch $DIR/R2/{f,g} + mv $DIR/R2/f $DIR/R2/g + $CHECKSTAT -a $DIR/R2/f || error + $CHECKSTAT -t file $DIR/R2/g || error +} +run_test 24b "touch .../R2/{f,g}; rename .../R2/f .../R2/g =====" + +test_24c() { + mkdir $DIR/R3 + mkdir $DIR/R3/f + mv $DIR/R3/f $DIR/R3/g + $CHECKSTAT -a $DIR/R3/f || error + $CHECKSTAT -t dir $DIR/R3/g || error +} +run_test 24c "mkdir .../R3/f; rename .../R3/f .../R3/g =========" + +test_24d() { + mkdir $DIR/R4 + mkdir $DIR/R4/{f,g} + perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";" + $CHECKSTAT -a $DIR/R4/f || error + $CHECKSTAT -t dir $DIR/R4/g || error +} +run_test 24d "mkdir .../R4/{f,g}; rename .../R4/f .../R4/g =====" + +test_24e() { + echo '-- cross directory renames --' + mkdir $DIR/R5{a,b} + touch $DIR/R5a/f + mv $DIR/R5a/f 
$DIR/R5b/g + $CHECKSTAT -a $DIR/R5a/f || error + $CHECKSTAT -t file $DIR/R5b/g || error +} +run_test 24e "touch .../R5a/f; rename .../R5a/f .../R5b/g ======" + +test_24f() { + mkdir $DIR/R6{a,b} + touch $DIR/R6a/f $DIR/R6b/g + mv $DIR/R6a/f $DIR/R6b/g + $CHECKSTAT -a $DIR/R6a/f || error + $CHECKSTAT -t file $DIR/R6b/g || error +} +run_test 24f "touch .../R6a/f R6b/g; mv .../R6a/f .../R6b/g ====" + +test_24g() { + mkdir $DIR/R7{a,b} + mkdir $DIR/R7a/d + mv $DIR/R7a/d $DIR/R7b/e + $CHECKSTAT -a $DIR/R7a/d || error + $CHECKSTAT -t dir $DIR/R7b/e || error +} +run_test 24g "mkdir .../R7a/d; rename .../R7a/d .../R5b/e ======" + +test_24h() { + mkdir $DIR/R8{a,b} + mkdir $DIR/R8a/d $DIR/R8b/e + perl -e "rename \"$DIR/R8a/d\", \"$DIR/R8b/e\";" + $CHECKSTAT -a $DIR/R8a/d || error + $CHECKSTAT -t dir $DIR/R8b/e || error +} +run_test 24h "mkdir .../R8{a,b} R8a/{d,e}; mv .../R8a/d .../R8b/e" + +test_24i() { + echo "-- rename error cases" + mkdir $DIR/R9 + mkdir $DIR/R9/a + touch $DIR/R9/f + perl -e "rename \"$DIR/R9/f\", \"$DIR/R9/a\";" + $CHECKSTAT -t file $DIR/R9/f || error + $CHECKSTAT -t dir $DIR/R9/a || error + $CHECKSTAT -a file $DIR/R9/a/f || error +} +run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a =====" + +test_24j() { + mkdir $DIR/R10 + perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\"" + $CHECKSTAT -t dir $DIR/R10 || error + $CHECKSTAT -a $DIR/R10/f || error + $CHECKSTAT -a $DIR/R10/g || error +} +run_test 24j "source does not exist ============================" + +test_25a() { + echo '== symlink sanity =======================================' + mkdir $DIR/d25 + ln -s d25 $DIR/s25 + touch $DIR/s25/foo || error +} +run_test 25a "create file in symlinked directory ===============" + +test_25b() { + $CHECKSTAT -t file $DIR/s25/foo || error +} +run_test 25b "lookup file in symlinked directory ===============" + +test_26a() { + mkdir $DIR/d26 + mkdir $DIR/d26/d26-2 + ln -s d26/d26-2 $DIR/s26 + touch $DIR/s26/foo || error +} +run_test 26a "multiple 
component symlink =======================" + +test_26b() { + ln -s d26/d26-2/foo $DIR/s26-2 + touch $DIR/s26-2 || error +} +run_test 26b "multiple component symlink at end of lookup ======" + +test_26c() { + mkdir $DIR/d26.2 + touch $DIR/d26.2/foo + ln -s d26.2 $DIR/s26.2-1 + ln -s s26.2-1 $DIR/s26.2-2 + ln -s s26.2-2 $DIR/s26.2-3 + chmod 0666 $DIR/s26.2-3/foo +} +run_test 26c "chain of symlinks ================================" # recursive symlinks (bug 439) -log "--test 26.3 create multiple component recursive symlink" -ln -s d26-3/foo $DIR/d26-3 -pass -$CLEAN -$START - -log "--test 26.3 unlink multiple component recursive symlink" -rm $DIR/d26-3 -pass -$CLEAN -$START - -echo '== stripe sanity ================================= test27' -log "--test 27.1 create one stripe" -mkdir $DIR/d27 -$LSTRIPE $DIR/d27/f0 8192 0 1 -$CHECKSTAT -t file $DIR/d27/f0 -log "--test 27.2 write to one stripe file" -cp /etc/hosts $DIR/d27/f0 -pass - -log "--test 27.3 create two stripe file f01" -$LSTRIPE $DIR/d27/f01 8192 0 2 -log "--test 27.4 write to two stripe file file f01" -dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 -pass - -log "--test 27.5 create file with default settings" -$LSTRIPE $DIR/d27/fdef 0 -1 0 -$CHECKSTAT -t file $DIR/d27/fdef -#dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 - -log "--test 27.6 lstripe existing file (should return error)" -$LSTRIPE $DIR/d27/f12 8192 1 2 -! 
$LSTRIPE $DIR/d27/f12 8192 1 2 -$CHECKSTAT -t file $DIR/d27/f12 -#dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 -pass - - -log "--test 27.7 lstripe with bad stripe size (should return error on LOV)" -$LSTRIPE $DIR/d27/fbad 100 1 2 || /bin/true -dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 -pass -$CLEAN -$START - -log "--test 27.8 mcreate file without objects to test lfind" -$MCREATE $DIR/d27/fnone || error - -log "--test 27.9 lfind " -$LFIND $DIR/d27 -pass -$CLEAN -$START - -log '== create/mknod/mkdir with bad file types ======== test28' -mkdir $DIR/d28 -$CREATETEST $DIR/d28/ct || error -pass - -log '== IT_GETATTR regression ======================== test29' -mkdir $DIR/d29 -touch $DIR/d29/foo -ls -l $DIR/d29 -MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_*} -LOCKCOUNTORIG=`cat $MDCDIR/lock_count` -LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` -ls -l $DIR/d29 -LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count` -LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count` -if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ] || [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then - error -fi -pass -$CLEAN -$START - -log '== run binary from Lustre (execve) =============== test30' -cp `which ls` $DIR -$DIR/ls / -$CLEAN -$START - -log '== open-unlink file ============================== test31' -./openunlink $DIR/f31 $DIR/f31 || error -pass - - -log '== more mountpoints and symlinks ================= test32' - -log '-- test 32-R1: stat d32/ext2-mountpoint/..' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -$CHECKSTAT -t dir $DIR/d32/ext2-mountpoint/.. || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R2: open d32/ext2-mountpoint/..' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -ls -al $DIR/d32/ext2-mountpoint/.. 
|| error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START +test_26d() { + ln -s d26-3/foo $DIR/d26-3 +} +run_test 26d "create multiple component recursive symlink ======" + +test_26e() { + rm $DIR/d26-3 +} +run_test 26e "unlink multiple component recursive symlink ======" + +test_27a() { + echo '== stripe sanity ========================================' + mkdir $DIR/d27 + $LSTRIPE $DIR/d27/f0 8192 0 1 + $CHECKSTAT -t file $DIR/d27/f0 + pass + log "test_27b: write to one stripe file =========================" + cp /etc/hosts $DIR/d27/f0 +} +run_test 27a "one stripe file ==================================" + +test_27c() { + $LSTRIPE $DIR/d27/f01 8192 0 2 + pass + log "test_27d: write to two stripe file file f01 ================" + dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 +} +run_test 27c "create two stripe file f01 =======================" + +test_27d() { + $LSTRIPE $DIR/d27/fdef 0 -1 0 + $CHECKSTAT -t file $DIR/d27/fdef + #dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 +} +run_test 27d "create file with default settings ================" + +test_27e() { + $LSTRIPE $DIR/d27/f12 8192 1 2 + $LSTRIPE $DIR/d27/f12 8192 1 2 && error + $CHECKSTAT -t file $DIR/d27/f12 || error + #dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 +} +run_test 27e "lstripe existing file (should return error) ======" + + +test_27f() { + $LSTRIPE $DIR/d27/fbad 100 1 2 || true + dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 +} +run_test 27f "lstripe with bad stripe size (should return error on LOV)" + +test_27g() { + $MCREATE $DIR/d27/fnone || error + pass + log "test 27.9: lfind ============================================" + $LFIND $DIR/d27 +} +run_test 27g "mcreate file without objects to test lfind =======" + +test_28() { + mkdir $DIR/d28 + $CREATETEST $DIR/d28/ct || error +} +run_test 28 "create/mknod/mkdir with bad file types ============" + +test_29() { + mkdir $DIR/d29 + touch $DIR/d29/foo + ls -l $DIR/d29 + MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_*} + LOCKCOUNTORIG=`cat 
$MDCDIR/lock_count` + LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` + ls -l $DIR/d29 + LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count` + LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count` + if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ]; then + echo "CURRENT: $LOCKCOUNTCURRENT > $LOCKCOUNTORIG" + error + fi + if [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then + echo "UNUSED: $LOCKUNUSEDCOUNTCURRENT > $LOCKUNUSEDCOUNTORIG" + error + fi +} +run_test 29 "IT_GETATTR regression ============================" + +test_30() { + cp `which ls` $DIR + $DIR/ls / + rm $DIR/ls +} +run_test 30 "run binary from Lustre (execve) ===================" + +test_31() { + ./openunlink $DIR/f31 $DIR/f31 || error +} +run_test 31 "open-unlink file ==================================" + +test_32a() { + echo "== more mountpoints and symlinks =================" + [ -e $DIR/d32a ] && rm -fr $DIR/d32a + mkdir -p $DIR/d32a/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32a/ext2-mountpoint || error + $CHECKSTAT -t dir $DIR/d32a/ext2-mountpoint/.. || error + umount $DIR/d32a/ext2-mountpoint || error +} +run_test 32a "stat d32a/ext2-mountpoint/.. =====================" + +test_32b() { + [ -e $DIR/d32b ] && rm -fr $DIR/d32b + mkdir -p $DIR/d32b/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32b/ext2-mountpoint || error + ls -al $DIR/d32b/ext2-mountpoint/.. || error + umount $DIR/d32b/ext2-mountpoint || error +} +run_test 32b "open d32b/ext2-mountpoint/.. 
=====================" -log '-- test 32-R3: stat d32/ext2-mountpoint/../d2/test_dir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -mkdir -p $DIR/d32/d2/test_dir -$CHECKSTAT -t dir $DIR/d32/ext2-mountpoint/../d2/test_dir || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R4: open d32/ext2-mountpoint/../d2/test_dir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -mkdir -p $DIR/d32/d2/test_dir -ls -al $DIR/d32/ext2-mountpoint/../d2/test_dir || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R5: stat d32/symlink->tmp/symlink->lustre-subdir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/d32 $TMP_DIR/symlink11 -ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 -$CHECKSTAT -t link $DIR/d32/tmp/symlink11 || error -$CHECKSTAT -t link $DIR/d32/symlink01 || error -pass -$CLEAN -$START - -log '-- test 32-R6: open d32/symlink->tmp/symlink->lustre-subdir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/d32 $TMP_DIR/symlink11 -ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 -ls $DIR/d32/tmp/symlink11 || error -ls $DIR/d32/symlink01 || error -pass -$CLEAN -$START - -log '-- test 32-R7: stat d32/symlink->tmp/symlink->lustre-subdir/test_dir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -[ -e $DIR/test_dir ] && rm -fr $DIR/test_dir -mkdir -p $DIR/test_dir -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/test_dir $TMP_DIR/symlink12 -ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 -$CHECKSTAT -t link $DIR/d32/tmp/symlink12 || error -$CHECKSTAT -t link $DIR/d32/symlink02 || error -$CHECKSTAT -t dir -f $DIR/d32/tmp/symlink12 || error -$CHECKSTAT -t dir -f $DIR/d32/symlink02 || error -pass -$CLEAN -$START - -log '-- test 32-R8: open 
d32/symlink->tmp/symlink->lustre-subdir/test_dir' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -[ -e $DIR/test_dir ] && rm -fr $DIR/test_dir -mkdir -p $DIR/test_dir -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/test_dir $TMP_DIR/symlink12 -ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 -ls $DIR/d32/tmp/symlink12 || error -ls $DIR/d32/symlink02 || error -pass -$CLEAN -$START - -log '-- test 32-R9: stat d32/ext2-mountpoint/../test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -touch $DIR/d32/test_file -$CHECKSTAT -t file $DIR/d32/ext2-mountpoint/../test_file || error -umount $DIR/d32/ext2-mountpoint -pass -$CLEAN -$START - -log '-- test 32-R10: open d32/ext2-mountpoint/../test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -touch $DIR/d32/test_file -cat $DIR/d32/ext2-mountpoint/../test_file || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R11: stat d32/ext2-mountpoint/../d2/test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -mkdir -p $DIR/d32/d2 -touch $DIR/d32/d2/test_file -$CHECKSTAT -t file $DIR/d32/ext2-mountpoint/../d2/test_file || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R12: open d32/ext2-mountpoint/../d2/test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/ext2-mountpoint -mount -t ext2 -o loop $EXT2_DEV $DIR/d32/ext2-mountpoint -mkdir -p $DIR/d32/d2 -touch $DIR/d32/d2/test_file -cat $DIR/d32/ext2-mountpoint/../d2/test_file || error -umount $DIR/d32/ext2-mountpoint/ -pass -$CLEAN -$START - -log '-- test 32-R13: stat d32/symlink->tmp/symlink->lustre-root' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR $TMP_DIR/symlink11 -ln -s $TMP_DIR/symlink11 
$TMP_DIR/../symlink01 -$CHECKSTAT -t link $DIR/d32/tmp/symlink11 || error -$CHECKSTAT -t link $DIR/d32/symlink01 || error -pass -$CLEAN -$START - -log '-- test 32-R14: open d32/symlink->tmp/symlink->lustre-root' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR $TMP_DIR/symlink11 -ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 -ls -l $DIR/d32/tmp/symlink11 || error -ls -l $DIR/d32/symlink01 || error -pass -$CLEAN -$START - -log '-- test 32-R15: stat d32/symlink->tmp/symlink->lustre-root/test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -[ -e $DIR/test_file ] && rm -fr $DIR/test_file -touch $DIR/test_file -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/test_file $TMP_DIR/symlink12 -ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 -$CHECKSTAT -t link $DIR/d32/tmp/symlink12 || error -$CHECKSTAT -t link $DIR/d32/symlink02 || error -$CHECKSTAT -t file -f $DIR/d32/tmp/symlink12 || error -$CHECKSTAT -t file -f $DIR/d32/symlink02 || error -pass -$CLEAN -$START - -log '-- test 32-R16: open d32/symlink->tmp/symlink->lustre-root/test_file' -[ -e $DIR/d32 ] && rm -fr $DIR/d32 -[ -e $DIR/test_file ] && rm -fr $DIR/test_file -touch $DIR/test_file -mkdir -p $DIR/d32/tmp -TMP_DIR=$DIR/d32/tmp -ln -s $DIR/test_file $TMP_DIR/symlink12 -ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 -cat $DIR/d32/tmp/symlink12 || error -cat $DIR/d32/symlink02 || error -pass -$CLEAN -$START - -log '-- test 33: write file with mode 444 (should return error)' +test_32c() { + [ -e $DIR/d32c ] && rm -fr $DIR/d32c + mkdir -p $DIR/d32c/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32c/ext2-mountpoint || error + mkdir -p $DIR/d32c/d2/test_dir + $CHECKSTAT -t dir $DIR/d32c/ext2-mountpoint/../d2/test_dir || error + umount $DIR/d32c/ext2-mountpoint || error +} +run_test 32c "stat d32c/ext2-mountpoint/../d2/test_dir =========" + +test_32d() { + [ -e $DIR/d32d ] && rm -fr $DIR/d32d + mkdir -p $DIR/d32d/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV 
$DIR/d32d/ext2-mountpoint || error + mkdir -p $DIR/d32d/d2/test_dir + ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error + umount $DIR/d32d/ext2-mountpoint || error +} +run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ==========" + +test_32e() { + [ -e $DIR/d32e ] && rm -fr $DIR/d32e + mkdir -p $DIR/d32e/tmp + TMP_DIR=$DIR/d32e/tmp + ln -s $DIR/d32e $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error + $CHECKSTAT -t link $DIR/d32e/symlink01 || error +} +run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir =====" + +test_32f() { + [ -e $DIR/d32f ] && rm -fr $DIR/d32f + mkdir -p $DIR/d32f/tmp + TMP_DIR=$DIR/d32f/tmp + ln -s $DIR/d32f $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + ls $DIR/d32f/tmp/symlink11 || error + ls $DIR/d32f/symlink01 || error +} +run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir =====" + +test_32g() { + [ -e $DIR/d32g ] && rm -fr $DIR/d32g + [ -e $DIR/test_dir ] && rm -fr $DIR/test_dir + mkdir -p $DIR/test_dir + mkdir -p $DIR/d32g/tmp + TMP_DIR=$DIR/d32g/tmp + ln -s $DIR/test_dir $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + $CHECKSTAT -t link $DIR/d32g/tmp/symlink12 || error + $CHECKSTAT -t link $DIR/d32g/symlink02 || error + $CHECKSTAT -t dir -f $DIR/d32g/tmp/symlink12 || error + $CHECKSTAT -t dir -f $DIR/d32g/symlink02 || error +} +run_test 32g "stat d32g/symlink->tmp/symlink->lustre-subdir/test_dir" + +test_32h() { + [ -e $DIR/d32h ] && rm -fr $DIR/d32h + [ -e $DIR/test_dir ] && rm -fr $DIR/test_dir + mkdir -p $DIR/test_dir + mkdir -p $DIR/d32h/tmp + TMP_DIR=$DIR/d32h/tmp + ln -s $DIR/test_dir $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + ls $DIR/d32h/tmp/symlink12 || error + ls $DIR/d32h/symlink02 || error +} +run_test 32h "open d32h/symlink->tmp/symlink->lustre-subdir/test_dir" + +test_32i() { + [ -e $DIR/d32i ] && rm -fr $DIR/d32i + mkdir -p $DIR/d32i/ext2-mountpoint + 
mount -t ext2 -o loop $EXT2_DEV $DIR/d32i/ext2-mountpoint || error + touch $DIR/d32i/test_file + $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error + umount $DIR/d32i/ext2-mountpoint || error +} +run_test 32i "stat d32i/ext2-mountpoint/../test_file ============" + +test_32j() { + [ -e $DIR/d32j ] && rm -fr $DIR/d32j + mkdir -p $DIR/d32j/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32j/ext2-mountpoint || error + touch $DIR/d32j/test_file + cat $DIR/d32j/ext2-mountpoint/../test_file || error + umount $DIR/d32j/ext2-mountpoint || error +} +run_test 32j "open d32j/ext2-mountpoint/../test_file ============" + +test_32k() { + [ -e $DIR/d32k ] && rm -fr $DIR/d32k + mkdir -p $DIR/d32k/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint + mkdir -p $DIR/d32k/d2 + touch $DIR/d32k/d2/test_file || error + $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error + umount $DIR/d32k/ext2-mountpoint || error +} +run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file =========" + +test_32l() { + [ -e $DIR/d32l ] && rm -fr $DIR/d32l + mkdir -p $DIR/d32l/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error + mkdir -p $DIR/d32l/d2 + touch $DIR/d32l/d2/test_file + cat $DIR/d32l/ext2-mountpoint/../d2/test_file || error + umount $DIR/d32l/ext2-mountpoint || error +} +run_test 32l "open d32l/ext2-mountpoint/../d2/test_file =========" + +test_32m() { + [ -e $DIR/d32m ] && rm -fr $DIR/d32m + mkdir -p $DIR/d32m/tmp + TMP_DIR=$DIR/d32m/tmp + ln -s $DIR $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error + $CHECKSTAT -t link $DIR/d32m/symlink01 || error +} +run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root =======" + +test_32n() { + [ -e $DIR/d32n ] && rm -fr $DIR/d32n + mkdir -p $DIR/d32n/tmp + TMP_DIR=$DIR/d32n/tmp + ln -s $DIR $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + ls -l 
$DIR/d32n/tmp/symlink11 || error + ls -l $DIR/d32n/symlink01 || error +} +run_test 32n "open d32n/symlink->tmp/symlink->lustre-root =======" + +test_32o() { + [ -e $DIR/d32o ] && rm -fr $DIR/d32o + [ -e $DIR/test_file ] && rm -fr $DIR/test_file + touch $DIR/test_file + mkdir -p $DIR/d32o/tmp + TMP_DIR=$DIR/d32o/tmp + ln -s $DIR/test_file $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + $CHECKSTAT -t link $DIR/d32o/tmp/symlink12 || error + $CHECKSTAT -t link $DIR/d32o/symlink02 || error + $CHECKSTAT -t file -f $DIR/d32o/tmp/symlink12 || error + $CHECKSTAT -t file -f $DIR/d32o/symlink02 || error +} +run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file" + +test_32p() { + [ -e $DIR/d32p ] && rm -fr $DIR/d32p + [ -e $DIR/test_file ] && rm -fr $DIR/test_file + touch $DIR/test_file + mkdir -p $DIR/d32p/tmp + TMP_DIR=$DIR/d32p/tmp + ln -s $DIR/test_file $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + cat $DIR/d32p/tmp/symlink12 || error + cat $DIR/d32p/symlink02 || error +} +run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file" + # chmod 444 /mnt/lustre/somefile # open(/mnt/lustre/somefile, O_RDWR) # Should return -1 -[ $UID -ne 0 ] && RUNAS_ID="$UID" -[ $UID -ne 0 ] && RUNAS="" -[ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file -touch $DIR/test_33_file -chmod 444 $DIR/test_33_file -chown $RUNAS_ID $DIR/test_33_file -$RUNAS openfile -f O_RDWR $DIR/test_33_file && error -pass -$CLEAN -$START - -if [ -n "$BUG1360" ]; then -log '-- test 34: execute a file with mode 444 (should return error)' -[ $UID -ne 0 ] && RUNAS_ID="$UID" -[ $UID -ne 0 ] && RUNAS="" -[ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file -cp /bin/sh $DIR/test_35_file -chmod 444 $DIR/test_35_file -chown $RUNAS_ID $DIR/test_35_file -$DIR/test_35_file && error -pass -$CLEAN -$START -else -echo "Skipping test for 1360: set \$BUG_1360 to run it (fail cleanup, likely)." 
-fi +test_33() { + [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file + touch $DIR/test_33_file + chmod 444 $DIR/test_33_file + chown $RUNAS_ID $DIR/test_33_file + $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true +} +run_test 33 "write file with mode 444 (should return error) ====" -if [ -n "$BUG_1365" ]; then -log '-- test 35: truncate file that has not been opened' -$MCREATE $DIR/f -$TRUNCATE $DIR/f 100 -rm $DIR/f -pass -$CLEAN -$START -else -echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)." -fi +test_34() { + $MCREATE $DIR/f + $TRUNCATE $DIR/f 100 + rm $DIR/f +} +run_test 34 "truncate file that has not been opened ============" + +test_35() { + [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file + cp /bin/sh $DIR/test_35_file + chmod 444 $DIR/test_35_file + chown $RUNAS_ID $DIR/test_35_file + $DIR/test_35_file && error + return 0 +} +run_test 35 "exec file with mode 444 (should return error) =====" -log '-- test 36: cvs operations' -[ $UID -ne 0 ] && RUNAS_ID="$UID" -[ $UID -ne 0 ] && RUNAS="" -mkdir -p $DIR/cvsroot -log '-- test 36-1: cvs init' -cvs -d $DIR/cvsroot init -$CLEAN -$START -log '-- test 36-2: cvs import' -(cd /etc/init.d ; cvs -d $DIR/cvsroot import -m "nomesg" reposname vtag rtag ) -$CLEAN -$START -log '-- test 36-3: cvs checkout' -(cd $DIR ; cvs -d $DIR/cvsroot co reposname ) -$CLEAN -$START -log '-- test 36-4: cvs add' -(cd $DIR/reposname ; touch foo34 ; cvs add -m 'addmsg' foo34 ) -$CLEAN -$START -log '-- test 36-5: cvs update' -(cd $DIR/reposname ; cvs update ) -$CLEAN -$START -log '-- test 36-5: cvs commit' -# -# XXX change this: use a non rooot users -(cd $DIR/reposname ; cvs commit -m 'nomsg' foo32 ) -pass -$CLEAN -$START +test_36a() { + log 36 "cvs operations ====================================" + mkdir -p $DIR/cvsroot + chown $RUNAS_ID $DIR/cvsroot + $RUNAS cvs -d $DIR/cvsroot init +} +run_test 36a "cvs init =========================================" + +test_36b() { + cd /etc/init.d + $RUNAS cvs 
-d $DIR/cvsroot import -m "nomesg" reposname vtag rtag +} +run_test 36b "cvs import =======================================" -log '== cleanup =============================================' -rm -r $DIR/[Rdfs][1-9]* $DIR/ls +test_36c() { + cd $DIR + mkdir -p $DIR/reposname + chown $RUNAS_ID $DIR/reposname + $RUNAS cvs -d $DIR/cvsroot co reposname +} +run_test 36c "cvs checkout =====================================" + +test_36d() { + cd $DIR/reposname + $RUNAS touch foo36 + $RUNAS cvs add -m 'addmsg' foo36 +} +run_test 36d "cvs add ==========================================" + +test_36e() { + cd $DIR/reposname + $RUNAS cvs update +} +run_test 36e "cvs update =======================================" + +# XXX change this: use a non root user +test_36f() { + cd $DIR/reposname + $RUNAS cvs commit -m 'nomsg' foo36 +} +run_test 36f "cvs commit =======================================" + +log "cleanup: ======================================================" +rm -r $DIR/[Rdfs][1-9]* +if [ "$I_MOUNTED" = "yes" ]; then + sh llmountcleanup.sh || error +fi -echo '======================= finished =======================' +echo '=========================== finished ===============================' diff --git a/lustre/tests/utime.c b/lustre/tests/utime.c new file mode 100644 index 0000000..b59cc6c --- /dev/null +++ b/lustre/tests/utime.c @@ -0,0 +1,83 @@ +/* + * Simple test for validating mtime on a file create and set via utime. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void usage(char *prog) +{ + fprintf(stderr, "usage: %s \n", prog); + exit(1); +} + +int main(int argc, char *argv[]) +{ + long before_mknod, after_mknod; + long before_utime, after_utime; + struct stat st; + int rc; + + if (argc != 2) + usage(argv[0]); + + before_mknod = time(0); + rc = mknod(argv[1], 0700, S_IFREG); + after_mknod = time(0); + if (rc) { + fprintf(stderr, "%s: mknod(%s) failed: rc %d: %s\n", + argv[0], argv[1], rc, strerror(rc)); + return 2; + } + + rc = stat(argv[1], &st); + if (rc) { + fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n", + argv[0], argv[1], rc, strerror(rc)); + return 3; + } + + if (st.st_mtime < before_mknod || st.st_mtime > after_mknod) { + fprintf(stderr, "%s: bad mknod times %lu <= %lu <= %lu false\n", + argv[0], before_mknod, st.st_mtime, after_mknod); + return 4; + } + + printf("%s: good mknod times %lu <= %lu <= %lu\n", + argv[0], before_mknod, st.st_mtime, after_mknod); + + sleep(5); + + before_utime = time(0); + rc = utime(argv[0], NULL); + after_utime = time(0); + if (rc) { + fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n", + argv[0], argv[1], rc, strerror(rc)); + return 5; + } + + rc = stat(argv[1], &st); + if (rc) { + fprintf(stderr, "%s: second stat(%s) failed: rc %d: %s\n", + argv[0], argv[1], rc, strerror(rc)); + return 6; + } + + if (st.st_mtime < before_utime || st.st_mtime > after_utime) { + fprintf(stderr, "%s: bad utime times %lu <= %lu <= %lu false\n", + argv[0], before_utime, st.st_mtime, after_utime); + return 7; + } + + printf("%s: good utime times %lu <= %lu <= %lu\n", + argv[0], before_mknod, st.st_mtime, after_mknod); + + return 0; +} diff --git a/lustre/utils/Lustre/lustredb.py b/lustre/utils/Lustre/lustredb.py index 35bca56..82d487c 100644 --- a/lustre/utils/Lustre/lustredb.py +++ b/lustre/utils/Lustre/lustredb.py @@ -37,7 +37,7 @@ class LustreDB: return int(str) return default except ValueError: - raise 
LconfError("text value is not integer:", str) + raise Lustre.LconfError("text value is not integer: " + str) def get_first_ref(self, tag): """ Get the first uuidref of the type TAG. Only @@ -70,7 +70,7 @@ class LustreDB: node_db = self.lookup_name(node_name) if not node_db: return None - return self.get_tgt_dev(target_uuid) + return node_db.get_tgt_dev(target_uuid) # get all network uuids for this node def get_networks(self): @@ -264,7 +264,7 @@ class LustreDB_XML(LustreDB): return ret def _update_active(self, tgt, new): - raise LconfError("updates not implemented for XML") + raise Lustre.LconfError("updates not implemented for XML") # ================================================================ # LDAP Support @@ -274,7 +274,7 @@ class LustreDB_LDAP(LustreDB): parent = None, url = "ldap://localhost", user = "cn=Manager, fs=lustre", - pw = "secret" + pw = "" ): self._name = name self._attrs = attrs diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index e78bb7d..a237f43 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -7,7 +7,7 @@ KFLAGS:= CPPFLAGS = $(HAVE_LIBREADLINE) lctl_LDADD := $(LIBREADLINE) -lptlctl lload_LDADD := -lptlctl -sbin_PROGRAMS = lctl lfind lstripe obdio obdbarrier obdstat lload wirecheck +sbin_PROGRAMS = lctl lfind lstripe obdio obdbarrier lload wirecheck sbin_SCRIPTS = lconf lmc llanalyze wirecheck_SOURCES = wirecheck.c lctl_SOURCES = parser.c obd.c lctl.c parser.h obdctl.h diff --git a/lustre/utils/lactive b/lustre/utils/lactive index 6d7771d5..e691423 100644 --- a/lustre/utils/lactive +++ b/lustre/utils/lactive @@ -57,7 +57,8 @@ if not config.config: fatal("Missing config") base = "config=%s,fs=lustre" % (config.config,) -db = Lustre.LustreDB_LDAP('', {}, base=base, url = config.ldapurl) +db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret", + url = config.ldapurl) active_node = db.lookup_name(config.active) if not active_node: diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 
7b31fef..44e8337 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -1010,7 +1010,6 @@ class Network(Module): gw_nid = my_int(gw.nid) self_nid = my_int(self.nid) except ValueError, e: - print "Error!", str(e) gw_nid = gw.nid self_nid = self.nid if gw_nid < self_nid: @@ -1021,6 +1020,9 @@ class Network(Module): if router.get_val_int('router', 0): # if this is a peer with a nid less than mine, # then connect. + for netuuid in router.get_networks(): + net = self.db.lookup(netuuid) + gw = Network(net) if (gw.cluster_id == self.cluster_id and gw.net_type == self.net_type): # hack: compare as numbers if possible, this should all @@ -1030,7 +1032,6 @@ class Network(Module): gw_nid = my_int(gw.nid) self_nid = my_int(self.nid) except ValueError, e: - print "Error!", str(e) gw_nid = gw.nid self_nid = self.nid if gw_nid < self_nid: @@ -1209,6 +1210,7 @@ class MDSDEV(Module): self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') + self.nspath = self.db.get_val('nspath', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) @@ -1220,7 +1222,7 @@ class MDSDEV(Module): if mds.get_val('failover', 0): self.failover_mds = 'f' else: - self.failover_mds = '' + self.failover_mds = 'n' active_uuid = get_active_target(mds) if not active_uuid: panic("No target device found:", target_uuid) @@ -1255,8 +1257,10 @@ class MDSDEV(Module): if not is_prepared('MDT_UUID'): lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'), setup ="") + if self.nspath: + run ("mkdir", self.nspath) lctl.newdev(attach="mds %s %s" % (self.name, self.uuid), - setup ="%s %s" %(blkdev, self.fstype)) + setup ="%s %s %s" %(blkdev, self.fstype, self.nspath)) for uuid in self.lovconfig_uuids: db = self.db.lookup(uuid) lovconfig = LOVConfig(db) @@ -1322,6 +1326,7 @@ class OSD(Module): self.size = 
self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') + self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') ost = self.db.lookup(target_uuid) self.name = ost.getName() @@ -1329,7 +1334,7 @@ class OSD(Module): if ost.get_val('failover', 0): self.failover_ost = 'f' else: - self.failover_ost = '' + self.failover_ost = 'n' active_uuid = get_active_target(ost) if not active_uuid: @@ -1371,9 +1376,11 @@ class OSD(Module): else: blkdev = block_dev(self.devpath, self.size, self.fstype, self.format, self.journal_size) + if self.nspath: + run ("mkdir", self.nspath) lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid), - setup ="%s %s %s" %(blkdev, self.fstype, - self.failover_ost)) + setup ="%s %s %s %s" %(blkdev, self.fstype, + self.failover_ost, self.nspath)) if not is_prepared('OSS_UUID'): lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'), setup ="") @@ -1461,7 +1468,7 @@ class Client(Module): else: srv, r = find_route(self.get_servers()) if srv: - lctl.add_route_host(r[0], srv.uuid, r[1], r[2]) + lctl.add_route_host(r[0], srv.uuid, r[1], r[3]) else: panic ("no route to", self.target_uuid) except CommandError, e: @@ -1818,7 +1825,7 @@ def get_active_target(db): target_name = db.getName() node_name = get_select(target_name) if node_name: - tgt_dev_uuid = db.get_target_device(target_uuid, node_name) + tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid) else: tgt_dev_uuid = db.get_first_ref('active') return tgt_dev_uuid @@ -2127,10 +2134,11 @@ tgt_select = {} def init_select(arg): # arg = "service=nodeA,service2=nodeB" global tgt_select - list = string.split(arg, ',') - for entry in list: - srv, node = string.split(entry, '=') - tgt_select[srv] = node + if arg: + list = string.split(arg, ',') + for entry in list: + srv, node = string.split(entry, '=') + tgt_select[srv] = node def get_select(srv): if tgt_select.has_key(srv): @@ 
-2224,6 +2232,8 @@ def main(): random.seed(seed) sanitise_path() + + init_select(config.select) if len(args) > 0: if not os.access(args[0], os.R_OK): diff --git a/lustre/utils/lfind.c b/lustre/utils/lfind.c index aac0e16..b88f548 100644 --- a/lustre/utils/lfind.c +++ b/lustre/utils/lfind.c @@ -275,11 +275,11 @@ getobdindex(const char *path) exit(1); } + memset(&data, 0, sizeof data); data.ioc_inllen1 = sizeof(desc); data.ioc_inlbuf1 = (char *)&desc; data.ioc_inllen2 = uuidslen; data.ioc_inlbuf2 = (char *)uuids; - data.ioc_inllen3 = 0; memset(&desc, 0, sizeof(desc)); desc.ld_tgt_count = max_ost_count; diff --git a/lustre/utils/llobdstat.pl b/lustre/utils/llobdstat.pl new file mode 100755 index 0000000..cb39d30 --- /dev/null +++ b/lustre/utils/llobdstat.pl @@ -0,0 +1,160 @@ +#!/usr/bin/perl + +my $pname = $0; + +my $defaultpath = "/proc/fs/lustre"; +my $obdstats = "stats"; + +sub usage() +{ + print STDERR "Usage: $pname []\n"; + print STDERR "example: $pname help (to get help message)\n"; + print STDERR "example: $pname ost1 1 (monitor /proc/fs/lustre/ost1/obd_stats once per second\n"; + exit 1; +} + +my $statspath = "None"; +my $interval = 0; + +if (($#ARGV < 0) || ($#ARGV > 1)) { + usage(); +} else { + if ( $ARGV[0] =~ /help$/ ) { + usage(); + } + if ( -f $ARGV[0] ) { + $statspath = $ARGV[0]; + } elsif ( -f "$ARGV[0]/$obdstats" ) { + $statspath = "$ARGV[0]/$obdstats"; + } else { + my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`; + chop $st; + if ( -f "$st" ) { + $statspath = $st; + } + } + if ( $statspath =~ /^None$/ ) { + die "Cannot locate stat file for: $ARGV[0]\n"; + } + if ($#ARGV == 1) { + $interval = $ARGV[1]; + } +} + +print "$pname on $statspath\n"; + +my %cur; +my %last; +my $mhz = 0; +my ($read_bytes, $read, $write_bytes, $write, $getattr, $setattr, $open, $close, $create, $destroy, $statfs, $punch, $snapshot_time) = + ("read_bytes", "read", "write_bytes", "write", "getattr", "setattr", "open", "close", "create", "destroy", "statfs", 
"punch", "snapshot_time"); + +my @extinfo = ($setattr, $open, $close, $create, $destroy, $statfs, $punch); +my %shortname = ($setattr => "sa", $open => "op", $close => "cl", + $create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu"); + +sub get_cpumhz() +{ + my $cpu_freq; + my $itc_freq; # On Itanium systems use this + if (open(CPUINFO, "/proc/cpuinfo")==0) { + return; + } + while () { + if (/^cpu MHz\s+:\s*([\d\.]+)/) { $cpu_freq=$1; } + elsif (/^itc MHz\s+:\s*([\d\.]+)/) { $itc_freq=$1; } + } + if (defined($itc_freq)) { $mhz = $itc_freq; } + elsif (defined($cpu_freq)) { $mhz = $cpu_freq; } + else { $mhz = 1; } + close CPUINFO; +} + +get_cpumhz(); +print "Processor counters run at $mhz MHz\n"; + +sub readall() +{ + my $prevcount; + my @iodata; + + seek STATS, 0, 0; + while () { + chop; +# ($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare) + @iodata = split(/\s+/, $_); + my $name = $iodata[0]; + + $prevcount = $cur{$name}; + if (defined($prevcount)) { + $last{$name} = $prevcount; + } + if ($name =~ /^read_bytes$/ || $name =~ /^write_bytes$/) { + $cur{$name} = $iodata[6]; + } + elsif ($name =~ /^snapshot_time$/) { +# $cumulcount =~ /(\d+)/; + $cur{$name} = $iodata[1]; + } + else { + $cur{$name} = $iodata[1]; + } + } +} +sub process_stats() +{ + my $delta; + my $data; + my $last_time = $last{$snapshot_time}; + if (!defined($last_time)) { + printf "R %-g/%-g W %-g/%-g attr %-g/%-g open %-g/%-g create %-g/%-g stat %-g punch %-g\n", + $cur{$read_bytes}, $cur{$read}, + $cur{$write_bytes}, $cur{$write}, + $cur{$getattr}, $cur{$setattr}, + $cur{$open}, $cur{$close}, + $cur{$create}, $cur{$destroy}, + $cur{$statfs}, $cur{$punch}; + } + else { + my $timespan = $cur{$snapshot_time} - $last{$snapshot_time}; + + my $rdelta = $cur{$read} - $last{$read}; + my $rvdelta = int ($rdelta / $timespan); + my $rrate = ($cur{$read_bytes} - $last{$read_bytes}) / + ($timespan * ( 1 << 20 )); + my $wdelta = $cur{$write} - $last{$write}; + my $wvdelta = int 
($wdelta / $timespan); + my $wrate = ($cur{$write_bytes} - $last{$write_bytes}) / + ($timespan * ( 1 << 20 )); + printf "R %6lu (%5lu %6.2fMb)/s W %6lu (%5lu %6.2fMb)/s", + $rdelta, $rvdelta, $rrate, + $wdelta, $wvdelta, $wrate; + + $delta = $cur{$getattr} - $last{$getattr}; + if ( $delta != 0 ) { + $rdelta = int ($delta/$timespan); + print " ga:$delta,$rdelta/s"; + } + + for $data ( @extinfo ) { + $delta = $cur{$data} - $last{$data}; + if ($delta != 0) { + print " $shortname{$data}:$delta"; + } + } + print "\n"; + $| = 1; + } +} + +open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; +do { + readall(); + process_stats(); + if ($interval) { + sleep($interval); + %last = %cur; + } +} while ($interval); +close STATS; + diff --git a/lustre/utils/llstat.pl b/lustre/utils/llstat.pl index 28eb778..eb65e46 100755 --- a/lustre/utils/llstat.pl +++ b/lustre/utils/llstat.pl @@ -70,9 +70,10 @@ sub readstat() printf "%10s", "stddev"; } printf "\n"; + $| = 1; } elsif ($cumulcount!=0) { - printf "%-25s %-10Lu %-10Lu %-10Lu", + printf "%-25s %-10lu %-10lu %-10lu", $name, $diff, ($diff/$tdiff), $cumulcount; if (defined($sum)) { @@ -83,7 +84,7 @@ sub readstat() $sum = $sum/$mhz; $max = $max/$mhz; } - printf "%-8s %10Lu %12.2f %10Lu", $unit, $min, ($sum/$cumulcount), $max; + printf "%-8s %10lu %12.2f %10lu", $unit, $min, ($sum/$cumulcount), $max; if (defined($sumsquare)) { my $s = $sumsquare - (($sum_orig*$sum_orig)/$cumulcount); if ($s >= 0) { @@ -97,6 +98,7 @@ sub readstat() } } printf "\n"; + $| = 1; } } else { diff --git a/lustre/utils/lmc b/lustre/utils/lmc index 8ab7278..65f8fc2 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -74,6 +74,7 @@ Object creation command summary: --dev path --fstype extN|ext3 --size size + --nspath --add lov --lov lov_name @@ -90,6 +91,7 @@ Object creation command summary: --size size --fstype extN|ext3 --ostuuid uuid + --nspath --add mtpt - Mountpoint --node node_name @@ -148,6 +150,7 @@ lmc_options = [ ('journal_size', "", PARAM, 
0), ('fstype', "", PARAM, "ext3"), ('ostuuid', "", PARAM, ""), + ('nspath', "Local mount point of server namespace.", PARAM, ""), ('format', ""), # clients: mountpoint and echo @@ -319,7 +322,7 @@ class GenConfig: return ldlm def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, - node_uuid, dev_size=0, journal_size=0): + node_uuid, dev_size=0, journal_size=0, nspath=""): osd = self.newService("osd", name, uuid) osd.setAttribute('osdtype', osdtype) osd.appendChild(self.ref("target", ost_uuid)) @@ -333,6 +336,8 @@ class GenConfig: self.addElement(osd, "devsize", "%s" % (dev_size)) if journal_size: self.addElement(osd, "journalsize", "%s" % (journal_size)) + if nspath: + self.addElement(osd, "nspath", nspath) return osd def cobd(self, name, uuid, real_uuid, cache_uuid): @@ -373,7 +378,7 @@ class GenConfig: return mds def mdsdev(self, name, uuid, fs, devname, format, node_uuid, - mds_uuid, dev_size=0, journal_size=0): + mds_uuid, dev_size=0, journal_size=0, nspath=""): mdd = self.newService("mdsdev", name, uuid) self.addElement(mdd, "fstype", fs) dev = self.addElement(mdd, "devpath", devname) @@ -382,6 +387,8 @@ class GenConfig: self.addElement(mdd, "devsize", "%s" % (dev_size)) if journal_size: self.addElement(mdd, "journalsize", "%s" % (journal_size)) + if nspath: + self.addElement(mdd, "nspath", nspath) mdd.appendChild(self.ref("node", node_uuid)) mdd.appendChild(self.ref("target", mds_uuid)) return mdd @@ -637,6 +644,7 @@ def add_mds(gen, lustre, options): size = get_option(options, 'size') fstype = get_option(options, 'fstype') journal_size = get_option(options, 'journal_size') + nspath = get_option(options, 'nspath') node_uuid = name2uuid(lustre, node_name, 'node') @@ -648,7 +656,7 @@ def add_mds(gen, lustre, options): mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, get_format_flag(options), node_uuid, mds_uuid, - dev_size=size, journal_size=journal_size) + size, journal_size, nspath) lustre.appendChild(mdd) @@ -671,6 +679,8 @@ def add_ost(gen, 
lustre, options): fstype = get_option(options, 'fstype') journal_size = get_option(options, 'journal_size') + nspath = get_option(options, 'nspath') + ostname = get_option(options, 'ost') if not ostname: ostname = new_name('OST_'+ node_name) @@ -703,7 +713,7 @@ def add_ost(gen, lustre, options): osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid, node_uuid, size, - journal_size) + journal_size, nspath) node = findByName(lustre, node_name, "node") diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index a89e15d..d2d8744 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -968,7 +968,7 @@ int jt_obd_setup(int argc, char **argv) IOC_INIT(data); - if (argc > 4) + if (argc > 5) return CMD_HELP; data.ioc_dev = -1; @@ -987,6 +987,10 @@ int jt_obd_setup(int argc, char **argv) data.ioc_inllen3 = strlen(argv[3]) + 1; data.ioc_inlbuf3 = argv[3]; } + if (argc > 4) { + data.ioc_inllen4 = strlen(argv[4]) + 1; + data.ioc_inlbuf4 = argv[4]; + } IOC_PACK(argv[0], data); rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SETUP, buf); diff --git a/lustre/utils/obdstat.c b/lustre/utils/obdstat.c deleted file mode 100644 index 8139fb5..0000000 --- a/lustre/utils/obdstat.c +++ /dev/null @@ -1,198 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -struct one_stat { - char *name; - int fd; - long long current; - long long delta; -}; - -struct one_stat *read_bytes; -struct one_stat *read_reqs; -struct one_stat *write_bytes; -struct one_stat *write_reqs; -struct one_stat *getattr_reqs; -struct one_stat *setattr_reqs; -struct one_stat *create_reqs; -struct one_stat *destroy_reqs; -struct one_stat *statfs_reqs; -struct one_stat *open_reqs; -struct one_stat *close_reqs; -struct one_stat *punch_reqs; - -struct one_stat * -init_one_stat (char *basename, char *name) -{ - char fname[1024]; - struct one_stat *stat = (struct one_stat *)malloc (sizeof (*stat)); - - if (stat == NULL) { - fprintf (stderr, "Can't allocate stat %s: %s\n", - 
name, strerror (errno)); - abort (); - } - - snprintf (fname, sizeof (fname), "%s/%s", basename, name); - - memset (stat, 0, sizeof (*stat)); - stat->name = name; - - stat->fd = open (fname, O_RDONLY); - if (stat->fd < 0 ) { - fprintf (stderr, "Can't open stat %s: %s\n", - fname, strerror (errno)); - abort (); - } - - return (stat); -} - -void -update_one_stat (struct one_stat *stat) -{ - static char buffer[1024]; - long long prev = stat->current; - int nob; - - lseek (stat->fd, 0, SEEK_SET); - nob = read (stat->fd, buffer, sizeof (buffer) - 1); - if (nob < 0) { - fprintf (stderr, "Can't read stat %s: %s\n", - stat->name, strerror (errno)); - abort (); - } - - buffer[nob] = 0; - if (sscanf (buffer, "%Ld", &stat->current) != 1) { - fprintf (stderr, "Can't parse stat %s: %s\n", - stat->name, strerror (errno)); - abort (); - } - - stat->delta = stat->current - prev; -} - -double -timenow () -{ - struct timeval tv; - - gettimeofday (&tv, NULL); - return (tv.tv_sec + tv.tv_usec / 1000000.0); -} - -void -do_stat (void) -{ - static double last = 0.0; - double now; - double t; - - now = timenow(); - - update_one_stat (read_bytes); - update_one_stat (read_reqs); - update_one_stat (write_bytes); - update_one_stat (write_reqs); - update_one_stat (getattr_reqs); - update_one_stat (setattr_reqs); - update_one_stat (open_reqs); - update_one_stat (close_reqs); - update_one_stat (create_reqs); - update_one_stat (destroy_reqs); - update_one_stat (statfs_reqs); - update_one_stat (punch_reqs); - - if (last == 0.0) { - printf ("R %Ld/%Ld W %Ld/%Ld attr %Ld/%Ld open %Ld/%Ld create %Ld/%Ld stat %Ld punch %Ld\n", - read_bytes->current, read_reqs->current, - write_bytes->current, write_reqs->current, - getattr_reqs->current, setattr_reqs->current, - open_reqs->current, close_reqs->current, - create_reqs->current, destroy_reqs->current, - statfs_reqs->current, punch_reqs->current); - } else { - t = now - last; - - printf ("R %6Ld (%5d %6.2fMb)/s W %6Ld (%5d %6.2fMb)/s", - read_reqs->delta, 
(int)(read_reqs->delta / t), - read_bytes->delta / ((1<<20) * t), - write_reqs->delta, (int)(write_reqs->delta / t), - write_bytes->delta / ((1<<20) * t)); - - if (getattr_reqs->delta != 0) - printf (" ga:%Ld,%d/s", getattr_reqs->delta, - (int)(getattr_reqs->delta / t)); - - if (setattr_reqs->delta != 0) - printf (" sa:%Ld", setattr_reqs->delta); - - if (open_reqs->delta != 0) - printf (" op:%Ld", open_reqs->delta); - - if (close_reqs->delta != 0) - printf (" cl:%Ld", close_reqs->delta); - - if (create_reqs->delta != 0) - printf (" cx:%Ld", create_reqs->delta); - - if (destroy_reqs->delta != 0) - printf (" dx:%Ld", destroy_reqs->delta); - - if (statfs_reqs->delta != 0) - printf (" st:%Ld", statfs_reqs->delta); - - if (punch_reqs->delta != 0) - printf (" pu:%Ld", punch_reqs->delta); - - printf ("\n"); - } - - fflush(stdout); - last = timenow(); -} - -int main (int argc, char **argv) -{ - char basedir[128]; - int interval = 0; - - if (argc < 2) { - fprintf (stderr, "obd type not specified\n"); - return (1); - } - - snprintf (basedir, sizeof (basedir), "/proc/sys/%s", argv[1]); - - if (argc > 2) - interval = atoi (argv[2]); - - read_bytes = init_one_stat (basedir, "read_bytes"); - read_reqs = init_one_stat (basedir, "read_reqs"); - write_bytes = init_one_stat (basedir, "write_bytes"); - write_reqs = init_one_stat (basedir, "write_reqs"); - getattr_reqs = init_one_stat (basedir, "getattr_reqs"); - setattr_reqs = init_one_stat (basedir, "setattr_reqs"); - create_reqs = init_one_stat (basedir, "create_reqs"); - destroy_reqs = init_one_stat (basedir, "destroy_reqs"); - statfs_reqs = init_one_stat (basedir, "statfs_reqs"); - open_reqs = init_one_stat (basedir, "open_reqs"); - close_reqs = init_one_stat (basedir, "close_reqs"); - punch_reqs = init_one_stat (basedir, "punch_reqs"); - - do_stat (); - - if (interval == 0) - return (0); - - for (;;) { - sleep (interval); - do_stat (); - } -}