From 6c509e0b4bffd7b612f318ef79f9bf7085905f32 Mon Sep 17 00:00:00 2001 From: shadow Date: Wed, 11 Jun 2008 06:48:53 +0000 Subject: [PATCH] timeout and assert in invalidate import. b=15716 i=rread i=johann --- lustre/ChangeLog | 9 ++- lustre/include/linux/obd_class.h | 4 -- lustre/include/obd_class.h | 4 ++ lustre/obdclass/class_obd.c | 1 + lustre/obdclass/genops.c | 143 ++++++++++++++++++++++++++++++++++++--- lustre/obdclass/obd_config.c | 4 +- lustre/ptlrpc/import.c | 1 + lustre/ptlrpc/ptlrpcd.c | 13 ---- 8 files changed, 150 insertions(+), 29 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 04dee50..cd60758 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -12,6 +12,13 @@ tbd Sun Microsystems, Inc. * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a removed cwd "./" (refer to Bugzilla 14399). +Severity : minor +Bugzilla : 15716 +Description: timeout with invalidate import. +Details : ptlrpcd_check calls obd_zombie_impexp_cull and waits for a request which should be + handled by ptlrpcd. This produces a long wait and -ETIMEOUT in + ptlrpc_invalidate_import and, as a result, an LASSERT. + Severity : enhancement Bugzilla : 15741 Description: Update to RHEL5 kernel-2.6.18-53.1.19.el5. @@ -73,7 +80,7 @@ Description: Obsolete CURRENT_SECONDS and use cfs_time_current_sec() instead. Severity : minor Bugzilla : 14645 - rare, on shutdown ost +Frequency : rare, on shutdown ost Description: don't hit live lock with umount ost. Details : shrink_dcache_parent can be in long loop with destroy dentries, use shrink_dcache_sb instead. 
diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index b074853..914e6d0 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -65,10 +65,6 @@ int ldiskfsfs_dirhash(const char *name, int len, struct ldiskfs_dx_hash_info *hi # endif /* HAVE_SERVER_SUPPORT */ #endif /* __KERNEL__ */ -void obd_zombie_impexp_init(void); -void obd_zombie_impexp_cull(void); -extern void (*obd_zombie_impexp_notify)(void); - /* obdo.c */ #ifdef __KERNEL__ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index c5d7f79..6fcbd52 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -90,6 +90,10 @@ char *obd_export_nid2str(struct obd_export *exp); int obd_export_evict_by_nid(struct obd_device *obd, const char *nid); int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid); +int obd_zombie_impexp_init(void); +void obd_zombie_impexp_stop(void); +void obd_zombie_impexp_cull(void); + /* obd_config.c */ int class_process_config(struct lustre_cfg *lcfg); int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 1c16400..9de3728 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -634,6 +634,7 @@ static void cleanup_obdclass(void) class_handle_cleanup(); class_exit_uuidlist(); + obd_zombie_impexp_stop(); memory_leaked = obd_memory_sum(); pages_leaked = obd_pages_sum(); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index e906d83..2bb7514 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -45,9 +45,7 @@ cfs_mem_cache_t *import_cachep; struct list_head obd_zombie_imports; struct list_head obd_zombie_exports; spinlock_t obd_zombie_impexp_lock; -void (*obd_zombie_impexp_notify)(void) = NULL; -EXPORT_SYMBOL(obd_zombie_impexp_notify); - +static void 
obd_zombie_impexp_notify(void); int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); @@ -1349,12 +1347,15 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) } EXPORT_SYMBOL(obd_export_evict_by_uuid); -void obd_zombie_impexp_cull(void) +/** + * Drain the zombie import and export lists: each pass unlinks at most one import and one export under obd_zombie_impexp_lock, and the loop repeats until neither list yields an entry. + */ +void obd_zombie_impexp_cull(void) { struct obd_import *import; struct obd_export *export; ENTRY; - + do { spin_lock (&obd_zombie_impexp_lock); @@ -1365,7 +1366,7 @@ void obd_zombie_impexp_cull(void) imp_zombie_chain); list_del(&import->imp_zombie_chain); } - + export = NULL; if (!list_empty(&obd_zombie_exports)) { export = list_entry(obd_zombie_exports.next, @@ -1375,7 +1376,7 @@ void obd_zombie_impexp_cull(void) } spin_unlock(&obd_zombie_impexp_lock); - + if (import != NULL) class_import_destroy(import); @@ -1385,12 +1386,136 @@ void obd_zombie_impexp_cull(void) } while (import != NULL || export != NULL); EXIT; } -EXPORT_SYMBOL(obd_zombie_impexp_cull); -void obd_zombie_impexp_init(void) +static struct completion obd_zombie_start; +static struct completion obd_zombie_stop; +static unsigned long obd_zombie_flags; +static cfs_waitq_t obd_zombie_waitq; + +enum { + OBD_ZOMBIE_STOP = 1 +}; + +/** + * Idle check for the zombie import/export destroyer: returns non-zero when both zombie lists are empty and OBD_ZOMBIE_STOP is not set (nothing to do), zero otherwise. The arg parameter is unused. + */ +int obd_zombie_impexp_check(void *arg) +{ + int rc; + + spin_lock(&obd_zombie_impexp_lock); + rc = list_empty(&obd_zombie_imports) && + list_empty(&obd_zombie_exports) && + !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + + spin_unlock(&obd_zombie_impexp_lock); + + RETURN(rc); +} + +/** + * Signal obd_zombie_waitq so that the import/export destroy thread notices a new zombie (or a stop request). + */ +static void obd_zombie_impexp_notify(void) +{ + cfs_waitq_signal(&obd_zombie_waitq); +} + +#ifdef __KERNEL__ + +/** + * destroy zombie export/import thread.
+ */ +static int obd_zombie_impexp_thread(void *unused) +{ + int rc; + + if ((rc = cfs_daemonize_ctxt("obd_zombid"))) { + complete(&obd_zombie_start); + RETURN(rc); + } + + complete(&obd_zombie_start); + + while(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { + struct l_wait_info lwi = { 0 }; + + l_wait_event(obd_zombie_waitq, !obd_zombie_impexp_check(NULL), &lwi); + + obd_zombie_impexp_cull(); + } + + complete(&obd_zombie_stop); + + RETURN(0); +} + +#else /* ! KERNEL */ + +static atomic_t zombie_recur = ATOMIC_INIT(0); +static void *obd_zombie_impexp_work_cb; +static void *obd_zombie_impexp_idle_cb; + +int obd_zombie_impexp_kill(void *arg) +{ + int rc = 0; + + if (atomic_inc_return(&zombie_recur) == 1) { + obd_zombie_impexp_cull(); + rc = 1; + } + atomic_dec(&zombie_recur); + return rc; +} + +#endif + +/** + * Initialize the zombie lists, lock, completions and wait queue, then start the destroy thread (__KERNEL__) or register the liblustre wait/idle callbacks. Returns the negative cfs_kernel_thread() result on failure, 0 in the liblustre build. NOTE(review): on success the kernel path returns the cfs_kernel_thread() result unchanged rather than 0 - confirm callers only test rc < 0. + */ +int obd_zombie_impexp_init(void) { + int rc; + CFS_INIT_LIST_HEAD(&obd_zombie_imports); CFS_INIT_LIST_HEAD(&obd_zombie_exports); spin_lock_init(&obd_zombie_impexp_lock); + init_completion(&obd_zombie_start); + init_completion(&obd_zombie_stop); + cfs_waitq_init(&obd_zombie_waitq); + +#ifdef __KERNEL__ + rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0); + if (rc < 0) + RETURN(rc); + + wait_for_completion(&obd_zombie_start); +#else + + obd_zombie_impexp_work_cb = + liblustre_register_wait_callback("obd_zombi_impexp_kill", + &obd_zombie_impexp_kill, NULL); + + obd_zombie_impexp_idle_cb = + liblustre_register_idle_callback("obd_zombi_impexp_check", + &obd_zombie_impexp_check, NULL); + rc = 0; + +#endif + RETURN(rc); +} +/** + * Set OBD_ZOMBIE_STOP, signal the destroy thread and wait for it to complete obd_zombie_stop (__KERNEL__), or deregister the liblustre wait/idle callbacks. NOTE(review): obd_zombie_impexp_thread returns without completing obd_zombie_stop when cfs_daemonize_ctxt() fails, so this wait would then never finish - confirm. + */ +void obd_zombie_impexp_stop(void) +{ + set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); + obd_zombie_impexp_notify(); +#ifdef __KERNEL__ + wait_for_completion(&obd_zombie_stop); +#else + liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb); + liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb); +#endif } diff --git
a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 0dd3ab6..b2a15f0 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -343,10 +343,10 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) obd->obd_name, obd->obd_uuid.uuid); class_decref(obd); - + /* not strictly necessary, but cleans up eagerly */ obd_zombie_impexp_cull(); - + RETURN(0); } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 2ea79c2..213f381 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -245,6 +245,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) DEBUG_REQ(D_ERROR, req, "still on delayed list"); } spin_unlock(&imp->imp_lock); + LASSERT(atomic_read(&imp->imp_inflight) == 0); } out: diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index b000473..b2c06be 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -98,8 +98,6 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc) if (test_bit(LIOD_STOP, &pc->pc_flags)) RETURN(1); - obd_zombie_impexp_cull(); - spin_lock(&pc->pc_set->set_new_req_lock); list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) { req = list_entry(pos, struct ptlrpc_request, rq_set_chain); @@ -177,13 +175,6 @@ static int ptlrpcd(void *arg) return 0; } -static void ptlrpcd_zombie_impexp_notify(void) -{ - LASSERT(ptlrpcd_pc.pc_set != NULL); // call before ptlrpcd inited ? 
- - cfs_waitq_signal(&ptlrpcd_pc.pc_set->set_waitq); -} - #else int ptlrpcd_check_async_rpcs(void *arg) @@ -234,9 +225,6 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) RETURN(-ENOMEM); #ifdef __KERNEL__ - /* wake ptlrpcd when zombie imports or exports exist */ - obd_zombie_impexp_notify = ptlrpcd_zombie_impexp_notify; - rc = cfs_kernel_thread(ptlrpcd, pc, 0); if (rc < 0) { ptlrpc_set_destroy(pc->pc_set); @@ -261,7 +249,6 @@ static void ptlrpcd_stop(struct ptlrpcd_ctl *pc) set_bit(LIOD_STOP, &pc->pc_flags); cfs_waitq_signal(&pc->pc_set->set_waitq); #ifdef __KERNEL__ - obd_zombie_impexp_notify = NULL; wait_for_completion(&pc->pc_finishing); #else liblustre_deregister_wait_callback(pc->pc_wait_callback); -- 1.8.3.1