From 859678cc6b075f7c81903e44b99bdbd18c635cbb Mon Sep 17 00:00:00 2001 From: eeb Date: Fri, 16 Feb 2007 18:01:23 +0000 Subject: [PATCH] * landing 11667 (debug_peer_on_timeout) and 11684 (liblustre block for asynch OST_CONNECTs to complete at startup) on HEAD --- lustre/ChangeLog | 17 ++++++++ lustre/include/liblustre.h | 9 ++++- lustre/include/obd_support.h | 1 + lustre/liblustre/llite_lib.c | 21 +++++++--- lustre/liblustre/super.c | 5 ++- lustre/obdclass/class_obd.c | 1 + lustre/obdclass/darwin/darwin-sysctl.c | 4 ++ lustre/obdclass/linux/linux-sysctl.c | 4 ++ lustre/ptlrpc/client.c | 8 ++-- lustre/ptlrpc/events.c | 73 ++++++++++++++++++++++++++++++++-- lustre/ptlrpc/pinger.c | 3 +- lustre/ptlrpc/ptlrpcd.c | 23 +++++++++-- 12 files changed, 147 insertions(+), 22 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 429fcab..a188c3f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -20,6 +20,23 @@ tbd Cluster File Systems, Inc. * Recommended e2fsprogs version: 1.39.cfs2-0 * bug fixes +Severity : major +Frequency : liblustre (e.g. catamount) on a large cluster with >= 8 OSTs + per OSS +Bugzilla : 11684 +Description: System hang on startup +Details : This bug allowed the liblustre (e.g. catamount) client to + return to the app before handling all startup RPCs. This + could leave the node unresponsive to lustre network traffic + and manifested as a server ptllnd timeout. + +Severity : enhancement +Bugzilla : 11667 +Description: Add "/proc/sys/lustre/debug_peer_on_timeout" + (liblustre envirable: LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT) + boolean to control whether to print peer debug info when a + client's RPC times out. + Severity : enhancement Bugzilla : 8007 Description: MountConf diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 8b14f03..e801dc6 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -772,14 +772,21 @@ static inline void init_completion(struct completion *x) struct liblustre_wait_callback { struct list_head llwc_list; + const char *llwc_name; int (*llwc_fn)(void *arg); void *llwc_arg; }; -void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg); +void *liblustre_register_wait_callback(const char *name, + int (*fn)(void *arg), void *arg); void liblustre_deregister_wait_callback(void *notifier); int liblustre_wait_event(int timeout); +void *liblustre_register_idle_callback(const char *name, + int (*fn)(void *arg), void *arg); +void liblustre_deregister_idle_callback(void *notifier); +void liblustre_wait_idle(void); + /* flock related */ struct nfs_lock_info { __u32 state; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index c7e9a9d..6cdcc9a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -29,6 +29,7 @@ extern atomic_t obd_memory; extern int obd_memmax; extern unsigned int obd_fail_loc; +extern unsigned int obd_debug_peer_on_timeout; extern unsigned int obd_dump_on_timeout; extern unsigned int obd_dump_on_eviction; extern unsigned int obd_timeout; /* seconds */ diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index bda1766..8adbb45 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -265,7 +265,7 @@ int ll_parse_mount_target(const char *target, char **mgsnid, int _sysio_lustre_init(void) { int err; - char *timeout = NULL; + char *envstr; #ifndef INIT_SYSIO extern void __liblustre_cleanup_(void); #endif @@ -277,13 +277,23 @@ int _sysio_lustre_init(void) perror("init llite driver"); return err; } - timeout = getenv("LIBLUSTRE_TIMEOUT"); - if (timeout) { - obd_timeout = (unsigned int) strtol(timeout, NULL, 0); - printf("LibLustre: set obd timeout as %u seconds\n", + + envstr = getenv("LIBLUSTRE_TIMEOUT"); + if (envstr != NULL) { + obd_timeout = (unsigned int)strtol(envstr, NULL, 0); + printf("LibLustre: obd timeout=%u seconds\n", obd_timeout); } + /* debug peer on timeout? */ + envstr = getenv("LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT"); + if (envstr != NULL) { + obd_debug_peer_on_timeout = + (unsigned int)strtol(envstr, NULL, 0); + printf("LibLustre: debug peer on timeout=%d\n", + obd_debug_peer_on_timeout ? 0 : 1); + } + #ifndef INIT_SYSIO (void)atexit(__liblustre_cleanup_); #endif @@ -291,7 +301,6 @@ int _sysio_lustre_init(void) } extern int _sysio_native_init(); -extern unsigned int obd_timeout; char *lustre_path = NULL; diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index fe16b85..66f8681 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -103,6 +103,7 @@ static void llu_fsop_gone(struct filesys *fs) OBD_FREE(sbi, sizeof(*sbi)); + liblustre_wait_idle(); EXIT; } @@ -2027,7 +2028,7 @@ llu_fsswop_mount(const char *source, ptlrpc_req_finished(request); CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source); - liblustre_wait_event(0); + liblustre_wait_idle(); return 0; @@ -2045,6 +2046,8 @@ out_free: if (mdc) OBD_FREE(mdc, strlen(mdc) + 1); OBD_FREE(sbi, sizeof(*sbi)); + + liblustre_wait_idle(); return err; } diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index d907912..5aec6ae 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -60,6 +60,7 @@ int obd_memmax; /* The following are visible and mutable through /proc/sys/lustre/. */ unsigned int obd_fail_loc; +unsigned int obd_debug_peer_on_timeout; unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; unsigned int obd_timeout = 100; /* seconds */ diff --git a/lustre/obdclass/darwin/darwin-sysctl.c b/lustre/obdclass/darwin/darwin-sysctl.c index 3443d83..019fff3 100644 --- a/lustre/obdclass/darwin/darwin-sysctl.c +++ b/lustre/obdclass/darwin/darwin-sysctl.c @@ -49,6 +49,9 @@ SYSCTL_PROC(_lustre, OID_AUTO, lustre_kernel_version, SYSCTL_INT(_lustre, OID_AUTO, dump_on_timeout, CTLTYPE_INT | CTLFLAG_RW, &obd_dump_on_timeout, 0, "lustre_dump_on_timeout"); +SYSCTL_INT(_lustre, OID_AUTO, debug_peer_on_timeout, + CTLTYPE_INT | CTLFLAG_RW, &obd_debug_peer_on_timeout, + 0, "lustre_dump_on_timeout"); SYSCTL_INT(_lustre, OID_AUTO, memused, CTLTYPE_INT | CTLFLAG_RW, (int *)&obd_memory.counter, 0, "lustre_memory_used"); @@ -61,6 +64,7 @@ static cfs_sysctl_table_t parent_table[] = { &sysctl__lustre_fail_loc, &sysctl__lustre_timeout, &sysctl__lustre_dump_on_timeout, + &sysctl__lustre_debug_peer_on_timeout, &sysctl__lustre_upcall, &sysctl__lustre_memused, &sysctl__lustre_filter_sync_on_commit, diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index bbb4769..f8e4800 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -58,6 +58,7 @@ enum { OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */ OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ + OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */ }; int LL_PROC_PROTO(proc_fail_loc) @@ -86,6 +87,9 @@ static ctl_table obd_table[] = { &proc_fail_loc}, {OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL, &proc_set_timeout}, + {OBD_DEBUG_PEER_ON_TIMEOUT, "debug_peer_on_timeout", + &obd_debug_peer_on_timeout, + sizeof(int), 0644, NULL, &proc_dointvec}, {OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, {OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction, diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 142f4e4..7f279b4 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -948,12 +948,10 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) DEBUG_REQ(D_ERROR|D_NETERROR, req, "timeout (sent at %lu, %lus ago)", (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent); -#if 0 - /* Leave this out normally: it creates too much console noise and we - * only need it when we're debugging networks */ - if (imp != NULL) + + if (imp != NULL && obd_debug_peer_on_timeout) LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer); -#endif + spin_lock(&req->rq_lock); req->rq_timedout = 1; spin_unlock(&req->rq_lock); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 501a739..777ef75 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -484,25 +484,29 @@ int ptlrpc_ni_init(void) #ifndef __KERNEL__ CFS_LIST_HEAD(liblustre_wait_callbacks); +CFS_LIST_HEAD(liblustre_idle_callbacks); void *liblustre_services_callback; void * -liblustre_register_wait_callback (int (*fn)(void *arg), void *arg) +liblustre_register_waitidle_callback (struct list_head *callback_list, + const char *name, + int (*fn)(void *arg), void *arg) { struct liblustre_wait_callback *llwc; OBD_ALLOC(llwc, sizeof(*llwc)); LASSERT (llwc != NULL); + llwc->llwc_name = name; llwc->llwc_fn = fn; llwc->llwc_arg = arg; - list_add_tail(&llwc->llwc_list, &liblustre_wait_callbacks); + list_add_tail(&llwc->llwc_list, callback_list); return (llwc); } void -liblustre_deregister_wait_callback (void *opaque) +liblustre_deregister_waitidle_callback (void *opaque) { struct liblustre_wait_callback *llwc = opaque; @@ -510,6 +514,34 @@ liblustre_deregister_wait_callback (void *opaque) OBD_FREE(llwc, sizeof(*llwc)); } +void * +liblustre_register_wait_callback (const char *name, + int (*fn)(void *arg), void *arg) +{ + return liblustre_register_waitidle_callback(&liblustre_wait_callbacks, + name, fn, arg); +} + +void +liblustre_deregister_wait_callback (void *opaque) +{ + liblustre_deregister_waitidle_callback(opaque); +} + +void * +liblustre_register_idle_callback (const char *name, + int (*fn)(void *arg), void *arg) +{ + return liblustre_register_waitidle_callback(&liblustre_idle_callbacks, + name, fn, arg); +} + +void +liblustre_deregister_idle_callback (void *opaque) +{ + liblustre_deregister_waitidle_callback(opaque); +} + int liblustre_check_events (int timeout) { @@ -575,6 +607,38 @@ liblustre_wait_event (int timeout) return found_something; } +void +liblustre_wait_idle(void) +{ + static int recursed = 0; + + struct list_head *tmp; + struct liblustre_wait_callback *llwc; + int idle = 0; + + LASSERT(!recursed); + recursed = 1; + + do { + liblustre_wait_event(0); + + idle = 1; + + list_for_each(tmp, &liblustre_idle_callbacks) { + llwc = list_entry(tmp, struct liblustre_wait_callback, + llwc_list); + + if (!llwc->llwc_fn(llwc->llwc_arg)) { + idle = 0; + break; + } + } + + } while (!idle); + + recursed = 0; +} + #endif /* __KERNEL__ */ int ptlrpc_init_portals(void) @@ -587,7 +651,8 @@ int ptlrpc_init_portals(void) } #ifndef __KERNEL__ liblustre_services_callback = - liblustre_register_wait_callback(&liblustre_check_services, NULL); + liblustre_register_wait_callback("liblustre_check_services", + &liblustre_check_services, NULL); #endif rc = ptlrpcd_addref(); if (rc == 0) diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index be3840b..77507ca 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -594,7 +594,8 @@ int ptlrpc_start_pinger(void) { #ifdef ENABLE_PINGER memset(&pinger_args, 0, sizeof(pinger_args)); - pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs, + pinger_callback = liblustre_register_wait_callback("pinger_check_rpcs", + &pinger_check_rpcs, &pinger_args); #endif return 0; diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index efde23a..1979de5 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -54,7 +54,8 @@ struct ptlrpcd_ctl { char pc_name[16]; #ifndef __KERNEL__ int pc_recurred; - void *pc_callback; + void *pc_wait_callback; + void *pc_idle_callback; #endif }; @@ -210,6 +211,15 @@ int ptlrpcd_check_async_rpcs(void *arg) pc->pc_recurred--; return rc; } + +int ptlrpcd_idle(void *arg) +{ + struct ptlrpcd_ctl *pc = arg; + + return (list_empty(&pc->pc_set->set_new_requests) && + pc->pc_set->set_remaining == 0); +} + #endif static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) @@ -242,8 +252,12 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) wait_for_completion(&pc->pc_starting); #else - pc->pc_callback = - liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc); + pc->pc_wait_callback = + liblustre_register_wait_callback("ptlrpcd_check_async_rpcs", + &ptlrpcd_check_async_rpcs, pc); + pc->pc_idle_callback = + liblustre_register_wait_callback("ptlrpcd_check_idle_rpcs", + &ptlrpcd_idle, pc); (void)rc; #endif RETURN(0); @@ -257,7 +271,8 @@ static void ptlrpcd_stop(struct ptlrpcd_ctl *pc) obd_zombie_impexp_notify = NULL; wait_for_completion(&pc->pc_finishing); #else - liblustre_deregister_wait_callback(pc->pc_callback); + liblustre_deregister_wait_callback(pc->pc_wait_callback); + liblustre_deregister_wait_callback(pc->pc_idle_callback); #endif ptlrpc_set_destroy(pc->pc_set); } -- 1.8.3.1