* Recommended e2fsprogs version: 1.39.cfs2-0
* bug fixes
+Severity : major
+Frequency : liblustre (e.g. catamount) on a large cluster with >= 8 OSTs
+ per OSS
+Bugzilla : 11684
+Description: System hang on startup
+Details : This bug allowed the liblustre (e.g. catamount) client to
+ return to the app before handling all startup RPCs. This
+ could leave the node unresponsive to lustre network traffic
+ and manifested as a server ptllnd timeout.
+
+Severity : enhancement
+Bugzilla : 11667
+Description: Add "/proc/sys/lustre/debug_peer_on_timeout"
+ (liblustre envirable: LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT)
+ boolean to control whether to print peer debug info when a
+ client's RPC times out.
+
Severity : enhancement
Bugzilla : 8007
Description: MountConf
struct liblustre_wait_callback {
struct list_head llwc_list;
+ const char *llwc_name;
int (*llwc_fn)(void *arg);
void *llwc_arg;
};
-void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg);
+void *liblustre_register_wait_callback(const char *name,
+ int (*fn)(void *arg), void *arg);
void liblustre_deregister_wait_callback(void *notifier);
int liblustre_wait_event(int timeout);
+void *liblustre_register_idle_callback(const char *name,
+ int (*fn)(void *arg), void *arg);
+void liblustre_deregister_idle_callback(void *notifier);
+void liblustre_wait_idle(void);
+
/* flock related */
struct nfs_lock_info {
__u32 state;
extern atomic_t obd_memory;
extern int obd_memmax;
extern unsigned int obd_fail_loc;
+extern unsigned int obd_debug_peer_on_timeout;
extern unsigned int obd_dump_on_timeout;
extern unsigned int obd_dump_on_eviction;
extern unsigned int obd_timeout; /* seconds */
int _sysio_lustre_init(void)
{
int err;
- char *timeout = NULL;
+ char *envstr;
#ifndef INIT_SYSIO
extern void __liblustre_cleanup_(void);
#endif
perror("init llite driver");
return err;
}
- timeout = getenv("LIBLUSTRE_TIMEOUT");
- if (timeout) {
- obd_timeout = (unsigned int) strtol(timeout, NULL, 0);
- printf("LibLustre: set obd timeout as %u seconds\n",
+
+ envstr = getenv("LIBLUSTRE_TIMEOUT");
+ if (envstr != NULL) {
+ obd_timeout = (unsigned int)strtol(envstr, NULL, 0);
+ printf("LibLustre: obd timeout=%u seconds\n",
obd_timeout);
}
+ /* debug peer on timeout? */
+ envstr = getenv("LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT");
+ if (envstr != NULL) {
+ obd_debug_peer_on_timeout =
+ (unsigned int)strtol(envstr, NULL, 0);
+ printf("LibLustre: debug peer on timeout=%d\n",
+ obd_debug_peer_on_timeout ? 0 : 1);
+ }
+
#ifndef INIT_SYSIO
(void)atexit(__liblustre_cleanup_);
#endif
}
extern int _sysio_native_init();
-extern unsigned int obd_timeout;
char *lustre_path = NULL;
OBD_FREE(sbi, sizeof(*sbi));
+ liblustre_wait_idle();
EXIT;
}
ptlrpc_req_finished(request);
CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
- liblustre_wait_event(0);
+ liblustre_wait_idle();
return 0;
if (mdc)
OBD_FREE(mdc, strlen(mdc) + 1);
OBD_FREE(sbi, sizeof(*sbi));
+
+ liblustre_wait_idle();
return err;
}
/* The following are visible and mutable through /proc/sys/lustre/. */
unsigned int obd_fail_loc;
+unsigned int obd_debug_peer_on_timeout;
unsigned int obd_dump_on_timeout;
unsigned int obd_dump_on_eviction;
unsigned int obd_timeout = 100; /* seconds */
SYSCTL_INT(_lustre, OID_AUTO, dump_on_timeout,
CTLTYPE_INT | CTLFLAG_RW, &obd_dump_on_timeout,
0, "lustre_dump_on_timeout");
+SYSCTL_INT(_lustre, OID_AUTO, debug_peer_on_timeout,
+ CTLTYPE_INT | CTLFLAG_RW, &obd_debug_peer_on_timeout,
+ 0, "lustre_dump_on_timeout");
SYSCTL_INT(_lustre, OID_AUTO, memused,
CTLTYPE_INT | CTLFLAG_RW, (int *)&obd_memory.counter,
0, "lustre_memory_used");
&sysctl__lustre_fail_loc,
&sysctl__lustre_timeout,
&sysctl__lustre_dump_on_timeout,
+ &sysctl__lustre_debug_peer_on_timeout,
&sysctl__lustre_upcall,
&sysctl__lustre_memused,
&sysctl__lustre_filter_sync_on_commit,
OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */
OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */
OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */
+ OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
};
int LL_PROC_PROTO(proc_fail_loc)
&proc_fail_loc},
{OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL,
&proc_set_timeout},
+ {OBD_DEBUG_PEER_ON_TIMEOUT, "debug_peer_on_timeout",
+ &obd_debug_peer_on_timeout,
+ sizeof(int), 0644, NULL, &proc_dointvec},
{OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout,
sizeof(int), 0644, NULL, &proc_dointvec},
{OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction,
DEBUG_REQ(D_ERROR|D_NETERROR, req, "timeout (sent at %lu, %lus ago)",
(long)req->rq_sent, CURRENT_SECONDS - req->rq_sent);
-#if 0
- /* Leave this out normally: it creates too much console noise and we
- * only need it when we're debugging networks */
- if (imp != NULL)
+
+ if (imp != NULL && obd_debug_peer_on_timeout)
LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
-#endif
+
spin_lock(&req->rq_lock);
req->rq_timedout = 1;
spin_unlock(&req->rq_lock);
#ifndef __KERNEL__
CFS_LIST_HEAD(liblustre_wait_callbacks);
+CFS_LIST_HEAD(liblustre_idle_callbacks);
void *liblustre_services_callback;
void *
-liblustre_register_wait_callback (int (*fn)(void *arg), void *arg)
+liblustre_register_waitidle_callback (struct list_head *callback_list,
+ const char *name,
+ int (*fn)(void *arg), void *arg)
{
struct liblustre_wait_callback *llwc;
OBD_ALLOC(llwc, sizeof(*llwc));
LASSERT (llwc != NULL);
+ llwc->llwc_name = name;
llwc->llwc_fn = fn;
llwc->llwc_arg = arg;
- list_add_tail(&llwc->llwc_list, &liblustre_wait_callbacks);
+ list_add_tail(&llwc->llwc_list, callback_list);
return (llwc);
}
void
-liblustre_deregister_wait_callback (void *opaque)
+liblustre_deregister_waitidle_callback (void *opaque)
{
struct liblustre_wait_callback *llwc = opaque;
OBD_FREE(llwc, sizeof(*llwc));
}
+void *
+liblustre_register_wait_callback (const char *name,
+ int (*fn)(void *arg), void *arg)
+{
+ return liblustre_register_waitidle_callback(&liblustre_wait_callbacks,
+ name, fn, arg);
+}
+
+void
+liblustre_deregister_wait_callback (void *opaque)
+{
+ liblustre_deregister_waitidle_callback(opaque);
+}
+
+void *
+liblustre_register_idle_callback (const char *name,
+ int (*fn)(void *arg), void *arg)
+{
+ return liblustre_register_waitidle_callback(&liblustre_idle_callbacks,
+ name, fn, arg);
+}
+
+void
+liblustre_deregister_idle_callback (void *opaque)
+{
+ liblustre_deregister_waitidle_callback(opaque);
+}
+
int
liblustre_check_events (int timeout)
{
return found_something;
}
+void
+liblustre_wait_idle(void)
+{
+ static int recursed = 0;
+
+ struct list_head *tmp;
+ struct liblustre_wait_callback *llwc;
+ int idle = 0;
+
+ LASSERT(!recursed);
+ recursed = 1;
+
+ do {
+ liblustre_wait_event(0);
+
+ idle = 1;
+
+ list_for_each(tmp, &liblustre_idle_callbacks) {
+ llwc = list_entry(tmp, struct liblustre_wait_callback,
+ llwc_list);
+
+ if (!llwc->llwc_fn(llwc->llwc_arg)) {
+ idle = 0;
+ break;
+ }
+ }
+
+ } while (!idle);
+
+ recursed = 0;
+}
+
#endif /* __KERNEL__ */
int ptlrpc_init_portals(void)
}
#ifndef __KERNEL__
liblustre_services_callback =
- liblustre_register_wait_callback(&liblustre_check_services, NULL);
+ liblustre_register_wait_callback("liblustre_check_services",
+ &liblustre_check_services, NULL);
#endif
rc = ptlrpcd_addref();
if (rc == 0)
{
#ifdef ENABLE_PINGER
memset(&pinger_args, 0, sizeof(pinger_args));
- pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs,
+ pinger_callback = liblustre_register_wait_callback("pinger_check_rpcs",
+ &pinger_check_rpcs,
&pinger_args);
#endif
return 0;
char pc_name[16];
#ifndef __KERNEL__
int pc_recurred;
- void *pc_callback;
+ void *pc_wait_callback;
+ void *pc_idle_callback;
#endif
};
pc->pc_recurred--;
return rc;
}
+
+int ptlrpcd_idle(void *arg)
+{
+ struct ptlrpcd_ctl *pc = arg;
+
+ return (list_empty(&pc->pc_set->set_new_requests) &&
+ pc->pc_set->set_remaining == 0);
+}
+
#endif
static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
wait_for_completion(&pc->pc_starting);
#else
- pc->pc_callback =
- liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
+ pc->pc_wait_callback =
+ liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
+ &ptlrpcd_check_async_rpcs, pc);
+ pc->pc_idle_callback =
+ liblustre_register_wait_callback("ptlrpcd_check_idle_rpcs",
+ &ptlrpcd_idle, pc);
(void)rc;
#endif
RETURN(0);
obd_zombie_impexp_notify = NULL;
wait_for_completion(&pc->pc_finishing);
#else
- liblustre_deregister_wait_callback(pc->pc_callback);
+ liblustre_deregister_wait_callback(pc->pc_wait_callback);
+ liblustre_deregister_wait_callback(pc->pc_idle_callback);
#endif
ptlrpc_set_destroy(pc->pc_set);
}