Whamcloud - gitweb
* landing 11667 (debug_peer_on_timeout) and 11684 (liblustre block for asynch
authoreeb <eeb>
Fri, 16 Feb 2007 18:01:23 +0000 (18:01 +0000)
committereeb <eeb>
Fri, 16 Feb 2007 18:01:23 +0000 (18:01 +0000)
    OST_CONNECTs to complete at startup) on HEAD

12 files changed:
lustre/ChangeLog
lustre/include/liblustre.h
lustre/include/obd_support.h
lustre/liblustre/llite_lib.c
lustre/liblustre/super.c
lustre/obdclass/class_obd.c
lustre/obdclass/darwin/darwin-sysctl.c
lustre/obdclass/linux/linux-sysctl.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpcd.c

index 429fcab..a188c3f 100644 (file)
@@ -20,6 +20,23 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * Recommended e2fsprogs version: 1.39.cfs2-0
        * bug fixes
 
+Severity   : major
+Frequency  : liblustre (e.g. catamount) on a large cluster with >= 8 OSTs
+             per OSS
+Bugzilla   : 11684
+Description: System hang on startup
+Details    : This bug allowed the liblustre (e.g. catamount) client to
+             return to the app before handling all startup RPCs.  This
+            could leave the node unresponsive to lustre network traffic
+            and manifested as a server ptllnd timeout.
+
+Severity   : enhancement
+Bugzilla   : 11667
+Description: Add "/proc/sys/lustre/debug_peer_on_timeout"
+             (liblustre envirable: LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT)
+            boolean to control whether to print peer debug info when a
+            client's RPC times out.
+
 Severity   : enhancement
 Bugzilla   : 8007
 Description: MountConf
index 8b14f03..e801dc6 100644 (file)
@@ -772,14 +772,21 @@ static inline void init_completion(struct completion *x)
 
 struct liblustre_wait_callback {
         struct list_head    llwc_list;
+        const char         *llwc_name;
         int               (*llwc_fn)(void *arg);
         void               *llwc_arg;
 };
 
-void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg);
+void *liblustre_register_wait_callback(const char *name,
+                                       int (*fn)(void *arg), void *arg);
 void liblustre_deregister_wait_callback(void *notifier);
 int liblustre_wait_event(int timeout);
 
+void *liblustre_register_idle_callback(const char *name, 
+                                       int (*fn)(void *arg), void *arg);
+void liblustre_deregister_idle_callback(void *notifier);
+void liblustre_wait_idle(void);
+
 /* flock related */
 struct nfs_lock_info {
         __u32             state;
index c7e9a9d..6cdcc9a 100644 (file)
@@ -29,6 +29,7 @@
 extern atomic_t obd_memory;
 extern int obd_memmax;
 extern unsigned int obd_fail_loc;
+extern unsigned int obd_debug_peer_on_timeout;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_dump_on_eviction;
 extern unsigned int obd_timeout;          /* seconds */
index bda1766..8adbb45 100644 (file)
@@ -265,7 +265,7 @@ int ll_parse_mount_target(const char *target, char **mgsnid,
 int _sysio_lustre_init(void)
 {
         int err;
-        char *timeout = NULL;
+        char *envstr;
 #ifndef INIT_SYSIO
         extern void __liblustre_cleanup_(void);
 #endif
@@ -277,13 +277,23 @@ int _sysio_lustre_init(void)
                 perror("init llite driver");
                 return err;
         }
-        timeout = getenv("LIBLUSTRE_TIMEOUT");
-        if (timeout) {
-                obd_timeout = (unsigned int) strtol(timeout, NULL, 0);
-                printf("LibLustre: set obd timeout as %u seconds\n",
+
+        envstr = getenv("LIBLUSTRE_TIMEOUT");
+        if (envstr != NULL) {
+                obd_timeout = (unsigned int)strtol(envstr, NULL, 0);
+                printf("LibLustre: obd timeout=%u seconds\n",
                         obd_timeout);
         }
 
+       /* debug peer on timeout? */
+        envstr = getenv("LIBLUSTRE_DEBUG_PEER_ON_TIMEOUT");
+        if (envstr != NULL) {
+                obd_debug_peer_on_timeout = 
+                        (unsigned int)strtol(envstr, NULL, 0);
+                printf("LibLustre: debug peer on timeout=%d\n",
+                        obd_debug_peer_on_timeout ? 0 : 1);
+        }
+
 #ifndef INIT_SYSIO
         (void)atexit(__liblustre_cleanup_);
 #endif
@@ -291,7 +301,6 @@ int _sysio_lustre_init(void)
 }
 
 extern int _sysio_native_init();
-extern unsigned int obd_timeout;
 
 char *lustre_path = NULL;
 
index fe16b85..66f8681 100644 (file)
@@ -103,6 +103,7 @@ static void llu_fsop_gone(struct filesys *fs)
 
         OBD_FREE(sbi, sizeof(*sbi));
 
+        liblustre_wait_idle();
         EXIT;
 }
 
@@ -2027,7 +2028,7 @@ llu_fsswop_mount(const char *source,
         ptlrpc_req_finished(request);
 
         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
-        liblustre_wait_event(0);
+        liblustre_wait_idle();
 
         return 0;
 
@@ -2045,6 +2046,8 @@ out_free:
         if (mdc)
                 OBD_FREE(mdc, strlen(mdc) + 1);
         OBD_FREE(sbi, sizeof(*sbi));
+
+        liblustre_wait_idle();
         return err;
 }
 
index d907912..5aec6ae 100644 (file)
@@ -60,6 +60,7 @@ int obd_memmax;
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_fail_loc;
+unsigned int obd_debug_peer_on_timeout;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_dump_on_eviction;
 unsigned int obd_timeout = 100; /* seconds */
index 3443d83..019fff3 100644 (file)
@@ -49,6 +49,9 @@ SYSCTL_PROC(_lustre,          OID_AUTO,       lustre_kernel_version,
 SYSCTL_INT(_lustre,            OID_AUTO,       dump_on_timeout, 
           CTLTYPE_INT | CTLFLAG_RW,            &obd_dump_on_timeout,
           0,           "lustre_dump_on_timeout");
+SYSCTL_INT(_lustre,            OID_AUTO,       debug_peer_on_timeout, 
+          CTLTYPE_INT | CTLFLAG_RW,            &obd_debug_peer_on_timeout,
+          0,           "lustre_dump_on_timeout");
 SYSCTL_INT(_lustre,            OID_AUTO,       memused, 
           CTLTYPE_INT | CTLFLAG_RW,            (int *)&obd_memory.counter,
           0,           "lustre_memory_used");
@@ -61,6 +64,7 @@ static cfs_sysctl_table_t      parent_table[] = {
        &sysctl__lustre_fail_loc,
        &sysctl__lustre_timeout,
        &sysctl__lustre_dump_on_timeout,
+       &sysctl__lustre_debug_peer_on_timeout,
        &sysctl__lustre_upcall,
        &sysctl__lustre_memused,
        &sysctl__lustre_filter_sync_on_commit,
index bbb4769..f8e4800 100644 (file)
@@ -58,6 +58,7 @@ enum {
         OBD_SYNCFILTER,         /* XXX temporary, as we play with sync osts.. */
         OBD_LDLM_TIMEOUT,       /* LDLM timeout for ASTs before client eviction */
         OBD_DUMP_ON_EVICTION,   /* dump kernel debug log upon eviction */
+        OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
 };
 
 int LL_PROC_PROTO(proc_fail_loc)
@@ -86,6 +87,9 @@ static ctl_table obd_table[] = {
                 &proc_fail_loc},
         {OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL,
                 &proc_set_timeout},
+        {OBD_DEBUG_PEER_ON_TIMEOUT, "debug_peer_on_timeout", 
+                &obd_debug_peer_on_timeout,
+                sizeof(int), 0644, NULL, &proc_dointvec},
         {OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout,
                 sizeof(int), 0644, NULL, &proc_dointvec},
         {OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction,
index 142f4e4..7f279b4 100644 (file)
@@ -948,12 +948,10 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
 
         DEBUG_REQ(D_ERROR|D_NETERROR, req, "timeout (sent at %lu, %lus ago)",
                   (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent);
-#if 0
-        /* Leave this out normally: it creates too much console noise and we
-         * only need it when we're debugging networks */
-        if (imp != NULL)
+
+        if (imp != NULL && obd_debug_peer_on_timeout)
                 LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
-#endif
+
         spin_lock(&req->rq_lock);
         req->rq_timedout = 1;
         spin_unlock(&req->rq_lock);
index 501a739..777ef75 100644 (file)
@@ -484,25 +484,29 @@ int ptlrpc_ni_init(void)
 
 #ifndef __KERNEL__
 CFS_LIST_HEAD(liblustre_wait_callbacks);
+CFS_LIST_HEAD(liblustre_idle_callbacks);
 void *liblustre_services_callback;
 
 void *
-liblustre_register_wait_callback (int (*fn)(void *arg), void *arg)
+liblustre_register_waitidle_callback (struct list_head *callback_list,
+                                      const char *name,
+                                      int (*fn)(void *arg), void *arg)
 {
         struct liblustre_wait_callback *llwc;
         
         OBD_ALLOC(llwc, sizeof(*llwc));
         LASSERT (llwc != NULL);
         
+        llwc->llwc_name = name;
         llwc->llwc_fn = fn;
         llwc->llwc_arg = arg;
-        list_add_tail(&llwc->llwc_list, &liblustre_wait_callbacks);
+        list_add_tail(&llwc->llwc_list, callback_list);
         
         return (llwc);
 }
 
 void
-liblustre_deregister_wait_callback (void *opaque)
+liblustre_deregister_waitidle_callback (void *opaque)
 {
         struct liblustre_wait_callback *llwc = opaque;
         
@@ -510,6 +514,34 @@ liblustre_deregister_wait_callback (void *opaque)
         OBD_FREE(llwc, sizeof(*llwc));
 }
 
+void *
+liblustre_register_wait_callback (const char *name,
+                                  int (*fn)(void *arg), void *arg)
+{
+        return liblustre_register_waitidle_callback(&liblustre_wait_callbacks,
+                                                    name, fn, arg);
+}
+
+void
+liblustre_deregister_wait_callback (void *opaque)
+{
+        liblustre_deregister_waitidle_callback(opaque);
+}
+
+void *
+liblustre_register_idle_callback (const char *name,
+                                  int (*fn)(void *arg), void *arg)
+{
+        return liblustre_register_waitidle_callback(&liblustre_idle_callbacks,
+                                                    name, fn, arg);
+}
+
+void
+liblustre_deregister_idle_callback (void *opaque)
+{
+        liblustre_deregister_waitidle_callback(opaque);
+}
+
 int
 liblustre_check_events (int timeout)
 {
@@ -575,6 +607,38 @@ liblustre_wait_event (int timeout)
         return found_something;
 }
 
+void
+liblustre_wait_idle(void)
+{
+        static int recursed = 0;
+        
+        struct list_head               *tmp;
+        struct liblustre_wait_callback *llwc;
+        int                             idle = 0;
+
+        LASSERT(!recursed);
+        recursed = 1;
+        
+        do {
+                liblustre_wait_event(0);
+
+                idle = 1;
+
+                list_for_each(tmp, &liblustre_idle_callbacks) {
+                        llwc = list_entry(tmp, struct liblustre_wait_callback,
+                                          llwc_list);
+                        
+                        if (!llwc->llwc_fn(llwc->llwc_arg)) {
+                                idle = 0;
+                                break;
+                        }
+                }
+                        
+        } while (!idle);
+
+        recursed = 0;
+}
+
 #endif /* __KERNEL__ */
 
 int ptlrpc_init_portals(void)
@@ -587,7 +651,8 @@ int ptlrpc_init_portals(void)
         }
 #ifndef __KERNEL__
         liblustre_services_callback = 
-                liblustre_register_wait_callback(&liblustre_check_services, NULL);
+                liblustre_register_wait_callback("liblustre_check_services",
+                                                 &liblustre_check_services, NULL);
 #endif
         rc = ptlrpcd_addref();
         if (rc == 0)
index be3840b..77507ca 100644 (file)
@@ -594,7 +594,8 @@ int ptlrpc_start_pinger(void)
 {
 #ifdef ENABLE_PINGER
         memset(&pinger_args, 0, sizeof(pinger_args));
-        pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs,
+        pinger_callback = liblustre_register_wait_callback("pinger_check_rpcs",
+                                                           &pinger_check_rpcs,
                                                            &pinger_args);
 #endif
         return 0;
index efde23a..1979de5 100644 (file)
@@ -54,7 +54,8 @@ struct ptlrpcd_ctl {
         char                      pc_name[16];
 #ifndef __KERNEL__
         int                       pc_recurred;
-        void                     *pc_callback;
+        void                     *pc_wait_callback;
+        void                     *pc_idle_callback;
 #endif
 };
 
@@ -210,6 +211,15 @@ int ptlrpcd_check_async_rpcs(void *arg)
         pc->pc_recurred--;
         return rc;
 }
+
+int ptlrpcd_idle(void *arg)
+{
+        struct ptlrpcd_ctl *pc = arg;
+
+        return (list_empty(&pc->pc_set->set_new_requests) &&
+                pc->pc_set->set_remaining == 0);
+}
+
 #endif
 
 static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
@@ -242,8 +252,12 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
 
         wait_for_completion(&pc->pc_starting);
 #else
-        pc->pc_callback =
-                liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
+        pc->pc_wait_callback =
+                liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
+                                                 &ptlrpcd_check_async_rpcs, pc);
+        pc->pc_idle_callback =
+                liblustre_register_wait_callback("ptlrpcd_check_idle_rpcs",
+                                                 &ptlrpcd_idle, pc);
         (void)rc;
 #endif
         RETURN(0);
@@ -257,7 +271,8 @@ static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
         obd_zombie_impexp_notify = NULL;
         wait_for_completion(&pc->pc_finishing);
 #else
-        liblustre_deregister_wait_callback(pc->pc_callback);
+        liblustre_deregister_wait_callback(pc->pc_wait_callback);
+        liblustre_deregister_wait_callback(pc->pc_idle_callback);
 #endif
         ptlrpc_set_destroy(pc->pc_set);
 }