Whamcloud - gitweb
The pinger was deadlocking on itself, in some rare and improbably but
authorphil <phil>
Mon, 13 Oct 2003 22:31:01 +0000 (22:31 +0000)
committerphil <phil>
Mon, 13 Oct 2003 22:31:01 +0000 (22:31 +0000)
very real cases, so I reworked it to be simpler.

It's started once from ptlrpc_init now, and stopped once from ptlrpc_exit

Adding and deleting imports no longer causes the thread to be created
and go away.  Most importantly, we no longer hold the sem in
stop_pinger across the wake_up and subsequent l_wait_event; if the
pinger thread was already doing something, blocked on the pinger_sem,
it would never wake up and deadlock would ensue.

lustre/ptlrpc/pinger.c

index d972076..f8661cf 100644 (file)
@@ -35,74 +35,6 @@ static struct ptlrpc_thread *pinger_thread = NULL;
 static DECLARE_MUTEX(pinger_sem);
 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
 
-int ptlrpc_start_pinger(void);
-int ptlrpc_stop_pinger(void);
-
-void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
-{
-        down(&pinger_sem);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
-        up(&pinger_sem);
-}
-
-int ptlrpc_pinger_add_import(struct obd_import *imp)
-{
-#ifndef ENABLE_PINGER
-        return 0;
-#else
-        int rc;
-        ENTRY;
-
-        if (!list_empty(&imp->imp_pinger_chain))
-                RETURN(-EALREADY);
-
-        down(&pinger_sem);
-        if (list_empty(&pinger_imports)) {
-                up(&pinger_sem);
-                rc = ptlrpc_start_pinger();
-                if (rc < 0)
-                        RETURN(rc);
-                down(&pinger_sem);
-        }
-                
-        CDEBUG(D_HA, "adding pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
-        list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */
-        class_import_get(imp);
-        up(&pinger_sem);
-        RETURN(0);
-#endif
-}
-
-int ptlrpc_pinger_del_import(struct obd_import *imp)
-{
-#ifndef ENABLE_PINGER
-        return 0;
-#else
-        int rc;
-        ENTRY;
-
-        if (list_empty(&imp->imp_pinger_chain))
-                RETURN(-ENOENT);
-
-        down(&pinger_sem);
-        list_del_init(&imp->imp_pinger_chain);
-        CDEBUG(D_HA, "removing pingable import %s->%s\n",
-               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
-        class_import_put(imp);
-        if (list_empty(&pinger_imports)) {
-                up(&pinger_sem);
-                rc = ptlrpc_stop_pinger();
-                if (rc)
-                        RETURN(rc);
-                down(&pinger_sem);
-        }
-        up(&pinger_sem);
-        RETURN(0);
-#endif
-}
-
 static int ptlrpc_pinger_main(void *arg)
 {
         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
@@ -173,9 +105,9 @@ static int ptlrpc_pinger_main(void *arg)
                                 req->rq_import_generation = generation;
                                 ptlrpc_set_add_req(set, req);
                         } else {
-                                CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n",
-                                       imp->imp_target_uuid.uuid, imp->imp_next_ping,
-                                       this_ping);
+                                CDEBUG(D_HA, "don't need to ping %s (%lu > "
+                                       "%lu)\n", imp->imp_target_uuid.uuid,
+                                       imp->imp_next_ping, this_ping);
                         }
                 }
                 up(&pinger_sem);
@@ -185,7 +117,8 @@ static int ptlrpc_pinger_main(void *arg)
                         CDEBUG(D_HA, "nothing to ping\n");
                 list_for_each(iter, &set->set_requests) {
                         struct ptlrpc_request *req =
-                                list_entry(iter, struct ptlrpc_request, rq_set_chain);
+                                list_entry(iter, struct ptlrpc_request,
+                                           rq_set_chain);
                         DEBUG_REQ(D_HA, req, "pinging %s->%s",
                                   req->rq_import->imp_obd->obd_uuid.uuid,
                                   req->rq_import->imp_target_uuid.uuid);
@@ -196,7 +129,8 @@ static int ptlrpc_pinger_main(void *arg)
                 init_waitqueue_entry(&set_wait, current);
                 add_wait_queue(&set->set_waitq, &set_wait);
                 rc = l_wait_event(thread->t_ctl_waitq,
-                                  thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set),
+                                  thread->t_flags & SVC_STOPPING ||
+                                  ptlrpc_check_set(set),
                                   &lwi);
                 remove_wait_queue(&set->set_waitq, &set_wait);
                 CDEBUG(D_HA, "ping complete (%lu)\n", jiffies);
@@ -217,7 +151,8 @@ static int ptlrpc_pinger_main(void *arg)
                 /* Expire all the requests that didn't come back. */
                 down(&pinger_sem);
                 list_for_each(iter, &set->set_requests) {
-                        req = list_entry(iter, struct ptlrpc_request, rq_set_chain);
+                        req = list_entry(iter, struct ptlrpc_request,
+                                         rq_set_chain);
 
                         if (req->rq_replied)
                                 continue;
@@ -239,8 +174,8 @@ static int ptlrpc_pinger_main(void *arg)
                        this_ping + (obd_timeout * HZ));
                 if (time_to_next_ping > 0) {
                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
-                        l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING,
-                                     &lwi);
+                        l_wait_event(thread->t_ctl_waitq,
+                                     thread->t_flags & SVC_STOPPING, &lwi);
                         if (thread->t_flags & SVC_STOPPING) {
                                 thread->t_flags &= ~SVC_STOPPING;
                                 EXIT;
@@ -261,15 +196,17 @@ int ptlrpc_start_pinger(void)
         struct l_wait_info lwi = { 0 };
         struct ptlrpc_svc_data d;
         int rc;
+#ifndef ENABLE_PINGER
+        return 0;
+#endif
         ENTRY;
 
-        down(&pinger_sem);
         if (pinger_thread != NULL)
-                GOTO(out, rc = -EALREADY);
+                RETURN(-EALREADY);
 
         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
         if (pinger_thread == NULL)
-                GOTO(out, rc = -ENOMEM);
+                RETURN(-ENOMEM);
         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
 
         d.name = "ll_ping";
@@ -281,13 +218,11 @@ int ptlrpc_start_pinger(void)
         if (rc < 0) {
                 CERROR("cannot start thread: %d\n", rc);
                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
-                GOTO(out, rc);
+                RETURN(rc);
         }
         l_wait_event(pinger_thread->t_ctl_waitq,
                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
 
- out:
-        up(&pinger_sem);
         RETURN(rc);
 }
 
@@ -295,21 +230,62 @@ int ptlrpc_stop_pinger(void)
 {
         struct l_wait_info lwi = { 0 };
         int rc = 0;
+#ifndef ENABLE_PINGER
+        return 0;
+#endif
         ENTRY;
 
-        down(&pinger_sem);
         if (pinger_thread == NULL)
-                GOTO(out, rc = -EALREADY);
-
+                RETURN(-EALREADY);
+        down(&pinger_sem);
         pinger_thread->t_flags = SVC_STOPPING;
         wake_up(&pinger_thread->t_ctl_waitq);
+        up(&pinger_sem);
+
         l_wait_event(pinger_thread->t_ctl_waitq,
                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
 
         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
         pinger_thread = NULL;
+        RETURN(rc);
+}
 
- out:
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
+{
+        down(&pinger_sem);
+        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
         up(&pinger_sem);
-        RETURN(rc);
+}
+
+int ptlrpc_pinger_add_import(struct obd_import *imp)
+{
+        ENTRY;
+        if (!list_empty(&imp->imp_pinger_chain))
+                RETURN(-EALREADY);
+
+        down(&pinger_sem);
+        CDEBUG(D_HA, "adding pingable import %s->%s\n",
+               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
+        /* XXX sort, blah blah */
+        list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
+        class_import_get(imp);
+        up(&pinger_sem);
+
+        RETURN(0);
+}
+
+int ptlrpc_pinger_del_import(struct obd_import *imp)
+{
+        ENTRY;
+        if (list_empty(&imp->imp_pinger_chain))
+                RETURN(-ENOENT);
+
+        down(&pinger_sem);
+        list_del_init(&imp->imp_pinger_chain);
+        CDEBUG(D_HA, "removing pingable import %s->%s\n",
+               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+        class_import_put(imp);
+        up(&pinger_sem);
+        RETURN(0);
 }