From: phil Date: Mon, 13 Oct 2003 22:31:01 +0000 (+0000) Subject: The pinger was deadlocking on itself, in some rare and improbably but X-Git-Tag: v1_7_0_51~2^7~405 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=2e034dbab0f8baf28c6ab3f5425c6fe8ac584ab1;p=fs%2Flustre-release.git The pinger was deadlocking on itself, in some rare and improbably but very real cases, so I reworked it to be simpler. It's started once from ptlrpc_init now, and stopped once from ptlrpc_exit Adding and deleting imports no longer causes the thread to be created and go away. Most importantly, we no longer hold the sem in stop_pinger across the wake_up and subsequent l_wait_event; if the pinger thread was already doing something, blocked on the pinger_sem, it would never wake up and deadlock would ensue. --- diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index d972076..f8661cf 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -35,74 +35,6 @@ static struct ptlrpc_thread *pinger_thread = NULL; static DECLARE_MUTEX(pinger_sem); static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); -int ptlrpc_start_pinger(void); -int ptlrpc_stop_pinger(void); - -void ptlrpc_pinger_sending_on_import(struct obd_import *imp) -{ - down(&pinger_sem); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - up(&pinger_sem); -} - -int ptlrpc_pinger_add_import(struct obd_import *imp) -{ -#ifndef ENABLE_PINGER - return 0; -#else - int rc; - ENTRY; - - if (!list_empty(&imp->imp_pinger_chain)) - RETURN(-EALREADY); - - down(&pinger_sem); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_start_pinger(); - if (rc < 0) - RETURN(rc); - down(&pinger_sem); - } - - CDEBUG(D_HA, "adding pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */ - class_import_get(imp); - up(&pinger_sem); - RETURN(0); -#endif -} - -int ptlrpc_pinger_del_import(struct obd_import *imp) -{ -#ifndef ENABLE_PINGER - return 0; -#else - int rc; - ENTRY; - - if (list_empty(&imp->imp_pinger_chain)) - RETURN(-ENOENT); - - down(&pinger_sem); - list_del_init(&imp->imp_pinger_chain); - CDEBUG(D_HA, "removing pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - class_import_put(imp); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_stop_pinger(); - if (rc) - RETURN(rc); - down(&pinger_sem); - } - up(&pinger_sem); - RETURN(0); -#endif -} - static int ptlrpc_pinger_main(void *arg) { struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; @@ -173,9 +105,9 @@ static int ptlrpc_pinger_main(void *arg) req->rq_import_generation = generation; ptlrpc_set_add_req(set, req); } else { - CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n", - imp->imp_target_uuid.uuid, imp->imp_next_ping, - this_ping); + CDEBUG(D_HA, "don't need to ping %s (%lu > " + "%lu)\n", imp->imp_target_uuid.uuid, + imp->imp_next_ping, this_ping); } } up(&pinger_sem); @@ -185,7 +117,8 @@ static int ptlrpc_pinger_main(void *arg) CDEBUG(D_HA, "nothing to ping\n"); list_for_each(iter, &set->set_requests) { struct ptlrpc_request *req = - list_entry(iter, struct ptlrpc_request, rq_set_chain); + list_entry(iter, struct ptlrpc_request, + rq_set_chain); DEBUG_REQ(D_HA, req, "pinging %s->%s", req->rq_import->imp_obd->obd_uuid.uuid, req->rq_import->imp_target_uuid.uuid); @@ -196,7 +129,8 @@ static int ptlrpc_pinger_main(void *arg) init_waitqueue_entry(&set_wait, current); add_wait_queue(&set->set_waitq, &set_wait); rc = l_wait_event(thread->t_ctl_waitq, - thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set), + thread->t_flags & SVC_STOPPING || + ptlrpc_check_set(set), &lwi); remove_wait_queue(&set->set_waitq, &set_wait); CDEBUG(D_HA, "ping complete (%lu)\n", jiffies); @@ -217,7 +151,8 @@ static int ptlrpc_pinger_main(void *arg) /* Expire all the requests that didn't come back. */ down(&pinger_sem); list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, rq_set_chain); + req = list_entry(iter, struct ptlrpc_request, + rq_set_chain); if (req->rq_replied) continue; @@ -239,8 +174,8 @@ static int ptlrpc_pinger_main(void *arg) this_ping + (obd_timeout * HZ)); if (time_to_next_ping > 0) { lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL); - l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING, - &lwi); + l_wait_event(thread->t_ctl_waitq, + thread->t_flags & SVC_STOPPING, &lwi); if (thread->t_flags & SVC_STOPPING) { thread->t_flags &= ~SVC_STOPPING; EXIT; @@ -261,15 +196,17 @@ int ptlrpc_start_pinger(void) struct l_wait_info lwi = { 0 }; struct ptlrpc_svc_data d; int rc; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); if (pinger_thread != NULL) - GOTO(out, rc = -EALREADY); + RETURN(-EALREADY); OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); if (pinger_thread == NULL) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); init_waitqueue_head(&pinger_thread->t_ctl_waitq); d.name = "ll_ping"; @@ -281,13 +218,11 @@ int ptlrpc_start_pinger(void) if (rc < 0) { CERROR("cannot start thread: %d\n", rc); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); - GOTO(out, rc); + RETURN(rc); } l_wait_event(pinger_thread->t_ctl_waitq, pinger_thread->t_flags & SVC_RUNNING, &lwi); - out: - up(&pinger_sem); RETURN(rc); } @@ -295,21 +230,62 @@ int ptlrpc_stop_pinger(void) { struct l_wait_info lwi = { 0 }; int rc = 0; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); if (pinger_thread == NULL) - GOTO(out, rc = -EALREADY); - + RETURN(-EALREADY); + down(&pinger_sem); pinger_thread->t_flags = SVC_STOPPING; wake_up(&pinger_thread->t_ctl_waitq); + up(&pinger_sem); + l_wait_event(pinger_thread->t_ctl_waitq, (pinger_thread->t_flags & SVC_STOPPED), &lwi); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); pinger_thread = NULL; + RETURN(rc); +} - out: +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + down(&pinger_sem); + imp->imp_next_ping = jiffies + (obd_timeout * HZ); up(&pinger_sem); - RETURN(rc); +} + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + ENTRY; + if (!list_empty(&imp->imp_pinger_chain)) + RETURN(-EALREADY); + + down(&pinger_sem); + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + imp->imp_next_ping = jiffies + (obd_timeout * HZ); + /* XXX sort, blah blah */ + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + up(&pinger_sem); + + RETURN(0); +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + ENTRY; + if (list_empty(&imp->imp_pinger_chain)) + RETURN(-ENOENT); + + down(&pinger_sem); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + class_import_put(imp); + up(&pinger_sem); + RETURN(0); }