X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Fpinger.c;h=01d7d236cb443ab8fbb6471dafdfdcb1388b34ad;hb=7c50b2ddca2883c732b8d71e6f20671572bec907;hp=ebc69e1293d76bf3b63bd9d0a082c110338af263;hpb=576c9a8212bc6607146d99e3413f7a24cbf91b5c;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index ebc69e1..01d7d23 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -23,86 +23,50 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#ifndef __KERNEL__ +#include +#else #include #include - #define DEBUG_SUBSYSTEM S_RPC +#endif + #include #include #include "ptlrpc_internal.h" -static struct ptlrpc_thread *pinger_thread = NULL; static DECLARE_MUTEX(pinger_sem); static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); -int ptlrpc_start_pinger(void); -int ptlrpc_stop_pinger(void); - -void ptlrpc_pinger_sending_on_import(struct obd_import *imp) -{ - down(&pinger_sem); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - up(&pinger_sem); -} +static struct ptlrpc_thread *pinger_thread = NULL; -int ptlrpc_pinger_add_import(struct obd_import *imp) +int ptlrpc_ping(struct obd_import *imp) { - int rc; + struct ptlrpc_request *req; + int rc = 0; ENTRY; -#ifndef ENABLE_PINGER - RETURN(0); -#else - if (!list_empty(&imp->imp_pinger_chain)) - RETURN(-EALREADY); - - down(&pinger_sem); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_start_pinger(); - if (rc < 0) - RETURN(rc); - down(&pinger_sem); + req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, + NULL); + if (req) { + DEBUG_REQ(D_HA, req, "pinging %s->%s", + imp->imp_obd->obd_uuid.uuid, + imp->imp_target_uuid.uuid); + req->rq_no_resend = req->rq_no_delay = 1; + req->rq_replen = lustre_msg_size(0, + NULL); + ptlrpcd_add_req(req); + } else { + CERROR("OOM trying to ping %s->%s\n", + imp->imp_obd->obd_uuid.uuid, + imp->imp_target_uuid.uuid); + rc = -ENOMEM; } - - CDEBUG(D_HA, "adding pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */ - class_import_get(imp); - up(&pinger_sem); - RETURN(0); -#endif -} - -int ptlrpc_pinger_del_import(struct obd_import *imp) -{ - int rc; - ENTRY; - -#ifndef ENABLE_PINGER - RETURN(0); -#else - if (list_empty(&imp->imp_pinger_chain)) - RETURN(-ENOENT); - down(&pinger_sem); - list_del_init(&imp->imp_pinger_chain); - CDEBUG(D_HA, "removing pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - class_import_put(imp); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_stop_pinger(); - if (rc) - RETURN(rc); - down(&pinger_sem); - } - up(&pinger_sem); - RETURN(0); -#endif + RETURN(rc); } +#ifdef __KERNEL__ static int ptlrpc_pinger_main(void *arg) { struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; @@ -118,14 +82,7 @@ static int ptlrpc_pinger_main(void *arg) RECALC_SIGPENDING; SIGNAL_MASK_UNLOCK(current, flags); -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - sprintf(current->comm, "%s|%d", data->name, - current->thread.mode.tt.extern_pid); -#else - strcpy(current->comm, data->name); -#endif + THREAD_NAME(current->comm, "%s", data->name); unlock_kernel(); /* Record that the thread is running */ @@ -136,104 +93,55 @@ static int ptlrpc_pinger_main(void *arg) while (1) { unsigned long this_ping = jiffies; long time_to_next_ping; - struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL); - struct ptlrpc_request_set *set; - struct ptlrpc_request *req; + struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ, + NULL, NULL); struct list_head *iter; - wait_queue_t set_wait; - int rc; - set = ptlrpc_prep_set(); down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct obd_import *imp = - list_entry(iter, struct obd_import, imp_pinger_chain); - int generation, level; + list_entry(iter, struct obd_import, + imp_pinger_chain); + int force, level; unsigned long flags; - if (imp->imp_next_ping <= this_ping) { - /* Add a ping. */ - spin_lock_irqsave(&imp->imp_lock, flags); - generation = imp->imp_generation; - level = imp->imp_level; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (level != LUSTRE_CONN_FULL) { - CDEBUG(D_HA, "not pinging %s (in recovery)\n", - imp->imp_target_uuid.uuid); - continue; - } - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); - if (!req) { - CERROR("OOM trying to ping\n"); - break; + spin_lock_irqsave(&imp->imp_lock, flags); + level = imp->imp_state; + force = imp->imp_force_verify; + if (force) + imp->imp_force_verify = 0; + spin_unlock_irqrestore(&imp->imp_lock, flags); + + if (imp->imp_next_ping <= this_ping || force) { + if (level == LUSTRE_IMP_DISCON) { + /* wait at least a timeout before + trying recovery again. */ + imp->imp_next_ping = jiffies + + (obd_timeout * HZ); + ptlrpc_initiate_recovery(imp); + } + else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) { + CDEBUG(D_HA, + "not pinging %s (in recovery " + " or recovery disabled: %s)\n", + imp->imp_target_uuid.uuid, + ptlrpc_import_state_name(level)); + } + else if (imp->imp_pingable || force) { + ptlrpc_ping(imp); } - req->rq_replen = lustre_msg_size(0, NULL); - req->rq_level = LUSTRE_CONN_FULL; - req->rq_phase = RQ_PHASE_RPC; - req->rq_import_generation = generation; - ptlrpc_set_add_req(set, req); - } else { - CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n", - imp->imp_target_uuid.uuid, imp->imp_next_ping, - this_ping); - } - } - up(&pinger_sem); - /* Might be empty, that's OK. */ - if (set->set_remaining == 0) - CDEBUG(D_HA, "nothing to ping\n"); - list_for_each(iter, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(iter, struct ptlrpc_request, rq_set_chain); - DEBUG_REQ(D_HA, req, "pinging %s->%s", - req->rq_import->imp_obd->obd_uuid.uuid, - req->rq_import->imp_target_uuid.uuid); - (void)ptl_send_rpc(req); - } - - /* Have to wait on both the thread's queue and the set's. */ - init_waitqueue_entry(&set_wait, current); - add_wait_queue(&set->set_waitq, &set_wait); - rc = l_wait_event(thread->t_ctl_waitq, - thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set), - &lwi); - remove_wait_queue(&set->set_waitq, &set_wait); - CDEBUG(D_HA, "ping complete (%lu)\n", jiffies); - - if (thread->t_flags & SVC_STOPPING) { - thread->t_flags &= ~SVC_STOPPING; - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, - rq_set_chain); - if (!req->rq_replied) - ptlrpc_unregister_reply(req); + } else { + if (imp->imp_pingable) + CDEBUG(D_HA, "don't need to ping %s " + "(%lu > %lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_next_ping, this_ping); } - ptlrpc_set_destroy(set); - EXIT; - break; - } - - /* Expire all the requests that didn't come back. */ - down(&pinger_sem); - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, rq_set_chain); - - if (req->rq_replied) - continue; - - req->rq_phase = RQ_PHASE_COMPLETE; - set->set_remaining--; - /* If it was disconnected, don't sweat it. */ - if (list_empty(&req->rq_import->imp_pinger_chain)) - continue; - - ptlrpc_expire_one_request(req); } up(&pinger_sem); - ptlrpc_set_destroy(set); /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; @@ -241,12 +149,16 @@ static int ptlrpc_pinger_main(void *arg) this_ping + (obd_timeout * HZ)); if (time_to_next_ping > 0) { lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL); - l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING, + l_wait_event(thread->t_ctl_waitq, + thread->t_flags & (SVC_STOPPING|SVC_EVENT), &lwi); if (thread->t_flags & SVC_STOPPING) { thread->t_flags &= ~SVC_STOPPING; EXIT; break; + } else if (thread->t_flags & SVC_EVENT) { + /* woken after adding import to reset timer */ + thread->t_flags &= ~SVC_EVENT; } } } @@ -263,15 +175,17 @@ int ptlrpc_start_pinger(void) struct l_wait_info lwi = { 0 }; struct ptlrpc_svc_data d; int rc; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); if (pinger_thread != NULL) - GOTO(out, rc = -EALREADY); + RETURN(-EALREADY); OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); if (pinger_thread == NULL) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); init_waitqueue_head(&pinger_thread->t_ctl_waitq); d.name = "ll_ping"; @@ -283,13 +197,11 @@ int ptlrpc_start_pinger(void) if (rc < 0) { CERROR("cannot start thread: %d\n", rc); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); - GOTO(out, rc); + RETURN(rc); } l_wait_event(pinger_thread->t_ctl_waitq, pinger_thread->t_flags & SVC_RUNNING, &lwi); - out: - up(&pinger_sem); RETURN(rc); } @@ -297,20 +209,237 @@ int ptlrpc_stop_pinger(void) { struct l_wait_info lwi = { 0 }; int rc = 0; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); if (pinger_thread == NULL) - GOTO(out, rc = -EALREADY); - + RETURN(-EALREADY); + down(&pinger_sem); pinger_thread->t_flags = SVC_STOPPING; wake_up(&pinger_thread->t_ctl_waitq); + up(&pinger_sem); + l_wait_event(pinger_thread->t_ctl_waitq, (pinger_thread->t_flags & SVC_STOPPED), &lwi); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); + pinger_thread = NULL; + RETURN(rc); +} - out: +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + down(&pinger_sem); + imp->imp_next_ping = jiffies + (obd_timeout * HZ); up(&pinger_sem); - RETURN(rc); } + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + ENTRY; + if (!list_empty(&imp->imp_pinger_chain)) + RETURN(-EALREADY); + + down(&pinger_sem); + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + imp->imp_next_ping = jiffies + (obd_timeout * HZ); + /* XXX sort, blah blah */ + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + + ptlrpc_pinger_wake_up(); + up(&pinger_sem); + + RETURN(0); +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + ENTRY; + if (list_empty(&imp->imp_pinger_chain)) + RETURN(-ENOENT); + + down(&pinger_sem); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + class_import_put(imp); + up(&pinger_sem); + RETURN(0); +} + +void ptlrpc_pinger_wake_up() +{ +#ifdef ENABLE_PINGER + pinger_thread->t_flags |= SVC_EVENT; + wake_up(&pinger_thread->t_ctl_waitq); +#endif +} + +#else /* !__KERNEL__ */ + +/* XXX + * the current implementation of pinger in liblustre is not optimized + */ + +static struct pinger_data { + int pd_recursion; + unsigned long pd_this_ping; + unsigned long pd_next_ping; + int pd_force_check; +} pinger_args; + +static int pinger_check_rpcs(void *arg) +{ + unsigned long curtime = time(NULL); + struct list_head *iter; + struct pinger_data *pd = &pinger_args; + + /* prevent recursion */ + if (pd->pd_recursion++) { + CDEBUG(D_HA, "pinger: recursion! quit\n"); + pd->pd_recursion--; + return 0; + } + + /* have we reached ping point? */ + if (pd->pd_next_ping > curtime && !pd->pd_force_check) { + pd->pd_recursion--; + return 0; + } + + if (pd->pd_force_check) + pd->pd_force_check = 0; + + pd->pd_this_ping = curtime; + + /* add rpcs into set */ + down(&pinger_sem); + list_for_each(iter, &pinger_imports) { + struct obd_import *imp = + list_entry(iter, struct obd_import, + imp_pinger_chain); + int level, force; + unsigned long flags; + + + spin_lock_irqsave(&imp->imp_lock, flags); + level = imp->imp_state; + force = imp->imp_force_verify; + if (force) + imp->imp_force_verify = 0; + spin_unlock_irqrestore(&imp->imp_lock, flags); + + if (imp->imp_next_ping <= pd->pd_this_ping || force) { + if (level == LUSTRE_IMP_DISCON) { + /* wait at least a timeout before + trying recovery again. */ + imp->imp_next_ping = time(NULL) + + (obd_timeout * HZ); + ptlrpc_initiate_recovery(imp); + } + else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) { + CDEBUG(D_HA, + "not pinging %s (in recovery " + " or recovery disabled: %s)\n", + imp->imp_target_uuid.uuid, + ptlrpc_import_state_name(level)); + } + else if (imp->imp_pingable || force) { + ptlrpc_ping(imp); + } + + } else { + if (imp->imp_pingable) { + CDEBUG(D_HA, "don't need to ping %s " + "(%lu > %lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_next_ping, pd->pd_this_ping); + } + } + } + + up(&pinger_sem); + + pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ); + + CDEBUG(D_HA, "finished a round ping\n"); + pd->pd_recursion--; + return 0; +} + +static void *pinger_callback = NULL; + +int ptlrpc_start_pinger(void) +{ + memset(&pinger_args, 0, sizeof(pinger_args)); +#ifdef ENABLE_PINGER + pinger_callback = + liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args); +#endif + return 0; +} + +int ptlrpc_stop_pinger(void) +{ +#ifdef ENABLE_PINGER + if (pinger_callback) + liblustre_deregister_wait_callback(pinger_callback); +#endif + return 0; +} + +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + down(&pinger_sem); + imp->imp_next_ping = time(NULL) + obd_timeout; + if (pinger_args.pd_next_ping > imp->imp_next_ping) { + CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n", + imp->imp_next_ping, time(NULL)); + pinger_args.pd_next_ping = imp->imp_next_ping; + } + up(&pinger_sem); +} + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + ENTRY; + if (!list_empty(&imp->imp_pinger_chain)) + RETURN(-EALREADY); + + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + ptlrpc_pinger_sending_on_import(imp); + + down(&pinger_sem); + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + up(&pinger_sem); + + RETURN(0); +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + ENTRY; + if (list_empty(&imp->imp_pinger_chain)) + RETURN(-ENOENT); + + down(&pinger_sem); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + class_import_put(imp); + up(&pinger_sem); + RETURN(0); +} + +void ptlrpc_pinger_wake_up() +{ + pinger_args.pd_force_check = 1; +} +#endif /* !__KERNEL__ */