X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Fpinger.c;h=866e7eb649260a13a50ab42bb21242fda1f59c80;hb=8c9a3b75ac27ee0b04593ac5424a0b74d176ccd1;hp=c81fb516d89b4368918dded4abea4a088b5ea6ad;hpb=a2a0746305449dbd925879b14dc2c0d6040bb8bf;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index c81fb51..866e7eb 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -23,84 +23,138 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#ifndef __KERNEL__ +#include +#else #include #include - #define DEBUG_SUBSYSTEM S_RPC +#endif + #include #include #include "ptlrpc_internal.h" -static struct ptlrpc_thread *pinger_thread = NULL; +#define PINGER_RATE 3 /* how many pings we'll do in obd_timeout period */ + static DECLARE_MUTEX(pinger_sem); static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); -int ptlrpc_start_pinger(void); -int ptlrpc_stop_pinger(void); +int ptlrpc_ping(struct obd_import *imp) +{ + struct ptlrpc_request *req; + int rc = 0; + ENTRY; + + req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL); + if (req) { + DEBUG_REQ(D_HA, req, "pinging %s->%s", + imp->imp_obd->obd_uuid.uuid, + imp->imp_target_uuid.uuid); + req->rq_no_resend = req->rq_no_delay = 1; + req->rq_replen = lustre_msg_size(0, NULL); + req->rq_timeout = obd_timeout / PINGER_RATE; + ptlrpcd_add_req(req); + } else { + CERROR("OOM trying to ping %s->%s\n", + imp->imp_obd->obd_uuid.uuid, + imp->imp_target_uuid.uuid); + rc = -ENOMEM; + } -void ptlrpc_pinger_sending_on_import(struct obd_import *imp) + RETURN(rc); +} + +#ifdef __KERNEL__ +static inline int ptlrpc_next_ping(struct obd_import *imp) { - down(&pinger_sem); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - up(&pinger_sem); + return jiffies + (obd_timeout / PINGER_RATE * HZ); } -int ptlrpc_pinger_add_import(struct obd_import *imp) +static inline int ptlrpc_next_reconnect(struct obd_import *imp) { -#ifndef ENABLE_PINGER - return 0; -#else - int rc; - ENTRY; + if (imp->imp_server_timeout) + return jiffies + (obd_timeout / 2 * HZ); + else + return jiffies + (obd_timeout * HZ); +} - if (!list_empty(&imp->imp_pinger_chain)) - RETURN(-EALREADY); +static atomic_t suspend_timeouts = ATOMIC_INIT(0); +static wait_queue_head_t suspend_timeouts_waitq; - down(&pinger_sem); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_start_pinger(); - if (rc < 0) - RETURN(rc); - down(&pinger_sem); - } +void ptlrpc_deactivate_timeouts(void) +{ + CDEBUG(D_HA, "deactivate timeouts\n"); + atomic_inc(&suspend_timeouts); +} - CDEBUG(D_HA, "adding pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */ - class_import_get(imp); - up(&pinger_sem); - RETURN(0); -#endif +void ptlrpc_activate_timeouts(void) +{ + CDEBUG(D_HA, "activate timeouts\n"); + LASSERT(atomic_read(&suspend_timeouts) > 0); + if (atomic_dec_and_test(&suspend_timeouts)) + wake_up(&suspend_timeouts_waitq); } -int ptlrpc_pinger_del_import(struct obd_import *imp) +int ptlrpc_check_suspend(void) { -#ifndef ENABLE_PINGER + if (atomic_read(&suspend_timeouts)) + return 1; return 0; -#else - int rc; - ENTRY; +} - if (list_empty(&imp->imp_pinger_chain)) - RETURN(-ENOENT); +int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req) +{ + struct l_wait_info lwi; + + if (atomic_read(&suspend_timeouts)) { + DEBUG_REQ(D_NET, req, "-- suspend %d regular timeout", + atomic_read(&suspend_timeouts)); + lwi = LWI_INTR(NULL, NULL); + l_wait_event(suspend_timeouts_waitq, + atomic_read(&suspend_timeouts) == 0, &lwi); + DEBUG_REQ(D_NET, req, "-- recharge regular timeout"); + return 1; + } + return 0; +} - down(&pinger_sem); - list_del_init(&imp->imp_pinger_chain); - CDEBUG(D_HA, "removing pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - class_import_put(imp); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_stop_pinger(); - if (rc) - RETURN(rc); - down(&pinger_sem); +static void ptlrpc_pinger_process_import(struct obd_import *imp, + unsigned long this_ping) +{ + unsigned long flags; + int force, level; + + spin_lock_irqsave(&imp->imp_lock, flags); + level = imp->imp_state; + force = imp->imp_force_verify; + if (force) + imp->imp_force_verify = 0; + spin_unlock_irqrestore(&imp->imp_lock, flags); + + if ((imp->imp_next_ping - this_ping > 0) && + (imp->imp_next_ping - this_ping > obd_timeout * HZ)) { + CWARN("wrong ping time %lu (current %lu)\n", + imp->imp_next_ping, this_ping); + imp->imp_next_ping = ptlrpc_next_reconnect(imp); + } + + if (imp->imp_next_ping > this_ping && force == 0) + return; + + if (level == LUSTRE_IMP_DISCON && !imp->imp_deactive) { + /* wait at least a timeout before trying recovery again */ + imp->imp_next_ping = ptlrpc_next_reconnect(imp); + ptlrpc_initiate_recovery(imp); + } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { + CDEBUG(D_HA, "not pinging %s (in recovery " + " or recovery disabled: %s)\n", + imp->imp_target_uuid.uuid, + ptlrpc_import_state_name(level)); + } else if (imp->imp_pingable || force) { + imp->imp_next_ping = ptlrpc_next_ping(imp); + ptlrpc_ping(imp); } - up(&pinger_sem); - RETURN(0); -#endif } static int ptlrpc_pinger_main(void *arg) @@ -118,7 +172,10 @@ static int ptlrpc_pinger_main(void *arg) RECALC_SIGPENDING; SIGNAL_MASK_UNLOCK(current, flags); - THREAD_NAME(current->comm, "%s", data->name); + LASSERTF(strlen(data->name) < sizeof(current->comm), + "name %d > len %d\n", + (int)strlen(data->name), (int)sizeof(current->comm)); + THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name); unlock_kernel(); /* Record that the thread is running */ @@ -129,121 +186,45 @@ static int ptlrpc_pinger_main(void *arg) while (1) { unsigned long this_ping = jiffies; long time_to_next_ping; - struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL); - struct ptlrpc_request_set *set; - struct ptlrpc_request *req; + struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ, + NULL, NULL); struct list_head *iter; - wait_queue_t set_wait; - int rc; - set = ptlrpc_prep_set(); + time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct obd_import *imp = list_entry(iter, struct obd_import, imp_pinger_chain); - int generation, level; - unsigned long flags; - - if (imp->imp_next_ping <= this_ping) { - /* Add a ping. */ - spin_lock_irqsave(&imp->imp_lock, flags); - generation = imp->imp_generation; - level = imp->imp_level; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (level != LUSTRE_CONN_FULL) { - CDEBUG(D_HA, - "not pinging %s (in recovery)\n", - imp->imp_target_uuid.uuid); - continue; - } - - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, - NULL); - if (!req) { - CERROR("OOM trying to ping\n"); - break; - } - req->rq_no_resend = 1; - req->rq_replen = lustre_msg_size(0, NULL); - req->rq_level = LUSTRE_CONN_FULL; - req->rq_phase = RQ_PHASE_RPC; - req->rq_import_generation = generation; - ptlrpc_set_add_req(set, req); - } else { - CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n", - imp->imp_target_uuid.uuid, imp->imp_next_ping, - this_ping); - } - } - up(&pinger_sem); - /* Might be empty, that's OK. */ - if (set->set_remaining == 0) - CDEBUG(D_HA, "nothing to ping\n"); - list_for_each(iter, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(iter, struct ptlrpc_request, rq_set_chain); - DEBUG_REQ(D_HA, req, "pinging %s->%s", - req->rq_import->imp_obd->obd_uuid.uuid, - req->rq_import->imp_target_uuid.uuid); - (void)ptl_send_rpc(req); - } + ptlrpc_pinger_process_import(imp, this_ping); - /* Have to wait on both the thread's queue and the set's. */ - init_waitqueue_entry(&set_wait, current); - add_wait_queue(&set->set_waitq, &set_wait); - rc = l_wait_event(thread->t_ctl_waitq, - thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set), - &lwi); - remove_wait_queue(&set->set_waitq, &set_wait); - CDEBUG(D_HA, "ping complete (%lu)\n", jiffies); - - if (thread->t_flags & SVC_STOPPING) { - thread->t_flags &= ~SVC_STOPPING; - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, - rq_set_chain); - if (!req->rq_replied) - ptlrpc_unregister_reply(req); - } - ptlrpc_set_destroy(set); - EXIT; - break; - } - - /* Expire all the requests that didn't come back. */ - down(&pinger_sem); - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, rq_set_chain); + CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_pingable, imp->imp_next_ping, jiffies); - if (req->rq_replied) - continue; - - req->rq_phase = RQ_PHASE_COMPLETE; - set->set_remaining--; - /* If it was disconnected, don't sweat it. */ - if (list_empty(&req->rq_import->imp_pinger_chain)) - continue; - - ptlrpc_expire_one_request(req); + if (imp->imp_pingable && imp->imp_next_ping && + imp->imp_next_ping - jiffies < time_to_next_ping && + imp->imp_next_ping > jiffies) + time_to_next_ping = imp->imp_next_ping - jiffies; } up(&pinger_sem); - ptlrpc_set_destroy(set); /* Wait until the next ping time, or until we're stopped. */ - time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping, this_ping + (obd_timeout * HZ)); if (time_to_next_ping > 0) { lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL); - l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING, + l_wait_event(thread->t_ctl_waitq, + thread->t_flags & (SVC_STOPPING|SVC_EVENT), &lwi); if (thread->t_flags & SVC_STOPPING) { thread->t_flags &= ~SVC_STOPPING; EXIT; break; + } else if (thread->t_flags & SVC_EVENT) { + /* woken after adding import to reset timer */ + thread->t_flags &= ~SVC_EVENT; } } } @@ -255,21 +236,28 @@ static int ptlrpc_pinger_main(void *arg) return 0; } +static struct ptlrpc_thread *pinger_thread = NULL; + int ptlrpc_start_pinger(void) { struct l_wait_info lwi = { 0 }; struct ptlrpc_svc_data d; int rc; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); + LASSERT(obd_timeout > PINGER_RATE); + if (pinger_thread != NULL) - GOTO(out, rc = -EALREADY); + RETURN(-EALREADY); OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); if (pinger_thread == NULL) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); init_waitqueue_head(&pinger_thread->t_ctl_waitq); + init_waitqueue_head(&suspend_timeouts_waitq); d.name = "ll_ping"; d.thread = pinger_thread; @@ -280,13 +268,11 @@ int ptlrpc_start_pinger(void) if (rc < 0) { CERROR("cannot start thread: %d\n", rc); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); - GOTO(out, rc); + RETURN(rc); } l_wait_event(pinger_thread->t_ctl_waitq, pinger_thread->t_flags & SVC_RUNNING, &lwi); - out: - up(&pinger_sem); RETURN(rc); } @@ -294,20 +280,240 @@ int ptlrpc_stop_pinger(void) { struct l_wait_info lwi = { 0 }; int rc = 0; +#ifndef ENABLE_PINGER + return 0; +#endif ENTRY; - down(&pinger_sem); if (pinger_thread == NULL) - GOTO(out, rc = -EALREADY); - + RETURN(-EALREADY); + down(&pinger_sem); pinger_thread->t_flags = SVC_STOPPING; wake_up(&pinger_thread->t_ctl_waitq); + up(&pinger_sem); + l_wait_event(pinger_thread->t_ctl_waitq, (pinger_thread->t_flags & SVC_STOPPED), &lwi); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); + pinger_thread = NULL; + RETURN(rc); +} - out: +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + down(&pinger_sem); + imp->imp_next_ping = ptlrpc_next_ping(imp); up(&pinger_sem); - RETURN(rc); } + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + ENTRY; + if (!list_empty(&imp->imp_pinger_chain)) + RETURN(-EALREADY); + + down(&pinger_sem); + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + imp->imp_next_ping = jiffies + (obd_timeout * HZ); + /* XXX sort, blah blah */ + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + + ptlrpc_pinger_wake_up(); + up(&pinger_sem); + + RETURN(0); +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + ENTRY; + if (list_empty(&imp->imp_pinger_chain)) + RETURN(-ENOENT); + + down(&pinger_sem); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + class_import_put(imp); + up(&pinger_sem); + RETURN(0); +} + +void ptlrpc_pinger_wake_up() +{ +#ifdef ENABLE_PINGER + pinger_thread->t_flags |= SVC_EVENT; + wake_up(&pinger_thread->t_ctl_waitq); +#endif +} + +#else /* !__KERNEL__ */ + +/* XXX + * the current implementation of pinger in liblustre is not optimized + */ + +static struct pinger_data { + int pd_recursion; + unsigned long pd_this_ping; + unsigned long pd_next_ping; + int pd_force_check; +} pinger_args; + +static int pinger_check_rpcs(void *arg) +{ + unsigned long curtime = time(NULL); + struct list_head *iter; + struct pinger_data *pd = &pinger_args; + + /* prevent recursion */ + if (pd->pd_recursion++) { + CDEBUG(D_HA, "pinger: recursion! quit\n"); + pd->pd_recursion--; + return 0; + } + + /* have we reached ping point? */ + if (pd->pd_next_ping > curtime && !pd->pd_force_check) { + pd->pd_recursion--; + return 0; + } + + if (pd->pd_force_check) + pd->pd_force_check = 0; + + pd->pd_this_ping = curtime; + + /* add rpcs into set */ + down(&pinger_sem); + list_for_each(iter, &pinger_imports) { + struct obd_import *imp = + list_entry(iter, struct obd_import, + imp_pinger_chain); + int level, force; + unsigned long flags; + + + spin_lock_irqsave(&imp->imp_lock, flags); + level = imp->imp_state; + force = imp->imp_force_verify; + if (force) + imp->imp_force_verify = 0; + spin_unlock_irqrestore(&imp->imp_lock, flags); + + if (imp->imp_next_ping <= pd->pd_this_ping || force) { + if (level == LUSTRE_IMP_DISCON) { + /* wait at least a timeout before + trying recovery again. */ + unsigned long timeout = obd_timeout; + if (imp->imp_server_timeout) + timeout = obd_timeout / 2; + imp->imp_next_ping = time(NULL) + + (timeout * HZ); + ptlrpc_initiate_recovery(imp); + } + else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) { + CDEBUG(D_HA, + "not pinging %s (in recovery " + " or recovery disabled: %s)\n", + imp->imp_target_uuid.uuid, + ptlrpc_import_state_name(level)); + } + else if (imp->imp_pingable || force) { + ptlrpc_ping(imp); + } + + } else { + if (imp->imp_pingable) { + CDEBUG(D_HA, "don't need to ping %s " + "(%lu > %lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_next_ping, pd->pd_this_ping); + } + } + } + + up(&pinger_sem); + + pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ); + + CDEBUG(D_HA, "finished a round ping\n"); + pd->pd_recursion--; + return 0; +} + +static void *pinger_callback = NULL; + +int ptlrpc_start_pinger(void) +{ + memset(&pinger_args, 0, sizeof(pinger_args)); +#ifdef ENABLE_PINGER + pinger_callback = + liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args); +#endif + return 0; +} + +int ptlrpc_stop_pinger(void) +{ +#ifdef ENABLE_PINGER + if (pinger_callback) + liblustre_deregister_wait_callback(pinger_callback); +#endif + return 0; +} + +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + down(&pinger_sem); + imp->imp_next_ping = time(NULL) + obd_timeout; + if (pinger_args.pd_next_ping > imp->imp_next_ping) { + CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n", + imp->imp_next_ping, time(NULL)); + pinger_args.pd_next_ping = imp->imp_next_ping; + } + up(&pinger_sem); +} + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + ENTRY; + if (!list_empty(&imp->imp_pinger_chain)) + RETURN(-EALREADY); + + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + ptlrpc_pinger_sending_on_import(imp); + + down(&pinger_sem); + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + up(&pinger_sem); + + RETURN(0); +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + ENTRY; + if (list_empty(&imp->imp_pinger_chain)) + RETURN(-ENOENT); + + down(&pinger_sem); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); + class_import_put(imp); + up(&pinger_sem); + RETURN(0); +} + +void ptlrpc_pinger_wake_up() +{ + pinger_args.pd_force_check = 1; +} +#endif /* !__KERNEL__ */