Whamcloud - gitweb
fix formatting
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
index 88f9694..ee65bd0 100644 (file)
@@ -47,7 +47,7 @@ int ptlrpc_ping(struct obd_import *imp)
         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
                               NULL);
         if (req) {
-                DEBUG_REQ(D_HA, req, "pinging %s->%s",
+                DEBUG_REQ(D_INFO, req, "pinging %s->%s",
                           imp->imp_obd->obd_uuid.uuid,
                           imp->imp_target_uuid.uuid);
                 req->rq_no_resend = req->rq_no_delay = 1;
@@ -64,6 +64,17 @@ int ptlrpc_ping(struct obd_import *imp)
         RETURN(rc);
 }
 
+static inline void ptlrpc_update_next_ping(struct obd_import *imp)
+{       /* Re-arm imp->imp_next_ping relative to the current jiffies value. */
+        imp->imp_next_ping = jiffies + HZ * /* 10 if DISCON, else PING_INTERVAL */
+                (imp->imp_state == LUSTRE_IMP_DISCON ? 10 : PING_INTERVAL);
+}       /* NOTE(review): takes no lock itself; confirm callers serialize. */
+
+void ptlrpc_ping_import_soon(struct obd_import *imp)
+{       /* Mark the import as due for a ping right away. */
+        imp->imp_next_ping = jiffies; /* deadline == now, so it is not in the future */
+}
+
 #ifdef __KERNEL__
 static int ptlrpc_pinger_main(void *arg)
 {
@@ -94,7 +105,7 @@ static int ptlrpc_pinger_main(void *arg)
         while (1) {
                 unsigned long this_ping = jiffies;
                 long time_to_next_ping;
-                struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
+                struct l_wait_info lwi = LWI_TIMEOUT(PING_INTERVAL * HZ,
                                                      NULL, NULL);
                 struct list_head *iter;
 
@@ -110,47 +121,66 @@ static int ptlrpc_pinger_main(void *arg)
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         level = imp->imp_state;
                         force = imp->imp_force_verify;
-                        if (force)
-                                imp->imp_force_verify = 0;
+                        imp->imp_force_verify = 0;
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-
-                        if (imp->imp_next_ping <= this_ping || force) {
+                        CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA,
+                               "level %s/%u force %u deactive %u pingable %u\n",
+                               ptlrpc_import_state_name(level), level,
+                               force, imp->imp_deactive, imp->imp_pingable);
+
+                        if (force ||
+                            /* if the next ping is within, say, 5 jiffies from
+                               now, go ahead and ping. See note below. */
+                            time_after_eq(this_ping, imp->imp_next_ping - 5)) {
                                 if (level == LUSTRE_IMP_DISCON &&
                                     !imp->imp_deactive) {
                                         /* wait at least a timeout before
                                            trying recovery again. */
                                         imp->imp_next_ping = jiffies +
-                                                (obd_timeout * HZ);
+                                                obd_timeout * HZ;
                                         ptlrpc_initiate_recovery(imp);
-                                }
-                                else if (level != LUSTRE_IMP_FULL ||
+                                } else if (level != LUSTRE_IMP_FULL ||
                                          imp->imp_obd->obd_no_recov) {
-                                        CDEBUG(D_HA,
-                                               "not pinging %s (in recovery "
-                                               "or recovery disabled: %s)\n",
+                                        CDEBUG(D_HA, "not pinging %s "
+                                               "(in recovery: %s or recovery "
+                                               "disabled: %u/%u)\n",
                                                imp->imp_target_uuid.uuid,
-                                               ptlrpc_import_state_name(level));
-                                }
-                                else if (imp->imp_pingable || force) {
+                                               ptlrpc_import_state_name(level),
+                                               imp->imp_deactive,
+                                               imp->imp_obd->obd_no_recov);
+                                } else if (imp->imp_pingable || force) {
                                         ptlrpc_ping(imp);
                                 }
 
                         } else {
-                                if (imp->imp_pingable)
-                                        CDEBUG(D_HA, "don't need to ping %s "
-                                               "(%lu > %lu)\n",
-                                               imp->imp_target_uuid.uuid,
-                                               imp->imp_next_ping, this_ping);
+                                if (!imp->imp_pingable)
+                                        continue;
+                                CDEBUG(D_INFO,
+                                       "don't need to ping %s (%lu > %lu)\n",
+                                       imp->imp_target_uuid.uuid,
+                                       imp->imp_next_ping, this_ping);
                         }
+
+                        /* obd_timeout might have changed */
+                        if (time_after(imp->imp_next_ping,
+                                       this_ping + PING_INTERVAL * HZ))
+                                ptlrpc_update_next_ping(imp);
                 }
                 up(&pinger_sem);
 
                 /* Wait until the next ping time, or until we're stopped. */
-                time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
-                CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
-                       this_ping + (obd_timeout * HZ));
+                time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies;
+                /* The ping sent by ptlrpc_send_rpc may get sent out
+                   say .01 second after this.
+                   ptlrpc_pinger_sending_on_import will then set the
+                   next ping time to next_ping + .01 sec, which means
+                   we will SKIP the next ping at next_ping, and the
+                   ping will get sent 2 timeouts from now!  Beware. */
+                CDEBUG(D_INFO, "next ping in %lu (%lu)\n", time_to_next_ping,
+                       this_ping + PING_INTERVAL * HZ);
                 if (time_to_next_ping > 0) {
-                        lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
+                        lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ),
+                                          NULL, NULL);
                         l_wait_event(thread->t_ctl_waitq,
                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
                                      &lwi);
@@ -235,9 +265,7 @@ int ptlrpc_stop_pinger(void)
 
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
-        down(&pinger_sem);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
-        up(&pinger_sem);
+        ptlrpc_update_next_ping(imp);
 }
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
@@ -249,7 +277,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
         down(&pinger_sem);
         CDEBUG(D_HA, "adding pingable import %s->%s\n",
                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
+        ptlrpc_update_next_ping(imp);
         /* XXX sort, blah blah */
         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
         class_import_get(imp);
@@ -291,14 +319,14 @@ void ptlrpc_pinger_wake_up()
 
 static struct pinger_data {
         int             pd_recursion;
-        unsigned long   pd_this_ping;
-        unsigned long   pd_next_ping;
+        unsigned long   pd_this_ping;   /* jiffies */
+        unsigned long   pd_next_ping;   /* jiffies */
         struct ptlrpc_request_set *pd_set;
 } pinger_args;
 
 static int pinger_check_rpcs(void *arg)
 {
-        unsigned long curtime = time(NULL);
+        unsigned long curtime = jiffies;
         struct ptlrpc_request *req;
         struct ptlrpc_request_set *set;
         struct list_head *iter;
@@ -314,7 +342,7 @@ static int pinger_check_rpcs(void *arg)
         }
 
         /* have we reached ping point? */
-        if (!pd->pd_set && pd->pd_next_ping > curtime) {
+        if (!pd->pd_set && time_before(curtime, pd->pd_next_ping)) {
                 pd->pd_recursion--;
                 return 0;
         }
@@ -334,12 +362,11 @@ static int pinger_check_rpcs(void *arg)
         down(&pinger_sem);
         list_for_each(iter, &pinger_imports) {
                 struct obd_import *imp =
-                        list_entry(iter, struct obd_import,
-                                   imp_pinger_chain);
+                        list_entry(iter, struct obd_import, imp_pinger_chain);
                 int generation, level;
                 unsigned long flags;
 
-                if (imp->imp_next_ping <= pd->pd_this_ping) {
+                if (time_after_eq(pd->pd_this_ping, imp->imp_next_ping - 5)) {
                         /* Add a ping. */
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         generation = imp->imp_generation;
@@ -392,7 +419,7 @@ do_check_set:
         rc = ptlrpc_check_set(set);
 
         /* not finished, and we are not expired, simply return */
-        if (!rc && curtime < pd->pd_this_ping + obd_timeout) {
+        if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)) {
                 CDEBUG(D_HA, "not finished, but also not expired\n");
                 pd->pd_recursion--;
                 return 0;
@@ -423,7 +450,7 @@ do_check_set:
         ptlrpc_set_destroy(set);
         pd->pd_set = NULL;
 
-        pd->pd_next_ping = pd->pd_this_ping + obd_timeout;
+        pd->pd_next_ping = pd->pd_this_ping + PING_INTERVAL * HZ;
         pd->pd_this_ping = 0; /* XXX for debug */
 
         CDEBUG(D_HA, "finished a round ping\n");
@@ -455,11 +482,11 @@ int ptlrpc_stop_pinger(void)
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
         down(&pinger_sem);
-        imp->imp_next_ping = time(NULL) + obd_timeout;
+        ptlrpc_update_next_ping(imp);
         if (pinger_args.pd_set == NULL &&
-            pinger_args.pd_next_ping > imp->imp_next_ping) {
+            time_before(imp->imp_next_ping, pinger_args.pd_next_ping)) {
                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
-                        imp->imp_next_ping, time(NULL));
+                        imp->imp_next_ping, jiffies);
                 pinger_args.pd_next_ping = imp->imp_next_ping;
         }
         up(&pinger_sem);