Whamcloud - gitweb
fix formatting
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
index ab0f7b2..ee65bd0 100644 (file)
@@ -38,9 +38,6 @@
 static DECLARE_MUTEX(pinger_sem);
 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
 
-#ifdef __KERNEL__
-static struct ptlrpc_thread *pinger_thread = NULL;
-
 int ptlrpc_ping(struct obd_import *imp) 
 {
         struct ptlrpc_request *req;
@@ -50,7 +47,7 @@ int ptlrpc_ping(struct obd_import *imp)
         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
                               NULL);
         if (req) {
-                DEBUG_REQ(D_HA, req, "pinging %s->%s",
+                DEBUG_REQ(D_INFO, req, "pinging %s->%s",
                           imp->imp_obd->obd_uuid.uuid,
                           imp->imp_target_uuid.uuid);
                 req->rq_no_resend = req->rq_no_delay = 1;
@@ -67,6 +64,18 @@ int ptlrpc_ping(struct obd_import *imp)
         RETURN(rc);
 }
 
+static inline void ptlrpc_update_next_ping(struct obd_import *imp) /* set imp->imp_next_ping relative to the current jiffies */
+{
+        imp->imp_next_ping = jiffies + HZ *
+                (imp->imp_state == LUSTRE_IMP_DISCON ? 10 : PING_INTERVAL); /* HZ multiplier is 10 while the import is in LUSTRE_IMP_DISCON, PING_INTERVAL otherwise */
+}
+
+void ptlrpc_ping_import_soon(struct obd_import *imp) /* mark imp as due for a ping as of now; only the timestamp is updated here */
+{
+        imp->imp_next_ping = jiffies; /* the pinger loops treat an import as due once their pass time reaches imp_next_ping - 5 */
+}
+
+#ifdef __KERNEL__
 static int ptlrpc_pinger_main(void *arg)
 {
         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
@@ -82,7 +91,10 @@ static int ptlrpc_pinger_main(void *arg)
         RECALC_SIGPENDING;
         SIGNAL_MASK_UNLOCK(current, flags);
 
-        THREAD_NAME(current->comm, "%s", data->name);
+        LASSERTF(strlen(data->name) < sizeof(current->comm),
+                 "name %d > len %d\n",
+                 (int)strlen(data->name), (int)sizeof(current->comm));
+        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
         unlock_kernel();
 
         /* Record that the thread is running */
@@ -93,7 +105,7 @@ static int ptlrpc_pinger_main(void *arg)
         while (1) {
                 unsigned long this_ping = jiffies;
                 long time_to_next_ping;
-                struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
+                struct l_wait_info lwi = LWI_TIMEOUT(PING_INTERVAL * HZ,
                                                      NULL, NULL);
                 struct list_head *iter;
 
@@ -109,45 +121,66 @@ static int ptlrpc_pinger_main(void *arg)
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         level = imp->imp_state;
                         force = imp->imp_force_verify;
-                        if (force)
-                                imp->imp_force_verify = 0;
+                        imp->imp_force_verify = 0;
                         spin_unlock_irqrestore(&imp->imp_lock, flags);
-
-                        if (imp->imp_next_ping <= this_ping || force) {
-                                if (level == LUSTRE_IMP_DISCON) {
-                                        /* wait at least a timeout before 
+                        CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA,
+                               "level %s/%u force %u deactive %u pingable %u\n",
+                               ptlrpc_import_state_name(level), level,
+                               force, imp->imp_deactive, imp->imp_pingable);
+
+                        if (force ||
+                            /* if the next ping is within, say, 5 jiffies from
+                               now, go ahead and ping. See note below. */
+                            time_after_eq(this_ping, imp->imp_next_ping - 5)) {
+                                if (level == LUSTRE_IMP_DISCON &&
+                                    !imp->imp_deactive) {
+                                        /* wait at least a timeout before
                                            trying recovery again. */
-                                        imp->imp_next_ping = jiffies + 
-                                                (obd_timeout * HZ);
+                                        imp->imp_next_ping = jiffies +
+                                                obd_timeout * HZ;
                                         ptlrpc_initiate_recovery(imp);
-                                } 
-                                else if (level != LUSTRE_IMP_FULL) {
-                                        CDEBUG(D_HA, 
-                                               "not pinging %s " 
-                                               "(in recovery: %s)\n",
+                                } else if (level != LUSTRE_IMP_FULL ||
+                                         imp->imp_obd->obd_no_recov) {
+                                        CDEBUG(D_HA, "not pinging %s "
+                                               "(in recovery: %s or recovery "
+                                               "disabled: %u/%u)\n",
                                                imp->imp_target_uuid.uuid,
-                                               ptlrpc_import_state_name(level));
-                                } 
-                                else if (imp->imp_pingable || force) {
+                                               ptlrpc_import_state_name(level),
+                                               imp->imp_deactive,
+                                               imp->imp_obd->obd_no_recov);
+                                } else if (imp->imp_pingable || force) {
                                         ptlrpc_ping(imp);
                                 }
 
                         } else {
-                                if (imp->imp_pingable)
-                                        CDEBUG(D_HA, "don't need to ping %s "
-                                               "(%lu > %lu)\n", 
-                                               imp->imp_target_uuid.uuid,
-                                               imp->imp_next_ping, this_ping);
+                                if (!imp->imp_pingable)
+                                        continue;
+                                CDEBUG(D_INFO,
+                                       "don't need to ping %s (%lu > %lu)\n",
+                                       imp->imp_target_uuid.uuid,
+                                       imp->imp_next_ping, this_ping);
                         }
+
+                        /* obd_timeout might have changed */
+                        if (time_after(imp->imp_next_ping,
+                                       this_ping + PING_INTERVAL * HZ))
+                                ptlrpc_update_next_ping(imp);
                 }
                 up(&pinger_sem);
 
                 /* Wait until the next ping time, or until we're stopped. */
-                time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
-                CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
-                       this_ping + (obd_timeout * HZ));
+                time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies;
+                /* A ping issued during this pass may actually be sent
+                   by ptlrpc_send_rpc slightly later, say .01 second after
+                   this.  ptlrpc_pinger_sending_on_import will then set
+                   the next ping time to next_ping + .01 sec, which means
+                   we would SKIP the ping due at next_ping, and it would
+                   only get sent 2 ping intervals from now!  Beware. */
+                CDEBUG(D_INFO, "next ping in %lu (%lu)\n", time_to_next_ping,
+                       this_ping + PING_INTERVAL * HZ);
                 if (time_to_next_ping > 0) {
-                        lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
+                        lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ),
+                                          NULL, NULL);
                         l_wait_event(thread->t_ctl_waitq,
                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
                                      &lwi);
@@ -169,6 +202,8 @@ static int ptlrpc_pinger_main(void *arg)
         return 0;
 }
 
+static struct ptlrpc_thread *pinger_thread = NULL;
+
 int ptlrpc_start_pinger(void)
 {
         struct l_wait_info lwi = { 0 };
@@ -230,9 +265,7 @@ int ptlrpc_stop_pinger(void)
 
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
-        down(&pinger_sem);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
-        up(&pinger_sem);
+        ptlrpc_update_next_ping(imp); /* reschedules imp's next ping from the current jiffies; pinger_sem is no longer taken on this path */
 }
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
@@ -244,7 +277,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
         down(&pinger_sem);
         CDEBUG(D_HA, "adding pingable import %s->%s\n",
                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
+        ptlrpc_update_next_ping(imp);
         /* XXX sort, blah blah */
         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
         class_import_get(imp);
@@ -278,21 +311,22 @@ void ptlrpc_pinger_wake_up()
 #endif
 }
 
-#else
+#else /* !__KERNEL__ */
+
 /* XXX
  * the current implementation of pinger in liblustre is not optimized
  */
 
 static struct pinger_data {
         int             pd_recursion;
-        unsigned long   pd_this_ping;
-        unsigned long   pd_next_ping;
+        unsigned long   pd_this_ping;   /* jiffies; reset to 0 once a ping round has finished */
+        unsigned long   pd_next_ping;   /* jiffies; with no request set pending, pinger_check_rpcs returns early until this time */
         struct ptlrpc_request_set *pd_set;
 } pinger_args;
 
 static int pinger_check_rpcs(void *arg)
 {
-        unsigned long curtime = time(NULL);
+        unsigned long curtime = jiffies;
         struct ptlrpc_request *req;
         struct ptlrpc_request_set *set;
         struct list_head *iter;
@@ -308,7 +342,7 @@ static int pinger_check_rpcs(void *arg)
         }
 
         /* have we reached ping point? */
-        if (!pd->pd_set && pd->pd_next_ping > curtime) {
+        if (!pd->pd_set && time_before(curtime, pd->pd_next_ping)) {
                 pd->pd_recursion--;
                 return 0;
         }
@@ -328,12 +362,11 @@ static int pinger_check_rpcs(void *arg)
         down(&pinger_sem);
         list_for_each(iter, &pinger_imports) {
                 struct obd_import *imp =
-                        list_entry(iter, struct obd_import,
-                                   imp_pinger_chain);
+                        list_entry(iter, struct obd_import, imp_pinger_chain);
                 int generation, level;
                 unsigned long flags;
 
-                if (imp->imp_next_ping <= pd->pd_this_ping) {
+                if (time_after_eq(pd->pd_this_ping, imp->imp_next_ping - 5)) {
                         /* Add a ping. */
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         generation = imp->imp_generation;
@@ -386,7 +419,7 @@ do_check_set:
         rc = ptlrpc_check_set(set);
 
         /* not finished, and we are not expired, simply return */
-        if (!rc && curtime < pd->pd_this_ping + obd_timeout) {
+        if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)) {
                 CDEBUG(D_HA, "not finished, but also not expired\n");
                 pd->pd_recursion--;
                 return 0;
@@ -417,7 +450,7 @@ do_check_set:
         ptlrpc_set_destroy(set);
         pd->pd_set = NULL;
 
-        pd->pd_next_ping = pd->pd_this_ping + obd_timeout;
+        pd->pd_next_ping = pd->pd_this_ping + PING_INTERVAL * HZ;
         pd->pd_this_ping = 0; /* XXX for debug */
 
         CDEBUG(D_HA, "finished a round ping\n");
@@ -434,7 +467,6 @@ int ptlrpc_start_pinger(void)
         pinger_callback =
                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
 #endif
-        obd_timeout = 10;
         return 0;
 }
 
@@ -450,11 +482,11 @@ int ptlrpc_stop_pinger(void)
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
         down(&pinger_sem);
-        imp->imp_next_ping = time(NULL) + obd_timeout;
+        ptlrpc_update_next_ping(imp);
         if (pinger_args.pd_set == NULL &&
-            pinger_args.pd_next_ping > imp->imp_next_ping) {
+            time_before(imp->imp_next_ping, pinger_args.pd_next_ping)) {
                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
-                        imp->imp_next_ping, time(NULL));
+                        imp->imp_next_ping, jiffies);
                 pinger_args.pd_next_ping = imp->imp_next_ping;
         }
         up(&pinger_sem);