Whamcloud - gitweb
b=6019
author alex <alex>
Sat, 7 May 2005 15:40:03 +0000 (15:40 +0000)
committer alex <alex>
Sat, 7 May 2005 15:40:03 +0000 (15:40 +0000)
 - workaround for cascading timeouts, where the failure of one server node
   causes failures of unrelated connections. this caused various problems,
   for example lock cancel timeouts.

lustre/include/linux/lustre_ha.h
lustre/include/linux/lustre_import.h
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/obdclass/genops.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/recover.c

index 739a875..ecc6543 100644 (file)
@@ -24,5 +24,8 @@ void ptlrpc_deactivate_import(struct obd_import *imp);
 void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc);
 void ptlrpc_fail_import(struct obd_import *imp, int generation);
 void ptlrpc_fail_export(struct obd_export *exp);
 void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc);
 void ptlrpc_fail_import(struct obd_import *imp, int generation);
 void ptlrpc_fail_export(struct obd_export *exp);
+int ptlrpc_check_suspend(void);
+void ptlrpc_activate_timeouts(void);
+void ptlrpc_deactivate_timeouts(void);
 
 #endif
 
 #endif
index 0f7b8b3..716028d 100644 (file)
@@ -101,10 +101,15 @@ struct obd_import {
                                   imp_dlm_fake:1, imp_server_timeout:1,
                                   imp_initial_recov:1, imp_force_verify:1,
                                   imp_pingable:1, imp_resend_replay:1,
                                   imp_dlm_fake:1, imp_server_timeout:1,
                                   imp_initial_recov:1, imp_force_verify:1,
                                   imp_pingable:1, imp_resend_replay:1,
-                                  imp_deactive:1;
+                                  imp_deactive:1,
+                                  imp_waiting_ping_reply:1;
         __u32                     imp_connect_op;
         __u32                     imp_connect_flags;
         struct obd_connect_data   imp_connect_data;
         __u32                     imp_connect_op;
         __u32                     imp_connect_flags;
         struct obd_connect_data   imp_connect_data;
+
+        unsigned long             imp_last_ping_xid;
+        int                       imp_reqs_replayed;
+        int                       imp_locks_replayed;
 };
 
 typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
 };
 
 typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
index 0a5d6a1..0634cb7 100644 (file)
@@ -95,6 +95,8 @@ struct ldlm_bl_work_item {
 
 #ifdef __KERNEL__
 
 
 #ifdef __KERNEL__
 
+static int ldlm_add_waiting_lock(struct ldlm_lock *lock);
+
 static inline int have_expired_locks(void)
 {
         int need_to_run;
 static inline int have_expired_locks(void)
 {
         int need_to_run;
@@ -179,6 +181,7 @@ static void waiting_locks_callback(unsigned long unused)
         if (obd_dump_on_timeout)
                 portals_debug_dumplog();
 
         if (obd_dump_on_timeout)
                 portals_debug_dumplog();
 
+repeat:
         spin_lock_bh(&waiting_locks_spinlock);
         while (!list_empty(&waiting_locks_list)) {
                 lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
         spin_lock_bh(&waiting_locks_spinlock);
         while (!list_empty(&waiting_locks_list)) {
                 lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
@@ -188,6 +191,24 @@ static void waiting_locks_callback(unsigned long unused)
                     (lock->l_req_mode == LCK_GROUP))
                         break;
 
                     (lock->l_req_mode == LCK_GROUP))
                         break;
 
+                if (ptlrpc_check_suspend()) {
+                        /* there is a case when we talk to one mds, holding
+                         * lock from another mds. this way we easily can get
+                         * here, if second mds is being recovered. so, we
+                         * suspend timeouts. bug 6019 */
+
+                        LDLM_ERROR(lock, "recharge timeout: %s@%s nid %s ",
+                                   lock->l_export->exp_client_uuid.uuid,
+                                   lock->l_export->exp_connection->c_remote_uuid.uuid,
+                                   ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str));
+
+                        list_del_init(&lock->l_pending_chain);
+                        spin_unlock_bh(&waiting_locks_spinlock);
+                        ldlm_add_waiting_lock(lock);
+
+                        goto repeat;
+                }
+
                 LDLM_ERROR(lock, "lock callback timer expired: evicting client "
                            "%s@%s nid %s ",
                            lock->l_export->exp_client_uuid.uuid,
                 LDLM_ERROR(lock, "lock callback timer expired: evicting client "
                            "%s@%s nid %s ",
                            lock->l_export->exp_client_uuid.uuid,
index 094ae03..90c988a 100644 (file)
@@ -50,6 +50,9 @@ int ldlm_expired_completion_wait(void *data)
         if (lock->l_conn_export == NULL) {
                 static unsigned long next_dump = 0, last_dump = 0;
 
         if (lock->l_conn_export == NULL) {
                 static unsigned long next_dump = 0, last_dump = 0;
 
+                if (ptlrpc_check_suspend())
+                        RETURN(0);
+
                 LDLM_ERROR(lock, "lock timed out; not entering recovery in "
                            "server code, just going back to sleep");
                 if (time_after(jiffies, next_dump)) {
                 LDLM_ERROR(lock, "lock timed out; not entering recovery in "
                            "server code, just going back to sleep");
                 if (time_after(jiffies, next_dump)) {
@@ -1036,6 +1039,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 
         LDLM_DEBUG(lock, "replaying lock:");
 
 
         LDLM_DEBUG(lock, "replaying lock:");
 
+        imp->imp_locks_replayed++;
         atomic_inc(&req->rq_import->imp_replay_inflight);
         req->rq_async_args.pointer_arg[0] = lock;
         req->rq_interpret_reply = replay_lock_interpret;
         atomic_inc(&req->rq_import->imp_replay_inflight);
         req->rq_async_args.pointer_arg[0] = lock;
         req->rq_interpret_reply = replay_lock_interpret;
index bb7781b..36ae1e8 100644 (file)
@@ -591,6 +591,7 @@ struct obd_import *class_new_import(void)
         INIT_LIST_HEAD(&imp->imp_conn_list);
         INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
         INIT_LIST_HEAD(&imp->imp_conn_list);
         INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
+        imp->imp_waiting_ping_reply = 0;
 
         return imp;
 }
 
         return imp;
 }
index 8bbbf62..a3c8f3b 100644 (file)
@@ -1289,8 +1289,13 @@ void ptlrpc_restart_req(struct ptlrpc_request *req)
 static int expired_request(void *data)
 {
         struct ptlrpc_request *req = data;
 static int expired_request(void *data)
 {
         struct ptlrpc_request *req = data;
+        struct obd_import *imp;
         ENTRY;
 
         ENTRY;
 
+        /* some failure can suspend regular timeouts */
+        if (ptlrpc_check_suspend())
+                RETURN(1);
+
         RETURN(ptlrpc_expire_one_request(req));
 }
 
         RETURN(ptlrpc_expire_one_request(req));
 }
 
@@ -1465,9 +1470,12 @@ restart:
                 timeout = MAX(req->rq_timeout * HZ, 1);
                 DEBUG_REQ(D_NET, req, "-- sleeping for %d jiffies", timeout);
         }
                 timeout = MAX(req->rq_timeout * HZ, 1);
                 DEBUG_REQ(D_NET, req, "-- sleeping for %d jiffies", timeout);
         }
+repeat:
         lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request,
                                req);
         lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request,
                                req);
-        l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
+        rc = l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
+        if (rc == -ETIMEDOUT && ptlrpc_check_and_wait_suspend(req))
+                goto repeat;
         DEBUG_REQ(D_NET, req, "-- done sleeping");
 
         CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc "
         DEBUG_REQ(D_NET, req, "-- done sleeping");
 
         CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc "
index 12e63b3..7cf82f1 100644 (file)
@@ -101,6 +101,7 @@ int ptlrpc_set_import_discon(struct obd_import *imp)
                       imp->imp_obd->obd_name, 
                       imp->imp_target_uuid.uuid,
                       imp->imp_connection->c_remote_uuid.uuid);
                       imp->imp_obd->obd_name, 
                       imp->imp_target_uuid.uuid,
                       imp->imp_connection->c_remote_uuid.uuid);
+                ptlrpc_deactivate_timeouts();
                 IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
                 IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
@@ -191,6 +192,7 @@ void ptlrpc_activate_import(struct obd_import *imp)
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
         obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+        ptlrpc_activate_timeouts();
 }
 
 void ptlrpc_fail_import(struct obd_import *imp, int generation)
 }
 
 void ptlrpc_fail_import(struct obd_import *imp, int generation)
@@ -397,6 +399,8 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
                                         MSG_CONNECT_INITIAL);
                 imp->imp_replayable = 1; 
         }
                                         MSG_CONNECT_INITIAL);
                 imp->imp_replayable = 1; 
         }
+        
+        imp->imp_reqs_replayed = imp->imp_locks_replayed = 0;
 
         ptlrpcd_add_req(request);
         rc = 0;
 
         ptlrpcd_add_req(request);
         rc = 0;
@@ -459,6 +463,7 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 imp->imp_conn_cnt = request->rq_repmsg->conn_cnt;
                 imp->imp_remote_handle = request->rq_repmsg->handle;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
                 imp->imp_conn_cnt = request->rq_repmsg->conn_cnt;
                 imp->imp_remote_handle = request->rq_repmsg->handle;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+                ptlrpc_pinger_sending_on_import(imp);
                 GOTO(finish, rc = 0);
         }
 
                 GOTO(finish, rc = 0);
         }
 
@@ -686,10 +691,13 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                         GOTO(out, rc);
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
                 ptlrpc_activate_import(imp);
                         GOTO(out, rc);
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
                 ptlrpc_activate_import(imp);
-                CWARN("%s: connection restored to %s@%s\n",
+                CWARN("%s: connection restored to %s@%s, "
+                       "%d/%d req/lock replayed\n",
                       imp->imp_obd->obd_name, 
                       imp->imp_target_uuid.uuid,
                       imp->imp_obd->obd_name, 
                       imp->imp_target_uuid.uuid,
-                      imp->imp_connection->c_remote_uuid.uuid);
+                      imp->imp_connection->c_remote_uuid.uuid,
+                      imp->imp_reqs_replayed,
+                      imp->imp_locks_replayed);
         }
 
         if (imp->imp_state == LUSTRE_IMP_FULL) {
         }
 
         if (imp->imp_state == LUSTRE_IMP_FULL) {
index af48920..f883a48 100644 (file)
@@ -478,7 +478,6 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         ptlrpc_request_addref(request);        /* +1 ref for the SENT callback */
 
         request->rq_sent = LTIME_S(CURRENT_TIME);
         ptlrpc_request_addref(request);        /* +1 ref for the SENT callback */
 
         request->rq_sent = LTIME_S(CURRENT_TIME);
-        ptlrpc_pinger_sending_on_import(request->rq_import);
         rc = ptl_send_buf(&request->rq_req_md_h, 
                           request->rq_reqbuf, request->rq_reqdata_len,
                           PTL_NOACK_REQ, &request->rq_req_cbid, 
         rc = ptl_send_buf(&request->rq_req_md_h, 
                           request->rq_reqbuf, request->rq_reqdata_len,
                           PTL_NOACK_REQ, &request->rq_req_cbid, 
index 89b1191..756cd63 100644 (file)
 #include <linux/obd_class.h>
 #include "ptlrpc_internal.h"
 
 #include <linux/obd_class.h>
 #include "ptlrpc_internal.h"
 
+#define PINGER_RATE     3 /* how many pings we'll do in obd_timeout period */
+
 static DECLARE_MUTEX(pinger_sem);
 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
 
 static DECLARE_MUTEX(pinger_sem);
 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
 
+static int ptlrpc_ping_interpret(struct ptlrpc_request *req,
+                                    void *data, int rc)
+{
+        struct obd_import *imp = req->rq_import;
+        DEBUG_REQ(D_HA, req, "ping reply");
+        if (imp->imp_waiting_ping_reply == 0)
+                DEBUG_REQ(D_ERROR, req, "late ping reply?");
+        if (imp->imp_last_ping_xid != req->rq_xid)
+                DEBUG_REQ(D_ERROR, req, "uh, wrong ping reply on x%lx",
+                          imp->imp_last_ping_xid);
+        else
+                imp->imp_last_ping_xid = 0;
+
+        /* if ping reply is an error, don't drop "replied" flag
+         * on import, so pinger will invalidate it */
+        if (ptlrpc_client_replied(req) && req->rq_repmsg->type == PTL_RPC_MSG_ERR)
+                return 0;
+        
+        imp->imp_waiting_ping_reply = 0;
+        return 0;
+}
+
 int ptlrpc_ping(struct obd_import *imp) 
 {
         struct ptlrpc_request *req;
 int ptlrpc_ping(struct obd_import *imp) 
 {
         struct ptlrpc_request *req;
@@ -51,6 +75,10 @@ int ptlrpc_ping(struct obd_import *imp)
                           imp->imp_target_uuid.uuid);
                 req->rq_no_resend = req->rq_no_delay = 1;
                 req->rq_replen = lustre_msg_size(0, NULL);
                           imp->imp_target_uuid.uuid);
                 req->rq_no_resend = req->rq_no_delay = 1;
                 req->rq_replen = lustre_msg_size(0, NULL);
+                req->rq_interpret_reply = ptlrpc_ping_interpret;
+                req->rq_timeout = obd_timeout / PINGER_RATE;
+                imp->imp_waiting_ping_reply = 1;
+                imp->imp_last_ping_xid = req->rq_xid;
                 ptlrpcd_add_req(req);
         } else {
                 CERROR("OOM trying to ping %s->%s\n",
                 ptlrpcd_add_req(req);
         } else {
                 CERROR("OOM trying to ping %s->%s\n",
@@ -63,12 +91,97 @@ int ptlrpc_ping(struct obd_import *imp)
 }
 
 #ifdef __KERNEL__
 }
 
 #ifdef __KERNEL__
-int ptlrpc_next_ping(struct obd_import *imp)
+static inline int ptlrpc_next_ping(struct obd_import *imp)
+{
+        return jiffies + (obd_timeout / PINGER_RATE * HZ);
+}
+
+static inline int ptlrpc_next_reconnect(struct obd_import *imp)
 {
         if (imp->imp_server_timeout)
 {
         if (imp->imp_server_timeout)
-                return jiffies + (obd_timeout / 4 * HZ);
-        else
                 return jiffies + (obd_timeout / 2 * HZ);
                 return jiffies + (obd_timeout / 2 * HZ);
+        else
+                return jiffies + (obd_timeout * HZ);
+}
+
+static atomic_t suspend_timeouts = ATOMIC_INIT(0);
+static wait_queue_head_t suspend_timeouts_waitq;
+
+void ptlrpc_deactivate_timeouts(void)
+{
+        CDEBUG(D_HA, "deactivate timeouts\n");
+        atomic_inc(&suspend_timeouts);
+}
+                
+void ptlrpc_activate_timeouts(void)
+{
+        CDEBUG(D_HA, "activate timeouts\n");
+        LASSERT(atomic_read(&suspend_timeouts) > 0);
+        if (atomic_dec_and_test(&suspend_timeouts))
+                wake_up(&suspend_timeouts_waitq);
+}
+
+int ptlrpc_check_suspend(void)
+{
+        if (atomic_read(&suspend_timeouts))
+                return 1;
+        return 0;
+}
+
+int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req)
+{
+        struct l_wait_info lwi;
+
+        if (atomic_read(&suspend_timeouts)) {
+                DEBUG_REQ(D_NET, req, "-- suspend %d regular timeout",
+                          atomic_read(&suspend_timeouts));
+                lwi = LWI_INTR(NULL, NULL);
+                l_wait_event(suspend_timeouts_waitq,
+                             atomic_read(&suspend_timeouts) == 0, &lwi);
+                DEBUG_REQ(D_NET, req, "-- recharge regular timeout");
+                return 1;
+        }
+        return 0;
+}
+
+static void ptlrpc_pinger_process_import(struct obd_import *imp,
+                                         unsigned long this_ping)
+{
+        unsigned long flags;
+        int force, level;
+
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        level = imp->imp_state;
+        force = imp->imp_force_verify;
+        if (force)
+                imp->imp_force_verify = 0;
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+        if (imp->imp_next_ping > this_ping && force == 0)
+                return;
+
+        if (level == LUSTRE_IMP_DISCON && !imp->imp_deactive) {
+                /* wait at least a timeout before trying recovery again */
+                imp->imp_next_ping = ptlrpc_next_reconnect(imp);
+                ptlrpc_initiate_recovery(imp);
+        } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) {
+                CDEBUG(D_HA, "not pinging %s (in recovery "
+                       " or recovery disabled: %s)\n",
+                       imp->imp_target_uuid.uuid,
+                       ptlrpc_import_state_name(level));
+        } else if (level == LUSTRE_IMP_FULL && imp->imp_waiting_ping_reply &&
+                        imp->imp_next_ping >= this_ping && imp->imp_pingable) {
+                CDEBUG(D_HA, "%s: %s hasn't respond on ping x%lu\n",
+                       imp->imp_obd->obd_uuid.uuid,
+                       imp->imp_target_uuid.uuid, imp->imp_last_ping_xid);
+                CDEBUG(D_ERROR, "%s: %s hasn't respond on ping x%lu\n",
+                       imp->imp_obd->obd_uuid.uuid,
+                       imp->imp_target_uuid.uuid, imp->imp_last_ping_xid);
+                ptlrpc_fail_import(imp, 0);
+        } else if (imp->imp_pingable || force) {
+                imp->imp_next_ping = ptlrpc_next_ping(imp);
+                ptlrpc_ping(imp);
+        }
 }
 
 static int ptlrpc_pinger_main(void *arg)
 }
 
 static int ptlrpc_pinger_main(void *arg)
@@ -110,45 +223,13 @@ static int ptlrpc_pinger_main(void *arg)
                         struct obd_import *imp =
                                 list_entry(iter, struct obd_import,
                                            imp_pinger_chain);
                         struct obd_import *imp =
                                 list_entry(iter, struct obd_import,
                                            imp_pinger_chain);
-                        int force, level;
-                        unsigned long flags;
-
-
-                        spin_lock_irqsave(&imp->imp_lock, flags);
-                        level = imp->imp_state;
-                        force = imp->imp_force_verify;
-                        if (force)
-                                imp->imp_force_verify = 0;
-                        spin_unlock_irqrestore(&imp->imp_lock, flags);
-
-                        if (imp->imp_next_ping <= this_ping || force) {
-                                if (level == LUSTRE_IMP_DISCON &&
-                                    !imp->imp_deactive) {
-                                        /* wait at least a timeout before
-                                           trying recovery again. */
-                                        imp->imp_next_ping =
-                                                ptlrpc_next_ping(imp);
-                                        ptlrpc_initiate_recovery(imp);
-                                } else if (level != LUSTRE_IMP_FULL ||
-                                           imp->imp_obd->obd_no_recov) {
-                                        CDEBUG(D_HA, 
-                                               "not pinging %s (in recovery "
-                                               "or recovery disabled: %s)\n",
-                                               imp->imp_target_uuid.uuid,
-                                               ptlrpc_import_state_name(level));
-                                } else if (imp->imp_pingable || force) {
-                                        ptlrpc_ping(imp);
-                                }
-
-                        } else if (imp->imp_pingable) {
-                                CDEBUG(D_HA, "don't need to ping %s "
-                                       "(%lu > %lu)\n",
-                                       imp->imp_target_uuid.uuid,
-                                       imp->imp_next_ping, this_ping);
-                        }
+
+                        ptlrpc_pinger_process_import(imp, this_ping);
+
                         CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n",
                                 imp->imp_target_uuid.uuid,
                                 imp->imp_pingable, imp->imp_next_ping, jiffies);
                         CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n",
                                 imp->imp_target_uuid.uuid,
                                 imp->imp_pingable, imp->imp_next_ping, jiffies);
+
                         if (imp->imp_pingable && imp->imp_next_ping &&
                             imp->imp_next_ping - jiffies < time_to_next_ping &&
                             imp->imp_next_ping > jiffies)
                         if (imp->imp_pingable && imp->imp_next_ping &&
                             imp->imp_next_ping - jiffies < time_to_next_ping &&
                             imp->imp_next_ping > jiffies)
@@ -194,6 +275,8 @@ int ptlrpc_start_pinger(void)
 #endif
         ENTRY;
 
 #endif
         ENTRY;
 
+        LASSERT(obd_timeout > PINGER_RATE);
+
         if (pinger_thread != NULL)
                 RETURN(-EALREADY);
 
         if (pinger_thread != NULL)
                 RETURN(-EALREADY);
 
@@ -201,6 +284,7 @@ int ptlrpc_start_pinger(void)
         if (pinger_thread == NULL)
                 RETURN(-ENOMEM);
         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
         if (pinger_thread == NULL)
                 RETURN(-ENOMEM);
         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
+        init_waitqueue_head(&suspend_timeouts_waitq);
 
         d.name = "ll_ping";
         d.thread = pinger_thread;
 
         d.name = "ll_ping";
         d.thread = pinger_thread;
@@ -246,7 +330,7 @@ int ptlrpc_stop_pinger(void)
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
         down(&pinger_sem);
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
         down(&pinger_sem);
-        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
+        imp->imp_next_ping = ptlrpc_next_ping(imp);
         up(&pinger_sem);
 }
 
         up(&pinger_sem);
 }
 
index 6731c7d..a09c8a2 100644 (file)
@@ -214,6 +214,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
                                LPD64"\n", rc, req->rq_xid);
                         RETURN(rc);
                 }
                                LPD64"\n", rc, req->rq_xid);
                         RETURN(rc);
                 }
+                imp->imp_reqs_replayed++;
                 *inflight = 1;
         }
         RETURN(rc);
                 *inflight = 1;
         }
         RETURN(rc);