In ptlrpc_pinger_main, if pinging the recoverable clients or
calling obd_update_maxusage takes too long, the function could
get stuck in an endless loop because of the negative value
returned by pinger_check_timeout.
Lustre-change: https://review.whamcloud.com/38915
Lustre-commit: 6be2dbb2595121fabceda86c5f7bdcb45e10b320
Change-Id: Ib7fc22b3cc31255223bc2be60224ced1a3585f87
Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Olaf Faaland-LLNL <faaland1@llnl.gov>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: Minh Diep <mdiep@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/39344
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
static void ptlrpc_pinger_main(struct work_struct *ws)
{
static void ptlrpc_pinger_main(struct work_struct *ws)
{
- time64_t this_ping = ktime_get_seconds();
- time64_t time_to_next_wake;
+ time64_t this_ping, time_after_ping, time_to_next_wake;
struct timeout_item *item;
struct obd_import *imp;
struct list_head *iter;
do {
struct timeout_item *item;
struct obd_import *imp;
struct list_head *iter;
do {
+ this_ping = ktime_get_seconds();
+
mutex_lock(&pinger_mutex);
list_for_each_entry(item, &timeout_list, ti_chain)
item->ti_cb(item, item->ti_cb_data);
mutex_lock(&pinger_mutex);
list_for_each_entry(item, &timeout_list, ti_chain)
item->ti_cb(item, item->ti_cb_data);
ptlrpc_update_next_ping(imp, 0);
}
mutex_unlock(&pinger_mutex);
ptlrpc_update_next_ping(imp, 0);
}
mutex_unlock(&pinger_mutex);
+
+ time_after_ping = ktime_get_seconds();
/* update memory usage info */
obd_update_maxusage();
/* update memory usage info */
obd_update_maxusage();
+ if ((ktime_get_seconds() - this_ping - 3) > PING_INTERVAL)
+ CDEBUG(D_HA, "long time to ping: %lld, %lld, %lld\n",
+ this_ping, time_after_ping, ktime_get_seconds());
+
/* Wait until the next ping time, or until we're stopped. */
time_to_next_wake = pinger_check_timeout(this_ping);
/* The ping sent by ptlrpc_send_rpc may get sent out
/* Wait until the next ping time, or until we're stopped. */
time_to_next_wake = pinger_check_timeout(this_ping);
/* The ping sent by ptlrpc_send_rpc may get sent out