From: Hongchao Zhang Date: Fri, 19 Jun 2020 02:53:12 +0000 (+0800) Subject: LU-13667 ptlrpc: fix endless loop issue X-Git-Tag: 2.13.55~28 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=6be2dbb2595121fabceda86c5f7bdcb45e10b320;p=fs%2Flustre-release.git LU-13667 ptlrpc: fix endless loop issue In ptlrpc_pinger_main, if pinging the recoverable clients or calling obd_update_maxusage takes too long, the loop could become endless because pinger_check_timeout then returns a negative value. Change-Id: Ib7fc22b3cc31255223bc2be60224ced1a3585f87 Signed-off-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/38915 Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Olaf Faaland-LLNL Reviewed-by: Oleg Drokin --- diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 83344bd..6fecf81 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -271,13 +271,14 @@ static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main); static void ptlrpc_pinger_main(struct work_struct *ws) { - time64_t this_ping = ktime_get_seconds(); - time64_t time_to_next_wake; + time64_t this_ping, time_after_ping, time_to_next_wake; struct timeout_item *item; struct obd_import *imp; struct list_head *iter; do { + this_ping = ktime_get_seconds(); + mutex_lock(&pinger_mutex); list_for_each_entry(item, &timeout_list, ti_chain) item->ti_cb(item, item->ti_cb_data); @@ -293,9 +294,15 @@ static void ptlrpc_pinger_main(struct work_struct *ws) ptlrpc_update_next_ping(imp, 0); } mutex_unlock(&pinger_mutex); + + time_after_ping = ktime_get_seconds(); /* update memory usage info */ obd_update_maxusage(); + if ((ktime_get_seconds() - this_ping - 3) > PING_INTERVAL) + CDEBUG(D_HA, "long time to ping: %lld, %lld, %lld\n", + this_ping, time_after_ping, ktime_get_seconds()); + /* Wait until the next ping time, or until we're stopped. */ time_to_next_wake = pinger_check_timeout(this_ping); /*