Whamcloud - gitweb
LU-14183 ldlm: wrong ldlm_add_waiting_lock usage 68/40868/2
author Vitaly Fertman <c17818@cray.com>
Fri, 4 Dec 2020 17:22:55 +0000 (20:22 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 16 Mar 2021 18:15:33 +0000 (18:15 +0000)
Originally, exp_bl_lock_at accounted for the period from when the BLAST
was sent until the cancel RPC arrived at the server. LU-6032 started to
update l_blast_sent for expired locks which are still busy (locks that
are prolonged when the timeout expires). In fact, it is a good idea to
cover not the whole period but only the time until any involved RPC
arrives: this avoids excessively large lock callback timeouts, and the
IO which prolongs the lock is also able to restart the AT cycle by
updating l_blast_sent.

Unfortunately, the change seems to have been made accidentally, as the
main prolong code was not adjusted accordingly.

Fixes: 292aa42e08 ("LU-6032 ldlm: don't disable softirq for exp_rpc_lock")
HPE-bug-id: LUS-9278
Signed-off-by: Vitaly Fertman <c17818@cray.com>
Change-Id: Idc598508fc13aa33ac9fce56f13310ca6fc819d4
Tested-by: Jenkins Build User <nssreleng@cray.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Reviewed-on: https://review.whamcloud.com/40868
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Andriy Skulysh <askulysh@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_extent.c
lustre/ldlm/ldlm_lockd.c

index d557f49..37aa87e 100644 (file)
@@ -657,8 +657,6 @@ void ldlm_lock_prolong_one(struct ldlm_lock *lock,
         */
        timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1);
 
         */
        timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1);
 
-       LDLM_DEBUG(lock, "refreshed to %ds.\n", timeout);
-
        arg->lpa_blocks_cnt++;
 
        /* OK. this is a possible lock the user holds doing I/O
        arg->lpa_blocks_cnt++;
 
        /* OK. this is a possible lock the user holds doing I/O
index cbf9479..52930be 100644 (file)
@@ -255,7 +255,7 @@ static int expired_lock_main(void *arg)
 
                                LDLM_ERROR(lock,
                                           "lock callback timer expired after %llds: evicting client at %s ",
 
                                LDLM_ERROR(lock,
                                           "lock callback timer expired after %llds: evicting client at %s ",
-                                          ktime_get_real_seconds() -
+                                          ktime_get_seconds() -
                                           lock->l_blast_sent,
                                           obd_export_nid2str(export));
                                ldlm_lock_to_ns(lock)->ns_timeouts++;
                                           lock->l_blast_sent,
                                           obd_export_nid2str(export));
                                ldlm_lock_to_ns(lock)->ns_timeouts++;
@@ -377,10 +377,10 @@ static void waiting_locks_callback(TIMER_DATA_TYPE unused)
 static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t delay)
 {
        unsigned long timeout_jiffies = jiffies;
 static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t delay)
 {
        unsigned long timeout_jiffies = jiffies;
-       time64_t now = ktime_get_seconds();
        time64_t deadline;
        timeout_t timeout;
 
        time64_t deadline;
        timeout_t timeout;
 
+       lock->l_blast_sent = ktime_get_seconds();
        if (!list_empty(&lock->l_pending_chain))
                return 0;
 
        if (!list_empty(&lock->l_pending_chain))
                return 0;
 
@@ -388,11 +388,12 @@ static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t delay)
            OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
                delay = 1;
 
            OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
                delay = 1;
 
-       deadline = now + delay;
+       deadline = lock->l_blast_sent + delay;
        if (likely(deadline > lock->l_callback_timestamp))
                lock->l_callback_timestamp = deadline;
 
        if (likely(deadline > lock->l_callback_timestamp))
                lock->l_callback_timestamp = deadline;
 
-       timeout = clamp_t(timeout_t, lock->l_callback_timestamp - now,
+       timeout = clamp_t(timeout_t,
+                         lock->l_callback_timestamp - lock->l_blast_sent,
                          0, delay);
        timeout_jiffies += cfs_time_seconds(timeout);
 
                          0, delay);
        timeout_jiffies += cfs_time_seconds(timeout);
 
@@ -468,7 +469,6 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
        }
 
        ldlm_set_waited(lock);
        }
 
        ldlm_set_waited(lock);
-       lock->l_blast_sent = ktime_get_real_seconds();
        ret = __ldlm_add_waiting_lock(lock, timeout);
        if (ret) {
                /*
        ret = __ldlm_add_waiting_lock(lock, timeout);
        if (ret) {
                /*
@@ -596,7 +596,7 @@ int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout)
        __ldlm_add_waiting_lock(lock, timeout);
        spin_unlock_bh(&waiting_locks_spinlock);
 
        __ldlm_add_waiting_lock(lock, timeout);
        spin_unlock_bh(&waiting_locks_spinlock);
 
-       LDLM_DEBUG(lock, "refreshed");
+       LDLM_DEBUG(lock, "refreshed to %ds", timeout);
        return 1;
 }
 EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
        return 1;
 }
 EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
@@ -1752,8 +1752,8 @@ int ldlm_request_cancel(struct ptlrpc_request *req,
                    lock->l_blast_sent != 0) {
                        timeout_t delay = 0;
 
                    lock->l_blast_sent != 0) {
                        timeout_t delay = 0;
 
-                       if (ktime_get_real_seconds() > lock->l_blast_sent)
-                               delay = ktime_get_real_seconds() -
+                       if (ktime_get_seconds() > lock->l_blast_sent)
+                               delay = ktime_get_seconds() -
                                        lock->l_blast_sent;
                        LDLM_DEBUG(lock,
                                   "server cancels blocked lock after %ds",
                                        lock->l_blast_sent;
                        LDLM_DEBUG(lock,
                                   "server cancels blocked lock after %ds",