LU-13600 ptlrpc: limit rate of lock replays 11/39111/5
author    Mikhail Pershin <mpershin@whamcloud.com>
          Fri, 12 Jun 2020 14:14:50 +0000 (17:14 +0300)
committer Oleg Drokin <green@whamcloud.com>
          Sat, 11 Jul 2020 07:29:03 +0000 (07:29 +0000)
Clients send all lock replays at once, which may overwhelm the
server with a huge number of replays in the recovery queue and
cause OOM effects.

The patch adds rate control for lock replays on the client.

The patch also includes a later fix for a signal_completed_replay()
race.
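
For illustration only (not part of this patch): a minimal sketch of
the rate-control pattern, assuming the kernel waitqueue/atomic APIs
used in the diff below. The names (replay_ctl, replay_send_one,
replay_done_one) are hypothetical. The real patch reuses
imp_replay_inflight and cl_max_rpcs_in_flight, waits after issuing
each replay, and holds one extra count for the replay thread itself
(hence the "+1" in lock_can_replay()).

    /* Sketch only, not Lustre code: cap in-flight replays at
     * min(max_rpcs_in_flight, 8) with an atomic counter + waitqueue. */
    #include <linux/atomic.h>
    #include <linux/kernel.h>
    #include <linux/wait.h>

    struct replay_ctl {
            atomic_t          rc_inflight;  /* replay RPCs in flight */
            wait_queue_head_t rc_waitq;     /* sender sleeps here at the cap */
            u32               rc_max_rpcs;  /* per-import RPC limit */
    };

    static void replay_ctl_init(struct replay_ctl *rc, u32 max_rpcs)
    {
            atomic_set(&rc->rc_inflight, 0);
            init_waitqueue_head(&rc->rc_waitq);
            rc->rc_max_rpcs = max_rpcs;
    }

    static int replay_can_send(struct replay_ctl *rc)
    {
            return atomic_read(&rc->rc_inflight) <
                   min_t(u32, rc->rc_max_rpcs, 8);
    }

    /* Sender thread: block while at the cap, then take a slot and send. */
    static void replay_send_one(struct replay_ctl *rc)
    {
            wait_event_idle_exclusive(rc->rc_waitq, replay_can_send(rc));
            atomic_inc(&rc->rc_inflight);
            /* ... queue the replay RPC asynchronously here ... */
    }

    /* RPC completion callback: release the slot and wake one sender. */
    static void replay_done_one(struct replay_ctl *rc)
    {
            atomic_dec(&rc->rc_inflight);
            wake_up(&rc->rc_waitq);
    }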

Lustre-change: https://review.whamcloud.com/38920
Lustre-commit: 3b613a442b8698596096b23ce82e157c158a5874

Lustre-change: https://review.whamcloud.com/39140
Lustre-commit: dc654756af63bd30802ebd86074019d1533a4d8f

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Ie557f8481c5facb690468d7136cf5feebe4e8f11
Reviewed-on: https://review.whamcloud.com/39111
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/lustre_import.h
lustre/ldlm/ldlm_request.c
lustre/obdclass/genops.c
lustre/ptlrpc/import.c

diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h
index 6b436fd..5421de4 100644
@@ -226,6 +226,9 @@ struct obd_import {
        atomic_t                  imp_unregistering;
        /** Number of replay requests inflight */
        atomic_t                  imp_replay_inflight;
+       /** In-flight replays rate control */
+       wait_queue_head_t         imp_replay_waitq;
+
        /** Number of currently happening import invalidations */
        atomic_t                  imp_inval_count;
        /** Number of request timeouts */
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 0a03534..5c43b15 100644
@@ -2381,6 +2381,8 @@ static int replay_lock_interpret(const struct lu_env *env,
 
        ENTRY;
        atomic_dec(&req->rq_import->imp_replay_inflight);
+       wake_up(&req->rq_import->imp_replay_waitq);
+
        if (rc != ELDLM_OK)
                GOTO(out, rc);
 
@@ -2529,7 +2531,20 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
                           canceled, ldlm_ns_name(ns));
 }
 
-int ldlm_replay_locks(struct obd_import *imp)
+static int lock_can_replay(struct obd_import *imp)
+{
+       struct client_obd *cli = &imp->imp_obd->u.cli;
+
+       CDEBUG(D_HA, "check lock replay limit, inflights = %u(%u)\n",
+              atomic_read(&imp->imp_replay_inflight) - 1,
+              cli->cl_max_rpcs_in_flight);
+
+       /* +1 due to the ldlm_replay_locks() increment */
+       return atomic_read(&imp->imp_replay_inflight) <
+              1 + min_t(u32, cli->cl_max_rpcs_in_flight, 8);
+}
+
+int __ldlm_replay_locks(struct obd_import *imp, bool rate_limit)
 {
        struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
        struct list_head list = LIST_HEAD_INIT(list);
@@ -2538,15 +2553,12 @@ int ldlm_replay_locks(struct obd_import *imp)
 
        ENTRY;
 
-       LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+       LASSERT(atomic_read(&imp->imp_replay_inflight) == 1);
 
        /* don't replay locks if import failed recovery */
        if (imp->imp_vbr_failed)
                RETURN(0);
 
-       /* ensure this doesn't fall to 0 before all have been queued */
-       atomic_inc(&imp->imp_replay_inflight);
-
        if (ldlm_cancel_unused_locks_before_replay)
                ldlm_cancel_unused_locks_for_replay(ns);
 
@@ -2560,9 +2572,56 @@ int ldlm_replay_locks(struct obd_import *imp)
                }
                rc = replay_one_lock(imp, lock);
                LDLM_LOCK_RELEASE(lock);
+
+               if (rate_limit)
+                       wait_event_idle_exclusive(imp->imp_replay_waitq,
+                                                 lock_can_replay(imp));
        }
 
+       RETURN(rc);
+}
+
+/**
+ * Lock replay uses rate control and can sleep while waiting, so it
+ * must run in a separate thread from ptlrpcd itself.
+ */
+static int ldlm_lock_replay_thread(void *data)
+{
+       struct obd_import *imp = data;
+
+       unshare_fs_struct();
+
+       CDEBUG(D_HA, "lock replay thread %s to %s@%s\n",
+              imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+              imp->imp_connection->c_remote_uuid.uuid);
+
+       __ldlm_replay_locks(imp, true);
        atomic_dec(&imp->imp_replay_inflight);
+       ptlrpc_import_recovery_state_machine(imp);
+       class_import_put(imp);
 
-       RETURN(rc);
+       return 0;
+}
+
+int ldlm_replay_locks(struct obd_import *imp)
+{
+       struct task_struct *task;
+       int rc = 0;
+
+       class_import_get(imp);
+       /* ensure this doesn't fall to 0 before all have been queued */
+       atomic_inc(&imp->imp_replay_inflight);
+
+       task = kthread_run(ldlm_lock_replay_thread, imp, "ldlm_lock_replay");
+       if (IS_ERR(task)) {
+               rc = PTR_ERR(task);
+               CDEBUG(D_HA, "can't start lock replay thread: rc = %d\n", rc);
+
+               /* run lock replay without rate control */
+               rc = __ldlm_replay_locks(imp, false);
+               atomic_dec(&imp->imp_replay_inflight);
+               class_import_put(imp);
+       }
+
+       return rc;
 }
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index 4ab8a77..e36af1c 100644
@@ -1329,6 +1329,7 @@ struct obd_import *class_new_import(struct obd_device *obd)
        atomic_set(&imp->imp_reqs, 0);
        atomic_set(&imp->imp_inflight, 0);
        atomic_set(&imp->imp_replay_inflight, 0);
+       init_waitqueue_head(&imp->imp_replay_waitq);
        atomic_set(&imp->imp_inval_count, 0);
        INIT_LIST_HEAD(&imp->imp_conn_list);
        init_imp_at(&imp->imp_at);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index 742b587..9c2318e 100644
@@ -1430,8 +1430,8 @@ static int signal_completed_replay(struct obd_import *imp)
        if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
                RETURN(0);
 
-       LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
-       atomic_inc(&imp->imp_replay_inflight);
+       if (!atomic_add_unless(&imp->imp_replay_inflight, 1, 1))
+               RETURN(0);
 
        req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
                                        OBD_PING);
@@ -1512,6 +1512,8 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 
         ENTRY;
         if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+               struct task_struct *task;
+
                 deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
                 /* Don't care about MGC eviction */
@@ -1532,24 +1534,22 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                imp->imp_vbr_failed = 0;
                spin_unlock(&imp->imp_lock);
 
-               {
-               struct task_struct *task;
                /* bug 17802:  XXX client_disconnect_export vs connect request
                 * race. if client is evicted at this time then we start
                 * invalidate thread without reference to import and import can
                 * be freed at same time. */
                class_import_get(imp);
                task = kthread_run(ptlrpc_invalidate_import_thread, imp,
-                                    "ll_imp_inval");
+                                  "ll_imp_inval");
                if (IS_ERR(task)) {
                        class_import_put(imp);
-                       CERROR("error starting invalidate thread: %d\n", rc);
                        rc = PTR_ERR(task);
+                       CERROR("%s: can't start invalidate thread: rc = %d\n",
+                              imp->imp_obd->obd_name, rc);
                } else {
                        rc = 0;
                }
                RETURN(rc);
-               }
         }
 
        if (imp->imp_state == LUSTRE_IMP_REPLAY) {