Whamcloud - gitweb
LU-14708 ptlrpc: skip unnecessary client eviction 34/43834/15
author: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Mon, 11 Sep 2023 19:32:00 +0000 (22:32 +0300)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:04:21 +0000 (18:04 +0000)
A server does not synchronously update the last_rcvd file when a new
client connects. If the server fails over before the last_rcvd update
is committed, a recently connected client may find itself evicted
unexpectedly.

If a client has not cached any data from a server and has not
performed any modifying RPCs to the server, let the client connect
as a new one instead of considering itself evicted.

A test illustrating the issue is included.

Fixes: dcc8b9c00d5 "LU-9679 ptlrpc: list_for_each improvements"
Change-Id: I0c2d9c3b67cbc69c3283422f1f581b42f7f13a1a
HPE-bug-id: LUS-7141
Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/43834
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
lustre/include/lustre_import.h
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/tests/recovery-small.sh

index 71b9d14..a4df13e 100644 (file)
@@ -328,7 +328,8 @@ struct obd_import {
                                  /* grant shrink disabled */
                                  imp_grant_shrink_disabled:1,
                                  /* to supress LCONSOLE() at conn.restore */
-                                 imp_was_idle:1;
+                                 imp_was_idle:1,
+                                 imp_no_cached_data:1;
        u32                       imp_connect_op;
        u32                       imp_idle_timeout;
        u32                       imp_idle_debug;
index d0f058e..57e012c 100644 (file)
@@ -1563,6 +1563,10 @@ static int after_reply(struct ptlrpc_request *req)
                lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno);
        }
 
+       if (lustre_msg_get_transno(req->rq_repmsg) ||
+           lustre_msg_get_opc(req->rq_reqmsg) == LDLM_ENQUEUE)
+               imp->imp_no_cached_data = 0;
+
        if (imp->imp_replayable) {
                /* if other threads are waiting for ptlrpc_free_committed()
                 * they could continue the work of freeing RPCs. That reduces
index f327708..1145862 100644 (file)
@@ -640,7 +640,7 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
                return 1;
        }
        if (!list_empty(&imp->imp_replay_list)) {
-               req = list_first_entry(&imp->imp_committed_list,
+               req = list_first_entry(&imp->imp_replay_list,
                                       struct ptlrpc_request, rq_replay_list);
                *transno = req->rq_transno;
                if (req->rq_transno == 0) {
@@ -1168,6 +1168,8 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                imp->imp_remote_handle =
                        *lustre_msg_get_handle(request->rq_repmsg);
 
+               imp->imp_no_cached_data = 1;
+
                /* Initial connects are allowed for clients with non-random
                 * uuids when servers are in recovery.  Simply signal the
                 * servers replay is complete and wait in REPLAY_WAIT.
@@ -1282,12 +1284,16 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                        *lustre_msg_get_handle(request->rq_repmsg);
                import_set_state(imp, LUSTRE_IMP_RECOVER);
        } else {
-               DEBUG_REQ(D_HA, request,
-                         "%s: evicting (reconnect/recover flags not set: %x)",
-                         imp->imp_obd->obd_name, msg_flags);
                imp->imp_remote_handle =
                        *lustre_msg_get_handle(request->rq_repmsg);
-               import_set_state(imp, LUSTRE_IMP_EVICTED);
+               if (!imp->imp_no_cached_data) {
+                       DEBUG_REQ(D_HA, request,
+                                 "%s: evicting (reconnect/recover flags not set: %x)",
+                                 imp->imp_obd->obd_name, msg_flags);
+                       import_set_state(imp, LUSTRE_IMP_EVICTED);
+               } else {
+                       ptlrpc_activate_import(imp, true);
+               }
        }
 
        /* Sanity checks for a reconnected import. */
index edfa244..003f549 100755 (executable)
@@ -3466,6 +3466,27 @@ test_154() {
 }
 run_test 154 "corruption update llog can be skipped"
 
+test_155() {
+       local lsoutput1
+       local lsoutput2
+
+       touch $DIR/$tfile
+       lsoutput1=$(ls -l $DIR)
+
+       zconf_umount $HOSTNAME $MOUNT || error "umount failed"
+       # make sure that last_rcvd update is committed
+       do_facet mds1 sync
+       zconf_mount $HOSTNAME $MOUNT || error "mount failed"
+
+       replay_barrier_nosync mds1
+
+       fail_nodf mds1
+
+       lsoutput2=$(ls -l $DIR) || error "ls failed"
+       [[ $lsoutput1 == $lsoutput2 ]] || error "$lsoutput1 != $lsoutput2"
+}
+run_test 155 "failover after client remount"
+
 complete_test $SECONDS
 check_and_cleanup_lustre
 exit_status