Whamcloud - gitweb
LU-17613 tests: explicit check for eviction with dmesg parse 99/54299/7
author Vladimir Saveliev <vladimir.saveliev@hpe.com>
Tue, 30 Jul 2024 15:21:43 +0000 (18:21 +0300)
committer Oleg Drokin <green@whamcloud.com>
Fri, 23 Aug 2024 21:57:31 +0000 (21:57 +0000)
client_evicted() used to detect client eviction from the result of
'lfs df': any error other than EOPNOTSUPP was taken to mean
"client was evicted".

When glibc's realpath() was changed to no longer call stat()
(for reference, see
  stdlib: Sync canonicalize with gnulib [BZ #10635] [BZ #26592] [BZ
  ..
  - Realpath mishandles EOVERFLOW; stat not needed anyway (BZ#24970).
)
'lfs df' started to return EOPNOTSUPP from lfs_df(). client_evicted()
was then changed so that any non-zero return is taken as "client was
evicted".

Check for "This client was evicted" in dmesg output to make sure that
eviction happened.

Add a comment in ptlrpc_import_recovery_state_machine() to make it
clear that this specific error message is used by the test code. Avoid
ratelimiting for the message.

Fixes: a5a9ded43b ("LU-16916 tests: fix client_evicted() not to ignore EOPNOTSUPP")
Test-Parameters: trivial testlist=replay-vbr,recovery-small
HPE-bug-id: LUS-11742
Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Change-Id: I10ef99d23d630164bfdf167e54e2f177e9b85598
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54299
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Elena <elena.gryaznova@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/import.c
lustre/tests/replay-dual.sh
lustre/tests/test-framework.sh

index 47ed2e3..4d172f9 100644 (file)
@@ -1615,9 +1615,9 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                if (strcmp(imp->imp_obd->obd_type->typ_name,
                           LUSTRE_MGC_NAME) != 0 &&
                    (connect_flags & OBD_CONNECT_LIGHTWEIGHT) == 0) {
-                       LCONSOLE_ERROR("%s: This client was evicted by %.*s; in progress operations using this service will fail.\n",
-                                      imp->imp_obd->obd_name, target_len,
-                                      target_start);
+                       /* below message checked in test-framework client_evicted() */
+                       LCONSOLE(D_ERROR, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n",
+                                imp->imp_obd->obd_name, target_len, target_start);
                        LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction\n");
                }
                CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
index d9deca2..072b1d5 100755 (executable)
@@ -1273,8 +1273,7 @@ test_32() {
 
        $LFS df $DIR
 
-       local testid=$(echo $TESTNAME | tr '_' ' ')
-       dmesg | tac | sed "/$testid/,$ d" | grep "This client was evicted" &&
+       client_evicted $CLIENT1 &&
                error "client got evicted due to aborted recovery"
        return 0
 }
index 8946e67..c12c9ec 100755 (executable)
@@ -4053,9 +4053,17 @@ client_up() {
        lfs_df_check $1
 }
 
+# usage: client_evicted client [evictor, mds1 by default]
+# return true if \a client was evicted by \a evictor in current test
 client_evicted() {
-       sleep 1
-       ! _lfs_df_check $1
+       local testid=$(echo $TESTNAME | tr '_' ' ')
+       local client=$1
+       local facet=${2:-mds1}
+       local dev=$(facet_svc $facet)
+
+       client_up $client
+       $PDSH $client "dmesg | tac | sed \"/$testid/,$ d\"" |
+               grep -q "client was evicted by ${dev}"
 }
 
 client_reconnect_try() {