From fab71963c2513ec8f4eff2c1636c767c47a46034 Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Thu, 14 Jan 2021 17:58:40 +0100 Subject: [PATCH] LU-14027 tests: Fix test_135 of replay-single test_135 ("Server failure in lock replay phase") of replay-single uses the wrong method to get the PID of a background process: it reads $? (the exit status of the last command) instead of $! (the PID of the last background job). Signed-off-by: Etienne AUJAMES Fixes: 7ca495ec67 ("LU-14027 ldlm: Do not hang if recovery restarted during lock replay") Test-Parameters: trivial Test-Parameters: testlist=replay-single env=ONLY=135,ONLY_REPEAT=50 Test-Parameters: testlist=replay-single env=ONLY_REPEAT=20,ONLY=135,FAILURE_MODE=HARD clientcount=4 mdtcount=1 mdscount=2 osscount=2 austeroptions=-R failover=true iscsi=1 testlist=replay-single Change-Id: I6ed41d75f4cbba796e39288bad8895ee1c24459f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/41227 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/tests/replay-single.sh | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 58fd571..9dbc21c 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -4982,6 +4982,9 @@ run_test 134 "replay creation of a file created in a pool" # LU-14027 test_135() { + local PID + local old_replay + # make sure we are using the primary server [[ $(facet_active ost1) == "ost1" ]] || fail ost1 @@ -4990,31 +4993,34 @@ test_135() { # All files to ost1 $LFS setstripe -S $((128 * 1024)) -i 0 $DIR/$tdir + # Init all the clients connections (write lastrcv on the OST) + clients_up replay_barrier ost1 + old_replay=$($LCTL get_param ldlm.cancel_unused_locks_before_replay) + $LCTL set_param ldlm.cancel_unused_locks_before_replay=0 + stack_trap "$LCTL set_param $old_replay" EXIT + # Create 20 files so we have 20 ost locks for i in $(seq 20) ; do - echo blah > $DIR/$tdir/file.${i} + echo blah > 
$DIR/$tdir/file.${i} & PID+="$! " done + wait $PID - shutdown_facet ost1 - reboot_facet ost1 + stop ost1 change_active ost1 wait_for_facet ost1 - #define OBD_FAIL_TGT_REPLAY_RECONNECT 0x32d + #define OBD_FAIL_LDLM_LOCK_REPLAY 0x32d # Make sure lock replay server side never completes and errors out. do_rpc_nodes $(facet_active_host ost1) \ load_module ../libcfs/libcfs/libcfs - do_facet ost1 "$LCTL set_param fail_val=20" - do_facet ost1 "$LCTL set_param fail_loc=0x32d" - + do_facet ost1 "$LCTL set_param fail_loc=0x32d fail_val=20" mount_facet ost1 # Now make sure we notice - (sync;sync;sync) & - local PID=$? - sleep 20 # should we do something proactive to make reconnects go? + (sync;sync;sync;echo "End of sync") & PID=$! + wait_clients_import_state ${HOSTNAME} ost1 REPLAY_LOCKS kill -0 $PID || error "Unexpected sync success" shutdown_facet ost1 @@ -5029,6 +5035,8 @@ test_135() { unmountoss mountoss clients_up || clients_up || error "$LFS df $MOUNT failed" + + wait $PID || error "Fail to sync" echo blah > $DIR/$tdir/file.test2 rm -rf $DIR/$tdir -- 1.8.3.1