Whamcloud - gitweb
LU-6655 ptlrpc: skip delayed replay requests
[fs/lustre-release.git] / lustre / tests / replay-single.sh
index f159f93..ada7584 100755 (executable)
@@ -29,8 +29,8 @@ ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT "
 [ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
 
 [ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
-# bug number for skipped test:         LU-5761
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT   89"
+# bug number for skipped test:
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT "
 
 build_test_filter
 
@@ -499,7 +499,7 @@ test_20b() { # bug 10480
                (( $beforeused + $extra >= $afterused )) && break
                n_attempts=$((n_attempts + 1))
                [ $n_attempts -gt 3 ] &&
-                       error "after $afterused > before $beforeused"
+                       error "after $afterused > before $beforeused + $extra"
 
                wait_zfs_commit $SINGLEMDS 5
                sync_all_data
@@ -1912,7 +1912,7 @@ test_66b() #bug 3055
        $LCTL set_param fail_val=$(($ORIG + 5))
        #define OBD_FAIL_PTLRPC_PAUSE_REP      0x50c
        $LCTL set_param fail_loc=0x50c
-       ls $DIR/$tfile > /dev/null 2>&1
+       touch $DIR/$tfile > /dev/null 2>&1
        $LCTL set_param fail_loc=0
        CUR=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
                awk '/network/ {print $4}')
@@ -3297,11 +3297,14 @@ test_89() {
        cancel_lru_locks osc
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        rm -f $DIR/$tdir/$tfile
-       wait_mds_ost_sync
-       wait_delete_completed
-       BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+       wait_mds_ost_sync || error "initial MDS-OST sync timed out"
+       wait_delete_completed || error "initial wait delete timed out"
+       local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+       local write_size=$(fs_log_size)
+
        $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
-       dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
+       [ $write_size -lt 1024 ] && write_size=1024
+       dd if=/dev/zero bs=${write_size}k count=10 of=$DIR/$tdir/$tfile
        sync
        stop ost1
        facet_failover $SINGLEMDS
@@ -3310,11 +3313,12 @@ test_89() {
        mount_facet ost1
        zconf_mount $(hostname) $MOUNT || error "mount fails"
        client_up || error "client_up failed"
-       wait_mds_ost_sync
-       wait_delete_completed
-       BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
-       [ $((BLOCKS2 - BLOCKS1)) -le 4  ] ||
-               error $((BLOCKS2 - BLOCKS1)) blocks leaked
+       wait_mds_ost_sync || error "MDS-OST sync timed out"
+       wait_delete_completed || error "wait delete timed out"
+       local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+
+       [ $((blocks2 - blocks1)) -le $(fs_log_size)  ] ||
+               error $((blocks2 - blocks1)) blocks leaked
 }
 run_test 89 "no disk space leak on late ost connection"
 
@@ -4644,6 +4648,55 @@ test_120() {
 }
 run_test 120 "DNE fail abort should stop both normal and DNE replay"
 
+# Fail over mds1 while a background multiop is paused on $DIR/$tfile, with
+# fail_loc 0x721 (OBD_FAIL_TGT_RECOVERY_REQ_RACE) armed on $SINGLEMDS and
+# at_max set to 0 for "mds", then verify that the clients reach the FULL
+# import state and that the multiop process exits successfully.
+# Skipped when the MDS version code is below 2.10.90.
+test_121() {
+       [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+               skip "Don't support it before 2.11" &&
+               return 0
+
+       # saved so at_max can be put back at the end of the test
+       local at_max_saved=$(at_max_get mds)
+
+       touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+       cancel_lru_locks mdc
+
+       multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed"
+       # keep the pid function-local so it does not leak into later tests
+       local mpid=$!
+
+       # turned off for the failover; set back to "1" during cleanup below
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "0"
+
+       stop mds1
+       change_active mds1
+       wait_for_facet mds1
+
+       # arm the fault and zero at_max before the MDT is mounted again
+       #define OBD_FAIL_TGT_RECOVERY_REQ_RACE  0x721
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x721 fail_val=0"
+       at_max_set 0 mds
+
+       mount_facet mds1
+       wait_clients_import_state "$clients" mds1 FULL
+       # clients_up is given a second attempt before the test is failed
+       clients_up || clients_up || error "failover df: $?"
+
+       # signal the paused multiop and require a zero exit status from it
+       kill -USR1 $mpid
+       wait $mpid || error "multiop_bg_pause pid failed"
+
+       # cleanup: clear the fault and restore the tunables changed above
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+       at_max_set $at_max_saved mds
+       rm -f $DIR/$tfile
+}
+run_test 121 "lock replay timed out and race"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status