X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-lfsck.sh;h=89b42744e7449b9d749f76e40c98b45d856725c9;hp=9290efdfa7a70813f8fb9b6242f23b885a019af0;hb=9bb5a2fd3e76b460fd5121d48bc492be27a2e4f5;hpb=be0c22a64ae1675d4995ab3ae6da75fbd04f9426 diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 9290efd..89b4274 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -80,6 +80,7 @@ SHOW_LAYOUT_ON_OST="do_facet ost1 \ $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout" MOUNT_OPTS_SCRUB="-o user_xattr" MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub" +MOUNT_OPTS_SKIP_LFSCK="-o user_xattr,skip_lfsck" lfsck_prep() { local ndirs=$1 @@ -128,6 +129,33 @@ run_e2fsck_on_mdt0() { error "(3) Fail to start MDT0" } +wait_all_targets_blocked() { + local com=$1 + local status=$2 + local err=$3 + + local count=$(do_facet mds1 \ + "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000 -w | + awk '/^${com}_mdts_${status}/ { print \\\$2 }'") + [[ $count -eq $MDSCOUNT ]] || { + do_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000" + error "($err) only $count of $MDSCOUNT MDTs are in ${status}" + } +} + +wait_all_targets() { + local com=$1 + local status=$2 + local err=$3 + + wait_update_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000 | + awk '/^${com}_mdts_${status}/ { print \\\$2 }'" \ + "$MDSCOUNT" $LTIME || { + do_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000" + error "($err) some MDTs are not in ${status}" + } +} + test_0() { lfsck_prep 3 3 @@ -439,12 +467,8 @@ test_2e() do_facet $SINGLEMDS $LCTL set_param fail_loc=0 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(4) unexpected status" - } + + wait_all_targets_blocked namespace completed 4 local repaired=$($SHOW_NAMESPACE | awk '/^linkea_repaired/ { print $2 }') @@ -940,6 +964,30 @@ test_8() [ "$STATUS" == "paused" ] || error "(20) Expect 'paused', but got '$STATUS'" + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(20.1) Fail to stop MDS!" + + echo "start $SINGLEMDS without resume LFSCK" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SKIP_LFSCK > /dev/null || + error "(20.2) Fail to start MDS!" + + timer=0 + while [ $timer -lt $timeout ]; do + STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + mdt.${MDT_DEV}.recovery_status | + awk '/^status/ { print \\\$2 }'") + [ "$STATUS" != "RECOVERING" ] && break; + sleep 1 + timer=$((timer + 1)) + done + + [ $timer != $timeout ] || + error "(20.3) recovery timeout" + + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "paused" ] || + error "(20.4) Expect 'paused', but got '$STATUS'" + #define OBD_FAIL_LFSCK_DELAY3 0x1602 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602 @@ -1350,38 +1398,21 @@ test_12() { -s 1 -r || error "(2) Fail to start LFSCK on all devices!" echo "All the LFSCK targets should be in 'scanning-phase1' status." - for k in $(seq $MDSCOUNT); do - local STATUS=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_namespace | - awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase1" ] || - error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'" - done + wait_all_targets namespace scanning-phase1 3 echo "Stop namespace LFSCK on all targets by single lctl command." do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A || error "(4) Fail to stop LFSCK on all devices!" echo "All the LFSCK targets should be in 'stopped' status." - for k in $(seq $MDSCOUNT); do - local STATUS=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_namespace | - awk '/^status/ { print $2 }') - [ "$STATUS" == "stopped" ] || - error "(5) MDS${k} Expect 'stopped', but got '$STATUS'" - done + wait_all_targets_blocked namespace stopped 5 echo "Re-start namespace LFSCK on all targets by single command (-s 0)." do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \ -s 0 -r || error "(6) Fail to start LFSCK on all devices!" echo "All the LFSCK targets should be in 'completed' status." - for k in $(seq $MDSCOUNT); do - wait_update_facet mds${k} "$LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 8 || - error "(7) MDS${k} is not the expected 'completed'" - done + wait_all_targets_blocked namespace completed 7 start_full_debug_logging @@ -1390,26 +1421,14 @@ test_12() { -s 1 -r || error "(8) Fail to start LFSCK on all devices!" echo "All the LFSCK targets should be in 'scanning-phase1' status." - for k in $(seq $MDSCOUNT); do - local STATUS=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase1" ] || - error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'" - done + wait_all_targets layout scanning-phase1 9 echo "Stop layout LFSCK on all targets by single lctl command." do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A || error "(10) Fail to stop LFSCK on all devices!" echo "All the LFSCK targets should be in 'stopped' status." - for k in $(seq $MDSCOUNT); do - local STATUS=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^status/ { print $2 }') - [ "$STATUS" == "stopped" ] || - error "(11) MDS${k} Expect 'stopped', but got '$STATUS'" - done + wait_all_targets_blocked layout stopped 11 for k in $(seq $OSTCOUNT); do local STATUS=$(do_facet ost${k} $LCTL get_param -n \ @@ -1424,15 +1443,7 @@ test_12() { -s 0 -r || error "(13) Fail to start LFSCK on all devices!" echo "All the LFSCK targets should be in 'completed' status." - for k in $(seq $MDSCOUNT); do - # The LFSCK status query internal is 30 seconds. For the case - # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough - # time to guarantee the status sync up. - wait_update_facet mds${k} "$LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 32 || - error "(14) MDS${k} is not the expected 'completed'" - done + wait_all_targets_blocked layout completed 14 stop_full_debug_logging } @@ -1656,15 +1667,7 @@ test_15c() { echo "Trigger layout LFSCK to race with the migration" $START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!" - for k in $(seq $MDSCOUNT); do - # The LFSCK status query internal is 30 seconds. For the case - # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough - # time to guarantee the status sync up. - wait_update_facet mds${k} "$LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" $LTIME || - error "(2) MDS${k} is not the expected 'completed'" - done + wait_all_targets_blocked layout completed 2 do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0 local repaired=$($SHOW_LAYOUT | @@ -2909,12 +2912,7 @@ test_22a() { $START_NAMESPACE -A -r || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^unmatched_pairs_repaired/ { print $2 }') @@ -2963,12 +2961,7 @@ test_22b() { $START_NAMESPACE -A -r || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^unmatched_pairs_repaired/ { print $2 }') @@ -3010,12 +3003,7 @@ test_23a() { $START_NAMESPACE -A -r || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^dangling_repaired/ { print $2 }') @@ -3029,12 +3017,7 @@ test_23a() { $START_NAMESPACE -A -r -C || error "(9) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(10) unexpected status" - } + wait_all_targets_blocked namespace completed 10 repaired=$($SHOW_NAMESPACE | awk '/^dangling_repaired/ { print $2 }') @@ -3060,15 +3043,43 @@ test_23b() { check_mount_and_prep + [[ -d $MOUNT/.lustre/lost+found/MDT0000 ]] || { + # Trigger LFSCK firstly, that will generate the + # .lustre/lost+found/MDTxxxx in advance to avoid + # reusing the local object for the dangling name + # entry. LU-7429 + $START_NAMESPACE -r || + error "(0) Fail to start LFSCK for namespace" + + wait_all_targets_blocked namespace completed 0.1 + } + $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0" + $LFS path2fid $DIR/$tdir/d0 + echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f0 + echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f1 + + local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}') + OID=$(printf %d $OID) + + if [ $OID -eq 1 ]; then + # To guarantee that the f0 and f1 are in the same FID seq + rm -f $DIR/$tdir/d0/f0 || + error "(3.1) Fail to unlink $DIR/$tdir/d0/f0" + echo "dummy" > $DIR/$tdir/d0/f0 || + error "(3.2) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f0 + fi echo "Inject failure stub on MDT0 to simulate dangling name entry" #define OBD_FAIL_LFSCK_DANGLING3 0x1621 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621 + do_facet $SINGLEMDS $LCTL set_param fail_val=$OID fail_loc=0x1621 ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0 rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1" @@ -3117,17 +3128,47 @@ test_23c() { echo "LFSCK cannot replace it." echo "#####" + start_full_debug_logging + check_mount_and_prep + [[ -d $MOUNT/.lustre/lost+found/MDT0000 ]] || { + # Trigger LFSCK firstly, that will generate the + # .lustre/lost+found/MDTxxxx in advance to avoid + # reusing the local object for the dangling name + # entry. LU-7429 + $START_NAMESPACE -r || + error "(0) Fail to start LFSCK for namespace" + + wait_all_targets_blocked namespace completed 0.1 + } + $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0" + $LFS path2fid $DIR/$tdir/d0 + echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f0 + echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f1 + + local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}') + OID=$(printf %d $OID) + + if [ $OID -eq 1 ]; then + # To guarantee that the f0 and f1 are in the same FID seq + rm -f $DIR/$tdir/d0/f0 || + error "(3.1) Fail to unlink $DIR/$tdir/d0/f0" + echo "dummy" > $DIR/$tdir/d0/f0 || + error "(3.2) Fail to touch on MDT0" + $LFS path2fid $DIR/$tdir/d0/f0 + fi echo "Inject failure stub on MDT0 to simulate dangling name entry" #define OBD_FAIL_LFSCK_DANGLING3 0x1621 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621 + do_facet $SINGLEMDS $LCTL set_param fail_val=$OID fail_loc=0x1621 ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0 rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1" @@ -3143,7 +3184,7 @@ test_23c() { error "(7) Fail to start LFSCK for namespace" wait_update_facet client "stat $DIR/$tdir/d0/foo | - awk '/Size/ { print \\\$2 }'" "0" 32 || { + awk '/Size/ { print \\\$2 }'" "0" $LTIME || { stat $DIR/$tdir/guard $SHOW_NAMESPACE error "(8) unexpected size" @@ -3160,6 +3201,8 @@ test_23c() { error "(10) unexpected status" } + stop_full_debug_logging + local repaired=$($SHOW_NAMESPACE | awk '/^dangling_repaired/ { print $2 }') [ $repaired -eq 1 ] || @@ -3230,12 +3273,7 @@ test_24() { $START_NAMESPACE -A -r || error "(7) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(8) unexpected status" - } + wait_all_targets_blocked namespace completed 8 local repaired=$($SHOW_NAMESPACE | awk '/^multiple_referenced_repaired/ { print $2 }') @@ -3318,18 +3356,14 @@ test_26a() { rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo" do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && "(5) 'ls' should fail" + ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && + error "(5) 'ls' should fail" echo "Trigger namespace LFSCK to repair the missing remote name entry" $START_NAMESPACE -r -A || error "(6) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(7) unexpected status" - } + wait_all_targets_blocked namespace completed 7 local repaired=$($SHOW_NAMESPACE | awk '/^lost_dirent_repaired/ { print $2 }') @@ -3370,18 +3404,14 @@ test_26b() { rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo" do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && "(4) 'ls' should fail" + ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 && + error "(4) 'ls' should fail" echo "Trigger namespace LFSCK to repair the missing remote name entry" $START_NAMESPACE -r -A || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^lost_dirent_repaired/ { print $2 }') @@ -3424,18 +3454,13 @@ test_27a() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0" - ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && "(6) 'ls' should fail" + ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && error "(6) 'ls' should fail" echo "Trigger namespace LFSCK to repair the lost parent" $START_NAMESPACE -r -A || error "(6) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(7) unexpected status" - } + wait_all_targets_blocked namespace completed 7 local repaired=$($SHOW_NAMESPACE | awk '/^lost_dirent_repaired/ { print $2 }') @@ -3481,18 +3506,13 @@ test_27b() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0" - ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && "(5) 'ls' should fail" + ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && error "(5) 'ls' should fail" echo "Trigger namespace LFSCK to repair the missing remote name entry" $START_NAMESPACE -r -A || error "(6) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(7) unexpected status" - } + wait_all_targets_blocked namespace completed 7 local repaired=$($SHOW_NAMESPACE | awk '/^lost_dirent_repaired/ { print $2 }') @@ -3592,15 +3612,7 @@ test_28() { $START_NAMESPACE -r -A || error "(8) Fail to start LFSCK for namespace" - for k in $(seq $MDSCOUNT); do - # The LFSCK status query internal is 30 seconds. For the case - # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough - # time to guarantee the status sync up. - wait_update_facet mds${k} "$LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || - error "(9) MDS${k} is not the expected 'completed'" - done + wait_all_targets_blocked namespace completed 9 local repaired=$(do_facet mds1 $LCTL get_param -n \ mdd.$(facet_svc mds1).lfsck_namespace | @@ -3645,12 +3657,7 @@ test_29a() { $START_NAMESPACE -r -A || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^nlinks_repaired/ { print $2 }') @@ -3692,12 +3699,7 @@ test_29b() { $START_NAMESPACE -r -A || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^nlinks_repaired/ { print $2 }') @@ -3741,18 +3743,13 @@ test_29c() { local foofid=$($LFS path2fid $DIR/$tdir/d0/foo) $LFS fid2path $DIR $foofid local count2=$($LFS fid2path $DIR $foofid | wc -l) - [ $count2 -eq 2 ] || "(6) Fail to inject error: $count2" + [ $count2 -eq 2 ] || error "(6) Fail to inject error: $count2" echo "Trigger namespace LFSCK to repair the nlink count" $START_NAMESPACE -r -A || error "(7) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(8) unexpected status" - } + wait_all_targets_blocked namespace completed 8 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 local repaired=$($SHOW_NAMESPACE | @@ -3826,12 +3823,7 @@ test_30() { $START_NAMESPACE -r -A || error "(14) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(15) unexpected status" - } + wait_all_targets_blocked namespace completed 15 local repaired=$($SHOW_NAMESPACE | awk '/^local_lost_found_moved/ { print $2 }') @@ -3840,7 +3832,7 @@ test_30() { mount_client $MOUNT || error "(17) Fail to start client!" - stat $DIR/$tdir/foo/f0 || "(18) f0 is not recovered" + stat $DIR/$tdir/foo/f0 || error "(18) f0 is not recovered" ls -ail $MOUNT/.lustre/lost+found/ @@ -3887,12 +3879,7 @@ test_31a() { $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(4) unexpected status" - } + wait_all_targets_blocked namespace completed 4 local repaired=$($SHOW_NAMESPACE | awk '/^name_hash_repaired/ { print $2 }') @@ -3943,10 +3930,7 @@ test_31b() { $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace" - wait_update_facet mds2 "$LCTL get_param -n \ - mdd.$(facet_svc mds2).lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || - error "(4) unexpected status" + wait_all_targets_blocked namespace completed 4 local repaired=$(do_facet mds2 $LCTL get_param -n \ mdd.$(facet_svc mds2).lfsck_namespace | @@ -3995,12 +3979,7 @@ test_31c() { $START_NAMESPACE -r -A || error "(2) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(3) unexpected status" - } + wait_all_targets_blocked namespace completed 3 local repaired=$($SHOW_NAMESPACE | awk '/^striped_dirs_repaired/ { print $2 }') @@ -4052,12 +4031,7 @@ test_31d() { $START_NAMESPACE -r -A || error "(5) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(6) unexpected status" - } + wait_all_targets_blocked namespace completed 6 local repaired=$($SHOW_NAMESPACE | awk '/^striped_dirs_repaired/ { print $2 }') @@ -4104,12 +4078,7 @@ test_31e() { $START_NAMESPACE -r -A || error "(2) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(3) unexpected status" - } + wait_all_targets_blocked namespace completed 3 local repaired=$($SHOW_NAMESPACE | awk '/^striped_shards_repaired/ { print $2 }') @@ -4134,7 +4103,7 @@ test_31f() { check_mount_and_prep echo "Inject failure stub on MDT0 to simulate the case that the" - echo "slave MDT-object (that resides on differnt MDT as the master" + echo "slave MDT-object (that resides on different MDT as the master" echo "MDT-object resides on) lost the LMV EA." #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a @@ -4147,10 +4116,7 @@ test_31f() { $START_NAMESPACE -r -A || error "(2) Fail to start LFSCK for namespace" - wait_update_facet mds2 "$LCTL get_param -n \ - mdd.$(facet_svc mds2).lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || - error "(3) unexpected status" + wait_all_targets_blocked namespace completed 3 local repaired=$(do_facet mds2 $LCTL get_param -n \ mdd.$(facet_svc mds2).lfsck_namespace | @@ -4188,12 +4154,7 @@ test_31g() { $START_NAMESPACE -r -A || error "(2) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(3) unexpected status" - } + wait_all_targets_blocked namespace completed 3 local repaired=$($SHOW_NAMESPACE | awk '/^striped_shards_repaired/ { print $2 }') @@ -4240,12 +4201,7 @@ test_31h() { $START_NAMESPACE -r -A || error "(2) Fail to start LFSCK for namespace" - wait_update_facet $SINGLEMDS "$LCTL get_param -n \ - mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 32 || { - $SHOW_NAMESPACE - error "(3) unexpected status" - } + wait_all_targets_blocked namespace completed 3 local repaired=$($SHOW_NAMESPACE | awk '/^dirent_repaired/ { print $2 }') @@ -4266,6 +4222,30 @@ test_31h() { } run_test 31h "Repair the corrupted shard's name entry" +test_32() +{ + lfsck_prep 5 5 + umount_client $MOUNT + + #define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 0x162d + do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d + $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!" + + local STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(3) Expect 'scanning-phase1', but got '$STATUS'" + + echo "stop ost1" + stop ost1 > /dev/null || error "(4) Fail to stop OST1!" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + sleep 1 + + echo "stop LFSCK" + $STOP_LFSCK || error "(5) Fail to stop LFSCK!" +} +run_test 32 "stop LFSCK when some OST failed" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE}