replay_barrier mds2
$LFS mkdir -i1 -c2 $striped_dir
+ stack_trap fail_abort_cleanup RETURN
fail_abort mds2 abort_recov_mdt
createmany -o $striped_dir/f-%d 20 &&
fail mds2
- # $striped_dir creation partly fails due to abort_recov_mdt,
- # but at least this directory should be able to be deleted
- #$LFS rm_entry $striped_dir
- #rm -rf $DIR/$tdir || error "rmdir failed"
+ # LU-16159 abort_recovery will cancel update logs, the second recovery
+ # won't replay $striped_dir creation
+ (( $MDS1_VERSION >= $(version_code 2.15.52) )) ||
+ striped_dir_check_100 || error "striped dir check failed"
}
run_test 100c "DNE: create striped dir, abort_recov_mdt mds2"
count=$(do_facet $mdt "$LCTL --device $devname llog_print update_log |
grep -c index")
(( count > 0 )) || error "no update logs found"
+ stack_trap fail_abort_cleanup RETURN
fail_abort $mdt || error "fail_abort $mdt failed"
wait_update_facet $mdt "$LCTL --device $devname llog_print update_log |
grep -c index" 0 60 || error "update logs not canceled"
}
done
+ stack_trap fail_abort_cleanup RETURN
fail_abort mds1
for ((i = 0; i < 20; i++)); do
}
run_test 135 "Server failure in lock replay phase"
-# LU-16159 abort_recovery may cause directory unlink fail, now that LFSCK can't
-# fix all the inconsistencies, formatall so it won't fail in cleanup
-(( $MDS1_VERSION >= $(version_code 2.15.52.63) )) && formatall
-
complete $SECONDS
check_and_cleanup_lustre
exit_status
all_mds_up
}
+# LU-16159: abort recovery will cancel update logs, which may leave broken
+# directories in the system, remove name entry if necessary
+fail_abort_cleanup() {
+ rm -rf $DIR/$tdir/*
+ find $DIR/$tdir -depth | while read D; do
+ rmdir "$D" || $LFS rm_entry "$D" || error "rm $D failed"
+ done
+}
+
host_nids_address() {
local nodes=$1
local net=${2:-"."}