set -e
-# bug 5493 LU2034
-ALWAYS_EXCEPT="52 $RECOVERY_SMALL_EXCEPT"
-
export MULTIOP=${MULTIOP:-multiop}
PTLDEBUG=${PTLDEBUG:--1}
LUSTRE=${LUSTRE:-`dirname $0`/..}
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
init_logging
+ALWAYS_EXCEPT="$RECOVERY_SMALL_EXCEPT "
+# bug number for skipped test:
+ALWAYS_EXCEPT+=" "
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+
require_dsh_mds || exit 0
# also long tests: 19, 21a, 21e, 21f, 23, 27
run_test 53 "touch: drop rep"
test_54() {
- zconf_mount `hostname` $MOUNT2
- touch $DIR/$tfile
- touch $DIR2/$tfile.1
- sleep 10
- cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd
- fail $SINGLEMDS
- umount $MOUNT2
- ERROR=`dmesg | egrep "(test 54|went back in time)" | tail -n1 | grep "went back in time"`
- [ x"$ERROR" == x ] || error "back in time occured"
+ zconf_mount $(hostname) $MOUNT2
+ touch $DIR/$tfile
+ touch $DIR2/$tfile.1
+ sleep 10
+ cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd
+ fail $SINGLEMDS
+ umount $MOUNT2
+ ERROR=$(dmesg | egrep "(test 54|went back in time)" | tail -n1 |
+ grep "went back in time")
+ [ x"$ERROR" == x ] || error "back in time occured"
}
run_test 54 "back in time"
}
run_test 100 "IR: Make sure normal recovery still works w/o IR"
-test_101()
+test_101a()
{
- do_facet mgs $LCTL list_param mgs.*.ir_timeout ||
- { skip "MGS without IR support"; return 0; }
+ do_facet mgs $LCTL list_param mgs.*.ir_timeout ||
+ skip "MGS without IR support"
- set_ir_status full
+ set_ir_status full
- local OST1_IMP=$(get_osc_import_name client ost1)
+ local ost1_imp=$(get_osc_import_name client ost1)
- # disable pinger recovery
- lctl set_param -n osc.$OST1_IMP.pinger_recov=0
+ # disable pinger recovery
+ lctl set_param -n osc.$ost1_imp.pinger_recov=0
+ stack_trap "$LCTL set_param -n osc.$ost1_imp.pinger_recov=1" EXIT
- fail ost1
+ fail ost1
- target_instance_match ost1 || error "instance mismatch"
- nidtbl_versions_match || error "version must match"
+ target_instance_match ost1 || error "instance mismatch"
+ nidtbl_versions_match || error "version must match"
+}
+run_test 101a "IR: Make sure IR works w/o normal recovery"
+
+test_101b()
+{
+ do_facet mgs $LCTL list_param mgs.*.ir_timeout ||
+ skip "MGS without IR support"
- lctl set_param -n osc.$OST1_IMP.pinger_recov=1
+ set_ir_status full
+
+ local ost1_imp=$(get_osc_import_name client ost1)
+
+#define OBD_FAIL_OST_PREPARE_DELAY 0x247
+ do_facet ost1 "$LCTL set_param fail_loc=0x247"
+ # disable pinger recovery
+ $LCTL set_param -n osc.$ost1_imp.pinger_recov=0
+ stack_trap "$LCTL set_param -n osc.$ost1_imp.pinger_recov=1" EXIT
+
+# The OST may return EAGAIN if it is not configured yet
+ fail ost1
}
-run_test 101 "IR: Make sure IR works w/o normal recovery"
+run_test 101b "IR: Make sure IR works w/o normal recovery and proceed EAGAIN"
test_102()
{
# lightweight goes through LUSTRE_IMP_RECOVER during failover
touch -c $DIR2/$tfile || true
$LCTL dk $TMP/lustre-log-$TESTNAME.log
- recovered=`awk '/MDT0000-mdc-[0-9a-f]*: lwp recover/ {
- print;
- }' $TMP/lustre-log-$TESTNAME.log`
+ recovered=$(awk '/MDT0000-mdc-[0-9a-f]*. lwp recover/ { print }' \
+ $TMP/lustre-log-$TESTNAME.log)
[ -z "$recovered" ] && error "lightweight client was not recovered"
# and all operations performed by lightweight client should be
}
run_test 110j "drop update reply during cross-MDT ln"
+# Start mds2 with "-o abort_recovery" while fail_loc 0x1103
+# (OBD_FAIL_FLD_QUERY_REQ) is set on it, and require that the start
+# succeeds.  Afterwards mds2 is restarted normally and the client is
+# mounted again.
+test_110k() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTS"
+	[[ $MDS1_VERSION -ge $(version_code 2.12.55) ]] ||
+		skip "Need MDS version at least 2.12.55"
+
+	stop mds2 || error "stop mds2 failed"
+	umount $MOUNT
+
+	# Safety net: clear the fail_loc even if one of the steps below
+	# aborts the test, so it cannot leak into the following tests.
+	stack_trap "do_facet mds2 lctl set_param fail_loc=0" EXIT
+
+#define OBD_FAIL_FLD_QUERY_REQ 0x1103
+	do_facet mds2 lctl set_param fail_loc=0x1103
+	start mds2 $(mdsdevname 2) -o abort_recovery ||
+		error "start MDS with abort_recovery should succeed"
+	# must be cleared here already: the cleanup restart below has to
+	# run without fault injection
+	do_facet mds2 lctl set_param fail_loc=0
+
+	# cleanup
+	stop mds2 || error "cleanup: stop mds2 failed"
+	start mds2 $(mdsdevname 2) || error "cleanup: start mds2 failed"
+	zconf_mount $(hostname) $MOUNT || error "cleanup: mount failed"
+	client_up || error "post-failover df failed"
+}
+run_test 110k "FID_QUERY failed during recovery"
+
# LU-2844 mdt prepare fail should not cause umount oops
test_111 ()
{
}
run_test 135 "DOM: open/create resend to return size"
+# Register a changelog user, accumulate changelog records, then start
+# changelog_deregister on the MDS with fail_loc 0x1318
+# (OBD_FAIL_LLOG_PURGE_DELAY) set and fail mds1 in HARD mode while the
+# deregister is still in progress.  After the failover either the user
+# must still be listed in changelog_users or the changelog size must
+# have decreased.
+test_136() {
+	remote_mds_nodsh && skip "remote MDS with nodsh"
+	[[ $MDS1_VERSION -ge $(version_code 2.12.52) ]] ||
+		skip "Need MDS version at least 2.12.52"
+
+	local mdts=$(comma_list $(mdts_nodes))
+	local MDT0=$(facet_svc $SINGLEMDS)
+	local cl_mask
+	local failure_mode_save
+
+	local clog=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n)
+	[ -n "$clog" ] || error "changelog_register failed"
+	# remember the current mask so it can be put back at the end
+	cl_mask=$(do_facet mds1 $LCTL get_param \
+		mdd.$MDT0.changelog_mask -n)
+	changelog_chmask "ALL"
+
+	# generate some changelog records to accumulate
+	test_mkdir -i 0 -c 0 $DIR/$tdir || error "mkdir $tdir failed"
+	createmany -m $DIR/$tdir/$tfile 10000 ||
+		error "create $DIR/$tdir/$tfile failed"
+
+	local size1=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n mdd.$MDT0.changelog_size)
+	echo "Changelog size $size1"
+
+	# Safety net: clear the fail_loc even if the failover below aborts
+	# the test, so it cannot leak into the following tests.
+	stack_trap "do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0" EXIT
+
+	#define OBD_FAIL_LLOG_PURGE_DELAY 0x1318
+	do_nodes $mdts $LCTL set_param fail_loc=0x1318 fail_val=30
+
+	# launch changelog_deregister in background on MDS
+	do_facet mds1 "nohup $LCTL --device $MDT0 changelog_deregister $clog \
+		> foo.out 2> foo.err < /dev/null &"
+	# give time to reach fail_loc
+	sleep 15
+
+	# fail_loc will make MDS sleep in the middle of changelog_deregister
+	# take this opportunity to abruptly kill MDS
+	failure_mode_save=$FAILURE_MODE
+	FAILURE_MODE=HARD
+	fail mds1
+	FAILURE_MODE=$failure_mode_save
+
+	do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0
+
+	local size2=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n mdd.$MDT0.changelog_size)
+	echo "Changelog size $size2"
+	local clog2=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+		mdd.$MDT0.changelog_users | grep $clog")
+	echo "After crash, changelog user $clog2"
+
+	# pass if the user survived the crash, or if records were purged;
+	# quoted operands make an empty size fail the numeric test cleanly
+	[ -n "$clog2" ] || [ "$size2" -lt "$size1" ] ||
+		error "changelog record count unchanged"
+
+	do_facet mds1 $LCTL set_param mdd.$MDT0.changelog_mask=\'$cl_mask\' -n
+}
+run_test 136 "changelog_deregister leaving pending records"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status