X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fconf-sanity.sh;h=15e81ee50be97b8922fda61d3303cd760a815cdf;hp=c852868b07663e7ae18a5acbb33f54df011321c8;hb=3e4061862e751dbbbbba273403b56201e705a830;hpb=8c4f96f910786ff3d73474ef5f8d4a96a30a0bed;ds=sidebyside diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index c852868..15e81ee 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -49,6 +49,16 @@ if [ -n "$MDSSIZE" ]; then STORED_MDSSIZE=$MDSSIZE fi +# pass "-E lazy_itable_init" to mke2fs to speed up the formatting time +for facet in MGS MDS OST; do + opts=${facet}_MKFS_OPTS + if [[ ${!opts} != *lazy_itable_init* ]]; then + eval SAVED_${facet}_MKFS_OPTS=\"${!opts}\" + eval ${facet}_MKFS_OPTS=\"${!opts} \ +--mkfsoptions='\\\"-E lazy_itable_init\\\"'\" + fi +done + init_logging # @@ -165,12 +175,10 @@ mount_client() { } remount_client() { - local SAVEMOUNTOPT=$MOUNTOPT - MOUNTOPT="remount,$1" + local mountopt="-o remount,$1" local MOUNTPATH=$2 echo "remount '$1' lustre on ${MOUNTPATH}....." - zconf_mount `hostname` $MOUNTPATH || return 96 - MOUNTOPT=$SAVEMOUNTOPT + zconf_mount `hostname` $MOUNTPATH "$mountopt" || return 96 } umount_client() { @@ -192,6 +200,7 @@ setup() { start_mds || error "MDT start failed" start_ost || error "OST start failed" mount_client $MOUNT || error "client start failed" + client_up || error "client_up failed" } setup_noconfig() { @@ -217,7 +226,7 @@ cleanup_nocli() { } cleanup() { - umount_client $MOUNT || return 200 + umount_client $MOUNT || return 200 cleanup_nocli || return $? } @@ -318,7 +327,7 @@ test_5a() { # was test_5 # cleanup may return an error from the failed # disconnects; for now I'll consider this successful # if all the modules have unloaded. - umount -d $MOUNT & + umount -d $MOUNT & UMOUNT_PID=$! sleep 6 echo "killing umount" @@ -349,7 +358,7 @@ test_5a() { # was test_5 while [ "$WAIT" -ne "$MAX_WAIT" ]; do sleep $sleep grep -q $MOUNT" " /etc/mtab || break - echo "Waiting /etc/mtab updated ... " + echo "Waiting /etc/mtab updated ... " WAIT=$(( WAIT + sleep)) done [ "$WAIT" -eq "$MAX_WAIT" ] && error "/etc/mtab is not updated in $WAIT secs" @@ -724,6 +733,9 @@ test_21d() { stop_ost2 stop_mds stop_mgs + #writeconf to remove all ost2 traces for subsequent tests + writeconf + start_mgs } run_test 21d "start mgs then ost and then mds" @@ -753,6 +765,8 @@ test_22() { sleep $((TIMEOUT + TIMEOUT + TIMEOUT)) fi mount_client $MOUNT + wait_osc_import_state mds ost FULL + wait_osc_import_state client ost FULL check_mount || return 41 pass @@ -766,10 +780,10 @@ test_23a() { # was test_23 stop $SINGLEMDS # force down client so that recovering mds waits for reconnect local running=$(grep -c $MOUNT /proc/mounts) || true - if [ $running -ne 0 ]; then - echo "Stopping client $MOUNT (opts: -f)" - umount -f $MOUNT - fi + if [ $running -ne 0 ]; then + echo "Stopping client $MOUNT (opts: -f)" + umount -f $MOUNT + fi # enter recovery on mds start_mds @@ -781,10 +795,12 @@ test_23a() { # was test_23 echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID} ps --ppid $MOUNT_PID ps --ppid $MOUNT_LUSTRE_PID - # FIXME why o why can't I kill these? Manual "ctrl-c" works... - kill -TERM $MOUNT_LUSTRE_PID echo "waiting for mount to finish" ps -ef | grep mount + # "ctrl-c" sends SIGINT but it usually (in script) does not work on child process + # SIGTERM works but it does not spread to offspring processses + kill -s TERM $MOUNT_PID + kill -s TERM $MOUNT_LUSTRE_PID # we can not wait $MOUNT_PID because it is not a child of this shell local PID1 local PID2 @@ -877,7 +893,7 @@ test_24a() { facet_failover fs2mds facet_failover fs2ost df - umount_client $MOUNT + umount_client $MOUNT # the MDS must remain up until last MDT stop_mds MDS=$(do_facet $SINGLEMDS "lctl get_param -n devices" | awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }' | head -1) @@ -978,7 +994,7 @@ test_28() { set_and_check client "$TEST" "$PARAM" $FINAL || return 3 FINAL=$(($FINAL + 1)) set_and_check client "$TEST" "$PARAM" $FINAL || return 4 - umount_client $MOUNT || return 200 + umount_client $MOUNT || return 200 mount_client $MOUNT RESULT=$($TEST) if [ $RESULT -ne $FINAL ]; then @@ -1036,8 +1052,11 @@ test_29() { echo "Waiting $(($MAX - $WAIT)) secs for MDT deactivated" done + # quotacheck should not fail immediately after deactivate + [ -n "$ENABLE_QUOTA" ] && { $LFS quotacheck -ug $MOUNT || error "quotacheck has failed" ; } + # test new client starts deactivated - umount_client $MOUNT || return 200 + umount_client $MOUNT || return 200 mount_client $MOUNT RESULT=$(lctl get_param -n $PROC_UUID | grep DEACTIV | grep NEW) if [ -z "$RESULT" ]; then @@ -1047,10 +1066,13 @@ test_29() { echo "New client success: got $RESULT" fi + # quotacheck should not fail after umount/mount operation + [ -n "$ENABLE_QUOTA" ] && { $LFS quotacheck -ug $MOUNT || error "quotacheck has failed" ; } + # make sure it reactivates set_and_check client "lctl get_param -n $PROC_ACT" "$PARAM" $ACTV || return 6 - umount_client $MOUNT + umount_client $MOUNT stop_ost2 cleanup_nocli #writeconf to remove all ost2 traces for subsequent tests @@ -1069,7 +1091,7 @@ test_30a() { set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $i || return 3 done # make sure client restart still works - umount_client $MOUNT + umount_client $MOUNT mount_client $MOUNT || return 4 [ "$($TEST)" -ne "$i" ] && error "Param didn't stick across restart $($TEST) != $i" pass @@ -1384,7 +1406,7 @@ test_34b() { touch $DIR/$tfile || return 1 stop_mds --force || return 2 - manual_umount_client --force + manual_umount_client --force rc=$? if [ $rc -ne 0 ]; then error "mtab after failed umount - rc $rc" @@ -1400,7 +1422,7 @@ test_34c() { touch $DIR/$tfile || return 1 stop_ost --force || return 2 - manual_umount_client --force + manual_umount_client --force rc=$? if [ $rc -ne 0 ]; then error "mtab after failed umount - rc $rc" @@ -1478,25 +1500,27 @@ test_35b() { # bug 18674 return 1 local at_max_saved=0 - # adaptive timeouts may prevent seeing the issue + # adaptive timeouts may prevent seeing the issue if at_is_enabled; then at_max_saved=$(at_max_get mds) at_max_set 0 mds client fi - mkdir -p $MOUNT/testdir - touch $MOUNT/testdir/test + mkdir -p $MOUNT/$tdir log "Injecting EBUSY on MDS" # Setting OBD_FAIL_MDS_RESEND=0x136 do_facet $SINGLEMDS "$LCTL set_param fail_loc=0x80000136" || return 2 - log "Stat on a test file" - stat $MOUNT/testdir/test + $LCTL set_param mdc.${FSNAME}*.stats=clear + + log "Creating a test file and stat it" + touch $MOUNT/$tdir/$tfile + stat $MOUNT/$tdir/$tfile log "Stop injecting EBUSY on MDS" do_facet $SINGLEMDS "$LCTL set_param fail_loc=0" || return 3 - rm -f $MOUNT/testdir/test + rm -f $MOUNT/$tdir/$tfile log "done" # restore adaptive timeout @@ -1504,6 +1528,8 @@ test_35b() { # bug 18674 $LCTL dk $TMP/lustre-log-$TESTNAME.log + CONNCNT=`$LCTL get_param mdc.${FSNAME}*.stats | awk '/mds_connect/{print $2}'` + # retrieve from the log if the client has ever tried to # contact the fake server after the loss of connection FAILCONN=`awk "BEGIN {ret = 0;} @@ -1523,7 +1549,14 @@ test_35b() { # bug 18674 log "ERROR: The client tried to reconnect to the failover server while the primary was busy" && \ return 5 - cleanup + # LU-290 + # When OBD_FAIL_MDS_RESEND is hit, we sleep for 2 * obd_timeout + # Reconnects are supposed to be rate limited to one every 5s + [ $CONNCNT -gt $((2 * $TIMEOUT / 5 + 1)) ] && \ + log "ERROR: Too many reconnects $CONNCNT" && \ + return 6 + + cleanup # remove nid settings writeconf } @@ -1703,7 +1736,7 @@ run_test 39 "leak_finder recognizes both LUSTRE and LNET malloc messages" test_40() { # bug 15759 start_ost #define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 - do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000706" + do_facet $SINGLEMDS "$LCTL set_param fail_loc=0x80000706" start_mds cleanup } @@ -1737,6 +1770,8 @@ run_test 41a "mount mds with --nosvc and --nomgs" test_41b() { echo $MDS_MOUNT_OPTS | grep "loop" && skip " loop devices does not work with nosvc option" && return + ! combined_mgs_mds && skip "needs combined mgs device" && return 0 + stopall reformat local MDSDEV=$(mdsdevname ${SINGLEMDS//mds/}) @@ -1757,8 +1792,8 @@ test_41b() { stop_mds -f || return 203 } - run_test 41b "mount mds with --nosvc and --nomgs on first mount" + test_42() { #bug 14693 setup check_mount || return 2 @@ -1922,7 +1957,7 @@ cleanup_46a() { stop ost${count} -f || rc=$? let count=count-1 done - stop_mds || rc=$? + stop_mds || rc=$? cleanup_nocli || rc=$? #writeconf to remove all ost2 traces for subsequent tests writeconf @@ -1940,7 +1975,7 @@ test_46a() { mount_client $MOUNT || return 3 trap "cleanup_46a $OSTCOUNT" EXIT ERR - local i + local i for (( i=2; i<=$OSTCOUNT; i++ )); do start ost$i `ostdevname $i` $OST_MOUNT_OPTS || return $((i+2)) done @@ -1948,9 +1983,9 @@ test_46a() { # wait until osts in sync for (( i=2; i<=$OSTCOUNT; i++ )); do wait_osc_import_state mds ost$i FULL + wait_osc_import_state client ost$i FULL done - #second client see all ost's mount_client $MOUNT2 || return 8 @@ -2163,9 +2198,11 @@ test_50c() { wait_osc_import_state mds ost DISCONN lazystatfs $MOUNT || error "lazystatfs failed with one down server" - umount_client $MOUNT || error "Unable to unmount client" + umount_client $MOUNT || error "Unable to unmount client" stop_ost2 || error "Unable to stop OST2" stop_mds || error "Unable to stop MDS" + #writeconf to remove all ost2 traces for subsequent tests + writeconf } run_test 50c "lazystatfs one server down ==========================" @@ -2183,9 +2220,11 @@ test_50d() { stop_ost || error "Unable to stop OST1" lazystatfs $MOUNT || error "lazystatfs failed with one down server" - umount_client $MOUNT || error "Unable to unmount client" + umount_client $MOUNT || error "Unable to unmount client" stop_ost2 || error "Unable to stop OST2" stop_mds || error "Unable to stop MDS" + #writeconf to remove all ost2 traces for subsequent tests + writeconf } run_test 50d "lazystatfs client/server conn race ==========================" @@ -2317,6 +2356,8 @@ test_51() { wait $pid stop_ost2 || return 3 cleanup + #writeconf to remove all ost2 traces for subsequent tests + writeconf } run_test 51 "Verify that mdt_reint handles RMF_MDT_MD correctly when an OST is added" @@ -2377,7 +2418,7 @@ test_52() { [ $? -eq 0 ] || { error "Unable to mount client"; return 3; } local nrfiles=8 - local ost1mnt=${MOUNT%/*}/ost1 + local ost1mnt=$(facet_mntpt ost1) local ost1node=$(facet_active_host ost1) local ost1tmp=$TMP/conf52 @@ -2413,6 +2454,8 @@ test_52() { [ $? -eq 0 ] || { error "Unable to stop ost1"; return 11; } echo mount ost1 as ldiskfs + do_node $ost1node mkdir -p $ost1mnt + [ $? -eq 0 ] || { error "Unable to create $ost1mnt"; return 23; } do_node $ost1node mount -t $FSTYPE $ost1_dev $ost1mnt $OST_MOUNT_OPTS [ $? -eq 0 ] || { error "Unable to mount ost1 as ldiskfs"; return 12; } @@ -2435,7 +2478,7 @@ test_52() { diff_files_xattrs $ost1node $ost1tmp/objects $ost1tmp/object_xattrs $objects [ $? -eq 0 ] || { error "Unable to diff objects"; return 16; } - do_node $ost1node "umount $ost1_dev" + do_node $ost1node "umount $ost1mnt" [ $? -eq 0 ] || { error "Unable to umount ost1 as ldiskfs"; return 17; } start_ost @@ -2493,21 +2536,22 @@ thread_sanity() { lassert 23 "$msg (PDSH problems?)" '(($tstarted && $tmin && $tmax))' || return $? lassert 24 "$msg" '(($tstarted >= $tmin && $tstarted <= $tmax ))' || return $? - # Check that we can lower min/max - do_facet $facet "lctl set_param ${paramp}.threads_min=$((tmin - 1))" + # Check that we can change min/max + do_facet $facet "lctl set_param ${paramp}.threads_min=$((tmin + 1))" do_facet $facet "lctl set_param ${paramp}.threads_max=$((tmax - 1))" tmin2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_min" || echo 0) tmax2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_max" || echo 0) - lassert 25 "$msg" '(($tmin2 == ($tmin - 1) && $tmax2 == ($tmax -1)))' || return $? + lassert 25 "$msg" '(($tmin2 == ($tmin + 1) && $tmax2 == ($tmax -1)))' || return $? # Check that we can set min/max to the same value - do_facet $facet "lctl set_param ${paramp}.threads_max=$((tmin - 1))" + tmin=$(do_facet $facet "lctl get_param -n ${paramp}.threads_min" || echo 0) + do_facet $facet "lctl set_param ${paramp}.threads_max=$tmin" tmin2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_min" || echo 0) tmax2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_max" || echo 0) - lassert 26 "$msg" '(($tmin2 == ($tmin - 1) && $tmax2 == ($tmin - 1)))' || return $? + lassert 26 "$msg" '(($tmin2 == $tmin && $tmax2 == $tmin))' || return $? # Check that we can't set max < min - do_facet $facet "lctl set_param ${paramp}.threads_max=$((tmin - 2))" + do_facet $facet "lctl set_param ${paramp}.threads_max=$((tmin - 1))" tmin2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_min" || echo 0) tmax2=$(do_facet $facet "lctl get_param -n ${paramp}.threads_max" || echo 0) lassert 27 "$msg" '(($tmin2 <= $tmax2))' || return $? @@ -2551,33 +2595,20 @@ test_53b() { } run_test 53b "check MDT thread count params" -run_llverfs() -{ - local dir=$1 - local partial_arg="" - local size=$(df -B G $dir | tail -1 | awk '{print $2}' | sed 's/G//') # Gb - - # Run in partial (fast) mode if the size - # of a partition > 10 GB - [ $size -gt 10 ] && partial_arg="-p" - - llverfs $partial_arg $dir -} - test_54a() { - do_rpc_nodes $(facet_host ost1) run_llverdev $(ostdevname 1) + do_rpc_nodes $(facet_host ost1) run_llverdev $(ostdevname 1) -p [ $? -eq 0 ] || error "llverdev failed!" reformat_and_config } -run_test 54a "llverdev" +run_test 54a "test llverdev and partial verify of device" test_54b() { setup - run_llverfs $MOUNT + run_llverfs $MOUNT -p [ $? -eq 0 ] || error "llverfs failed!" cleanup } -run_test 54b "llverfs" +run_test 54b "test llverfs and partial verify of filesystem" lov_objid_size() { @@ -2590,7 +2621,7 @@ test_55() { local ostdev=$(ostdevname 1) local saved_opts=$OST_MKFS_OPTS - for i in 0 1023 2048 + for i in 1023 2048 do OST_MKFS_OPTS="$saved_opts --index $i" reformat @@ -2632,21 +2663,52 @@ test_56() { } run_test 56 "check big indexes" -test_57() { # bug 22656 +test_57a() { # bug 22656 local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') writeconf do_facet ost1 "$TUNEFS --failnode=$NID `ostdevname 1`" || error "tunefs failed" start_mgsmds start_ost && error "OST registration from failnode should fail" - stop_mds reformat } -run_test 57 "initial registration from failnode should fail (should return errs)" +run_test 57a "initial registration from failnode should fail (should return errs)" + +test_57b() { + local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') + writeconf + do_facet ost1 "$TUNEFS --servicenode=$NID `ostdevname 1`" || error "tunefs failed" + start_mgsmds + start_ost || error "OST registration from servicenode should not fail" + reformat +} +run_test 57b "initial registration from servicenode should not fail" count_osts() { do_facet mgs $LCTL get_param mgs.MGS.live.$FSNAME | grep OST | wc -l } +test_58() { # bug 22658 + [ "$FSTYPE" != "ldiskfs" ] && skip "not supported for $FSTYPE" && return + setup + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 100 + # make sure that OSTs do not cancel llog cookies before we unmount the MDS +#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 + do_facet mds "lctl set_param fail_loc=0x601" + unlinkmany $DIR/$tdir/$tfile-%d 100 + stop mds + local MNTDIR=$(facet_mntpt mds) + # remove all files from the OBJECTS dir + do_facet mds "mount -t ldiskfs $MDSDEV $MNTDIR" + do_facet mds "find $MNTDIR/OBJECTS -type f -delete" + do_facet mds "umount $MNTDIR" + # restart MDS with missing llog files + start_mds + do_facet mds "lctl set_param fail_loc=0" + reformat +} +run_test 58 "missing llog files must not prevent MDT from mounting" + test_59() { start_mgsmds >> /dev/null local C1=$(count_osts) @@ -2674,31 +2736,27 @@ test_59() { [ $C4 -eq 2 ] || error "OST2 writeconf should add log" stop_ost2 >> /dev/null cleanup_nocli >> /dev/null + #writeconf to remove all ost2 traces for subsequent tests + writeconf } run_test 59 "writeconf mount option" +test_60() { # LU-471 + add mds1 $MDS_MKFS_OPTS --mkfsoptions='\" -E stride=64 -O ^uninit_bg\"' --reformat $(mdsdevname 1) -test_58() { # bug 22658 - [ "$FSTYPE" != "ldiskfs" ] && skip "not supported for $FSTYPE" && return - setup - mkdir -p $DIR/$tdir - createmany -o $DIR/$tdir/$tfile-%d 100 - # make sure that OSTs do not cancel llog cookies before we unmount the MDS -#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 - do_facet mds "lctl set_param fail_loc=0x601" - unlinkmany $DIR/$tdir/$tfile-%d 100 - stop mds - local MNTDIR=$(facet_mntpt mds) - # remove all files from the OBJECTS dir - do_facet mds "mount -t ldiskfs $MDSDEV $MNTDIR" - do_facet mds "find $MNTDIR/OBJECTS -type f -delete" - do_facet mds "umount $MNTDIR" - # restart MDS with missing llog files - start_mds - do_facet mds "lctl set_param fail_loc=0" + dump=$(do_facet $SINGLEMDS dumpe2fs $(mdsdevname 1)) + rc=${PIPESTATUS[0]} + [ $rc -eq 0 ] || error "dumpe2fs $(mdsdevname 1) failed" + + # MDT default has dirdata feature + echo $dump | grep dirdata > /dev/null || error "dirdata is not set" + # we disable uninit_bg feature + echo $dump | grep uninit_bg > /dev/null && error "uninit_bg is set" + # we set stride extended options + echo $dump | grep stride > /dev/null || error "stride is not set" reformat } -run_test 58 "missing llog files must not prevent MDT from mounting" +run_test 60 "check mkfs.lustre --mkfsoptions -E -O options setting" if ! combined_mgs_mds ; then stop mgs @@ -2706,5 +2764,13 @@ fi cleanup_gss +# restore the ${facet}_MKFS_OPTS variables +for facet in MGS MDS OST; do + opts=SAVED_${facet}_MKFS_OPTS + if [[ -n ${!opts} ]]; then + eval ${facet}_MKFS_OPTS=\"${!opts}\" + fi +done + complete $(basename $0) $SECONDS exit_status