X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fconf-sanity.sh;h=546a0d89ddc6f07e3861dcc9974b1f368eb9493a;hp=acab3123b307e05d2476139725881ca730ad3aa6;hb=2358e27bc14683129a4c9f69f880c20d2e65db34;hpb=89f9a5bced24ecb7c84040a1ed88dcef4384f7c6 diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index acab3123..546a0d8 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -9,78 +9,108 @@ set -e +ONLY=${ONLY:-"$*"} + +# These tests don't apply to mountconf +# xml xml xml xml xml xml dumb FIXME +MOUNTCONFSKIP="10 11 12 13 13b 14 15 18" + +# bug number for skipped test: +ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} -MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} +HOSTNAME=`hostname` . $LUSTRE/tests/test-framework.sh - init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} - -gen_config() { - rm -f $XMLCONFIG - - add_mds mds1 --dev $MDSDEV --size $MDSSIZE - add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES\ - --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE - add_client client mds1 --lov lov1 --path $MOUNT +reformat() { + formatall } -gen_second_config() { - rm -f $XMLCONFIG +writeconf() { + local facet=mds + shift + stop ${facet} -f + rm -f ${facet}active + # who knows if/where $TUNEFS is installed? Better reformat if it fails... 
+ do_facet ${facet} "$TUNEFS --writeconf $MDSDEV" || echo "tunefs failed, reformatting instead" && reformat +} - add_mds mds2 --dev $MDSDEV --size $MDSSIZE - add_lov lov2 mds2 --stripe_sz $STRIPE_BYTES\ - --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - add_ost ost2 --lov lov2 --dev $OSTDEV --size $OSTSIZE - add_client client --mds mds2 --lov lov2 --path $MOUNT2 +gen_config() { + reformat + # The MGS must be started before the OSTs for a new fs, so start + # and stop to generate the startup logs. + start_mds + start_ost + sleep 5 + stop_ost + stop_mds } start_mds() { - echo "start mds1 service on `facet_active_host mds1`" - start mds1 --reformat $MDSLCONFARGS || return 94 - start_lsvcgssd || return 501 + echo "start mds service on `facet_active_host mds`" + start mds $MDSDEV $MDS_MOUNT_OPTS || return 94 } + stop_mds() { - echo "stop mds1 service on `facet_active_host mds1`" - stop mds1 $@ || return 97 - stop_lsvcgssd + echo "stop mds service on `facet_active_host mds`" + # These tests all use non-failover stop + stop mds -f || return 97 } start_ost() { - echo "start ost service on `facet_active_host ost`" - start ost --reformat $OSTLCONFARGS || return 95 + echo "start ost1 service on `facet_active_host ost1`" + start ost1 `ostdevname 1` $OST_MOUNT_OPTS || return 95 } stop_ost() { - echo "stop ost service on `facet_active_host ost`" - stop ost $@ || return 98 + echo "stop ost1 service on `facet_active_host ost1`" + # These tests all use non-failover stop + stop ost1 -f || return 98 +} + +start_ost2() { + echo "start ost2 service on `facet_active_host ost2`" + start ost2 `ostdevname 2` $OST_MOUNT_OPTS || return 92 +} + +stop_ost2() { + echo "stop ost2 service on `facet_active_host ost2`" + # These tests all use non-failover stop + stop ost2 -f || return 93 } mount_client() { local MOUNTPATH=$1 - start_lgssd || return 502 - echo "mount lustre on ${MOUNTPATH}....." + echo "mount $FSNAME on ${MOUNTPATH}....." 
+ zconf_mount `hostname` $MOUNTPATH || return 96 +} + +remount_client() { + local SAVEMOUNTOPT=$MOUNTOPT + MOUNTOPT="remount,$1" + local MOUNTPATH=$2 + echo "remount '$1' lustre on ${MOUNTPATH}....." zconf_mount `hostname` $MOUNTPATH || return 96 + MOUNTOPT=$SAVEMOUNTOPT } umount_client() { local MOUNTPATH=$1 echo "umount lustre on ${MOUNTPATH}....." - zconf_umount `hostname` $MOUNTPATH || return 97 - stop_lgssd + zconf_umount `hostname` $MOUNTPATH || return 97 } manual_umount_client(){ - echo "manual umount lustre on ${MOUNTPATH}...." - do_facet client "umount $MOUNT" - stop_lgssd + echo "manual umount lustre on ${MOUNT}...." + do_facet client "umount -d $MOUNT" } setup() { @@ -89,21 +119,23 @@ setup() { mount_client $MOUNT } -cleanup() { - umount_client $MOUNT || return 200 +cleanup_nocli() { stop_mds || return 201 stop_ost || return 202 - # catch case where these return just fine, but modules are still not unloaded - /sbin/lsmod | grep -q portals - if [ 1 -ne $? ]; then - echo "modules still loaded..." - return 203 - fi + unload_modules || return 203 +} + +cleanup() { + umount_client $MOUNT || return 200 + cleanup_nocli || return $? } check_mount() { - do_facet client "touch $DIR/a" || return 71 - do_facet client "rm $DIR/a" || return 72 + do_facet client "cp /etc/passwd $DIR/a" || return 71 + do_facet client "rm $DIR/a" || return 72 + # make sure lustre is actually mounted (touch will block, + # but grep won't, so do it after) + do_facet client "grep $MOUNT' ' /proc/mounts > /dev/null" || return 73 echo "setup single mount lustre success" } @@ -117,16 +149,23 @@ check_mount2() { build_test_filter +if [ "$ONLY" == "setup" ]; then + setup + exit +fi + +if [ "$ONLY" == "cleanup" ]; then + cleanup + exit +fi + #create single point mountpoint gen_config -start_krb5_kdc || exit 1 test_0() { - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 cleanup || return $? 
} @@ -135,41 +174,36 @@ run_test 0 "single mount setup" test_1() { start_ost echo "start ost second time..." - start ost --reformat $OSTLCONFARGS - start_mds - mount_client $MOUNT + setup check_mount || return 42 cleanup || return $? } -run_test 1 "start up ost twice" +run_test 1 "start up ost twice (should return errors)" test_2() { start_ost start_mds echo "start mds second time.." - start mds1 --reformat $MDSLCONFARGS - + start_mds mount_client $MOUNT check_mount || return 43 cleanup || return $? } -run_test 2 "start up mds twice" +run_test 2 "start up mds twice (should return err)" test_3() { setup - mount_client $MOUNT - + #mount.lustre returns an error if already in mtab + mount_client $MOUNT && return $? check_mount || return 44 - - umount_client $MOUNT - cleanup || return $? + cleanup || return $? } -run_test 3 "mount client twice" +run_test 3 "mount client twice (should return err)" test_4() { setup touch $DIR/$tfile || return 85 - stop_ost --force + stop_ost -f cleanup eno=$? # ok for ost to fail shutdown @@ -183,100 +217,93 @@ run_test 4 "force cleanup ost, then cleanup" test_5() { setup touch $DIR/$tfile || return 1 - stop_mds --force || return 2 + stop_mds -f || return 2 # cleanup may return an error from the failed # disconnects; for now I'll consider this successful # if all the modules have unloaded. - umount $MOUNT & + umount -d $MOUNT & UMOUNT_PID=$! - sleep 2 + sleep 6 echo "killing umount" kill -TERM $UMOUNT_PID echo "waiting for umount to finish" wait $UMOUNT_PID - stop_lgssd + if grep " $MOUNT " /etc/mtab; then + echo "test 5: mtab after failed umount" + umount $MOUNT & + UMOUNT_PID=$! 
+ sleep 2 + echo "killing umount" + kill -TERM $UMOUNT_PID + echo "waiting for umount to finish" + wait $UMOUNT_PID + grep " $MOUNT " /etc/mtab && echo "test 5: mtab after second umount" && return 11 + fi - # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null - + manual_umount_client # stop_mds is a no-op here, and should not fail - stop_mds || return 4 - stop_ost || return 5 - - lsmod | grep -q portals && return 6 - return 0 + cleanup_nocli || return $? + # df may have lingering entry + manual_umount_client + # mtab may have lingering entry + grep -v $MOUNT" " /etc/mtab > $TMP/mtabtemp + mv $TMP/mtabtemp /etc/mtab } run_test 5 "force cleanup mds, then cleanup" test_5b() { start_ost - start_mds - stop_mds - [ -d $MOUNT ] || mkdir -p $MOUNT - $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null - start_lgssd || return 1 - llmount $mds_HOST://mds1_svc/client_facet $MOUNT && exit 1 - - # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null - stop_lgssd - + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10 + mount_client $MOUNT && return 1 + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11 + umount_client $MOUNT # stop_mds is a no-op here, and should not fail - stop_mds || return 2 - stop_ost || return 3 - - lsmod | grep -q portals && return 4 + cleanup_nocli || return $? 
return 0 - } -run_test 5b "mds down, cleanup after failed mount (bug 2712)" +run_test 5b "mds down, cleanup after failed mount (bug 2712) (should return errs)" test_5c() { start_ost start_mds - [ -d $MOUNT ] || mkdir -p $MOUNT - $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null - start_lgssd || return 1 - llmount $mds_HOST://wrong_mds1_svc/client_facet $MOUNT && return 2 - - # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null - stop_lgssd - - stop_mds || return 3 - stop_ost || return 4 - - lsmod | grep -q portals && return 5 - return 0 - + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10 + mount -t lustre $MGSNID:/wrong.$FSNAME $MOUNT || : + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11 + umount_client $MOUNT + cleanup_nocli || return $? } -run_test 5c "cleanup after failed mount (bug 2712)" +run_test 5c "cleanup after failed mount (bug 2712) (should return errs)" test_5d() { - start_ost - start_mds - stop_ost --force - - [ -d $MOUNT ] || mkdir -p $MOUNT - $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null - start_lgssd || return 1 - llmount $mds_HOST://mds1_svc/client_facet $MOUNT || return 1 - - umount $MOUNT || return 2 - # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null - stop_lgssd - - stop_mds || return 3 + start_ost + start_mds + stop_ost -f + grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10 + mount_client $MOUNT || return 1 + cleanup || return $? 
+ grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11 + return 0 +} +run_test 5d "mount with ost down" - lsmod | grep -q portals && return 4 - return 0 +test_5e() { + start_ost + start_mds + # give MDS a chance to connect to OSTs (bz 10476) + sleep 5 +#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 + do_facet client "sysctl -w lustre.fail_loc=0x80000506" + grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10 + mount_client $MOUNT || echo "mount failed (not fatal)" + cleanup || return $? + grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11 + return 0 } -run_test 5d "ost down, don't crash during mount attempt" +run_test 5e "delayed connect, don't crash (bug 10268)" test_6() { setup @@ -290,96 +317,50 @@ run_test 6 "manual umount, then mount again" test_7() { setup manual_umount_client - cleanup || return $? + cleanup_nocli || return $? } run_test 7 "manual umount, then cleanup" test_8() { - start_ost - start_mds - - mount_client $MOUNT + setup mount_client $MOUNT2 - check_mount2 || return 45 - umount $MOUNT umount_client $MOUNT2 - - stop_mds - stop_ost + cleanup || return $? } run_test 8 "double mount setup" test_9() { - # backup the old values of PTLDEBUG and SUBSYSTEM - OLDPTLDEBUG=$PTLDEBUG - OLDSUBSYSTEM=$SUBSYSTEM - - # generate new configuration file with lmc --ptldebug and --subsystem - PTLDEBUG="trace" - SUBSYSTEM="mdc" - gen_config - - # check the result of lmc --ptldebug/subsystem start_ost - start_mds - mount_client $MOUNT - CHECK_PTLDEBUG="`cat /proc/sys/portals/debug`" - if [ $CHECK_PTLDEBUG = "1" ]; then - echo "lmc --debug success" - else - echo "lmc --debug: want 1, have $CHECK_PTLDEBUG" - return 1 - fi - CHECK_SUBSYSTEM="`cat /proc/sys/portals/subsystem_debug`" - if [ $CHECK_SUBSYSTEM = "2" ]; then - echo "lmc --subsystem success" - else - echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM" - return 1 - fi - check_mount || return 41 - cleanup || return $? 
- # the new PTLDEBUG/SUBSYSTEM used for lconf --ptldebug/subsystem - PTLDEBUG="inode+trace" - SUBSYSTEM="mds+ost" + do_facet ost1 sysctl lnet.debug=\'inode trace\' || return 1 + do_facet ost1 sysctl lnet.subsystem_debug=\'mds ost\' || return 1 - # check lconf --ptldebug/subsystem overriding lmc --ptldebug/subsystem - start_ost - start_mds - CHECK_PTLDEBUG="`do_facet mds1 cat /proc/sys/portals/debug`" - if [ $CHECK_PTLDEBUG = "3" ]; then - echo "lconf --debug success" + CHECK_PTLDEBUG="`do_facet ost1 sysctl -n lnet.debug`" + if [ "$CHECK_PTLDEBUG" ] && [ "$CHECK_PTLDEBUG" = "trace inode" ];then + echo "lnet.debug success" else - echo "lconf --debug: want 3, have $CHECK_PTLDEBUG" + echo "lnet.debug: want 'trace inode', have '$CHECK_PTLDEBUG'" return 1 fi - CHECK_SUBSYSTEM="`do_facet mds1 cat /proc/sys/portals/subsystem_debug`" - if [ $CHECK_SUBSYSTEM = "20" ]; then - echo "lconf --subsystem success" + CHECK_SUBSYS="`do_facet ost1 sysctl -n lnet.subsystem_debug`" + if [ "$CHECK_SUBSYS" ] && [ "$CHECK_SUBSYS" = "mds ost" ]; then + echo "lnet.subsystem_debug success" else - echo "lconf --subsystem: want 20, have $CHECK_SUBSYSTEM" + echo "lnet.subsystem_debug: want 'mds ost', have '$CHECK_SUBSYS'" return 1 fi - mount_client $MOUNT - check_mount || return 41 - cleanup || return $? - - # resume the old configuration - PTLDEBUG=$OLDPTLDEBUG - SUBSYSTEM=$OLDSUBSYSTEM - gen_config + stop_ost || return $? 
} -run_test 9 "test --ptldebug and --subsystem for lmc and lconf" +run_test 9 "test ptldebug and subsystem for mkfs" test_10() { echo "generate configuration with the same name for node and mds" OLDXMLCONFIG=$XMLCONFIG XMLCONFIG="broken.xml" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - facet="mds1" + facet="mds" rm -f ${facet}active add_facet $facet echo "the name for node and mds is the same" @@ -391,7 +372,7 @@ test_10() { add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE facet="client" add_facet $facet --lustre_upcall $UPCALL - do_lmc --add mtpt --node ${facet}_facet --mds mds1_facet \ + do_lmc --add mtpt --node ${facet}_facet --mds mds_facet \ --lov lov1 --path $MOUNT echo "mount lustre" @@ -411,15 +392,15 @@ test_11() { XMLCONFIG="conf11.xml" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - add_mds mds1 --dev $MDSDEV --size $MDSSIZE + add_mds mds --dev $MDSDEV --size $MDSSIZE add_ost ost --dev $OSTDEV --size $OSTSIZE - add_client client mds1 --path $MOUNT --ost ost_svc || return $? + add_client client mds --path $MOUNT --ost ost_svc || return $? echo "Default lov config success!" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - add_mds mds1 --dev $MDSDEV --size $MDSSIZE + add_mds mds --dev $MDSDEV --size $MDSSIZE add_ost ost --dev $OSTDEV --size $OSTSIZE - add_client client mds1 --path $MOUNT && return $? + add_client client mds --path $MOUNT && return $? echo "--add mtpt with neither --lov nor --ost will return error" echo "" @@ -436,8 +417,8 @@ test_12() { # test double quote [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG [ -f "$BATCHFILE" ] && rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE # --mkfsoptions "-I 128" do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? 
if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE # --mkfsoptions "-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched double quote should return error" # test single quote rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE # --mkfsoptions '-I 128' do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE # --mkfsoptions '-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched single quote should return error" # test backslash rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE # --mkfsoptions \-\I\ \128 do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? 
if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE # --mkfsoptions -I\ 128\ do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "backslash followed by nothing should return error" @@ -502,14 +483,13 @@ run_test 12 "lmc --batch, with single/double quote, backslash in batchfile" test_13() { OLDXMLCONFIG=$XMLCONFIG XMLCONFIG="conf13-1.xml" - SECONDXMLCONFIG="conf13-2.xml" # check long uuid will be truncated properly and uniquely echo "To generate XML configuration file(with long ost name): $XMLCONFIG" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - do_lmc --add net --node localhost --nid localhost.localdomain --nettype tcp - do_lmc --add mds --node localhost --mds mds1_name_longer_than_31characters - do_lmc --add mds --node localhost --mds mds2_name_longer_than_31characters + do_lmc --add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp + do_lmc --add mds --node $HOSTNAME --mds mds1_name_longer_than_31characters + do_lmc --add mds --node $HOSTNAME --mds mds2_name_longer_than_31characters if [ ! -f "$XMLCONFIG" ]; then echo "Error:no file $XMLCONFIG created!" return 1 @@ -520,6 +500,8 @@ test_13() { | sed "s/ /\n\r/g" | awk -F"'" '/uuid=/{print $2}'` FOUNDMDS2UUID=`awk -F"'" '/ $SECONDXMLCONFIG || return $? 
echo "Generate the second XML configuration file" gen_config - if [ `diff $XMLCONFIG $SECONDXMLCONFIG | wc -l` -eq 0 ]; then + # don't compare .xml mtime, it will always be different + if [ `sed -e "s/mtime[^ ]*//" $XMLCONFIG | diff - $SECONDXMLCONFIG | wc -l` -eq 0 ]; then echo "Success:multiple invocations for lmc generate same XML file" else echo "Error: multiple invocations for lmc generate different XML file" return 1 fi - rm -f $XMLCONFIG - rm -f $SECONDXMLCONFIG + rm -f $XMLCONFIG $SECONDXMLCONFIG XMLCONFIG=$OLDXMLCONFIG } -run_test 13 "check new_uuid of lmc operating correctly" +run_test 13b "check lmc generates consistent .xml file" test_14() { rm -f $XMLCONFIG # create xml file with --mkfsoptions for ost echo "create xml file with --mkfsoptions for ost" - add_mds mds1 --dev $MDSDEV --size $MDSSIZE - add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES\ + add_mds mds --dev $MDSDEV --size $MDSSIZE + add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE \ --mkfsoptions "-Llabel_conf_14" - add_client client mds1 --lov lov1 --path $MOUNT + add_client client mds --lov lov1 --path $MOUNT FOUNDSTRING=`awk -F"<" '//{print $2}' $XMLCONFIG` EXPECTEDSTRING="mkfsoptions>-Llabel_conf_14" - if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then + if [ "$EXPECTEDSTRING" != "$FOUNDSTRING" ]; then echo "Error: expected: $EXPECTEDSTRING; found: $FOUNDSTRING" return 1 fi @@ -575,7 +565,7 @@ test_14() { start_ost start_mds mount_client $MOUNT || return $? - if [ -z "`dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then + if [ -z "`do_facet ost1 dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then echo "Error: the mkoptions not applied to mke2fs of ost." return 1 fi @@ -595,175 +585,471 @@ cleanup_15() { fi } +# this only tests the kernel mount command, not anything about lustre. 
test_15() { + MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} start_ost start_mds - echo "mount lustre on ${MOUNT} with $MOUNTLUSTRE....." + + echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....." if [ -f "$MOUNTLUSTRE" ]; then echo "save $MOUNTLUSTRE to $MOUNTLUSTRE.sav" - mv $MOUNTLUSTRE $MOUNTLUSTRE.sav + mv $MOUNTLUSTRE $MOUNTLUSTRE.sav && trap cleanup_15 EXIT INT + if [ -f $MOUNTLUSTRE ]; then + echo "$MOUNTLUSTRE cannot be moved, skipping test" + return 0 + fi fi - [ -f "$MOUNTLUSTRE" ] && echo "can't move $MOUNTLUSTRE" && return 40 - trap cleanup_15 EXIT INT - [ ! `cp $LUSTRE/utils/llmount $MOUNTLUSTRE` ] || return $? - do_node `hostname` mkdir -p $MOUNT 2> /dev/null - # load llite module on the client if it isn't in /lib/modules - do_node `hostname` lconf --nosetup --node client_facet $XMLCONFIG - do_node `hostname` mount -t lustre -o nettype=$NETTYPE \ - `facet_active_host mds1`:/mds1_svc/client_facet $MOUNT ||return $? - echo "mount lustre on $MOUNT with $MOUNTLUSTRE: success" - [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname` - check_mount || return 41 - do_node `hostname` umount $MOUNT - [ -f "$MOUNTLUSTRE" ] && rm -f $MOUNTLUSTRE - echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....." - do_node `hostname` mount -t lustre -o nettype=$NETTYPE \ - `facet_active_host mds1`:/mds1_svc/client_facet $MOUNT &&return $? + mount_client $MOUNT && error "mount succeeded" && return 1 echo "mount lustre on $MOUNT without $MOUNTLUSTRE failed as expected" - cleanup || return $? cleanup_15 + cleanup || return $? } run_test 15 "zconf-mount without /sbin/mount.lustre (should return error)" -is_digit() { - local value=$1 - echo $value | grep -q "^[[:digit:]]*$" - return $? -} - test_16() { - TMPMTPT="/mnt/conf16" - + TMPMTPT="${MOUNT%/*}/conf16" + if [ ! -f "$MDSDEV" ]; then echo "no $MDSDEV existing, so mount Lustre to create one" - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 cleanup || return $? 
- fi - + fi + echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555" - [ -d $TMPMTPT ] || mkdir -p $TMPMTPT - mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $? - chmod 555 $TMPMTPT/OBJECTS || return $? - chmod 555 $TMPMTPT/LOGS || return $? - chmod 555 $TMPMTPT/PENDING || return $? - umount $TMPMTPT || return $? - + do_facet mds "mkdir -p $TMPMTPT && + mount -o loop -t ext3 $MDSDEV $TMPMTPT && + chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} && + umount $TMPMTPT" || return $? + echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre" - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 cleanup || return $? - + echo "read the mode of OBJECTS/LOGS/PENDING and check if they has been changed properly" - EXPECTEDOBJECTSMODE=`debugfs -R "stat OBJECTS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` - EXPECTEDLOGSMODE=`debugfs -R "stat LOGS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` - EXPECTEDPENDINGMODE=`debugfs -R "stat PENDING" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` - - # check if values are empty - test "x$EXPECTEDOBJECTSMODE" = "x" && EXPECTEDOBJECTSMODE="" - test "x$EXPECTEDLOGSMODE" = "x" && EXPECTEDLOGSMODE="" - test "x$EXPECTEDPENDINGMODE" = "x" && EXPECTEDPENDINGMODE="" - - # check if values are valid digits - is_digit $EXPECTEDOBJECTSMODE || { - echo "Invalid OBJECTS mode obtained from debugfs: $EXPECTEDOBJECTSMODE" - return 42 - } - - is_digit $EXPECTEDLOGSMODE || { - echo "Invalid LOGS mode obtained from debugfs: $EXPECTEDLOGSMODE" - return 42 - } - - is_digit $EXPECTEDPENDINGMODE || { - echo "Invalid PINDING mode obtained from debugfs: $EXPECTEDPENDINGMODE" - return 42 - } - - # check if values are those we expected - if [ "x$EXPECTEDOBJECTSMODE" = "x0777" ]; then - echo "Success: Lustre change the mode of OBJECTS correctly" + EXPECTEDOBJECTSMODE=`do_facet mds "debugfs -R 'stat OBJECTS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + 
EXPECTEDLOGSMODE=`do_facet mds "debugfs -R 'stat LOGS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + EXPECTEDPENDINGMODE=`do_facet mds "debugfs -R 'stat PENDING' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + + if [ "$EXPECTEDOBJECTSMODE" = "0777" ]; then + echo "Success:Lustre change the mode of OBJECTS correctly" else - echo "Error: Lustre does not change the mode of OBJECTS properly" - echo "Expected value: 0777, actual one: $EXPECTEDOBJECTSMODE" + echo "Error: Lustre does not change mode of OBJECTS properly" return 1 fi - - if [ "x$EXPECTEDLOGSMODE" = "x0777" ]; then - echo "Success: Lustre change the mode of LOGS correctly" + + if [ "$EXPECTEDLOGSMODE" = "0777" ]; then + echo "Success:Lustre change the mode of LOGS correctly" else - echo "Error: Lustre does not change the mode of LOGS properly" - echo "Expected value: 0777, actual one: $EXPECTEDLOGSMODE" + echo "Error: Lustre does not change mode of LOGS properly" return 1 fi - - if [ "x$EXPECTEDPENDINGMODE" = "x0777" ]; then - echo "Success: Lustre change the mode of PENDING correctly" + + if [ "$EXPECTEDPENDINGMODE" = "0777" ]; then + echo "Success:Lustre change the mode of PENDING correctly" else - echo "Error: Lustre does not change the mode of PENDING properly" - echo "Expected value: 0777, actual one: $EXPECTEDPENDINGMODE" + echo "Error: Lustre does not change mode of PENDING properly" return 1 fi } run_test 16 "verify that lustre will correct the mode of OBJECTS/LOGS/PENDING" test_17() { - TMPMTPT="/mnt/conf17" - if [ ! -f "$MDSDEV" ]; then echo "no $MDSDEV existing, so mount Lustre to create one" - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 cleanup || return $? fi echo "Remove mds config log" - [ -d $TMPMTPT ] || mkdir -p $TMPMTPT - mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $? - rm -f $TMPMTPT/LOGS/mds1_svc || return $? - umount $TMPMTPT || return $? 
+ do_facet mds "debugfs -w -R 'unlink CONFIGS/$FSNAME-MDT0000' $MDSDEV || return \$?" || return $? start_ost - start mds1 $MDSLCONFARGS && return 42 - cleanup || return $? + start_mds && return 42 + gen_config } -run_test 17 "Verify failed mds_postsetup won't fail assertion (2936)" +run_test 17 "Verify failed mds_postsetup won't fail assertion (2936) (should return errs)" test_18() { [ -f $MDSDEV ] && echo "remove $MDSDEV" && rm -f $MDSDEV echo "mount mds with large journal..." OLDMDSSIZE=$MDSSIZE MDSSIZE=2000000 + #FIXME have to change MDS_MKFS_OPTS gen_config - + echo "mount lustre system..." - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 - + echo "check journal size..." - FOUNDJOURNALSIZE=`debugfs -R "stat <8>" $MDSDEV | awk '/Size: / { print $6; exit;}'` - if [ $FOUNDJOURNALSIZE = "79691776" ]; then + FOUNDJOURNALSIZE=`do_facet mds "debugfs -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'` + if [ "$FOUNDJOURNALSIZE" = "79691776" ]; then echo "Success:lconf creates large journals" else echo "Error:lconf not create large journals correctly" echo "expected journal size: 79691776(76M), found journal size: $FOUNDJOURNALSIZE" return 1 fi - + cleanup || return $? - + MDSSIZE=$OLDMDSSIZE gen_config } run_test 18 "check lconf creates large journals" +test_19a() { + start_mds || return 1 + stop_mds -f || return 2 +} +run_test 19a "start/stop MDS without OSTs" + +test_19b() { + start_ost || return 1 + stop_ost -f || return 2 +} +run_test 19b "start/stop OSTs without MDS" + +test_20() { + # first format the ost/mdt + start_ost + start_mds + mount_client $MOUNT + check_mount || return 43 + rm -f $DIR/$tfile + remount_client ro $MOUNT || return 44 + touch $DIR/$tfile && echo "$DIR/$tfile created incorrectly" && return 45 + [ -e $DIR/$tfile ] && echo "$DIR/$tfile exists incorrectly" && return 46 + remount_client rw $MOUNT || return 47 + touch $DIR/$tfile + [ ! 
-f $DIR/$tfile ] && echo "$DIR/$tfile missing" && return 48 + MCNT=`grep -c $MOUNT /etc/mtab` + [ "$MCNT" -ne 1 ] && echo "$MOUNT in /etc/mtab $MCNT times" && return 49 + umount_client $MOUNT + stop_mds + stop_ost +} +run_test 20 "remount ro,rw mounts work and doesn't break /etc/mtab" + +test_21a() { + start_mds + start_ost + stop_ost + stop_mds +} +run_test 21a "start mds before ost, stop ost first" + +test_21b() { + start_ost + start_mds + stop_mds + stop_ost +} +run_test 21b "start ost before mds, stop mds first" + +test_21c() { + start_ost + start_mds + start_ost2 + stop_ost + stop_ost2 + stop_mds +} +run_test 21c "start mds between two osts, stop mds last" + +test_22() { + #reformat to remove all logs + reformat + start_mds + echo Client mount before any osts are in the logs + mount_client $MOUNT + check_mount && return 41 + pass + + echo Client mount with ost in logs, but none running + start_ost + stop_ost + mount_client $MOUNT + # check_mount will block trying to contact ost + umount_client $MOUNT + pass + + echo Client mount with a running ost + start_ost + mount_client $MOUNT + sleep 5 #bz10476 + check_mount || return 41 + pass + + cleanup +} +run_test 22 "start a client before osts (should return errs)" + +test_23() { + setup + # fail mds + stop mds + # force down client so that recovering mds waits for reconnect + zconf_umount `hostname` $MOUNT -f + # enter recovery on mds + start_mds + # try to start a new client + mount_client $MOUNT & + MOUNT_PID=$! + sleep 5 + MOUNT_LUSTRE_PID=`ps -ef | grep mount.lustre | grep -v grep | awk '{print $2}'` + echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID} + ps --ppid $MOUNT_PID + ps --ppid $MOUNT_LUSTRE_PID + # FIXME why o why can't I kill these? Manual "ctrl-c" works... 
+ kill -TERM $MOUNT_PID + echo "waiting for mount to finish" + ps -ef | grep mount + wait $MOUNT_PID + + stop_mds + stop_ost +} +#this test isn't working yet +#run_test 23 "interrupt client during recovery mount delay" + +test_24a() { + local fs2mds_HOST=$mds_HOST + # test 8-char fsname as well + local FSNAME2=test1234 + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --nomgs --mgsnode=$MGSNID --reformat ${MDSDEV}_2 || exit 10 + + local fs2ost_HOST=$ost_HOST + local fs2ostdev=$(ostdevname 1)_2 + add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2ostdev || exit 10 + + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS + start fs2ost $fs2ostdev $OST_MOUNT_OPTS + mkdir -p $MOUNT2 + mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1 + # 1 still works + check_mount || return 2 + # files written on 1 should not show up on 2 + cp /etc/passwd $DIR/$tfile + sleep 10 + [ -e $MOUNT2/$tfile ] && error "File bleed" && return 7 + # 2 should work + cp /etc/passwd $MOUNT2/b || return 3 + rm $MOUNT2/b || return 4 + # 2 is actually mounted + grep $MOUNT2' ' /proc/mounts > /dev/null || return 5 + # failover + facet_failover fs2mds + facet_failover fs2ost + df + umount_client $MOUNT + # the MDS must remain up until last MDT + stop_mds + MDS=$(awk '($3 ~ "mdt" && $4 ~ "MDS") { print $4 }' $LPROC/devices) + [ -z "$MDS" ] && error "No MDS" && return 8 + umount $MOUNT2 + stop fs2mds -f + stop fs2ost -f + cleanup_nocli || return 6 +} +run_test 24a "Multiple MDTs on a single node" + +test_24b() { + local fs2mds_HOST=$mds_HOST + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat ${MDSDEV}_2 || exit 10 + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS && return 2 + cleanup || return 6 +} +run_test 24b "Multiple MGSs on a single node (should return err)" + +test_25() { + setup + check_mount || return 2 + local MODULES=$($LCTL modules | awk '{ print $2 }') + rmmod $MODULES 2>/dev/null || true + cleanup || return 6 +} +run_test 25 "Verify modules are referenced" + 
# Set a fail_loc before starting the MDS, expect the MDS start to fail,
# and check that no devices are left behind afterwards.
# Returns: 1 if the MDS started anyway, 2 if $LPROC/devices is not
#          empty after the failed start, 203 if unload_modules fails.
test_26() {
	load_modules
	# we need modules before mount for sysctl, so make sure...
	[ -z "$(lsmod | grep lustre)" ] && modprobe lustre
#define OBD_FAIL_MDS_FS_SETUP 0x135
	sysctl -w lustre.fail_loc=0x80000135
	start_mds && echo MDS started && return 1
	cat $LPROC/devices
	DEVS=$(cat $LPROC/devices | wc -l)
	[ $DEVS -gt 0 ] && return 2
	unload_modules || return 203
}
run_test 26 "MDT startup failure cleans LOV (should return errs)"

# set_and_check <read_cmd> <param> [<value>]
#
# Set a parameter with "$LCTL conf_param" and poll until the new value
# becomes visible.
#   $1  command line (word-split on purpose) that prints the current
#       value of the parameter
#   $2  parameter name handed to conf_param
#   $3  value to set; defaults to the current value plus 5
# The value is polled every 5 seconds for at most 20 seconds.
# Returns: 0 once <read_cmd> reports the new value, 3 on timeout.
set_and_check() {
	local TEST=$1
	local PARAM=$2
	local ORIG=$($TEST)
	if [ $# -gt 2 ]; then
		local FINAL=$3
	else
		local -i FINAL
		FINAL=$(($ORIG + 5))
	fi
	echo "Setting $PARAM from $ORIG to $FINAL"
	$LCTL conf_param $PARAM=$FINAL
	local RESULT
	local MAX=20
	local WAIT=0
	while [ 1 ]; do
		sleep 5
		# account for the sleep before checking, so the success
		# message reports the time that has really elapsed
		WAIT=$((WAIT + 5))
		RESULT=$($TEST)
		if [ "$RESULT" -eq "$FINAL" ]; then
			echo "Updated config after $WAIT sec (got $RESULT)"
			break
		fi
		# -ge so the loop still ends if MAX is not a multiple of 5
		if [ $WAIT -ge $MAX ]; then
			echo "Config update not seen: wanted $FINAL got $RESULT"
			return 3
		fi
		echo "Waiting $(($MAX - $WAIT)) secs for config update"
	done
}

# Start the OST before the MDS, then check that a conf_param change to
# the OST's client_cache_seconds becomes visible on the OST.
# Returns: 1/2 if the OST/MDS fails to start, 3 if the update is not seen.
test_27a() {
	start_ost || return 1
	start_mds || return 2
	echo "Requeue thread should have started: "
	ps -e | grep ll_cfg_requeue
	set_and_check "cat $LPROC/obdfilter/$FSNAME-OST0000/client_cache_seconds" "$FSNAME-OST0000.ost.client_cache_seconds" || return 3
	cleanup_nocli
}
run_test 27a "Reacquire MGS lock if OST started first"

# Fail over the MDS, then check that conf_param changes still reach both
# the MDT (group_acquire_expire) and the client's mdc (max_rpcs_in_flight).
# Returns: 3 or 4 depending on which update is not seen.
test_27b() {
	setup
	facet_failover mds
	set_and_check "cat $LPROC/mds/$FSNAME-MDT0000/group_acquire_expire" "$FSNAME-MDT0000.mdt.group_acquire_expire" || return 3
	set_and_check "cat $LPROC/mdc/$FSNAME-MDT0000-mdc-*/max_rpcs_in_flight" "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4
	cleanup
}
run_test 27b "Reacquire MGS lock after failover"

# Raise max_read_ahead_whole_mb through conf_param and check that the new
# value is still in effect after the client is unmounted and mounted again.
# Returns: 3 if a live update is not seen, 200 if the unmount fails,
#          4 if the remounted client does not show the final value.
test_28() {
	setup
	TEST="cat $LPROC/llite/$FSNAME-*/max_read_ahead_whole_mb"
	ORIG=$($TEST)
	declare -i FINAL
	FINAL=$(($ORIG + 10))
	# each call without an explicit value adds 5, so two calls reach
	# the expected ORIG + 10
	set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3
	set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3
	umount_client $MOUNT || return 200
	mount_client $MOUNT
	RESULT=$($TEST)
	if [ $RESULT -ne $FINAL ]; then
		echo "New config not seen: wanted $FINAL got $RESULT"
		return 4
	else
		echo "New config success: got $RESULT"
	fi
	cleanup
}
run_test 28 "permanent parameter setting"

# Flip the "active" setting of OST0001's osc through conf_param and check
# the effect on the live client, on the MDT's osc (when readable) and on
# a freshly mounted client, then restore the original setting.
# Skipped when OSTCOUNT is below 2.
# Returns: 1 active file unreadable, 2 change not seen,
#          3 live client not marked DEACTIV, 4 MDT osc value differs,
#          5 new client not deactivated from the start,
#          6 restoring the original value not seen, 200 unmount failed.
test_29() {
	[ "$OSTCOUNT" -lt "2" ] && echo "skipping deactivate test" && return
	setup > /dev/null 2>&1
	start_ost2
	sleep 10

	local PARAM="$FSNAME-OST0001.osc.active"
	local PROC_ACT="$LPROC/osc/$FSNAME-OST0001-osc-*/active"
	local PROC_UUID="$LPROC/osc/$FSNAME-OST0001-osc-*/ost_server_uuid"
	if [ ! -r $PROC_ACT ]; then
		echo "Can't read $PROC_ACT"
		ls $LPROC/osc/$FSNAME-*
		return 1
	fi
	ACTV=$(cat $PROC_ACT)
	# the opposite of the current 0/1 value
	DEAC=$((1 - $ACTV))
	set_and_check "cat $PROC_ACT" "$PARAM" $DEAC || return 2
	# also check ost_server_uuid status
	RESULT=$(grep DEACTIV $PROC_UUID)
	if [ -z "$RESULT" ]; then
		echo "Live client not deactivated: $(cat $PROC_UUID)"
		return 3
	else
		echo "Live client success: got $RESULT"
	fi

	# check MDT too
	local MPROC="$LPROC/osc/$FSNAME-OST0001-osc/active"
	if [ -r $MPROC ]; then
		RESULT=$(cat $MPROC)
		if [ $RESULT -ne $DEAC ]; then
			echo "MDT not deactivated: $(cat $MPROC)"
			return 4
		fi
		echo "MDT deactivated also"
	fi

	# test new client starts deactivated
	umount_client $MOUNT || return 200
	mount_client $MOUNT
	RESULT=$(grep DEACTIV $PROC_UUID | grep NEW)
	if [ -z "$RESULT" ]; then
		echo "New client not deactivated from start: $(cat $PROC_UUID)"
		return 5
	else
		echo "New client success: got $RESULT"
	fi

	# make sure it reactivates
	set_and_check "cat $PROC_ACT" "$PARAM" $ACTV || return 6

	umount_client $MOUNT
	stop_ost2
	cleanup_nocli
	#writeconf to remove all ost2 traces for subsequent tests
	writeconf
}
run_test 29 "permanently remove an OST"

# Issue twenty conf_param updates in a row, check that a remounted client
# ends up with the last value, then restore the original value.
# Returns: 3 an update is not seen, 4 remount failed,
#          5 remounted client shows a different value, 6 restore not seen.
test_30() {
	# start mds first after writeconf
	start_mds
	start_ost
	mount_client $MOUNT
	TEST="cat $LPROC/llite/$FSNAME-*/max_read_ahead_whole_mb"
	ORIG=$($TEST)
	for i in $(seq 1 20); do
		set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $i || return 3
	done
	# make sure client restart still works
	umount_client $MOUNT
	mount_client $MOUNT || return 4
	# $i still holds the last value set by the loop above
	[ "$($TEST)" -ne "$i" ] && return 5
	set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $ORIG || return 6
	cleanup
}
run_test 30 "Big config llog"

# Try to mount from an address that must not exist; the mount result is
# ignored, the test only has to get through without a crash.
test_31() { # bug 10734
	# ipaddr must not exist
	mount -t lustre 4.3.2.1@tcp:/lustre $MOUNT || true
	cleanup
}
run_test 31 "Connect to non-existent node (shouldn't crash)"

# final teardown once all tests have run
umount_client $MOUNT
cleanup_nocli

equals_msg "Done"
echo "$0: completed"