X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fconf-sanity.sh;h=46bad101729e63743c75a753b9bdbfd5646fa316;hp=6d3b3138a2e4bce84f0687e6ff2e06c25b454779;hb=38dca2a0d0304fd39dbf47eb20a580ee1e16a592;hpb=fb1b751f8c1c578f642d18e554eefcd832b50495 diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 6d3b313..46bad10 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -9,103 +9,133 @@ set -e +ONLY=${ONLY:-"$*"} + +# These tests don't apply to mountconf +# xml xml xml xml xml xml dumb FIXME +MOUNTCONFSKIP="10 11 12 13 13b 14 15 18" + +# bug number for skipped test: +ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} +HOSTNAME=`hostname` . $LUSTRE/tests/test-framework.sh - init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} - -FORCE=${FORCE:-" --force"} - -if [ "$VERBOSE" == "true" ]; then - CMDVERBOSE="" -else - CMDVERBOSE=" > /dev/null" -fi - -gen_config() { - rm -f $XMLCONFIG - - add_mds mds --dev $MDSDEV --size $MDSSIZE - add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ - --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE - add_client client mds --lov lov1 --path $MOUNT +reformat() { + formatall } -gen_second_config() { - rm -f $XMLCONFIG +writeconf() { + local facet=mds + shift + stop ${facet} -f + rm -f ${facet}active + # who knows if/where $TUNEFS is installed? Better reformat if it fails... + do_facet ${facet} "$TUNEFS --writeconf $MDSDEV" || echo "tunefs failed, reformatting instead" && reformat +} - add_mds mds2 --dev $MDSDEV --size $MDSSIZE - add_lov lov2 mds2 --stripe_sz $STRIPE_BYTES\ - --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - add_ost ost2 --lov lov2 --dev $OSTDEV --size $OSTSIZE - add_client client mds2 --lov lov2 --path $MOUNT2 +gen_config() { + reformat + # The MGS must be started before the OSTs for a new fs, so start + # and stop to generate the startup logs. + start_mds + start_ost + sleep 5 + stop_ost + stop_mds } start_mds() { echo "start mds service on `facet_active_host mds`" - start mds --reformat $MDSLCONFARGS $CMDVERBOSE || return 94 + start mds $MDSDEV $MDS_MOUNT_OPTS || return 94 } + stop_mds() { echo "stop mds service on `facet_active_host mds`" - stop mds $@ $CMDVERBOSE || return 97 + # These tests all use non-failover stop + stop mds -f || return 97 } start_ost() { - echo "start ost service on `facet_active_host ost`" - start ost --reformat $OSTLCONFARGS $CMDVERBOSE || return 95 + echo "start ost1 service on `facet_active_host ost1`" + start ost1 `ostdevname 1` $OST_MOUNT_OPTS || return 95 } stop_ost() { - echo "stop ost service on `facet_active_host ost`" - stop ost $@ $CMDVERBOSE || return 98 + echo "stop ost1 service on `facet_active_host ost1`" + # These tests all use non-failover stop + stop ost1 -f || return 98 +} + +start_ost2() { + echo "start ost2 service on `facet_active_host ost2`" + start ost2 `ostdevname 2` $OST_MOUNT_OPTS || return 92 +} + +stop_ost2() { + echo "stop ost2 service on `facet_active_host ost2`" + # These tests all use non-failover stop + stop ost2 -f || return 93 } mount_client() { local MOUNTPATH=$1 - echo "mount lustre on ${MOUNTPATH}....." - zconf_mount `hostname` $MOUNTPATH $CMDVERBOSE || return 96 + echo "mount $FSNAME on ${MOUNTPATH}....." + zconf_mount `hostname` $MOUNTPATH || return 96 +} + +remount_client() { + local SAVEMOUNTOPT=$MOUNTOPT + MOUNTOPT="remount,$1" + local MOUNTPATH=$2 + echo "remount '$1' lustre on ${MOUNTPATH}....." + zconf_mount `hostname` $MOUNTPATH || return 96 + MOUNTOPT=$SAVEMOUNTOPT } umount_client() { local MOUNTPATH=$1 echo "umount lustre on ${MOUNTPATH}....." - zconf_umount `hostname` $MOUNTPATH $CMDVERBOSE || return 97 + zconf_umount `hostname` $MOUNTPATH || return 97 } manual_umount_client(){ - echo "manual umount lustre on ${MOUNTPATH}...." - do_facet client "umount $MOUNT" + echo "manual umount lustre on ${MOUNT}...." + do_facet client "umount -d $MOUNT" } setup() { start_ost start_mds - mount_client $MOUNT + mount_client $MOUNT +} + +cleanup_nocli() { + stop_mds || return 201 + stop_ost || return 202 + unload_modules || return 203 } cleanup() { umount_client $MOUNT || return 200 - stop_mds || return 201 - stop_ost || return 202 - # catch case where these return just fine, but modules are still not unloaded - /sbin/lsmod | grep -q portals - if [ 1 -ne $? ]; then - echo "modules still loaded..." - return 203 - fi + cleanup_nocli || return $? } check_mount() { - do_facet client "touch $DIR/a" || return 71 - do_facet client "rm $DIR/a" || return 72 + do_facet client "cp /etc/passwd $DIR/a" || return 71 + do_facet client "rm $DIR/a" || return 72 + # make sure lustre is actually mounted (touch will block, + # but grep won't, so do it after) + do_facet client "grep $MOUNT' ' /proc/mounts > /dev/null" || return 73 echo "setup single mount lustre success" } @@ -119,15 +149,23 @@ check_mount2() { build_test_filter +if [ "$ONLY" == "setup" ]; then + setup + exit +fi + +if [ "$ONLY" == "cleanup" ]; then + cleanup + exit +fi + #create single point mountpoint gen_config test_0() { - start_ost - start_mds - mount_client $MOUNT + setup check_mount || return 41 cleanup || return $? } @@ -136,42 +174,37 @@ run_test 0 "single mount setup" test_1() { start_ost echo "start ost second time..." - start ost --reformat $OSTLCONFARGS $CMDVERBOSE - start_mds - mount_client $MOUNT + setup check_mount || return 42 cleanup || return $? } -run_test 1 "start up ost twice" +run_test 1 "start up ost twice (should return errors)" test_2() { start_ost start_mds echo "start mds second time.." - start mds --reformat $MDSLCONFARGS $CMDVERBOSE - - mount_client $MOUNT + start_mds + mount_client $MOUNT check_mount || return 43 cleanup || return $? } -run_test 2 "start up mds twice" +run_test 2 "start up mds twice (should return err)" test_3() { - setup - mount_client $MOUNT - + setup + #mount.lustre returns an error if already in mtab + mount_client $MOUNT && return $? check_mount || return 44 - - umount_client $MOUNT - cleanup || return $? + cleanup || return $? } -run_test 3 "mount client twice" +run_test 3 "mount client twice (should return err)" test_4() { setup touch $DIR/$tfile || return 85 - stop_ost ${FORCE} - cleanup + stop_ost -f + cleanup eno=$? # ok for ost to fail shutdown if [ 202 -ne $eno ]; then @@ -184,29 +217,93 @@ run_test 4 "force cleanup ost, then cleanup" test_5() { setup touch $DIR/$tfile || return 1 - stop_mds ${FORCE} || return 2 + stop_mds -f || return 2 - # cleanup may return an error from the failed - # disconnects; for now I'll consider this successful + # cleanup may return an error from the failed + # disconnects; for now I'll consider this successful # if all the modules have unloaded. - umount $MOUNT & + umount -d $MOUNT & UMOUNT_PID=$! - sleep $TIMEOUT + sleep 6 echo "killing umount" kill -TERM $UMOUNT_PID - wait $UMOUNT_PID + echo "waiting for umount to finish" + wait $UMOUNT_PID + if grep " $MOUNT " /etc/mtab; then + echo "test 5: mtab after failed umount" + umount $MOUNT & + UMOUNT_PID=$! + sleep 2 + echo "killing umount" + kill -TERM $UMOUNT_PID + echo "waiting for umount to finish" + wait $UMOUNT_PID + grep " $MOUNT " /etc/mtab && echo "test 5: mtab after second umount" && return 11 + fi - # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null - + manual_umount_client # stop_mds is a no-op here, and should not fail - stop_mds || return 4 - stop_ost || return 5 + cleanup_nocli || return $? + # df may have lingering entry + manual_umount_client + # mtab may have lingering entry + grep -v $MOUNT" " /etc/mtab > $TMP/mtabtemp + mv $TMP/mtabtemp /etc/mtab +} +run_test 5 "force cleanup mds, then cleanup" - lsmod | grep -q portals && return 6 +test_5b() { + start_ost + [ -d $MOUNT ] || mkdir -p $MOUNT + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10 + mount_client $MOUNT && return 1 + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11 + umount_client $MOUNT + # stop_mds is a no-op here, and should not fail + cleanup_nocli || return $? return 0 } -run_test 5 "force cleanup mds, then cleanup" +run_test 5b "mds down, cleanup after failed mount (bug 2712) (should return errs)" + +test_5c() { + start_ost + start_mds + [ -d $MOUNT ] || mkdir -p $MOUNT + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10 + mount -t lustre $MGSNID:/wrong.$FSNAME $MOUNT || : + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11 + umount_client $MOUNT + cleanup_nocli || return $? +} +run_test 5c "cleanup after failed mount (bug 2712) (should return errs)" + +test_5d() { + start_ost + start_mds + stop_ost -f + grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10 + mount_client $MOUNT || return 1 + cleanup || return $? + grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11 + return 0 +} +run_test 5d "mount with ost down" + +test_5e() { + start_ost + start_mds + # give MDS a chance to connect to OSTs (bz 10476) + sleep 5 + +#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 + do_facet client "sysctl -w lustre.fail_loc=0x80000506" + grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10 + mount_client $MOUNT || echo "mount failed (not fatal)" + cleanup || return $? + grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11 + return 0 +} +run_test 5e "delayed connect, don't crash (bug 10268)" test_6() { setup @@ -220,105 +317,75 @@ run_test 6 "manual umount, then mount again" test_7() { setup manual_umount_client - cleanup || return $? + cleanup_nocli || return $? } run_test 7 "manual umount, then cleanup" test_8() { - start_ost - start_mds - - mount_client $MOUNT - mount_client $MOUNT2 - + setup + mount_client $MOUNT2 check_mount2 || return 45 - umount $MOUNT - umount_client $MOUNT2 - - stop_mds - stop_ost + umount_client $MOUNT2 + cleanup || return $? } run_test 8 "double mount setup" test_9() { - # backup the old values of PTLDEBUG and SUBSYSTEM - OLDPTLDEBUG=$PTLDEBUG - OLDSUBSYSTEM=$SUBSYSTEM - - # generate new configuration file with lmc --ptldebug and --subsystem - PTLDEBUG="trace" - SUBSYSTEM="mdc" - gen_config - - # check the result of lmc --ptldebug/subsystem start_ost - start_mds - mount_client $MOUNT - CHECK_PTLDEBUG="`cat /proc/sys/portals/debug`" - if [ $CHECK_PTLDEBUG = "1" ]; then - echo "lmc --debug success" - else - echo "lmc --debug: want 1, have $CHECK_PTLDEBUG" - return 1 - fi - CHECK_SUBSYSTEM="`cat /proc/sys/portals/subsystem_debug`" - if [ $CHECK_SUBSYSTEM = "2" ]; then - echo "lmc --subsystem success" - else - echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM" - return 1 - fi - check_mount || return 41 - cleanup || return $? - # the new PTLDEBUG/SUBSYSTEM used for lconf --ptldebug/subsystem - PTLDEBUG="inode+trace" - SUBSYSTEM="mds+ost" + do_facet ost1 sysctl lnet.debug=\'inode trace\' || return 1 + do_facet ost1 sysctl lnet.subsystem_debug=\'mds ost\' || return 1 - # check lconf --ptldebug/subsystem overriding lmc --ptldebug/subsystem - start_ost - start_mds - CHECK_PTLDEBUG="`do_facet mds cat /proc/sys/portals/debug`" - if [ $CHECK_PTLDEBUG = "3" ]; then - echo "lconf --debug success" + CHECK_PTLDEBUG="`do_facet ost1 sysctl -n lnet.debug`" + if [ "$CHECK_PTLDEBUG" ] && [ "$CHECK_PTLDEBUG" = "trace inode" ];then + echo "lnet.debug success" else - echo "lconf --debug: want 3, have $CHECK_PTLDEBUG" + echo "lnet.debug: want 'trace inode', have '$CHECK_PTLDEBUG'" return 1 fi - CHECK_SUBSYSTEM="`do_facet mds cat /proc/sys/portals/subsystem_debug`" - if [ $CHECK_SUBSYSTEM = "20" ]; then - echo "lconf --subsystem success" + CHECK_SUBSYS="`do_facet ost1 sysctl -n lnet.subsystem_debug`" + if [ "$CHECK_SUBSYS" ] && [ "$CHECK_SUBSYS" = "mds ost" ]; then + echo "lnet.subsystem_debug success" else - echo "lconf --subsystem: want 20, have $CHECK_SUBSYSTEM" + echo "lnet.subsystem_debug: want 'mds ost', have '$CHECK_SUBSYS'" return 1 fi - mount_client $MOUNT - check_mount || return 41 - cleanup || return $? - - # resume the old configuration - PTLDEBUG=$OLDPTLDEBUG - SUBSYSTEM=$OLDSUBSYSTEM - gen_config + stop_ost || return $? } -run_test 9 "test --ptldebug and --subsystem for lmc and lconf" +run_test 9 "test ptldebug and subsystem for mkfs" test_10() { + echo "generate configuration with the same name for node and mds" OLDXMLCONFIG=$XMLCONFIG XMLCONFIG="broken.xml" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - SAMENAME="mds1" - do_lmc --add node --node $SAMENAME - do_lmc --add net --node $SAMENAME --nid $SAMENAME --nettype tcp - do_lmc --add mds --node $SAMENAME --mds $SAMENAME --nid $SAMENAME \ - --fstype ext3 --dev /dev/mds1 || return $? - do_lmc --add lov --lov lov1 --mds $SAMENAME --stripe_sz 65536 \ - --stripe_cnt 1 --stripe_pattern 0 || return $? + facet="mds" + rm -f ${facet}active + add_facet $facet + echo "the name for node and mds is the same" + do_lmc --add mds --node ${facet}_facet --mds ${facet}_facet \ + --dev $MDSDEV --size $MDSSIZE || return $? + do_lmc --add lov --mds ${facet}_facet --lov lov1 --stripe_sz \ + $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ \ + --stripe_pattern 0 || return $? + add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE + facet="client" + add_facet $facet --lustre_upcall $UPCALL + do_lmc --add mtpt --node ${facet}_facet --mds mds_facet \ + --lov lov1 --path $MOUNT + + echo "mount lustre" + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + cleanup || return $? + echo "Success!" XMLCONFIG=$OLDXMLCONFIG } -run_test 10 "use lmc with the same name for node and mds" +run_test 10 "mount lustre with the same name for node and mds" test_11() { OLDXMLCONFIG=$XMLCONFIG @@ -329,7 +396,7 @@ test_11() { add_ost ost --dev $OSTDEV --size $OSTSIZE add_client client mds --path $MOUNT --ost ost_svc || return $? echo "Default lov config success!" - + [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG add_mds mds --dev $MDSDEV --size $MDSSIZE add_ost ost --dev $OSTDEV --size $OSTSIZE @@ -350,8 +417,8 @@ test_12() { # test double quote [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG [ -f "$BATCHFILE" ] && rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE # --mkfsoptions "-I 128" do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE # --mkfsoptions "-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched double quote should return error" # test single quote rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE # --mkfsoptions '-I 128' do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE # --mkfsoptions '-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched single quote should return error" # test backslash rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE # --mkfsoptions \-\I\ \128 do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE # --mkfsoptions -I\ 128\ do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "backslash followed by nothing should return error" @@ -416,52 +483,62 @@ run_test 12 "lmc --batch, with single/double quote, backslash in batchfile" test_13() { OLDXMLCONFIG=$XMLCONFIG XMLCONFIG="conf13-1.xml" - SECONDXMLCONFIG="conf13-2.xml" # check long uuid will be truncated properly and uniquely echo "To generate XML configuration file(with long ost name): $XMLCONFIG" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - do_lmc --add net --node localhost --nid localhost.localdomain --nettype tcp - do_lmc --add mds --node localhost --mds mds1_name_longer_than_31characters - do_lmc --add mds --node localhost --mds mds2_name_longer_than_31characters + do_lmc --add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp + do_lmc --add mds --node $HOSTNAME --mds mds1_name_longer_than_31characters + do_lmc --add mds --node $HOSTNAME --mds mds2_name_longer_than_31characters if [ ! -f "$XMLCONFIG" ]; then echo "Error:no file $XMLCONFIG created!" return 1 fi EXPECTEDMDS1UUID="e_longer_than_31characters_UUID" EXPECTEDMDS2UUID="longer_than_31characters_UUID_2" - FOUNDMDS1UUID=`awk -F"'" '/ $SECONDXMLCONFIG || return $? echo "Generate the second XML configuration file" gen_config - if [ `diff $XMLCONFIG $SECONDXMLCONFIG | wc -l` -eq 0 ]; then + # don't compare .xml mtime, it will always be different + if [ `sed -e "s/mtime[^ ]*//" $XMLCONFIG | diff - $SECONDXMLCONFIG | wc -l` -eq 0 ]; then echo "Success:multiple invocations for lmc generate same XML file" else echo "Error: multiple invocations for lmc generate different XML file" return 1 fi - rm -f $XMLCONFIG - rm -f $SECONDXMLCONFIG + rm -f $XMLCONFIG $SECONDXMLCONFIG XMLCONFIG=$OLDXMLCONFIG } -run_test 13 "check new_uuid of lmc operating correctly" +run_test 13b "check lmc generates consistent .xml file" test_14() { rm -f $XMLCONFIG @@ -472,13 +549,13 @@ test_14() { add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE \ - --mkfsoptions -V + --mkfsoptions "-Llabel_conf_14" add_client client mds --lov lov1 --path $MOUNT FOUNDSTRING=`awk -F"<" '//{print $2}' $XMLCONFIG` - EXPECTEDSTRING="mkfsoptions>-V" - if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then - echo "Error:expected string: $EXPECTEDSTRING; found: $FOUNDSTRING" + EXPECTEDSTRING="mkfsoptions>-Llabel_conf_14" + if [ "$EXPECTEDSTRING" != "$FOUNDSTRING" ]; then + echo "Error: expected: $EXPECTEDSTRING; found: $FOUNDSTRING" return 1 fi echo "Success:mkfsoptions for ost written to xml file correctly." @@ -488,11 +565,580 @@ test_14() { start_ost start_mds mount_client $MOUNT || return $? + if [ -z "`do_facet ost1 dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then + echo "Error: the mkoptions not applied to mke2fs of ost." + return 1 + fi cleanup - echo "lconf mkfsoptions-parsing for ost success" + echo "lconf mkfsoptions for ost success" gen_config } run_test 14 "test mkfsoptions of ost for lmc and lconf" +cleanup_15() { + trap 0 + [ -f $MOUNTLUSTRE ] && echo "remove $MOUNTLUSTRE" && rm -f $MOUNTLUSTRE + if [ -f $MOUNTLUSTRE.sav ]; then + echo "return original $MOUNTLUSTRE.sav to $MOUNTLUSTRE" + mv $MOUNTLUSTRE.sav $MOUNTLUSTRE + fi +} + +# this only tests the kernel mount command, not anything about lustre. +test_15() { + MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} + start_ost + start_mds + + echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....." + if [ -f "$MOUNTLUSTRE" ]; then + echo "save $MOUNTLUSTRE to $MOUNTLUSTRE.sav" + mv $MOUNTLUSTRE $MOUNTLUSTRE.sav && trap cleanup_15 EXIT INT + if [ -f $MOUNTLUSTRE ]; then + echo "$MOUNTLUSTRE cannot be moved, skipping test" + return 0 + fi + fi + + mount_client $MOUNT && error "mount succeeded" && return 1 + echo "mount lustre on $MOUNT without $MOUNTLUSTRE failed as expected" + cleanup_15 + cleanup || return $? +} +run_test 15 "zconf-mount without /sbin/mount.lustre (should return error)" + +test_16() { + TMPMTPT="${MOUNT%/*}/conf16" + + if [ ! -f "$MDSDEV" ]; then + echo "no $MDSDEV existing, so mount Lustre to create one" + setup + check_mount || return 41 + cleanup || return $? + fi + + echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555" + do_facet mds "mkdir -p $TMPMTPT && + mount -o loop -t ext3 $MDSDEV $TMPMTPT && + chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} && + umount $TMPMTPT" || return $? + + echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre" + setup + check_mount || return 41 + cleanup || return $? + + echo "read the mode of OBJECTS/LOGS/PENDING and check if they has been changed properly" + EXPECTEDOBJECTSMODE=`do_facet mds "debugfs -R 'stat OBJECTS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + EXPECTEDLOGSMODE=`do_facet mds "debugfs -R 'stat LOGS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + EXPECTEDPENDINGMODE=`do_facet mds "debugfs -R 'stat PENDING' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"` + + if [ "$EXPECTEDOBJECTSMODE" = "0777" ]; then + echo "Success:Lustre change the mode of OBJECTS correctly" + else + echo "Error: Lustre does not change mode of OBJECTS properly" + return 1 + fi + + if [ "$EXPECTEDLOGSMODE" = "0777" ]; then + echo "Success:Lustre change the mode of LOGS correctly" + else + echo "Error: Lustre does not change mode of LOGS properly" + return 1 + fi + + if [ "$EXPECTEDPENDINGMODE" = "0777" ]; then + echo "Success:Lustre change the mode of PENDING correctly" + else + echo "Error: Lustre does not change mode of PENDING properly" + return 1 + fi +} +run_test 16 "verify that lustre will correct the mode of OBJECTS/LOGS/PENDING" + +test_17() { + if [ ! -f "$MDSDEV" ]; then + echo "no $MDSDEV existing, so mount Lustre to create one" + setup + check_mount || return 41 + cleanup || return $? + fi + + echo "Remove mds config log" + do_facet mds "debugfs -w -R 'unlink CONFIGS/$FSNAME-MDT0000' $MDSDEV || return \$?" || return $? + + start_ost + start_mds && return 42 + gen_config +} +run_test 17 "Verify failed mds_postsetup won't fail assertion (2936) (should return errs)" + +test_18() { + [ -f $MDSDEV ] && echo "remove $MDSDEV" && rm -f $MDSDEV + echo "mount mds with large journal..." + OLDMDSSIZE=$MDSSIZE + MDSSIZE=2000000 + #FIXME have to change MDS_MKFS_OPTS + gen_config + + echo "mount lustre system..." + setup + check_mount || return 41 + + echo "check journal size..." + FOUNDJOURNALSIZE=`do_facet mds "debugfs -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'` + if [ "$FOUNDJOURNALSIZE" = "79691776" ]; then + echo "Success:lconf creates large journals" + else + echo "Error:lconf not create large journals correctly" + echo "expected journal size: 79691776(76M), found journal size: $FOUNDJOURNALSIZE" + return 1 + fi + + cleanup || return $? + + MDSSIZE=$OLDMDSSIZE + gen_config +} +run_test 18 "check lconf creates large journals" + +test_19a() { + start_mds || return 1 + stop_mds -f || return 2 +} +run_test 19a "start/stop MDS without OSTs" + +test_19b() { + start_ost || return 1 + stop_ost -f || return 2 +} +run_test 19b "start/stop OSTs without MDS" + +test_20() { + # first format the ost/mdt + start_ost + start_mds + mount_client $MOUNT + check_mount || return 43 + rm -f $DIR/$tfile + remount_client ro $MOUNT || return 44 + touch $DIR/$tfile && echo "$DIR/$tfile created incorrectly" && return 45 + [ -e $DIR/$tfile ] && echo "$DIR/$tfile exists incorrectly" && return 46 + remount_client rw $MOUNT || return 47 + touch $DIR/$tfile + [ ! -f $DIR/$tfile ] && echo "$DIR/$tfile missing" && return 48 + MCNT=`grep -c $MOUNT /etc/mtab` + [ "$MCNT" -ne 1 ] && echo "$MOUNT in /etc/mtab $MCNT times" && return 49 + umount_client $MOUNT + stop_mds + stop_ost +} +run_test 20 "remount ro,rw mounts work and doesn't break /etc/mtab" + +test_21a() { + start_mds + start_ost + stop_ost + stop_mds +} +run_test 21a "start mds before ost, stop ost first" + +test_21b() { + start_ost + start_mds + stop_mds + stop_ost +} +run_test 21b "start ost before mds, stop mds first" + +test_21c() { + start_ost + start_mds + start_ost2 + stop_ost + stop_ost2 + stop_mds +} +run_test 21c "start mds between two osts, stop mds last" + +test_22() { + #reformat to remove all logs + reformat + start_mds + echo Client mount before any osts are in the logs + mount_client $MOUNT + check_mount && return 41 + pass + + echo Client mount with ost in logs, but none running + start_ost + stop_ost + mount_client $MOUNT + # check_mount will block trying to contact ost + umount_client $MOUNT + pass + + echo Client mount with a running ost + start_ost + mount_client $MOUNT + sleep 5 #bz10476 + check_mount || return 41 + pass + + cleanup +} +run_test 22 "start a client before osts (should return errs)" + +test_23() { + setup + # fail mds + stop mds + # force down client so that recovering mds waits for reconnect + zconf_umount `hostname` $MOUNT -f + # enter recovery on mds + start_mds + # try to start a new client + mount_client $MOUNT & + MOUNT_PID=$! + sleep 5 + MOUNT_LUSTRE_PID=`ps -ef | grep mount.lustre | grep -v grep | awk '{print $2}'` + echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID} + ps --ppid $MOUNT_PID + ps --ppid $MOUNT_LUSTRE_PID + # FIXME why o why can't I kill these? Manual "ctrl-c" works... + kill -TERM $MOUNT_PID + echo "waiting for mount to finish" + ps -ef | grep mount + wait $MOUNT_PID + + stop_mds + stop_ost +} +#this test isn't working yet +#run_test 23 "interrupt client during recovery mount delay" + +test_24a() { + local fs2mds_HOST=$mds_HOST + # test 8-char fsname as well + local FSNAME2=test1234 + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --nomgs --mgsnode=$MGSNID --reformat ${MDSDEV}_2 || exit 10 + + local fs2ost_HOST=$ost_HOST + local fs2ostdev=$(ostdevname 1)_2 + add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2ostdev || exit 10 + + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS + start fs2ost $fs2ostdev $OST_MOUNT_OPTS + mkdir -p $MOUNT2 + mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1 + # 1 still works + check_mount || return 2 + # files written on 1 should not show up on 2 + cp /etc/passwd $DIR/$tfile + sleep 10 + [ -e $MOUNT2/$tfile ] && error "File bleed" && return 7 + # 2 should work + cp /etc/passwd $MOUNT2/b || return 3 + rm $MOUNT2/b || return 4 + # 2 is actually mounted + grep $MOUNT2' ' /proc/mounts > /dev/null || return 5 + # failover + facet_failover fs2mds + facet_failover fs2ost + df + umount_client $MOUNT + # the MDS must remain up until last MDT + stop_mds + MDS=$(awk '($3 ~ "mdt" && $4 ~ "MDS") { print $4 }' $LPROC/devices) + [ -z "$MDS" ] && error "No MDS" && return 8 + umount $MOUNT2 + stop fs2mds -f + stop fs2ost -f + cleanup_nocli || return 6 +} +run_test 24a "Multiple MDTs on a single node" + +test_24b() { + local fs2mds_HOST=$mds_HOST + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat ${MDSDEV}_2 || exit 10 + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS && return 2 + cleanup || return 6 +} +run_test 24b "Multiple MGSs on a single node (should return err)" + +test_25() { + setup + check_mount || return 2 + local MODULES=$($LCTL modules | awk '{ print $2 }') + rmmod $MODULES 2>/dev/null || true + cleanup || return 6 +} +run_test 25 "Verify modules are referenced" + +test_26() { + load_modules + # we need modules before mount for sysctl, so make sure... + [ -z "$(lsmod | grep lustre)" ] && modprobe lustre +#define OBD_FAIL_MDS_FS_SETUP 0x135 + sysctl -w lustre.fail_loc=0x80000135 + start_mds && echo MDS started && return 1 + cat $LPROC/devices + DEVS=$(cat $LPROC/devices | wc -l) + [ $DEVS -gt 0 ] && return 2 + unload_modules || return 203 +} +run_test 26 "MDT startup failure cleans LOV (should return errs)" + +set_and_check() { + local TEST=$1 + local PARAM=$2 + local ORIG=$($TEST) + if [ $# -gt 2 ]; then + local FINAL=$3 + else + local -i FINAL + FINAL=$(($ORIG + 5)) + fi + echo "Setting $PARAM from $ORIG to $FINAL" + $LCTL conf_param $PARAM=$FINAL + local RESULT + local MAX=20 + local WAIT=0 + while [ 1 ]; do + sleep 5 + RESULT=$($TEST) + if [ $RESULT -eq $FINAL ]; then + echo "Updated config after $WAIT sec (got $RESULT)" + break + fi + WAIT=$((WAIT + 5)) + if [ $WAIT -eq $MAX ]; then + echo "Config update not seen: wanted $FINAL got $RESULT" + return 3 + fi + echo "Waiting $(($MAX - $WAIT)) secs for config update" + done +} + +test_27a() { + start_ost || return 1 + start_mds || return 2 + echo "Requeue thread should have started: " + ps -e | grep ll_cfg_requeue + set_and_check "cat $LPROC/obdfilter/$FSNAME-OST0000/client_cache_seconds" "$FSNAME-OST0000.ost.client_cache_seconds" || return 3 + cleanup_nocli +} +run_test 27a "Reacquire MGS lock if OST started first" + +test_27b() { + setup + facet_failover mds + set_and_check "cat $LPROC/mds/$FSNAME-MDT0000/group_acquire_expire" "$FSNAME-MDT0000.mdt.group_acquire_expire" || return 3 + set_and_check "cat $LPROC/mdc/$FSNAME-MDT0000-mdc-*/max_rpcs_in_flight" "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4 + cleanup +} +run_test 27b "Reacquire MGS lock after failover" + +test_28() { + setup + TEST="cat $LPROC/llite/$FSNAME-*/max_read_ahead_whole_mb" + ORIG=$($TEST) + declare -i FINAL + FINAL=$(($ORIG + 10)) + set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3 + set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3 + umount_client $MOUNT || return 200 + mount_client $MOUNT + RESULT=$($TEST) + if [ $RESULT -ne $FINAL ]; then + echo "New config not seen: wanted $FINAL got $RESULT" + return 4 + else + echo "New config success: got $RESULT" + fi + cleanup +} +run_test 28 "permanent parameter setting" + +test_29() { + [ "$OSTCOUNT" -lt "2" ] && echo "skipping deactivate test" && return + setup > /dev/null 2>&1 + start_ost2 + sleep 10 + + local PARAM="$FSNAME-OST0001.osc.active" + local PROC_ACT="$LPROC/osc/$FSNAME-OST0001-osc-*/active" + local PROC_UUID="$LPROC/osc/$FSNAME-OST0001-osc-*/ost_server_uuid" + if [ ! -r $PROC_ACT ]; then + echo "Can't read $PROC_ACT" + ls $LPROC/osc/$FSNAME-* + return 1 + fi + ACTV=$(cat $PROC_ACT) + DEAC=$((1 - $ACTV)) + set_and_check "cat $PROC_ACT" "$PARAM" $DEAC || return 2 + # also check ost_server_uuid status + RESULT=$(grep DEACTIV $PROC_UUID) + if [ -z "$RESULT" ]; then + echo "Live client not deactivated: $(cat $PROC_UUID)" + return 3 + else + echo "Live client success: got $RESULT" + fi + + # check MDT too + local MPROC="$LPROC/osc/$FSNAME-OST0001-osc/active" + if [ -r $MPROC ]; then + RESULT=$(cat $MPROC) + if [ $RESULT -ne $DEAC ]; then + echo "MDT not deactivated: $(cat $MPROC)" + return 4 + fi + echo "MDT deactivated also" + fi + + # test new client starts deactivated + umount_client $MOUNT || return 200 + mount_client $MOUNT + RESULT=$(grep DEACTIV $PROC_UUID | grep NEW) + if [ -z "$RESULT" ]; then + echo "New client not deactivated from start: $(cat $PROC_UUID)" + return 5 + else + echo "New client success: got $RESULT" + fi + + # make sure it reactivates + set_and_check "cat $PROC_ACT" "$PARAM" $ACTV || return 6 + + umount_client $MOUNT + stop_ost2 + cleanup_nocli + #writeconf to remove all ost2 traces for subsequent tests + writeconf +} +run_test 29 "permanently remove an OST" + +test_30() { + # start mds first after writeconf + start_mds + start_ost + mount_client $MOUNT + TEST="cat $LPROC/llite/$FSNAME-*/max_read_ahead_whole_mb" + ORIG=$($TEST) + for i in $(seq 1 20); do + set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $i || return 3 + done + # make sure client restart still works + umount_client $MOUNT + mount_client $MOUNT || return 4 + [ "$($TEST)" -ne "$i" ] && return 5 + set_and_check "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $ORIG || return 6 + cleanup +} +run_test 30 "Big config llog" + +test_31() { # bug 10734 + # ipaddr must not exist + mount -t lustre 4.3.2.1@tcp:/lustre $MOUNT || true + cleanup +} +run_test 31 "Connect to non-existent node (shouldn't crash)" + +test_32a() { + [ -z "$TUNEFS" ] && echo "No tunefs" && return + [ ! -r disk1_4.zip ] && echo "Cant find disk1_4.zip, skipping" && return + unzip -o -j -d $TMP/$tdir disk1_4.zip || { echo "Cant unzip disk1_4, skipping" && return ; } + load_modules + sysctl lnet.debug=$PTLDEBUG + + $TUNEFS $TMP/$tdir/mds || error "tunefs failed" + # nids are wrong, so client wont work, but server should start + start mds $TMP/$tdir/mds "-o loop" || return 3 + local UUID=$(cat $LPROC/mds/lustre-MDT0000/uuid) + echo MDS uuid $UUID + [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" + + $TUNEFS --mgsnode=`hostname` $TMP/$tdir/ost1 || error "tunefs failed" + start ost1 $TMP/$tdir/ost1 "-o loop" || return 5 + UUID=$(cat $LPROC/obdfilter/lustre-OST0000/uuid) + echo OST uuid $UUID + [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" + + local NID=$($LCTL list_nids | head -1) + + echo "OSC changes should return err:" + $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 && return 7 + $LCTL conf_param lustre-OST0000.failover.node=$NID && return 8 + echo "ok." + echo "MDC changes should succeed:" + $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9 + $LCTL conf_param lustre-MDT0000.failover.node=$NID || return 10 + echo "ok." + + # With a new good MDT failover nid, we should be able to mount a client + # (but it cant talk to OST) + mount_client $MOUNT + set_and_check "cat $LPROC/mdc/*/max_rpcs_in_flight" "lustre-MDT0000.mdc.max_rpcs_in_flight" || return 11 + + zconf_umount `hostname` $MOUNT -f + cleanup_nocli + + # mount a second time to make sure we didnt leave upgrade flag on + $TUNEFS --dryrun $TMP/$tdir/mds || error "tunefs failed" + start mds $TMP/$tdir/mds "-o loop" || return 12 + cleanup_nocli + + [ -d $TMP/$tdir ] && rm -rf $TMP/$tdir +} +run_test 32a "Upgrade from 1.4 (not live)" + +test_32b() { + [ -z "$TUNEFS" ] && echo "No tunefs" && return + [ ! -r disk1_4.zip ] && echo "Cant find disk1_4.zip, skipping" && return + unzip -o -j -d $TMP/$tdir disk1_4.zip || { echo "Cant unzip disk1_4, skipping" && return ; } + load_modules + sysctl lnet.debug=$PTLDEBUG + + # writeconf will cause servers to register with their current nids + $TUNEFS --writeconf $TMP/$tdir/mds || error "tunefs failed" + start mds $TMP/$tdir/mds "-o loop" || return 3 + local UUID=$(cat $LPROC/mds/lustre-MDT0000/uuid) + echo MDS uuid $UUID + [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" + + $TUNEFS --mgsnode=`hostname` $TMP/$tdir/ost1 || error "tunefs failed" + start ost1 $TMP/$tdir/ost1 "-o loop" || return 5 + UUID=$(cat $LPROC/obdfilter/lustre-OST0000/uuid) + echo OST uuid $UUID + [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" + + echo "OSC changes should succeed:" + $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 || return 7 + $LCTL conf_param lustre-OST0000.failover.node=$NID || return 8 + echo "ok." + echo "MDC changes should succeed:" + $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9 + echo "ok." + + # MDT and OST should have registered with new nids, so we should have + # a fully-functioning client + echo "Check client and old fs contents" + mount_client $MOUNT + set_and_check "cat $LPROC/mdc/*/max_rpcs_in_flight" "lustre-MDT0000.mdc.max_rpcs_in_flight" || return 11 + [ "$(cksum $MOUNT/passwd | cut -d' ' -f 1,2)" == "2479747619 779" ] || return 12 + echo "ok." + + cleanup + [ -d $TMP/$tdir ] && rm -rf $TMP/$tdir +} +run_test 32b "Upgrade from 1.4 with writeconf" + +umount_client $MOUNT +cleanup_nocli + equals_msg "Done" +echo "$0: completed"