From: nathan Date: Tue, 21 Mar 2006 20:02:34 +0000 (+0000) Subject: Branch b1_4_mountconf X-Git-Tag: v1_8_0_110~486^2~2136^2~8 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=1e7d1fbca28829750fa316d5cdcd72617153546f;p=fs%2Flustre-release.git Branch b1_4_mountconf b=9858 move mountconf deltas into test-framework as much as possible replay-single now works reasonably well --- diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 8bb8f63..dd23849 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -21,35 +21,25 @@ ALWAYS_EXCEPT="0b 39 $REPLAY_SINGLE_EXCEPT" build_test_filter cleanup() { - # make sure we are using the primary MDS, so the config log will - # be able to clean up properly. - activemds=`facet_active mds` - if [ $activemds != "mds" ]; then - fail mds $MDS_MOUNT_OPTS - fi - zconf_umount `hostname` $MOUNT - stop mds ${FORCE} - stop ost2 ${FORCE} - stop ost ${FORCE} + grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT + stop ost -f + stop ost2 -f + stop mds -f #no dump option in mountconf... #stop ost ${FORCE} --dump $TMP/replay-single-`hostname`.log } +SETUP=${SETUP:-"setup"} +CLEANUP=${CLEANUP:-"cleanup"} + if [ "$ONLY" == "cleanup" ]; then sysctl -w lnet.debug=0 || true - # failover is the default, '-f' is force - FORCE="-f" + $CLEANUP exit fi -SETUP=${SETUP:-"setup"} -CLEANUP=${CLEANUP:-"cleanup"} - setup() { - grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT - stop ost -f || 1 - stop ost2 -f || 1 - stop mds -f || 1 + cleanup add mds $MDS_MKFS_OPTS --reformat $MDSDEV add ost $OST_MKFS_OPTS --reformat $OSTDEV add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2 @@ -74,14 +64,14 @@ mkdir -p $DIR test_0() { replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds } run_test 0 "empty replay" test_0b() { # this test attempts to trigger a race in the precreation code, # and must run before any other objects are created on the filesystem - fail ost $OSTDEV $OST_MOUNT_OPTS + fail ost createmany -o $DIR/$tfile 20 || return 1 unlinkmany $DIR/$tfile 20 || return 2 } @@ -90,7 +80,7 @@ run_test 0b "ensure object created after recover exists. (3284)" test_1() { replay_barrier mds mcreate $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile || return 1 rm $DIR/$tfile } @@ -143,7 +133,7 @@ test_1a() { test_2a() { replay_barrier mds touch $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile || return 1 rm $DIR/$tfile } @@ -153,7 +143,7 @@ test_2b() { ./mcreate $DIR/$tfile replay_barrier mds touch $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile || return 1 rm $DIR/$tfile } @@ -163,7 +153,7 @@ test_3a() { replay_barrier mds mcreate $DIR/$tfile o_directory $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile || return 2 rm $DIR/$tfile } @@ -175,7 +165,7 @@ test_3b() { do_facet mds "sysctl -w lustre.fail_loc=0x80000114" touch $DIR/$tfile do_facet mds "sysctl -w lustre.fail_loc=0" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile && return 2 return 0 } @@ -187,7 +177,7 @@ test_3c() { do_facet mds "sysctl -w lustre.fail_loc=0x80000128" touch $DIR/$tfile do_facet mds "sysctl -w lustre.fail_loc=0" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile && return 2 return 0 @@ -199,7 +189,7 @@ test_4() { for i in `seq 10`; do echo "tag-$i" > $DIR/$tfile-$i done - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds for i in `seq 10`; do grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i" done @@ -209,7 +199,7 @@ run_test 4 "|x| 10 open(O_CREAT)s" test_4b() { replay_barrier mds rm -rf $DIR/$tfile-* - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true } run_test 4b "|x| rm 10 files" @@ -221,7 +211,7 @@ test_5() { for i in `seq 220`; do echo "tag-$i" > $DIR/$tfile-$i done - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds for i in `seq 220`; do grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i" done @@ -236,7 +226,7 @@ test_6() { replay_barrier mds mkdir $DIR/$tdir mcreate $DIR/$tdir/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t dir $DIR/$tdir || return 1 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2 sleep 2 @@ -247,7 +237,7 @@ run_test 6 "mkdir + contained create" test_6b() { replay_barrier mds rm -rf $DIR/$tdir - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t dir $DIR/$tdir && return 1 || true } run_test 6b "|X| rmdir" @@ -256,7 +246,7 @@ test_7() { mkdir $DIR/$tdir replay_barrier mds mcreate $DIR/$tdir/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT -t dir $DIR/$tdir || return 1 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2 rm -fr $DIR/$tdir @@ -268,7 +258,7 @@ test_8() { multiop $DIR/$tfile mo_c & MULTIPID=$! sleep 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds ls $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || return 1 kill -USR1 $MULTIPID || return 2 @@ -281,7 +271,7 @@ test_9() { replay_barrier mds mcreate $DIR/$tfile local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'` - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'` echo " old_inum == $old_inum, new_inum == $new_inum" @@ -301,7 +291,7 @@ test_10() { replay_barrier mds mv $DIR/$tfile $DIR/$tfile-2 rm -f $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds $CHECKSTAT $DIR/$tfile && return 1 $CHECKSTAT $DIR/$tfile-2 ||return 2 rm $DIR/$tfile-2 @@ -317,7 +307,7 @@ test_11() { echo "new" > $DIR/$tfile grep new $DIR/$tfile grep old $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds grep new $DIR/$tfile || return 1 grep old $DIR/$tfile-2 || return 2 } @@ -334,7 +324,7 @@ test_12() { kill -USR1 $pid wait $pid || return 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds [ -e $DIR/$tfile ] && return 2 return 0 } @@ -352,7 +342,7 @@ test_13() { chmod 0 $DIR/$tfile $CHECKSTAT -p 0 $DIR/$tfile replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 1 @@ -371,7 +361,7 @@ test_14() { kill -USR1 $pid || return 1 wait $pid || return 2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds [ -e $DIR/$tfile ] && return 3 return 0 } @@ -388,7 +378,7 @@ test_15() { kill -USR1 $pid wait $pid || return 2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds [ -e $DIR/$tfile ] && return 3 touch $DIR/h11 || return 4 return 0 @@ -401,7 +391,7 @@ test_16() { mcreate $DIR/$tfile munlink $DIR/$tfile mcreate $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds [ -e $DIR/$tfile ] && return 1 [ -e $DIR/$tfile-2 ] || return 2 munlink $DIR/$tfile-2 || return 3 @@ -414,7 +404,7 @@ test_17() { pid=$! # give multiop a chance to open sleep 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid || return 1 wait $pid || return 2 $CHECKSTAT -t file $DIR/$tfile || return 3 @@ -434,7 +424,7 @@ test_18() { kill -USR1 $pid wait $pid || return 2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds [ -e $DIR/$tfile ] && return 3 [ -e $DIR/$tfile-2 ] || return 4 # this touch frequently fails @@ -452,7 +442,7 @@ test_19() { echo "old" > $DIR/$tfile mv $DIR/$tfile $DIR/$tfile-2 grep old $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds grep old $DIR/$tfile-2 || return 2 } run_test 19 "|X| mcreate, open, write, rename " @@ -465,7 +455,7 @@ test_20() { sleep 1 rm -f $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -482,7 +472,7 @@ test_21() { rm -f $DIR/$tfile touch $DIR/g11 || return 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 2 [ -e $DIR/$tfile ] && return 3 @@ -500,7 +490,7 @@ test_22() { replay_barrier mds rm -f $DIR/$tfile - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -518,7 +508,7 @@ test_23() { rm -f $DIR/$tfile touch $DIR/g11 || return 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 2 [ -e $DIR/$tfile ] && return 3 @@ -534,7 +524,7 @@ test_24() { sleep 1 replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds rm -f $DIR/$tfile kill -USR1 $pid wait $pid || return 1 @@ -551,7 +541,7 @@ test_25() { rm -f $DIR/$tfile replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -572,7 +562,7 @@ test_26() { kill -USR1 $pid2 wait $pid2 || return 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 2 [ -e $DIR/$tfile-1 ] && return 3 @@ -592,7 +582,7 @@ test_27() { rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -616,7 +606,7 @@ test_28() { kill -USR1 $pid2 wait $pid2 || return 1 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 2 [ -e $DIR/$tfile-1 ] && return 3 @@ -636,7 +626,7 @@ test_29() { rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -658,7 +648,7 @@ test_30() { rm -f $DIR/$tfile-2 replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -680,7 +670,7 @@ test_31() { replay_barrier mds rm -f $DIR/$tfile-2 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -758,7 +748,7 @@ test_36() { replay_barrier mds touch $DIR/$tfile checkstat $DIR/$tfile - facet_failover mds $MDS_MOUNT_OPTS + facet_failover mds cancel_lru_locks MDC if dmesg | grep "unknown lock cookie"; then echo "cancel after replay failed" @@ -792,7 +782,7 @@ test_38() { createmany -o $DIR/$tfile-%d 800 unlinkmany $DIR/$tfile-%d 0 400 replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds unlinkmany $DIR/$tfile-%d 400 400 sleep 2 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true @@ -803,7 +793,7 @@ test_39() { # bug 4176 createmany -o $DIR/$tfile-%d 800 replay_barrier mds unlinkmany $DIR/$tfile-%d 0 400 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds unlinkmany $DIR/$tfile-%d 400 400 sleep 2 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true @@ -823,7 +813,7 @@ test_40(){ writeme -s $MOUNT/${tfile}-2 & WRITE_PID=$! sleep 1 - facet_failover mds $MDS_MOUNT_OPTS + facet_failover mds #define OBD_FAIL_MDS_CONNECT_NET 0x117 do_facet mds "sysctl -w lustre.fail_loc=0x80000117" kill -USR1 $PID @@ -882,7 +872,7 @@ test_42() { unlinkmany $DIR/$tfile-%d 0 400 DEBUG42=`sysctl -n lnet.debug` sysctl -w lnet.debug=-1 - facet_failover ost $OST_MOUNT_OPTS + facet_failover ost # osc is evicted, fs is smaller (but only with failout OSTs (bug 7287) #blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'` @@ -901,7 +891,7 @@ test_43() { # bug 2530 # OBD_FAIL_OST_CREATE_NET 0x204 do_facet ost "sysctl -w lustre.fail_loc=0x80000204" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds sleep 10 do_facet ost "sysctl -w lustre.fail_loc=0" @@ -947,7 +937,7 @@ run_test 45 "Handle failed close" test_46() { dmesg -c >/dev/null drop_reply "touch $DIR/$tfile" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds # ironically, the previous test, 45, will cause a real forced close, # so just look for one for this test dmesg | grep -i "force closing client file handle for $tfile" && return 1 @@ -961,7 +951,7 @@ test_47() { # bug 2824 createmany -o $DIR/$tfile 20 || return 1 # OBD_FAIL_OST_CREATE_NET 0x204 - fail ost $OSTDEV $OST_MOUNT_OPTS + fail ost do_facet ost "sysctl -w lustre.fail_loc=0x80000204" df $MOUNT || return 2 @@ -982,7 +972,7 @@ test_48() { replay_barrier mds createmany -o $DIR/$tfile 20 || return 1 # OBD_FAIL_OST_EROFS 0x216 - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds do_facet ost "sysctl -w lustre.fail_loc=0x80000216" df $MOUNT || return 2 @@ -1011,7 +1001,7 @@ test_52() { multiop $DIR/$tfile s replay_barrier mds do_facet mds "sysctl -w lustre.fail_loc=0x8000030c" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds do_facet mds "sysctl -w lustre.fail_loc=0x0" $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true @@ -1039,7 +1029,7 @@ test_56() { ln -s foo $DIR/$tfile replay_barrier mds #drop_reply "cat $DIR/$tfile" - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds sleep 10 } run_test 56 "don't replay a symlink open request (3440)" @@ -1050,7 +1040,7 @@ test_57() { do_facet mds "sysctl -w lustre.fail_loc=0x8000012c" touch $DIR/$tfile replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds sleep 1 $CHECKSTAT -t file $DIR/$tfile || return 1 do_facet mds "sysctl -w lustre.fail_loc=0x0" @@ -1065,7 +1055,7 @@ test_58() { mkdir $DIR/$tdir createmany -o $DIR/$tdir/$tfile-%d 2500 replay_barrier mds - fail mds $MDSDEV $MDS_MOUNT_OPTS + fail mds sleep 2 $CHECKSTAT -t file $DIR/$tdir/$tfile-* || return 1 do_facet mds "sysctl -w lustre.fail_loc=0x0" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 106b785..50418b1 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -76,8 +76,7 @@ start() { shift device=$1 shift - active=`facet_active $facet` - echo "mount active=${active}, facet=${facet}" + echo "Starting ${device} as /mnt/${facet} (opts:$@)" mkdir -p /mnt/${facet} do_facet ${facet} mount -t lustre $@ ${device} /mnt/${facet} #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \ @@ -85,20 +84,26 @@ start() { # $@ $XMLCONFIG RC=${PIPESTATUS[0]} if [ $RC -ne 0 ]; then + echo mount -t lustre $@ ${device} /mnt/${facet} echo Start of ${device} on ${facet} failed ${RC} + else + label=`do_facet ${facet} e2label ${device}` + eval export ${facet}_svc=${label} + eval export ${facet}_dev=${device} + eval export ${facet}_opt=\"$@\" + echo Started ${label} fi - label=`do_facet ${facet} e2label ${device}` - eval export ${facet}_svc=${label} - echo Started ${label} return $RC } stop() { facet=$1 shift - active=`facet_active $facet` - echo "umount active=${active}, facet=${facet}" - do_facet ${facet} umount -d $@ /mnt/${facet} + local running=`do_facet ${facet} mount | grep -c /mnt/${facet}" "` + if [ $running -ne 0 ]; then + echo "Stopping /mnt/${facet} (opts:$@)" + do_facet ${facet} umount -d $@ /mnt/${facet} + fi #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \ # --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \ # $@ --cleanup $XMLCONFIG @@ -184,7 +189,7 @@ client_reconnect() { facet_failover() { facet=$1 - echo "Failing $facet node `facet_active_host $facet`" + echo "Failing $facet on node `facet_active_host $facet`" shutdown_facet $facet reboot_facet $facet client_df & @@ -194,7 +199,9 @@ facet_failover() { TO=`facet_active_host $facet` echo "Failover $facet to $TO" wait_for $facet - start $* + local dev=${facet}_dev + local opt=${facet}_opt + start $facet ${!dev} ${!opt} } obd_name() { @@ -208,7 +215,7 @@ replay_barrier() { local svc=${facet}_svc do_facet $facet $LCTL --device %${!svc} readonly do_facet $facet $LCTL --device %${!svc} notransno - do_facet $facet $LCTL mark "$facet REPLAY BARRIER" + do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}" $LCTL mark "local REPLAY BARRIER on ${!svc}" } @@ -219,7 +226,7 @@ replay_barrier_nodf() { echo Replay barrier on ${!svc} do_facet $facet $LCTL --device %${!svc} readonly do_facet $facet $LCTL --device %${!svc} notransno - do_facet $facet $LCTL mark "$facet REPLAY BARRIER" + do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}" $LCTL mark "local REPLAY BARRIER on ${!svc}" } @@ -229,7 +236,6 @@ mds_evict_client() { } fail() { - local facet=$1 facet_failover $* df $MOUNT || error "post-failover df: $?" } @@ -238,8 +244,10 @@ fail_abort() { local facet=$1 stop $facet change_active $facet - start $* local svc=${facet}_svc + local dev=${facet}_dev + local opt=${facet}_opt + start $facet ${!dev} ${!opt} do_facet $facet lctl --device %${!svc} abort_recovery df $MOUNT || echo "first df failed: $?" sleep 1 @@ -378,7 +386,7 @@ add() { local facet=$1 shift # failsafe - umount -d -f /mnt/${facet} || true + stop ${facet} -f rm -f ${facet}active $MKFS $* }