3 # add uml1 uml2 uml3 in your /etc/hosts
5 # FIXME - there is no reason to use all of these different
6 # return codes, especially when most of them are mapped to something
7 # else anyway. The combination of test number and return code
8 # figures out what failed.
14 # bug number for skipped test: 13369
15 ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT 34a"
16 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
19 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
22 LUSTRE=${LUSTRE:-`dirname $0`/..}
23 RLUSTRE=${RLUSTRE:-$LUSTRE}
26 . $LUSTRE/tests/test-framework.sh
28 # STORED_MDSSIZE is used in test_18
29 if [ -n "$MDSSIZE" ]; then
30 STORED_MDSSIZE=$MDSSIZE
32 # use small MDS + OST size to speed formatting time
35 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
37 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
38 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
41 [ "$SLOW" = "no" ] && EXCEPT_SLOW="0 1 2 3 6 7 15 18 24b 25 30 31 32 33 34a 45"
54 # who knows if/where $TUNEFS is installed? Better reformat if it fails...
55 do_facet ${facet} "$TUNEFS --writeconf $MDSDEV" ||
56 echo "tunefs failed, reformatting instead" && reformat_and_config
61 # The MGS must be started before the OSTs for a new fs, so start
62 # and stop to generate the startup logs.
65 wait_osc_import_state mds ost FULL
70 reformat_and_config() {
72 if ! combined_mgs_mds ; then
80 start mgs $MGSDEV $mgs_MOUNT_OPTS
84 echo "start mds service on `facet_active_host mds`"
85 start mds $MDSDEV $MDS_MOUNT_OPTS || return 94
89 echo "stop mds service on `facet_active_host mds`"
90 # These tests all use non-failover stop
91 stop mds -f || return 97
95 echo "start ost1 service on `facet_active_host ost1`"
96 start ost1 `ostdevname 1` $OST_MOUNT_OPTS || return 95
100 echo "stop ost1 service on `facet_active_host ost1`"
101 # These tests all use non-failover stop
102 stop ost1 -f || return 98
106 echo "start ost2 service on `facet_active_host ost2`"
107 start ost2 `ostdevname 2` $OST_MOUNT_OPTS || return 92
111 echo "stop ost2 service on `facet_active_host ost2`"
112 # These tests all use non-failover stop
113 stop ost2 -f || return 93
117 echo "start client on `facet_active_host client`"
118 start client || return 99
122 echo "stop client on `facet_active_host client`"
123 stop client || return 100
128 echo "mount $FSNAME on ${MOUNTPATH}....."
129 zconf_mount `hostname` $MOUNTPATH || return 96
133 local SAVEMOUNTOPT=$MOUNTOPT
134 MOUNTOPT="remount,$1"
136 echo "remount '$1' lustre on ${MOUNTPATH}....."
137 zconf_mount `hostname` $MOUNTPATH || return 96
138 MOUNTOPT=$SAVEMOUNTOPT
143 echo "umount lustre on ${MOUNTPATH}....."
144 zconf_umount `hostname` $MOUNTPATH || return 97
147 manual_umount_client(){
150 echo "manual umount lustre on ${MOUNT}...."
151 do_facet client "umount -d ${FORCE} $MOUNT"
164 if ! combined_mgs_mds ; then
174 stop_mds || return 201
175 stop_ost || return 202
176 unload_modules || return 203
180 umount_client $MOUNT || return 200
181 cleanup_nocli || return $?
185 do_facet client "cp /etc/passwd $DIR/a" || return 71
186 do_facet client "rm $DIR/a" || return 72
187 # make sure lustre is actually mounted (touch will block,
188 # but grep won't, so do it after)
189 do_facet client "grep $MOUNT' ' /proc/mounts > /dev/null" || return 73
190 echo "setup single mount lustre success"
194 do_facet client "touch $DIR/a" || return 71
195 do_facet client "rm $DIR/a" || return 72
196 do_facet client "touch $DIR2/a" || return 73
197 do_facet client "rm $DIR2/a" || return 74
198 echo "setup double mount lustre success"
203 if [ "$ONLY" == "setup" ]; then
208 if [ "$ONLY" == "cleanup" ]; then
213 #create single point mountpoint
219 check_mount || return 41
222 run_test 0 "single mount setup"
226 echo "start ost second time..."
228 check_mount || return 42
231 run_test 1 "start up ost twice (should return errors)"
236 echo "start mds second time.."
239 check_mount || return 43
242 run_test 2 "start up mds twice (should return err)"
246 #mount.lustre returns an error if already in mtab
247 mount_client $MOUNT && return $?
248 check_mount || return 44
251 run_test 3 "mount client twice (should return err)"
255 touch $DIR/$tfile || return 85
259 # ok for ost to fail shutdown
260 if [ 202 -ne $eno ]; then
265 run_test 4 "force cleanup ost, then cleanup"
267 test_5a() { # was test_5
269 touch $DIR/$tfile || return 1
270 fuser -m -v $MOUNT && echo "$MOUNT is in use by user space process."
272 stop_mds -f || return 2
274 # cleanup may return an error from the failed
275 # disconnects; for now I'll consider this successful
276 # if all the modules have unloaded.
280 echo "killing umount"
281 kill -TERM $UMOUNT_PID
282 echo "waiting for umount to finish"
284 if grep " $MOUNT " /proc/mounts; then
285 echo "test 5: /proc/mounts after failed umount"
289 echo "killing umount"
290 kill -TERM $UMOUNT_PID
291 echo "waiting for umount to finish"
293 grep " $MOUNT " /proc/mounts && echo "test 5: /proc/mounts after second umount" && return 11
297 # stop_mds is a no-op here, and should not fail
298 cleanup_nocli || return $?
299 # df may have lingering entry
301 # mtab may have lingering entry
305 while [ "$WAIT" -ne "$MAX_WAIT" ]; do
307 grep -q $MOUNT" " /etc/mtab || break
308 echo "Waiting /etc/mtab updated ... "
309 WAIT=$(( WAIT + sleep))
311 [ "$WAIT" -eq "$MAX_WAIT" ] && error "/etc/mtab is not updated in $WAIT secs"
312 echo "/etc/mtab updated in $WAIT secs"
314 run_test 5a "force cleanup mds, then cleanup"
318 [ -d $MOUNT ] || mkdir -p $MOUNT
319 grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10
320 mount_client $MOUNT && return 1
321 grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11
323 # stop_mds is a no-op here, and should not fail
324 cleanup_nocli || return $?
327 run_test 5b "mds down, cleanup after failed mount (bug 2712) (should return errs)"
332 [ -d $MOUNT ] || mkdir -p $MOUNT
333 grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10
334 local oldfs="${FSNAME}"
335 FSNAME="wrong.${FSNAME}"
336 mount_client $MOUNT || :
338 grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11
340 cleanup_nocli || return $?
342 run_test 5c "cleanup after failed mount (bug 2712) (should return errs)"
348 grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10
349 mount_client $MOUNT || return 1
351 grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11
354 run_test 5d "mount with ost down"
360 #define OBD_FAIL_PTLRPC_DELAY_SEND 0x506
361 do_facet client "lctl set_param fail_loc=0x80000506"
362 grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10
363 mount_client $MOUNT || echo "mount failed (not fatal)"
365 grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11
368 run_test 5e "delayed connect, don't crash (bug 10268)"
373 mount_client ${MOUNT} || return 87
374 touch $DIR/a || return 86
377 run_test 6 "manual umount, then mount again"
382 cleanup_nocli || return $?
384 run_test 7 "manual umount, then cleanup"
389 check_mount2 || return 45
390 umount_client $MOUNT2
393 run_test 8 "double mount setup"
398 do_facet ost1 lctl set_param debug=\'inode trace\' || return 1
399 do_facet ost1 lctl set_param subsystem_debug=\'mds ost\' || return 1
401 CHECK_PTLDEBUG="`do_facet ost1 lctl get_param -n debug`"
402 # interop 1.8 <-> 2.0: "trace inode warning error emerg console"
403 local lustre_version=$(get_lustre_version ost1)
404 echo ost1 running $lustre_version
405 if [ "$CHECK_PTLDEBUG" ] && [ "$CHECK_PTLDEBUG" = "trace inode" ]; then
406 echo "lnet.debug success"
407 elif [[ $lustre_version != 1.8* ]] && \
408 [ "$CHECK_PTLDEBUG" = "trace inode warning error emerg console" ]; then
409 echo "lnet.debug success"
411 echo "lnet.debug: want 'trace inode', have '$CHECK_PTLDEBUG'"
414 CHECK_SUBSYS="`do_facet ost1 lctl get_param -n subsystem_debug`"
415 if [ "$CHECK_SUBSYS" ] && [ "$CHECK_SUBSYS" = "mds ost" ]; then
416 echo "lnet.subsystem_debug success"
418 echo "lnet.subsystem_debug: want 'mds ost', have '$CHECK_SUBSYS'"
421 stop_ost || return $?
424 run_test 9 "test ptldebug and subsystem for mkfs"
432 do_facet $facet "test -b $dev" || rc=1
433 if [[ "$size" ]]; then
434 local in=$(do_facet $facet "dd if=$dev of=/dev/null bs=1k count=1 skip=$size 2>&1" |\
435 awk '($3 == "in") { print $1 }')
436 [[ $in = "1+0" ]] || rc=1
443 # interop 1.8 <-> 2.0: 20: no LOGS
444 local files="{OBJECTS,PENDING}"
445 local lustre_version=$(get_lustre_version mds)
446 if [[ $lustre_version = 1.8* ]]; then
447 files="{OBJECTS,PENDING,LOGS}"
450 check_mount || return 41
453 local TMPMTPT="${TMP}/conf16"
455 is_blkdev mds $MDSDEV || LOOPOPT="-o loop"
457 log "change the mode of $MDSDEV/$files to 555"
458 do_facet mds "mkdir -p $TMPMTPT &&
459 mount $LOOPOPT -t $FSTYPE $MDSDEV $TMPMTPT &&
460 chmod 555 $TMPMTPT/$files &&
461 umount $TMPMTPT" || return $?
463 log "mount Lustre to change the mode of $files, then umount Lustre"
465 check_mount || return 41
468 log "read the mode of $files and check if they has been changed properly"
470 for file in ${files//[\{\},]/ }; do
471 expected=`do_facet mds "$DEBUGFS -R 'stat $file' $MDSDEV 2> /dev/null" | \
472 grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"`
474 if [ "$expected" = "0777" ]; then
475 log "Success:Lustre change the mode of $file correctly"
477 error "Lustre does not change mode of $file properly"
481 run_test 16 "verify that lustre will correct the mode of OBJECTS/LOGS/PENDING"
485 check_mount || return 41
488 echo "Remove mds config log"
489 if ! combined_mgs_mds ; then
493 do_facet mgs "$DEBUGFS -w -R 'unlink CONFIGS/$FSNAME-MDT0000' $MGSDEV || return \$?" || return $?
495 if ! combined_mgs_mds ; then
500 start_mds && return 42
503 run_test 17 "Verify failed mds_postsetup won't fail assertion (2936) (should return errs)"
506 [ "$FSTYPE" != "ldiskfs" ] && skip "not needed for FSTYPE=$FSTYPE" && return
511 # check if current MDSSIZE is large enough
512 [ $MDSSIZE -ge $MIN ] && OK=1 && myMDSSIZE=$MDSSIZE && \
513 log "use MDSSIZE=$MDSSIZE"
515 # check if the global config has a large enough MDSSIZE
516 [ -z "$OK" -a ! -z "$STORED_MDSSIZE" ] && [ $STORED_MDSSIZE -ge $MIN ] && \
517 OK=1 && myMDSSIZE=$STORED_MDSSIZE && \
518 log "use STORED_MDSSIZE=$STORED_MDSSIZE"
520 # check if the block device is large enough
521 [ -z "$OK" ] && $(is_blkdev mds $MDSDEV $MIN) && OK=1 &&
522 myMDSSIZE=$MIN && log "use device $MDSDEV with MIN=$MIN"
524 # check if a loopback device has enough space for fs metadata (5%)
526 if [ -z "$OK" ]; then
527 local SPACE=$(do_facet mds "[ -f $MDSDEV -o ! -e $MDSDEV ] && df -P \\\$(dirname $MDSDEV)" |
528 awk '($1 != "Filesystem") {print $4}')
529 ! [ -z "$SPACE" ] && [ $SPACE -gt $((MIN / 20)) ] && \
530 OK=1 && myMDSSIZE=$MIN && \
531 log "use file $MDSDEV with MIN=$MIN"
534 [ -z "$OK" ] && skip_env "$MDSDEV too small for ${MIN}kB MDS" && return
537 echo "mount mds with large journal..."
538 local OLD_MDS_MKFS_OPTS=$MDS_MKFS_OPTS
540 local opts="--mdt --fsname=$FSNAME --device-size=$myMDSSIZE --param sys.timeout=$TIMEOUT $MDSOPT"
542 if combined_mgs_mds ; then
543 MDS_MKFS_OPTS="--mgs $opts"
545 MDS_MKFS_OPTS="--mgsnode=$MGSNID $opts"
549 echo "mount lustre system..."
551 check_mount || return 41
553 echo "check journal size..."
554 local FOUNDSIZE=$(do_facet mds "$DEBUGFS -c -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}')
555 if [ $FOUNDSIZE -gt $((32 * 1024 * 1024)) ]; then
556 log "Success: mkfs creates large journals. Size: $((FOUNDSIZE >> 20))M"
558 error "expected journal size > 32M, found $((FOUNDSIZE >> 20))M"
563 MDS_MKFS_OPTS=$OLD_MDS_MKFS_OPTS
566 run_test 18 "check mkfs creates large journals"
569 start_mds || return 1
570 stop_mds -f || return 2
572 run_test 19a "start/stop MDS without OSTs"
575 start_ost || return 1
576 stop_ost -f || return 2
578 run_test 19b "start/stop OSTs without MDS"
581 # first format the ost/mdt
585 check_mount || return 43
587 remount_client ro $MOUNT || return 44
588 touch $DIR/$tfile && echo "$DIR/$tfile created incorrectly" && return 45
589 [ -e $DIR/$tfile ] && echo "$DIR/$tfile exists incorrectly" && return 46
590 remount_client rw $MOUNT || return 47
592 [ ! -f $DIR/$tfile ] && echo "$DIR/$tfile missing" && return 48
593 MCNT=`grep -c $MOUNT /etc/mtab`
594 [ "$MCNT" -ne 1 ] && echo "$MOUNT in /etc/mtab $MCNT times" && return 49
599 run_test 20 "remount ro,rw mounts work and doesn't break /etc/mtab"
604 wait_osc_import_state mds ost FULL
608 run_test 21a "start mds before ost, stop ost first"
613 wait_osc_import_state mds ost FULL
617 run_test 21b "start ost before mds, stop mds first"
623 wait_osc_import_state mds ost2 FULL
628 #writeconf to remove all ost2 traces for subsequent tests
631 run_test 21c "start mds between two osts, stop mds last"
636 echo Client mount with ost in logs, but none running
638 wait_osc_import_state mds ost FULL
641 # check_mount will block trying to contact ost
642 mcreate $DIR/$tfile || return 40
643 rm -f $DIR/$tfile || return 42
647 echo Client mount with a running ost
650 check_mount || return 41
655 run_test 22 "start a client before osts (should return errs)"
657 test_23a() { # was test_23
661 # force down client so that recovering mds waits for reconnect
662 local running=$(grep -c $MOUNT /proc/mounts) || true
663 if [ $running -ne 0 ]; then
664 echo "Stopping client $MOUNT (opts: -f)"
668 # enter recovery on mds
670 # try to start a new client
671 mount_client $MOUNT &
673 MOUNT_PID=$(ps -ef | grep "t lustre" | grep -v grep | awk '{print $2}')
674 MOUNT_LUSTRE_PID=`ps -ef | grep mount.lustre | grep -v grep | awk '{print $2}'`
675 echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID}
677 ps --ppid $MOUNT_LUSTRE_PID
678 # FIXME why o why can't I kill these? Manual "ctrl-c" works...
679 kill -TERM $MOUNT_LUSTRE_PID
680 echo "waiting for mount to finish"
682 # we cannot wait $MOUNT_PID because it is not a child of this shell
688 while [ "$WAIT" -lt "$MAX_WAIT" ]; do
690 PID1=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_PID)
691 PID2=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_LUSTRE_PID)
694 [ -z "$PID1" -a -z "$PID2" ] && break
695 echo "waiting for mount to finish ... "
696 WAIT=$(( WAIT + sleep))
698 if [ "$WAIT" -eq "$MAX_WAIT" ]; then
699 error "MOUNT_PID $MOUNT_PID and \
700 MOUNT__LUSTRE_PID $MOUNT__LUSTRE_PID still not killed in $WAIT secs"
706 run_test 23a "interrupt client during recovery mount delay"
711 test_23b() { # was test_23
714 # Simulate -EINTR during mount OBD_FAIL_LDLM_CLOSE_THREAD
715 lctl set_param fail_loc=0x80000313
719 run_test 23b "Simulate -EINTR during mount"
721 fs2mds_HOST=$mds_HOST
722 fs2ost_HOST=$ost_HOST
726 echo "umount $MOUNT2 ..."
727 umount $MOUNT2 || true
728 echo "stopping fs2mds ..."
729 stop fs2mds -f || true
730 echo "stopping fs2ost ..."
731 stop fs2ost -f || true
735 if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" ]; then
736 is_blkdev mds $MDSDEV && \
737 skip_env "mixed loopback and real device not working" && return
740 [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST
742 local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2}
743 local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2}
745 # test 8-char fsname as well
746 local FSNAME2=test1234
747 add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --nomgs --mgsnode=$MGSNID --reformat $fs2mdsdev || exit 10
749 add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2ostdev || exit 10
752 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && trap cleanup_24a EXIT INT
753 start fs2ost $fs2ostdev $OST_MOUNT_OPTS
755 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1
757 check_mount || return 2
758 # files written on 1 should not show up on 2
759 cp /etc/passwd $DIR/$tfile
761 [ -e $MOUNT2/$tfile ] && error "File bleed" && return 7
763 cp /etc/passwd $MOUNT2/b || return 3
764 rm $MOUNT2/b || return 4
765 # 2 is actually mounted
766 grep $MOUNT2' ' /proc/mounts > /dev/null || return 5
768 facet_failover fs2mds
769 facet_failover fs2ost
772 # the MDS must remain up until last MDT
774 MDS=$(do_facet mds "lctl get_param -n devices" | awk '($3 ~ "mdt" && $4 ~ "MDS") { print $4 }')
775 [ -z "$MDS" ] && error "No MDS" && return 8
777 cleanup_nocli || return 6
779 run_test 24a "Multiple MDTs on a single node"
782 if [ -z "$fs2mds_DEV" ]; then
783 is_blkdev mds $MDSDEV && \
784 skip_env "mixed loopback and real device not working" && return
787 local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2}
789 add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat $fs2mdsdev || exit 10
791 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && return 2
794 run_test 24b "Multiple MGSs on a single node (should return err)"
798 check_mount || return 2
799 local MODULES=$($LCTL modules | awk '{ print $2 }')
800 rmmod $MODULES 2>/dev/null || true
803 run_test 25 "Verify modules are referenced"
807 # we need modules before mount for sysctl, so make sure...
808 do_facet mds "lsmod | grep -q lustre || modprobe lustre"
809 #define OBD_FAIL_MDS_FS_SETUP 0x135
810 do_facet mds "lctl set_param fail_loc=0x80000135"
811 start_mds && echo MDS started && return 1
812 lctl get_param -n devices
813 DEVS=$(lctl get_param -n devices | wc -l)
814 [ $DEVS -gt 0 ] && return 2
815 unload_modules || return 203
817 run_test 26 "MDT startup failure cleans LOV (should return errs)"
823 local ORIG=$(do_facet $myfacet "$TEST")
824 if [ $# -gt 3 ]; then
830 echo "Setting $PARAM from $ORIG to $FINAL"
831 do_facet mds "$LCTL conf_param $PARAM=$FINAL" || error conf_param failed
833 wait_update $(facet_host $myfacet) "$TEST" $FINAL || error check failed!
837 start_ost || return 1
838 start_mds || return 2
839 echo "Requeue thread should have started: "
840 ps -e | grep ll_cfg_requeue
841 set_and_check ost1 "lctl get_param -n obdfilter.$FSNAME-OST0000.client_cache_seconds" "$FSNAME-OST0000.ost.client_cache_seconds" || return 3
844 run_test 27a "Reacquire MGS lock if OST started first"
849 # interop 1.8 <-> 2.0:
850 # 1.8: group_acquire_expire, 2.0: identity_acquire_expire
851 local acquire_expire=$(do_facet mds lctl get_param md*.$FSNAME-MDT0000.*acquire_expire | \
852 cut -d= -f1 | cut -d. -f3)
854 set_and_check mds "lctl get_param -n md*.$FSNAME-MDT0000.$acquire_expire" \
855 "$FSNAME-MDT0000.mdt.$acquire_expire" || return 3
856 set_and_check client "lctl get_param -n mdc.$FSNAME-MDT0000-mdc-*.max_rpcs_in_flight" \
857 "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4
861 run_test 27b "Reacquire MGS lock after failover"
865 TEST="lctl get_param -n llite.$FSNAME-*.max_read_ahead_whole_mb"
866 PARAM="$FSNAME.llite.max_read_ahead_whole_mb"
869 set_and_check client "$TEST" "$PARAM" $FINAL || return 3
870 FINAL=$(($FINAL + 1))
871 set_and_check client "$TEST" "$PARAM" $FINAL || return 4
872 umount_client $MOUNT || return 200
875 if [ $RESULT -ne $FINAL ]; then
876 echo "New config not seen: wanted $FINAL got $RESULT"
879 echo "New config success: got $RESULT"
881 set_and_check client "$TEST" "$PARAM" $ORIG || return 5
884 run_test 28 "permanent parameter setting"
887 [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2, skipping" && return
888 setup > /dev/null 2>&1
892 local PARAM="$FSNAME-OST0001.osc.active"
893 local PROC_ACT="osc.$FSNAME-OST0001-osc-*.active"
894 local PROC_UUID="osc.$FSNAME-OST0001-osc-*.ost_server_uuid"
896 ACTV=$(lctl get_param -n $PROC_ACT)
898 set_and_check client "lctl get_param -n $PROC_ACT" "$PARAM" $DEAC || return 2
899 # also check ost_server_uuid status
900 RESULT=$(lctl get_param -n $PROC_UUID | grep DEACTIV)
901 if [ -z "$RESULT" ]; then
902 echo "Live client not deactivated: $(lctl get_param -n $PROC_UUID)"
905 echo "Live client success: got $RESULT"
909 local MPROC="osc.$(get_mdtosc_proc_path $FSNAME-OST0001).active"
914 RESULT=`do_facet mds " lctl get_param -n $MPROC"`
915 [ ${PIPESTATUS[0]} = 0 ] || error "Can't read $MPROC"
916 if [ $RESULT -eq $DEAC ]; then
917 echo "MDT deactivated also after $WAIT sec (got $RESULT)"
921 if [ $WAIT -eq $MAX ]; then
922 echo "MDT not deactivated: wanted $DEAC got $RESULT"
925 echo "Waiting $(($MAX - $WAIT)) secs for MDT deactivated"
928 # test new client starts deactivated
929 umount_client $MOUNT || return 200
931 RESULT=$(lctl get_param -n $PROC_UUID | grep DEACTIV | grep NEW)
932 if [ -z "$RESULT" ]; then
933 echo "New client not deactivated from start: $(lctl get_param -n $PROC_UUID)"
936 echo "New client success: got $RESULT"
939 # make sure it reactivates
940 set_and_check client "lctl get_param -n $PROC_ACT" "$PARAM" $ACTV || return 6
945 #writeconf to remove all ost2 traces for subsequent tests
948 run_test 29 "permanently remove an OST"
953 TEST="lctl get_param -n llite.$FSNAME-*.max_read_ahead_whole_mb"
955 LIST=(1 2 3 4 5 4 3 2 1 2 3 4 5 4 3 2 1 2 3 4 5)
956 for i in ${LIST[@]}; do
957 set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $i || return 3
959 # make sure client restart still works
961 mount_client $MOUNT || return 4
962 [ "$($TEST)" -ne "$i" ] && return 5
963 set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $ORIG || return 6
966 run_test 30 "Big config llog"
968 test_31() { # bug 10734
969 # ipaddr must not exist
970 mount -t lustre 4.3.2.1@tcp:/lustre $MOUNT || true
973 run_test 31 "Connect to non-existent node (returns errors, should not crash)"
975 # Use these start32/stop32 fn instead of t-f start/stop fn,
976 # for local devices, to skip global facet vars init
980 echo "Stopping local ${MOUNT%/*}/${facet} (opts:$@)"
981 umount -d $@ ${MOUNT%/*}/${facet}
990 mkdir -p ${MOUNT%/*}/${facet}
992 echo "Starting local ${facet}: $@ $device ${MOUNT%/*}/${facet}"
993 mount -t lustre $@ ${device} ${MOUNT%/*}/${facet}
995 if [ $RC -ne 0 ]; then
996 echo "mount -t lustre $@ ${device} ${MOUNT%/*}/${facet}"
997 echo "Start of ${device} of local ${facet} failed ${RC}"
1003 cleanup_nocli32 () {
1011 echo "Cleanup test_32 umount $MOUNT ..."
1012 umount -f $MOUNT || true
1013 echo "Cleanup local mds ost1 ..."
1019 # this test is totally useless on a client-only system
1020 client_only && skip "client only testing" && return 0
1021 [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; }
1022 [ -z "$TUNEFS" ] && skip_env "No tunefs" && return 0
1024 local DISK1_6=$LUSTRE/tests/disk1_6.tar.bz2
1025 [ ! -r $DISK1_6 ] && skip_env "Cant find $DISK1_6, skipping" && return
1027 local tmpdir=$TMP/conf32a
1030 tar xjvf $DISK1_6 -C $tmpdir ||
1031 { skip_env "Cant untar $DISK1_6, skipping" && return ; }
1033 lctl set_param debug=$PTLDEBUG
1035 $TUNEFS $tmpdir/mds || error "tunefs failed"
1037 # nids are wrong, so client won't work, but server should start
1038 start32 mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \
1039 trap cleanup_32 EXIT INT || return 3
1041 local UUID=$(lctl get_param -n mds.lustre-MDT0000.uuid)
1043 [ "$UUID" == "lustre-MDT0000_UUID" ] || error "UUID is wrong: $UUID"
1045 $TUNEFS --mgsnode=`hostname` $tmpdir/ost1 || error "tunefs failed"
1046 start32 ost1 $tmpdir/ost1 "-o loop" || return 5
1047 UUID=$(lctl get_param -n obdfilter.lustre-OST0000.uuid)
1049 [ "$UUID" == "lustre-OST0000_UUID" ] || error "UUID is wrong: $UUID"
1051 local NID=$($LCTL list_nids | head -1)
1053 echo "OSC changes should succeed:"
1054 $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 || return 7
1055 $LCTL conf_param lustre-OST0000.failover.node=$NID || return 8
1057 echo "MDC changes should succeed:"
1058 $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9
1059 $LCTL conf_param lustre-MDT0000.failover.node=$NID || return 10
1064 # mount a second time to make sure we didn't leave the upgrade flag on
1066 $TUNEFS --dryrun $tmpdir/mds || error "tunefs failed"
1067 start32 mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \
1068 trap cleanup_32 EXIT INT || return 12
1072 rm -rf $tmpdir || true # true is only for TMP on NFS
1074 run_test 32a "Upgrade from 1.6 (not live)"
1077 # this test is totally useless on a client-only system
1078 client_only && skip "client only testing" && return 0
1079 [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; }
1080 [ -z "$TUNEFS" ] && skip_env "No tunefs" && return
1082 local DISK1_6=$LUSTRE/tests/disk1_6.tar.bz2
1083 [ ! -r $DISK1_6 ] && skip_env "Cant find $DISK1_6, skipping" && return
1085 local tmpdir=$TMP/conf32b
1088 tar xjvf $DISK1_6 -C $tmpdir ||
1089 { skip_env "Cant untar $DISK1_6, skipping" && return ; }
1092 lctl set_param debug=$PTLDEBUG
1094 # Upgrade process according to comment 2 of bug 20246
1095 # writeconf will cause servers to register with their current nids
1096 $TUNEFS --mdt --writeconf --erase-param \
1097 --param="mdt.group_upcall=/usr/sbin/l_getgroups" $tmpdir/mds || \
1098 error "tunefs mds failed"
1100 start32 mds $tmpdir/mds "-o loop,abort_recov" && \
1101 trap cleanup_32 EXIT INT || return 3
1104 start32 mds "-o loop" $tmpdir/mds || return 4
1105 local UUID=$(lctl get_param -n mds.lustre-MDT0000.uuid)
1107 [ "$UUID" == "lustre-MDT0000_UUID" ] || error "UUID is wrong: $UUID"
1109 $TUNEFS --ost --writeconf --erase-param --mgsnode=`hostname`@$NETTYPE $tmpdir/ost1 || \
1110 error "tunefs ost failed"
1111 start32 ost1 $tmpdir/ost1 "-o loop,abort_recov" || return 5
1113 UUID=$(lctl get_param -n obdfilter.lustre-OST0000.uuid)
1115 [ "$UUID" == "lustre-OST0000_UUID" ] || error "UUID is wrong: $UUID"
1117 echo "OSC changes should succeed:"
1118 $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 || return 7
1119 $LCTL conf_param lustre-OST0000.failover.node=$NID || return 8
1121 echo "MDC changes should succeed:"
1122 $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9
1125 # MDT and OST should have registered with new nids, so we should have
1126 # a fully-functioning client
1127 echo "Check client and old fs contents"
1129 local device=`h2$NETTYPE $HOSTNAME`:/lustre
1130 echo "Starting local client: $HOSTNAME: $device $MOUNT"
1131 mount -t lustre $device $MOUNT || return 1
1133 local old=$(lctl get_param -n mdc.*.max_rpcs_in_flight)
1134 local new=$((old + 5))
1135 lctl conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=$new
1136 wait_update $HOSTNAME "lctl get_param -n mdc.*.max_rpcs_in_flight" $new || return 11
1138 [ "$(cksum $MOUNT/passwd | cut -d' ' -f 1,2)" == "2940530074 2837" ] || return 12
1143 rm -rf $tmpdir || true # true is only for TMP on NFS
1145 run_test 32b "Upgrade from 1.6 with writeconf"
1147 test_33a() { # bug 12333, was test_33
1149 local FSNAME2=test-123
1150 [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST
1152 if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" ]; then
1153 is_blkdev mds $MDSDEV && \
1154 skip_env "mixed loopback and real device not working" && return
1157 local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2}
1158 local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2}
1159 add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2mdsdev || exit 10
1160 add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --index=8191 --mgsnode=$MGSNID --reformat $fs2ostdev || exit 10
1162 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && trap cleanup_24a EXIT INT
1163 start fs2ost $fs2ostdev $OST_MOUNT_OPTS
1164 do_facet mds "$LCTL conf_param $FSNAME2.sys.timeout=200" || rc=1
1166 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || rc=2
1167 cp /etc/hosts $MOUNT2/. || rc=3
1170 cp /etc/hosts $MOUNT2/ || rc=3
1171 $LFS getstripe $MOUNT2/hosts
1176 rm -rf $MOUNT2 $fs2mdsdev $fs2ostdev
1177 cleanup_nocli || rc=6
1180 run_test 33a "Mount ost with a large index number"
1182 test_33b() { # was test_33a
1185 do_facet client dd if=/dev/zero of=$MOUNT/24 bs=1024k count=1
1186 # Drop lock cancellation reply during umount
1187 #define OBD_FAIL_LDLM_CANCEL 0x304
1188 do_facet client lctl set_param fail_loc=0x80000304
1189 #lctl set_param debug=-1
1190 umount_client $MOUNT
1193 run_test 33b "Drop cancel during umount"
1197 do_facet client "sh runmultiop_bg_pause $DIR/file O_c"
1198 manual_umount_client
1200 do_facet client killall -USR1 multiop
1201 if [ $rc -eq 0 ]; then
1202 error "umount not fail!"
1207 run_test 34a "umount with opened file should be fail"
1212 touch $DIR/$tfile || return 1
1213 stop_mds --force || return 2
1215 manual_umount_client --force
1217 if [ $rc -ne 0 ]; then
1218 error "mtab after failed umount - rc $rc"
1224 run_test 34b "force umount with failed mds should be normal"
1228 touch $DIR/$tfile || return 1
1229 stop_ost --force || return 2
1231 manual_umount_client --force
1233 if [ $rc -ne 0 ]; then
1234 error "mtab after failed umount - rc $rc"
1240 run_test 34c "force umount with failed ost should be normal"
1242 test_35a() { # bug 12459
1246 lctl set_param debug="ha"
1248 log "Set up a fake failnode for the MDS"
1250 do_facet mds $LCTL conf_param ${FSNAME}-MDT0000.failover.node=$FAKENID || return 4
1252 log "Wait for RECONNECT_INTERVAL seconds (10s)"
1255 MSG="conf-sanity.sh test_35a `date +%F%kh%Mm%Ss`"
1258 log "Stopping the MDT:"
1259 stop_mds || return 5
1261 df $MOUNT > /dev/null 2>&1 &
1263 log "Restarting the MDT:"
1264 start_mds || return 6
1265 log "Wait for df ($DFPID) ... "
1270 # retrieve from the log the first server that the client tried to
1271 # contact after the connection loss
1272 $LCTL dk $TMP/lustre-log-$TESTNAME.log
1273 NEXTCONN=`awk "/${MSG}/ {start = 1;}
1274 /import_select_connection.*${FSNAME}-MDT0000-mdc.* using connection/ {
1276 if (\\\$NF ~ /$FAKENID/)
1282 }" $TMP/lustre-log-$TESTNAME.log`
1283 [ "$NEXTCONN" != "0" ] && log "The client didn't try to reconnect to the last active server (tried ${NEXTCONN} instead)" && return 7
1286 run_test 35a "Reconnect to the last active server first"
1288 test_35b() { # bug 18674
1289 remote_mds || { skip "local MDS" && return 0; }
1293 $LCTL set_param debug="ha"
1295 MSG="conf-sanity.sh test_35b `date +%F%kh%Mm%Ss`"
1298 log "Set up a fake failnode for the MDS"
1300 do_facet mds $LCTL conf_param ${FSNAME}-MDT0000.failover.node=$FAKENID || \
1303 local at_max_saved=0
1304 # adaptive timeouts may prevent seeing the issue
1305 if at_is_enabled; then
1306 at_max_saved=$(at_max_get mds)
1307 at_max_set 0 mds client
1310 mkdir -p $MOUNT/$tdir
1312 log "Injecting EBUSY on MDS"
1313 # Setting OBD_FAIL_MDS_RESEND=0x136
1314 do_facet mds "$LCTL set_param fail_loc=0x80000136" || return 2
1316 log "Creating a test file"
1317 touch $MOUNT/$tdir/$tfile
1319 log "Stop injecting EBUSY on MDS"
1320 do_facet mds "$LCTL set_param fail_loc=0" || return 3
1321 rm -f $MOUNT/$tdir/$tfile
1324 # restore adaptive timeout
1325 [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds client
1327 $LCTL dk $TMP/lustre-log-$TESTNAME.log
1329 # retrieve from the log if the client has ever tried to
1330 # contact the fake server after the loss of connection
1331 FAILCONN=`awk "BEGIN {ret = 0;}
1332 /import_select_connection.*${FSNAME}-MDT0000-mdc.* using connection/ {
1334 if (\\\$NF ~ /$FAKENID/) {
1339 END {print ret}" $TMP/lustre-log-$TESTNAME.log`
1341 [ "$FAILCONN" == "0" ] && \
1342 log "ERROR: The client reconnection has not been triggered" && \
1344 [ "$FAILCONN" == "2" ] && \
1345 log "ERROR: The client tried to reconnect to the failover server while the primary was busy" && \
1350 run_test 35b "Continue reconnection retries, if the active server is busy"
1353 [ $OSTCOUNT -lt 2 ] && skip_env "skipping test for single OST" && return
1355 [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \
1356 { skip "remote OST" && return 0; }
1359 local FSNAME2=test1234
1360 local fs3ost_HOST=$ost_HOST
1362 [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST && fs3ost_HOST=$ost1_HOST
1364 if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" -o -z "$fs3ost_DEV" ]; then
1365 is_blkdev mds $MDSDEV && \
1366 skip_env "mixed loopback and real device not working" && return
1369 local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2}
1370 local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2}
1371 local fs3ostdev=${fs3ost_DEV:-$(ostdevname 2)_2}
1372 add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2mdsdev || exit 10
1373 # XXX after we support non 4K disk blocksize, change following --mkfsoptions with
1375 add fs2ost $OST_MKFS_OPTS --mkfsoptions='-b4096' --fsname=${FSNAME2} --mgsnode=$MGSNID --reformat $fs2ostdev || exit 10
1376 add fs3ost $OST_MKFS_OPTS --mkfsoptions='-b4096' --fsname=${FSNAME2} --mgsnode=$MGSNID --reformat $fs3ostdev || exit 10
1378 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS
1379 start fs2ost $fs2ostdev $OST_MOUNT_OPTS
1380 start fs3ost $fs3ostdev $OST_MOUNT_OPTS
1382 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1
1384 sleep 5 # until 11778 fixed
1386 dd if=/dev/zero of=$MOUNT2/$tfile bs=1M count=7 || return 2
1388 BKTOTAL=`lctl get_param -n obdfilter.*.kbytestotal | awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
1389 BKFREE=`lctl get_param -n obdfilter.*.kbytesfree | awk 'BEGIN{free=0}; {free+=$1}; END{print free}'`
1390 BKAVAIL=`lctl get_param -n obdfilter.*.kbytesavail | awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}'`
1391 STRING=`df -P $MOUNT2 | tail -n 1 | awk '{print $2","$3","$4}'`
1392 DFTOTAL=`echo $STRING | cut -d, -f1`
1393 DFUSED=`echo $STRING | cut -d, -f2`
1394 DFAVAIL=`echo $STRING | cut -d, -f3`
1395 DFFREE=$(($DFTOTAL - $DFUSED))
1397 ALLOWANCE=$((64 * $OSTCOUNT))
1399 if [ $DFTOTAL -lt $(($BKTOTAL - $ALLOWANCE)) ] ||
1400 [ $DFTOTAL -gt $(($BKTOTAL + $ALLOWANCE)) ] ; then
1401 echo "**** FAIL: df total($DFTOTAL) mismatch OST total($BKTOTAL)"
1404 if [ $DFFREE -lt $(($BKFREE - $ALLOWANCE)) ] ||
1405 [ $DFFREE -gt $(($BKFREE + $ALLOWANCE)) ] ; then
1406 echo "**** FAIL: df free($DFFREE) mismatch OST free($BKFREE)"
1409 if [ $DFAVAIL -lt $(($BKAVAIL - $ALLOWANCE)) ] ||
1410 [ $DFAVAIL -gt $(($BKAVAIL + $ALLOWANCE)) ] ; then
1411 echo "**** FAIL: df avail($DFAVAIL) mismatch OST avail($BKAVAIL)"
1416 stop fs3ost -f || return 200
1417 stop fs2ost -f || return 201
1418 stop fs2mds -f || return 202
1419 rm -rf $MOUNT2 $fs2mdsdev $fs2ostdev $fs3ostdev
1420 unload_modules || return 203
1423 run_test 36 "df report consistency on OSTs with different block size"
1426 client_only && skip "client only testing" && return 0
1427 LOCAL_MDSDEV="$TMP/mdt.img"
1428 SYM_MDSDEV="$TMP/sym_mdt.img"
1430 echo "MDS : $LOCAL_MDSDEV"
1431 echo "SYMLINK : $SYM_MDSDEV"
1435 mkfs.lustre --reformat --fsname=lustre --mdt --mgs --device-size=9000 $LOCAL_MDSDEV ||
1436 error "mkfs.lustre $LOCAL_MDSDEV failed"
1437 ln -s $LOCAL_MDSDEV $SYM_MDSDEV
1439 echo "mount symlink device - $SYM_MDSDEV"
1441 mount_op=`mount -v -t lustre -o loop $SYM_MDSDEV ${MOUNT%/*}/mds 2>&1 | grep "unable to set tunable"`
1442 umount -d ${MOUNT%/*}/mds
1443 rm -f $LOCAL_MDSDEV $SYM_MDSDEV
1445 if [ -n "$mount_op" ]; then
1446 error "**** FAIL: set tunables failed for symlink device"
1450 run_test 37 "verify set tunables works for symlink device"
# test_38 (bug 14222): verify that the MDS recreates a missing lov_objid
# file from OST data.  Visible flow: copy files into the fs, unmount,
# delete lov_objid on the MDS backing device via debugfs, remount and
# verify file contents; then repeat with a zeroed-out lov_objid.
# NOTE(review): this excerpt is sparsely sampled -- statements between
# the numbered lines are not shown here.
1452 test_38() { # bug 14222
# pick up to $COUNT regular files older than one day as test payload
1457 FILES=`find $SRC -type f -mtime +1 | head -n $COUNT`
1458 log "copying $(echo $FILES | wc -w) files to $DIR/$tdir"
# tar pipe preserves paths while copying the payload into the test dir
1460 tar cf - $FILES | tar xf - -C $DIR/$tdir || \
1461 error "copying $SRC to $DIR/$tdir"
1463 umount_client $MOUNT
1465 log "rename lov_objid file on MDS"
1466 rm -f $TMP/lov_objid.orig
# save a copy of lov_objid, then remove it from the MDS backing device
1467 do_facet mds "$DEBUGFS -c -R \\\"dump lov_objid $TMP/lov_objid.orig\\\" $MDSDEV"
1468 do_facet mds "$DEBUGFS -w -R \\\"rm lov_objid\\\" $MDSDEV"
# dump the saved copy into the test log for later comparison
1470 do_facet mds "od -Ax -td8 $TMP/lov_objid.orig"
1471 # check that lov_objid is recreated in mds_lov_connect
# compare each copied file against its original; flag any mismatch
1475 [ $V ] && log "verifying $DIR/$tdir/$f"
1476 diff -q $f $DIR/$tdir/$f || ERROR=y
1478 do_facet mds "$DEBUGFS -c -R \\\"dump lov_objid $TMP/lov_objid.new\\\" $MDSDEV"
1479 do_facet mds "od -Ax -td8 $TMP/lov_objid.new"
1480 [ "$ERROR" = "y" ] && error "old and new files are different after connect" || true
1483 # second pass: check that lov_objid updates stay in sync
1484 umount_client $MOUNT
# overwrite lov_objid with a zeroed 4K block and verify recovery again
1487 do_facet mds dd if=/dev/zero of=$TMP/lov_objid.clear bs=4096 count=1
1488 do_facet mds "$DEBUGFS -w -R \\\"rm lov_objid\\\" $MDSDEV"
1489 do_facet mds "$DEBUGFS -w -R \\\"write $TMP/lov_objid.clear lov_objid\\\" $MDSDEV "
1494 [ $V ] && log "verifying $DIR/$tdir/$f"
1495 diff -q $f $DIR/$tdir/$f || ERROR=y
1497 do_facet mds "$DEBUGFS -c -R \\\"dump lov_objid $TMP/lov_objid.new1\\\" $MDSDEV"
1498 do_facet mds "od -Ax -td8 $TMP/lov_objid.new1"
1499 umount_client $MOUNT
1501 [ "$ERROR" = "y" ] && error "old and new files are different after sync" || true
1503 log "files compared the same"
1506 run_test 38 "MDS recreates missing lov_objid file from OST data"
# test_39 (bug 14413): run leak_finder.pl over the collected debug log and
# fail the test if any leak report is found.
1508 test_39() { #bug 14413
# The original pattern '*** Leak:' begins with an unescaped '*', which is
# an invalid (POSIX-undefined) ERE; escape the asterisks so egrep matches
# the literal "*** Leak:" marker emitted by leak_finder.pl.  The
# `&& error || true` shape errors out only when a leak line is found.
1512 perl $SRCDIR/leak_finder.pl $TMP/debug 2>&1 | egrep '\*\*\* Leak:' &&
1513 error "memory leak detected" || true
1515 run_test 39 "leak_finder recognizes both LUSTRE and LNET malloc messages"
# test_40 (bug 15759): exercise a race during service thread startup by
# arming an MDS fail_loc before the (unshown) restart sequence.
1517 test_40() { # bug 15759
1519 #define OBD_FAIL_TGT_TOOMANY_THREADS 0x706
# NOTE(review): the 0x80000000 bit appears to be the one-shot variant of
# fail_loc 0x706 -- confirm against lustre/include/obd_support.h
1520 do_facet mds "lctl set_param fail_loc=0x80000706"
1524 run_test 40 "race during service thread startup"
# test_41 (bug 14134): mount the MDS once with --nosvc (-o nosvc -n) and
# again with --nomgs, then verify a client can mount and do basic I/O.
1526 test_41() { #bug 14134
1528 start mds $MDSDEV $MDS_MOUNT_OPTS -o nosvc -n
1529 start ost1 `ostdevname 1` $OST_MOUNT_OPTS
1530 start mds $MDSDEV $MDS_MOUNT_OPTS -o nomgs,force
1532 mount_client $MOUNT || return 1
# basic write sanity check through the client mount
1535 echo "blah blah" > $MOUNT/$tfile
1538 umount_client $MOUNT
# non-failover stops; distinct return codes identify which step failed
1539 stop ost1 -f || return 201
# mds is stopped twice -- NOTE(review): presumably once per start call
# above (nosvc and nomgs instances); confirm against test-framework.sh
1540 stop mds -f || return 202
1541 stop mds -f || return 203
1542 unload_modules || return 204
1545 run_test 41 "mount mds with --nosvc and --nomgs"
# test_42 (bug 14693): an invalid llite config parameter must not prevent
# a subsequent client mount from succeeding.
1547 test_42() { #bug 14693
1549 check_mount || return 2
# deliberately set a bogus parameter in the client config log
1550 do_facet client lctl conf_param lustre.llite.some_wrong_param=10
# remount must still work despite the bad parameter
1551 umount_client $MOUNT
1552 mount_client $MOUNT || return 1
1556 run_test 42 "invalid config param should not prevent client from mounting"
# test_43 (bug 15993): verify the MDS removes a common ("trusted.lma") EA
# left on a file if it exists after remount.  Only runs against a 1.7/1.8
# MDS.
1558 test_43() { #bug 15993
# detect a 1.7/1.8 MDS from the lustre version string
1560 VERSION_1_8=$(do_facet mds $LCTL get_param version | grep ^lustre.*1\.[78])
1561 if [ -z "$VERSION_1_8" ]; then
1562 skip "skipping test for non 1.8 MDS"
1567 check_mount || return 2
1568 testfile=$DIR/$tfile
1569 lma="this-should-be-removed-after-remount-and-accessed"
1571 echo "set/get trusted.lma"
1572 #define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f
# arm the fail_loc on both the MDS and the local client so setting the
# normally-reserved trusted.lma EA is allowed
1573 do_facet mds "lctl set_param fail_loc=0x13f"
1574 lctl set_param fail_loc=0x13f
1575 setfattr -n trusted.lma -v $lma $testfile || error "create common EA"
1576 do_facet mds "lctl set_param fail_loc=0"
1577 lctl set_param fail_loc=0
# confirm the EA round-trips before the remount
1578 ATTR=$(getfattr -n trusted.lma $testfile 2> /dev/null | grep trusted.lma)
1579 [ "$ATTR" = "trusted.lma=\"$lma\"" ] || error "check common EA"
1580 umount_client $MOUNT
1585 check_mount || return 3
1586 #define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e
1587 do_facet mds "lctl set_param fail_loc=0x13e"
1589 do_facet mds "lctl set_param fail_loc=0"
# after remount + access, the common EA must be gone
1590 getfattr -d -m trusted $testfile 2> /dev/null | \
1591 grep "trusted.lma" && error "common EA not removed" || true
1595 run_test 43 "remove common EA if it exists"
1599 check_mount || return 2
1600 UUID=$($LCTL get_param llite.${FSNAME}*.uuid | cut -d= -f2)
1602 UUIDS=$(do_facet mds "$LCTL get_param $(get_mds_mdt_device_proc_path).${FSNAME}*.exports.*.uuid")
1603 for VAL in $UUIDS; do
1604 NID=$(echo $VAL | cut -d= -f1)
1605 CLUUID=$(echo $VAL | cut -d= -f2)
1606 [ "$UUID" = "$CLUUID" ] && STATS_FOUND=yes && break
1608 [ "$STATS_FOUND" = "no" ] && error "stats not found for client"
1612 run_test 44 "mounted client proc entry exists"
1616 check_mount || return 2
1621 #define OBD_FAIL_PTLRPC_LONG_UNLINK 0x50f
1622 do_facet client "lctl set_param fail_loc=0x50f"
1625 manual_umount_client --force || return 3
1626 do_facet client "lctl set_param fail_loc=0x0"
1628 mount_client $MOUNT || return 4
1632 run_test 45 "long unlink handling in ptlrpcd"
1639 umount_client $MOUNT2 || rc=$?
1640 umount_client $MOUNT || rc=$?
1641 while [ $count -gt 0 ]; do
1642 stop ost${count} -f || rc=$?
1646 # writeconf is needed after the test, otherwise,
1647 # we might end up with extra OSTs
1649 cleanup_nocli || rc=$?
1654 echo "Testing with $OSTCOUNT OSTs"
1656 start_mds || return 1
1657 #first client should see only one ost
1658 start_ost || return 2
1659 wait_osc_import_state mds ost FULL
1661 mount_client $MOUNT || return 3
1662 trap "cleanup_46a $OSTCOUNT" EXIT ERR
1665 for (( i=2; i<=$OSTCOUNT; i++ )); do
1666 start ost$i `ostdevname $i` $OST_MOUNT_OPTS || return $((i+2))
1669 # wait until osts in sync
1670 for (( i=2; i<=$OSTCOUNT; i++ )); do
1671 wait_osc_import_state mds ost$i FULL
1674 #second client see all ost's
1676 mount_client $MOUNT2 || return 8
1677 $LFS setstripe $MOUNT2 -c -1 || return 9
1678 $LFS getstripe $MOUNT2 || return 10
1680 echo "ok" > $MOUNT2/widestripe
1681 $LFS getstripe $MOUNT2/widestripe || return 11
1683 # add ACLs to fill the buffer and avoid expanding the lsm into it
1683 awk -F : '{if (FNR < 25) { print "u:"$1":rwx" }}' /etc/passwd | while read acl; do
1684 setfacl -m $acl $MOUNT2/widestripe
1688 stat $MOUNT/widestripe || return 12
1690 cleanup_46a $OSTCOUNT || { echo "cleanup_46a failed!" && return 13; }
1693 run_test 46a "handle ost additional - wide striped file"
1698 check_mount || return 2
1699 $LCTL set_param ldlm.namespaces.$FSNAME-*-*-*.lru_size=100
1703 for ns in $($LCTL get_param ldlm.namespaces.$FSNAME-*-*-*.lru_size); do
1704 lrs=$(echo $ns | sed 's/.*lru_size=//')
1705 lru_size[count]=$lrs
1711 client_up || return 3
1714 for ns in $($LCTL get_param ldlm.namespaces.$FSNAME-*-*-*.lru_size); do
1715 lrs=$(echo $ns | sed 's/.*lru_size=//')
1716 if ! test "$lrs" -eq "${lru_size[count]}"; then
1717 n=$(echo $ns | sed -e 's/ldlm.namespaces.//' -e 's/.lru_size=.*//')
1718 error "$n has lost lru_size: $lrs vs. ${lru_size[count]}"
1726 run_test 47 "server restart does not make client loss lru_resize settings"
1731 # reformat is needed after this test - if the test fails
1732 # we will be left with an unremovable file in the FS
# test_48 (bug 17636): create a wide-striped file and attach many ACLs to
# it, then verify the file is still accessible (stat succeeds).
1736 test_48() { # bug 17636
1739 check_mount || return 2
# stripe over all OSTs so the file's LSM is as large as possible
1741 $LFS setstripe $MOUNT -c -1 || return 9
1742 $LFS getstripe $MOUNT || return 10
1744 echo "ok" > $MOUNT/widestripe
1745 $LFS getstripe $MOUNT/widestripe || return 11
1747 trap cleanup_48 EXIT ERR
1749 # add an ACL per user to fill the buffer and avoid expanding the lsm into it
1750 getent passwd | awk -F : '{ print "u:"$1":rwx" }' | while read acl; do
1751 setfacl -m $acl $MOUNT/widestripe
# the file must remain accessible with many ACLs attached
1754 stat $MOUNT/widestripe || return 12
1759 run_test 48 "too many acls on file"
1761 # check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE
# test_49 (bug 17710): format with --param sys.ldlm_timeout and verify the
# value propagates identically to MDS, OST and client, for two different
# settings.  Saves and restores the global *_MKFS_OPTS.
1762 test_49() { # bug 17710
1763 local OLD_MDS_MKFS_OPTS=$MDS_MKFS_OPTS
1764 local OLD_OST_MKFS_OPTS=$OST_MKFS_OPTS
1765 local LOCAL_TIMEOUT=20
# first pass: ldlm_timeout == sys.timeout == LOCAL_TIMEOUT
1767 OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$LOCAL_TIMEOUT --param sys.ldlm_timeout=$LOCAL_TIMEOUT $MKFSOPT $OSTOPT"
1771 check_mount || return 1
1773 echo "check ldlm_timout..."
1774 LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`"
1775 LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`"
1776 LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`"
# all three nodes must agree on the effective ldlm_timeout
1778 if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then
1779 error "Different LDLM_TIMEOUT:$LDLM_MDS $LDLM_OST1 $LDLM_CLIENT"
# NOTE(review): this pass apparently expects the default derivation
# timeout/3 -- the MDS mkfs options line is not visible here; confirm
1782 if [ $LDLM_MDS -ne $((LOCAL_TIMEOUT / 3)) ]; then
1783 error "LDLM_TIMEOUT($LDLM_MDS) is not correct"
1786 umount_client $MOUNT
1787 stop_ost || return 2
1788 stop_mds || return 3
# second pass: explicit ldlm_timeout = LOCAL_TIMEOUT - 1
1790 OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$LOCAL_TIMEOUT --param sys.ldlm_timeout=$((LOCAL_TIMEOUT - 1)) $MKFSOPT $OSTOPT"
1794 check_mount || return 7
1796 LDLM_MDS="`do_facet mds lctl get_param -n ldlm_timeout`"
1797 LDLM_OST1="`do_facet ost1 lctl get_param -n ldlm_timeout`"
1798 LDLM_CLIENT="`do_facet client lctl get_param -n ldlm_timeout`"
1800 if [ $LDLM_MDS -ne $LDLM_OST1 ] || [ $LDLM_MDS -ne $LDLM_CLIENT ]; then
1801 error "Different LDLM_TIMEOUT:$LDLM_MDS $LDLM_OST1 $LDLM_CLIENT"
# the explicitly set value must be in effect this time
1804 if [ $LDLM_MDS -ne $((LOCAL_TIMEOUT - 1)) ]; then
1805 error "LDLM_TIMEOUT($LDLM_MDS) is not correct"
1808 cleanup || return $?
# restore the saved mkfs options for subsequent tests
1810 MDS_MKFS_OPTS=$OLD_MDS_MKFS_OPTS
1811 OST_MKFS_OPTS=$OLD_OST_MKFS_OPTS
1813 run_test 49 "check PARAM_SYS_LDLM_TIMEOUT option of MKFS.LUSTRE"
1816 # Test both statfs and lfs df and fail if either one fails
1817 multiop_bg_pause $1 f_
1820 killall -USR1 multiop
1821 [ $RC1 -ne 0 ] && log "lazystatfs multiop failed"
1822 wait $PID || { RC1=$?; log "multiop return error "; }
1829 if [ $RC2 -eq 0 ]; then
1831 log "lazystatfs df failed"
1835 [[ $RC1 -ne 0 || $RC2 -eq 0 ]] && RC=1
1841 lctl set_param llite.$FSNAME-*.lazystatfs=1
1844 lazystatfs $MOUNT || error "lazystatfs failed but no down servers"
1846 cleanup || return $?
1848 run_test 50a "lazystatfs all servers available =========================="
1852 lctl set_param llite.$FSNAME-*.lazystatfs=1
1855 # Wait for client to detect down OST
1856 stop_ost || error "Unable to stop OST1"
1857 wait_osc_import_state mds ost DISCONN
1859 lazystatfs $MOUNT || error "lazystatfs should don't have returned EIO"
1861 umount_client $MOUNT || error "Unable to unmount client"
1862 stop_mds || error "Unable to stop MDS"
1864 run_test 50b "lazystatfs all servers down =========================="
1867 start_mds || error "Unable to start MDS"
1868 start_ost || error "Unable to start OST1"
1869 start_ost2 || error "Unable to start OST2"
1870 mount_client $MOUNT || error "Unable to mount client"
1871 lctl set_param llite.$FSNAME-*.lazystatfs=1
1874 # Wait for client to detect down OST
1875 stop_ost || error "Unable to stop OST1"
1876 wait_osc_import_state mds ost DISCONN
1877 lazystatfs $MOUNT || error "lazystatfs failed with one down server"
1879 umount_client $MOUNT || error "Unable to unmount client"
1880 stop_ost2 || error "Unable to stop OST2"
1881 stop_mds || error "Unable to stop MDS"
1883 run_test 50c "lazystatfs one server down =========================="
1886 start_mds || error "Unable to start MDS"
1887 start_ost || error "Unable to start OST1"
1888 start_ost2 || error "Unable to start OST2"
1889 mount_client $MOUNT || error "Unable to mount client"
1890 lctl set_param llite.$FSNAME-*.lazystatfs=1
1893 # Issue the statfs during the window where the client still
1894 # believes the OST to be available but it is in fact down.
1895 # No failure just a statfs which hangs for a timeout interval.
1896 stop_ost || error "Unable to stop OST1"
1897 lazystatfs $MOUNT || error "lazystatfs failed with one down server"
1899 umount_client $MOUNT || error "Unable to unmount client"
1900 stop_ost2 || error "Unable to stop OST2"
1901 stop_mds || error "Unable to stop MDS"
1903 run_test 50d "lazystatfs client/server conn race =========================="
1910 start_mds || return 1
1911 #first client should see only one ost
1912 start_ost || return 2
1913 wait_osc_import_state mds ost FULL
1915 # Wait for client to detect down OST
1916 stop_ost || error "Unable to stop OST1"
1917 wait_osc_import_state mds ost DISCONN
1919 mount_client $MOUNT || error "Unable to mount client"
1920 lctl set_param llite.$FSNAME-*.lazystatfs=0
1922 multiop_bg_pause $MOUNT _f
1926 if [ $RC1 -ne 0 ]; then
1927 log "lazystatfs multiop failed $RC1"
1930 sleep $(( $TIMEOUT+1 ))
1932 [ $? -ne 0 ] && error "process isn't sleep"
1933 start_ost || error "Unable to start OST1"
1934 wait $pid || error "statfs failed"
1937 umount_client $MOUNT || error "Unable to unmount client"
1938 stop_ost || error "Unable to stop OST1"
1939 stop_mds || error "Unable to stop MDS"
1941 run_test 50e "normal statfs all servers down =========================="
1946 CONN_PROC="osc.$FSNAME-OST0001-osc.ost_server_uuid"
1948 start_mds || error "Unable to start mds"
1949 #first client should see only one ost
1950 start_ost || error "Unable to start OST1"
1951 wait_osc_import_state mds ost FULL
1953 start_ost2 || error "Unable to start OST2"
1954 wait_osc_import_state mds ost2 FULL
1956 # Wait for client to detect down OST
1957 stop_ost2 || error "Unable to stop OST2"
1958 wait_osc_import_state mds ost2 DISCONN
1960 mount_client $MOUNT || error "Unable to mount client"
1961 lctl set_param llite.$FSNAME-*.lazystatfs=0
1963 multiop_bg_pause $MOUNT _f
1967 if [ $RC1 -ne 0 ]; then
1968 log "lazystatfs multiop failed $RC1"
1971 sleep $(( $TIMEOUT+1 ))
1973 [ $? -ne 0 ] && error "process isn't sleep"
1974 start_ost2 || error "Unable to start OST2"
1975 wait $pid || error "statfs failed"
1976 stop_ost2 || error "Unable to stop OST2"
1979 umount_client $MOUNT || error "Unable to unmount client"
1980 stop_ost || error "Unable to stop OST1"
1981 stop_mds || error "Unable to stop MDS"
1984 run_test 50f "normal statfs one server in down =========================="
1987 [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2, skipping" && return
1989 start_ost2 || error "Unable to start OST2"
1991 local PARAM="${FSNAME}-OST0001.osc.active"
1993 $LFS setstripe -c -1 $DIR/$tfile || error "Unable to lfs setstripe"
1994 do_facet mgs $LCTL conf_param $PARAM=0 || error "Unable to deactivate OST"
1996 umount_client $MOUNT || error "Unable to unmount client"
1997 mount_client $MOUNT || error "Unable to mount client"
1998 # This df should not cause a panic
2001 do_facet mgs $LCTL conf_param $PARAM=1 || error "Unable to activate OST"
2003 umount_client $MOUNT || error "Unable to unmount client"
2004 stop_ost2 || error "Unable to stop OST2"
2005 stop_ost || error "Unable to stop OST1"
2006 stop_mds || error "Unable to stop MDS"
2009 run_test 50g "deactivated OST should not cause panic====================="
2011 if ! combined_mgs_mds ; then
2015 equals_msg `basename $0`: test complete
2016 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true