7 # This test needs to be run on the client
# Default LUSTRE to the parent of the directory holding this script
# unless the caller already set it.
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
# Pull in the shared test framework from the Lustre tree.
11 . $LUSTRE/tests/test-framework.sh
# Source the configuration file; ':=' also assigns the default path
# ($LUSTRE/tests/cfg/local.sh) to CONFIG when it is unset or empty.
15 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
# Test "0b" is always listed, followed by whatever REPLAY_SINGLE_EXCEPT
# holds. NOTE(review): presumably read by the test framework to skip the
# listed tests -- confirm in test-framework.sh.
19 ALWAYS_EXCEPT="0b $REPLAY_SINGLE_EXCEPT"
24 # make sure we are using the primary MDS, so the config log will
25 # be able to clean up properly.
26 activemds=`facet_active mds`
27 if [ $activemds != "mds" ]; then
28 fail mds $MDS_MOUNT_OPTS
30 zconf_umount `hostname` $MOUNT
34 #no dump option in mountconf...
35 #stop ost ${FORCE} --dump $TMP/replay-single-`hostname`.log
38 if [ "$ONLY" == "cleanup" ]; then
39 sysctl -w lnet.debug=0 || true
40 # failover is the default, '-f' is force
45 SETUP=${SETUP:-"setup"}
46 CLEANUP=${CLEANUP:-"cleanup"}
49 grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
53 add mds $MDS_MKFS_OPTS --reformat $MDSDEV
54 add ost $OST_MKFS_OPTS --reformat $OSTDEV
55 add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2
56 start mds $MDSDEV $MDS_MOUNT_OPTS
57 start ost $OSTDEV $OST_MOUNT_OPTS
58 start ost2 $OSTDEV2 $OST2_MOUNT_OPTS
59 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
61 #add_lov lov1 mds --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
63 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
69 if [ "$ONLY" == "setup" ]; then
77 fail mds $MDSDEV $MDS_MOUNT_OPTS
79 run_test 0 "empty replay"
82 # this test attempts to trigger a race in the precreation code,
83 # and must run before any other objects are created on the filesystem
84 fail ost $OSTDEV $OST_MOUNT_OPTS
85 createmany -o $DIR/$tfile 20 || return 1
86 unlinkmany $DIR/$tfile 20 || return 2
88 run_test 0b "ensure object created after recover exists. (3284)"
93 fail mds $MDSDEV $MDS_MOUNT_OPTS
94 $CHECKSTAT -t file $DIR/$tfile || return 1
97 run_test 1 "simple create"
100 do_facet ost "sysctl -w lustre.fail_loc=0"
103 local old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
104 touch -o $DIR/$tfile 1
106 local new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
108 test "$old_last_id" = "$new_last_id" || {
109 echo "OST object create is caused by MDS"
113 old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
114 echo "data" > $DIR/$tfile
116 new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
# An unchanged last_id after the write means no OST object was created;
# compare the two values exactly (no padding inside the quotes).
117 test "$old_last_id" = "$new_last_id" && {
118 echo "CROW does not work on write"
124 #define OBD_FAIL_OST_CROW_EIO | OBD_FAIL_ONCE
125 do_facet ost "sysctl -w lustre.fail_loc=0x80000801"
128 old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
129 echo "data" > $DIR/1a1
131 new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
132 test "$old_last_id" = "$new_last_id" || {
133 echo "CROW does work with fail_loc=0x80000801"
139 do_facet ost "sysctl -w lustre.fail_loc=0"
141 #CROW run_test 1a "CROW object create (check OST last_id)"
146 fail mds $MDSDEV $MDS_MOUNT_OPTS
147 $CHECKSTAT -t file $DIR/$tfile || return 1
153 ./mcreate $DIR/$tfile
156 fail mds $MDSDEV $MDS_MOUNT_OPTS
157 $CHECKSTAT -t file $DIR/$tfile || return 1
165 o_directory $DIR/$tfile
166 fail mds $MDSDEV $MDS_MOUNT_OPTS
167 $CHECKSTAT -t file $DIR/$tfile || return 2
170 run_test 3a "replay failed open(O_DIRECTORY)"
174 #define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
175 do_facet mds "sysctl -w lustre.fail_loc=0x80000114"
177 do_facet mds "sysctl -w lustre.fail_loc=0"
178 fail mds $MDSDEV $MDS_MOUNT_OPTS
179 $CHECKSTAT -t file $DIR/$tfile && return 2
182 run_test 3b "replay failed open -ENOMEM"
186 #define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
187 do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
189 do_facet mds "sysctl -w lustre.fail_loc=0"
190 fail mds $MDSDEV $MDS_MOUNT_OPTS
192 $CHECKSTAT -t file $DIR/$tfile && return 2
195 run_test 3c "replay failed open -ENOMEM"
199 for i in `seq 10`; do
200 echo "tag-$i" > $DIR/$tfile-$i
202 fail mds $MDSDEV $MDS_MOUNT_OPTS
203 for i in `seq 10`; do
204 grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
207 run_test 4 "|x| 10 open(O_CREAT)s"
212 fail mds $MDSDEV $MDS_MOUNT_OPTS
213 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
215 run_test 4b "|x| rm 10 files"
217 # The idea is to get past the first block of precreated files on both
218 # osts, and then replay.
221 for i in `seq 220`; do
222 echo "tag-$i" > $DIR/$tfile-$i
224 fail mds $MDSDEV $MDS_MOUNT_OPTS
225 for i in `seq 220`; do
226 grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i"
230 # waiting for commitment of removal
232 run_test 5 "|x| 220 open(O_CREAT)"
238 mcreate $DIR/$tdir/$tfile
239 fail mds $MDSDEV $MDS_MOUNT_OPTS
240 $CHECKSTAT -t dir $DIR/$tdir || return 1
241 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
243 # waiting for log process thread
245 run_test 6 "mkdir + contained create"
250 fail mds $MDSDEV $MDS_MOUNT_OPTS
251 $CHECKSTAT -t dir $DIR/$tdir && return 1 || true
253 run_test 6b "|X| rmdir"
258 mcreate $DIR/$tdir/$tfile
259 fail mds $MDSDEV $MDS_MOUNT_OPTS
260 $CHECKSTAT -t dir $DIR/$tdir || return 1
261 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
264 run_test 7 "mkdir |X| contained create"
268 multiop $DIR/$tfile mo_c &
271 fail mds $MDSDEV $MDS_MOUNT_OPTS
273 $CHECKSTAT -t file $DIR/$tfile || return 1
274 kill -USR1 $MULTIPID || return 2
275 wait $MULTIPID || return 3
278 run_test 8 "creat open |X| close"
283 local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
284 fail mds $MDSDEV $MDS_MOUNT_OPTS
285 local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
287 echo " old_inum == $old_inum, new_inum == $new_inum"
288 if [ $old_inum -eq $new_inum ] ;
290 echo " old_inum and new_inum match"
292 echo "!!!! old_inum and new_inum NOT match"
297 run_test 9 "|X| create (same inum/gen)"
302 mv $DIR/$tfile $DIR/$tfile-2
304 fail mds $MDSDEV $MDS_MOUNT_OPTS
305 $CHECKSTAT $DIR/$tfile && return 1
306 $CHECKSTAT $DIR/$tfile-2 ||return 2
310 run_test 10 "create |X| rename unlink"
314 echo "old" > $DIR/$tfile
315 mv $DIR/$tfile $DIR/$tfile-2
317 echo "new" > $DIR/$tfile
319 grep old $DIR/$tfile-2
320 fail mds $MDSDEV $MDS_MOUNT_OPTS
321 grep new $DIR/$tfile || return 1
322 grep old $DIR/$tfile-2 || return 2
324 run_test 11 "create open write rename |X| create-old-name read"
328 multiop $DIR/$tfile o_tSc &
330 # give multiop a chance to open
335 wait $pid || return 1
337 fail mds $MDSDEV $MDS_MOUNT_OPTS
338 [ -e $DIR/$tfile ] && return 2
341 run_test 12 "open, unlink |X| close"
344 # 1777 - replay open after committed chmod that would make
345 # a regular open a failure
348 multiop $DIR/$tfile O_wc &
350 # give multiop a chance to open
353 $CHECKSTAT -p 0 $DIR/$tfile
355 fail mds $MDSDEV $MDS_MOUNT_OPTS
357 wait $pid || return 1
359 $CHECKSTAT -s 1 -p 0 $DIR/$tfile || return 2
362 run_test 13 "open chmod 0 |x| write close"
365 multiop $DIR/$tfile O_tSc &
367 # give multiop a chance to open
371 kill -USR1 $pid || return 1
372 wait $pid || return 2
374 fail mds $MDSDEV $MDS_MOUNT_OPTS
375 [ -e $DIR/$tfile ] && return 3
378 run_test 14 "open(O_CREAT), unlink |X| close"
381 multiop $DIR/$tfile O_tSc &
383 # give multiop a chance to open
387 touch $DIR/g11 || return 1
389 wait $pid || return 2
391 fail mds $MDSDEV $MDS_MOUNT_OPTS
392 [ -e $DIR/$tfile ] && return 3
393 touch $DIR/h11 || return 4
396 run_test 15 "open(O_CREAT), unlink |X| touch new, close"
403 mcreate $DIR/$tfile-2
404 fail mds $MDSDEV $MDS_MOUNT_OPTS
405 [ -e $DIR/$tfile ] && return 1
406 [ -e $DIR/$tfile-2 ] || return 2
407 munlink $DIR/$tfile-2 || return 3
409 run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new"
413 multiop $DIR/$tfile O_c &
415 # give multiop a chance to open
417 fail mds $MDSDEV $MDS_MOUNT_OPTS
418 kill -USR1 $pid || return 1
419 wait $pid || return 2
420 $CHECKSTAT -t file $DIR/$tfile || return 3
423 run_test 17 "|X| open(O_CREAT), |replay| close"
427 multiop $DIR/$tfile O_tSc &
429 # give multiop a chance to open
432 touch $DIR/$tfile-2 || return 1
433 echo "pid: $pid will close"
435 wait $pid || return 2
437 fail mds $MDSDEV $MDS_MOUNT_OPTS
438 [ -e $DIR/$tfile ] && return 3
439 [ -e $DIR/$tfile-2 ] || return 4
440 # this touch frequently fails
441 touch $DIR/$tfile-3 || return 5
442 munlink $DIR/$tfile-2 || return 6
443 munlink $DIR/$tfile-3 || return 7
446 run_test 18 "|X| open(O_CREAT), unlink, touch new, close, touch, unlink"
448 # bug 1855 (a simpler form of test_11 above)
452 echo "old" > $DIR/$tfile
453 mv $DIR/$tfile $DIR/$tfile-2
454 grep old $DIR/$tfile-2
455 fail mds $MDSDEV $MDS_MOUNT_OPTS
456 grep old $DIR/$tfile-2 || return 2
458 run_test 19 "|X| mcreate, open, write, rename "
462 multiop $DIR/$tfile O_tSc &
464 # give multiop a chance to open
468 fail mds $MDSDEV $MDS_MOUNT_OPTS
470 wait $pid || return 1
471 [ -e $DIR/$tfile ] && return 2
474 run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
478 multiop $DIR/$tfile O_tSc &
480 # give multiop a chance to open
483 touch $DIR/g11 || return 1
485 fail mds $MDSDEV $MDS_MOUNT_OPTS
487 wait $pid || return 2
488 [ -e $DIR/$tfile ] && return 3
489 touch $DIR/h11 || return 4
492 run_test 21 "|X| open(O_CREAT), unlink touch new, replay, close (test mds_cleanup_orphans)"
495 multiop $DIR/$tfile O_tSc &
497 # give multiop a chance to open
503 fail mds $MDSDEV $MDS_MOUNT_OPTS
505 wait $pid || return 1
506 [ -e $DIR/$tfile ] && return 2
509 run_test 22 "open(O_CREAT), |X| unlink, replay, close (test mds_cleanup_orphans)"
512 multiop $DIR/$tfile O_tSc &
514 # give multiop a chance to open
519 touch $DIR/g11 || return 1
521 fail mds $MDSDEV $MDS_MOUNT_OPTS
523 wait $pid || return 2
524 [ -e $DIR/$tfile ] && return 3
525 touch $DIR/h11 || return 4
528 run_test 23 "open(O_CREAT), |X| unlink touch new, replay, close (test mds_cleanup_orphans)"
531 multiop $DIR/$tfile O_tSc &
533 # give multiop a chance to open
537 fail mds $MDSDEV $MDS_MOUNT_OPTS
540 wait $pid || return 1
541 [ -e $DIR/$tfile ] && return 2
544 run_test 24 "open(O_CREAT), replay, unlink, close (test mds_cleanup_orphans)"
547 multiop $DIR/$tfile O_tSc &
549 # give multiop a chance to open
554 fail mds $MDSDEV $MDS_MOUNT_OPTS
556 wait $pid || return 1
557 [ -e $DIR/$tfile ] && return 2
560 run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
564 multiop $DIR/$tfile-1 O_tSc &
566 multiop $DIR/$tfile-2 O_tSc &
568 # give multiop a chance to open
573 wait $pid2 || return 1
575 fail mds $MDSDEV $MDS_MOUNT_OPTS
577 wait $pid1 || return 2
578 [ -e $DIR/$tfile-1 ] && return 3
579 [ -e $DIR/$tfile-2 ] && return 4
582 run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test mds_cleanup_orphans)"
586 multiop $DIR/$tfile-1 O_tSc &
588 multiop $DIR/$tfile-2 O_tSc &
590 # give multiop a chance to open
595 fail mds $MDSDEV $MDS_MOUNT_OPTS
597 wait $pid1 || return 1
599 wait $pid2 || return 2
600 [ -e $DIR/$tfile-1 ] && return 3
601 [ -e $DIR/$tfile-2 ] && return 4
604 run_test 27 "|X| open(O_CREAT), unlink two, replay, close two (test mds_cleanup_orphans)"
607 multiop $DIR/$tfile-1 O_tSc &
609 multiop $DIR/$tfile-2 O_tSc &
611 # give multiop a chance to open
617 wait $pid2 || return 1
619 fail mds $MDSDEV $MDS_MOUNT_OPTS
621 wait $pid1 || return 2
622 [ -e $DIR/$tfile-1 ] && return 3
623 [ -e $DIR/$tfile-2 ] && return 4
626 run_test 28 "open(O_CREAT), |X| unlink two, close one, replay, close one (test mds_cleanup_orphans)"
629 multiop $DIR/$tfile-1 O_tSc &
631 multiop $DIR/$tfile-2 O_tSc &
633 # give multiop a chance to open
639 fail mds $MDSDEV $MDS_MOUNT_OPTS
641 wait $pid1 || return 1
643 wait $pid2 || return 2
644 [ -e $DIR/$tfile-1 ] && return 3
645 [ -e $DIR/$tfile-2 ] && return 4
648 run_test 29 "open(O_CREAT), |X| unlink two, replay, close two (test mds_cleanup_orphans)"
651 multiop $DIR/$tfile-1 O_tSc &
653 multiop $DIR/$tfile-2 O_tSc &
655 # give multiop a chance to open
661 fail mds $MDSDEV $MDS_MOUNT_OPTS
663 wait $pid1 || return 1
665 wait $pid2 || return 2
666 [ -e $DIR/$tfile-1 ] && return 3
667 [ -e $DIR/$tfile-2 ] && return 4
670 run_test 30 "open(O_CREAT) two, unlink two, replay, close two (test mds_cleanup_orphans)"
673 multiop $DIR/$tfile-1 O_tSc &
675 multiop $DIR/$tfile-2 O_tSc &
677 # give multiop a chance to open
683 fail mds $MDSDEV $MDS_MOUNT_OPTS
685 wait $pid1 || return 1
687 wait $pid2 || return 2
688 [ -e $DIR/$tfile-1 ] && return 3
689 [ -e $DIR/$tfile-2 ] && return 4
692 run_test 31 "open(O_CREAT) two, unlink one, |X| unlink one, close two (test mds_cleanup_orphans)"
694 # tests for bug 2104; completion without crashing is success. The close is
695 # stale, but we always return 0 for close, so the app never sees it.
697 multiop $DIR/$tfile O_c &
699 multiop $DIR/$tfile O_c &
701 # give multiop a chance to open
704 df $MOUNT || sleep 1 && df $MOUNT || return 1
710 run_test 32 "close() notices client eviction; close() after client eviction"
712 # Abort recovery before the client completes replay
717 # this file should be gone, because the replay was aborted
718 $CHECKSTAT -t file $DIR/$tfile && return 3
721 run_test 33 "abort recovery before client does replay"
724 multiop $DIR/$tfile O_c &
726 # give multiop a chance to open
733 [ -e $DIR/$tfile ] && return 1
737 run_test 34 "abort recovery before client does replay (test mds_cleanup_orphans)"
739 # bug 2278 - generate one orphan on OST, then destroy it during recovery from llog
743 #define OBD_FAIL_MDS_REINT_NET_REP 0x119
744 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
749 # give a chance to remove from MDS
751 $CHECKSTAT -t file $DIR/$tfile && return 1 || true
753 run_test 35 "test recovery from llog for unlink op"
755 # b=2432 resent cancel after replay uses wrong cookie,
756 # so don't resend cancels
760 checkstat $DIR/$tfile
761 facet_failover mds $MDS_MOUNT_OPTS
763 if dmesg | grep "unknown lock cookie"; then
764 echo "cancel after replay failed"
768 run_test 36 "don't resend cancel"
771 # directory orphans can't be unlinked from PENDING directory
773 rmdir $DIR/$tfile 2>/dev/null
774 multiop $DIR/$tfile dD_c &
776 # give multiop a chance to open
781 # clear the dmesg buffer so we only see errors from this recovery
785 dmesg | grep "mds_unlink_orphan.*error .* unlinking orphan" && return 1
789 run_test 37 "abort recovery before client does replay (test mds_cleanup_orphans for directories)"
792 createmany -o $DIR/$tfile-%d 800
793 unlinkmany $DIR/$tfile-%d 0 400
795 fail mds $MDSDEV $MDS_MOUNT_OPTS
796 unlinkmany $DIR/$tfile-%d 400 400
798 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
800 run_test 38 "test recovery from unlink llog (test llog_gen_rec) "
802 test_39() { # bug 4176
803 createmany -o $DIR/$tfile-%d 800
805 unlinkmany $DIR/$tfile-%d 0 400
806 fail mds $MDSDEV $MDS_MOUNT_OPTS
807 unlinkmany $DIR/$tfile-%d 400 400
809 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
811 run_test 39 "test recovery from unlink llog (test llog_gen_rec) "
814 cat /proc/fs/lustre/osc/*/stats |
815 awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }'
820 $LCTL mark multiop $MOUNT/$tfile OS_c
821 multiop $MOUNT/$tfile OS_c &
823 writeme -s $MOUNT/${tfile}-2 &
826 facet_failover mds $MDS_MOUNT_OPTS
827 #define OBD_FAIL_MDS_CONNECT_NET 0x117
828 do_facet mds "sysctl -w lustre.fail_loc=0x80000117"
830 stat1=`count_ost_writes`
832 stat2=`count_ost_writes`
833 echo "$stat1, $stat2"
834 if [ $stat1 -lt $stat2 ]; then
835 echo "writes continuing during recovery"
838 echo "writes not continuing during recovery, bug 2477"
841 echo "waiting for writeme $WRITE_PID"
845 echo "waiting for multiop $PID"
846 wait $PID || return 2
847 do_facet client munlink $MOUNT/$tfile || return 3
848 do_facet client munlink $MOUNT/${tfile}-2 || return 3
851 run_test 40 "cause recovery in ptlrpc, ensure IO continues"
855 # make sure that a read to one osc doesn't try to double-unlock its page just
856 # because another osc is invalid. trigger_group_io used to mistakenly return
857 # an error if any oscs were invalid even after having successfully put rpcs
858 # on valid oscs. This was fatal if the caller was ll_readpage who unlocked
859 # the page, guaranteeing that the unlock from the RPC completion would
860 # assert on trying to unlock the unlocked page.
862 local f=$MOUNT/$tfile
863 # make sure the start of the file is ost1
864 lfs setstripe $f $((128 * 1024)) 0 0
865 do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
867 # fail ost2 and read from ost1
868 local osc2_dev=`$LCTL device_list | \
869 awk '(/ost2.*client_facet/){print $4}' `
870 $LCTL --device %$osc2_dev deactivate
871 do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3
872 $LCTL --device %$osc2_dev activate
875 run_test 41 "read from a valid osc while other oscs are invalid"
877 # test MDS recovery after ost failure
879 blocks=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
880 createmany -o $DIR/$tfile-%d 800
882 unlinkmany $DIR/$tfile-%d 0 400
883 DEBUG42=`sysctl -n lnet.debug`
884 sysctl -w lnet.debug=-1
885 facet_failover ost $OST_MOUNT_OPTS
887 # osc is evicted, fs is smaller (but only with failout OSTs, bug 7287)
888 #blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
889 #[ $blocks_after -lt $blocks ] || return 1
890 echo wait for MDS to timeout and recover
891 sleep $((TIMEOUT * 2))
892 sysctl -w lnet.debug=$DEBUG42
893 unlinkmany $DIR/$tfile-%d 400 400
894 $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
896 run_test 42 "recovery after ost failure"
898 # timeout in MDS/OST recovery RPC will LBUG MDS
899 test_43() { # bug 2530
902 # OBD_FAIL_OST_CREATE_NET 0x204
903 do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
904 fail mds $MDSDEV $MDS_MOUNT_OPTS
906 do_facet ost "sysctl -w lustre.fail_loc=0"
910 run_test 43 "mds osc import failure during recovery; don't LBUG"
913 mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
914 do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
915 $LCTL --device $mdcdev recover
917 do_facet mds "sysctl -w lustre.fail_loc=0"
920 run_test 44 "race in target handle connect"
922 # Handle failed close
924 mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
925 $LCTL --device $mdcdev recover
927 multiop $DIR/$tfile O_c &
931 # This will cause the CLOSE to fail before even
932 # allocating a reply buffer
933 $LCTL --device $mdcdev deactivate
937 wait $pid || return 1
939 $LCTL --device $mdcdev activate
942 $CHECKSTAT -t file $DIR/$tfile || return 2
945 run_test 45 "Handle failed close"
949 drop_reply "touch $DIR/$tfile"
950 fail mds $MDSDEV $MDS_MOUNT_OPTS
951 # ironically, the previous test, 45, will cause a real forced close,
952 # so just look for one for this test
953 dmesg | grep -i "force closing client file handle for $tfile" && return 1
956 run_test 46 "Don't leak file handle after open resend (3325)"
958 test_47() { # bug 2824
959 # create some files to make sure precreate has been done on all
960 # OSTs. (just in case this test is run independently)
961 createmany -o $DIR/$tfile 20 || return 1
963 # OBD_FAIL_OST_CREATE_NET 0x204
964 fail ost $OSTDEV $OST_MOUNT_OPTS
965 do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
966 df $MOUNT || return 2
968 # let the MDS discover the OST failure, attempt to recover, fail
970 sleep $((3 * TIMEOUT))
972 # Without 2824, this createmany would hang
973 createmany -o $DIR/$tfile 20 || return 3
974 unlinkmany $DIR/$tfile 20 || return 4
976 do_facet ost "sysctl -w lustre.fail_loc=0"
979 run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
983 createmany -o $DIR/$tfile 20 || return 1
984 # OBD_FAIL_OST_EROFS 0x216
985 fail mds $MDSDEV $MDS_MOUNT_OPTS
986 do_facet ost "sysctl -w lustre.fail_loc=0x80000216"
987 df $MOUNT || return 2
989 createmany -o $DIR/$tfile 20 20 || return 2
990 unlinkmany $DIR/$tfile 40 || return 3
992 do_facet ost "sysctl -w lustre.fail_loc=0"
995 run_test 48 "MDS->OSC failure during precreate cleanup (2824)"
998 local osc_dev=`$LCTL device_list | \
999 awk '(/ost_svc_mds_svc/){print $4}' `
1000 $LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover
1001 # give the mds_lov_sync threads a chance to run
1004 run_test 50 "Double OSC recovery, don't LASSERT (3812)"
1006 # b3764 timed out lock replay
1009 cancel_lru_locks MDC
1011 multiop $DIR/$tfile s
1013 do_facet mds "sysctl -w lustre.fail_loc=0x8000030c"
1014 fail mds $MDSDEV $MDS_MOUNT_OPTS
1015 do_facet mds "sysctl -w lustre.fail_loc=0x0"
1017 $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
1019 run_test 52 "time out lock replay (3764)"
1021 #b_cray 53 "|X| open request and close reply while two MDC requests in flight"
1022 #b_cray 54 "|X| open request and close reply while two MDC requests in flight"
1024 #b3761 ASSERTION(hash != 0) failed
1026 # OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE
1027 do_facet mds "sysctl -w lustre.fail_loc=0x8000012b"
1029 # give touch a chance to run
1031 do_facet mds "sysctl -w lustre.fail_loc=0x0"
1035 run_test 55 "let MDS_CHECK_RESENT return the original return code instead of 0"
1037 #b3440 ASSERTION(rec->ur_fid2->id) failed
1039 ln -s foo $DIR/$tfile
1041 #drop_reply "cat $DIR/$tfile"
1042 fail mds $MDSDEV $MDS_MOUNT_OPTS
1045 run_test 56 "don't replay a symlink open request (3440)"
1047 # recover one mds-ost setattr from llog
1049 #define OBD_FAIL_MDS_OST_SETATTR 0x12c
1050 do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
1053 fail mds $MDSDEV $MDS_MOUNT_OPTS
1055 $CHECKSTAT -t file $DIR/$tfile || return 1
1056 do_facet mds "sysctl -w lustre.fail_loc=0x0"
1059 run_test 57 "test recovery from llog for setattr op"
1061 # recover many mds-ost setattrs from llog
1063 #define OBD_FAIL_MDS_OST_SETATTR 0x12c
1064 do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
1066 createmany -o $DIR/$tdir/$tfile-%d 2500
1068 fail mds $MDSDEV $MDS_MOUNT_OPTS
1070 $CHECKSTAT -t file $DIR/$tdir/$tfile-* || return 1
1071 do_facet mds "sysctl -w lustre.fail_loc=0x0"
1072 unlinkmany $DIR/$tdir/$tfile-%d 2500
1075 run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"
1077 equals_msg test complete, cleaning up