6 # This test needs to be run on the client
9 LUSTRE=${LUSTRE:-`dirname $0`/..}
10 . $LUSTRE/tests/test-framework.sh
14 . ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh}
21 # 46 - The MDS will always have to force close the cached opens
24 if [ `using_krb5_sec $SECURITY` == 'n' ] ; then
25 ALWAYS_EXCEPT="0c $ALWAYS_EXCEPT"
32 if [ "$MDSCOUNT" -gt 1 ]; then
34 for mds in `mds_list`; do
35 MDSDEV=$TMP/${mds}-`hostname`
36 add_mds $mds --dev $MDSDEV --size $MDSSIZE --lmv lmv1_svc
38 add_lov_to_lmv lov1 lmv1_svc --stripe_sz $STRIPE_BYTES \
39 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
42 add_mds $SINGLEMDS --dev $MDSDEV --size $MDSSIZE
43 add_lov lov1 $SINGLEMDS --stripe_sz $STRIPE_BYTES \
44 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
48 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
49 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
50 add_client client $MDS --lov lov1 --path $MOUNT
56 # make sure we are using the primary MDS, so the config log will
57 # be able to clean up properly.
58 activemds=`facet_active $SINGLEMDS`
59 if [ $activemds != "$SINGLEMDS" ]; then
62 zconf_umount `hostname` $MOUNT
63 for mds in `mds_list`; do
64 stop $mds ${FORCE} $MDSLCONFARGS
66 stop ost2 ${FORCE} --dump cleanup.log
67 stop ost ${FORCE} --dump cleanup.log
72 if [ "$ONLY" == "cleanup" ]; then
73 sysctl -w portals.debug=0 || true
78 SETUP=${SETUP:-"setup"}
79 CLEANUP=${CLEANUP:-"cleanup"}
84 start_krb5_kdc || exit 1
85 start_lsvcgssd || exit 2
87 start ost --reformat $OSTLCONFARGS
88 start ost2 --reformat $OSTLCONFARGS
89 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
90 for mds in `mds_list`; do
91 start $mds --reformat $MDSLCONFARGS
93 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
98 if [ "$ONLY" == "setup" ]; then
105 replay_barrier $SINGLEMDS
108 run_test 0 "empty replay"
111 # this test attempts to trigger a race in the precreation code,
112 # and must run before any other objects are created on the filesystem
114 createmany -o $DIR/$tfile 20 || return 1
115 unlinkmany $DIR/$tfile 20 || return 2
117 run_test 0b "ensure object created after recover exists. (3284)"
120 if [ `using_krb5_sec $SECURITY` == 'n' ] ; then
121 echo "Skip 0c in non-gss mode"
124 # drop gss error notification
125 replay_barrier $SINGLEMDS
126 fail_drop $SINGLEMDS 0x760
128 # drop gss init request
129 replay_barrier $SINGLEMDS
130 fail_drop $SINGLEMDS 0x780
132 run_test 0c "empty replay with gss init failures"
135 replay_barrier $SINGLEMDS
138 $CHECKSTAT -t file $DIR/$tfile || return 1
141 run_test 1 "simple create"
144 replay_barrier $SINGLEMDS
147 $CHECKSTAT -t file $DIR/$tfile || return 1
153 ./mcreate $DIR/$tfile
154 replay_barrier $SINGLEMDS
157 $CHECKSTAT -t file $DIR/$tfile || return 1
163 replay_barrier $SINGLEMDS
165 o_directory $DIR/$tfile
167 $CHECKSTAT -t file $DIR/$tfile || return 2
170 run_test 3a "replay failed open(O_DIRECTORY)"
173 replay_barrier $SINGLEMDS
174 #define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
175 do_facet mds "sysctl -w lustre.fail_loc=0x80000114"
177 do_facet mds "sysctl -w lustre.fail_loc=0"
179 $CHECKSTAT -t file $DIR/$tfile && return 2
182 run_test 3b "replay failed open -ENOMEM"
185 replay_barrier $SINGLEMDS
186 #define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
187 do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
189 do_facet mds "sysctl -w lustre.fail_loc=0"
192 $CHECKSTAT -t file $DIR/$tfile && return 2
195 run_test 3c "replay failed open -ENOMEM"
198 replay_barrier $SINGLEMDS
199 for i in `seq 10`; do
200 echo "tag-$i" > $DIR/$tfile-$i
203 for i in `seq 10`; do
204 grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
207 run_test 4 "|x| 10 open(O_CREAT)s"
210 replay_barrier $SINGLEMDS
213 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
215 run_test 4b "|x| rm 10 files"
217 # The idea is to get past the first block of precreated files on both
218 # osts, and then replay.
220 replay_barrier $SINGLEMDS
221 for i in `seq 220`; do
222 echo "tag-$i" > $DIR/$tfile-$i
225 for i in `seq 220`; do
226 grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i"
230 # waiting for commitment of removal
232 run_test 5 "|x| 220 open(O_CREAT)"
236 replay_barrier $SINGLEMDS
238 mcreate $DIR/$tdir/$tfile
240 $CHECKSTAT -t dir $DIR/$tdir || return 1
241 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
243 # waiting for log process thread
245 run_test 6 "mkdir + contained create"
248 replay_barrier $SINGLEMDS
251 $CHECKSTAT -t dir $DIR/$tdir && return 1 || true
253 run_test 6b "|X| rmdir"
257 replay_barrier $SINGLEMDS
258 mcreate $DIR/$tdir/$tfile
260 $CHECKSTAT -t dir $DIR/$tdir || return 1
261 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
264 run_test 7 "mkdir |X| contained create"
267 replay_barrier $SINGLEMDS
268 multiop $DIR/$tfile mo_c &
273 $CHECKSTAT -t file $DIR/$tfile || return 1
274 kill -USR1 $MULTIPID || return 2
275 wait $MULTIPID || return 3
278 run_test 8 "creat open |X| close"
281 replay_barrier $SINGLEMDS
283 local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
285 local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
287 echo " old_inum == $old_inum, new_inum == $new_inum"
288 if [ $old_inum -eq $new_inum ] ;
290 echo " old_inum and new_inum match"
292 echo "!!!! old_inum and new_inum NOT match"
297 run_test 9 "|X| create (same inum/gen)"
301 replay_barrier $SINGLEMDS
302 mv $DIR/$tfile $DIR/$tfile-2
306 $CHECKSTAT $DIR/$tfile && return 1
307 $CHECKSTAT $DIR/$tfile-2 || return 2
311 run_test 10 "create |X| rename unlink"
315 echo "old" > $DIR/$tfile
316 mv $DIR/$tfile $DIR/$tfile-2
317 replay_barrier $SINGLEMDS
318 echo "new" > $DIR/$tfile
320 grep old $DIR/$tfile-2
322 grep new $DIR/$tfile || return 1
323 grep old $DIR/$tfile-2 || return 2
325 run_test 11 "create open write rename |X| create-old-name read"
329 multiop $DIR/$tfile o_tSc &
331 # give multiop a chance to open
334 replay_barrier $SINGLEMDS
336 wait $pid || return 1
339 [ -e $DIR/$tfile ] && return 2
342 run_test 12 "open, unlink |X| close"
345 # 1777 - replay open after committed chmod that would make
346 # a regular open a failure
349 multiop $DIR/$tfile O_wc &
351 # give multiop a chance to open
354 $CHECKSTAT -p 0 $DIR/$tfile
355 replay_barrier $SINGLEMDS
358 wait $pid || return 1
360 $CHECKSTAT -s 1 -p 0 $DIR/$tfile || return 2
363 run_test 13 "open chmod 0 |x| write close"
366 multiop $DIR/$tfile O_tSc &
368 # give multiop a chance to open
371 replay_barrier $SINGLEMDS
372 kill -USR1 $pid || return 1
373 wait $pid || return 2
376 [ -e $DIR/$tfile ] && return 3
379 run_test 14 "open(O_CREAT), unlink |X| close"
382 multiop $DIR/$tfile O_tSc &
384 # give multiop a chance to open
387 replay_barrier $SINGLEMDS
388 touch $DIR/g11 || return 1
390 wait $pid || return 2
393 [ -e $DIR/$tfile ] && return 3
394 touch $DIR/h11 || return 4
397 run_test 15 "open(O_CREAT), unlink |X| touch new, close"
401 replay_barrier $SINGLEMDS
404 mcreate $DIR/$tfile-2
406 [ -e $DIR/$tfile ] && return 1
407 [ -e $DIR/$tfile-2 ] || return 2
408 munlink $DIR/$tfile-2 || return 3
410 run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new"
413 replay_barrier $SINGLEMDS
414 multiop $DIR/$tfile O_c &
416 # give multiop a chance to open
419 kill -USR1 $pid || return 1
420 wait $pid || return 2
421 $CHECKSTAT -t file $DIR/$tfile || return 3
424 run_test 17 "|X| open(O_CREAT), |replay| close"
427 replay_barrier $SINGLEMDS
428 multiop $DIR/$tfile O_tSc &
430 # give multiop a chance to open
433 touch $DIR/$tfile-2 || return 1
434 echo "pid: $pid will close"
436 wait $pid || return 2
439 [ -e $DIR/$tfile ] && return 3
440 [ -e $DIR/$tfile-2 ] || return 4
441 # this touch frequently fails
442 touch $DIR/$tfile-3 || return 5
443 munlink $DIR/$tfile-2 || return 6
444 munlink $DIR/$tfile-3 || return 7
447 run_test 18 "|X| open(O_CREAT), unlink, touch new, close, touch, unlink"
449 # bug 1855 (a simpler form of test_11 above)
451 replay_barrier $SINGLEMDS
453 echo "old" > $DIR/$tfile
454 mv $DIR/$tfile $DIR/$tfile-2
455 grep old $DIR/$tfile-2
457 grep old $DIR/$tfile-2 || return 2
459 run_test 19 "|X| mcreate, open, write, rename "
462 replay_barrier $SINGLEMDS
463 multiop $DIR/$tfile O_tSc &
465 # give multiop a chance to open
471 wait $pid || return 1
472 [ -e $DIR/$tfile ] && return 2
475 run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
478 replay_barrier $SINGLEMDS
479 multiop $DIR/$tfile O_tSc &
481 # give multiop a chance to open
484 touch $DIR/g11 || return 1
488 wait $pid || return 2
489 [ -e $DIR/$tfile ] && return 3
490 touch $DIR/h11 || return 4
493 run_test 21 "|X| open(O_CREAT), unlink touch new, replay, close (test mds_cleanup_orphans)"
496 multiop $DIR/$tfile O_tSc &
498 # give multiop a chance to open
501 replay_barrier $SINGLEMDS
506 wait $pid || return 1
507 [ -e $DIR/$tfile ] && return 2
510 run_test 22 "open(O_CREAT), |X| unlink, replay, close (test mds_cleanup_orphans)"
513 multiop $DIR/$tfile O_tSc &
515 # give multiop a chance to open
518 replay_barrier $SINGLEMDS
520 touch $DIR/g11 || return 1
524 wait $pid || return 2
525 [ -e $DIR/$tfile ] && return 3
526 touch $DIR/h11 || return 4
529 run_test 23 "open(O_CREAT), |X| unlink touch new, replay, close (test mds_cleanup_orphans)"
532 multiop $DIR/$tfile O_tSc &
534 # give multiop a chance to open
537 replay_barrier $SINGLEMDS
541 wait $pid || return 1
542 [ -e $DIR/$tfile ] && return 2
545 run_test 24 "open(O_CREAT), replay, unlink, close (test mds_cleanup_orphans)"
548 multiop $DIR/$tfile O_tSc &
550 # give multiop a chance to open
554 replay_barrier $SINGLEMDS
557 wait $pid || return 1
558 [ -e $DIR/$tfile ] && return 2
561 run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
564 replay_barrier $SINGLEMDS
565 multiop $DIR/$tfile-1 O_tSc &
567 multiop $DIR/$tfile-2 O_tSc &
569 # give multiop a chance to open
574 wait $pid2 || return 1
578 wait $pid1 || return 2
579 [ -e $DIR/$tfile-1 ] && return 3
580 [ -e $DIR/$tfile-2 ] && return 4
583 run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test mds_cleanup_orphans)"
586 replay_barrier $SINGLEMDS
587 multiop $DIR/$tfile-1 O_tSc &
589 multiop $DIR/$tfile-2 O_tSc &
591 # give multiop a chance to open
598 wait $pid1 || return 1
600 wait $pid2 || return 2
601 [ -e $DIR/$tfile-1 ] && return 3
602 [ -e $DIR/$tfile-2 ] && return 4
605 run_test 27 "|X| open(O_CREAT), unlink two, replay, close two (test mds_cleanup_orphans)"
608 multiop $DIR/$tfile-1 O_tSc &
610 multiop $DIR/$tfile-2 O_tSc &
612 # give multiop a chance to open
614 replay_barrier $SINGLEMDS
618 wait $pid2 || return 1
622 wait $pid1 || return 2
623 [ -e $DIR/$tfile-1 ] && return 3
624 [ -e $DIR/$tfile-2 ] && return 4
627 run_test 28 "open(O_CREAT), |X| unlink two, close one, replay, close one (test mds_cleanup_orphans)"
630 multiop $DIR/$tfile-1 O_tSc &
632 multiop $DIR/$tfile-2 O_tSc &
634 # give multiop a chance to open
636 replay_barrier $SINGLEMDS
642 wait $pid1 || return 1
644 wait $pid2 || return 2
645 [ -e $DIR/$tfile-1 ] && return 3
646 [ -e $DIR/$tfile-2 ] && return 4
649 run_test 29 "open(O_CREAT), |X| unlink two, replay, close two (test mds_cleanup_orphans)"
652 multiop $DIR/$tfile-1 O_tSc &
654 multiop $DIR/$tfile-2 O_tSc &
656 # give multiop a chance to open
661 replay_barrier $SINGLEMDS
664 wait $pid1 || return 1
666 wait $pid2 || return 2
667 [ -e $DIR/$tfile-1 ] && return 3
668 [ -e $DIR/$tfile-2 ] && return 4
671 run_test 30 "open(O_CREAT) two, unlink two, replay, close two (test mds_cleanup_orphans)"
674 multiop $DIR/$tfile-1 O_tSc &
676 multiop $DIR/$tfile-2 O_tSc &
678 # give multiop a chance to open
682 replay_barrier $SINGLEMDS
686 wait $pid1 || return 1
688 wait $pid2 || return 2
689 [ -e $DIR/$tfile-1 ] && return 3
690 [ -e $DIR/$tfile-2 ] && return 4
693 run_test 31 "open(O_CREAT) two, unlink one, |X| unlink one, close two (test mds_cleanup_orphans)"
695 # tests for bug 2104; completion without crashing is success. The close is
696 # stale, but we always return 0 for close, so the app never sees it.
698 multiop $DIR/$tfile O_c &
700 multiop $DIR/$tfile O_c &
702 # give multiop a chance to open.
703 # 1 second is not enough; I increased it to 5. However, in an ideal world
704 # I should wait for the open to finish in a smarter manner. --umka
707 df $MOUNT || sleep 1 && df $MOUNT || return 1
713 run_test 32 "close() notices client eviction; close() after client eviction"
715 # Abort recovery before client complete
717 replay_barrier $SINGLEMDS
719 fail_abort $SINGLEMDS
720 # this file should be gone, because the replay was aborted
721 $CHECKSTAT -t file $DIR/$tfile && return 1
724 run_test 33 "abort recovery before client does replay"
727 multiop $DIR/$tfile O_c &
729 # give multiop a chance to open
733 replay_barrier $SINGLEMDS
734 fail_abort $SINGLEMDS
736 [ -e $DIR/$tfile ] && return 1
740 run_test 34 "abort recovery before client does replay (test mds_cleanup_orphans)"
742 # bug 2278 - generate one orphan on OST, then destroy it during recovery from llog
746 #define OBD_FAIL_MDS_REINT_NET_REP 0x119
747 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
752 # give a chance to remove from MDS
753 fail_abort $SINGLEMDS
754 $CHECKSTAT -t file $DIR/$tfile && return 1 || true
756 run_test 35 "test recovery from llog for unlink op"
758 # b=2432 resent cancel after replay uses wrong cookie,
759 # so don't resend cancels
761 replay_barrier $SINGLEMDS
763 checkstat $DIR/$tfile
764 facet_failover $SINGLEMDS
766 if dmesg | grep "unknown lock cookie"; then
767 echo "cancel after replay failed"
771 run_test 36 "don't resend cancel"
774 # directory orphans can't be unlinked from PENDING directory
776 rmdir $DIR/$tfile 2>/dev/null
777 multiop $DIR/$tfile dD_c &
779 # give multiop a chance to open
783 replay_barrier $SINGLEMDS
784 # clear the dmesg buffer so we only see errors from this recovery
786 fail_abort $SINGLEMDS
788 dmesg | grep "mds_unlink_orphan.*error .* unlinking orphan" && return 1
792 run_test 37 "abort recovery before client does replay (test mds_cleanup_orphans for directories)"
795 createmany -o $DIR/$tfile-%d 800
796 unlinkmany $DIR/$tfile-%d 0 400
797 replay_barrier $SINGLEMDS
799 unlinkmany $DIR/$tfile-%d 400 400
801 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
803 run_test 38 "test recovery from unlink llog (test llog_gen_rec) "
806 createmany -o $DIR/$tfile-%d 800
807 replay_barrier $SINGLEMDS
808 unlinkmany $DIR/$tfile-%d 0 400
810 unlinkmany $DIR/$tfile-%d 400 400
812 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
814 run_test 39 "test recovery from unlink llog (test llog_gen_rec) "
817 cat /proc/fs/lustre/osc/*/stats |
818 awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }'
823 $LCTL mark multiop $MOUNT/$tfile OS_c
824 multiop $MOUNT/$tfile OS_c &
826 writeme -s $MOUNT/${tfile}-2 &
829 facet_failover $SINGLEMDS
830 #define OBD_FAIL_MDS_CONNECT_NET 0x117
831 do_facet mds "sysctl -w lustre.fail_loc=0x80000117"
833 stat1=`count_ost_writes`
835 stat2=`count_ost_writes`
836 echo "$stat1, $stat2"
837 if [ $stat1 -lt $stat2 ]; then
838 echo "writes continuing during recovery"
841 echo "writes not continuing during recovery, bug 2477"
844 echo "waiting for writeme $WRITE_PID"
848 echo "waiting for multiop $PID"
849 wait $PID || return 2
850 do_facet client munlink $MOUNT/$tfile || return 3
851 do_facet client munlink $MOUNT/${tfile}-2 || return 3
854 run_test 40 "cause recovery in ptlrpc, ensure IO continues"
858 # make sure that a read to one osc doesn't try to double-unlock its page just
859 # because another osc is invalid. trigger_group_io used to mistakenly return
860 # an error if any oscs were invalid even after having successfully put rpcs
861 # on valid oscs. This was fatal if the caller was ll_readpage who unlocked
862 # the page, guaranteeing that the unlock from the RPC completion would
863 # assert on trying to unlock the unlocked page.
865 local f=$MOUNT/$tfile
866 # make sure the start of the file is ost1
867 lfs setstripe $f $((128 * 1024)) 0 0
868 do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
870 # fail ost2 and read from ost1
871 local osc2_dev=`$LCTL device_list | \
872 awk '(/ost2.*client_facet/){print $4}' `
873 $LCTL --device %$osc2_dev deactivate
874 do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3
875 $LCTL --device %$osc2_dev activate
878 run_test 41 "read from a valid osc while other oscs are invalid"
880 # test MDS recovery after ost failure
882 blocks=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
883 createmany -o $DIR/$tfile-%d 800
885 unlinkmany $DIR/$tfile-%d 0 400
888 # osc is evicted, fs is smaller
889 blocks_after=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
890 [ $blocks_after -lt $blocks ] || return 1
891 echo wait for MDS to timeout and recover
892 sleep $((TIMEOUT * 2))
893 unlinkmany $DIR/$tfile-%d 400 400
894 $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
896 run_test 42 "recovery after ost failure"
899 # timeout in MDS/OST recovery RPC will LBUG MDS
901 replay_barrier $SINGLEMDS
903 # OBD_FAIL_OST_CREATE_NET 0x204
904 do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
905 facet_failover $SINGLEMDS
906 df $MOUNT || return 1
908 do_facet ost "sysctl -w lustre.fail_loc=0"
912 run_test 43 "mds osc import failure during recovery; don't LBUG"
915 mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
916 do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
917 $LCTL --device $mdcdev recover
919 do_facet mds "sysctl -w lustre.fail_loc=0"
922 run_test 44 "race in target handle connect"
924 # Handle failed close
926 mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
927 $LCTL --device $mdcdev recover
929 multiop $DIR/$tfile O_c &
933 # This will cause the CLOSE to fail before even
934 # allocating a reply buffer
935 $LCTL --device $mdcdev deactivate
939 wait $pid || return 1
941 $LCTL --device $mdcdev activate
944 $CHECKSTAT -t file $DIR/$tfile || return 2
947 run_test 45 "Handle failed close"
951 drop_reply "touch $DIR/$tfile"
953 # ironically, the previous test, 45, will cause a real forced close,
954 # so just look for one for this test
955 dmesg | grep -i "force closing client file handle for $tfile" && return 1
958 run_test 46 "Don't leak file handle after open resend (3325)"
963 # create some files to make sure precreate has been done on all
964 # OSTs. (just in case this test is run independently)
965 createmany -o $DIR/$tfile 20 || return 1
967 # OBD_FAIL_OST_CREATE_NET 0x204
969 do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
970 df $MOUNT || return 2
972 # let the MDS discover the OST failure, attempt to recover, fail
974 sleep $((3 * TIMEOUT))
976 # Without 2824, this createmany would hang
977 createmany -o $DIR/$tfile 20 || return 3
978 unlinkmany $DIR/$tfile 20 || return 4
980 do_facet ost "sysctl -w lustre.fail_loc=0"
983 run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
987 createmany -o $DIR/${tfile}- 100
988 $CHECKSTAT $DIR/${tfile}-99 || return 1
990 df $MOUNT || echo "first df failed"
992 df $MOUNT || return 2
994 $CHECKSTAT $DIR/${tfile}-99 || return 3
997 replay_barrier $SINGLEMDS
999 unlinkmany $DIR/${tfile}- 100 || return 4
1000 if dmesg | grep "back in time"; then
1001 echo "server went back in time!"
1006 run_test 48 "Don't lose transno when client is evicted (2525)"
1008 # b=3550 - replay of unlink
1010 replay_barrier $SINGLEMDS
1011 createmany -o $DIR/$tfile-%d 400 || return 1
1012 unlinkmany $DIR/$tfile-%d 0 400 || return 2
1014 $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
1016 run_test 49 "re-write records to llog as written during fail"
1019 local osc_dev=`$LCTL device_list | \
1020 awk '(/ost_svc_$SINGLEMDS_svc/){print $4}' `
1021 $LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover
1022 # give the mds_lov_sync threads a chance to run
1025 run_test 50 "Double OSC recovery, don't LASSERT (3812)"
1027 # bug 3462 - simultaneous MDC requests
1029 replay_barrier_nodf $SINGLEMDS
1030 mkdir -p $DIR/${tdir}-1
1031 mkdir -p $DIR/${tdir}-2
1032 touch $DIR/${tdir}-2/f
1033 multiop $DIR/${tdir}-1/f O_c &
1035 # give multiop a chance to open
1038 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
1040 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1041 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 1
1045 wait $pid || return 2
1046 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
1047 rm -rf $DIR/${tdir}-*
1049 run_test 51a "|X| close request while two MDC requests in flight"
1052 replay_barrier_nodf $SINGLEMDS
1053 mkdir -p $DIR/$tdir-1
1054 mkdir -p $DIR/$tdir-2
1055 multiop $DIR/$tdir-1/f O_c &
1058 # give multiop a chance to open
1059 # 1 second does not seem to be enough; we have already hit such cases
1063 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
1064 touch $DIR/${tdir}-2/f &
1066 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1069 wait $pid || return 1
1073 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1074 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1075 rm -rf $DIR/${tdir}-*
1077 run_test 51b "|X| open request while two MDC requests in flight"
1080 replay_barrier_nodf $SINGLEMDS
1081 mkdir -p $DIR/${tdir}-1
1082 mkdir -p $DIR/${tdir}-2
1083 multiop $DIR/${tdir}-1/f O_c &
1085 # give multiop a chance to open
1088 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
1089 touch $DIR/${tdir}-2/f &
1090 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1092 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
1094 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1098 wait $pid || return 1
1099 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1100 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1101 rm -rf $DIR/${tdir}-*
1103 run_test 51c "|X| open request and close request while two MDC requests in flight"
1106 replay_barrier_nodf $SINGLEMDS
1107 mkdir -p $DIR/${tdir}-1
1108 mkdir -p $DIR/${tdir}-2
1109 touch $DIR/${tdir}-2/f
1110 multiop $DIR/${tdir}-1/f O_c &
1112 # give multiop a chance to open
1115 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
1117 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1118 #$CHECKSTAT -t file $DIR/${tdir}-2/f || return 1
1122 wait $pid || return 2
1123 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
1124 rm -rf $DIR/${tdir}-*
1126 run_test 51d "|X| close reply while two MDC requests in flight"
1129 replay_barrier_nodf $SINGLEMDS
1130 mkdir -p $DIR/$tdir-1
1131 mkdir -p $DIR/$tdir-2
1132 multiop $DIR/$tdir-1/f O_c &
1134 # give multiop a chance to open
1137 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
1138 touch $DIR/${tdir}-2/f &
1140 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1143 wait $pid || return 1
1147 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1148 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1149 rm -rf $DIR/${tdir}-*
1151 run_test 51e "|X| open reply while two MDC requests in flight"
1154 replay_barrier_nodf $SINGLEMDS
1155 mkdir -p $DIR/${tdir}-1
1156 mkdir -p $DIR/${tdir}-2
1157 multiop $DIR/${tdir}-1/f O_c &
1159 # give multiop a chance to open
1162 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
1163 touch $DIR/${tdir}-2/f &
1164 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1166 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
1168 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1172 wait $pid || return 1
1173 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1174 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1175 rm -rf $DIR/${tdir}-*
1177 run_test 51f "|X| open reply and close reply while two MDC requests in flight"
1180 replay_barrier_nodf $SINGLEMDS
1181 mkdir -p $DIR/${tdir}-1
1182 mkdir -p $DIR/${tdir}-2
1183 multiop $DIR/${tdir}-1/f O_c &
1185 # give multiop a chance to open
1188 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
1189 touch $DIR/${tdir}-2/f &
1190 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1192 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
1194 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1198 wait $pid || return 1
1199 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1200 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1201 rm -rf $DIR/${tdir}-*
1203 run_test 51g "|X| open reply and close request while two MDC requests in flight"
1206 replay_barrier_nodf $SINGLEMDS
1207 mkdir -p $DIR/${tdir}-1
1208 mkdir -p $DIR/${tdir}-2
1209 multiop $DIR/${tdir}-1/f O_c &
1211 # give multiop a chance to open
1214 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
1215 touch $DIR/${tdir}-2/f &
1216 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1218 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
1220 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1224 wait $pid || return 1
1225 $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
1226 $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
1227 rm -rf $DIR/${tdir}-*
1229 run_test 51h "|X| open request and close reply while two MDC requests in flight"
1231 # b3764 timed out lock replay
1234 cancel_lru_locks MDC
1236 multiop $DIR/$tfile s
1237 replay_barrier $SINGLEMDS
1238 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000030c"
1240 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0"
1242 $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
1244 run_test 52 "time out lock replay (3764)"
1247 replay_barrier_nodf $SINGLEMDS
1254 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
1256 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
1261 run_test 53 "|X| open request and close reply while two MDC requests in flight"
1264 replay_barrier $SINGLEMDS
1265 createmany -o $DIR/$tfile 20
1266 unlinkmany $DIR/$tfile 20
1269 run_test 54 "|X| open request and close reply while two MDC requests in flight"
1271 #b3440 ASSERTION(rec->ur_fid2->id) failed
1273 sysctl -w portals.debug=-1 portals.debug_mb=25
1274 ln -s foo $DIR/$tfile
1275 replay_barrier $SINGLEMDS
1276 #drop_reply "cat $DIR/$tfile"
1280 run_test 55 "don't replay a symlink open request (3440)"
1282 #b3761 ASSERTION(hash != 0) failed
1284 # OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE
1285 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012b"
1288 # give a chance for touch to run
1290 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0"
1291 wait $pid || return 1
1295 run_test 56 "let MDS_CHECK_RESENT return the original return code instead of 0"
1297 #b7312 LASSERT(!IS_ERR(parent)) in reconstruct_open()
1299 mkdir $DIR/$tdir || return 1
1300 touch $DIR/$tdir/$tfile || return 2
1301 multiop $DIR/$tdir/$tfile o_ &
1304 rm -f $DIR/$tdir/$tfile || return 3
1305 rm -rf $DIR/$tdir || return 4
1306 # drop first reint reply
1307 sysctl -w lustre.fail_loc=0x0000030c
1308 facet_failover $SINGLEMDS
1309 df $MOUNT || return 1
1310 kill -USR1 $MULTIPID || return 5
1311 wait $MULTIPID || return 6
1312 sysctl -w lustre.fail_loc=0
1314 run_test 57 "open orphan in reconstruct_open()"
1316 equals_msg test complete, cleaning up