# --- Script preamble (fragment of a Lustre recovery test script) ---
# NOTE(review): this chunk is a line-numbered extraction; each line carries
# its original line number and gaps in that numbering mark elided lines.
# Code is left byte-identical below; only comments are added.
6 # bug 11190 5494 7288 5493
# Tests excluded from this run; extra exceptions come from the environment.
7 ALWAYS_EXCEPT="19b 24 27 52 $RECOVERY_SMALL_EXCEPT"
# Default to full debug tracing (-1) unless the caller overrides PTLDEBUG.
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
# Source the shared harness: provides run_test, do_facet, drop_* helpers, etc.
11 . $LUSTRE/tests/test-framework.sh
13 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
17 # Allow us to override the setup if we already have a mounted system by
18 # setting SETUP=" " and CLEANUP=" "
19 SETUP=${SETUP:-"setup"}
20 CLEANUP=${CLEANUP:-"cleanup"}
28 cleanupall || { echo "FAILed to clean up"; exit 20; }
# NOTE(review): the bodies and closing 'fi' of the two conditionals below
# fall in elided lines; they are incomplete as shown.
31 if [ ! -z "$EVAL" ]; then
36 if [ "$ONLY" == "cleanup" ]; then
# Quiet the LNET debug log for a cleanup-only invocation.
37 sysctl -w lnet.debug=0 || true
44 [ "$ONLY" == "setup" ] && exit
# --- test_1 .. test_9 body fragments (test_N() headers and closing braces
# fall in elided lines) ---
# drop_request / drop_reply / drop_reint_reply come from test-framework.sh
# (sourced above) and run a command while injecting a dropped MDS request
# or reply, exercising the client resend path.
# test_1: create with dropped request, then dropped reply.
47 drop_request "mcreate $MOUNT/1" || return 1
48 drop_reint_reply "mcreate $MOUNT/2" || return 2
50 run_test 1 "mcreate: drop req, drop rep"
# test_2: chmod with dropped request/reply.
53 drop_request "tchmod 111 $MOUNT/2" || return 1
54 drop_reint_reply "tchmod 666 $MOUNT/2" || return 2
56 run_test 2 "chmod: drop req, drop rep"
# test_3: stat with dropped request/reply.
59 drop_request "statone $MOUNT/2" || return 1
60 drop_reply "statone $MOUNT/2" || return 2
62 run_test 3 "stat: drop req, drop rep"
# test_4: open/read with dropped request/reply.
# NOTE(review): depends on /etc/inittab existing on the client node.
65 do_facet client "cp /etc/inittab $MOUNT/inittab" || return 1
66 drop_request "cat $MOUNT/inittab > /dev/null" || return 2
67 drop_reply "cat $MOUNT/inittab > /dev/null" || return 3
69 run_test 4 "open: drop req, drop rep"
# test_5: rename with dropped request/reply, then verify the final name.
72 drop_request "mv $MOUNT/inittab $MOUNT/renamed" || return 1
73 drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
74 do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
76 run_test 5 "rename: drop req, drop rep"
# test_6: hard link with dropped request/reply; recreate the source file
# first in case test_5 did not leave it behind.
78 [ ! -e $MOUNT/renamed-again ] && cp /etc/inittab $MOUNT/renamed-again
80 drop_request "mlink $MOUNT/renamed-again $MOUNT/link1" || return 1
81 drop_reint_reply "mlink $MOUNT/renamed-again $MOUNT/link2" || return 2
83 run_test 6 "link: drop req, drop rep"
# test_7: unlink with dropped request/reply; recreate links if missing.
85 [ ! -e $MOUNT/link1 ] && mlink $MOUNT/renamed-again $MOUNT/link1
86 [ ! -e $MOUNT/link2 ] && mlink $MOUNT/renamed-again $MOUNT/link2
88 drop_request "munlink $MOUNT/link1" || return 1
89 drop_reint_reply "munlink $MOUNT/link2" || return 2
91 run_test 7 "unlink: drop req, drop rep"
# test_8: touch with only the reply dropped.
95 drop_reint_reply "touch $MOUNT/$tfile" || return 1
97 run_test 8 "touch: drop rep (bug 1423)"
# test_9: pause bulk I/O on the OST, then confirm normal I/O still works.
101 pause_bulk "cp /etc/profile $MOUNT/$tfile" || return 1
102 do_facet client "cp /etc/termcap $MOUNT/${tfile}.2" || return 2
103 do_facet client "sync"
104 do_facet client "rm $MOUNT/$tfile $MOUNT/${tfile}.2" || return 3
106 run_test 9 "pause bulk on OST (bug 1420)"
# --- test_10 .. test_12 body fragments (headers/braces and some
# assignments elided) ---
# test_10: drop the blocking-callback AST so the MDS evicts the client,
# then verify the server still finished the original request.
110 do_facet client mcreate $MOUNT/$tfile || return 1
111 drop_bl_callback "chmod 0777 $MOUNT/$tfile" || echo "evicted as expected"
112 # wait for the mds to evict the client
113 #echo "sleep $(($TIMEOUT*2))"
114 #sleep $(($TIMEOUT*2))
115 do_facet client touch $MOUNT/$tfile || echo "touch failed, evicted"
116 do_facet client checkstat -v -p 0777 $MOUNT/$tfile || return 3
117 do_facet client "munlink $MOUNT/$tfile"
119 run_test 10 "finish request on server after client eviction (bug 1521)"
122 # wake up a thread waiting for completion after eviction
# test_11: multiop Ow = open+write, or = open+read (single-char op script).
124 do_facet client multiop $MOUNT/$tfile Ow || return 1
125 do_facet client multiop $MOUNT/$tfile or || return 2
129 do_facet client multiop $MOUNT/$tfile or || return 3
130 drop_bl_callback multiop $MOUNT/$tfile Ow || echo "evicted as expected"
132 do_facet client munlink $MOUNT/$tfile || return 4
134 run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"
# test_12: arm OBD_FAIL_MDS_CLOSE_NET (0x115) on the MDS, run multiop in
# the background, and wait for it to recover from the timed-out resend.
138 $LCTL mark multiop $MOUNT/$tfile OS_c
139 do_facet mds "sysctl -w lustre.fail_loc=0x115"
140 clear_failloc mds $((TIMEOUT * 2)) &
141 multiop $MOUNT/$tfile OS_c &
143 #define OBD_FAIL_MDS_CLOSE_NET 0x115
# NOTE(review): $PID is assigned in an elided line (presumably PID=$! after
# the backgrounded multiop above) — confirm against the full source.
146 echo "waiting for multiop $PID"
147 wait $PID || return 2
148 do_facet client munlink $MOUNT/$tfile || return 3
150 run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"
# --- test_13 .. test_15 body fragments ---
152 # Bug 113, check that readdir lost recv timeout works.
154 mkdir $MOUNT/readdir || return 1
# NOTE(review): bare 'return' propagates touch's status; the file's
# convention elsewhere is an explicit numeric code — possibly an oversight.
155 touch $MOUNT/readdir/newentry || return
156 # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
# 0x8... = OBD_FAIL_ONCE: the fault fires a single time, then self-clears.
157 do_facet mds "sysctl -w lustre.fail_loc=0x80000104"
158 ls $MOUNT/readdir || return 3
159 do_facet mds "sysctl -w lustre.fail_loc=0"
160 rm -rf $MOUNT/readdir || return 4
162 run_test 13 "mdc_readpage restart test (bug 1138)"
164 # Bug 113, check that readdir lost send timeout works.
167 touch $MOUNT/readdir/newentry
168 # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE
169 do_facet mds "sysctl -w lustre.fail_loc=0x80000106"
170 ls $MOUNT/readdir || return 1
171 do_facet mds "sysctl -w lustre.fail_loc=0"
173 run_test 14 "mdc_readpage resend test (bug 1138)"
# test_15: fail the open path on the MDS; the touch is expected to FAIL,
# so success ('&&') is the error condition here.
176 do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
177 touch $DIR/$tfile && return 1
180 run_test 15 "failed open (-ENOMEM)"
# --- test_16 / test_17 body fragments (loop bodies and 'done' lines
# partially elided) ---
# Save the current client read-ahead setting so it can be restored below;
# read-ahead is presumably disabled in the elided loop body so the bulk
# transfer pattern is deterministic — TODO confirm against full source.
182 READ_AHEAD=`cat $LPROC/llite/*/max_read_ahead_mb | head -n 1`
184 for f in $LPROC/llite/*/max_read_ahead_mb; do
# Second loop restores the saved value on every llite instance.
190 for f in $LPROC/llite/*/max_read_ahead_mb; do
191 echo $READ_AHEAD > $f
196 do_facet client cp /etc/termcap $MOUNT
200 #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
201 do_facet ost1 sysctl -w lustre.fail_loc=0x80000504
203 # OST bulk will time out here, client resends
204 do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 1
205 sysctl -w lustre.fail_loc=0
206 # give recovery a chance to finish (shouldn't take long)
# Re-read to prove the client was not evicted by the timed-out bulk.
208 do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
211 run_test 16 "timeout bulk put, don't evict client (2732)"
# test_17: same idea for the bulk GET (write) path.
214 # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
215 # OST bulk will time out here, client retries
216 sysctl -w lustre.fail_loc=0x80000503
217 # need to ensure we send an RPC
218 do_facet client cp /etc/termcap $DIR/$tfile
222 sysctl -w lustre.fail_loc=0
223 do_facet client "df $DIR"
224 # expect cmp to succeed, client resent bulk
225 do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3
226 do_facet client "rm $DIR/$tfile" || return 4
229 run_test 17 "timeout bulk get, don't evict client (2732)"
# --- test_18a / test_18b body fragments ---
# test_18a needs a second OST; skip gracefully on single-OST setups.
232 [ -z ${ost2_svc} ] && echo Skipping, needs 2 osts && return 0
234 do_facet client mkdir -p $MOUNT/$tdir
235 f=$MOUNT/$tdir/$tfile
# Precondition: client page cache must start out empty.
238 pgcache_empty || return 1
# Stripe the file onto the second OST so deactivating its OSC affects it.
241 lfs setstripe $f $((128 * 1024)) 1 1
243 do_facet client cp /etc/termcap $f
# Look up the local OSC device number for ost2, then deactivate it.
245 local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'`
246 $LCTL --device $osc2dev deactivate || return 3
247 # my understanding is that there should be nothing in the page
248 # cache after the client reconnects?
# NOTE(review): $rc is initialized/returned in elided lines — confirm.
250 pgcache_empty || rc=2
251 $LCTL --device $osc2dev activate
255 run_test 18a "manual ost invalidate clears page cache immediately"
# test_18b: same check, but via eviction + reconnect instead of a manual
# deactivate/activate cycle.
258 do_facet client mkdir -p $MOUNT/$tdir
259 f=$MOUNT/$tdir/$tfile
260 f2=$MOUNT/$tdir/${tfile}-2
263 pgcache_empty || return 1
265 # shouldn't have to set stripe size of count==1
266 lfs setstripe $f $((128 * 1024)) 0 1
267 lfs setstripe $f2 $((128 * 1024)) 0 1
269 do_facet client cp /etc/termcap $f
272 # allow recovery to complete
273 sleep $((TIMEOUT + 2))
274 # my understanding is that there should be nothing in the page
275 # cache after the client reconnects?
277 pgcache_empty || rc=2
281 run_test 18b "eviction and reconnect clears page cache (2766)"
# --- test_19a / test_19b / test_20a / test_20b fragments ---
# test_19a: drop the LDLM cancel so the server's expired-lock handler
# (expired_lock_main) evicts the client; $f is set in an elided line.
285 do_facet client mcreate $f || return 1
286 drop_ldlm_cancel "chmod 0777 $f" || echo "evicted as expected"
288 do_facet client checkstat -v -p 0777 $f || echo evicted
289 # let the client reconnect
291 do_facet client "munlink $f"
293 run_test 19a "test expired_lock_main on mds (2867)"
# test_19b: same scenario against the OST (data locks via multiop I/O).
297 do_facet client multiop $f Ow || return 1
298 do_facet client multiop $f or || return 2
302 do_facet client multiop $f or || return 3
303 drop_ldlm_cancel multiop $f Ow || echo "client evicted, as expected"
305 do_facet client munlink $f || return 4
307 run_test 19b "test expired_lock_main on ost (2867)"
309 test_20a() { # bug 2983 - ldlm_handle_enqueue cleanup
311 multiop $DIR/$tdir/${tfile} O_wc &
315 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
316 do_facet ost1 sysctl -w lustre.fail_loc=0x80000308
# NOTE(review): $MULTI_PID and $rc are assigned in elided lines
# (presumably MULTI_PID=$! and rc=$? after a wait) — confirm.
317 kill -USR1 $MULTI_PID
# multiop is EXPECTED to fail its enqueue; rc==0 means the fault missed.
320 [ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true
322 run_test 20a "ldlm_handle_enqueue error (should return error)"
324 test_20b() { # bug 2986 - ldlm_handle_enqueue error during open
326 touch $DIR/$tdir/${tfile}
328 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
329 do_facet ost1 sysctl -w lustre.fail_loc=0x80000308
# The dd is expected to fail; succeeding means the open enqueue fault
# did not trigger. (Next two lines are one continued statement.)
330 dd if=/etc/hosts of=$DIR/$tdir/$tfile && \
331 error "didn't fail open enqueue" || true
333 run_test 20b "ldlm_handle_enqueue error (should return error)"
# --- test_21a / test_21b / test_21c body fragments ---
# Common shape for the 21x family: hold a file open with a backgrounded
# multiop (O_c = open, wait for USR1, then close), arm a fail_loc on the
# MDS so a concurrent open/close RPC is dropped, then release the close
# via kill -USR1 and verify both files exist.
# NOTE(review): $close_pid/$open_pid are captured ($!) in elided lines.
# test_21a: drop the CLOSE request (per the run_test description).
336 mkdir -p $DIR/$tdir-1
337 mkdir -p $DIR/$tdir-2
338 multiop $DIR/$tdir-1/f O_c &
341 do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
342 multiop $DIR/$tdir-2/f Oc &
345 do_facet mds "sysctl -w lustre.fail_loc=0"
347 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
348 kill -USR1 $close_pid
350 wait $close_pid || return 1
351 wait $open_pid || return 2
352 do_facet mds "sysctl -w lustre.fail_loc=0"
354 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
355 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
359 run_test 21a "drop close request while close and open are both in flight"
# test_21b: drop the OPEN request instead.
362 mkdir -p $DIR/$tdir-1
363 mkdir -p $DIR/$tdir-2
364 multiop $DIR/$tdir-1/f O_c &
367 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
368 mcreate $DIR/$tdir-2/f &
371 do_facet mds "sysctl -w lustre.fail_loc=0"
373 kill -USR1 $close_pid
375 wait $close_pid || return 1
376 wait $open_pid || return 3
378 $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
379 $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
382 run_test 21b "drop open request while close and open are both in flight"
# test_21c: drop BOTH requests.
385 mkdir -p $DIR/$tdir-1
386 mkdir -p $DIR/$tdir-2
387 multiop $DIR/$tdir-1/f O_c &
390 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
391 mcreate $DIR/$tdir-2/f &
394 do_facet mds "sysctl -w lustre.fail_loc=0"
396 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
397 kill -USR1 $close_pid
399 wait $close_pid || return 1
400 wait $open_pid || return 2
402 do_facet mds "sysctl -w lustre.fail_loc=0"
404 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
405 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
408 run_test 21c "drop both request while close and open are both in flight"
# --- test_21d / test_21e / test_21f body fragments ---
# Same open/close-in-flight pattern as 21a-c, but dropping REPLIES rather
# than requests (per the run_test descriptions below).
# NOTE(review): $pid is captured ($!) in elided lines.
# test_21d: drop the close reply.
411 mkdir -p $DIR/$tdir-1
412 mkdir -p $DIR/$tdir-2
413 multiop $DIR/$tdir-1/f O_c &
416 do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
417 multiop $DIR/$tdir-2/f Oc &
419 do_facet mds "sysctl -w lustre.fail_loc=0"
421 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
424 wait $pid || return 1
425 do_facet mds "sysctl -w lustre.fail_loc=0"
427 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
428 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
432 run_test 21d "drop close reply while close and open are both in flight"
# test_21e: drop the open reply.
435 mkdir -p $DIR/$tdir-1
436 mkdir -p $DIR/$tdir-2
437 multiop $DIR/$tdir-1/f O_c &
440 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
441 touch $DIR/$tdir-2/f &
443 do_facet mds "sysctl -w lustre.fail_loc=0"
447 wait $pid || return 1
450 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
451 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
454 run_test 21e "drop open reply while close and open are both in flight"
# test_21f: drop both replies.
457 mkdir -p $DIR/$tdir-1
458 mkdir -p $DIR/$tdir-2
459 multiop $DIR/$tdir-1/f O_c &
462 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
463 touch $DIR/$tdir-2/f &
465 do_facet mds "sysctl -w lustre.fail_loc=0"
467 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
470 wait $pid || return 1
471 do_facet mds "sysctl -w lustre.fail_loc=0"
473 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
474 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
477 run_test 21f "drop both reply while close and open are both in flight"
# --- test_21g / test_21h body fragments ---
# Mixed cases: one dropped reply plus one dropped request, per the
# run_test descriptions. $pid/$touch_pid are captured in elided lines.
# test_21g: drop open reply and close request.
480 mkdir -p $DIR/$tdir-1
481 mkdir -p $DIR/$tdir-2
482 multiop $DIR/$tdir-1/f O_c &
485 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
486 touch $DIR/$tdir-2/f &
488 do_facet mds "sysctl -w lustre.fail_loc=0"
490 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
493 wait $pid || return 1
494 do_facet mds "sysctl -w lustre.fail_loc=0"
496 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
497 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
500 run_test 21g "drop open reply and close request while close and open are both in flight"
# test_21h: drop open request and close reply.
503 mkdir -p $DIR/$tdir-1
504 mkdir -p $DIR/$tdir-2
505 multiop $DIR/$tdir-1/f O_c &
508 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
509 touch $DIR/$tdir-2/f &
512 do_facet mds "sysctl -w lustre.fail_loc=0"
514 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
517 wait $pid || return 1
518 do_facet mds "sysctl -w lustre.fail_loc=0"
520 wait $touch_pid || return 2
522 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
523 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
526 run_test 21h "drop open request and close reply while close and open are both in flight"
# --- test_22 / test_23 / test_24 body fragments (heavily elided) ---
528 # bug 3462 - multiple MDC requests
# NOTE(review): $f1/$f2/$close_pid are set in elided lines — confirm.
533 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
# multiop m=mknod, s=stat, u=unlink on $f1 while the close is stuck.
538 multiop $f1 msu || return 1
541 do_facet mds "sysctl -w lustre.fail_loc=0"
543 wait $close_pid || return 2
544 rm -rf $f2 || return 4
546 run_test 22 "drop close request and do mknod"
# test_23: close (triggered via USR1) with the request dropped; the wait
# must still complete, i.e. the client must not hang.
549 multiop $DIR/$tfile O_c &
551 # give a chance for open
555 drop_request "kill -USR1 $pid"
558 wait $pid || return 1
561 run_test 23 "client hang when close a file after mds crash"
563 test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
# Owy_wyc: open, write, yield, wait-for-signal, write, yield, close; the
# fsync/close is expected to report the eviction error to the app.
566 multiop $DIR/$tdir/$tfile Owy_wyc &
571 kill -USR1 $MULTI_PID
574 sysctl -w lustre.fail_loc=0x0
# rc==0 means multiop did NOT see the writeback failure — that is the bug.
576 [ $rc -eq 0 ] && error "multiop didn't fail fsync: rc $rc" || true
578 run_test 24 "fsync error (should return error)"
# --- test_26 (body contiguous; closing brace falls in an elided line) ---
# Verifies the pinger-driven evictor removes a dead export: drop every RPC
# from this client via fail_loc 0x505, wait past the eviction window, and
# check that the OST's export count went down.
580 test_26() { # bug 5921 - evict dead exports by pinger
581 # this test can only run from a client on a separate node.
582 [ "`lsmod | grep obdfilter`" ] && \
583 echo "skipping test 26 (local OST)" && return
584 [ "`lsmod | grep mds`" ] && \
585 echo "skipping test 26 (local MDS)" && return
586 OST_FILE=$LPROC/obdfilter/${ost1_svc}/num_exports
587 OST_EXP="`do_facet ost1 cat $OST_FILE`"
588 OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
589 echo starting with $OST_NEXP1 OST exports
590 # OBD_FAIL_PTLRPC_DROP_RPC 0x505
# Not OR-ed with OBD_FAIL_ONCE: drops RPCs continuously, so the client
# looks dead to the servers.
591 do_facet client sysctl -w lustre.fail_loc=0x505
592 # evictor takes up to 2.25x to evict. But if there's a
593 # race to start the evictor from various obds, the loser
594 # might have to wait for the next ping.
595 echo Waiting for $(($TIMEOUT * 4)) secs
596 sleep $(($TIMEOUT * 4))
597 OST_EXP="`do_facet ost1 cat $OST_FILE`"
598 OST_NEXP2=`echo $OST_EXP | cut -d' ' -f2`
599 echo ending with $OST_NEXP2 OST exports
600 do_facet client sysctl -w lustre.fail_loc=0x0
# Export count must have DECREASED; otherwise the client was not evicted.
601 [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted"
604 run_test 26 "evict dead exports"
# --- test_26b (body contiguous; closing brace falls in an elided line) ---
# Same evictor check, driven by a forced (-f) unmount of a second mount
# instead of dropped RPCs; checks both MDS and OST export counts drop.
606 test_26b() { # bug 10140 - evict dead exports by pinger
607 zconf_mount `hostname` $MOUNT2
608 MDS_FILE=$LPROC/mds/${mds_svc}/num_exports
609 MDS_NEXP1="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`"
610 OST_FILE=$LPROC/obdfilter/${ost1_svc}/num_exports
611 OST_NEXP1="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`"
612 echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
# Force-unmount without telling the servers, leaving a dead export behind.
613 zconf_umount `hostname` $MOUNT2 -f
614 # evictor takes up to 2.25x to evict. But if there's a
615 # race to start the evictor from various obds, the loser
616 # might have to wait for the next ping.
617 echo Waiting for $(($TIMEOUT * 4)) secs
618 sleep $(($TIMEOUT * 4))
619 OST_NEXP2="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`"
620 MDS_NEXP2="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`"
621 echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
622 [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
623 [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
626 run_test 26b "evict dead exports"
# --- test_27 / test_28 body fragments ---
# test_27 requires a local MDS (lsmod check); the next two lines are one
# continued statement.
629 [ "`lsmod | grep mds`" ] || \
630 { echo "skipping test 27 (non-local MDS)" && return 0; }
# Background writemany generates steady create/write/delete load.
632 writemany -q -a $DIR/$tdir/$tfile 0 5 &
637 #define OBD_FAIL_OSC_SHUTDOWN 0x407
638 sysctl -w lustre.fail_loc=0x80000407
639 # need to wait for reconnect
640 echo -n waiting for fail_loc
# -2147482617 is 0x80000407 as a signed 32-bit value: spin until the
# one-shot fault has fired and the kernel cleared/changed fail_loc.
# NOTE(review): loop body and 'done' are in elided lines.
641 while [ `sysctl -n lustre.fail_loc` -eq -2147482617 ]; do
# USR1 tells writemany to stop; $CLIENT_PID captured in an elided line.
647 kill -USR1 $CLIENT_PID
651 run_test 27 "fail LOV while using OSC's"
653 test_28() { # bug 6086 - error adding new clients
654 do_facet client mcreate $MOUNT/$tfile || return 1
655 drop_bl_callback "chmod 0777 $MOUNT/$tfile" ||echo "evicted as expected"
656 #define OBD_FAIL_MDS_ADD_CLIENT 0x12f
657 do_facet mds sysctl -w lustre.fail_loc=0x8000012f
658 # fail once (evicted), reconnect fail (fail_loc), ok
659 df || (sleep 1; df) || (sleep 1; df) || error "reconnect failed"
661 fail mds # verify MDS last_rcvd can be loaded
663 run_test 28 "handle error adding new clients (bug 6086)"
# --- test_50 / test_51 / test_52 body fragments (failover loops and pid
# captures are in elided lines) ---
# test_50: run writemany load, fail over the MDS (elided), and expect the
# client application to ride through transparently.
667 # put a load of file creates/writes/deletes
668 writemany -q $DIR/$tdir/$tfile 0 5 &
670 echo writemany pid $CLIENT_PID
674 # wait for client to reconnect to MDS
679 # client process should see no problems even though MDS went down
681 kill -USR1 $CLIENT_PID
# NOTE(review): $rc is captured from 'wait' in an elided line.
684 echo writemany returned $rc
685 #these may fail because of eviction due to slow AST response.
688 run_test 50 "failover MDS under load"
# test_51: interrupt MDS recovery by failing over again at each offset
# in $SEQ (the loop itself is elided).
692 # put a load of file creates/writes/deletes
693 writemany -q $DIR/$tdir/$tfile 0 5 &
698 # failover at various points during recovery
699 SEQ="1 5 10 $(seq $TIMEOUT 5 $(($TIMEOUT+10)))"
700 echo will failover at $SEQ
703 echo failover in $i sec
707 # client process should see no problems even though MDS went down
708 # and recovery was interrupted
710 kill -USR1 $CLIENT_PID
713 echo writemany returned $rc
716 run_test 51 "failover MDS during recovery"
# test_52: OST failover under load; writemany may legitimately fail with
# EIO (5) or succeed, depending on timing.
719 do_facet client "writemany -q -a $DIR/$tdir/$tfile 300 5" &
721 echo writemany pid $CLIENT_PID
726 wait $CLIENT_PID || rc=$?
727 # active client process should see an EIO for down OST
728 [ $rc -eq 5 ] && { echo "writemany correctly failed $rc" && return 0; }
729 # but timing or failover setup may allow success
730 [ $rc -eq 0 ] && { echo "writemany succeeded" && return 0; }
731 echo "writemany returned $rc"
739 [ $rc -ne 0 ] && { return $rc; }
740 # wait for client to reconnect to OST
744 [ $rc -ne 0 ] && { return $rc; }
751 run_test 52 "failover OST under load"
# --- test_53 / test_54 body fragments ---
753 # test of open reconstruct
# NOTE(review): this line ends in '||\' — its continuation (the failure
# branch) is in an elided line.
756 drop_ldlm_reply "./openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tfile" ||\
759 run_test 53 "touch: drop rep"
# test_54: reproduce the "went back in time" transno regression via a
# failed access recorded in last_rcvd on a second mount.
762 zconf_mount `hostname` $MOUNT2
766 cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd
# Scan the kernel log for the server's "went back in time" complaint.
769 ERROR=`dmesg | egrep "(test 54|went back in time)" | tail -n1 | grep "went back in time"`
770 [ x"$ERROR" == x ] || error "back in time occured"
772 run_test 54 "back in time"
# --- test_55 body fragments (loop delimiters, sleeps, counter updates and
# DDPID captures are in elided lines) ---
774 # bug 11330 - liblustre application death during I/O locks up OST
# Requires a local OST (obdfilter module loaded); next two lines are one
# continued statement.
776 [ "`lsmod | grep obdfilter`" ] || \
777 { echo "skipping test 55 (non-local OST)" && return 0; }
781 # first dd should be finished quickly
782 dd if=/dev/zero of=$DIR/$tdir/$tfile-1 bs=32M count=4 &
785 echo "step1: testing ......"
# Poll `ps` until the dd pid disappears; empty awk output = dd finished.
787 if [ -z `ps x | awk '$1 == '$DDPID' { print $5 }'` ]; then break; fi
789 if [ $count -gt 64 ]; then
790 error "dd should be finished!"
794 echo "(dd_pid=$DDPID, time=$count)successful"
796 #define OBD_FAIL_OST_DROP_REQ 0x21d
# No OBD_FAIL_ONCE bit: the OST drops requests continuously, so this dd
# must stay stuck until the fault is cleared below.
797 do_facet ost sysctl -w lustre.fail_loc=0x0000021d
798 # second dd will be never finished
799 dd if=/dev/zero of=$DIR/$tdir/$tfile-2 bs=32M count=4 &
802 echo "step2: testing ......"
803 while [ $count -le 64 ]; do
804 dd_name="`ps x | awk '$1 == '$DDPID' { print $5 }'`"
805 if [ -z $dd_name ]; then
807 echo "debug: (dd_name=$dd_name, dd_pid=$DDPID, time=$count)"
808 error "dd shouldn't be finished!"
813 echo "(dd_pid=$DDPID, time=$count)successful"
815 #Recover fail_loc and dd will finish soon
816 do_facet ost sysctl -w lustre.fail_loc=0
818 echo "step3: testing ......"
820 if [ -z `ps x | awk '$1 == '$DDPID' { print $5 }'` ]; then break; fi
822 if [ $count -gt 500 ]; then
823 error "dd should be finished!"
827 echo "(dd_pid=$DDPID, time=$count)successful"
831 run_test 55 "ost_brw_read/write drops timed-out read/write request"
# --- test_56 / test_57 fragments, plus a /proc-walking helper fragment ---
833 test_56() { # b=11277
834 #define OBD_FAIL_MDS_RESEND 0x136
836 do_facet mds sysctl -w lustre.fail_loc=0x80000136
# NOTE(review): the operation raced between arming and clearing the
# fault is in an elided line.
838 do_facet mds sysctl -w lustre.fail_loc=0
841 run_test 56 "replace lock race"
# Helper fragment: repeatedly read every lprocfs file while OSC devices
# still exist (used by test_57 below to provoke the procfs race).
# NOTE(review): loop 'done' lines and function header/brace are elided.
844 # no oscs means no client or mdt
845 while [ -e $LPROC/osc ]; do
846 for f in `find $LPROC -type f`; do
# Errors are expected while entries disappear; output is discarded.
847 cat $f > /dev/null 2>&1
852 test_57() { # bug 10866
856 #define OBD_FAIL_LPROC_REMOVE 0xB00
857 sysctl -w lustre.fail_loc=0x80000B00
# Unmount while procfs entries are being read; the fault delays lprocfs
# removal to widen the race window.
858 zconf_umount `hostname` $DIR
859 sysctl -w lustre.fail_loc=0x80000B00
862 sysctl -w lustre.fail_loc=0
864 do_facet client "df $DIR"
866 run_test 57 "read procfs entries causes kernel crash"