# Lustre recovery test suite bootstrap (sampled dump: many original lines,
# including most test_N() braces, are omitted from this view).
# Tests 24, 27, 52 are excluded by default, plus anything the caller puts
# in $RECOVERY_SMALL_EXCEPT.
6 ALWAYS_EXCEPT="24 27 52 $RECOVERY_SMALL_EXCEPT"
# -1 enables full Lustre debug tracing unless the caller overrides PTLDEBUG.
8 PTLDEBUG=${PTLDEBUG:--1}
9 LUSTRE=${LUSTRE:-`dirname $0`/..}
10 . $LUSTRE/tests/test-framework.sh
12 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
16 # Allow us to override the setup if we already have a mounted system by
17 # setting SETUP=" " and CLEANUP=" "
19 CLEANUP=${CLEANUP:-""}
21 cleanup_and_setup_lustre
# Tests 1-8: drop a single MDS request or reply and verify the client
# transparently resends/recovers.  (Function openers/closers fall on
# omitted lines of this sampled dump.)
# test 1: create with dropped request, then dropped reintegration reply.
24 drop_request "mcreate $MOUNT/1" || return 1
25 drop_reint_reply "mcreate $MOUNT/2" || return 2
27 run_test 1 "mcreate: drop req, drop rep"
# test 2: chmod under the same two failure modes.
30 drop_request "tchmod 111 $MOUNT/2" || return 1
31 drop_reint_reply "tchmod 666 $MOUNT/2" || return 2
33 run_test 2 "chmod: drop req, drop rep"
# test 3: stat (getattr) request/reply drops.
36 drop_request "statone $MOUNT/2" || return 1
37 drop_reply "statone $MOUNT/2" || return 2
39 run_test 3 "stat: drop req, drop rep"
# test 4: open/read path — stage a file first, then drop req and rep.
42 do_facet client "cp /etc/inittab $MOUNT/inittab" || return 1
43 drop_request "cat $MOUNT/inittab > /dev/null" || return 2
44 drop_reply "cat $MOUNT/inittab > /dev/null" || return 3
46 run_test 4 "open: drop req, drop rep"
# test 5: rename twice (drop req, then drop rep) and verify the result.
49 drop_request "mv $MOUNT/inittab $MOUNT/renamed" || return 1
50 drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
51 do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
53 run_test 5 "rename: drop req, drop rep"
# test 6: hard links; recreate the source if test 5 didn't leave one.
55 [ ! -e $MOUNT/renamed-again ] && cp /etc/inittab $MOUNT/renamed-again
57 drop_request "mlink $MOUNT/renamed-again $MOUNT/link1" || return 1
58 drop_reint_reply "mlink $MOUNT/renamed-again $MOUNT/link2" || return 2
60 run_test 6 "link: drop req, drop rep"
# test 7: unlink; ensure both links exist even if test 6 was skipped.
62 [ ! -e $MOUNT/link1 ] && mlink $MOUNT/renamed-again $MOUNT/link1
63 [ ! -e $MOUNT/link2 ] && mlink $MOUNT/renamed-again $MOUNT/link2
65 drop_request "munlink $MOUNT/link1" || return 1
66 drop_reint_reply "munlink $MOUNT/link2" || return 2
68 run_test 7 "unlink: drop req, drop rep"
# test 8: touch with only the reply dropped.
72 drop_reint_reply "touch $MOUNT/$tfile" || return 1
74 run_test 8 "touch: drop rep (bug 1423)"
# test 9: pause a bulk write on the OST, then do unrelated I/O and sync to
# confirm the paused transfer doesn't wedge the client.
78 pause_bulk "cp /etc/profile $MOUNT/$tfile" || return 1
79 do_facet client "cp /etc/termcap $MOUNT/${tfile}.2" || return 2
80 do_facet client "sync"
81 do_facet client "rm $MOUNT/$tfile $MOUNT/${tfile}.2" || return 3
83 run_test 9 "pause bulk on OST (bug 1420)"
# test 10: drop the blocking-lock callback so the MDS evicts the client,
# then verify the server still finished the chmod (mode 0777 visible).
87 do_facet client mcreate $MOUNT/$tfile || return 1
88 drop_bl_callback "chmod 0777 $MOUNT/$tfile" || echo "evicted as expected"
89 # wait for the mds to evict the client
90 #echo "sleep $(($TIMEOUT*2))"
91 #sleep $(($TIMEOUT*2))
92 do_facet client touch $MOUNT/$tfile || echo "touch failed, evicted"
93 do_facet client checkstat -v -p 0777 $MOUNT/$tfile || return 3
94 do_facet client "munlink $MOUNT/$tfile"
96 run_test 10 "finish request on server after client eviction (bug 1521)"
99 # wake up a thread waiting for completion after eviction
# test 11: open/write then open/read the file, trigger eviction via a
# dropped blocking callback, and make sure the waiter is woken.
101 do_facet client multiop $MOUNT/$tfile Ow || return 1
102 do_facet client multiop $MOUNT/$tfile or || return 2
106 do_facet client multiop $MOUNT/$tfile or || return 3
107 drop_bl_callback multiop $MOUNT/$tfile Ow || echo "evicted as expected"
109 do_facet client munlink $MOUNT/$tfile || return 4
111 run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"
# test 12: force a timed-out resend in ptlrpcd by failing the MDS close
# path while a background multiop holds the file open.
115 $LCTL mark multiop $MOUNT/$tfile OS_c
116 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x115"
# clear the fail_loc asynchronously after the timeout window.
117 clear_failloc $SINGLEMDS $((TIMEOUT * 2)) &
118 multiop $MOUNT/$tfile OS_c &
120 #define OBD_FAIL_MDS_CLOSE_NET 0x115
123 echo "waiting for multiop $PID"
124 wait $PID || return 2
125 do_facet client munlink $MOUNT/$tfile || return 3
127 run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"
129 # Bug 113, check that readdir lost recv timeout works.
# test 13: fail the MDS readpage receive once and verify ls still works.
131 mkdir $MOUNT/readdir || return 1
132 touch $MOUNT/readdir/newentry || return
133 # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
134 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000104"
135 ls $MOUNT/readdir || return 3
136 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
137 rm -rf $MOUNT/readdir || return 4
139 run_test 13 "mdc_readpage restart test (bug 1138)"
141 # Bug 113, check that readdir lost send timeout works.
# test 14: same as 13 but fail the send side once.
144 touch $MOUNT/readdir/newentry
145 # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE
146 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000106"
147 ls $MOUNT/readdir || return 1
148 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
150 run_test 14 "mdc_readpage resend test (bug 1138)"
# test 15: inject a one-shot failure so the open must fail; 'touch'
# succeeding here is the error condition.
153 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000128"
154 touch $DIR/$tfile && return 1
157 run_test 15 "failed open (-ENOMEM)"
# Save the current max_read_ahead_mb so it can be restored after the bulk
# timeout tests below (the disabling write falls on an omitted line).
159 READ_AHEAD=`cat $LPROC/llite/*/max_read_ahead_mb | head -n 1`
161 for f in $LPROC/llite/*/max_read_ahead_mb; do
# restore loop: put the saved read-ahead value back on every llite mount.
167 for f in $LPROC/llite/*/max_read_ahead_mb; do
168 echo $READ_AHEAD > $f
# test 16: time out a bulk PUT once; the client must resend rather than
# get evicted, so both cmp's are expected to succeed.
173 do_facet client cp /etc/termcap $MOUNT
177 #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
178 do_facet ost1 "sysctl -w lustre.fail_loc=0x80000504"
180 # OST bulk will time out here, client resends
181 do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 1
182 do_facet ost1 sysctl -w lustre.fail_loc=0
183 # give recovery a chance to finish (shouldn't take long)
185 do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
188 run_test 16 "timeout bulk put, don't evict client (2732)"
# test 17: same idea for the bulk GET (write) path.
191 # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
192 # OST bulk will time out here, client retries
193 do_facet ost1 sysctl -w lustre.fail_loc=0x80000503
194 # need to ensure we send an RPC
195 do_facet client cp /etc/termcap $DIR/$tfile
199 do_facet ost1 sysctl -w lustre.fail_loc=0
200 do_facet client "df $DIR"
201 # expect cmp to succeed, client resent bulk
202 do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3
203 do_facet client "rm $DIR/$tfile" || return 4
206 run_test 17 "timeout bulk get, don't evict client (2732)"
# Skip unless a second OST is configured.  FIX: quote ${ost2_svc} — the
# unquoted form collapses to the one-argument test '[ -z ]' when the
# variable is empty, which only "works" by accident (ShellCheck SC2086).
209 [ -z "${ost2_svc}" ] && skip "needs 2 osts" && return 0
# test 18a: write through OST2, deactivate its OSC device, and check the
# client page cache is dropped immediately.
211 do_facet client mkdir -p $MOUNT/$tdir
212 f=$MOUNT/$tdir/$tfile
215 pgcache_empty || return 1
# stripe the file onto the second OST so the deactivate below hits it.
218 lfs setstripe $f $((128 * 1024)) 1 1
220 do_facet client cp /etc/termcap $f
# find the OSC device number for ost2 (exclude MDT-side OSCs).
222 local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | egrep -v 'MDT' | awk '{print $1}'`
223 $LCTL --device $osc2dev deactivate || return 3
224 # my understanding is that there should be nothing in the page
225 # cache after the client reconnects?
227 pgcache_empty || rc=2
228 $LCTL --device $osc2dev activate
232 run_test 18a "manual ost invalidate clears page cache immediately"
# test 18b: same check, but via eviction + reconnect instead of a manual
# deactivate.
235 do_facet client mkdir -p $MOUNT/$tdir
236 f=$MOUNT/$tdir/$tfile
237 f2=$MOUNT/$tdir/${tfile}-2
240 pgcache_empty || return 1
242 # shouldn't have to set stripe size of count==1
243 lfs setstripe $f $((128 * 1024)) 0 1
244 lfs setstripe $f2 $((128 * 1024)) 0 1
246 do_facet client cp /etc/termcap $f
249 # allow recovery to complete
250 sleep $((TIMEOUT + 2))
251 # my understanding is that there should be nothing in the page
252 # cache after the client reconnects?
254 pgcache_empty || rc=2
258 run_test 18b "eviction and reconnect clears page cache (2766)"
# test 19a: drop an LDLM cancel so expired_lock_main on the MDS evicts the
# client; eviction is the expected outcome here.
262 do_facet client mcreate $f || return 1
263 drop_ldlm_cancel "chmod 0777 $f" || echo "evicted as expected"
265 do_facet client checkstat -v -p 0777 $f || echo evicted
266 # let the client reconnect
268 do_facet client "munlink $f"
270 run_test 19a "test expired_lock_main on mds (2867)"
# test 19b: same scenario against the OST extent-lock path.
274 do_facet client multiop $f Ow || return 1
275 do_facet client multiop $f or || return 2
279 do_facet client multiop $f or || return 3
280 drop_ldlm_cancel multiop $f Ow || echo "client evicted, as expected"
282 do_facet client munlink $f || return 4
284 run_test 19b "test expired_lock_main on ost (2867)"
286 test_20a() { # bug 2983 - ldlm_handle_enqueue cleanup
# force the OST enqueue to fail while multiop holds the file open; the
# write from the signalled multiop must then fail (rc != 0).
288 multiop $DIR/$tdir/${tfile} O_wc &
292 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
293 do_facet ost1 sysctl -w lustre.fail_loc=0x80000308
294 kill -USR1 $MULTI_PID
297 [ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true
299 run_test 20a "ldlm_handle_enqueue error (should return error)"
301 test_20b() { # bug 2986 - ldlm_handle_enqueue error during open
303 touch $DIR/$tdir/${tfile}
305 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
306 do_facet ost1 sysctl -w lustre.fail_loc=0x80000308
# the dd open/enqueue must fail; success is the error condition.
307 dd if=/etc/hosts of=$DIR/$tdir/$tfile && \
308 error "didn't fail open enqueue" || true
310 run_test 20b "ldlm_handle_enqueue error (should return error)"
# Tests 21a-21h exercise concurrent close+open with various combinations
# of dropped requests/replies.  Per the test titles: 0x80000107 drops the
# open request, 0x80000115 (OBD_FAIL_MDS_CLOSE_NET, see test 12) drops
# the close request, 0x80000119 drops the open reply, 0x80000122 drops
# the close reply.
# 21a: drop the close request while an open is also in flight.
313 mkdir -p $DIR/$tdir-1
314 mkdir -p $DIR/$tdir-2
315 multiop $DIR/$tdir-1/f O_c &
318 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129"
319 multiop $DIR/$tdir-2/f Oc &
322 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
324 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
325 kill -USR1 $close_pid
327 wait $close_pid || return 1
328 wait $open_pid || return 2
329 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
331 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
332 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
336 run_test 21a "drop close request while close and open are both in flight"
# 21b: drop the open request instead.
339 mkdir -p $DIR/$tdir-1
340 mkdir -p $DIR/$tdir-2
341 multiop $DIR/$tdir-1/f O_c &
344 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
345 mcreate $DIR/$tdir-2/f &
348 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
350 kill -USR1 $close_pid
352 wait $close_pid || return 1
353 wait $open_pid || return 3
355 $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
356 $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
359 run_test 21b "drop open request while close and open are both in flight"
# 21c: drop both the open request (0x80000107) and the close request
# (0x80000115) while they are in flight.
362 mkdir -p $DIR/$tdir-1
363 mkdir -p $DIR/$tdir-2
364 multiop $DIR/$tdir-1/f O_c &
367 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
368 mcreate $DIR/$tdir-2/f &
371 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
373 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
374 kill -USR1 $close_pid
376 wait $close_pid || return 1
377 wait $open_pid || return 2
379 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
381 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
382 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
385 run_test 21c "drop both request while close and open are both in flight"
# 21d: drop the close reply (0x80000122).
388 mkdir -p $DIR/$tdir-1
389 mkdir -p $DIR/$tdir-2
390 multiop $DIR/$tdir-1/f O_c &
393 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129"
394 multiop $DIR/$tdir-2/f Oc &
396 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
398 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
401 wait $pid || return 1
402 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
404 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
405 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
409 run_test 21d "drop close reply while close and open are both in flight"
# 21e: drop the open reply (0x80000119).
412 mkdir -p $DIR/$tdir-1
413 mkdir -p $DIR/$tdir-2
414 multiop $DIR/$tdir-1/f O_c &
417 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
418 touch $DIR/$tdir-2/f &
420 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
424 wait $pid || return 1
427 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
428 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
431 run_test 21e "drop open reply while close and open are both in flight"
# 21f: drop both replies — open (0x80000119) and close (0x80000122).
434 mkdir -p $DIR/$tdir-1
435 mkdir -p $DIR/$tdir-2
436 multiop $DIR/$tdir-1/f O_c &
439 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
440 touch $DIR/$tdir-2/f &
442 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
444 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
447 wait $pid || return 1
448 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
450 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
451 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
454 run_test 21f "drop both reply while close and open are both in flight"
# 21g: drop the open reply (0x80000119) and the close request (0x80000115).
457 mkdir -p $DIR/$tdir-1
458 mkdir -p $DIR/$tdir-2
459 multiop $DIR/$tdir-1/f O_c &
462 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119"
463 touch $DIR/$tdir-2/f &
465 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
467 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
470 wait $pid || return 1
471 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
473 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
474 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
477 run_test 21g "drop open reply and close request while close and open are both in flight"
# 21h: drop the open request (0x80000107) and the close reply (0x80000122).
480 mkdir -p $DIR/$tdir-1
481 mkdir -p $DIR/$tdir-2
482 multiop $DIR/$tdir-1/f O_c &
485 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107"
486 touch $DIR/$tdir-2/f &
489 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
491 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122"
494 wait $pid || return 1
495 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
497 wait $touch_pid || return 2
499 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
500 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
503 run_test 21h "drop open request and close reply while close and open are both in flight"
505 # bug 3462 - multiple MDC requests
# test 22: drop a close request (0x80000115 = OBD_FAIL_MDS_CLOSE_NET, see
# test 12) and verify an unrelated mknod on the same MDC still completes.
510 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115"
515 multiop $f1 msu || return 1
518 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
520 wait $close_pid || return 2
521 rm -rf $f2 || return 4
523 run_test 22 "drop close request and do mknod"
# test 23: close a file (via USR1 to multiop) with its request dropped
# after an MDS crash; the client must not hang.
526 multiop $DIR/$tfile O_c &
528 # give a chance for open
532 drop_request "kill -USR1 $pid"
535 wait $pid || return 1
538 run_test 23 "client hang when close a file after mds crash"
540 test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
# the fsync from the signalled multiop must fail; rc == 0 is the bug.
543 multiop $DIR/$tdir/$tfile Owy_wyc &
548 kill -USR1 $MULTI_PID
551 sysctl -w lustre.fail_loc=0x0
553 [ $rc -eq 0 ] && error "multiop didn't fail fsync: rc $rc" || true
555 run_test 24 "fsync error (should return error)"
557 test_26() { # bug 5921 - evict dead exports by pinger
558 # this test can only run from a client on a separate node.
# FIX: '&&' and '||' have equal precedence and associate left, so the
# original "remote_ost || skip ... && return" executed 'return' even when
# remote_ost succeeded — the test body never ran (ShellCheck SC2015).
# Brace the skip-and-return so it fires only when the facet is local.
559 remote_ost || { skip "local OST" && return 0; }
560 remote_mds || { skip "local MDS" && return 0; }
# record the OST export count before dropping RPCs.
561 OST_FILE=$LPROC/obdfilter/${ost1_svc}/num_exports
562 OST_EXP="`do_facet ost1 cat $OST_FILE`"
563 OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
564 echo starting with $OST_NEXP1 OST exports
565 # OBD_FAIL_PTLRPC_DROP_RPC 0x505
566 do_facet client sysctl -w lustre.fail_loc=0x505
567 # evictor takes up to 2.25x to evict. But if there's a
568 # race to start the evictor from various obds, the loser
569 # might have to wait for the next ping.
570 echo Waiting for $(($TIMEOUT * 4)) secs
571 sleep $(($TIMEOUT * 4))
572 OST_EXP="`do_facet ost1 cat $OST_FILE`"
573 OST_NEXP2=`echo $OST_EXP | cut -d' ' -f2`
574 echo ending with $OST_NEXP2 OST exports
575 do_facet client sysctl -w lustre.fail_loc=0x0
# the export count must have dropped, i.e. the dead client was evicted.
576 [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted"
579 run_test 26 "evict dead exports"
581 test_26b() { # bug 10140 - evict dead exports by pinger
# mount a second client, note MDS/OST export counts, force-unmount it
# without telling the servers, then check the pinger evicted its exports.
583 zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
584 sleep 1 # wait connections being established
585 MDS_FILE=$LPROC/mdt/${mds1_svc}/num_exports
586 MDS_NEXP1="`do_facet $SINGLEMDS cat $MDS_FILE | cut -d' ' -f2`"
587 OST_FILE=$LPROC/obdfilter/${ost1_svc}/num_exports
588 OST_NEXP1="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`"
589 echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
# -f: force unmount so the servers see a dead, un-disconnected client.
590 zconf_umount `hostname` $MOUNT2 -f
591 # evictor takes up to 2.25x to evict. But if there's a
592 # race to start the evictor from various obds, the loser
593 # might have to wait for the next ping.
594 echo Waiting for $(($TIMEOUT * 4)) secs
595 sleep $(($TIMEOUT * 4))
596 OST_NEXP2="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`"
597 MDS_NEXP2="`do_facet $SINGLEMDS cat $MDS_FILE | cut -d' ' -f2`"
598 echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
599 [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
600 [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
603 run_test 26b "evict dead exports"
# test 27: fail the MDS twice while writemany runs and OSC shutdown is
# fault-injected; requires a local MDS.
606 remote_mds && { skip "remote MDS" && return 0; }
608 writemany -q -a $DIR/$tdir/$tfile 0 5 &
612 facet_failover $SINGLEMDS
613 #define OBD_FAIL_OSC_SHUTDOWN 0x407
614 sysctl -w lustre.fail_loc=0x80000407
615 # need to wait for reconnect
616 echo -n waiting for fail_loc
# -2147482617 is 0x80000407 read back as a signed 32-bit value.
617 while [ `sysctl -n lustre.fail_loc` -eq -2147482617 ]; do
621 facet_failover $SINGLEMDS
623 kill -USR1 $CLIENT_PID
627 run_test 27 "fail LOV while using OSC's"
629 test_28() { # bug 6086 - error adding new clients
# evict the client, then make its first reconnect attempt fail via
# OBD_FAIL_MDS_CLIENT_ADD; df must succeed on a retry.
630 do_facet client mcreate $MOUNT/$tfile || return 1
631 drop_bl_callback "chmod 0777 $MOUNT/$tfile" ||echo "evicted as expected"
632 #define OBD_FAIL_MDS_CLIENT_ADD 0x12f
633 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012f"
634 # fail once (evicted), reconnect fail (fail_loc), ok
635 df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed"
637 fail $SINGLEMDS # verify MDS last_rcvd can be loaded
639 run_test 28 "handle error adding new clients (bug 6086)"
# test 50: fail over the MDS while writemany generates create/write/delete
# load; the client workload should survive recovery.
643 # put a load of file creates/writes/deletes
644 writemany -q $DIR/$tdir/$tfile 0 5 &
646 echo writemany pid $CLIENT_PID
650 # wait for client to reconnect to MDS
655 # client process should see no problems even though MDS went down
657 kill -USR1 $CLIENT_PID
660 echo writemany returned $rc
661 #these may fail because of eviction due to slow AST response.
664 run_test 50 "failover MDS under load"
# test 51: fail the MDS again at several points *during* recovery.
668 # put a load of file creates/writes/deletes
669 writemany -q $DIR/$tdir/$tfile 0 5 &
673 facet_failover $SINGLEMDS
674 # failover at various points during recovery
675 SEQ="1 5 10 $(seq $TIMEOUT 5 $(($TIMEOUT+10)))"
676 echo will failover at $SEQ
679 echo failover in $i sec
681 facet_failover $SINGLEMDS
683 # client process should see no problems even though MDS went down
684 # and recovery was interrupted
686 kill -USR1 $CLIENT_PID
689 echo writemany returned $rc
692 run_test 51 "failover MDS during recovery"
# test 52: fail over an OST under load; EIO (rc 5) or success are both
# acceptable outcomes depending on failover timing.
695 do_facet client "writemany -q -a $DIR/$tdir/$tfile 300 5" &
697 echo writemany pid $CLIENT_PID
702 wait $CLIENT_PID || rc=$?
703 # active client process should see an EIO for down OST
704 [ $rc -eq 5 ] && { echo "writemany correctly failed $rc" && return 0; }
705 # but timing or failover setup may allow success
706 [ $rc -eq 0 ] && { echo "writemany succeeded" && return 0; }
707 echo "writemany returned $rc"
715 [ $rc -ne 0 ] && { return $rc; }
716 # wait for client to reconnect to OST
720 [ $rc -ne 0 ] && { return $rc; }
727 run_test 52 "failover OST under load"
729 # test of open reconstruct
# test 53: drop the LDLM reply for an open-create; the open must still be
# reconstructed on resend.
732 drop_ldlm_reply "openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tfile" ||\
735 run_test 53 "touch: drop rep"
# test 54: provoke a transno=0 last_rcvd update via a failed open on a
# second mount, then scan dmesg for a "went back in time" warning.
738 zconf_mount `hostname` $MOUNT2
742 cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd
745 ERROR=`dmesg | egrep "(test 54|went back in time)" | tail -n1 | grep "went back in time"`
# FIX: corrected the misspelled error message ("occured" -> "occurred").
746 [ x"$ERROR" == x ] || error "back in time occurred"
748 run_test 54 "back in time"
750 # bug 11330 - liblustre application death during I/O locks up OST
# test 55: dd must finish normally (step 1), hang while the OST drops
# requests (step 2), and complete once the fail_loc is cleared (step 3).
752 remote_ost && { skip "remote OST" && return 0; }
756 # first dd should be finished quickly
757 dd if=/dev/zero of=$DIR/$tdir/$tfile-1 bs=32M count=4 &
760 echo "step1: testing ......"
# FIX: quote the command substitution — unquoted it degrades to the
# one-argument test '[ -z ]' when empty and word-splits otherwise (SC2086).
762 if [ -z "`ps x | awk '$1 == '$DDPID' { print $5 }'`" ]; then break; fi
764 if [ $count -gt 64 ]; then
765 error "dd should be finished!"
769 echo "(dd_pid=$DDPID, time=$count)successful"
771 #define OBD_FAIL_OST_DROP_REQ 0x21d
772 do_facet ost sysctl -w lustre.fail_loc=0x0000021d
773 # second dd will be never finished
774 dd if=/dev/zero of=$DIR/$tdir/$tfile-2 bs=32M count=4 &
777 echo "step2: testing ......"
778 while [ $count -le 64 ]; do
779 dd_name="`ps x | awk '$1 == '$DDPID' { print $5 }'`"
# FIX: quote $dd_name for the same SC2086 reason as above.
780 if [ -z "$dd_name" ]; then
782 echo "debug: (dd_name=$dd_name, dd_pid=$DDPID, time=$count)"
783 error "dd shouldn't be finished!"
788 echo "(dd_pid=$DDPID, time=$count)successful"
790 #Recover fail_loc and dd will finish soon
791 do_facet ost sysctl -w lustre.fail_loc=0
793 echo "step3: testing ......"
# FIX: quoted as in step 1.
795 if [ -z "`ps x | awk '$1 == '$DDPID' { print $5 }'`" ]; then break; fi
797 if [ $count -gt 500 ]; then
798 error "dd should be finished!"
802 echo "(dd_pid=$DDPID, time=$count)successful"
806 run_test 55 "ost_brw_read/write drops timed-out read/write request"
808 test_56() { # b=11277
809 #define OBD_FAIL_MDS_RESEND 0x136
# force a resend on the MDS and verify the busy export is not reconnected.
811 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000136"
813 do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0"
816 run_test 56 "do not allow reconnect to busy exports"
# helper loop for test 57: keep reading every /proc lustre file while OSC
# entries exist, to race reads against procfs entry removal.
819 # no oscs means no client or mdt
820 while [ -e $LPROC/osc ]; do
821 for f in `find $LPROC -type f`; do
822 cat $f > /dev/null 2>&1
827 test_57() { # bug 10866
831 #define OBD_FAIL_LPROC_REMOVE 0xB00
832 sysctl -w lustre.fail_loc=0x80000B00
833 zconf_umount `hostname` $DIR
834 sysctl -w lustre.fail_loc=0x80000B00
835 fail_abort $SINGLEMDS
837 sysctl -w lustre.fail_loc=0
839 do_facet client "df $DIR"
841 run_test 57 "read procfs entries causes kernel crash"
843 test_58() { # bug 11546
844 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
# pause the MDC enqueue mid-open-reply, then evict the client via a
# dropped blocking callback; df verifies the mount still works.
847 sysctl -w lustre.fail_loc=0x80000801
848 cp $MOUNT/$tfile /dev/null &
851 sysctl -w lustre.fail_loc=0
852 drop_bl_callback rm -f $MOUNT/$tfile
854 do_facet client "df $DIR"
856 run_test 58 "Eviction in the middle of open RPC reply processing"
858 test_59() { # bug 10589
# write through a second mount with the cancel/evict race injected, force
# an eviction by -f umount, then compare blocks read back to blocks written.
859 zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
860 #define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311
861 sysctl -w lustre.fail_loc=0x311
862 writes=`dd if=/dev/zero of=$DIR2/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } /out/ {print $1}'`
863 sysctl -w lustre.fail_loc=0
865 zconf_umount `hostname` $DIR2 -f
866 reads=`dd if=$DIR/$tfile of=/dev/null 2>&1 | awk 'BEGIN { FS="+" } /in/ {print $1}'`
867 [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes
869 run_test 59 "Read cancel race on client eviction"
# suite footer: announce completion, clean up, and dump the suite log.
871 equals_msg `basename $0`: test complete, cleaning up
872 check_and_cleanup_lustre
873 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true