# Locate the Lustre tree: keep a caller-supplied $LUSTRE, otherwise fall back
# to the parent of the directory that contains this script.
9 LUSTRE=${LUSTRE:-`dirname $0`/..}
# Load the shared test framework from that tree.
11 . $LUSTRE/tests/test-framework.sh
# Source the test configuration. The ${CONFIG:=...} form also assigns the
# default path (cfg/lmv.sh) to CONFIG when CONFIG is unset or empty.
15 . ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh}
21 # Allow us to override the setup if we already have a mounted system by
22 # setting SETUP=" " and CLEANUP=" "
# Defaults are the command names "setup" and "cleanup"; a non-empty value
# supplied by the caller (including a single blank) wins.
23 SETUP=${SETUP:-"setup"}
24 CLEANUP=${CLEANUP:-"cleanup"}
# Configuration fragment: registers the MDS, LOV, OST and client objects.
# NOTE(review): the else/done/fi lines of this if/for construct are not part
# of this listing; presumably the second add_mds/add_lov pair below is the
# single-MDS branch -- confirm against the full script.
29 if [ "$MDSCOUNT" -gt 1 ]; then
# One add_mds per name printed by mds_list, each attached to lmv1_svc and
# backed by a per-MDS, per-host device path under $TMP.
31 for mds in `mds_list`; do
32 MDSDEV=$TMP/${mds}-`hostname`
33 add_mds $mds --dev $MDSDEV --size $MDSSIZE --lmv lmv1_svc
# Attach LOV lov1 to the LMV with the configured stripe size and count.
35 add_lov_to_lmv lov1 lmv1_svc --stripe_sz $STRIPE_BYTES \
36 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
# A single MDS named mds1 with LOV lov1 attached directly to it.
39 add_mds mds1 --dev $MDSDEV --size $MDSSIZE
40 add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES \
41 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
# Two OSTs in lov1; the second uses the first one's device path plus "-2".
46 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
47 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
# The client is registered with mount path $MOUNT.
48 add_client client ${MDS} --lov lov1 --path $MOUNT
# Setup fragment (enclosing function header is not part of this listing).
# The two security daemons are mandatory: the script exits with status 1 or 2
# if either start helper fails.
53 start_krb5_kdc || exit 1
54 start_lsvcgssd || exit 2
# Start both OSTs with --reformat.
56 start ost --reformat $OSTLCONFARGS
57 start ost2 --reformat $OSTLCONFARGS
# Only when DAEMONFILE is non-empty: start the lctl debug daemon on it.
58 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Start every MDS named by mds_list, also with --reformat.
59 for mds in `mds_list`; do
60 start $mds --reformat $MDSLCONFARGS
# Mount the client on this host unless /proc/mounts already lists $MOUNT.
62 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
# Cleanup fragment (enclosing function header is not part of this listing):
# unmount the client on this host, stop every MDS from mds_list, then stop the
# OSTs in the reverse of the order the setup fragment started them.
66 zconf_umount `hostname` $MOUNT
67 for mds in `mds_list`; do
68 stop $mds ${FORCE} $MDSLCONFARGS
70 stop ost2 ${FORCE} --dump cleanup.log
71 stop ost ${FORCE} --dump cleanup.log
# Top-level dispatch on $EVAL and $ONLY.
# NOTE(review): the bodies and closing fi of these three conditionals are not
# part of this listing, so what each branch runs cannot be stated here.
76 if [ ! -z "$EVAL" ]; then
81 if [ "$ONLY" == "setup" ]; then
86 if [ "$ONLY" == "cleanup" ]; then
# Failure to clear portals.debug is deliberately ignored.
87 sysctl -w portals.debug=0 || true
# Run the command held in $SETUP with REFORMAT=--reformat in its environment.
# If $SETUP expands to nothing (e.g. SETUP=" "), this line is a plain
# assignment to REFORMAT in the current shell.
92 REFORMAT=--reformat $SETUP
# ONLY=setup stops the script here, before any test runs.
95 [ "$ONLY" == "setup" ] && exit
# Test 1 body (function header/closing brace are not part of this listing):
# run mcreate once through drop_request and once through drop_reint_reply.
# A failing helper ends the test with return code 1 or 2 respectively.
98 drop_request "mcreate $MOUNT/1" || return 1
99 drop_reint_reply "mcreate $MOUNT/2" || return 2
101 run_test 1 "mcreate: drop req, drop rep"
# Test 2 body (header/closing brace not in this listing): chmod the file
# $MOUNT/2 (the path test 1 passes to mcreate) via tchmod, first through
# drop_request, then through drop_reint_reply.
104 drop_request "tchmod 111 $MOUNT/2" || return 1
105 drop_reint_reply "tchmod 666 $MOUNT/2" || return 2
107 run_test 2 "chmod: drop req, drop rep"
# Test 3 body (header/closing brace not in this listing): stat $MOUNT/2 with
# statone, once through drop_request and once through drop_reply.
110 drop_request "statone $MOUNT/2" || return 1
111 drop_reply "statone $MOUNT/2" || return 2
113 run_test 3 "stat: drop req, drop rep"
# Test 4 body (header/closing brace not in this listing): copy
# /etc/resolv.conf onto the mount, then read it back with cat through
# drop_request and again through drop_reply. Return codes 1-3 identify the
# failing step.
116 do_facet client "cp /etc/resolv.conf $MOUNT/resolv.conf" || return 1
117 drop_request "cat $MOUNT/resolv.conf > /dev/null" || return 2
118 drop_reply "cat $MOUNT/resolv.conf > /dev/null" || return 3
120 run_test 4 "open: drop req, drop rep"
# Test 5 body (header/closing brace not in this listing): rename the file
# copied in test 4 twice (resolv.conf -> renamed -> renamed-again), the first
# mv through drop_request and the second through drop_reint_reply, then
# verify with checkstat that the final name exists.
123 drop_request "mv $MOUNT/resolv.conf $MOUNT/renamed" || return 1
124 drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
125 do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
127 run_test 5 "rename: drop req, drop rep"
# Test 6 body (header/closing brace not in this listing): create two links
# (link1, link2) to renamed-again with mlink, one through drop_request and
# one through drop_reint_reply.
130 drop_request "mlink $MOUNT/renamed-again $MOUNT/link1" || return 1
131 drop_reint_reply "mlink $MOUNT/renamed-again $MOUNT/link2" || return 2
133 run_test 6 "link: drop req, drop rep"
# Test 7 body (header/closing brace not in this listing): remove the two
# links made in test 6 with munlink, one through drop_request and one through
# drop_reint_reply.
136 drop_request "munlink $MOUNT/link1" || return 1
137 drop_reint_reply "munlink $MOUNT/link2" || return 2
139 run_test 7 "unlink: drop req, drop rep"
# Test 8 body (header/closing brace not in this listing): a single touch of
# $MOUNT/renamed through drop_reint_reply; failure returns 1.
143 drop_reint_reply "touch $MOUNT/renamed" || return 1
145 run_test 8 "touch: drop rep (bug 1423)"
# Test 9 body (header/closing brace not in this listing): copy /etc/profile
# onto the mount through pause_bulk, copy /etc/termcap normally, sync, then
# remove both copies. The sync result is not checked; the other steps return
# 1, 2 or 3 on failure.
149 pause_bulk "cp /etc/profile $MOUNT" || return 1
150 do_facet client "cp /etc/termcap $MOUNT" || return 2
151 do_facet client "sync"
152 do_facet client "rm $MOUNT/termcap $MOUNT/profile" || return 3
154 run_test 9 "pause bulk on OST (bug 1420)"
# Test 10 body (header/closing brace not in this listing): create f10, then
# chmod it to 0777 through drop_bl_callback.
158 do_facet client mcreate $MOUNT/f10 || return 1
159 drop_bl_callback "chmod 0777 $MOUNT/f10" || return 2
160 # wait for the mds to evict the client
161 #echo "sleep $(($TIMEOUT*2))"
162 #sleep $(($TIMEOUT*2))
# A failing touch is tolerated and only reported; the test continues.
163 do_facet client touch $MOUNT/f10 || echo "touch failed, evicted"
# The chmod must nevertheless be visible: checkstat requires mode 0777.
164 do_facet client checkstat -v -p 0777 $MOUNT/f10 || return 3
# Result of the final munlink is not checked.
165 do_facet client "munlink $MOUNT/f10"
167 run_test 10 "finish request on server after client eviction (bug 1521)"
170 # wake up a thread waiting for completion after eviction
# Test 11 body (header, closing brace and some intermediate lines are not in
# this listing): run multiop on $MOUNT/$tfile with the "Ow" and "or" command
# strings; each must succeed (return 1/2/3 otherwise).
172 do_facet client multiop $MOUNT/$tfile Ow || return 1
173 do_facet client multiop $MOUNT/$tfile or || return 2
177 do_facet client multiop $MOUNT/$tfile or || return 3
# Failure of the multiop run under drop_bl_callback is tolerated; it only
# prints the message below.
178 drop_bl_callback multiop $MOUNT/$tfile Ow ||
179 echo "client evicted, as expected"
181 do_facet client munlink $MOUNT/$tfile || return 4
183 run_test 11 "wake up a thead waiting for completion after eviction (b=2460)"
# Test 12 body (header, closing brace and some intermediate lines are not in
# this listing).
187 $LCTL mark multiop $MOUNT/$tfile OS_c
# Set fail_loc 0x115 on the mds (named OBD_FAIL_MDS_CLOSE_NET in the #define
# comment below) and start clear_failloc in the background with an argument
# of twice $TIMEOUT.
188 do_facet mds "sysctl -w lustre.fail_loc=0x115"
189 clear_failloc mds $((TIMEOUT * 2)) &
# multiop runs in the background.
# NOTE(review): $PID used below is assigned on a line missing from this
# listing; presumably it is this job's pid -- confirm.
190 multiop $MOUNT/$tfile OS_c &
192 #define OBD_FAIL_MDS_CLOSE_NET 0x115
195 cancel_lru_locks MDC # force the close
196 echo "waiting for multiop $PID"
# The test fails with 2 if the waited-for process exits non-zero.
197 wait $PID || return 2
198 do_facet client munlink $MOUNT/$tfile || return 3
200 run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"
202 # Bug 1138, check that readdir lost recv timeout works.
# Test 13 body (function header/closing brace are not part of this listing):
# create a directory with one entry, list it while the mds has a one-shot
# READPAGE_NET failure armed, then clear fail_loc and remove the directory.
# The directory lives under $MOUNT (the configured client mount point used by
# the other tests) rather than a hard-coded /mnt/lustre, and every step has
# its own return code (1-4) so a failure identifies the step.
204 mkdir $MOUNT/readdir || return 1
205 touch $MOUNT/readdir/newentry || return 2
206 # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
207 do_facet mds "sysctl -w lustre.fail_loc=0x80000104"
208 ls $MOUNT/readdir || return 3
209 do_facet mds "sysctl -w lustre.fail_loc=0"
210 rm -rf $MOUNT/readdir || return 4
212 run_test 13 "mdc_readpage restart test (bug 1138)"
214 # Bug 1138, check that readdir lost send timeout works.
# Test 14 body (function header/closing brace are not part of this listing):
# create a directory with one entry and list it while the mds has a one-shot
# SENDPAGE failure armed; only the ls result decides the outcome (return 1).
# The directory lives under $MOUNT (the configured client mount point used by
# the other tests) rather than a hard-coded /mnt/lustre.
# No visible line removes the directory afterwards.
216 mkdir $MOUNT/readdir
217 touch $MOUNT/readdir/newentry
218 # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE
219 do_facet mds "sysctl -w lustre.fail_loc=0x80000106"
220 ls $MOUNT/readdir || return 1
221 do_facet mds "sysctl -w lustre.fail_loc=0"
223 run_test 14 "mdc_readpage resend test (bug 1138)"
# Test 15 body (header, closing brace and trailing lines are not in this
# listing): arm fail_loc 0x80000128 on the mds, then touch a new file.
# The logic is inverted: the test fails (return 1) if the touch SUCCEEDS.
226 do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
227 touch $DIR/$tfile && return 1
230 run_test 15 "failed open (-ENOMEM)"
# Two loops over every llite read_ahead entry under /proc/fs/lustre.
# NOTE(review): the loop bodies, the enclosing function headers and the rest
# of the trailing comment are not part of this listing, so what is written to
# each entry cannot be stated here.
233 for f in /proc/fs/lustre/llite/*/read_ahead; do
239 for f in /proc/fs/lustre/llite/*/read_ahead; do
244 # recovery timeout. This actually should be taken from
# Test 16 body (header, closing brace and some intermediate lines are not in
# this listing): copy /etc/termcap onto the mount, then arm a one-shot
# fail_loc so the next read-back fails.
249 do_facet client cp /etc/termcap $MOUNT
253 #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
254 sysctl -w lustre.fail_loc=0x80000504
256 # will get evicted here
# Inverted check: a successful cmp while the failure is armed fails the test.
257 do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1
258 sysctl -w lustre.fail_loc=0
259 # give recovery a chance to finish (shouldn't take long)
# With fail_loc cleared the comparison must succeed again.
261 do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
264 run_test 16 "timeout bulk put, evict client (2732)"
# Test 17 body (header, closing brace and some intermediate lines are not in
# this listing): arm the fail_loc first, then copy /etc/termcap to
# $DIR/$tfile while it is armed.
267 # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
268 # client will get evicted here
269 sysctl -w lustre.fail_loc=0x80000503
270 do_facet client cp /etc/termcap $DIR/$tfile
273 sysctl -w lustre.fail_loc=0
274 do_facet client "df $DIR"
# Inverted check: the copy is expected to differ from the source, so a
# successful cmp fails the test with 1.
276 do_facet client "cmp /etc/termcap $DIR/$tfile" && return 1
277 do_facet client "rm $DIR/$tfile" || return 2
280 run_test 17 "timeout bulk get, evict client (2732)"
# Test 18a body (header, closing brace and some intermediate lines are not in
# this listing).
283 do_facet client mkdir -p $MOUNT/$tdir
284 f=$MOUNT/$tdir/$tfile
# Precondition: pgcache_empty must succeed before the test writes anything.
287 pgcache_empty || return 1
290 lfs setstripe $f $((128 * 1024)) 1 1
292 do_facet client cp /etc/termcap $f
# Take field 4 of the lctl device_list line(s) matching "ost2.*client_facet"
# and deactivate that device.
294 local osc2_dev=`$LCTL device_list | \
295 awk '(/ost2.*client_facet/){print $4}' `
296 $LCTL --device %$osc2_dev deactivate
297 # my understanding is that there should be nothing in the page
298 # cache after the client reconnects?
# A failing check is recorded in rc rather than returned immediately, so the
# device is re-activated either way.
# NOTE(review): the lines that initialise and return rc are not in this
# listing.
300 pgcache_empty || rc=2
301 $LCTL --device %$osc2_dev activate
305 run_test 18a "manual ost invalidate clears page cache immediately"
# Test 18b body (header, closing brace and some intermediate lines are not in
# this listing): works on two files, $f and $f2, in $MOUNT/$tdir.
308 # OBD_FAIL_PTLRPC_BULK_PUT_NET|OBD_FAIL_ONCE
309 do_facet client mkdir -p $MOUNT/$tdir
310 f=$MOUNT/$tdir/$tfile
311 f2=$MOUNT/$tdir/${tfile}-2
# Precondition: pgcache_empty must succeed before the test writes anything.
314 pgcache_empty || return 1
316 # shouldn't have to set stripe size of count==1
317 lfs setstripe $f $((128 * 1024)) 0 1
318 lfs setstripe $f2 $((128 * 1024)) 0 1
320 do_facet client cp /etc/termcap $f
322 # just use this write to trigger the client's eviction from the ost
# NOTE(review): 0x80000503 is the value test 17 labels
# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE, while the comment at the top of
# this test names BULK_PUT_NET -- confirm which is intended.
323 sysctl -w lustre.fail_loc=0x80000503
324 do_facet client dd if=/dev/zero of=$f2 bs=4k count=1
326 sysctl -w lustre.fail_loc=0
327 # allow recovery to complete
328 sleep $((TIMEOUT + 2))
329 # my understanding is that there should be nothing in the page
330 # cache after the client reconnects?
# A failing check is recorded in rc; the lines that initialise and return rc
# are not in this listing.
332 pgcache_empty || rc=2
336 run_test 18b "eviction and reconnect clears page cache (2766)"
# Test 19a body (header, closing brace and some intermediate lines are not in
# this listing; $f is assigned on one of the missing lines).
# Only the initial mcreate is mandatory. Failures of the chmod run under
# drop_ldlm_cancel and of the following checkstat are tolerated and merely
# print "evicted".
340 do_facet client mcreate $f || return 1
341 drop_ldlm_cancel "chmod 0777 $f" || echo evicted
343 do_facet client checkstat -v -p 0777 $f || echo evicted
344 # let the client reconnect
# Result of the final munlink is not checked.
346 do_facet client "munlink $f"
348 run_test 19a "test expired_lock_main on mds (2867)"
# Test 19b body (header, closing brace and some intermediate lines are not in
# this listing; $f is assigned on one of the missing lines).
# Same command sequence as test 11, but the tolerated multiop runs under
# drop_ldlm_cancel instead of drop_bl_callback.
352 do_facet client multiop $f Ow || return 1
353 do_facet client multiop $f or || return 2
357 do_facet client multiop $f or || return 3
358 drop_ldlm_cancel multiop $f Ow || echo "client evicted, as expected"
360 do_facet client munlink $f || return 4
362 run_test 19b "test expired_lock_main on ost (2867)"
# test_20a: expects a backgrounded multiop to FAIL once fail_loc 0x80000308 is
# armed on the ost.
# NOTE(review): the closing brace and several body lines are not in this
# listing, including the assignments of $MULTI_PID and $rc used below.
364 test_20a() { # bug 2983 - ldlm_handle_enqueue cleanup
366 multiop $DIR/$tdir/${tfile} O_wc &
370 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
371 do_facet ost sysctl -w lustre.fail_loc=0x80000308
# Signal the process whose pid is in $MULTI_PID.
372 kill -USR1 $MULTI_PID
# rc == 0 is reported through error; the trailing "|| true" makes this line
# itself evaluate to success whenever rc is non-zero.
375 [ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true
377 run_test 20a "ldlm_handle_enqueue error (should return error)"
# test_20b: with fail_loc 0x80000308 armed on the ost, writing to an existing
# file with dd is expected to FAIL; a successful dd is reported through error.
# NOTE(review): the closing brace and some body lines are not in this listing.
379 test_20b() { # bug 2986 - ldlm_handle_enqueue error during open
381 touch $DIR/$tdir/${tfile}
383 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
384 do_facet ost sysctl -w lustre.fail_loc=0x80000308
385 dd if=/etc/hosts of=$DIR/$tdir/$tfile && \
386 error "didn't fail open enqueue" || true
388 run_test 20b "ldlm_handle_enqueue error (should return error)"
# Test 21a body (header, closing brace and some intermediate lines are not in
# this listing; $close_pid and $open_pid are assigned on missing lines,
# presumably from the two background multiop jobs -- confirm).
391 mkdir -p $DIR/$tdir-1
392 mkdir -p $DIR/$tdir-2
# First background multiop, on a file in the first directory.
393 multiop $DIR/$tdir-1/f O_c &
# Second background multiop starts while fail_loc 0x80000129 is armed.
396 do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
397 multiop $DIR/$tdir-2/f Oc &
400 do_facet mds "sysctl -w lustre.fail_loc=0"
# 0x115 is named OBD_FAIL_MDS_CLOSE_NET elsewhere in this file; the high bit
# is the OBD_FAIL_ONCE flag.
402 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
403 kill -USR1 $close_pid
404 cancel_lru_locks MDC # force the close
# Both background jobs must exit successfully.
405 wait $close_pid || return 1
406 wait $open_pid || return 2
407 do_facet mds "sysctl -w lustre.fail_loc=0"
# Both files must exist as regular files afterwards.
409 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
410 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
414 run_test 21a "drop close request while close and open are both in flight"
# Test 21b body (header, closing brace and some intermediate lines are not in
# this listing; $close_pid and $open_pid are assigned on missing lines).
417 mkdir -p $DIR/$tdir-1
418 mkdir -p $DIR/$tdir-2
419 multiop $DIR/$tdir-1/f O_c &
# The second file is created with a background mcreate while fail_loc
# 0x80000107 is armed.
422 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
423 mcreate $DIR/$tdir-2/f &
426 do_facet mds "sysctl -w lustre.fail_loc=0"
# No fail_loc is armed around the close in this variant.
428 kill -USR1 $close_pid
429 cancel_lru_locks MDC # force the close
# Return codes skip 2 here: 1, 3, 4, 5.
430 wait $close_pid || return 1
431 wait $open_pid || return 3
433 $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
434 $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
437 run_test 21b "drop open request while close and open are both in flight"
# Test 21c body (header, closing brace and some intermediate lines are not in
# this listing; $close_pid and $open_pid are assigned on missing lines).
# Combines 21b's fail_loc around the create (0x80000107) with 21a's fail_loc
# around the close (0x80000115).
440 mkdir -p $DIR/$tdir-1
441 mkdir -p $DIR/$tdir-2
442 multiop $DIR/$tdir-1/f O_c &
445 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
446 mcreate $DIR/$tdir-2/f &
449 do_facet mds "sysctl -w lustre.fail_loc=0"
451 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
452 kill -USR1 $close_pid
453 cancel_lru_locks MDC # force the close
454 wait $close_pid || return 1
455 wait $open_pid || return 2
457 do_facet mds "sysctl -w lustre.fail_loc=0"
# NOTE(review): return code 2 is used both for the open_pid wait above and
# for the first CHECKSTAT below, so a result of 2 does not identify which
# step failed.
459 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
460 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
463 run_test 21c "drop both request while close and open are both in flight"
# Test 21d body (header, closing brace and some intermediate lines are not in
# this listing; $pid is assigned on a missing line, and no kill of it is
# visible here).
466 mkdir -p $DIR/$tdir-1
467 mkdir -p $DIR/$tdir-2
468 multiop $DIR/$tdir-1/f O_c &
# Second background multiop starts while fail_loc 0x80000129 is armed.
471 do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
472 multiop $DIR/$tdir-2/f Oc &
474 do_facet mds "sysctl -w lustre.fail_loc=0"
# fail_loc 0x80000122 is armed around the forced close.
476 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
478 cancel_lru_locks MDC # force the close
479 wait $pid || return 1
480 do_facet mds "sysctl -w lustre.fail_loc=0"
# Both files must exist as regular files afterwards.
482 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
483 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
487 run_test 21d "drop close reply while close and open are both in flight"
# Test 21e body (header, closing brace and some intermediate lines are not in
# this listing; $pid is assigned on a missing line).
490 mkdir -p $DIR/$tdir-1
491 mkdir -p $DIR/$tdir-2
492 multiop $DIR/$tdir-1/f O_c &
# The second file is created with a background touch while fail_loc
# 0x80000119 is armed; no fail_loc is armed around the close.
495 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
496 touch $DIR/$tdir-2/f &
498 do_facet mds "sysctl -w lustre.fail_loc=0"
501 cancel_lru_locks MDC # force the close
502 wait $pid || return 1
# Both files must exist as regular files afterwards.
505 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
506 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
509 run_test 21e "drop open reply while close and open are both in flight"
# Test 21f body (header, closing brace and some intermediate lines are not in
# this listing; $pid is assigned on a missing line).
# Combines 21e's fail_loc around the touch (0x80000119) with 21d's fail_loc
# around the close (0x80000122).
512 mkdir -p $DIR/$tdir-1
513 mkdir -p $DIR/$tdir-2
514 multiop $DIR/$tdir-1/f O_c &
517 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
518 touch $DIR/$tdir-2/f &
520 do_facet mds "sysctl -w lustre.fail_loc=0"
522 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
524 cancel_lru_locks MDC # force the close
525 wait $pid || return 1
526 do_facet mds "sysctl -w lustre.fail_loc=0"
# Both files must exist as regular files afterwards.
528 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
529 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
532 run_test 21f "drop both reply while close and open are both in flight"
# Test 21g body (header, closing brace and some intermediate lines are not in
# this listing; $pid is assigned on a missing line).
# fail_loc 0x80000119 is armed around the touch and 0x80000115 around the
# close (0x115 is named OBD_FAIL_MDS_CLOSE_NET elsewhere in this file).
535 mkdir -p $DIR/$tdir-1
536 mkdir -p $DIR/$tdir-2
537 multiop $DIR/$tdir-1/f O_c &
540 do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
541 touch $DIR/$tdir-2/f &
543 do_facet mds "sysctl -w lustre.fail_loc=0"
545 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
547 cancel_lru_locks MDC # force the close
548 wait $pid || return 1
549 do_facet mds "sysctl -w lustre.fail_loc=0"
# Both files must exist as regular files afterwards.
551 $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
552 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
555 run_test 21g "drop open reply and close request while close and open are both in flight"
# Test 21h body (header, closing brace and some intermediate lines are not in
# this listing; $pid and $touch_pid are assigned on missing lines).
# fail_loc 0x80000107 is armed around the touch and 0x80000122 around the
# close.
558 mkdir -p $DIR/$tdir-1
559 mkdir -p $DIR/$tdir-2
560 multiop $DIR/$tdir-1/f O_c &
563 do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
564 touch $DIR/$tdir-2/f &
567 do_facet mds "sysctl -w lustre.fail_loc=0"
569 do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
570 cancel_lru_locks MDC # force the close
572 wait $pid || return 1
573 do_facet mds "sysctl -w lustre.fail_loc=0"
# Unlike 21d-21g, this variant also waits for the background touch.
575 wait $touch_pid || return 2
577 $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
578 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
581 run_test 21h "drop open request and close reply while close and open are both in flight"
583 # bug 3462 - multiple MDC requests
# Test 22 body (header, closing brace and some intermediate lines are not in
# this listing; $f1, $f2 and $close_pid are assigned on missing lines).
# 0x115 is named OBD_FAIL_MDS_CLOSE_NET elsewhere in this file; the high bit
# is the OBD_FAIL_ONCE flag.
588 do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
# multiop on $f1 runs in the foreground while the fail_loc is armed.
593 multiop $f1 msu || return 1
595 cancel_lru_locks MDC # force the close
596 do_facet mds "sysctl -w lustre.fail_loc=0"
# Visible return codes are 1, 2 and 4; code 3 does not appear in this listing.
598 wait $close_pid || return 2
599 rm -rf $f2 || return 4
601 run_test 22 "drop close request and do mknod"
# Test 23 body (header, closing brace and some intermediate lines are not in
# this listing; $pid is assigned on a missing line).
# The test is currently disabled: its run_test line below is commented out.
604 multiop $DIR/$tfile O_c &
606 # give a chance for open
# The USR1 signal is sent through drop_request; the result of that call is
# not checked.
610 drop_request "kill -USR1 $pid"
# The test fails with 1 if the waited-for process exits non-zero.
613 wait $pid || return 1
616 #run_test 23 "client hang when close a file after mds crash"