# Test names listed here, plus anything supplied in $REPLAY_VBR_EXCEPT.
# NOTE(review): presumably consumed by the sourced test framework to skip
# these tests unconditionally -- confirm against test-framework.sh.
6 ALWAYS_EXCEPT="2 3c 4b 4c 10 $REPLAY_VBR_EXCEPT"
# Defaults, used only when the variable is unset or empty:
# PTLDEBUG -> -1, LUSTRE -> parent of this script's directory, CLEANUP -> "".
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
# Pull in the shared test helpers used throughout this script.
13 . $LUSTRE/tests/test-framework.sh
# Source the configuration file; ':=' also assigns the default path to
# CONFIG when it was unset or empty.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Preconditions: CLIENTS must be non-empty and CLIENTCOUNT at least 2,
# otherwise report the skip and exit successfully.
19 [ -n "$CLIENTS" ] || { skip_env "Need two or more clients" && exit 0; }
20 [ $CLIENTCOUNT -ge 2 ] || \
21 { skip_env "Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
# Skip the whole script when remote_mds_nodsh reports success.
22 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# With SLOW=no, clear the EXCEPT_SLOW list.
24 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# NOTE(review): this assigns ALWAYS_EXCEPT to itself, so it has no effect as
# written; it looks like a placeholder for non-ncli exceptions -- confirm.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
# For the "ncli" configuration, clear MOUNT_2.
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
# Bring up the filesystem via the framework helper, then remove leftover
# entries under $DIR whose names start with 'd' or 'f' followed by a digit.
32 check_and_setup_lustre
33 rm -rf $DIR/[df][0-9]*
# Start the lctl debug daemon only when DAEMONFILE is set and non-empty.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
42 # We need to run do_node in the background, because pdsh does not exit
43 # while a child process of the run script still exists.
44 # I.e. pdsh does not exit when runmultiop_bg_pause has exited,
45 # because of multiop_bg_pause -> $MULTIOP_PROG &
46 # For the same reason we need to sleep a bit after do_node starts
47 # to let runmultiop_bg_pause start multiop and
48 # update /tmp/multiop_bg.pid ;
49 # The rm /tmp/multiop_bg.pid here guarantees that
50 # we read the /tmp/multiop_bg.pid file as updated
51 # by runmultiop_bg_pause
53 local pid_file=$TMP/multiop_bg.pid.$$
54 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
58 multiop_pid=$(do_node $client cat $pid_file)
59 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
60 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
61 eval export $(client_var_name $client)_do_node_pid=$pid
62 local var=$(client_var_name $client)_multiop_pid
63 echo client $client multiop_bg started multiop_pid=${!var}
69 local multiop_pid=$(client_var_name $client)_multiop_pid
70 local do_node_pid=$(client_var_name $client)_do_node_pid
72 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
73 do_node $client kill -USR1 ${!multiop_pid}
83 fid=$(do_node $client $LFS path2fid $file)
84 do_facet mds $LCTL --device $mds_svc getobjversion $fid
88 local file=$DIR/$tfile
92 do_node $CLIENT1 mcreate $file
93 pre=$(get_version $CLIENT1 $file)
94 do_node $CLIENT1 openfile -f O_RDWR $file
95 post=$(get_version $CLIENT1 $file)
96 if (($pre != $post)); then
97 error "version changed unexpectedly: pre $pre, post $post"
100 run_test 0a "VBR: open and close do not change versions"
103 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
104 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
107 do_node $CLIENT2 chmod 777 $DIR/$tdir
108 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
109 zconf_umount $CLIENT2 $MOUNT
112 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
113 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
114 error "open succeeded unexpectedly"
116 zconf_mount $CLIENT2 $MOUNT
118 run_test 0b "VBR: open (O_CREAT) checks version of parent"
121 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
122 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
123 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
126 do_node $CLIENT2 chmod 777 $DIR/$tdir
127 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
128 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
129 zconf_umount $CLIENT2 $MOUNT
131 client_up $CLIENT1 || error "$CLIENT1 evicted"
133 rmultiop_stop $CLIENT1 || error "close failed"
134 zconf_mount $CLIENT2 $MOUNT
136 run_test 0c "VBR: open (non O_CREAT) does not checks versions"
142 pre=$(get_version $CLIENT1 $DIR)
143 do_node $CLIENT1 mkfifo $DIR/$tfile
144 post=$(get_version $CLIENT1 $DIR)
145 if (($pre == $post)); then
146 error "version not changed: pre $pre, post $post"
149 run_test 0d "VBR: create changes version of parent"
152 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
153 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
156 do_node $CLIENT2 chmod 777 $DIR/$tdir
157 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
158 zconf_umount $CLIENT2 $MOUNT
161 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
162 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
163 error "create succeeded unexpectedly"
165 zconf_mount $CLIENT2 $MOUNT
167 run_test 0e "VBR: create checks version of parent"
173 do_node $CLIENT1 mcreate $DIR/$tfile
174 pre=$(get_version $CLIENT1 $DIR)
175 do_node $CLIENT1 rm $DIR/$tfile
176 post=$(get_version $CLIENT1 $DIR)
177 if (($pre == $post)); then
178 error "version not changed: pre $pre, post $post"
181 run_test 0f "VBR: unlink changes version of parent"
184 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
185 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
186 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
189 do_node $CLIENT2 chmod 777 $DIR/$tdir
190 do_node $CLIENT1 rm $DIR/$tdir/$tfile
191 zconf_umount $CLIENT2 $MOUNT
194 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
195 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
196 error "unlink succeeded unexpectedly"
198 zconf_mount $CLIENT2 $MOUNT
200 run_test 0g "VBR: unlink checks version of parent"
203 local file=$DIR/$tfile
207 do_node $CLIENT1 mcreate $file
208 pre=$(get_version $CLIENT1 $file)
209 do_node $CLIENT1 chown $RUNAS_ID $file
210 post=$(get_version $CLIENT1 $file)
211 if (($pre == $post)); then
212 error "version not changed: pre $pre, post $post"
215 run_test 0h "VBR: setattr of UID changes versions"
218 local file=$DIR/$tfile
222 do_node $CLIENT1 mcreate $file
223 pre=$(get_version $CLIENT1 $file)
224 do_node $CLIENT1 chown :$RUNAS_ID $file
225 post=$(get_version $CLIENT1 $file)
226 if (($pre == $post)); then
227 error "version not changed: pre $pre, post $post"
230 run_test 0i "VBR: setattr of GID changes versions"
233 local file=$DIR/$tfile
235 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
236 do_node $CLIENT1 mcreate $file
239 do_node $CLIENT2 chown :$RUNAS_ID $file
240 do_node $CLIENT1 chown $RUNAS_ID $file
241 zconf_umount $CLIENT2 $MOUNT
244 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
245 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
246 error "setattr of UID succeeded unexpectedly"
248 zconf_mount $CLIENT2 $MOUNT
250 run_test 0j "VBR: setattr of UID checks versions"
253 local file=$DIR/$tfile
255 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
256 do_node $CLIENT1 mcreate $file
259 do_node $CLIENT2 chown $RUNAS_ID $file
260 do_node $CLIENT1 chown :$RUNAS_ID $file
261 zconf_umount $CLIENT2 $MOUNT
264 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
265 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
266 error "setattr of GID succeeded unexpectedly"
268 zconf_mount $CLIENT2 $MOUNT
270 run_test 0k "VBR: setattr of GID checks versions"
273 local file=$DIR/$tfile
277 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
278 pre=$(get_version $CLIENT1 $file)
279 do_node $CLIENT1 chmod 666 $file
280 post=$(get_version $CLIENT1 $file)
281 if (($pre == $post)); then
282 error "version not changed: pre $pre, post $post"
285 run_test 0l "VBR: setattr of permission changes versions"
288 local file=$DIR/$tfile
290 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
291 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
294 do_node $CLIENT2 chown :$RUNAS_ID $file
295 do_node $CLIENT1 chmod 666 $file
296 zconf_umount $CLIENT2 $MOUNT
299 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
300 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
301 error "setattr of permission succeeded unexpectedly"
303 zconf_mount $CLIENT2 $MOUNT
305 run_test 0m "VBR: setattr of permission checks versions"
308 local file=$DIR/$tfile
312 do_node $CLIENT1 mcreate $file
313 pre=$(get_version $CLIENT1 $file)
314 do_node $CLIENT1 chattr +i $file
315 post=$(get_version $CLIENT1 $file)
316 do_node $CLIENT1 chattr -i $file
317 if (($pre == $post)); then
318 error "version not changed: pre $pre, post $post"
321 run_test 0n "VBR: setattr of flags changes versions"
329 if ((${#attr} != 1)); then
330 error "checking multiple attributes not implemented yet"
332 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
336 local file=$DIR/$tfile
339 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
340 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
343 do_node $CLIENT2 chmod 666 $file
344 do_node $CLIENT1 chattr +i $file
345 zconf_umount $CLIENT2 $MOUNT
348 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
349 checkattr $CLIENT1 i $file
351 do_node $CLIENT1 chattr -i $file
352 if [ $rc -eq 0 ]; then
353 error "setattr of flags succeeded unexpectedly"
355 zconf_mount $CLIENT2 $MOUNT
357 run_test 0o "VBR: setattr of flags checks versions"
360 local file=$DIR/$tfile
365 ad_orig=$(do_facet mds "$LCTL get_param *.${mds_svc}.atime_diff")
366 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0"
367 do_node $CLIENT1 mcreate $file
368 pre=$(get_version $CLIENT1 $file)
369 do_node $CLIENT1 touch $file
370 post=$(get_version $CLIENT1 $file)
372 # We don't fail MDS in this test. atime_diff shall be
373 # restored to its original value.
375 do_facet mds "$LCTL set_param $ad_orig"
376 if (($pre != $post)); then
377 error "version changed unexpectedly: pre $pre, post $post"
380 run_test 0p "VBR: setattr of times does not change versions"
383 local file=$DIR/$tfile
387 do_node $CLIENT1 mcreate $file
388 pre=$(get_version $CLIENT1 $file)
389 do_node $CLIENT1 truncate $file 1
390 post=$(get_version $CLIENT1 $file)
391 if (($pre != $post)); then
392 error "version changed unexpectedly: pre $pre, post $post"
395 run_test 0q "VBR: setattr of size does not change versions"
398 local file=$DIR/$tfile
403 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
404 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0"
405 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
408 do_node $CLIENT2 chmod 666 $file
409 do_node $CLIENT1 truncate $file 1
411 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
412 do_node $CLIENT1 touch $file
413 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
414 zconf_umount $CLIENT2 $MOUNT
417 client_up $CLIENT1 || error "$CLIENT1 evicted"
418 if (($mtime_pre >= $mtime_post)); then
419 error "time not changed: pre $mtime_pre, post $mtime_post"
421 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
422 error "setattr of size failed"
424 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
425 if (($mtime != $mtime_post)); then
426 error "setattr of times failed: expected $mtime_post, got $mtime"
428 zconf_mount $CLIENT2 $MOUNT
430 run_test 0r "VBR: setattr of times and size does not check versions"
438 do_node $CLIENT1 mcreate $DIR/$tfile
439 do_node $CLIENT1 mkdir -p $DIR/$tdir
440 pre=$(get_version $CLIENT1 $DIR/$tfile)
441 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
442 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
443 post=$(get_version $CLIENT1 $DIR/$tfile)
444 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
445 if (($pre == $post)); then
446 error "version of source not changed: pre $pre, post $post"
448 if (($tp_pre == $tp_post)); then
449 error "version of target parent not changed: pre $tp_pre, post $tp_post"
452 run_test 0s "VBR: link changes versions of source and target parent"
455 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
456 do_node $CLIENT1 mcreate $DIR/$tfile
457 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
460 do_node $CLIENT2 chmod 777 $DIR/$tdir
461 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
462 zconf_umount $CLIENT2 $MOUNT
465 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
466 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
467 error "link should fail"
469 zconf_mount $CLIENT2 $MOUNT
471 run_test 0t "VBR: link checks version of target parent"
474 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
475 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
476 do_node $CLIENT1 mkdir -p $DIR/$tdir
479 do_node $CLIENT2 chmod 666 $DIR/$tfile
480 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
481 zconf_umount $CLIENT2 $MOUNT
484 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
485 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
486 error "link should fail"
488 zconf_mount $CLIENT2 $MOUNT
490 run_test 0u "VBR: link checks version of source"
498 do_node $CLIENT1 mcreate $DIR/$tfile
499 do_node $CLIENT1 mkdir -p $DIR/$tdir
500 sp_pre=$(get_version $CLIENT1 $DIR)
501 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
502 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
503 sp_post=$(get_version $CLIENT1 $DIR)
504 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
505 if (($sp_pre == $sp_post)); then
506 error "version of source parent not changed: pre $sp_pre, post $sp_post"
508 if (($tp_pre == $tp_post)); then
509 error "version of target parent not changed: pre $tp_pre, post $tp_post"
512 run_test 0v "VBR: rename changes versions of source parent and target parent"
518 do_node $CLIENT1 mcreate $DIR/$tfile
519 pre=$(get_version $CLIENT1 $DIR)
520 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
521 post=$(get_version $CLIENT1 $DIR)
522 if (($pre == $post)); then
523 error "version of parent not changed: pre $pre, post $post"
526 run_test 0w "VBR: rename within same dir changes version of parent"
529 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
530 do_node $CLIENT1 mcreate $DIR/$tfile
531 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
534 do_node $CLIENT2 chmod 777 $DIR
535 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
536 zconf_umount $CLIENT2 $MOUNT
539 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
540 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
541 error "rename should fail"
543 zconf_mount $CLIENT2 $MOUNT
545 run_test 0x "VBR: rename checks version of source parent"
548 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
549 do_node $CLIENT1 mcreate $DIR/$tfile
550 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
553 do_node $CLIENT2 chmod 777 $DIR/$tdir
554 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
555 zconf_umount $CLIENT2 $MOUNT
558 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
559 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
560 error "rename should fail"
562 zconf_mount $CLIENT2 $MOUNT
564 run_test 0y "VBR: rename checks version of target parent"
566 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
569 echo "mount client $CLIENT1,$CLIENT2..."
570 zconf_mount_clients $CLIENT1 $DIR
571 zconf_mount_clients $CLIENT2 $DIR
573 do_node $CLIENT2 mkdir -p $DIR/$tdir
575 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
576 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
577 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
578 zconf_umount $CLIENT2 $DIR
581 # recovery shouldn't fail due to missing client 2
582 client_up $CLIENT1 || return 1
584 # All 50 files should have been replayed
585 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
586 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
588 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
589 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
591 zconf_umount_clients $CLIENTS $DIR
594 run_test 1 "VBR: client during replay doesn't affect another one"
596 test_2a() { # was test_2
597 #ls -al $DIR/$tdir/$tfile
599 zconf_mount_clients $CLIENT1 $DIR
600 zconf_mount_clients $CLIENT2 $DIR
602 do_node $CLIENT2 mkdir -p $DIR/$tdir
604 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
605 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
606 #do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
607 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
608 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
609 zconf_umount $CLIENT2 $DIR
612 # recovery shouldn't fail due to missing client 2
613 client_up $CLIENT1 || return 1
615 # All 50 files should have been replayed
616 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
617 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
619 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
621 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
623 zconf_umount_clients $CLIENTS $DIR
626 run_test 2a "VBR: lost data due to missed REMOTE client during replay"
629 # This test uses three Lustre clients on two hosts.
631 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
632 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
633 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
639 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
640 zconf_mount $CLIENT1 $MOUNT
641 zconf_mount $CLIENT2 $MOUNT2
642 zconf_mount $CLIENT2 $MOUNT1
643 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
644 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
647 # Save an MDT transaction number before recovery.
649 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
652 # Comments on the replay sequence state the expected result
656 # "U" Unable to replay.
660 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
661 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
662 do_node $CLIENT2 chown :$RUNAS_ID $DIR2/$tfile-a # U
663 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-a # J
664 do_node $CLIENT2 truncate $DIR2/$tfile-b 1 # U
665 do_node $CLIENT2 chown :$RUNAS_ID $DIR1/$tfile-b # R
666 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-b # R
667 zconf_umount $CLIENT2 $MOUNT2
670 client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted"
671 client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted"
674 # Check the MDT epoch. $post must be the first transaction
675 # number assigned after recovery.
677 do_node $CLIENT2 touch $DIR1/$tfile
678 post=$(get_version $CLIENT2 $DIR1/$tfile)
679 if (($(($pre >> 32)) == $((post >> 32)))); then
680 error "epoch not changed: pre $pre, post $post"
682 if (($(($post & 0x00000000ffffffff)) != 1)); then
683 error "transno should restart from one: got $post"
686 do_node $CLIENT2 stat $DIR1/$tfile-a
687 do_node $CLIENT2 stat $DIR1/$tfile-b
689 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
690 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
691 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_ID \
692 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
694 zconf_umount $CLIENT2 $MOUNT1
695 zconf_umount $CLIENT1 $MOUNT
697 run_test 2b "VBR: 3 clients: some, none, and all reqs replayed"
700 zconf_mount_clients $CLIENT1 $DIR
701 zconf_mount_clients $CLIENT2 $DIR
703 #make sure the time will change
704 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0" || return
705 do_node $CLIENT1 touch $DIR/$tfile
706 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
710 do_node $CLIENT2 touch $DIR/$tfile
711 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
713 do_node $CLIENT1 touch $DIR/$tfile
715 do_node $CLIENT2 rm $DIR/$tfile
716 zconf_umount $CLIENT2 $DIR
719 # recovery shouldn't fail due to missing client 2
720 client_up $CLIENT1 || return 1
721 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
723 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
725 zconf_umount_clients $CLIENTS $DIR
729 run_test 3a "VBR: setattr of time/size doesn't change version"
732 zconf_mount_clients $CLIENT1 $DIR
733 zconf_mount_clients $CLIENT2 $DIR
735 #make sure the time will change
736 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0" || return
737 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0" || return
738 do_node $CLIENT1 touch $DIR/$tfile
739 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
743 do_node $CLIENT2 chmod +x $DIR/$tfile
744 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
746 do_node $CLIENT1 chmod -x $DIR/$tfile
747 zconf_umount $CLIENT2 $DIR
750 # recovery should fail due to missing client 2
751 client_evicted $CLIENT1 || return 1
753 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
754 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
756 zconf_umount_clients $CLIENTS $DIR
760 run_test 3b "VBR: setattr of permissions changes version"
763 [ "$FAILURE_MODE" = HARD ] || \
764 { skip "The HARD failure is needed" && return 0; }
766 [ $RUNAS_ID -eq $UID ] && skip_env "RUNAS_ID = UID = $UID -- skipping" && return
768 zconf_mount_clients $CLIENT1 $DIR
769 zconf_mount_clients $CLIENT2 $DIR
771 # check that permission changes are synced
772 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=1"
774 do_node $CLIENT1 mkdir -p $DIR/d3c/sub || error
775 #chown -R $RUNAS_ID $MOUNT1/d3
776 do_node $CLIENT1 ls -la $DIR/d3c
778 # only HARD failure will work as we use sync operation
780 do_node $CLIENT2 mcreate $DIR/d3c/$tfile-2
782 do_node $CLIENT1 chmod 0700 $UID $DIR/d3c
784 do_node $CLIENT1 mcreate $DIR/d3c/sub/$tfile
785 do_node $CLIENT1 echo "Top Secret" > $DIR/d3c/sub/$tfile
786 #check user can't access new file
787 do_node $CLIENT2 $RUNAS ls $DIR/d3c && return 3
788 do_node $CLIENT1 $RUNAS ls $DIR/d3c && return 4
789 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 5
791 zconf_umount $CLIENT2 $DIR
794 # recovery shouldn't fail due to missing client 2
795 client_up $CLIENT1 || return 1
797 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
798 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 6
799 do_node $CLIENT2 $RUNAS cat $DIR/d3c/sub/$tfile && return 7
800 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
804 run_test 3c "VBR: permission dependency failure"
806 vbr_deactivate_client() {
808 echo "Deactivating client $client";
809 do_node $client "sysctl -w lustre.fail_loc=0x50d"
812 vbr_activate_client() {
814 echo "Activating client $client";
815 do_node $client "sysctl -w lustre.fail_loc=0x0"
821 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
822 [ -z "$(do_node $client lctl dl | grep ost)" ]
826 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
828 remote_server $CLIENT2 || \
829 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
831 zconf_mount_clients $CLIENT1 $DIR
832 zconf_mount_clients $CLIENT2 $DIR
834 do_node $CLIENT2 mkdir -p $DIR/$tdir
836 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
837 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
838 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
839 vbr_deactivate_client $CLIENT2
842 client_up $CLIENT1 || return 1
844 # All 50 files should have been replayed
845 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
846 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
848 vbr_activate_client $CLIENT2
849 client_up $CLIENT2 || return 4
850 # All 25 files from client2 should have been replayed
851 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
853 zconf_umount_clients $CLIENTS $DIR
856 run_test 4a "fail MDS, delayed recovery"
859 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
861 remote_server $CLIENT2 || \
862 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
864 zconf_mount_clients $CLIENT1 $DIR
865 zconf_mount_clients $CLIENT2 $DIR
868 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
869 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
870 vbr_deactivate_client $CLIENT2
873 client_up $CLIENT1 || return 1
875 # create another set of files
876 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
878 vbr_activate_client $CLIENT2
879 client_up $CLIENT2 || return 2
881 # All files from both clients should have been replayed
882 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
883 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
884 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
886 zconf_umount_clients $CLIENTS $DIR
888 run_test 4b "fail MDS, normal operation, delayed open recovery"
891 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
893 remote_server $CLIENT2 || \
894 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
896 zconf_mount_clients $CLIENT1 $DIR
897 zconf_mount_clients $CLIENT2 $DIR
900 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
901 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
902 vbr_deactivate_client $CLIENT2
905 client_up $CLIENT1 || return 1
907 # create another set of files
908 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
910 vbr_activate_client $CLIENT2
911 client_up $CLIENT2 || return 2
913 # All files from both clients should have been replayed
914 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
915 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
916 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
918 zconf_umount_clients $CLIENTS $DIR
920 run_test 4c "fail MDS, normal operation, delayed recovery"
923 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
925 remote_server $CLIENT2 || \
926 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
928 zconf_mount_clients $CLIENT1 $DIR
929 zconf_mount_clients $CLIENT2 $DIR
932 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
933 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
934 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
935 vbr_deactivate_client $CLIENT2
938 client_evicted $CLIENT1 || return 1
940 vbr_activate_client $CLIENT2
941 client_up $CLIENT2 || return 2
943 # First 25 files should have been replayed
944 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
945 # Third file is failed due to missed client2
946 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
947 # file from client2 should exist
948 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
950 zconf_umount_clients $CLIENTS $DIR
952 run_test 5a "fail MDS, delayed recovery should fail"
955 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
957 remote_server $CLIENT2 || \
958 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
960 zconf_mount_clients $CLIENT1 $DIR
961 zconf_mount_clients $CLIENT2 $DIR
964 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
965 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
966 vbr_deactivate_client $CLIENT2
969 client_up $CLIENT1 || return 1
970 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
972 # create another set of files
973 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
975 vbr_activate_client $CLIENT2
976 client_evicted $CLIENT2 || return 4
977 # file from client2 should fail
978 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
980 # All 50 files from client 1 should have been replayed
981 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
982 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
984 zconf_umount_clients $CLIENTS $DIR
986 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
989 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
991 remote_server $CLIENT2 || \
992 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
994 zconf_mount_clients $CLIENT1 $DIR
995 zconf_mount_clients $CLIENT2 $DIR
997 do_node $CLIENT2 mkdir -p $DIR/$tdir
999 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1000 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1001 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1002 vbr_deactivate_client $CLIENT2
1005 # replay only 5 requests
1006 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
1007 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
1008 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
1010 # vbr_activate_client $CLIENT2
1011 # need way to know that client stops replays
1015 client_up $CLIENT1 || return 1
1017 # All files should have been replayed
1018 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1019 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1020 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1022 zconf_umount_clients $CLIENTS $DIR
1025 run_test 6a "fail MDS, delayed recovery, fail MDS"
1028 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1030 remote_server $CLIENT2 || \
1031 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1033 zconf_mount_clients $CLIENT1 $DIR
1034 zconf_mount_clients $CLIENT2 $DIR
1036 do_node $CLIENT2 mkdir -p $DIR/$tdir
1038 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1039 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1040 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1041 vbr_deactivate_client $CLIENT2
1044 vbr_activate_client $CLIENT2
1045 client_up $CLIENT2 || return 4
1048 client_up $CLIENT1 || return 1
1050 # All files should have been replayed
1051 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1052 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1053 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1055 zconf_umount_clients $CLIENTS $DIR
1058 run_test 7a "fail MDS, delayed recovery, fail MDS"
1061 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1063 remote_server $CLIENT2 || \
1064 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1066 zconf_mount_clients $CLIENT1 $DIR
1067 zconf_mount_clients $CLIENT2 $DIR
1069 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1070 do_node $CLIENT2 rm -f $DIR/$tfile
1072 rmultiop_stop $CLIENT2 || return 2
1074 vbr_deactivate_client $CLIENT2
1076 client_up $CLIENT1 || return 3
1077 #client1 is back and will try to open orphan
1078 vbr_activate_client $CLIENT2
1079 client_up $CLIENT2 || return 4
1081 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1082 zconf_umount_clients $CLIENTS $DIR
1085 run_test 8a "orphans are kept until delayed recovery"
1088 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1090 remote_server $CLIENT2 || \
1091 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1093 zconf_mount_clients $CLIENT1 $DIR
1094 zconf_mount_clients $CLIENT2 $DIR
1096 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1098 do_node $CLIENT1 rm -f $DIR/$tfile
1100 vbr_deactivate_client $CLIENT2
1102 client_up $CLIENT1 || return 2
1103 #client1 is back and will try to open orphan
1104 vbr_activate_client $CLIENT2
1105 client_up $CLIENT2 || return 3
1107 rmultiop_stop $CLIENT2 || return 1
1108 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1109 zconf_umount_clients $CLIENTS $DIR
1112 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
1115 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1117 remote_server $CLIENT2 || \
1118 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1120 zconf_mount_clients $CLIENT1 $DIR
1121 zconf_mount_clients $CLIENT2 $DIR
1123 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1125 do_node $CLIENT1 rm -f $DIR/$tfile
1126 rmultiop_stop $CLIENT2 || return 2
1128 vbr_deactivate_client $CLIENT2
1130 client_up $CLIENT1 || return 3
1131 #client1 is back and will try to open orphan
1132 vbr_activate_client $CLIENT2
1133 client_up $CLIENT2 || return 4
1135 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1136 zconf_umount_clients $CLIENTS $DIR
1139 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
1142 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1144 remote_server $CLIENT2 || \
1145 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1147 zconf_mount_clients $CLIENT1 $DIR
1148 zconf_mount_clients $CLIENT2 $DIR
1150 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc || return 1
1151 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 2
1153 do_node $CLIENT1 rm -f $DIR/$tfile
1154 rmultiop_stop $CLIENT2 || return 3
1155 rmultiop_stop $CLIENT1 || return 4
1157 vbr_deactivate_client $CLIENT2
1159 client_up $CLIENT1 || return 6
1161 #client1 is back and will try to open orphan
1162 vbr_activate_client $CLIENT2
1163 client_up $CLIENT2 || return 8
1165 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1166 zconf_umount_clients $CLIENTS $DIR
1169 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
1172 zconf_mount $CLIENT1 $DIR
1173 zconf_mount $CLIENT2 $DIR
1175 do_node $CLIENT1 mcreate $DIR/$tfile
1176 do_node $CLIENT1 mkdir $DIR/$tfile-2
1178 # missed replay from client1 will lead to recovery by versions
1179 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1180 do_node $CLIENT2 rm $DIR/$tfile || return 1
1181 do_node $CLIENT2 touch $DIR/$tfile || return 2
1183 zconf_umount $CLIENT1 $DIR
1185 client_up $CLIENT2 || return 6
1187 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1188 zconf_umount_clients $CLIENTS $DIR
1191 run_test 8e "create | unlink, create shouldn't fail"
1194 zconf_mount_clients $CLIENT1 $DIR
1195 zconf_mount_clients $CLIENT2 $DIR
1197 do_node $CLIENT1 touch $DIR/$tfile
1198 do_node $CLIENT1 mkdir $DIR/$tfile-2
1200 # missed replay from client1 will lead to recovery by versions
1201 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1202 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1203 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1205 zconf_umount $CLIENT1 $DIR
1207 client_up $CLIENT2 || return 6
1209 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1210 zconf_umount $CLIENT2 $DIR
1213 run_test 8f "create | unlink, create shouldn't fail"
1216 zconf_mount_clients $CLIENT1 $DIR
1217 zconf_mount_clients $CLIENT2 $DIR
1219 do_node $CLIENT1 touch $DIR/$tfile
1220 do_node $CLIENT1 mkdir $DIR/$tfile-2
1222 # missed replay from client1 will lead to recovery by versions
1223 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1224 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1225 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1227 zconf_umount $CLIENT1 $DIR
1229 do_node $CLIENT2 df $DIR || return 6
1231 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1232 zconf_umount $CLIENT2 $DIR
1235 run_test 8g "create | unlink, create shouldn't fail"
1238 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1240 [ -z "$DBENCH_LIB" ] && skip_env "DBENCH_LIB is not set" && return 0
1242 zconf_mount_clients $CLIENTS $DIR
1244 local duration="-t 60"
1245 local cmd="rundbench 1 $duration "
1247 for CLIENT in ${CLIENTS//,/ }; do
1248 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
1250 echo $PID >pid.$CLIENT
1251 echo "Started load PID=`cat pid.$CLIENT`"
1255 sleep 3 # give clients a time to do operations
1257 vbr_deactivate_client $CLIENT2
1259 log "$TESTNAME fail mds 1"
1262 # wait for client to reconnect to MDS
1265 vbr_activate_client $CLIENT2
1266 client_up $CLIENT2 || return 4
1268 for CLIENT in ${CLIENTS//,/ }; do
1269 PID=`cat pid.$CLIENT`
1272 echo "load on ${CLIENT} returned $rc"
1275 zconf_umount_clients $CLIENTS $DIR
1277 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# When CLIENTS is set, mount all clients on $DIR again (the tests above
# unmount them before returning).
1279 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
# Announce completion through the framework's equals_msg helper.
1281 equals_msg `basename $0`: test complete, cleaning up
# Disabled: the two lines below are commented-out code that would sleep
# based on $NOW and $TIMEOUT.
1282 #SLEEP=$((`date +%s` - $NOW))
1283 #[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
1284 check_and_cleanup_lustre
# If the suite log is a regular file, print it and exit 1 when it contains
# "FAIL"; in every other case the trailing '|| true' leaves a zero status.
1285 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true