# Suite preamble: define the exclusion list, source the test framework and
# the per-configuration file, bail out early when fewer than two clients are
# available, then set up Lustre and clean the test directory.
# NOTE(review): this listing carries embedded original line numbers and
# skips some of them, so parts of the original file are not visible here.
6 ALWAYS_EXCEPT="3c 4b 4c 10 $REPLAY_VBR_EXCEPT"
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
13 . $LUSTRE/tests/test-framework.sh
# CONFIG defaults to the cfg file named after $NAME and is sourced at once.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Exit (status 0) when CLIENTS is empty or CLIENTCOUNT is below two.
19 [ -n "$CLIENTS" ] || { skip "Need two or more clients" && exit 0; }
20 [ $CLIENTCOUNT -ge 2 ] || \
21 { skip "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
22 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
24 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# NOTE(review): the assignment below sets ALWAYS_EXCEPT to its own value,
# so it currently has no effect.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
32 check_and_setup_lustre
# Remove leftovers under $DIR whose names start with d or f plus a digit.
33 rm -rf $DIR/[df][0-9]*
# Start the lctl debug daemon only when DAEMONFILE is set.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Body of the remote multiop starter (called elsewhere in this file as
# "rmultiop_start <client> <file> <cmds>").
# NOTE(review): the function header and the assignments of $client, $file,
# $cmds and $pid are not visible in this listing.
42 # We need to run do_node in bg, because pdsh does not exit
43 # if child process of run script exists.
44 # I.e. pdsh does not exit when runmultiop_bg_pause exited,
45 # because of multiop_bg_pause -> $MULTIOP_PROG &
46 # For the same reason we need to sleep a bit after do_node starts
47 # to let runmultiop_bg_pause start multiop and
48 # update /tmp/multiop_bg.pid ;
49 # The rm of /tmp/multiop_bg.pid guarantees here that
50 # the pid file we read is the one updated by
51 # runmultiop_bg_pause
# The pid file name is made unique per script run with the shell pid ($$).
53 local pid_file=$TMP/multiop_bg.pid.$$
54 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
# Read the multiop pid back from the client; fail if nothing was written.
58 multiop_pid=$(do_node $client cat $pid_file)
59 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
# Export <client_var_name>_multiop_pid and <client_var_name>_do_node_pid so
# that the matching stop routine can look them up by client.
60 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
61 eval export $(client_var_name $client)_do_node_pid=$pid
62 local var=$(client_var_name $client)_multiop_pid
# ${!var} is indirect expansion: the value of the variable named by $var.
63 echo client $client multiop_bg started multiop_pid=${!var}
# Body of the remote multiop stopper (called elsewhere in this file as
# "rmultiop_stop <client>"): looks up the pid exported per client by the
# start routine and sends it SIGUSR1 on that client.
# NOTE(review): the header, the assignment of $client and any use of
# do_node_pid fall in lines not visible in this listing.
69 local multiop_pid=$(client_var_name $client)_multiop_pid
70 local do_node_pid=$(client_var_name $client)_do_node_pid
# multiop_pid holds a variable NAME; ${!multiop_pid} yields the actual pid.
72 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
73 do_node $client kill -USR1 ${!multiop_pid}
# Body of the version getter (called elsewhere in this file as
# "get_version <client> <file>"): resolves the file's FID on the client and
# asks the MDS device for that object's version; the lctl output goes to
# stdout so callers can capture it with $(...).
# NOTE(review): the header and the assignments of $client/$file are not
# visible in this listing.
83 fid=$(do_node $client $LFS path2fid $file)
84 do_facet mds $LCTL --device $mds_svc getobjversion $fid
# Test 0a body: create a file, record its version, open it O_RDWR, and
# fail if the version differs afterwards.
# NOTE(review): the function header, declarations of pre/post and the
# closing "fi"/"}" are not visible in this listing.
88 local file=$DIR/$tfile
92 do_node $CLIENT1 mcreate $file
93 pre=$(get_version $CLIENT1 $file)
94 do_node $CLIENT1 openfile -f O_RDWR $file
95 post=$(get_version $CLIENT1 $file)
96 if (($pre != $post)); then
97 error "version changed unexpectedly: pre $pre, post $post"
100 run_test 0a "VBR: open and close do not change versions"
# Test 0b body: CLIENT1 creates a directory (mode 755), CLIENT2 changes its
# mode to 777, CLIENT1 then creates a file inside it with O_CREAT, and
# CLIENT2 is unmounted. Afterwards CLIENT1 is expected to be evicted (df
# must fail) and the created file must pass the "$CHECKSTAT -a" check.
# NOTE(review): the embedded numbering skips lines around the umount; the
# MDS failover step is presumably among them — confirm against the full file.
103 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
104 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
107 do_node $CLIENT2 chmod 777 $DIR/$tdir
108 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
109 zconf_umount $CLIENT2 $MOUNT
112 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# presumably "-a" asserts that the file is absent — TODO confirm
113 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
114 error "open succeeded unexpectedly"
116 zconf_mount $CLIENT2 $MOUNT
118 run_test 0b "VBR: open (O_CREAT) checks version of parent"
# Test 0c body: CLIENT1 creates a directory and a file, CLIENT2 changes the
# modes of both, CLIENT1 opens the existing file through a background
# multiop, and CLIENT2 is unmounted. CLIENT1 must NOT be evicted (df must
# succeed) and stopping the multiop must succeed.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
121 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
122 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
123 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
126 do_node $CLIENT2 chmod 777 $DIR/$tdir
127 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
128 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
129 zconf_umount $CLIENT2 $MOUNT
132 do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
133 rmultiop_stop $CLIENT1 || error "close failed"
134 zconf_mount $CLIENT2 $MOUNT
136 run_test 0c "VBR: open (non O_CREAT) does not checks versions"
# Test 0d body: record the version of $DIR, create a fifo in it, and fail
# if the directory's version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
142 pre=$(get_version $CLIENT1 $DIR)
143 do_node $CLIENT1 mkfifo $DIR/$tfile
144 post=$(get_version $CLIENT1 $DIR)
145 if (($pre == $post)); then
146 error "version not changed: pre $pre, post $post"
149 run_test 0d "VBR: create changes version of parent"
# Test 0e body: CLIENT1 creates a directory, CLIENT2 changes its mode,
# CLIENT1 creates a fifo inside it, and CLIENT2 is unmounted. CLIENT1 is
# then expected to be evicted (df must fail) and the fifo must pass the
# "$CHECKSTAT -a" check.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
152 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
153 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
156 do_node $CLIENT2 chmod 777 $DIR/$tdir
157 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
158 zconf_umount $CLIENT2 $MOUNT
161 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# presumably "-a" asserts that the file is absent — TODO confirm
162 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
163 error "create succeeded unexpectedly"
165 zconf_mount $CLIENT2 $MOUNT
167 run_test 0e "VBR: create checks version of parent"
# Test 0f body: create a file, record the version of $DIR, remove the file,
# and fail if the directory's version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
173 do_node $CLIENT1 mcreate $DIR/$tfile
174 pre=$(get_version $CLIENT1 $DIR)
175 do_node $CLIENT1 rm $DIR/$tfile
176 post=$(get_version $CLIENT1 $DIR)
177 if (($pre == $post)); then
178 error "version not changed: pre $pre, post $post"
181 run_test 0f "VBR: unlink changes version of parent"
# Test 0g body: CLIENT1 creates a directory and a file in it, CLIENT2
# changes the directory mode, CLIENT1 removes the file, and CLIENT2 is
# unmounted. CLIENT1 is then expected to be evicted (df must fail); the
# test errors out if "$CHECKSTAT -a" on the file succeeds.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
184 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
185 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
186 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
189 do_node $CLIENT2 chmod 777 $DIR/$tdir
190 do_node $CLIENT1 rm $DIR/$tdir/$tfile
191 zconf_umount $CLIENT2 $MOUNT
194 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Unlike the create tests, the condition here is not negated.
195 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
196 error "unlink succeeded unexpectedly"
198 zconf_mount $CLIENT2 $MOUNT
200 run_test 0g "VBR: unlink checks version of parent"
# Test 0h body: create a file, record its version, change its owner to
# $RUNAS_ID, and fail if the version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
203 local file=$DIR/$tfile
207 do_node $CLIENT1 mcreate $file
208 pre=$(get_version $CLIENT1 $file)
209 do_node $CLIENT1 chown $RUNAS_ID $file
210 post=$(get_version $CLIENT1 $file)
211 if (($pre == $post)); then
212 error "version not changed: pre $pre, post $post"
215 run_test 0h "VBR: setattr of UID changes versions"
# Test 0i body: create a file, record its version, change its group to
# $RUNAS_ID ("chown :<id>"), and fail if the version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
218 local file=$DIR/$tfile
222 do_node $CLIENT1 mcreate $file
223 pre=$(get_version $CLIENT1 $file)
224 do_node $CLIENT1 chown :$RUNAS_ID $file
225 post=$(get_version $CLIENT1 $file)
226 if (($pre == $post)); then
227 error "version not changed: pre $pre, post $post"
230 run_test 0i "VBR: setattr of GID changes versions"
# Test 0j body: CLIENT1 creates a file, CLIENT2 changes its group, CLIENT1
# changes its owner, and CLIENT2 is unmounted. CLIENT1 is then expected to
# be evicted (df must fail) and the file's owner must still match $UID.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
233 local file=$DIR/$tfile
235 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
236 do_node $CLIENT1 mcreate $file
239 do_node $CLIENT2 chown :$RUNAS_ID $file
240 do_node $CLIENT1 chown $RUNAS_ID $file
241 zconf_umount $CLIENT2 $MOUNT
244 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# The escaped '#' is passed through do_node to the remote checkstat.
245 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
246 error "setattr of UID succeeded unexpectedly"
248 zconf_mount $CLIENT2 $MOUNT
250 run_test 0j "VBR: setattr of UID checks versions"
# Test 0k body: CLIENT1 creates a file, CLIENT2 changes its owner, CLIENT1
# changes its group, and CLIENT2 is unmounted. CLIENT1 is then expected to
# be evicted (df must fail) and the file's group is checked.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
253 local file=$DIR/$tfile
255 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
256 do_node $CLIENT1 mcreate $file
259 do_node $CLIENT2 chown $RUNAS_ID $file
260 do_node $CLIENT1 chown :$RUNAS_ID $file
261 zconf_umount $CLIENT2 $MOUNT
264 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# NOTE(review): the group check compares against $UID (a user id), which
# only works when the original gid equals $UID — confirm this is intended.
265 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
266 error "setattr of GID succeeded unexpectedly"
268 zconf_mount $CLIENT2 $MOUNT
270 run_test 0k "VBR: setattr of GID checks versions"
# Test 0l body: create a file with mode 0644, record its version, chmod it
# to 666, and fail if the version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
273 local file=$DIR/$tfile
277 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
278 pre=$(get_version $CLIENT1 $file)
279 do_node $CLIENT1 chmod 666 $file
280 post=$(get_version $CLIENT1 $file)
281 if (($pre == $post)); then
282 error "version not changed: pre $pre, post $post"
285 run_test 0l "VBR: setattr of permission changes versions"
# Test 0m body: CLIENT1 creates a file with mode 0644, CLIENT2 changes its
# group, CLIENT1 chmods it to 666, and CLIENT2 is unmounted. CLIENT1 is
# then expected to be evicted (df must fail) and the mode must still be
# 0644.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
288 local file=$DIR/$tfile
290 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
291 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
294 do_node $CLIENT2 chown :$RUNAS_ID $file
295 do_node $CLIENT1 chmod 666 $file
296 zconf_umount $CLIENT2 $MOUNT
299 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
300 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
301 error "setattr of permission succeeded unexpectedly"
303 zconf_mount $CLIENT2 $MOUNT
305 run_test 0m "VBR: setattr of permission checks versions"
# Test 0n body: create a file, record its version, set the immutable flag
# with "chattr +i", record the version again, clear the flag, and fail if
# the two versions are equal.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
308 local file=$DIR/$tfile
312 do_node $CLIENT1 mcreate $file
313 pre=$(get_version $CLIENT1 $file)
314 do_node $CLIENT1 chattr +i $file
315 post=$(get_version $CLIENT1 $file)
# The flag is cleared before the check so the file is left removable.
316 do_node $CLIENT1 chattr -i $file
317 if (($pre == $post)); then
318 error "version not changed: pre $pre, post $post"
321 run_test 0n "VBR: setattr of flags changes versions"
# Body of the attribute checker (called elsewhere in this file as
# "checkattr <client> <attr> <file>"): accepts exactly one attribute
# character and returns the status of grepping for it in the first
# space-separated field of the remote lsattr output.
# NOTE(review): the header, the assignments of $client/$attr/$file and the
# closing "fi"/"}" are not visible in this listing.
329 if ((${#attr} != 1)); then
330 error "checking multiple attributes not implemented yet"
332 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
# Test 0o body: CLIENT1 creates a file, CLIENT2 chmods it, CLIENT1 sets the
# immutable flag, and CLIENT2 is unmounted. CLIENT1 is then expected to be
# evicted (df must fail) and the immutable flag must not be present.
# NOTE(review): the assignment of $rc (presumably "rc=$?" right after
# checkattr) and the failover step fall in lines skipped by this listing.
336 local file=$DIR/$tfile
339 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
340 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
343 do_node $CLIENT2 chmod 666 $file
344 do_node $CLIENT1 chattr +i $file
345 zconf_umount $CLIENT2 $MOUNT
348 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
349 checkattr $CLIENT1 i $file
# Clear the flag regardless of the outcome before the result is evaluated.
351 do_node $CLIENT1 chattr -i $file
352 if [ $rc -eq 0 ]; then
353 error "setattr of flags succeeded unexpectedly"
355 zconf_mount $CLIENT2 $MOUNT
357 run_test 0o "VBR: setattr of flags checks versions"
# Test 0p body: save the MDS atime_diff setting, force it to 0, create a
# file, record its version, touch it, record the version again, restore
# atime_diff, and fail if the version changed.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
360 local file=$DIR/$tfile
# ad_orig captures the raw get_param output, which is later handed back to
# set_param verbatim.
365 ad_orig=$(do_facet mds "$LCTL get_param mds.${mds_svc}.atime_diff")
366 do_facet mds "$LCTL set_param mds.${mds_svc}.atime_diff=0"
367 do_node $CLIENT1 mcreate $file
368 pre=$(get_version $CLIENT1 $file)
369 do_node $CLIENT1 touch $file
370 post=$(get_version $CLIENT1 $file)
372 # We don't fail MDS in this test. atime_diff shall be
373 # restored to its original value.
375 do_facet mds "$LCTL set_param $ad_orig"
376 if (($pre != $post)); then
377 error "version changed unexpectedly: pre $pre, post $post"
380 run_test 0p "VBR: setattr of times does not change versions"
# Test 0q body: create a file, record its version, truncate it to size 1,
# and fail if the version changed.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
383 local file=$DIR/$tfile
387 do_node $CLIENT1 mcreate $file
388 pre=$(get_version $CLIENT1 $file)
389 do_node $CLIENT1 truncate $file 1
390 post=$(get_version $CLIENT1 $file)
391 if (($pre != $post)); then
392 error "version changed unexpectedly: pre $pre, post $post"
395 run_test 0q "VBR: setattr of size does not change versions"
# Test 0r body: CLIENT1 creates a file, CLIENT2 chmods it, CLIENT1
# truncates it to size 1 and touches it (recording mtime before and after),
# and CLIENT2 is unmounted. CLIENT1 must NOT be evicted (df must succeed),
# and both the size and the post-touch mtime must still be in effect.
# NOTE(review): lines are skipped in this listing (e.g. around the umount
# and between truncate and stat); a delay and the failover step are
# presumably there — confirm.
398 local file=$DIR/$tfile
403 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
404 do_facet mds "$LCTL set_param mds.${mds_svc}.atime_diff=0"
405 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
408 do_node $CLIENT2 chmod 666 $file
409 do_node $CLIENT1 truncate $file 1
# %Y is the modification time in seconds since the epoch.
411 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
412 do_node $CLIENT1 touch $file
413 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
414 zconf_umount $CLIENT2 $MOUNT
417 do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
# Sanity check: the touch must have advanced mtime, otherwise the final
# comparison below would be meaningless.
418 if (($mtime_pre >= $mtime_post)); then
419 error "time not changed: pre $mtime_pre, post $mtime_post"
421 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
422 error "setattr of size failed"
424 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
425 if (($mtime != $mtime_post)); then
426 error "setattr of times failed: expected $mtime_post, got $mtime"
428 zconf_mount $CLIENT2 $MOUNT
430 run_test 0r "VBR: setattr of times and size does not check versions"
# Test 0s body: create a file and a directory, record the versions of both,
# hard-link the file into the directory, and fail if either the source
# file's version or the target directory's version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
438 do_node $CLIENT1 mcreate $DIR/$tfile
439 do_node $CLIENT1 mkdir -p $DIR/$tdir
440 pre=$(get_version $CLIENT1 $DIR/$tfile)
441 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
442 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
443 post=$(get_version $CLIENT1 $DIR/$tfile)
444 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
445 if (($pre == $post)); then
446 error "version of source not changed: pre $pre, post $post"
448 if (($tp_pre == $tp_post)); then
449 error "version of target parent not changed: pre $tp_pre, post $tp_post"
452 run_test 0s "VBR: link changes versions of source and target parent"
# Test 0t body: CLIENT1 creates a file and a directory, CLIENT2 changes the
# directory mode, CLIENT1 links the file into the directory, and CLIENT2 is
# unmounted. CLIENT1 is then expected to be evicted (df must fail) and the
# link target must pass the "$CHECKSTAT -a" check.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
455 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
456 do_node $CLIENT1 mcreate $DIR/$tfile
457 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
460 do_node $CLIENT2 chmod 777 $DIR/$tdir
461 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
462 zconf_umount $CLIENT2 $MOUNT
465 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# presumably "-a" asserts that the file is absent — TODO confirm
466 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
467 error "link should fail"
469 zconf_mount $CLIENT2 $MOUNT
471 run_test 0t "VBR: link checks version of target parent"
# Test 0u body: CLIENT1 creates a file and a directory, CLIENT2 chmods the
# source file, CLIENT1 links the file into the directory, and CLIENT2 is
# unmounted. CLIENT1 is then expected to be evicted (df must fail) and the
# link target must pass the "$CHECKSTAT -a" check.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
474 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
475 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
476 do_node $CLIENT1 mkdir -p $DIR/$tdir
479 do_node $CLIENT2 chmod 666 $DIR/$tfile
480 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
481 zconf_umount $CLIENT2 $MOUNT
484 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# presumably "-a" asserts that the file is absent — TODO confirm
485 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
486 error "link should fail"
488 zconf_mount $CLIENT2 $MOUNT
490 run_test 0u "VBR: link checks version of source"
# Test 0v body: create a file and a directory, record the versions of $DIR
# (source parent) and the directory (target parent), move the file into the
# directory, and fail if either parent's version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
498 do_node $CLIENT1 mcreate $DIR/$tfile
499 do_node $CLIENT1 mkdir -p $DIR/$tdir
500 sp_pre=$(get_version $CLIENT1 $DIR)
501 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
502 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
503 sp_post=$(get_version $CLIENT1 $DIR)
504 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
505 if (($sp_pre == $sp_post)); then
506 error "version of source parent not changed: pre $sp_pre, post $sp_post"
508 if (($tp_pre == $tp_post)); then
509 error "version of target parent not changed: pre $tp_pre, post $tp_post"
512 run_test 0v "VBR: rename changes versions of source parent and target parent"
# Test 0w body: create a file, record the version of $DIR, rename the file
# within $DIR, and fail if the directory's version is unchanged.
# NOTE(review): header, declarations and closing "fi"/"}" are not visible.
518 do_node $CLIENT1 mcreate $DIR/$tfile
519 pre=$(get_version $CLIENT1 $DIR)
520 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
521 post=$(get_version $CLIENT1 $DIR)
522 if (($pre == $post)); then
523 error "version of parent not changed: pre $pre, post $post"
526 run_test 0w "VBR: rename within same dir changes version of parent"
# Test 0x body: CLIENT1 creates a file and a directory, CLIENT2 chmods the
# source parent ($DIR), CLIENT1 moves the file into the directory, and
# CLIENT2 is unmounted. CLIENT1 is then expected to be evicted (df must
# fail); the test errors out if "$CHECKSTAT -a" on the old path succeeds.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
529 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
530 do_node $CLIENT1 mcreate $DIR/$tfile
531 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
534 do_node $CLIENT2 chmod 777 $DIR
535 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
536 zconf_umount $CLIENT2 $MOUNT
539 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
540 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
541 error "rename should fail"
543 zconf_mount $CLIENT2 $MOUNT
545 run_test 0x "VBR: rename checks version of source parent"
# Test 0y body: CLIENT1 creates a file and a directory, CLIENT2 chmods the
# target parent ($DIR/$tdir), CLIENT1 moves the file into it, and CLIENT2
# is unmounted. CLIENT1 is then expected to be evicted (df must fail); the
# test errors out if "$CHECKSTAT -a" on the old path succeeds.
# NOTE(review): lines are skipped around the umount in this listing; the
# failover step is presumably there — confirm.
548 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
549 do_node $CLIENT1 mcreate $DIR/$tfile
550 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
553 do_node $CLIENT2 chmod 777 $DIR/$tdir
554 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
555 zconf_umount $CLIENT2 $MOUNT
558 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
559 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
560 error "rename should fail"
562 zconf_mount $CLIENT2 $MOUNT
564 run_test 0y "VBR: rename checks version of target parent"
# Top-level step before the numbered tests: unmount all clients when
# CLIENTS is set (the following tests mount the clients they need).
566 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
# Test 1 body: CLIENT1 creates two batches of 25 files in $DIR while
# CLIENT2 creates one file in its own subdirectory; CLIENT2 is then
# unmounted. CLIENT1 must still be able to use the filesystem and unlink
# all 50 of its files, and CLIENT2's file must not exist afterwards.
# NOTE(review): the function header and the lines skipped around the
# umount (presumably the replay barrier / failover) are not visible.
569 echo "mount client $CLIENT1,$CLIENT2..."
570 zconf_mount_clients $CLIENT1 $DIR
571 zconf_mount_clients $CLIENT2 $DIR
573 do_node $CLIENT2 mkdir -p $DIR/$tdir
575 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
576 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
577 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
578 zconf_umount $CLIENT2 $DIR
581 # recovery shouldn't fail due to missing client 2
582 do_node $CLIENT1 df $DIR || return 1
584 # All 50 files should have been replayed
585 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
586 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
588 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
# NOTE(review): this existence check runs on the node executing the script,
# not through do_node.
589 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
591 zconf_umount_clients $CLIENTS $DIR
594 run_test 1 "VBR: client during replay doesn't affect another one"
# Test 2a: CLIENT2 creates a file in its own directory, CLIENT1 stats that
# file in between creating two batches of 25 files, and CLIENT2 is then
# unmounted. CLIENT1 must still be able to unlink its 50 files, while the
# file created by CLIENT2 must no longer be found.
# NOTE(review): lines skipped by this listing (presumably the replay
# barrier / failover and the closing "}") are not visible.
596 test_2a() { # was test_2
597 #ls -al $DIR/$tdir/$tfile
599 zconf_mount_clients $CLIENT1 $DIR
600 zconf_mount_clients $CLIENT2 $DIR
602 do_node $CLIENT2 mkdir -p $DIR/$tdir
604 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
605 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
606 #do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
607 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
608 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
609 zconf_umount $CLIENT2 $DIR
612 # recovery shouldn't fail due to missing client 2
613 do_node $CLIENT1 df $DIR || return 1
615 # All 50 files should have been replayed
616 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
617 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
# The file created by the missing client is expected to be gone.
619 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
621 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
623 zconf_umount_clients $CLIENTS $DIR
626 run_test 2a "VBR: lost data due to missed REMOTE client during replay"
# Test 2b body: three mounts issue interleaved setattr operations on two
# files, then the $CLIENT2:$MOUNT2 mount is removed. Afterwards
# $CLIENT1:$MOUNT is expected to be evicted, $CLIENT2:$MOUNT1 is not, the
# upper 32 bits of the version must change, the lower 32 bits of the first
# new version must equal 1, and the final attributes of both files are
# verified.
# NOTE(review): the function header, part of the legend for the R/J/U
# markers, the failover step and the closing "fi"/"}" are not visible.
629 # This test uses three Lustre clients on two hosts.
631 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
632 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
633 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
639 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
640 zconf_mount $CLIENT1 $MOUNT
641 zconf_mount $CLIENT2 $MOUNT2
642 zconf_mount $CLIENT2 $MOUNT1
643 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
644 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
647 # Save an MDT transaction number before recovery.
649 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
652 # Comments on the replay sequence state the expected result
656 # "U" Unable to replay.
660 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
661 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
662 do_node $CLIENT2 chown :$RUNAS_ID $DIR2/$tfile-a # U
663 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-a # J
664 do_node $CLIENT2 truncate $DIR2/$tfile-b 1 # U
665 do_node $CLIENT2 chown :$RUNAS_ID $DIR1/$tfile-b # R
666 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-b # R
667 zconf_umount $CLIENT2 $MOUNT2
670 do_node $CLIENT1 df $MOUNT && error "$CLIENT1:$MOUNT not evicted"
671 do_node $CLIENT2 df $MOUNT1 || error "$CLIENT2:$MOUNT1 evicted"
674 # Check the MDT epoch. $post must be the first transaction
675 # number assigned after recovery.
677 do_node $CLIENT2 touch $DIR1/$tfile
678 post=$(get_version $CLIENT2 $DIR1/$tfile)
# The upper 32 bits of the version are compared as the epoch.
679 if (($(($pre >> 32)) == $((post >> 32)))); then
680 error "epoch not changed: pre $pre, post $post"
# The lower 32 bits are the transaction number within the epoch.
682 if (($(($post & 0x00000000ffffffff)) != 1)); then
683 error "transno should restart from one: got $post"
686 do_node $CLIENT2 stat $DIR1/$tfile-a
687 do_node $CLIENT2 stat $DIR1/$tfile-b
# NOTE(review): the checks below use $DIR1 while the error messages name
# $DIR; the messages may point at the wrong mount when the paths differ.
689 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
690 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
691 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_ID \
692 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
694 zconf_umount $CLIENT2 $MOUNT1
695 zconf_umount $CLIENT1 $MOUNT
697 run_test 2b "VBR: 3 clients: some, none, and all reqs replayed"
# Test 3a body: both clients touch the same file in turn, CLIENT2 finally
# removes it and is unmounted. CLIENT1 must still be able to use the
# filesystem, and the file must no longer be found.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
700 zconf_mount_clients $CLIENT1 $DIR
701 zconf_mount_clients $CLIENT2 $DIR
703 #make sure the time will change
704 do_facet mds "$LCTL set_param mds.${mds_svc}.atime_diff=0" || return
705 do_node $CLIENT1 touch $DIR/$tfile
706 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
710 do_node $CLIENT2 touch $DIR/$tfile
711 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
713 do_node $CLIENT1 touch $DIR/$tfile
715 do_node $CLIENT2 rm $DIR/$tfile
716 zconf_umount $CLIENT2 $DIR
719 # recovery shouldn't fail due to missing client 2
720 do_node $CLIENT1 df $DIR || return 1
# The file removed by CLIENT2 is expected to be gone.
721 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
723 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
725 zconf_umount_clients $CLIENTS $DIR
729 run_test 3a "VBR: setattr of time/size doesn't change version"
# Test 3b body: CLIENT1 creates a file, CLIENT2 adds the execute bit,
# CLIENT1 removes it again, and CLIENT2 is unmounted. Here CLIENT1's
# recovery is expected to fail (df must fail), and the file must not end
# up with mode 0755.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
732 zconf_mount_clients $CLIENT1 $DIR
733 zconf_mount_clients $CLIENT2 $DIR
735 #make sure the time will change
736 do_facet mds "$LCTL set_param mds.${mds_svc}.atime_diff=0" || return
737 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0" || return
738 do_node $CLIENT1 touch $DIR/$tfile
739 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
743 do_node $CLIENT2 chmod +x $DIR/$tfile
744 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
746 do_node $CLIENT1 chmod -x $DIR/$tfile
747 zconf_umount $CLIENT2 $DIR
750 # recovery should fail due to missing client 2
751 do_node $CLIENT1 df $DIR && return 1
753 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
754 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
756 zconf_umount_clients $CLIENTS $DIR
760 run_test 3b "VBR: setattr of permissions changes version"
# Test 3c body: with sync_permission enabled, CLIENT1 restricts a directory
# and writes a file below it; the $RUNAS user must not be able to list the
# directory or read the file, either before or after CLIENT2 is unmounted
# and remounted. The test is skipped unless FAILURE_MODE is HARD and
# RUNAS_ID differs from UID.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
763 [ "$FAILURE_MODE" = HARD ] || \
764 { skip "The HARD failure is needed" && return 0; }
766 [ $RUNAS_ID -eq $UID ] && skip "RUNAS_ID = UID = $UID -- skipping" && return
768 zconf_mount_clients $CLIENT1 $DIR
769 zconf_mount_clients $CLIENT2 $DIR
771 # check that permission changes are synced
772 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=1"
# NOTE(review): "error" is invoked without a message here.
774 do_node $CLIENT1 mkdir -p $DIR/d3c/sub || error
775 #chown -R $RUNAS_ID $MOUNT1/d3
776 do_node $CLIENT1 ls -la $DIR/d3c
778 # only HARD failure will work as we use sync operation
780 do_node $CLIENT2 mcreate $DIR/d3c/$tfile-2
# NOTE(review): chmod receives "$UID" as an extra file operand here; this
# looks unintended (possibly a leftover from a chown) — confirm.
782 do_node $CLIENT1 chmod 0700 $UID $DIR/d3c
784 do_node $CLIENT1 mcreate $DIR/d3c/sub/$tfile
# NOTE(review): the redirection is unquoted, so it is performed by the
# local shell on the output of do_node rather than on $CLIENT1 — confirm.
785 do_node $CLIENT1 echo "Top Secret" > $DIR/d3c/sub/$tfile
786 #check user can't access new file
787 do_node $CLIENT2 $RUNAS ls $DIR/d3c && return 3
788 do_node $CLIENT1 $RUNAS ls $DIR/d3c && return 4
789 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 5
791 zconf_umount $CLIENT2 $DIR
794 # recovery shouldn't fail due to missing client 2
795 do_node $CLIENT1 df $DIR || return 1
798 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
799 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 6
800 do_node $CLIENT2 $RUNAS cat $DIR/d3c/sub/$tfile && return 7
# Restore sync_permission to 0, the value the other tests in this file set.
801 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
805 run_test 3c "VBR: permission dependency failure"
# Deactivate a client by setting lustre.fail_loc=0x50d on it via sysctl.
# NOTE(review): the assignment of $client (presumably "local client=$1")
# and the closing "}" are not visible in this listing; the meaning of
# fail_loc 0x50d is not documented here.
807 vbr_deactivate_client() {
809 echo "Deactivating client $client";
810 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Re-activate a client by resetting lustre.fail_loc to 0x0 on it via
# sysctl (the counterpart of vbr_deactivate_client).
# NOTE(review): the assignment of $client (presumably "local client=$1")
# and the closing "}" are not visible in this listing.
813 vbr_activate_client() {
815 echo "Activating client $client";
816 do_node $client "sysctl -w lustre.fail_loc=0x0"
# Body of the server-locality check (called elsewhere in this file as
# "remote_server <client>"): succeeds only when "lctl dl" on the client
# lists neither an "mdt" nor an "ost" entry.
# NOTE(review): the header, the assignment of $client and the closing "}"
# are not visible in this listing.
822 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
823 [ -z "$(do_node $client lctl dl | grep ost)" ]
# Test 4a body: both clients create files, CLIENT2 is deactivated, CLIENT1
# must recover and unlink its 50 files, then CLIENT2 is re-activated and
# must be able to unlink its own 25 files. Skipped without delayed
# recovery support or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
827 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
829 remote_server $CLIENT2 || \
830 { skip "Client $CLIENT2 is on the server node" && return 0; }
832 zconf_mount_clients $CLIENT1 $DIR
833 zconf_mount_clients $CLIENT2 $DIR
835 do_node $CLIENT2 mkdir -p $DIR/$tdir
837 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
838 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
839 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
840 vbr_deactivate_client $CLIENT2
843 do_node $CLIENT1 df $DIR || return 1
845 # All 50 files should have been replayed
846 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
847 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
849 vbr_activate_client $CLIENT2
850 do_node $CLIENT2 df $DIR || return 4
851 # All 25 files from client2 should have been replayed
852 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
854 zconf_umount_clients $CLIENTS $DIR
857 run_test 4a "fail MDS, delayed recovery"
# Test 4b body: both clients create files with "createmany -o", CLIENT2 is
# deactivated, CLIENT1 recovers and creates another batch, then CLIENT2 is
# re-activated; all three batches must be removable afterwards. Skipped
# without delayed recovery support or when CLIENT2 runs on a server node.
# NOTE(review): no mkdir of $DIR/$tdir is visible in this listing before
# CLIENT2 creates files in it; the failover step and the closing "}" are
# likewise not visible.
860 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
862 remote_server $CLIENT2 || \
863 { skip "Client $CLIENT2 is on the server node" && return 0; }
865 zconf_mount_clients $CLIENT1 $DIR
866 zconf_mount_clients $CLIENT2 $DIR
869 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
870 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
871 vbr_deactivate_client $CLIENT2
874 do_node $CLIENT1 df $DIR || return 1
876 # create another set of files
877 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
879 vbr_activate_client $CLIENT2
880 do_node $CLIENT2 df $DIR || return 2
882 # All files from both clients should have been replayed
883 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
884 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
885 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
887 zconf_umount_clients $CLIENTS $DIR
889 run_test 4b "fail MDS, normal operation, delayed open recovery"
# Test 4c body: same sequence as test 4b, but the files are created with
# "createmany -m" instead of "createmany -o". Skipped without delayed
# recovery support or when CLIENT2 runs on a server node.
# NOTE(review): no mkdir of $DIR/$tdir is visible in this listing before
# CLIENT2 creates files in it; the failover step and the closing "}" are
# likewise not visible.
892 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
894 remote_server $CLIENT2 || \
895 { skip "Client $CLIENT2 is on the server node" && return 0; }
897 zconf_mount_clients $CLIENT1 $DIR
898 zconf_mount_clients $CLIENT2 $DIR
901 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
902 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
903 vbr_deactivate_client $CLIENT2
906 do_node $CLIENT1 df $DIR || return 1
908 # create another set of files
909 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
911 vbr_activate_client $CLIENT2
912 do_node $CLIENT2 df $DIR || return 2
914 # All files from both clients should have been replayed
915 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
916 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
917 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
919 zconf_umount_clients $CLIENTS $DIR
921 run_test 4c "fail MDS, normal operation, delayed recovery"
# Test 5a body: all files are created in the same directory ($DIR), with
# CLIENT2's single create sitting between CLIENT1's creates. After CLIENT2
# is deactivated, CLIENT1's recovery is expected to fail (df must fail);
# once CLIENT2 is re-activated, CLIENT1's first 25 files and CLIENT2's
# file must exist while CLIENT1's later file must not.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
924 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
926 remote_server $CLIENT2 || \
927 { skip "Client $CLIENT2 is on the server node" && return 0; }
929 zconf_mount_clients $CLIENT1 $DIR
930 zconf_mount_clients $CLIENT2 $DIR
933 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
934 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
935 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
936 vbr_deactivate_client $CLIENT2
939 do_node $CLIENT1 df $DIR && return 1
941 vbr_activate_client $CLIENT2
942 do_node $CLIENT2 df $DIR || return 2
944 # First 25 files should have been replayed
945 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
946 # Third file is failed due to missed client2
947 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
948 # file from client2 should exist
949 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
951 zconf_umount_clients $CLIENTS $DIR
953 run_test 5a "fail MDS, delayed recovery should fail"
# Test 5b body: CLIENT1 creates 25 files and CLIENT2 one file in $DIR,
# CLIENT2 is deactivated, and CLIENT1 recovers and creates another batch in
# the same directory. When CLIENT2 is re-activated, its recovery is
# expected to fail (df must fail) and its file must not exist, while all
# 50 of CLIENT1's files must be removable.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
956 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
958 remote_server $CLIENT2 || \
959 { skip "Client $CLIENT2 is on the server node" && return 0; }
961 zconf_mount_clients $CLIENT1 $DIR
962 zconf_mount_clients $CLIENT2 $DIR
965 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
966 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
967 vbr_deactivate_client $CLIENT2
970 do_node $CLIENT1 df $DIR || return 1
971 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
973 # create another set of files
974 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
976 vbr_activate_client $CLIENT2
977 do_node $CLIENT2 df $DIR && return 4
978 # file from client2 should fail
979 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
981 # All 50 files from client 1 should have been replayed
982 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
983 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
985 zconf_umount_clients $CLIENTS $DIR
987 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
# Test 6a body: both clients create files, CLIENT2 is deactivated, and its
# replay is then limited through fail_val/fail_loc before it touches the
# filesystem again. In the end all three batches of files must be
# removable. Skipped without delayed recovery support or when CLIENT2
# runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the MDS failovers and closing "}") are not visible.
990 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
992 remote_server $CLIENT2 || \
993 { skip "Client $CLIENT2 is on the server node" && return 0; }
995 zconf_mount_clients $CLIENT1 $DIR
996 zconf_mount_clients $CLIENT2 $DIR
998 do_node $CLIENT2 mkdir -p $DIR/$tdir
1000 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1001 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1002 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1003 vbr_deactivate_client $CLIENT2
1006 # replay only 5 requests
1007 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
1008 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
# fail_loc is set to 0x50e with an additional 0x20000000 flag bit; the
# meaning of that flag is not documented in this file.
1009 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
1010 do_node $CLIENT2 df $DIR
1011 # vbr_activate_client $CLIENT2
1012 # need way to know that client stops replays
1016 do_node $CLIENT1 df $DIR || return 1
1018 # All files should have been replayed
1019 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1020 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1021 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1023 zconf_umount_clients $CLIENTS $DIR
1026 run_test 6a "fail MDS, delayed recovery, fail MDS"
# Test 7a body: both clients create files, CLIENT2 is deactivated and then
# re-activated, and both clients must afterwards be able to use the
# filesystem and unlink all of their files. Skipped without delayed
# recovery support or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the MDS failovers and closing "}") are not visible.
1029 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1031 remote_server $CLIENT2 || \
1032 { skip "Client $CLIENT2 is on the server node" && return 0; }
1034 zconf_mount_clients $CLIENT1 $DIR
1035 zconf_mount_clients $CLIENT2 $DIR
1037 do_node $CLIENT2 mkdir -p $DIR/$tdir
1039 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1040 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1041 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1042 vbr_deactivate_client $CLIENT2
1045 vbr_activate_client $CLIENT2
1046 do_node $CLIENT2 df $DIR || return 4
1049 do_node $CLIENT1 df $DIR || return 1
1051 # All files should have been replayed
1052 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1053 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1054 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1056 zconf_umount_clients $CLIENTS $DIR
1059 run_test 7a "fail MDS, delayed recovery, fail MDS"
# Test 8a body: CLIENT2 opens a file through a background multiop, removes
# it while it is open, and then stops the multiop. CLIENT2 is deactivated,
# CLIENT1 must recover, CLIENT2 is re-activated and must recover, and the
# file must not exist afterwards. Skipped without delayed recovery support
# or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1062 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1064 remote_server $CLIENT2 || \
1065 { skip "Client $CLIENT2 is on the server node" && return 0; }
1067 zconf_mount_clients $CLIENT1 $DIR
1068 zconf_mount_clients $CLIENT2 $DIR
1070 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1071 do_node $CLIENT2 rm -f $DIR/$tfile
1073 rmultiop_stop $CLIENT2 || return 2
1075 vbr_deactivate_client $CLIENT2
1077 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the comment below says "client1" while the code
# re-activates $CLIENT2 — confirm which numbering is intended.
1078 #client1 is back and will try to open orphan
1079 vbr_activate_client $CLIENT2
1080 do_node $CLIENT2 df $DIR || return 4
1082 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1083 zconf_umount_clients $CLIENTS $DIR
1086 run_test 8a "orphans are kept until delayed recovery"
# Test 8b body: CLIENT2 opens a file through a background multiop and
# CLIENT1 removes it. CLIENT2 is deactivated, CLIENT1 must recover,
# CLIENT2 is re-activated and must recover, and only then is the multiop
# stopped; the file must not exist afterwards. Skipped without delayed
# recovery support or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1089 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1091 remote_server $CLIENT2 || \
1092 { skip "Client $CLIENT2 is on the server node" && return 0; }
1094 zconf_mount_clients $CLIENT1 $DIR
1095 zconf_mount_clients $CLIENT2 $DIR
1097 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1099 do_node $CLIENT1 rm -f $DIR/$tfile
1101 vbr_deactivate_client $CLIENT2
1103 do_node $CLIENT1 df $DIR || return 2
# NOTE(review): the comment below says "client1" while the code
# re-activates $CLIENT2 — confirm which numbering is intended.
1104 #client1 is back and will try to open orphan
1105 vbr_activate_client $CLIENT2
1106 do_node $CLIENT2 df $DIR || return 3
# NOTE(review): return code 1 is also used by the rmultiop_start failure
# path above, so the two failures cannot be told apart by status.
1108 rmultiop_stop $CLIENT2 || return 1
1109 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1110 zconf_umount_clients $CLIENTS $DIR
1113 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
# Test 8c body: CLIENT2 opens a file through a background multiop, CLIENT1
# removes it, and the multiop is stopped before CLIENT2 is deactivated.
# CLIENT1 must recover, CLIENT2 is re-activated and must recover, and the
# file must not exist afterwards. Skipped without delayed recovery support
# or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1116 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1118 remote_server $CLIENT2 || \
1119 { skip "Client $CLIENT2 is on the server node" && return 0; }
1121 zconf_mount_clients $CLIENT1 $DIR
1122 zconf_mount_clients $CLIENT2 $DIR
1124 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1126 do_node $CLIENT1 rm -f $DIR/$tfile
1127 rmultiop_stop $CLIENT2 || return 2
1129 vbr_deactivate_client $CLIENT2
1131 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the comment below says "client1" while the code
# re-activates $CLIENT2 — confirm which numbering is intended.
1132 #client1 is back and will try to open orphan
1133 vbr_activate_client $CLIENT2
1134 do_node $CLIENT2 df $DIR || return 4
1136 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1137 zconf_umount_clients $CLIENTS $DIR
1140 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
# Test 8d body: both clients open the same file through background
# multiops, CLIENT1 removes it, and both multiops are stopped before
# CLIENT2 is deactivated. CLIENT1 must recover, CLIENT2 is re-activated
# and must recover, and the file must not exist afterwards. Skipped
# without delayed recovery support or when CLIENT2 runs on a server node.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1143 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1145 remote_server $CLIENT2 || \
1146 { skip "Client $CLIENT2 is on the server node" && return 0; }
1148 zconf_mount_clients $CLIENT1 $DIR
1149 zconf_mount_clients $CLIENT2 $DIR
1151 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc || return 1
1152 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 2
1154 do_node $CLIENT1 rm -f $DIR/$tfile
1155 rmultiop_stop $CLIENT2 || return 3
1156 rmultiop_stop $CLIENT1 || return 4
1158 vbr_deactivate_client $CLIENT2
1160 do_node $CLIENT1 df $DIR || return 6
# NOTE(review): the comment below says "client1" while the code
# re-activates $CLIENT2 — confirm which numbering is intended.
1162 #client1 is back and will try to open orphan
1163 vbr_activate_client $CLIENT2
1164 do_node $CLIENT2 df $DIR || return 8
1166 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1167 zconf_umount_clients $CLIENTS $DIR
1170 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
# Test 8e body: CLIENT1 creates a file and a directory, then touches a file
# inside the directory; CLIENT2 removes the first file and re-creates it
# with touch. CLIENT1 is unmounted, CLIENT2 must still be able to use the
# filesystem, and the re-created file must exist (rm must succeed).
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1173 zconf_mount $CLIENT1 $DIR
1174 zconf_mount $CLIENT2 $DIR
1176 do_node $CLIENT1 mcreate $DIR/$tfile
1177 do_node $CLIENT1 mkdir $DIR/$tfile-2
1179 # missed replay from client1 will lead to recovery by versions
1180 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1181 do_node $CLIENT2 rm $DIR/$tfile || return 1
1182 do_node $CLIENT2 touch $DIR/$tfile || return 2
1184 zconf_umount $CLIENT1 $DIR
1186 do_node $CLIENT2 df $DIR || return 6
1188 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1189 zconf_umount_clients $CLIENTS $DIR
1192 run_test 8e "create | unlink, create shouldn't fail"
# Test 8f body: same scenario as test 8e, except that CLIENT1 creates the
# first file with touch and CLIENT2 re-creates it with mcreate. CLIENT1 is
# unmounted, CLIENT2 must still be able to use the filesystem, and the
# re-created file must exist (rm must succeed).
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not
# visible. Only CLIENT2 is unmounted at the end, CLIENT1 having been
# unmounted earlier.
1195 zconf_mount_clients $CLIENT1 $DIR
1196 zconf_mount_clients $CLIENT2 $DIR
1198 do_node $CLIENT1 touch $DIR/$tfile
1199 do_node $CLIENT1 mkdir $DIR/$tfile-2
1201 # missed replay from client1 will lead to recovery by versions
1202 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1203 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1204 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1206 zconf_umount $CLIENT1 $DIR
1208 do_node $CLIENT2 df $DIR || return 6
1210 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1211 zconf_umount $CLIENT2 $DIR
1214 run_test 8f "create | unlink, create shouldn't fail"
# Test 8g body: same scenario as test 8f, except that CLIENT2 re-creates
# the name as a directory (mkdir) and it is finally removed with rmdir.
# NOTE(review): the function header and lines skipped by this listing
# (presumably the replay barrier / failover and closing "}") are not visible.
1217 zconf_mount_clients $CLIENT1 $DIR
1218 zconf_mount_clients $CLIENT2 $DIR
1220 do_node $CLIENT1 touch $DIR/$tfile
1221 do_node $CLIENT1 mkdir $DIR/$tfile-2
1223 # missed replay from client1 will lead to recovery by versions
1224 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1225 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1226 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1228 zconf_umount $CLIENT1 $DIR
1230 do_node $CLIENT2 df $DIR || return 6
1232 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1233 zconf_umount $CLIENT2 $DIR
1236 run_test 8g "create | unlink, create shouldn't fail"
# Test 10 body: start a 60-second dbench load on every client in the
# background, deactivate CLIENT2, re-activate it, check that it can use
# the filesystem, and report the result of each client's load. Skipped
# without delayed recovery support or when DBENCH_LIB is unset.
# NOTE(review): the function header, the assignments of $PID and $rc
# (presumably "PID=$!" and a "wait $PID; rc=$?"), the loop terminators,
# the MDS failover and the closing "}" fall in lines skipped by this listing.
1239 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1241 [ -z "$DBENCH_LIB" ] && skip "DBENCH_LIB is not set" && return 0
1243 zconf_mount_clients $CLIENTS $DIR
1245 local duration="-t 60"
1246 local cmd="rundbench 1 $duration "
# CLIENTS is a comma-separated list; the substitution turns it into words.
1248 for CLIENT in ${CLIENTS//,/ }; do
# $PATH, $LUSTRE and $DBENCH_LIB are expanded locally before the command
# string is handed to $PDSH.
1249 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
# The pid of each background load is stored in ./pid.<client>.
1251 echo $PID >pid.$CLIENT
1252 echo "Started load PID=`cat pid.$CLIENT`"
1256 sleep 3 # give clients a time to do operations
1258 vbr_deactivate_client $CLIENT2
1260 log "$TESTNAME fail mds 1"
1263 # wait for client to reconnect to MDS
1266 vbr_activate_client $CLIENT2
1267 do_node $CLIENT2 df $DIR || return 4
1269 for CLIENT in ${CLIENTS//,/ }; do
1270 PID=`cat pid.$CLIENT`
1273 echo "load on ${CLIENT} returned $rc"
1276 zconf_umount_clients $CLIENTS $DIR
1278 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Suite teardown: remount all clients when CLIENTS is set, print the
# completion banner, clean up Lustre, and report the suite log.
1280 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
1282 equals_msg `basename $0`: test complete, cleaning up
1283 #SLEEP=$((`date +%s` - $NOW))
1284 #[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
1285 check_and_cleanup_lustre
# Print the suite log if it exists and exit 1 when it contains "FAIL".
# NOTE(review): because of the "&& ... || true" chain, any failing step
# (missing log, cat error, no FAIL match) falls through to "true", so this
# line itself always ends with status 0 unless "exit 1" is reached.
1286 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true