# Test numbers that are always excluded; REPLAY_VBR_EXCEPT can append more.
6 ALWAYS_EXCEPT="2 3c 4b 4c 10 $REPLAY_VBR_EXCEPT"
# Defaults: PTLDEBUG falls back to -1, LUSTRE to the parent of this script's
# directory, CLEANUP to the empty string.
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
# Source the shared test framework, then the configuration file; CONFIG is
# assigned $LUSTRE/tests/cfg/$NAME.sh when it is unset or empty.
13 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Exit early (status 0) when CLIENTS is empty, when fewer than two clients are
# counted, or when remote_mds_nodsh succeeds.
19 [ -n "$CLIENTS" ] || { skip_env "Need two or more clients" && exit 0; }
20 [ $CLIENTCOUNT -ge 2 ] || \
21 { skip_env "Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
22 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# With SLOW=no the EXCEPT_SLOW list is emptied.
24 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# NOTE(review): this assigns ALWAYS_EXCEPT its own value, so it has no effect
# as written — confirm whether extra test numbers were meant to be appended.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
# For the ncli configuration MOUNT_2 is cleared.
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
32 check_and_setup_lustre
# Remove entries directly under $DIR whose names start with "d" or "f"
# followed by a digit.
33 rm -rf $DIR/[df][0-9]*
# When DAEMONFILE is set, start the lctl debug daemon with that file and size.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
42 # We need to run do_node in bg, because pdsh does not exit
43 # if a child process of the run script still exists.
44 # I.e. pdsh does not exit when runmultiop_bg_pause exited,
45 # because of multiop_bg_pause -> $MULTIOP_PROG &
46 # For the same reason we need to sleep a bit after do_node starts
47 # to let runmultiop_bg_pause start multiop and
48 # update /tmp/multiop_bg.pid ;
49 # The rm /tmp/multiop_bg.pid guarantees here that
50 # the /tmp/multiop_bg.pid file we read was written
51 # by this runmultiop_bg_pause
# The pid file name carries this shell's PID ($$) as a suffix.
53 local pid_file=$TMP/multiop_bg.pid.$$
# On the client: delete any old pid file, then run runmultiop_bg_pause with
# MULTIOP_PID_FILE pointing at it and LUSTRE set to empty; the whole do_node
# call is backgrounded.
54 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
# Read the multiop PID back from the client; an empty result is an error.
58 multiop_pid=$(do_node $client cat $pid_file)
59 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
# Export <node_var_name>_multiop_pid and <node_var_name>_do_node_pid.
# pid is assigned on a line not shown in this excerpt.
60 eval export $(node_var_name $client)_multiop_pid=$multiop_pid
61 eval export $(node_var_name $client)_do_node_pid=$pid
# var holds the exported variable's name; ${!var} expands to its value.
62 local var=$(node_var_name $client)_multiop_pid
63 echo client $client multiop_bg started multiop_pid=${!var}
# These locals hold the NAMES of the per-node variables; the values are
# obtained below through ${!name} indirect expansion.
69 local multiop_pid=$(node_var_name $client)_multiop_pid
70 local do_node_pid=$(node_var_name $client)_do_node_pid
72 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
# Send SIGUSR1 to the recorded multiop PID on the client.
73 do_node $client kill -USR1 ${!multiop_pid}
# Resolve the file's FID on the client with "lfs path2fid", then query the
# MDS device $mds_svc for that object's version with getobjversion.
83 fid=$(do_node $client $LFS path2fid $file)
84 do_facet mds $LCTL --device $mds_svc getobjversion $fid
# Branch taken when the MDS version string does not start with "1.8".
88 lustre_version=$(get_lustre_version mds)
89 if [[ $lustre_version != 1.8* ]]; then
94 local file=$DIR/$tfile
98 do_node $CLIENT1 mcreate $file
99 pre=$(get_version $CLIENT1 $file)
100 do_node $CLIENT1 openfile -f O_RDWR $file
101 post=$(get_version $CLIENT1 $file)
102 if (($pre != $post)); then
103 error "version changed unexpectedly: pre $pre, post $post"
106 run_test 0a "VBR: open and close do not change versions"
109 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
110 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
113 do_node $CLIENT2 chmod 777 $DIR/$tdir
114 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
115 zconf_umount $CLIENT2 $MOUNT
118 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
119 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
120 error "open succeeded unexpectedly"
122 zconf_mount $CLIENT2 $MOUNT
124 run_test 0b "VBR: open (O_CREAT) checks version of parent"
127 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
128 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
129 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
132 do_node $CLIENT2 chmod 777 $DIR/$tdir
133 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
134 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
135 zconf_umount $CLIENT2 $MOUNT
137 client_up $CLIENT1 || error "$CLIENT1 evicted"
139 rmultiop_stop $CLIENT1 || error "close failed"
140 zconf_mount $CLIENT2 $MOUNT
142 run_test 0c "VBR: open (non O_CREAT) does not checks versions"
148 pre=$(get_version $CLIENT1 $DIR)
149 do_node $CLIENT1 mkfifo $DIR/$tfile
150 post=$(get_version $CLIENT1 $DIR)
151 if (($pre == $post)); then
152 [ -n "$mds20" ] || error "version not changed: pre $pre, post $post"
155 run_test 0d "VBR: create changes version of parent"
158 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
159 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
162 do_node $CLIENT2 chmod 777 $DIR/$tdir
163 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
164 zconf_umount $CLIENT2 $MOUNT
167 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
168 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
169 error "create succeeded unexpectedly"
171 zconf_mount $CLIENT2 $MOUNT
173 run_test 0e "VBR: create checks version of parent"
179 do_node $CLIENT1 mcreate $DIR/$tfile
180 pre=$(get_version $CLIENT1 $DIR)
181 do_node $CLIENT1 rm $DIR/$tfile
182 post=$(get_version $CLIENT1 $DIR)
183 if (($pre == $post)); then
184 [ -n "$mds20" ] || error "version not changed: pre $pre, post $post"
187 run_test 0f "VBR: unlink changes version of parent"
190 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
191 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
192 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
195 do_node $CLIENT2 chmod 777 $DIR/$tdir
196 do_node $CLIENT1 rm $DIR/$tdir/$tfile
197 zconf_umount $CLIENT2 $MOUNT
200 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
201 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
202 error "unlink succeeded unexpectedly"
204 zconf_mount $CLIENT2 $MOUNT
206 run_test 0g "VBR: unlink checks version of parent"
209 local file=$DIR/$tfile
213 do_node $CLIENT1 mcreate $file
214 pre=$(get_version $CLIENT1 $file)
215 do_node $CLIENT1 chown $RUNAS_ID:$RUNAS_GID $file
216 post=$(get_version $CLIENT1 $file)
217 if (($pre == $post)); then
218 error "version not changed: pre $pre, post $post"
221 run_test 0h "VBR: setattr of UID changes versions"
224 local file=$DIR/$tfile
228 do_node $CLIENT1 mcreate $file
229 pre=$(get_version $CLIENT1 $file)
230 do_node $CLIENT1 chgrp $RUNAS_GID $file
231 post=$(get_version $CLIENT1 $file)
232 if (($pre == $post)); then
233 error "version not changed: pre $pre, post $post"
236 run_test 0i "VBR: setattr of GID changes versions"
239 local file=$DIR/$tfile
241 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
242 do_node $CLIENT1 mcreate $file
245 do_node $CLIENT2 chgrp $RUNAS_GID $file
246 do_node $CLIENT1 chown $RUNAS_ID:$RUNAS_GID $file
247 zconf_umount $CLIENT2 $MOUNT
250 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
251 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
252 error "setattr of UID succeeded unexpectedly"
254 zconf_mount $CLIENT2 $MOUNT
256 run_test 0j "VBR: setattr of UID checks versions"
# Sequence visible here: CLIENT1 creates the file, CLIENT2 chowns it, CLIENT1
# then chgrps it, CLIENT2 is unmounted, and CLIENT1 is expected to be evicted.
259 local file=$DIR/$tfile
261 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
262 do_node $CLIENT1 mcreate $file
265 do_node $CLIENT2 chown $RUNAS_ID:$RUNAS_GID $file
266 do_node $CLIENT1 chgrp $RUNAS_GID $file
267 zconf_umount $CLIENT2 $MOUNT
270 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
# NOTE(review): the -g check is handed $UID although the message below talks
# about GID — confirm the intended comparison value.
271 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
272 error "setattr of GID succeeded unexpectedly"
# Bring CLIENT2's mount back.
274 zconf_mount $CLIENT2 $MOUNT
276 run_test 0k "VBR: setattr of GID checks versions"
279 local file=$DIR/$tfile
283 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
284 pre=$(get_version $CLIENT1 $file)
285 do_node $CLIENT1 chmod 666 $file
286 post=$(get_version $CLIENT1 $file)
287 if (($pre == $post)); then
288 error "version not changed: pre $pre, post $post"
291 run_test 0l "VBR: setattr of permission changes versions"
294 local file=$DIR/$tfile
296 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
297 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
300 do_node $CLIENT2 chgrp $RUNAS_GID $file
301 do_node $CLIENT1 chmod 666 $file
302 zconf_umount $CLIENT2 $MOUNT
305 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
306 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
307 error "setattr of permission succeeded unexpectedly"
309 zconf_mount $CLIENT2 $MOUNT
311 run_test 0m "VBR: setattr of permission checks versions"
314 local file=$DIR/$tfile
318 do_node $CLIENT1 mcreate $file
319 pre=$(get_version $CLIENT1 $file)
320 do_node $CLIENT1 chattr +i $file
321 post=$(get_version $CLIENT1 $file)
322 do_node $CLIENT1 chattr -i $file
323 if (($pre == $post)); then
324 error "version not changed: pre $pre, post $post"
327 run_test 0n "VBR: setattr of flags changes versions"
335 if ((${#attr} != 1)); then
336 error "checking multiple attributes not implemented yet"
338 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
342 local file=$DIR/$tfile
345 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
346 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
349 do_node $CLIENT2 chmod 666 $file
350 do_node $CLIENT1 chattr +i $file
351 zconf_umount $CLIENT2 $MOUNT
354 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
355 checkattr $CLIENT1 i $file
357 do_node $CLIENT1 chattr -i $file
358 if [ $rc -eq 0 ]; then
359 error "setattr of flags succeeded unexpectedly"
361 zconf_mount $CLIENT2 $MOUNT
363 run_test 0o "VBR: setattr of flags checks versions"
366 local file=$DIR/$tfile
371 ad_orig=$(do_facet mds "$LCTL get_param *.${mds_svc}.atime_diff")
372 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0"
373 do_node $CLIENT1 mcreate $file
374 pre=$(get_version $CLIENT1 $file)
375 do_node $CLIENT1 touch $file
376 post=$(get_version $CLIENT1 $file)
378 # We don't fail MDS in this test. atime_diff shall be
379 # restored to its original value.
381 do_facet mds "$LCTL set_param $ad_orig"
382 if (($pre != $post)); then
383 error "version changed unexpectedly: pre $pre, post $post"
386 run_test 0p "VBR: setattr of times does not change versions"
389 local file=$DIR/$tfile
393 do_node $CLIENT1 mcreate $file
394 pre=$(get_version $CLIENT1 $file)
395 do_node $CLIENT1 $TRUNCATE $file 1
396 post=$(get_version $CLIENT1 $file)
397 if (($pre != $post)); then
398 error "version changed unexpectedly: pre $pre, post $post"
401 run_test 0q "VBR: setattr of size does not change versions"
404 local file=$DIR/$tfile
409 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
410 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0"
411 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
414 do_node $CLIENT2 chmod 666 $file
415 do_node $CLIENT1 $TRUNCATE $file 1
417 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
418 do_node $CLIENT1 touch $file
419 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
420 zconf_umount $CLIENT2 $MOUNT
423 client_up $CLIENT1 || error "$CLIENT1 evicted"
424 if (($mtime_pre >= $mtime_post)); then
425 error "time not changed: pre $mtime_pre, post $mtime_post"
427 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
428 error "setattr of size failed"
430 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
431 if (($mtime != $mtime_post)); then
432 error "setattr of times failed: expected $mtime_post, got $mtime"
434 zconf_mount $CLIENT2 $MOUNT
436 run_test 0r "VBR: setattr of times and size does not check versions"
444 do_node $CLIENT1 mcreate $DIR/$tfile
445 do_node $CLIENT1 mkdir -p $DIR/$tdir
446 pre=$(get_version $CLIENT1 $DIR/$tfile)
447 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
448 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
449 post=$(get_version $CLIENT1 $DIR/$tfile)
450 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
451 if (($pre == $post)); then
452 error "version of source not changed: pre $pre, post $post"
454 if (($tp_pre == $tp_post)); then
456 error "version of target parent not changed: pre $tp_pre, post $tp_post"
459 run_test 0s "VBR: link changes versions of source and target parent"
462 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
463 do_node $CLIENT1 mcreate $DIR/$tfile
464 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
467 do_node $CLIENT2 chmod 777 $DIR/$tdir
468 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
469 zconf_umount $CLIENT2 $MOUNT
472 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
473 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
474 error "link should fail"
476 zconf_mount $CLIENT2 $MOUNT
478 run_test 0t "VBR: link checks version of target parent"
481 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
482 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
483 do_node $CLIENT1 mkdir -p $DIR/$tdir
486 do_node $CLIENT2 chmod 666 $DIR/$tfile
487 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
488 zconf_umount $CLIENT2 $MOUNT
491 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
492 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
493 error "link should fail"
495 zconf_mount $CLIENT2 $MOUNT
497 run_test 0u "VBR: link checks version of source"
505 do_node $CLIENT1 mcreate $DIR/$tfile
506 do_node $CLIENT1 mkdir -p $DIR/$tdir
507 sp_pre=$(get_version $CLIENT1 $DIR)
508 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
509 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
510 sp_post=$(get_version $CLIENT1 $DIR)
511 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
512 if (($sp_pre == $sp_post)); then
514 error "version of source parent not changed: pre $sp_pre, post $sp_post"
516 if (($tp_pre == $tp_post)); then
518 error "version of target parent not changed: pre $tp_pre, post $tp_post"
521 run_test 0v "VBR: rename changes versions of source parent and target parent"
527 do_node $CLIENT1 mcreate $DIR/$tfile
528 pre=$(get_version $CLIENT1 $DIR)
529 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
530 post=$(get_version $CLIENT1 $DIR)
531 if (($pre == $post)); then
533 error "version of parent not changed: pre $pre, post $post"
536 run_test 0w "VBR: rename within same dir changes version of parent"
539 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
540 do_node $CLIENT1 mcreate $DIR/$tfile
541 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
544 do_node $CLIENT2 chmod 777 $DIR
545 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
546 zconf_umount $CLIENT2 $MOUNT
549 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
550 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
551 error "rename should fail"
553 zconf_mount $CLIENT2 $MOUNT
555 run_test 0x "VBR: rename checks version of source parent"
558 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
559 do_node $CLIENT1 mcreate $DIR/$tfile
560 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
563 do_node $CLIENT2 chmod 777 $DIR/$tdir
564 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
565 zconf_umount $CLIENT2 $MOUNT
568 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
569 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
570 error "rename should fail"
572 zconf_mount $CLIENT2 $MOUNT
574 run_test 0y "VBR: rename checks version of target parent"
576 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
579 echo "mount client $CLIENT1,$CLIENT2..."
580 zconf_mount_clients $CLIENT1 $DIR
581 zconf_mount_clients $CLIENT2 $DIR
583 do_node $CLIENT2 mkdir -p $DIR/$tdir
585 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
586 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
587 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
588 zconf_umount $CLIENT2 $DIR
591 # recovery shouldn't fail due to missing client 2
592 client_up $CLIENT1 || return 1
594 # All 50 files should have been replayed
595 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
596 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
598 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
599 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
601 zconf_umount_clients $CLIENTS $DIR
604 run_test 1 "VBR: client during replay doesn't affect another one"
606 test_2a() { # was test_2
607 #ls -al $DIR/$tdir/$tfile
609 zconf_mount_clients $CLIENT1 $DIR
610 zconf_mount_clients $CLIENT2 $DIR
612 do_node $CLIENT2 mkdir -p $DIR/$tdir
614 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
615 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
616 #do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
617 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
618 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
619 zconf_umount $CLIENT2 $DIR
622 # recovery shouldn't fail due to missing client 2
623 client_up $CLIENT1 || return 1
625 # All 50 files should have been replayed
626 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
627 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
629 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
631 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
633 zconf_umount_clients $CLIENTS $DIR
636 run_test 2a "VBR: lost data due to missed REMOTE client during replay"
639 # This test uses three Lustre clients on two hosts.
641 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
642 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
643 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
649 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0"
650 zconf_mount $CLIENT1 $MOUNT
651 zconf_mount $CLIENT2 $MOUNT2
652 zconf_mount $CLIENT2 $MOUNT1
# Both test files are created through Lustre client 1 with mode 0644.
653 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
654 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
657 # Save an MDT transaction number before recovery.
659 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
662 # Comments on the replay sequence state the expected result
666 # "U" Unable to replay.
670 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
671 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
672 do_node $CLIENT2 chgrp $RUNAS_GID $DIR2/$tfile-a # U
673 do_node $CLIENT1 chown $RUNAS_ID:$RUNAS_GID $DIR/$tfile-a # J
674 do_node $CLIENT2 $TRUNCATE $DIR2/$tfile-b 1 # U
675 do_node $CLIENT2 chgrp $RUNAS_GID $DIR1/$tfile-b # R
676 do_node $CLIENT1 chown $RUNAS_ID:$RUNAS_GID $DIR/$tfile-b # R
# Unmount Lustre client 2 ($CLIENT2:$MOUNT2); its operations are the ones
# marked "U" above.
677 zconf_umount $CLIENT2 $MOUNT2
# Client 1 is expected to be evicted; client 3 is expected to stay up.
680 client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted"
681 client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted"
684 # Check the MDT epoch. $post must be the first transaction
685 # number assigned after recovery.
687 do_node $CLIENT2 touch $DIR1/$tfile
688 post=$(get_version $CLIENT2 $DIR1/$tfile)
# The upper 32 bits of the version are compared as the epoch; it must differ
# between pre and post.
689 if (($(($pre >> 32)) == $((post >> 32)))); then
690 error "epoch not changed: pre $pre, post $post"
# The lower 32 bits of post must equal 1.
692 if (($(($post & 0x00000000ffffffff)) != 1)); then
693 error "transno should restart from one: got $post"
696 do_node $CLIENT2 stat $DIR1/$tfile-a
697 do_node $CLIENT2 stat $DIR1/$tfile-b
# Expected final state: file -a has mode 0666 with $UID given for both owner
# and group; file -b has mode 0666 with RUNAS_ID/RUNAS_GID.
# NOTE(review): the -g check for file -a is handed $UID, and both failure
# messages print $DIR although the checked paths are under $DIR1 — confirm.
699 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
700 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
701 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_GID \
702 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
704 zconf_umount $CLIENT2 $MOUNT1
705 zconf_umount $CLIENT1 $MOUNT
707 run_test 2b "VBR: 3 clients: some, none, and all reqs replayed"
710 zconf_mount_clients $CLIENT1 $DIR
711 zconf_mount_clients $CLIENT2 $DIR
713 #make sure the time will change
714 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0" || return
715 do_node $CLIENT1 touch $DIR/$tfile
716 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
720 do_node $CLIENT2 touch $DIR/$tfile
721 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
723 do_node $CLIENT1 touch $DIR/$tfile
725 do_node $CLIENT2 rm $DIR/$tfile
726 zconf_umount $CLIENT2 $DIR
729 # recovery shouldn't fail due to missing client 2
730 client_up $CLIENT1 || return 1
731 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
733 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
735 zconf_umount_clients $CLIENTS $DIR
739 run_test 3a "VBR: setattr of time/size doesn't change version"
742 zconf_mount_clients $CLIENT1 $DIR
743 zconf_mount_clients $CLIENT2 $DIR
745 #make sure the time will change
746 do_facet mds "$LCTL set_param *.${mds_svc}.atime_diff=0" || return
747 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=0" || return
748 do_node $CLIENT1 touch $DIR/$tfile
749 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
753 do_node $CLIENT2 chmod +x $DIR/$tfile
754 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
756 do_node $CLIENT1 chmod -x $DIR/$tfile
757 zconf_umount $CLIENT2 $DIR
760 # recovery should fail due to missing client 2
761 client_evicted $CLIENT1 || return 1
763 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
764 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
766 zconf_umount_clients $CLIENTS $DIR
770 run_test 3b "VBR: setattr of permissions changes version"
# Skipped unless FAILURE_MODE is HARD, and skipped when RUNAS_ID equals $UID.
773 [ "$FAILURE_MODE" = HARD ] || \
774 { skip "The HARD failure is needed" && return 0; }
776 [ $RUNAS_ID -eq $UID ] && skip_env "RUNAS_ID = UID = $UID -- skipping" && return
778 zconf_mount_clients $CLIENT1 $DIR
779 zconf_mount_clients $CLIENT2 $DIR
781 # check that permission changes are synced
782 do_facet mds "$LCTL set_param *.${mds_svc}.sync_permission=1"
784 do_node $CLIENT1 mkdir -p $DIR/d3c/sub || error
785 #chown -R $RUNAS_ID $MOUNT1/d3
786 do_node $CLIENT1 ls -la $DIR/d3c
788 # only HARD failure will work as we use sync operation
790 do_node $CLIENT2 mcreate $DIR/d3c/$tfile-2
# NOTE(review): chmod receives $UID as an extra operand after the mode, so it
# is treated as a path name — looks like a stray argument; confirm intent.
792 do_node $CLIENT1 chmod 0700 $UID $DIR/d3c
794 do_node $CLIENT1 mcreate $DIR/d3c/sub/$tfile
# NOTE(review): the > redirection is outside any quoting, so the shell running
# this script performs it; only the echo is handed to do_node — confirm that
# writing through the local mount is intended.
795 do_node $CLIENT1 echo "Top Secret" > $DIR/d3c/sub/$tfile
796 #check user can't access new file
# Each access below is expected to FAIL; a success returns a distinct code.
797 do_node $CLIENT2 $RUNAS ls $DIR/d3c && return 3
798 do_node $CLIENT1 $RUNAS ls $DIR/d3c && return 4
799 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 5
801 zconf_umount $CLIENT2 $DIR
804 # recovery shouldn't fail due to missing client 2
805 client_up $CLIENT1 || return 1
807 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
# The file must still be unreadable through $RUNAS from both clients.
808 do_node $CLIENT1 $RUNAS cat $DIR/d3c/sub/$tfile && return 6
809 do_node $CLIENT2 $RUNAS cat $DIR/d3c/sub/$tfile && return 7
# NOTE(review): this reset uses the mds.${mds_svc} prefix while the earlier
# set_param in this test uses *.${mds_svc} — confirm both address the same
# parameter.
810 do_facet mds "$LCTL set_param mds.${mds_svc}.sync_permission=0"
814 run_test 3c "VBR: permission dependency failure"
# "Deactivate" a client by setting lustre.fail_loc=0x50d on it through sysctl.
816 vbr_deactivate_client() {
818 echo "Deactivating client $client";
819 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# "Activate" a client again by resetting lustre.fail_loc to 0x0.
822 vbr_activate_client() {
824 echo "Activating client $client";
825 do_node $client "sysctl -w lustre.fail_loc=0x0"
# Succeeds only when the "lctl dl" output on $client contains neither "mdt"
# nor "ost"; lctl dl is run on the client twice, once per grep.
831 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
832 [ -z "$(do_node $client lctl dl | grep ost)" ]
836 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
838 remote_server $CLIENT2 || \
839 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
841 zconf_mount_clients $CLIENT1 $DIR
842 zconf_mount_clients $CLIENT2 $DIR
844 do_node $CLIENT2 mkdir -p $DIR/$tdir
846 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
847 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
848 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
849 vbr_deactivate_client $CLIENT2
852 client_up $CLIENT1 || return 1
854 # All 50 files should have been replayed
855 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
856 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
858 vbr_activate_client $CLIENT2
859 client_up $CLIENT2 || return 4
860 # All 25 files from client2 should have been replayed
861 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
863 zconf_umount_clients $CLIENTS $DIR
866 run_test 4a "fail MDS, delayed recovery"
# Skipped when delayed recovery is unsupported or when remote_server fails
# for CLIENT2 (reported as "is on the server node").
869 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
871 remote_server $CLIENT2 || \
872 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
874 zconf_mount_clients $CLIENT1 $DIR
875 zconf_mount_clients $CLIENT2 $DIR
# Each client creates 25 files with createmany -o, then CLIENT2 is deactivated.
878 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
879 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
880 vbr_deactivate_client $CLIENT2
883 client_up $CLIENT1 || return 1
885 # create another set of files
886 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
# Re-activate CLIENT2 and require it to come up.
888 vbr_activate_client $CLIENT2
889 client_up $CLIENT2 || return 2
891 # All files should have been replayed
892 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
893 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
894 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
896 zconf_umount_clients $CLIENTS $DIR
898 run_test 4b "fail MDS, normal operation, delayed open recovery"
# Skipped when delayed recovery is unsupported or when remote_server fails
# for CLIENT2 (reported as "is on the server node").
901 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
903 remote_server $CLIENT2 || \
904 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
906 zconf_mount_clients $CLIENT1 $DIR
907 zconf_mount_clients $CLIENT2 $DIR
# Each client creates 25 files with createmany -m, then CLIENT2 is deactivated.
910 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
911 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
912 vbr_deactivate_client $CLIENT2
915 client_up $CLIENT1 || return 1
917 # create another set of files
918 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
# Re-activate CLIENT2 and require it to come up.
920 vbr_activate_client $CLIENT2
921 client_up $CLIENT2 || return 2
923 # All files should have been replayed
924 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
925 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
926 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
928 zconf_umount_clients $CLIENTS $DIR
930 run_test 4c "fail MDS, normal operation, delayed recovery"
# Skipped when delayed recovery is unsupported or when remote_server fails
# for CLIENT2 (reported as "is on the server node").
933 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
935 remote_server $CLIENT2 || \
936 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
938 zconf_mount_clients $CLIENT1 $DIR
939 zconf_mount_clients $CLIENT2 $DIR
# CLIENT1 creates 25 files, CLIENT2 creates one, CLIENT1 creates one more,
# then CLIENT2 is deactivated.
942 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
943 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
944 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
945 vbr_deactivate_client $CLIENT2
# In this test CLIENT1 is expected to be evicted.
948 client_evicted $CLIENT1 || return 1
950 vbr_activate_client $CLIENT2
951 client_up $CLIENT2 || return 2
953 # First 25 files should have been replayed
954 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
955 # Third file fails due to missed client2
956 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
957 # file from client2 should exist
958 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
960 zconf_umount_clients $CLIENTS $DIR
962 run_test 5a "fail MDS, delayed recovery should fail"
965 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
967 remote_server $CLIENT2 || \
968 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
970 zconf_mount_clients $CLIENT1 $DIR
971 zconf_mount_clients $CLIENT2 $DIR
974 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
975 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
976 vbr_deactivate_client $CLIENT2
979 client_up $CLIENT1 || return 1
980 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
982 # create another set of files
983 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
985 vbr_activate_client $CLIENT2
986 client_evicted $CLIENT2 || return 4
987 # file from client2 should fail
988 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
990 # All 50 files from client 1 should have been replayed
991 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
992 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
994 zconf_umount_clients $CLIENTS $DIR
996 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
999 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1001 remote_server $CLIENT2 || \
1002 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1004 zconf_mount_clients $CLIENT1 $DIR
1005 zconf_mount_clients $CLIENT2 $DIR
1007 do_node $CLIENT2 mkdir -p $DIR/$tdir
1009 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1010 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1011 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1012 vbr_deactivate_client $CLIENT2
1015 # replay only 5 requests
1016 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
1017 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
1018 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
1020 # vbr_activate_client $CLIENT2
1021 # need way to know that client stops replays
1025 client_up $CLIENT1 || return 1
1027 # All files should have been replayed
1028 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1029 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1030 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1032 zconf_umount_clients $CLIENTS $DIR
1035 run_test 6a "fail MDS, delayed recovery, fail MDS"
1038 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1040 remote_server $CLIENT2 || \
1041 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1043 zconf_mount_clients $CLIENT1 $DIR
1044 zconf_mount_clients $CLIENT2 $DIR
1046 do_node $CLIENT2 mkdir -p $DIR/$tdir
1048 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1049 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1050 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1051 vbr_deactivate_client $CLIENT2
1054 vbr_activate_client $CLIENT2
1055 client_up $CLIENT2 || return 4
1058 client_up $CLIENT1 || return 1
1060 # All files should have been replayed
1061 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1062 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1063 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1065 zconf_umount_clients $CLIENTS $DIR
1068 run_test 7a "fail MDS, delayed recovery, fail MDS"
1071 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1073 remote_server $CLIENT2 || \
1074 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1076 zconf_mount_clients $CLIENT1 $DIR
1077 zconf_mount_clients $CLIENT2 $DIR
1079 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1080 do_node $CLIENT2 rm -f $DIR/$tfile
1082 rmultiop_stop $CLIENT2 || return 2
1084 vbr_deactivate_client $CLIENT2
1086 client_up $CLIENT1 || return 3
1087 #client1 is back and will try to open orphan
1088 vbr_activate_client $CLIENT2
1089 client_up $CLIENT2 || return 4
1091 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1092 zconf_umount_clients $CLIENTS $DIR
1095 run_test 8a "orphans are kept until delayed recovery"
1098 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1100 remote_server $CLIENT2 || \
1101 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1103 zconf_mount_clients $CLIENT1 $DIR
1104 zconf_mount_clients $CLIENT2 $DIR
1106 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1108 do_node $CLIENT1 rm -f $DIR/$tfile
1110 vbr_deactivate_client $CLIENT2
1112 client_up $CLIENT1 || return 2
1113 #client1 is back and will try to open orphan
1114 vbr_activate_client $CLIENT2
1115 client_up $CLIENT2 || return 3
1117 rmultiop_stop $CLIENT2 || return 1
1118 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1119 zconf_umount_clients $CLIENTS $DIR
1122 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
1125 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1127 remote_server $CLIENT2 || \
1128 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1130 zconf_mount_clients $CLIENT1 $DIR
1131 zconf_mount_clients $CLIENT2 $DIR
1133 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1135 do_node $CLIENT1 rm -f $DIR/$tfile
1136 rmultiop_stop $CLIENT2 || return 2
1138 vbr_deactivate_client $CLIENT2
1140 client_up $CLIENT1 || return 3
1141 #client1 is back and will try to open orphan
1142 vbr_activate_client $CLIENT2
1143 client_up $CLIENT2 || return 4
1145 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1146 zconf_umount_clients $CLIENTS $DIR
1149 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
1152 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1154 remote_server $CLIENT2 || \
1155 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1157 zconf_mount_clients $CLIENT1 $DIR
1158 zconf_mount_clients $CLIENT2 $DIR
1160 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc || return 1
1161 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 2
1163 do_node $CLIENT1 rm -f $DIR/$tfile
1164 rmultiop_stop $CLIENT2 || return 3
1165 rmultiop_stop $CLIENT1 || return 4
1167 vbr_deactivate_client $CLIENT2
1169 client_up $CLIENT1 || return 6
1171 #client1 is back and will try to open orphan
1172 vbr_activate_client $CLIENT2
1173 client_up $CLIENT2 || return 8
1175 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1176 zconf_umount_clients $CLIENTS $DIR
1179 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
1182 zconf_mount $CLIENT1 $DIR
1183 zconf_mount $CLIENT2 $DIR
1185 do_node $CLIENT1 mcreate $DIR/$tfile
1186 do_node $CLIENT1 mkdir $DIR/$tfile-2
1188 # missed replay from client1 will lead to recovery by versions
1189 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1190 do_node $CLIENT2 rm $DIR/$tfile || return 1
1191 do_node $CLIENT2 touch $DIR/$tfile || return 2
1193 zconf_umount $CLIENT1 $DIR
1195 client_up $CLIENT2 || return 6
1197 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1198 zconf_umount_clients $CLIENTS $DIR
1201 run_test 8e "create | unlink, create shouldn't fail"
1204 zconf_mount_clients $CLIENT1 $DIR
1205 zconf_mount_clients $CLIENT2 $DIR
1207 do_node $CLIENT1 touch $DIR/$tfile
1208 do_node $CLIENT1 mkdir $DIR/$tfile-2
1210 # missed replay from client1 will lead to recovery by versions
1211 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1212 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1213 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1215 zconf_umount $CLIENT1 $DIR
1217 client_up $CLIENT2 || return 6
1219 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1220 zconf_umount $CLIENT2 $DIR
1223 run_test 8f "create | unlink, create shouldn't fail"
1226 zconf_mount_clients $CLIENT1 $DIR
1227 zconf_mount_clients $CLIENT2 $DIR
1229 do_node $CLIENT1 touch $DIR/$tfile
1230 do_node $CLIENT1 mkdir $DIR/$tfile-2
1232 # missed replay from client1 will lead to recovery by versions
1233 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1234 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1235 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1237 zconf_umount $CLIENT1 $DIR
1239 do_node $CLIENT2 df $DIR || return 6
1241 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1242 zconf_umount $CLIENT2 $DIR
1245 run_test 8g "create | unlink, create shouldn't fail"
# Skipped when delayed recovery is unsupported or DBENCH_LIB is empty.
1248 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1250 [ -z "$DBENCH_LIB" ] && skip_env "DBENCH_LIB is not set" && return 0
1252 zconf_mount_clients $CLIENTS $DIR
# Load command line: "rundbench 1 -t 60" (presumably one dbench process for
# 60 seconds — confirm against rundbench).
1254 local duration="-t 60"
1255 local cmd="rundbench 1 $duration "
# For every client in the comma-separated CLIENTS list, start the load through
# $PDSH in the background and save a PID in the file pid.<client>.
# PID is assigned on a line not shown in this excerpt.
1257 for CLIENT in ${CLIENTS//,/ }; do
1258 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
1260 echo $PID >pid.$CLIENT
1261 echo "Started load PID=`cat pid.$CLIENT`"
1265 sleep 3 # give clients time to do operations
1267 vbr_deactivate_client $CLIENT2
1269 log "$TESTNAME fail mds 1"
1272 # wait for client to reconnect to MDS
1275 vbr_activate_client $CLIENT2
1276 client_up $CLIENT2 || return 4
# Read each client's saved PID back and report the load's result; rc is set
# on lines not shown in this excerpt.
1278 for CLIENT in ${CLIENTS//,/ }; do
1279 PID=`cat pid.$CLIENT`
1282 echo "load on ${CLIENT} returned $rc"
1285 zconf_umount_clients $CLIENTS $DIR
1287 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# End of script: remount all clients when CLIENTS is set, call complete with
# the script's base name and the elapsed SECONDS, then call
# check_and_cleanup_lustre.
1289 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
1291 complete $(basename $0) $SECONDS
1292 check_and_cleanup_lustre