# Suite setup: exclusion list, framework/config sourcing, client-count
# preconditions and initial filesystem preparation.
# "2" plus whatever $REPLAY_VBR_EXCEPT holds; presumably consumed by the
# test framework as the list of tests to always skip -- confirm.
6 ALWAYS_EXCEPT="2 $REPLAY_VBR_EXCEPT"
# Default PTLDEBUG to -1 unless it is already set and non-empty.
9 PTLDEBUG=${PTLDEBUG:--1}
# Default LUSTRE to the parent of the directory containing this script.
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
13 . $LUSTRE/tests/test-framework.sh
# Source $CONFIG; if unset, it is assigned $LUSTRE/tests/cfg/$NAME.sh first.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When SLOW is "no", clear EXCEPT_SLOW.
19 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# Skip and exit 0 unless CLIENTS is non-empty and CLIENTCOUNT is at least 2.
21 [ -n "$CLIENTS" ] || { skip "Need two or more clients" && exit 0; }
22 [ $CLIENTCOUNT -ge 2 ] || \
23 { skip "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
# Skip and exit 0 when remote_mds_nodsh succeeds.
25 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# NOTE(review): this assigns ALWAYS_EXCEPT its own value, so it has no visible
# effect -- confirm whether an extra exclusion was meant to be appended.
26 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
# For the "ncli" configuration, clear MOUNT_2.
27 [ "$NAME" = "ncli" ] && MOUNT_2=""
31 check_and_setup_lustre
# Remove entries under $DIR whose names start with "d" or "f" plus a digit.
32 rm -rf $DIR/[df][0-9]*
# Start the lctl debug daemon only when DAEMONFILE is non-empty.
34 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
41 # We need to run do_node in the background, because pdsh does not exit
42 # while a child process of the script it ran still exists.
43 # I.e. pdsh does not exit when runmultiop_bg_pause has exited,
44 # because of multiop_bg_pause -> $MULTIOP_PROG &
45 # For the same reason we need to sleep a bit after do_node starts,
46 # to let runmultiop_bg_pause start multiop and
47 # update /tmp/multiop_bg.pid.
48 # The rm of /tmp/multiop_bg.pid guarantees here that
49 # the /tmp/multiop_bg.pid file we read is the one
50 # written by runmultiop_bg_pause.
# Fragment of the helper that starts multiop on a remote client (presumably
# rmultiop_start, as called by the tests; its header is not visible here).
# Pid file name is made unique with the pid of this shell ($$).
52 local pid_file=$TMP/multiop_bg.pid.$$
# On $client: remove any stale pid file, then run runmultiop_bg_pause with
# MULTIOP_PID_FILE pointing at it. The whole do_node runs in the background.
53 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
# Read the pid back from the client; an empty result is an error.
57 multiop_pid=$(do_node $client cat $pid_file)
58 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
# Export <client_var_name>_multiop_pid and <client_var_name>_do_node_pid.
# NOTE(review): $pid is not assigned in the visible lines -- presumably it is
# taken from $! after the background do_node; confirm.
59 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
60 eval export $(client_var_name $client)_do_node_pid=$pid
# Report the stored pid through indirect expansion of the exported name.
61 local var=$(client_var_name $client)_multiop_pid
62 echo client $client multiop_bg started multiop_pid=${!var}
# Fragment of the helper that stops a remote multiop (presumably
# rmultiop_stop; its header is not visible here).
# These locals hold variable NAMES; the values are read with ${!name}.
68 local multiop_pid=$(client_var_name $client)_multiop_pid
# do_node_pid is not used in the visible lines of this fragment.
69 local do_node_pid=$(client_var_name $client)_do_node_pid
71 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
# Send SIGUSR1 to the multiop process on the client.
72 do_node $client kill -USR1 ${!multiop_pid}
# Fragment of the version query helper (presumably get_version, which the
# tests call as "get_version <client> <file>"; header not visible here).
# var holds the NAME "<SINGLEMDS>_svc"; ${!var} is used as the lctl device.
78 local var=${SINGLEMDS}_svc
# Resolve the file to a FID on the client, then query the object version of
# that FID on the MDS facet.
83 fid=$(do_node $client $LFS path2fid $file)
84 do_facet $SINGLEMDS $LCTL --device ${!var} getobjversion $fid
# Test 0a body (fragment): create a file, record its version, open it with
# O_RDWR, and fail if the version is different afterwards.
88 local file=$DIR/$tfile
92 do_node $CLIENT1 mcreate $file
93 pre=$(get_version $CLIENT1 $file)
94 do_node $CLIENT1 openfile -f O_RDWR $file
95 post=$(get_version $CLIENT1 $file)
# The two versions are compared numerically; any difference is an error.
96 if (($pre != $post)); then
97 error "version changed unexpectedly: pre $pre, post $post"
100 run_test 0a "open and close do not change versions"
# Test 0b body (fragment): after the replay barrier CLIENT2 chmods the parent
# directory and CLIENT1 creates a file in it; CLIENT2 is then unmounted and
# the MDS failed over.
103 local var=${SINGLEMDS}_svc
105 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
106 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
108 replay_barrier $SINGLEMDS
109 do_node $CLIENT2 chmod 777 $DIR/$tdir
110 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
111 zconf_umount $CLIENT2 $MOUNT
112 facet_failover $SINGLEMDS
# A successful df on CLIENT1 is reported as "not evicted".
114 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" fails; -a presumably asserts the file is absent
# -- confirm against the checkstat tool.
115 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
116 error "open succeeded unexpectedly"
# Remount CLIENT2 for the following tests.
118 zconf_mount $CLIENT2 $MOUNT
120 run_test 0b "open (O_CREAT) checks version of parent"
# Test 0c body (fragment): the file exists before the barrier. CLIENT2 chmods
# the directory and the file, CLIENT1 opens the existing file through
# rmultiop_start, then CLIENT2 is unmounted and the MDS failed over.
123 local var=${SINGLEMDS}_svc
125 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
126 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
127 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
129 replay_barrier $SINGLEMDS
130 do_node $CLIENT2 chmod 777 $DIR/$tdir
131 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
132 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
133 zconf_umount $CLIENT2 $MOUNT
134 facet_failover $SINGLEMDS
# Unlike 0b, df on CLIENT1 must succeed here, and so must rmultiop_stop.
136 do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
137 rmultiop_stop $CLIENT1 || error "close failed"
138 zconf_mount $CLIENT2 $MOUNT
140 run_test 0c "open (non O_CREAT) does not checks versions"
# Test 0d body (fragment): the version of $DIR is sampled before and after a
# mkfifo inside it; equal versions are an error.
146 pre=$(get_version $CLIENT1 $DIR)
147 do_node $CLIENT1 mkfifo $DIR/$tfile
148 post=$(get_version $CLIENT1 $DIR)
149 if (($pre == $post)); then
150 error "version not changed: pre $pre, post $post"
153 run_test 0d "create changes version of parent"
# Test 0e body (fragment): same shape as 0b, but CLIENT1 creates a fifo in
# the directory that CLIENT2 chmodded after the barrier.
156 local var=${SINGLEMDS}_svc
158 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
159 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
161 replay_barrier $SINGLEMDS
162 do_node $CLIENT2 chmod 777 $DIR/$tdir
163 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
164 zconf_umount $CLIENT2 $MOUNT
165 facet_failover $SINGLEMDS
# A successful df on CLIENT1 is reported as "not evicted".
167 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" fails (presumably: the fifo exists -- confirm).
168 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
169 error "create succeeded unexpectedly"
171 zconf_mount $CLIENT2 $MOUNT
173 run_test 0e "create checks version of parent"
# Test 0f body (fragment): the version of $DIR is sampled before and after
# removing a file from it; equal versions are an error.
179 do_node $CLIENT1 mcreate $DIR/$tfile
180 pre=$(get_version $CLIENT1 $DIR)
181 do_node $CLIENT1 rm $DIR/$tfile
182 post=$(get_version $CLIENT1 $DIR)
183 if (($pre == $post)); then
184 error "version not changed: pre $pre, post $post"
187 run_test 0f "unlink changes version of parent"
# Test 0g body (fragment): the file is created before the barrier; afterwards
# CLIENT2 chmods the parent and CLIENT1 removes the file, then CLIENT2 is
# unmounted and the MDS failed over.
190 local var=${SINGLEMDS}_svc
192 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
193 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
194 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
196 replay_barrier $SINGLEMDS
197 do_node $CLIENT2 chmod 777 $DIR/$tdir
198 do_node $CLIENT1 rm $DIR/$tdir/$tfile
199 zconf_umount $CLIENT2 $MOUNT
200 facet_failover $SINGLEMDS
202 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Here the error fires when "$CHECKSTAT -a" SUCCEEDS (no negation, unlike the
# create tests); -a presumably asserts absence -- confirm.
203 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
204 error "unlink succeeded unexpectedly"
206 zconf_mount $CLIENT2 $MOUNT
208 run_test 0g "unlink checks version of parent"
# Test 0h body (fragment): the file's version is sampled before and after
# "chown $RUNAS_ID"; equal versions are an error.
211 local file=$DIR/$tfile
215 do_node $CLIENT1 mcreate $file
216 pre=$(get_version $CLIENT1 $file)
217 do_node $CLIENT1 chown $RUNAS_ID $file
218 post=$(get_version $CLIENT1 $file)
219 if (($pre == $post)); then
220 error "version not changed: pre $pre, post $post"
223 run_test 0h "setattr of UID changes versions"
# Test 0i body (fragment): the file's version is sampled before and after
# "chown :$RUNAS_ID" (group only); equal versions are an error.
226 local file=$DIR/$tfile
230 do_node $CLIENT1 mcreate $file
231 pre=$(get_version $CLIENT1 $file)
232 do_node $CLIENT1 chown :$RUNAS_ID $file
233 post=$(get_version $CLIENT1 $file)
234 if (($pre == $post)); then
235 error "version not changed: pre $pre, post $post"
238 run_test 0i "setattr of GID changes versions"
# Test 0j body (fragment): after the barrier CLIENT2 changes the group and
# CLIENT1 changes the owner of the same file; CLIENT2 is then unmounted and
# the MDS failed over.
241 local file=$DIR/$tfile
242 local var=${SINGLEMDS}_svc
244 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
245 do_node $CLIENT1 mcreate $file
247 replay_barrier $SINGLEMDS
248 do_node $CLIENT2 chown :$RUNAS_ID $file
249 do_node $CLIENT1 chown $RUNAS_ID $file
250 zconf_umount $CLIENT2 $MOUNT
251 facet_failover $SINGLEMDS
253 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error unless the owner check against "#$UID" passes. The \\\# escaping
# presumably keeps a literal '#' through the remote shell -- confirm.
254 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
255 error "setattr of UID succeeded unexpectedly"
257 zconf_mount $CLIENT2 $MOUNT
259 run_test 0j "setattr of UID checks versions"
# Test 0k body (fragment): mirror of 0j -- after the barrier CLIENT2 changes
# the owner and CLIENT1 changes the group; CLIENT2 is then unmounted and the
# MDS failed over.
262 local file=$DIR/$tfile
263 local var=${SINGLEMDS}_svc
265 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
266 do_node $CLIENT1 mcreate $file
268 replay_barrier $SINGLEMDS
269 do_node $CLIENT2 chown $RUNAS_ID $file
270 do_node $CLIENT1 chown :$RUNAS_ID $file
271 zconf_umount $CLIENT2 $MOUNT
272 facet_failover $SINGLEMDS
274 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# NOTE(review): the group check (-g) compares against $UID, i.e. a user id is
# used as the expected group id -- confirm this is intended.
275 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
276 error "setattr of GID succeeded unexpectedly"
278 zconf_mount $CLIENT2 $MOUNT
280 run_test 0k "setattr of GID checks versions"
# Test 0l body (fragment): the file is created with mode 0644; its version is
# sampled before and after "chmod 666". Equal versions are an error.
283 local file=$DIR/$tfile
287 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
288 pre=$(get_version $CLIENT1 $file)
289 do_node $CLIENT1 chmod 666 $file
290 post=$(get_version $CLIENT1 $file)
291 if (($pre == $post)); then
292 error "version not changed: pre $pre, post $post"
295 run_test 0l "setattr of permission changes versions"
# Test 0m body (fragment): after the barrier CLIENT2 changes the group and
# CLIENT1 chmods the file to 666; CLIENT2 is then unmounted and the MDS
# failed over.
298 local file=$DIR/$tfile
299 local var=${SINGLEMDS}_svc
301 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
302 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
304 replay_barrier $SINGLEMDS
305 do_node $CLIENT2 chown :$RUNAS_ID $file
306 do_node $CLIENT1 chmod 666 $file
307 zconf_umount $CLIENT2 $MOUNT
308 facet_failover $SINGLEMDS
310 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# The mode is expected to still be the creation mode 0644.
311 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
312 error "setattr of permission succeeded unexpectedly"
314 zconf_mount $CLIENT2 $MOUNT
316 run_test 0m "setattr of permission checks versions"
# Test 0n body (fragment): the file's version is sampled before and after
# "chattr +i"; equal versions are an error.
319 local file=$DIR/$tfile
323 do_node $CLIENT1 mcreate $file
324 pre=$(get_version $CLIENT1 $file)
325 do_node $CLIENT1 chattr +i $file
326 post=$(get_version $CLIENT1 $file)
# The flag is cleared again before the comparison is evaluated.
327 do_node $CLIENT1 chattr -i $file
328 if (($pre == $post)); then
329 error "version not changed: pre $pre, post $post"
332 run_test 0n "setattr of flags changes versions"
# Fragment of the attribute check helper (presumably checkattr, called in
# test 0o as "checkattr <client> <attr> <file>"; header not visible here).
# Only a single-character attribute is accepted.
340 if ((${#attr} != 1)); then
341 error "checking multiple attributes not implemented yet"
# Succeeds when the first space-delimited field of the lsattr output on the
# client contains $attr.
343 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
# Test 0o body (fragment): after the barrier CLIENT2 chmods the file and
# CLIENT1 sets the +i flag on it; CLIENT2 is then unmounted and the MDS
# failed over.
347 local file=$DIR/$tfile
349 local var=${SINGLEMDS}_svc
351 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
352 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
354 replay_barrier $SINGLEMDS
355 do_node $CLIENT2 chmod 666 $file
356 do_node $CLIENT1 chattr +i $file
357 zconf_umount $CLIENT2 $MOUNT
358 facet_failover $SINGLEMDS
360 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# NOTE(review): $rc is not assigned in the visible lines -- presumably it
# captures the status of this checkattr call; confirm.
361 checkattr $CLIENT1 i $file
# The flag is cleared before the result is evaluated.
363 do_node $CLIENT1 chattr -i $file
364 if [ $rc -eq 0 ]; then
365 error "setattr of flags succeeded unexpectedly"
367 zconf_mount $CLIENT2 $MOUNT
369 run_test 0o "setattr of flags checks versions"
# Test 0p body (fragment): with atime_diff set to 0, the file's version is
# sampled before and after a touch; a difference is an error.
372 local file=$DIR/$tfile
376 local var=${SINGLEMDS}_svc
# Save the get_param output; it is handed back to set_param below unchanged.
378 ad_orig=$(do_facet $SINGLEMDS "$LCTL get_param mdd.${!var}.atime_diff")
379 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
380 do_node $CLIENT1 mcreate $file
381 pre=$(get_version $CLIENT1 $file)
382 do_node $CLIENT1 touch $file
383 post=$(get_version $CLIENT1 $file)
385 # We don't fail MDS in this test. atime_diff shall be
386 # restored to its original value.
388 do_facet $SINGLEMDS "$LCTL set_param $ad_orig"
389 if (($pre != $post)); then
390 error "version changed unexpectedly: pre $pre, post $post"
393 run_test 0p "setattr of times does not change versions"
# Test 0q body (fragment): the file's version is sampled before and after
# "truncate $file 1"; a difference is an error.
396 local file=$DIR/$tfile
400 do_node $CLIENT1 mcreate $file
401 pre=$(get_version $CLIENT1 $file)
402 do_node $CLIENT1 truncate $file 1
403 post=$(get_version $CLIENT1 $file)
404 if (($pre != $post)); then
405 error "version changed unexpectedly: pre $pre, post $post"
408 run_test 0q "setattr of size does not change versions"
# Test 0r body (fragment): after the barrier CLIENT2 chmods the file while
# CLIENT1 truncates and touches it; after CLIENT2 unmount and MDS failover
# CLIENT1 must still be connected, and the size and mtime must have survived.
411 local file=$DIR/$tfile
415 local var=${SINGLEMDS}_svc
417 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
418 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
419 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
421 replay_barrier $SINGLEMDS
422 do_node $CLIENT2 chmod 666 $file
423 do_node $CLIENT1 truncate $file 1
# mtime (seconds since the epoch, %Y) is sampled around the touch.
425 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
426 do_node $CLIENT1 touch $file
427 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
428 zconf_umount $CLIENT2 $MOUNT
429 facet_failover $SINGLEMDS
431 do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
# The touch must have advanced mtime; equal or smaller is an error.
432 if (($mtime_pre >= $mtime_post)); then
433 error "time not changed: pre $mtime_pre, post $mtime_post"
# After failover the size must be 1 and mtime must equal the post-touch value.
435 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
436 error "setattr of size failed"
438 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
439 if (($mtime != $mtime_post)); then
440 error "setattr of times failed: expected $mtime_post, got $mtime"
442 zconf_mount $CLIENT2 $MOUNT
444 run_test 0r "setattr of times and size does not check versions"
# Test 0s body (fragment): versions of the link source and of the target
# directory are sampled before and after a hard link is created; either one
# staying equal is an error.
452 do_node $CLIENT1 mcreate $DIR/$tfile
453 do_node $CLIENT1 mkdir -p $DIR/$tdir
454 pre=$(get_version $CLIENT1 $DIR/$tfile)
455 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
456 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
457 post=$(get_version $CLIENT1 $DIR/$tfile)
458 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
459 if (($pre == $post)); then
460 error "version of source not changed: pre $pre, post $post"
462 if (($tp_pre == $tp_post)); then
463 error "version of target parent not changed: pre $tp_pre, post $tp_post"
466 run_test 0s "link changes versions of source and target parent"
# Test 0t body (fragment): after the barrier CLIENT2 chmods the target
# directory and CLIENT1 links a file into it; CLIENT2 is then unmounted and
# the MDS failed over.
469 local var=${SINGLEMDS}_svc
471 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
472 do_node $CLIENT1 mcreate $DIR/$tfile
473 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
475 replay_barrier $SINGLEMDS
476 do_node $CLIENT2 chmod 777 $DIR/$tdir
477 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
478 zconf_umount $CLIENT2 $MOUNT
479 facet_failover $SINGLEMDS
481 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" fails on the link name (presumably: the link
# exists -- confirm).
482 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
483 error "link should fail"
485 zconf_mount $CLIENT2 $MOUNT
487 run_test 0t "link checks version of target parent"
# Test 0u body (fragment): like 0t, but CLIENT2's post-barrier change is a
# chmod of the link SOURCE file rather than of the target directory.
490 local var=${SINGLEMDS}_svc
492 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
493 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
494 do_node $CLIENT1 mkdir -p $DIR/$tdir
496 replay_barrier $SINGLEMDS
497 do_node $CLIENT2 chmod 666 $DIR/$tfile
498 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
499 zconf_umount $CLIENT2 $MOUNT
500 facet_failover $SINGLEMDS
502 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" fails on the link name.
503 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
504 error "link should fail"
506 zconf_mount $CLIENT2 $MOUNT
508 run_test 0u "link checks version of source"
# Test 0v body (fragment): versions of the source directory ($DIR) and the
# target directory ($DIR/$tdir) are sampled before and after moving a file
# between them; either one staying equal is an error.
516 do_node $CLIENT1 mcreate $DIR/$tfile
517 do_node $CLIENT1 mkdir -p $DIR/$tdir
518 sp_pre=$(get_version $CLIENT1 $DIR)
519 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
520 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
521 sp_post=$(get_version $CLIENT1 $DIR)
522 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
523 if (($sp_pre == $sp_post)); then
524 error "version of source parent not changed: pre $sp_pre, post $sp_post"
526 if (($tp_pre == $tp_post)); then
527 error "version of target parent not changed: pre $tp_pre, post $tp_post"
530 run_test 0v "rename changes versions of source parent and target parent"
# Test 0w body (fragment): the version of $DIR is sampled before and after a
# rename that stays inside $DIR; equal versions are an error.
536 do_node $CLIENT1 mcreate $DIR/$tfile
537 pre=$(get_version $CLIENT1 $DIR)
538 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
539 post=$(get_version $CLIENT1 $DIR)
540 if (($pre == $post)); then
541 error "version of parent not changed: pre $pre, post $post"
544 run_test 0w "rename within same dir changes version of parent"
# Test 0x body (fragment): after the barrier CLIENT2 chmods the SOURCE parent
# ($DIR) and CLIENT1 moves a file from it into $DIR/$tdir; CLIENT2 is then
# unmounted and the MDS failed over.
547 local var=${SINGLEMDS}_svc
549 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
550 do_node $CLIENT1 mcreate $DIR/$tfile
551 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
553 replay_barrier $SINGLEMDS
554 do_node $CLIENT2 chmod 777 $DIR
555 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
556 zconf_umount $CLIENT2 $MOUNT
557 facet_failover $SINGLEMDS
559 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" succeeds on the ORIGINAL name (presumably: the
# source is gone, so the rename took effect -- confirm).
560 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
561 error "rename should fail"
563 zconf_mount $CLIENT2 $MOUNT
565 run_test 0x "rename checks version of source parent"
# Test 0y body (fragment): like 0x, but CLIENT2's post-barrier chmod is on
# the TARGET parent ($DIR/$tdir).
568 local var=${SINGLEMDS}_svc
570 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
571 do_node $CLIENT1 mcreate $DIR/$tfile
572 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
574 replay_barrier $SINGLEMDS
575 do_node $CLIENT2 chmod 777 $DIR/$tdir
576 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
577 zconf_umount $CLIENT2 $MOUNT
578 facet_failover $SINGLEMDS
580 do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
# Error when "$CHECKSTAT -a" succeeds on the original name.
581 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
582 error "rename should fail"
584 zconf_mount $CLIENT2 $MOUNT
586 run_test 0y "rename checks version of target parent"
# Top level: when CLIENTS is non-empty, unmount $DIR on all of them. The
# tests that follow mount the clients they need themselves.
588 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
# Test 1a body (fragment): both clients create files after the barrier (in
# different directories), CLIENT2 is unmounted, the MDS is failed over, and
# CLIENT1's files must all be present afterwards.
591 echo "mount client $CLIENT1,$CLIENT2..."
592 zconf_mount_clients $CLIENT1 $DIR
593 zconf_mount_clients $CLIENT2 $DIR
595 do_node $CLIENT2 mkdir -p $DIR/$tdir
596 replay_barrier $SINGLEMDS
597 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
598 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
599 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
600 zconf_umount $CLIENT2 $DIR
602 facet_failover $SINGLEMDS
603 # recovery shouldn't fail due to missing client 2
604 do_node $CLIENT1 df $DIR || return 1
606 # All 50 files should have been replayed
607 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
608 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
610 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
# The existence test runs on the local node, not through do_node.
611 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
613 zconf_umount_clients $CLIENTS $DIR
616 run_test 1a "client during replay doesn't affect another one"
# Test 2a body (fragment): like 1a, but CLIENT1 also stats the file that
# CLIENT2 created after the barrier. After CLIENT2 unmount and MDS failover,
# CLIENT1's own files must be present and CLIENT2's file must not be.
619 zconf_mount_clients $CLIENT1 $DIR
620 zconf_mount_clients $CLIENT2 $DIR
622 do_node $CLIENT2 mkdir -p $DIR/$tdir
623 replay_barrier $SINGLEMDS
624 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
625 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
626 #client1 read data from client2 which will be lost
627 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
628 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
629 zconf_umount $CLIENT2 $DIR
631 facet_failover $SINGLEMDS
632 # recovery shouldn't fail due to missing client 2
633 do_node $CLIENT1 df $DIR || return 1
635 # All 50 files should have been replayed
636 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
637 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
# A successful checkstat (CLIENT2's file still visible) fails the test.
638 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
640 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
642 zconf_umount_clients $CLIENTS $DIR
645 run_test 2a "lost data due to missed REMOTE client during replay"
# Test 2b body (fragment): three mounts issue interleaved setattr operations
# on two files after the barrier; the $MOUNT2 mount is removed before the MDS
# failover. The checks cover eviction state, the post-recovery version
# number, and the final attributes of both files.
648 # This test uses three Lustre clients on two hosts.
650 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
651 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
652 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
657 local var=${SINGLEMDS}_svc
659 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
660 zconf_mount $CLIENT1 $MOUNT
661 zconf_mount $CLIENT2 $MOUNT2
662 zconf_mount $CLIENT2 $MOUNT1
663 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
664 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
667 # Save an MDT transaction number before recovery.
669 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
672 # Comments on the replay sequence state the expected result
676 # "U" Unable to replay.
# The legend lines for the other tags used below ("R", "J") are not visible
# in this listing.
679 replay_barrier $SINGLEMDS
680 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
681 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
682 do_node $CLIENT2 chown :$RUNAS_ID $DIR2/$tfile-a # U
683 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-a # J
684 do_node $CLIENT2 truncate $DIR2/$tfile-b 1 # U
685 do_node $CLIENT2 chown :$RUNAS_ID $DIR1/$tfile-b # R
686 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-b # R
687 zconf_umount $CLIENT2 $MOUNT2
688 facet_failover $SINGLEMDS
# CLIENT1's mount must fail df; CLIENT2's $MOUNT1 must pass it.
690 do_node $CLIENT1 df $MOUNT && error "$CLIENT1:$MOUNT not evicted"
691 do_node $CLIENT2 df $MOUNT1 || error "$CLIENT2:$MOUNT1 evicted"
694 # Check the MDT epoch. $post must be the first transaction
695 # number assigned after recovery.
697 do_node $CLIENT2 touch $DIR1/$tfile
698 post=$(get_version $CLIENT2 $DIR1/$tfile)
# The upper 32 bits of pre and post must differ ...
699 if (($(($pre >> 32)) == $((post >> 32)))); then
700 error "epoch not changed: pre $pre, post $post"
# ... and the lower 32 bits of post must be exactly 1.
702 if (($(($post & 0x00000000ffffffff)) != 1)); then
703 error "transno should restart from one: got $post"
706 do_node $CLIENT2 stat $DIR1/$tfile-a
707 do_node $CLIENT2 stat $DIR1/$tfile-b
# Expected end state: file-a has mode 0666 with owner/group "#$UID"; file-b
# has mode 0666 with owner/group "#$RUNAS_ID".
709 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
710 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
711 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_ID \
712 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
714 zconf_umount $CLIENT2 $MOUNT1
715 zconf_umount $CLIENT1 $MOUNT
717 run_test 2b "3 clients: some, none, and all reqs replayed"
# Test 3a body (fragment): after the barrier both clients touch the same
# file and CLIENT1 removes it; CLIENT2 is unmounted and the MDS failed over.
# CLIENT1 must stay connected and the file must be gone.
720 zconf_mount_clients $CLIENT1 $DIR
721 zconf_mount_clients $CLIENT2 $DIR
723 #make sure the time will change
724 local var=${SINGLEMDS}_svc
# A failing set_param ends the test via a bare "return".
725 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
726 do_node $CLIENT1 touch $DIR/$tfile
727 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
729 replay_barrier $SINGLEMDS
731 do_node $CLIENT2 touch $DIR/$tfile
732 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
734 do_node $CLIENT1 touch $DIR/$tfile
736 do_node $CLIENT1 rm $DIR/$tfile
737 zconf_umount $CLIENT2 $DIR
739 facet_failover $SINGLEMDS
740 # recovery shouldn't fail due to missing client 2
741 do_node $CLIENT1 df $DIR || return 1
# A successful checkstat (file still present) fails the test.
742 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
744 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
746 zconf_umount_clients $CLIENTS $DIR
750 run_test 3a "setattr of time/size doesn't change version"
# Test 3b body (fragment): after the barrier CLIENT2 does "chmod +x" and
# CLIENT1 does "chmod -x" on the same file; CLIENT2 is unmounted and the MDS
# failed over. Here a working df on CLIENT1 is the failure case.
753 zconf_mount_clients $CLIENT1 $DIR
754 zconf_mount_clients $CLIENT2 $DIR
756 #make sure the time will change
757 local var=${SINGLEMDS}_svc
758 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
760 do_node $CLIENT1 touch $DIR/$tfile
761 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
763 replay_barrier $SINGLEMDS
765 do_node $CLIENT2 chmod +x $DIR/$tfile
766 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
768 do_node $CLIENT1 chmod -x $DIR/$tfile
769 zconf_umount $CLIENT2 $DIR
771 facet_failover $SINGLEMDS
772 # recovery should fail due to missing client 2
773 do_node $CLIENT1 df $DIR && return 1
# Mode 0755 being present after failover also fails the test.
775 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
776 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
778 zconf_umount_clients $CLIENTS $DIR
782 run_test 3b "setattr of permissions changes version"
# Announce, then set lustre.fail_loc=0x50d on the given client via sysctl.
# $client is not assigned in the visible lines -- presumably it is taken from
# the first argument; confirm.
784 vbr_deactivate_client() {
786 echo "Deactivating client $client";
787 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Counterpart of vbr_deactivate_client: announce, then reset lustre.fail_loc
# to 0x0 on the given client via sysctl.
# $client is not assigned in the visible lines -- presumably it is taken from
# the first argument; confirm.
790 vbr_activate_client() {
792 echo "Activating client $client";
793 do_node $client "sysctl -w lustre.fail_loc=0x0"
# Fragment of a predicate (presumably remote_server, which the tests call
# with a client name; header not visible here).
# Succeeds only when "lctl dl" on $client prints no line matching "mdt" and
# no line matching "ost".
799 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
800 [ -z "$(do_node $client lctl dl | grep ost)" ]
# Test 4a body (fragment): both clients create files after the barrier,
# CLIENT2 is deactivated (not unmounted) across the MDS failover and then
# reactivated; all files from both clients must be present.
# Skipped without delayed recovery support or when remote_server fails for
# CLIENT2.
804 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
806 remote_server $CLIENT2 || \
807 { skip "Client $CLIENT2 is on the server node" && return 0; }
809 zconf_mount_clients $CLIENT1 $DIR
810 zconf_mount_clients $CLIENT2 $DIR
812 do_node $CLIENT2 mkdir -p $DIR/$tdir
813 replay_barrier $SINGLEMDS
814 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
815 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
816 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
817 vbr_deactivate_client $CLIENT2
819 facet_failover $SINGLEMDS
820 do_node $CLIENT1 df $DIR || return 1
822 # All 50 files should have been replayed
823 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
824 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
826 vbr_activate_client $CLIENT2
827 do_node $CLIENT2 df $DIR || return 4
828 # All 25 files from client2 should have been replayed
829 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
831 zconf_umount_clients $CLIENTS $DIR
834 run_test 4a "fail MDS, delayed recovery"
# Test 4b body (fragment): like 4a, but CLIENT1 creates its second batch of
# files AFTER the failover, while CLIENT2 is still deactivated. All files
# from both clients must be present once CLIENT2 is reactivated.
837 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
839 remote_server $CLIENT2 || \
840 { skip "Client $CLIENT2 is on the server node" && return 0; }
842 zconf_mount_clients $CLIENT1 $DIR
843 zconf_mount_clients $CLIENT2 $DIR
845 replay_barrier $SINGLEMDS
846 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
# NOTE(review): unlike 4a, no "mkdir -p $DIR/$tdir" is visible in this test
# before files are created under $DIR/$tdir -- confirm the directory exists.
847 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
848 vbr_deactivate_client $CLIENT2
850 facet_failover $SINGLEMDS
851 do_node $CLIENT1 df $DIR || return 1
853 # create another set of files
854 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
856 vbr_activate_client $CLIENT2
857 do_node $CLIENT2 df $DIR || return 2
859 # All files from should have been replayed
860 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
861 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
862 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
864 zconf_umount_clients $CLIENTS $DIR
866 run_test 4b "fail MDS, normal operation, delayed open recovery"
# Test 4c body (fragment): same sequence as 4b, but createmany is invoked
# with -m instead of -o.
869 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
871 remote_server $CLIENT2 || \
872 { skip "Client $CLIENT2 is on the server node" && return 0; }
874 zconf_mount_clients $CLIENT1 $DIR
875 zconf_mount_clients $CLIENT2 $DIR
877 replay_barrier $SINGLEMDS
878 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
# NOTE(review): no "mkdir -p $DIR/$tdir" is visible in this test before
# files are created under $DIR/$tdir -- confirm the directory exists.
879 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
880 vbr_deactivate_client $CLIENT2
882 facet_failover $SINGLEMDS
883 do_node $CLIENT1 df $DIR || return 1
885 # create another set of files
886 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
888 vbr_activate_client $CLIENT2
889 do_node $CLIENT2 df $DIR || return 2
891 # All files from should have been replayed
892 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
893 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
894 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
896 zconf_umount_clients $CLIENTS $DIR
898 run_test 4c "fail MDS, normal operation, delayed recovery"
# Test 5a body (fragment): both clients create files in the SAME directory
# ($DIR) after the barrier, with CLIENT2's create sitting between CLIENT1's
# two batches. CLIENT2 is deactivated across the failover.
901 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
903 remote_server $CLIENT2 || \
904 { skip "Client $CLIENT2 is on the server node" && return 0; }
906 zconf_mount_clients $CLIENT1 $DIR
907 zconf_mount_clients $CLIENT2 $DIR
909 replay_barrier $SINGLEMDS
910 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
911 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
912 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
913 vbr_deactivate_client $CLIENT2
915 facet_failover $SINGLEMDS
# Here a working df on CLIENT1 right after failover is the failure case.
916 do_node $CLIENT1 df $DIR && return 1
918 vbr_activate_client $CLIENT2
919 do_node $CLIENT2 df $DIR || return 2
921 # First 25 files should have been replayed
922 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
923 # Third file is failed due to missed client2
924 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
925 # file from client2 should exists
926 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
928 zconf_umount_clients $CLIENTS $DIR
930 run_test 5a "fail MDS, delayed recovery should fail"
# Test 5b body (fragment): CLIENT2 creates one file after CLIENT1's first
# batch and is deactivated across the failover; CLIENT1 then creates more
# files. CLIENT2's file must be absent and CLIENT2's df must fail once it is
# reactivated, while all of CLIENT1's files must be present.
933 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
935 remote_server $CLIENT2 || \
936 { skip "Client $CLIENT2 is on the server node" && return 0; }
938 zconf_mount_clients $CLIENT1 $DIR
939 zconf_mount_clients $CLIENT2 $DIR
941 replay_barrier $SINGLEMDS
942 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
943 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
944 vbr_deactivate_client $CLIENT2
946 facet_failover $SINGLEMDS
947 do_node $CLIENT1 df $DIR || return 1
948 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
950 # create another set of files
951 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
953 vbr_activate_client $CLIENT2
# A working df on CLIENT2 after reactivation fails the test.
954 do_node $CLIENT2 df $DIR && return 4
955 # file from client2 should fail
956 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
958 # All 50 files from client 1 should have been replayed
959 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
960 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
962 zconf_umount_clients $CLIENTS $DIR
964 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
# Test 6a body (fragment): CLIENT2 is deactivated across a first failover,
# then switched to fail_loc 0x2000050e with fail_val 5 before a second
# failover. All files from both clients must be present at the end.
967 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
969 remote_server $CLIENT2 || \
970 { skip "Client $CLIENT2 is on the server node" && return 0; }
972 zconf_mount_clients $CLIENT1 $DIR
973 zconf_mount_clients $CLIENT2 $DIR
975 do_node $CLIENT2 mkdir -p $DIR/$tdir
976 replay_barrier $SINGLEMDS
977 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
978 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
979 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
980 vbr_deactivate_client $CLIENT2
982 facet_failover $SINGLEMDS
983 # replay only 5 requests
984 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
985 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
986 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
# The status of this df is not checked.
987 do_node $CLIENT2 df $DIR
988 # vbr_activate_client $CLIENT2
989 # need way to know that client stops replays
992 facet_failover $SINGLEMDS
993 do_node $CLIENT1 df $DIR || return 1
995 # All files should have been replayed
996 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
997 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
998 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1000 zconf_umount_clients $CLIENTS $DIR
1003 run_test 6a "fail MDS, delayed recovery, fail MDS"
# Test 7a body (fragment): CLIENT2 is deactivated across a first failover
# and reactivated; a second failover follows once CLIENT2's df has passed.
# All files from both clients must be present at the end.
1006 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1008 remote_server $CLIENT2 || \
1009 { skip "Client $CLIENT2 is on the server node" && return 0; }
1011 zconf_mount_clients $CLIENT1 $DIR
1012 zconf_mount_clients $CLIENT2 $DIR
1014 do_node $CLIENT2 mkdir -p $DIR/$tdir
1015 replay_barrier $SINGLEMDS
1016 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1017 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1018 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1019 vbr_deactivate_client $CLIENT2
1021 facet_failover $SINGLEMDS
1022 vbr_activate_client $CLIENT2
1023 do_node $CLIENT2 df $DIR || return 4
# Second failover, after CLIENT2 has reconnected.
1025 facet_failover $SINGLEMDS
1026 do_node $CLIENT1 df $DIR || return 1
1028 # All files should have been replayed
1029 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1030 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1031 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1033 zconf_umount_clients $CLIENTS $DIR
1036 run_test 7a "fail MDS, delayed recovery, fail MDS"
# Test 8a body (fragment): CLIENT2 holds a file open through multiop and
# unlinks it before the barrier; multiop is stopped after the barrier.
# CLIENT2 is deactivated across the failover and then reactivated; the file
# must not exist afterwards.
1039 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1041 remote_server $CLIENT2 || \
1042 { skip "Client $CLIENT2 is on the server node" && return 0; }
1044 zconf_mount_clients $CLIENT1 $DIR
1045 zconf_mount_clients $CLIENT2 $DIR
1047 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1048 do_node $CLIENT2 rm -f $DIR/$tfile
1049 replay_barrier $SINGLEMDS
1050 rmultiop_stop $CLIENT2 || return 2
1052 vbr_deactivate_client $CLIENT2
1053 facet_failover $SINGLEMDS
1054 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the comment below says client1, but the client reactivated
# here is $CLIENT2 -- confirm the wording.
1055 #client1 is back and will try to open orphan
1056 vbr_activate_client $CLIENT2
1057 do_node $CLIENT2 df $DIR || return 4
1059 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1060 zconf_umount_clients $CLIENTS $DIR
1063 run_test 8a "orphans are kept until delayed recovery"
# Test 8b body (fragment): CLIENT2 opens the file through multiop before the
# barrier, CLIENT1 unlinks it after the barrier, and multiop is stopped only
# after CLIENT2 has been deactivated across the failover and reactivated.
1066 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1068 remote_server $CLIENT2 || \
1069 { skip "Client $CLIENT2 is on the server node" && return 0; }
1071 zconf_mount_clients $CLIENT1 $DIR
1072 zconf_mount_clients $CLIENT2 $DIR
1074 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1075 replay_barrier $SINGLEMDS
1076 do_node $CLIENT1 rm -f $DIR/$tfile
1078 vbr_deactivate_client $CLIENT2
1079 facet_failover $SINGLEMDS
1080 do_node $CLIENT1 df $DIR || return 2
# NOTE(review): the comment below says client1, but the client reactivated
# here is $CLIENT2 -- confirm the wording.
1081 #client1 is back and will try to open orphan
1082 vbr_activate_client $CLIENT2
1083 do_node $CLIENT2 df $DIR || return 3
# This reuses return code 1, which the rmultiop_start failure above also uses.
1085 rmultiop_stop $CLIENT2 || return 1
1086 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1087 zconf_umount_clients $CLIENTS $DIR
1090 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
# Test 8c body (fragment): like 8b, but multiop on CLIENT2 is stopped BEFORE
# the failover, right after CLIENT1's unlink.
1093 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1095 remote_server $CLIENT2 || \
1096 { skip "Client $CLIENT2 is on the server node" && return 0; }
1098 zconf_mount_clients $CLIENT1 $DIR
1099 zconf_mount_clients $CLIENT2 $DIR
1101 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1102 replay_barrier $SINGLEMDS
1103 do_node $CLIENT1 rm -f $DIR/$tfile
1104 rmultiop_stop $CLIENT2 || return 2
1106 vbr_deactivate_client $CLIENT2
1107 facet_failover $SINGLEMDS
1108 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the comment below says client1, but the client reactivated
# here is $CLIENT2 -- confirm the wording.
1109 #client1 is back and will try to open orphan
1110 vbr_activate_client $CLIENT2
1111 do_node $CLIENT2 df $DIR || return 4
1113 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1114 zconf_umount_clients $CLIENTS $DIR
1117 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
# Test 8d body (fragment): both clients open the file through multiop before
# the barrier; after it CLIENT1 unlinks the file and both multiops are
# stopped. CLIENT2 is then deactivated across the failover and reactivated.
1120 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1122 remote_server $CLIENT2 || \
1123 { skip "Client $CLIENT2 is on the server node" && return 0; }
1125 zconf_mount_clients $CLIENT1 $DIR
1126 zconf_mount_clients $CLIENT2 $DIR
1128 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc|| return 1
1129 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 2
1130 replay_barrier $SINGLEMDS
1131 do_node $CLIENT1 rm -f $DIR/$tfile
1132 rmultiop_stop $CLIENT2 || return 3
1133 rmultiop_stop $CLIENT1 || return 4
1135 vbr_deactivate_client $CLIENT2
1136 facet_failover $SINGLEMDS
1137 do_node $CLIENT1 df $DIR || return 6
# NOTE(review): the comment below says client1, but the client reactivated
# here is $CLIENT2 -- confirm the wording.
1139 #client1 is back and will try to open orphan
1140 vbr_activate_client $CLIENT2
1141 do_node $CLIENT2 df $DIR || return 8
1143 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1144 zconf_umount_clients $CLIENTS $DIR
1147 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
# Test 8e body (fragment): after the barrier CLIENT2 removes a file and
# recreates the same name with touch, while CLIENT1 works in another
# directory. CLIENT1 is the client unmounted before failover; CLIENT2 must
# stay connected and the recreated file must exist.
1150 zconf_mount $CLIENT1 $DIR
1151 zconf_mount $CLIENT2 $DIR
1153 do_node $CLIENT1 mcreate $DIR/$tfile
1154 do_node $CLIENT1 mkdir $DIR/$tfile-2
1155 replay_barrier $SINGLEMDS
1156 # missed replay from client1 will lead to recovery by versions
1157 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1158 do_node $CLIENT2 rm $DIR/$tfile || return 1
1159 do_node $CLIENT2 touch $DIR/$tfile || return 2
1161 zconf_umount $CLIENT1 $DIR
1162 facet_failover $SINGLEMDS
1163 do_node $CLIENT2 df $DIR || return 6
# rm doubles as the existence check for the recreated file.
1165 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1166 zconf_umount_clients $CLIENTS $DIR
1169 run_test 8e "create | unlink, create shouldn't fail"
# Test 8f body (fragment): variant of 8e where the file is first created
# with touch and recreated by CLIENT2 with mcreate.
1172 zconf_mount_clients $CLIENT1 $DIR
1173 zconf_mount_clients $CLIENT2 $DIR
1175 do_node $CLIENT1 touch $DIR/$tfile
1176 do_node $CLIENT1 mkdir $DIR/$tfile-2
1177 replay_barrier $SINGLEMDS
1178 # missed replay from client1 will lead to recovery by versions
1179 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1180 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1181 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1183 zconf_umount $CLIENT1 $DIR
1184 facet_failover $SINGLEMDS
1185 do_node $CLIENT2 df $DIR || return 6
# rm doubles as the existence check for the recreated file.
1187 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
# Only CLIENT2 is unmounted here; CLIENT1 was unmounted before the failover.
1188 zconf_umount $CLIENT2 $DIR
1191 run_test 8f "create | unlink, create shouldn't fail"
# Test 8g body (fragment): variant of 8f where CLIENT2 recreates the name as
# a DIRECTORY (mkdir), so the final existence check uses rmdir.
1194 zconf_mount_clients $CLIENT1 $DIR
1195 zconf_mount_clients $CLIENT2 $DIR
1197 do_node $CLIENT1 touch $DIR/$tfile
1198 do_node $CLIENT1 mkdir $DIR/$tfile-2
1199 replay_barrier $SINGLEMDS
1200 # missed replay from client1 will lead to recovery by versions
1201 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1202 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1203 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1205 zconf_umount $CLIENT1 $DIR
1206 facet_failover $SINGLEMDS
1207 do_node $CLIENT2 df $DIR || return 6
1209 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1210 zconf_umount $CLIENT2 $DIR
1213 run_test 8g "create | unlink, create shouldn't fail"
# Test 10 body (fragment; several lines, including the loop terminators and
# the failover call itself, are not visible here): runs a dbench load on
# every client while CLIENT2 is deactivated and later reactivated.
1216 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1218 [ -z "$DBENCH_LIB" ] && skip "DBENCH_LIB is not set" && return 0
1220 zconf_mount_clients $CLIENTS $DIR
# The load command is "rundbench 1 -t 60" followed by a trailing space.
1222 local duration="-t 60"
1223 local cmd="rundbench 1 $duration "
# CLIENTS is split on commas; the load is started per client through $PDSH
# in the background.
1225 for CLIENT in ${CLIENTS//,/ }; do
1226 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
# NOTE(review): $PID is not assigned in the visible lines -- presumably it is
# set from $! just above; confirm. The pid is stored in ./pid.<client>.
1228 echo $PID >pid.$CLIENT
1229 echo "Started load PID=`cat pid.$CLIENT`"
1232 replay_barrier $SINGLEMDS
1233 sleep 3 # give clients a time to do operations
1235 vbr_deactivate_client $CLIENT2
1237 log "$TESTNAME fail $SINGLEMDS 1"
1240 # wait for client to reconnect to MDS
1243 vbr_activate_client $CLIENT2
1244 do_node $CLIENT2 df $DIR || return 4
# Second pass over the clients: read back each stored pid.
# NOTE(review): $rc is not assigned in the visible lines -- presumably it is
# the status of waiting on $PID; confirm.
1246 for CLIENT in ${CLIENTS//,/ }; do
1247 PID=`cat pid.$CLIENT`
1250 echo "load on ${CLIENT} returned $rc"
1253 zconf_umount_clients $CLIENTS $DIR
1255 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Suite teardown: remount all clients when CLIENTS is non-empty, print the
# completion banner and run the framework cleanup.
1257 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
1259 equals_msg `basename $0`: test complete, cleaning up
1260 check_and_cleanup_lustre
# If the suite log exists, print it and exit 1 when it contains "FAIL".
# The trailing "|| true" makes this line succeed in every other case
# (no log file, cat failure, or no FAIL match).
1261 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true