# Suite setup: establish defaults, load the test framework and the
# configuration file, leave early (exit 0) when the environment is not
# suitable, then set the filesystem up and clear old test entries.

# Exclusion list: the literal entry "2" followed by $REPLAY_VBR_EXCEPT.
# NOTE(review): presumably consumed by test-framework.sh — confirm.
6 ALWAYS_EXCEPT="2 $REPLAY_VBR_EXCEPT"
# Defaults applied only when the variable is unset or empty.
9 PTLDEBUG=${PTLDEBUG:--1}
# LUSTRE defaults to the parent of the directory holding this script.
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
# Load the shared test framework from the Lustre tree.
13 . $LUSTRE/tests/test-framework.sh
# Source the configuration; CONFIG is assigned $LUSTRE/tests/cfg/$NAME.sh
# when it is unset or empty.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When SLOW is "no", EXCEPT_SLOW is set to the empty string.
20 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# The suite needs a non-empty CLIENTS list ...
22 [ -n "$CLIENTS" ] || { skip_env "Need two or more clients" && exit 0; }
# ... and a CLIENTCOUNT of at least two; otherwise it is skipped.
23 [ $CLIENTCOUNT -ge 2 ] || \
24 { skip_env "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
# Skip the whole suite when remote_mds_nodsh reports success.
26 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# NOTE(review): this assigns ALWAYS_EXCEPT to itself, i.e. it changes
# nothing as written — confirm whether extra entries were intended.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
# For the "ncli" configuration MOUNT_2 is cleared.
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
32 check_and_setup_lustre
# Remove entries under $DIR whose names start with "d" or "f" followed by
# a digit.
33 rm -rf $DIR/[df][0-9]*
# When DAEMONFILE is non-empty, start the lctl debug daemon with
# $DAEMONFILE and $DAEMONSIZE as its arguments.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
42 # We need to run do_node in the background, because pdsh does not exit
43 # while a child process of the script it ran still exists.
44 # I.e. pdsh does not exit when runmultiop_bg_pause has exited,
45 # because of multiop_bg_pause -> $MULTIOP_PROG &
46 # For the same reason we need to sleep a bit after do_node starts
47 # to let runmultiop_bg_pause start multiop and
48 # update /tmp/multiop_bg.pid.
49 # The rm of /tmp/multiop_bg.pid here guarantees that
50 # the /tmp/multiop_bg.pid file we read is the one
51 # updated by runmultiop_bg_pause.
53 local pid_file=$TMP/multiop_bg.pid.$$
54 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
58 multiop_pid=$(do_node $client cat $pid_file)
59 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
60 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
61 eval export $(client_var_name $client)_do_node_pid=$pid
62 local var=$(client_var_name $client)_multiop_pid
63 echo client $client multiop_bg started multiop_pid=${!var}
69 local multiop_pid=$(client_var_name $client)_multiop_pid
70 local do_node_pid=$(client_var_name $client)_do_node_pid
72 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
73 do_node $client kill -USR1 ${!multiop_pid}
79 local var=${SINGLEMDS}_svc
84 fid=$(do_node $client $LFS path2fid $file)
85 do_facet $SINGLEMDS $LCTL --device ${!var} getobjversion $fid
89 local file=$DIR/$tfile
93 do_node $CLIENT1 mcreate $file
94 pre=$(get_version $CLIENT1 $file)
95 do_node $CLIENT1 openfile -f O_RDWR $file
96 post=$(get_version $CLIENT1 $file)
97 if (($pre != $post)); then
98 error "version changed unexpectedly: pre $pre, post $post"
101 run_test 0a "open and close do not change versions"
104 local var=${SINGLEMDS}_svc
106 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
107 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
109 replay_barrier $SINGLEMDS
110 do_node $CLIENT2 chmod 777 $DIR/$tdir
111 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
112 zconf_umount $CLIENT2 $MOUNT
113 facet_failover $SINGLEMDS
115 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
116 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
117 error "open succeeded unexpectedly"
119 zconf_mount $CLIENT2 $MOUNT
121 run_test 0b "open (O_CREAT) checks version of parent"
124 local var=${SINGLEMDS}_svc
126 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
127 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
128 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
130 replay_barrier $SINGLEMDS
131 do_node $CLIENT2 chmod 777 $DIR/$tdir
132 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
133 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
134 zconf_umount $CLIENT2 $MOUNT
135 facet_failover $SINGLEMDS
136 client_up $CLIENT1 || error "$CLIENT1 evicted"
138 rmultiop_stop $CLIENT1 || error "close failed"
139 zconf_mount $CLIENT2 $MOUNT
141 run_test 0c "open (non O_CREAT) does not checks versions"
147 pre=$(get_version $CLIENT1 $DIR)
148 do_node $CLIENT1 mkfifo $DIR/$tfile
149 post=$(get_version $CLIENT1 $DIR)
150 if (($pre == $post)); then
151 error "version not changed: pre $pre, post $post"
154 run_test 0d "create changes version of parent"
157 local var=${SINGLEMDS}_svc
159 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
160 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
162 replay_barrier $SINGLEMDS
163 do_node $CLIENT2 chmod 777 $DIR/$tdir
164 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
165 zconf_umount $CLIENT2 $MOUNT
166 facet_failover $SINGLEMDS
168 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
169 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
170 error "create succeeded unexpectedly"
172 zconf_mount $CLIENT2 $MOUNT
174 run_test 0e "create checks version of parent"
180 do_node $CLIENT1 mcreate $DIR/$tfile
181 pre=$(get_version $CLIENT1 $DIR)
182 do_node $CLIENT1 rm $DIR/$tfile
183 post=$(get_version $CLIENT1 $DIR)
184 if (($pre == $post)); then
185 error "version not changed: pre $pre, post $post"
188 run_test 0f "unlink changes version of parent"
191 local var=${SINGLEMDS}_svc
193 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
194 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
195 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
197 replay_barrier $SINGLEMDS
198 do_node $CLIENT2 chmod 777 $DIR/$tdir
199 do_node $CLIENT1 rm $DIR/$tdir/$tfile
200 zconf_umount $CLIENT2 $MOUNT
201 facet_failover $SINGLEMDS
203 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
204 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
205 error "unlink succeeded unexpectedly"
207 zconf_mount $CLIENT2 $MOUNT
209 run_test 0g "unlink checks version of parent"
212 local file=$DIR/$tfile
216 do_node $CLIENT1 mcreate $file
217 pre=$(get_version $CLIENT1 $file)
218 do_node $CLIENT1 chown $RUNAS_ID $file
219 post=$(get_version $CLIENT1 $file)
220 if (($pre == $post)); then
221 error "version not changed: pre $pre, post $post"
224 run_test 0h "setattr of UID changes versions"
227 local file=$DIR/$tfile
231 do_node $CLIENT1 mcreate $file
232 pre=$(get_version $CLIENT1 $file)
233 do_node $CLIENT1 chown :$RUNAS_ID $file
234 post=$(get_version $CLIENT1 $file)
235 if (($pre == $post)); then
236 error "version not changed: pre $pre, post $post"
239 run_test 0i "setattr of GID changes versions"
242 local file=$DIR/$tfile
243 local var=${SINGLEMDS}_svc
245 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
246 do_node $CLIENT1 mcreate $file
248 replay_barrier $SINGLEMDS
249 do_node $CLIENT2 chown :$RUNAS_ID $file
250 do_node $CLIENT1 chown $RUNAS_ID $file
251 zconf_umount $CLIENT2 $MOUNT
252 facet_failover $SINGLEMDS
254 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
255 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
256 error "setattr of UID succeeded unexpectedly"
258 zconf_mount $CLIENT2 $MOUNT
260 run_test 0j "setattr of UID checks versions"
263 local file=$DIR/$tfile
264 local var=${SINGLEMDS}_svc
266 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
267 do_node $CLIENT1 mcreate $file
269 replay_barrier $SINGLEMDS
270 do_node $CLIENT2 chown $RUNAS_ID $file
271 do_node $CLIENT1 chown :$RUNAS_ID $file
272 zconf_umount $CLIENT2 $MOUNT
273 facet_failover $SINGLEMDS
275 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
276 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
277 error "setattr of GID succeeded unexpectedly"
279 zconf_mount $CLIENT2 $MOUNT
281 run_test 0k "setattr of GID checks versions"
284 local file=$DIR/$tfile
288 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
289 pre=$(get_version $CLIENT1 $file)
290 do_node $CLIENT1 chmod 666 $file
291 post=$(get_version $CLIENT1 $file)
292 if (($pre == $post)); then
293 error "version not changed: pre $pre, post $post"
296 run_test 0l "setattr of permission changes versions"
299 local file=$DIR/$tfile
300 local var=${SINGLEMDS}_svc
302 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
303 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
305 replay_barrier $SINGLEMDS
306 do_node $CLIENT2 chown :$RUNAS_ID $file
307 do_node $CLIENT1 chmod 666 $file
308 zconf_umount $CLIENT2 $MOUNT
309 facet_failover $SINGLEMDS
311 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
312 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
313 error "setattr of permission succeeded unexpectedly"
315 zconf_mount $CLIENT2 $MOUNT
317 run_test 0m "setattr of permission checks versions"
320 local file=$DIR/$tfile
324 do_node $CLIENT1 mcreate $file
325 pre=$(get_version $CLIENT1 $file)
326 do_node $CLIENT1 chattr +i $file
327 post=$(get_version $CLIENT1 $file)
328 do_node $CLIENT1 chattr -i $file
329 if (($pre == $post)); then
330 error "version not changed: pre $pre, post $post"
333 run_test 0n "setattr of flags changes versions"
341 if ((${#attr} != 1)); then
342 error "checking multiple attributes not implemented yet"
344 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
348 local file=$DIR/$tfile
350 local var=${SINGLEMDS}_svc
352 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
353 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
355 replay_barrier $SINGLEMDS
356 do_node $CLIENT2 chmod 666 $file
357 do_node $CLIENT1 chattr +i $file
358 zconf_umount $CLIENT2 $MOUNT
359 facet_failover $SINGLEMDS
361 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
362 checkattr $CLIENT1 i $file
364 do_node $CLIENT1 chattr -i $file
365 if [ $rc -eq 0 ]; then
366 error "setattr of flags succeeded unexpectedly"
368 zconf_mount $CLIENT2 $MOUNT
370 run_test 0o "setattr of flags checks versions"
373 local file=$DIR/$tfile
377 local var=${SINGLEMDS}_svc
379 ad_orig=$(do_facet $SINGLEMDS "$LCTL get_param mdd.${!var}.atime_diff")
380 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
381 do_node $CLIENT1 mcreate $file
382 pre=$(get_version $CLIENT1 $file)
383 do_node $CLIENT1 touch $file
384 post=$(get_version $CLIENT1 $file)
386 # We don't fail MDS in this test. atime_diff shall be
387 # restored to its original value.
389 do_facet $SINGLEMDS "$LCTL set_param $ad_orig"
390 if (($pre != $post)); then
391 error "version changed unexpectedly: pre $pre, post $post"
394 run_test 0p "setattr of times does not change versions"
397 local file=$DIR/$tfile
401 do_node $CLIENT1 mcreate $file
402 pre=$(get_version $CLIENT1 $file)
403 do_node $CLIENT1 truncate $file 1
404 post=$(get_version $CLIENT1 $file)
405 if (($pre != $post)); then
406 error "version changed unexpectedly: pre $pre, post $post"
409 run_test 0q "setattr of size does not change versions"
412 local file=$DIR/$tfile
416 local var=${SINGLEMDS}_svc
418 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
419 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
420 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
422 replay_barrier $SINGLEMDS
423 do_node $CLIENT2 chmod 666 $file
424 do_node $CLIENT1 truncate $file 1
426 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
427 do_node $CLIENT1 touch $file
428 sleep 1 # avoid stat caching
429 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
430 zconf_umount $CLIENT2 $MOUNT
431 facet_failover $SINGLEMDS
433 client_up $CLIENT1 || error "$CLIENT1 evicted"
434 if (($mtime_pre >= $mtime_post)); then
435 error "time not changed: pre $mtime_pre, post $mtime_post"
437 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
438 error "setattr of size failed"
440 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
441 if (($mtime != $mtime_post)); then
442 error "setattr of times failed: expected $mtime_post, got $mtime"
444 zconf_mount $CLIENT2 $MOUNT
446 run_test 0r "setattr of times and size does not check versions"
454 do_node $CLIENT1 mcreate $DIR/$tfile
455 do_node $CLIENT1 mkdir -p $DIR/$tdir
456 pre=$(get_version $CLIENT1 $DIR/$tfile)
457 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
458 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
459 post=$(get_version $CLIENT1 $DIR/$tfile)
460 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
461 if (($pre == $post)); then
462 error "version of source not changed: pre $pre, post $post"
464 if (($tp_pre == $tp_post)); then
465 error "version of target parent not changed: pre $tp_pre, post $tp_post"
468 run_test 0s "link changes versions of source and target parent"
471 local var=${SINGLEMDS}_svc
473 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
474 do_node $CLIENT1 mcreate $DIR/$tfile
475 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
477 replay_barrier $SINGLEMDS
478 do_node $CLIENT2 chmod 777 $DIR/$tdir
479 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
480 zconf_umount $CLIENT2 $MOUNT
481 facet_failover $SINGLEMDS
483 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
484 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
485 error "link should fail"
487 zconf_mount $CLIENT2 $MOUNT
489 run_test 0t "link checks version of target parent"
492 local var=${SINGLEMDS}_svc
494 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
495 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
496 do_node $CLIENT1 mkdir -p $DIR/$tdir
498 replay_barrier $SINGLEMDS
499 do_node $CLIENT2 chmod 666 $DIR/$tfile
500 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
501 zconf_umount $CLIENT2 $MOUNT
502 facet_failover $SINGLEMDS
504 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
505 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
506 error "link should fail"
508 zconf_mount $CLIENT2 $MOUNT
510 run_test 0u "link checks version of source"
518 do_node $CLIENT1 mcreate $DIR/$tfile
519 do_node $CLIENT1 mkdir -p $DIR/$tdir
520 sp_pre=$(get_version $CLIENT1 $DIR)
521 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
522 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
523 sp_post=$(get_version $CLIENT1 $DIR)
524 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
525 if (($sp_pre == $sp_post)); then
526 error "version of source parent not changed: pre $sp_pre, post $sp_post"
528 if (($tp_pre == $tp_post)); then
529 error "version of target parent not changed: pre $tp_pre, post $tp_post"
532 run_test 0v "rename changes versions of source parent and target parent"
538 do_node $CLIENT1 mcreate $DIR/$tfile
539 pre=$(get_version $CLIENT1 $DIR)
540 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
541 post=$(get_version $CLIENT1 $DIR)
542 if (($pre == $post)); then
543 error "version of parent not changed: pre $pre, post $post"
546 run_test 0w "rename within same dir changes version of parent"
549 local var=${SINGLEMDS}_svc
551 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
552 do_node $CLIENT1 mcreate $DIR/$tfile
553 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
555 replay_barrier $SINGLEMDS
556 do_node $CLIENT2 chmod 777 $DIR
557 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
558 zconf_umount $CLIENT2 $MOUNT
559 facet_failover $SINGLEMDS
561 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
562 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
563 error "rename should fail"
565 zconf_mount $CLIENT2 $MOUNT
567 run_test 0x "rename checks version of source parent"
570 local var=${SINGLEMDS}_svc
572 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
573 do_node $CLIENT1 mcreate $DIR/$tfile
574 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
576 replay_barrier $SINGLEMDS
577 do_node $CLIENT2 chmod 777 $DIR/$tdir
578 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
579 zconf_umount $CLIENT2 $MOUNT
580 facet_failover $SINGLEMDS
582 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
583 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
584 error "rename should fail"
586 zconf_mount $CLIENT2 $MOUNT
588 run_test 0y "rename checks version of target parent"
# When CLIENTS is non-empty, unmount $DIR on those clients. The test
# bodies that follow mount $CLIENT1 and $CLIENT2 themselves.
590 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
593 echo "mount client $CLIENT1,$CLIENT2..."
594 zconf_mount_clients $CLIENT1 $DIR
595 zconf_mount_clients $CLIENT2 $DIR
597 do_node $CLIENT2 mkdir -p $DIR/$tdir
598 replay_barrier $SINGLEMDS
599 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
600 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
601 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
602 zconf_umount $CLIENT2 $DIR
604 facet_failover $SINGLEMDS
605 # recovery shouldn't fail due to missing client 2
606 client_up $CLIENT1 || return 1
608 # All 50 files should have been replayed
609 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
610 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
612 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
613 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
615 zconf_umount_clients $CLIENTS $DIR
618 run_test 1a "client during replay doesn't affect another one"
621 zconf_mount_clients $CLIENT1 $DIR
622 zconf_mount_clients $CLIENT2 $DIR
624 do_node $CLIENT2 mkdir -p $DIR/$tdir
625 replay_barrier $SINGLEMDS
626 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
627 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
628 # client1 reads data from client2 which will be lost
629 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
630 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
631 zconf_umount $CLIENT2 $DIR
633 facet_failover $SINGLEMDS
634 # recovery shouldn't fail due to missing client 2
635 client_up $CLIENT1 || return 1
637 # All 50 files should have been replayed
638 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
639 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
640 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
642 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
644 zconf_umount_clients $CLIENTS $DIR
647 run_test 2a "lost data due to missed REMOTE client during replay"
650 # This test uses three Lustre clients on two hosts.
652 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
653 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
654 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
659 local var=${SINGLEMDS}_svc
661 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
662 zconf_mount $CLIENT1 $MOUNT
663 zconf_mount $CLIENT2 $MOUNT2
664 zconf_mount $CLIENT2 $MOUNT1
665 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
666 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
669 # Save an MDT transaction number before recovery.
671 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
674 # Comments on the replay sequence state the expected result
678 # "U" Unable to replay.
681 replay_barrier $SINGLEMDS
682 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
683 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
684 do_node $CLIENT2 chown :$RUNAS_ID $DIR2/$tfile-a # U
685 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-a # J
686 do_node $CLIENT2 truncate $DIR2/$tfile-b 1 # U
687 do_node $CLIENT2 chown :$RUNAS_ID $DIR1/$tfile-b # R
688 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-b # R
689 zconf_umount $CLIENT2 $MOUNT2
690 facet_failover $SINGLEMDS
692 client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted"
693 client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted"
696 # Check the MDT epoch. $post must be the first transaction
697 # number assigned after recovery.
699 do_node $CLIENT2 touch $DIR1/$tfile
700 post=$(get_version $CLIENT2 $DIR1/$tfile)
701 if (($(($pre >> 32)) == $((post >> 32)))); then
702 error "epoch not changed: pre $pre, post $post"
704 if (($(($post & 0x00000000ffffffff)) != 1)); then
705 error "transno should restart from one: got $post"
708 do_node $CLIENT2 stat $DIR1/$tfile-a
709 do_node $CLIENT2 stat $DIR1/$tfile-b
711 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
712 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
713 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_ID \
714 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
716 zconf_umount $CLIENT2 $MOUNT1
717 zconf_umount $CLIENT1 $MOUNT
719 run_test 2b "3 clients: some, none, and all reqs replayed"
722 zconf_mount_clients $CLIENT1 $DIR
723 zconf_mount_clients $CLIENT2 $DIR
725 #make sure the time will change
726 local var=${SINGLEMDS}_svc
727 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
728 do_node $CLIENT1 touch $DIR/$tfile
729 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
731 replay_barrier $SINGLEMDS
733 do_node $CLIENT2 touch $DIR/$tfile
734 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
736 do_node $CLIENT1 touch $DIR/$tfile
738 do_node $CLIENT1 rm $DIR/$tfile
739 zconf_umount $CLIENT2 $DIR
741 facet_failover $SINGLEMDS
742 # recovery shouldn't fail due to missing client 2
743 client_up $CLIENT1 || return 1
744 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
746 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
748 zconf_umount_clients $CLIENTS $DIR
752 run_test 3a "setattr of time/size doesn't change version"
755 zconf_mount_clients $CLIENT1 $DIR
756 zconf_mount_clients $CLIENT2 $DIR
758 #make sure the time will change
759 local var=${SINGLEMDS}_svc
760 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
762 do_node $CLIENT1 touch $DIR/$tfile
763 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
765 replay_barrier $SINGLEMDS
767 do_node $CLIENT2 chmod +x $DIR/$tfile
768 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
770 do_node $CLIENT1 chmod -x $DIR/$tfile
771 zconf_umount $CLIENT2 $DIR
773 facet_failover $SINGLEMDS
774 # recovery should fail due to missing client 2
775 client_evicted $CLIENT1 || return 1
777 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
778 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
780 zconf_umount_clients $CLIENTS $DIR
784 run_test 3b "setattr of permissions changes version"
# Print a notice, then set lustre.fail_loc to 0x50d on $client with
# "sysctl -w", executed through do_node. Callers in this file invoke it
# just before facet_failover and undo it later with vbr_activate_client.
# NOTE(review): $client is not assigned in the lines visible here —
# presumably it is taken from $1; confirm.
786 vbr_deactivate_client() {
788 echo "Deactivating client $client";
789 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Print a notice, then reset lustre.fail_loc to 0x0 on $client with
# "sysctl -w", executed through do_node. Counterpart of
# vbr_deactivate_client, which sets the same parameter to 0x50d.
# NOTE(review): $client is not assigned in the lines visible here —
# presumably it is taken from $1; confirm.
792 vbr_activate_client() {
794 echo "Activating client $client";
795 do_node $client "sysctl -w lustre.fail_loc=0x0"
801 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
802 [ -z "$(do_node $client lctl dl | grep ost)" ]
806 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
808 remote_server $CLIENT2 || \
809 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
811 zconf_mount_clients $CLIENT1 $DIR
812 zconf_mount_clients $CLIENT2 $DIR
814 do_node $CLIENT2 mkdir -p $DIR/$tdir
815 replay_barrier $SINGLEMDS
816 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
817 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
818 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
819 vbr_deactivate_client $CLIENT2
821 facet_failover $SINGLEMDS
822 client_up $CLIENT1 || return 1
824 # All 50 files should have been replayed
825 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
826 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
828 vbr_activate_client $CLIENT2
829 client_up $CLIENT2 || return 4
830 # All 25 files from client2 should have been replayed
831 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
833 zconf_umount_clients $CLIENTS $DIR
836 run_test 4a "fail MDS, delayed recovery"
839 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
841 remote_server $CLIENT2 || \
842 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
844 zconf_mount_clients $CLIENT1 $DIR
845 zconf_mount_clients $CLIENT2 $DIR
847 replay_barrier $SINGLEMDS
848 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
849 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
850 vbr_deactivate_client $CLIENT2
852 facet_failover $SINGLEMDS
853 client_up $CLIENT1 || return 1
855 # create another set of files
856 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
858 vbr_activate_client $CLIENT2
859 client_up $CLIENT2 || return 2
861 # All files should have been replayed
862 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
863 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
864 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
866 zconf_umount_clients $CLIENTS $DIR
868 run_test 4b "fail MDS, normal operation, delayed open recovery"
871 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
873 remote_server $CLIENT2 || \
874 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
876 zconf_mount_clients $CLIENT1 $DIR
877 zconf_mount_clients $CLIENT2 $DIR
879 replay_barrier $SINGLEMDS
880 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
881 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
882 vbr_deactivate_client $CLIENT2
884 facet_failover $SINGLEMDS
885 client_up $CLIENT1 || return 1
887 # create another set of files
888 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
890 vbr_activate_client $CLIENT2
891 client_up $CLIENT2 || return 2
893 # All files should have been replayed
894 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
895 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
896 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
898 zconf_umount_clients $CLIENTS $DIR
900 run_test 4c "fail MDS, normal operation, delayed recovery"
903 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
905 remote_server $CLIENT2 || \
906 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
908 zconf_mount_clients $CLIENT1 $DIR
909 zconf_mount_clients $CLIENT2 $DIR
911 replay_barrier $SINGLEMDS
912 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
913 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
914 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
915 vbr_deactivate_client $CLIENT2
917 facet_failover $SINGLEMDS
918 client_evicted $CLIENT1 || return 1
920 vbr_activate_client $CLIENT2
921 client_up $CLIENT2 || return 2
923 # First 25 files should have been replayed
924 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
925 # The third file fails because of the missing client2
926 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
927 # file from client2 should exist
928 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
930 zconf_umount_clients $CLIENTS $DIR
932 run_test 5a "fail MDS, delayed recovery should fail"
935 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
937 remote_server $CLIENT2 || \
938 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
940 zconf_mount_clients $CLIENT1 $DIR
941 zconf_mount_clients $CLIENT2 $DIR
943 replay_barrier $SINGLEMDS
944 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
945 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
946 vbr_deactivate_client $CLIENT2
948 facet_failover $SINGLEMDS
949 client_up $CLIENT1 || return 1
950 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
952 # create another set of files
953 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
955 vbr_activate_client $CLIENT2
956 client_evicted $CLIENT2 || return 4
957 # file from client2 should fail
958 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
960 # All 50 files from client 1 should have been replayed
961 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
962 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
964 zconf_umount_clients $CLIENTS $DIR
966 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
969 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
971 remote_server $CLIENT2 || \
972 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
974 zconf_mount_clients $CLIENT1 $DIR
975 zconf_mount_clients $CLIENT2 $DIR
977 do_node $CLIENT2 mkdir -p $DIR/$tdir
978 replay_barrier $SINGLEMDS
979 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
980 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
981 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
982 vbr_deactivate_client $CLIENT2
984 facet_failover $SINGLEMDS
985 # replay only 5 requests
986 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
987 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
988 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
990 # vbr_activate_client $CLIENT2
991 # need way to know that client stops replays
994 facet_failover $SINGLEMDS
995 client_up $CLIENT1 || return 1
997 # All files should have been replayed
998 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
999 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1000 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1002 zconf_umount_clients $CLIENTS $DIR
1005 run_test 6a "fail MDS, delayed recovery, fail MDS"
1008 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1010 remote_server $CLIENT2 || \
1011 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1013 zconf_mount_clients $CLIENT1 $DIR
1014 zconf_mount_clients $CLIENT2 $DIR
1016 do_node $CLIENT2 mkdir -p $DIR/$tdir
1017 replay_barrier $SINGLEMDS
1018 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1019 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1020 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1021 vbr_deactivate_client $CLIENT2
1023 facet_failover $SINGLEMDS
1024 vbr_activate_client $CLIENT2
1025 client_up $CLIENT2 || return 4
1027 facet_failover $SINGLEMDS
1028 client_up $CLIENT1 || return 1
1030 # All files should have been replayed
1031 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1032 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1033 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1035 zconf_umount_clients $CLIENTS $DIR
1038 run_test 7a "fail MDS, delayed recovery, fail MDS"
1041 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1043 remote_server $CLIENT2 || \
1044 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1046 zconf_mount_clients $CLIENT1 $DIR
1047 zconf_mount_clients $CLIENT2 $DIR
1049 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1050 do_node $CLIENT2 rm -f $DIR/$tfile
1051 replay_barrier $SINGLEMDS
1052 rmultiop_stop $CLIENT2 || return 2
1054 vbr_deactivate_client $CLIENT2
1055 facet_failover $SINGLEMDS
1056 client_up $CLIENT1 || return 3
1057 #client1 is back and will try to open orphan
1058 vbr_activate_client $CLIENT2
1059 client_up $CLIENT2 || return 4
1061 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1062 zconf_umount_clients $CLIENTS $DIR
1065 run_test 8a "orphans are kept until delayed recovery"
1068 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1070 remote_server $CLIENT2 || \
1071 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1073 zconf_mount_clients $CLIENT1 $DIR
1074 zconf_mount_clients $CLIENT2 $DIR
1076 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1077 replay_barrier $SINGLEMDS
1078 do_node $CLIENT1 rm -f $DIR/$tfile
1080 vbr_deactivate_client $CLIENT2
1081 facet_failover $SINGLEMDS
1082 client_up $CLIENT1 || return 2
1083 #client1 is back and will try to open orphan
1084 vbr_activate_client $CLIENT2
1085 client_up $CLIENT2 || return 3
1087 rmultiop_stop $CLIENT2 || return 1
1088 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1089 zconf_umount_clients $CLIENTS $DIR
1092 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
1095 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1097 remote_server $CLIENT2 || \
1098 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1100 zconf_mount_clients $CLIENT1 $DIR
1101 zconf_mount_clients $CLIENT2 $DIR
1103 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1104 replay_barrier $SINGLEMDS
1105 do_node $CLIENT1 rm -f $DIR/$tfile
1106 rmultiop_stop $CLIENT2 || return 2
1108 vbr_deactivate_client $CLIENT2
1109 facet_failover $SINGLEMDS
1110 client_up $CLIENT1 || return 3
1111 #client1 is back and will try to open orphan
1112 vbr_activate_client $CLIENT2
1113 client_up $CLIENT2 || return 4
1115 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1116 zconf_umount_clients $CLIENTS $DIR
1119 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
1122 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1124 remote_server $CLIENT2 || \
1125 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1127 zconf_mount_clients $CLIENT1 $DIR
1128 zconf_mount_clients $CLIENT2 $DIR
1130 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc|| return 1
1131 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 2
1132 replay_barrier $SINGLEMDS
1133 do_node $CLIENT1 rm -f $DIR/$tfile
1134 rmultiop_stop $CLIENT2 || return 3
1135 rmultiop_stop $CLIENT1 || return 4
1137 vbr_deactivate_client $CLIENT2
1138 facet_failover $SINGLEMDS
1139 client_up $CLIENT1 || return 6
1141 #client1 is back and will try to open orphan
1142 vbr_activate_client $CLIENT2
1143 client_up $CLIENT2 || return 8
1145 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1146 zconf_umount_clients $CLIENTS $DIR
1149 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
1152 zconf_mount $CLIENT1 $DIR
1153 zconf_mount $CLIENT2 $DIR
1155 do_node $CLIENT1 mcreate $DIR/$tfile
1156 do_node $CLIENT1 mkdir $DIR/$tfile-2
1157 replay_barrier $SINGLEMDS
1158 # missed replay from client1 will lead to recovery by versions
1159 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1160 do_node $CLIENT2 rm $DIR/$tfile || return 1
1161 do_node $CLIENT2 touch $DIR/$tfile || return 2
1163 zconf_umount $CLIENT1 $DIR
1164 facet_failover $SINGLEMDS
1165 client_up $CLIENT2 || return 6
1167 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1168 zconf_umount_clients $CLIENTS $DIR
1171 run_test 8e "create | unlink, create shouldn't fail"
1174 zconf_mount_clients $CLIENT1 $DIR
1175 zconf_mount_clients $CLIENT2 $DIR
1177 do_node $CLIENT1 touch $DIR/$tfile
1178 do_node $CLIENT1 mkdir $DIR/$tfile-2
1179 replay_barrier $SINGLEMDS
1180 # missed replay from client1 will lead to recovery by versions
1181 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1182 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1183 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1185 zconf_umount $CLIENT1 $DIR
1186 facet_failover $SINGLEMDS
1187 client_up $CLIENT2 || return 6
1189 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1190 zconf_umount $CLIENT2 $DIR
1193 run_test 8f "create | unlink, create shouldn't fail"
1196 zconf_mount_clients $CLIENT1 $DIR
1197 zconf_mount_clients $CLIENT2 $DIR
1199 do_node $CLIENT1 touch $DIR/$tfile
1200 do_node $CLIENT1 mkdir $DIR/$tfile-2
1201 replay_barrier $SINGLEMDS
1202 # missed replay from client1 will lead to recovery by versions
1203 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1204 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1205 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1207 zconf_umount $CLIENT1 $DIR
1208 facet_failover $SINGLEMDS
1209 client_up $CLIENT2 || return 6
1211 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1212 zconf_umount $CLIENT2 $DIR
1215 run_test 8g "create | unlink, create shouldn't fail"
1218 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1220 [ -z "$DBENCH_LIB" ] && skip_env "DBENCH_LIB is not set" && return 0
1222 zconf_mount_clients $CLIENTS $DIR
1224 local duration="-t 60"
1225 local cmd="rundbench 1 $duration "
1227 for CLIENT in ${CLIENTS//,/ }; do
1228 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
1230 echo $PID >pid.$CLIENT
1231 echo "Started load PID=`cat pid.$CLIENT`"
1234 replay_barrier $SINGLEMDS
1235 sleep 3 # give clients a time to do operations
1237 vbr_deactivate_client $CLIENT2
1239 log "$TESTNAME fail $SINGLEMDS 1"
1242 # wait for client to reconnect to MDS
1245 vbr_activate_client $CLIENT2
1246 client_up $CLIENT2 || return 4
1248 for CLIENT in ${CLIENTS//,/ }; do
1249 PID=`cat pid.$CLIENT`
1252 echo "load on ${CLIENT} returned $rc"
1255 zconf_umount_clients $CLIENTS $DIR
1257 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Suite teardown.
# When CLIENTS is non-empty, mount $DIR on those clients again.
1259 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
# Announce completion, prefixed with this script's base name.
1261 equals_msg `basename $0`: test complete, cleaning up
1262 check_and_cleanup_lustre
# If $TESTSUITELOG is a regular file, print it and exit 1 when it contains
# "FAIL"; in every other case the trailing "|| true" leaves a zero status.
1263 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true