# Exclusion list: "2" plus whatever $REPLAY_VBR_EXCEPT supplies.
# NOTE(review): how ALWAYS_EXCEPT is consumed is not visible here — presumably
# by the sourced test framework to skip those tests; confirm.
6 ALWAYS_EXCEPT="2 $REPLAY_VBR_EXCEPT"
# Defaults used when the caller did not set them: PTLDEBUG falls back to -1,
# LUSTRE to the parent of this script's directory, CLEANUP to the empty string.
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
# Source the shared test framework, then the configuration file; CONFIG is
# assigned $LUSTRE/tests/cfg/$NAME.sh when it is unset or empty.
13 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When SLOW is "no", clear EXCEPT_SLOW.
20 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# This suite needs at least two clients: report a skip and exit 0 when
# CLIENTS is empty or CLIENTCOUNT is below 2.
22 [ -n "$CLIENTS" ] || { skip_env "Need two or more clients" && exit 0; }
23 [ $CLIENTCOUNT -ge 2 ] || \
24 { skip_env "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
# Skip the whole suite when remote_mds_nodsh succeeds.
26 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# NOTE(review): this assigns ALWAYS_EXCEPT to its own value, so it is a no-op
# as written — possibly a placeholder for non-"ncli" exclusions; confirm.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
# For the "ncli" configuration, clear MOUNT_2.
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
# Set up the filesystem, then remove leftover entries matching
# $DIR/[df][0-9]*.
32 check_and_setup_lustre
33 rm -rf $DIR/[df][0-9]*
# If DAEMONFILE is set, start the lctl debug daemon, passing it $DAEMONFILE
# and $DAEMONSIZE.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
42 # We need to run do_node in the background, because pdsh does not exit
43 # while a child process of the script it ran still exists.
44 # I.e. pdsh does not exit when runmultiop_bg_pause has exited,
45 # because of multiop_bg_pause -> $MULTIOP_PROG &
46 # For the same reason we need to sleep a bit after do_node starts,
47 # to let runmultiop_bg_pause start multiop and
48 # update the pid file ($pid_file).
49 # The "rm -f $pid_file" here guarantees that
50 # the pid file we read afterwards is the one
51 # updated by runmultiop_bg_pause.
53 local pid_file=$TMP/multiop_bg.pid.$$
54 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file $cmds" &
58 multiop_pid=$(do_node $client cat $pid_file)
59 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
60 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
61 eval export $(client_var_name $client)_do_node_pid=$pid
62 local var=$(client_var_name $client)_multiop_pid
63 echo client $client multiop_bg started multiop_pid=${!var}
69 local multiop_pid=$(client_var_name $client)_multiop_pid
70 local do_node_pid=$(client_var_name $client)_do_node_pid
72 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
73 do_node $client kill -USR1 ${!multiop_pid}
79 local var=${SINGLEMDS}_svc
84 fid=$(do_node $client $LFS path2fid $file)
85 do_facet $SINGLEMDS $LCTL --device ${!var} getobjversion $fid
89 local file=$DIR/$tfile
93 do_node $CLIENT1 mcreate $file
94 pre=$(get_version $CLIENT1 $file)
95 do_node $CLIENT1 openfile -f O_RDWR $file
96 post=$(get_version $CLIENT1 $file)
97 if (($pre != $post)); then
98 error "version changed unexpectedly: pre $pre, post $post"
101 run_test 0a "open and close do not change versions"
104 local var=${SINGLEMDS}_svc
106 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
107 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
109 replay_barrier $SINGLEMDS
110 do_node $CLIENT2 chmod 777 $DIR/$tdir
111 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT $DIR/$tdir/$tfile
112 zconf_umount $CLIENT2 $MOUNT
113 facet_failover $SINGLEMDS
115 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
116 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
117 error "open succeeded unexpectedly"
119 zconf_mount $CLIENT2 $MOUNT
121 run_test 0b "open (O_CREAT) checks version of parent"
124 local var=${SINGLEMDS}_svc
126 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
127 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
128 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tdir/$tfile
130 replay_barrier $SINGLEMDS
131 do_node $CLIENT2 chmod 777 $DIR/$tdir
132 do_node $CLIENT2 chmod 666 $DIR/$tdir/$tfile
133 rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
134 zconf_umount $CLIENT2 $MOUNT
135 facet_failover $SINGLEMDS
136 client_up $CLIENT1 || error "$CLIENT1 evicted"
138 rmultiop_stop $CLIENT1 || error "close failed"
139 zconf_mount $CLIENT2 $MOUNT
141 run_test 0c "open (non O_CREAT) does not checks versions"
147 pre=$(get_version $CLIENT1 $DIR)
148 do_node $CLIENT1 mkfifo $DIR/$tfile
149 post=$(get_version $CLIENT1 $DIR)
150 if (($pre == $post)); then
151 error "version not changed: pre $pre, post $post"
154 run_test 0d "create changes version of parent"
157 local var=${SINGLEMDS}_svc
159 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
160 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
162 replay_barrier $SINGLEMDS
163 do_node $CLIENT2 chmod 777 $DIR/$tdir
164 do_node $CLIENT1 mkfifo $DIR/$tdir/$tfile
165 zconf_umount $CLIENT2 $MOUNT
166 facet_failover $SINGLEMDS
168 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
169 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
170 error "create succeeded unexpectedly"
172 zconf_mount $CLIENT2 $MOUNT
174 run_test 0e "create checks version of parent"
180 do_node $CLIENT1 mcreate $DIR/$tfile
181 pre=$(get_version $CLIENT1 $DIR)
182 do_node $CLIENT1 rm $DIR/$tfile
183 post=$(get_version $CLIENT1 $DIR)
184 if (($pre == $post)); then
185 error "version not changed: pre $pre, post $post"
188 run_test 0f "unlink changes version of parent"
191 local var=${SINGLEMDS}_svc
193 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
194 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
195 do_node $CLIENT1 mcreate $DIR/$tdir/$tfile
197 replay_barrier $SINGLEMDS
198 do_node $CLIENT2 chmod 777 $DIR/$tdir
199 do_node $CLIENT1 rm $DIR/$tdir/$tfile
200 zconf_umount $CLIENT2 $MOUNT
201 facet_failover $SINGLEMDS
203 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
204 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
205 error "unlink succeeded unexpectedly"
207 zconf_mount $CLIENT2 $MOUNT
209 run_test 0g "unlink checks version of parent"
212 local file=$DIR/$tfile
216 do_node $CLIENT1 mcreate $file
217 pre=$(get_version $CLIENT1 $file)
218 do_node $CLIENT1 chown $RUNAS_ID $file
219 post=$(get_version $CLIENT1 $file)
220 if (($pre == $post)); then
221 error "version not changed: pre $pre, post $post"
224 run_test 0h "setattr of UID changes versions"
227 local file=$DIR/$tfile
231 do_node $CLIENT1 mcreate $file
232 pre=$(get_version $CLIENT1 $file)
233 do_node $CLIENT1 chown :$RUNAS_ID $file
234 post=$(get_version $CLIENT1 $file)
235 if (($pre == $post)); then
236 error "version not changed: pre $pre, post $post"
239 run_test 0i "setattr of GID changes versions"
242 local file=$DIR/$tfile
243 local var=${SINGLEMDS}_svc
245 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
246 do_node $CLIENT1 mcreate $file
248 replay_barrier $SINGLEMDS
249 do_node $CLIENT2 chown :$RUNAS_ID $file
250 do_node $CLIENT1 chown $RUNAS_ID $file
251 zconf_umount $CLIENT2 $MOUNT
252 facet_failover $SINGLEMDS
254 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
255 if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
256 error "setattr of UID succeeded unexpectedly"
258 zconf_mount $CLIENT2 $MOUNT
260 run_test 0j "setattr of UID checks versions"
263 local file=$DIR/$tfile
264 local var=${SINGLEMDS}_svc
266 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
267 do_node $CLIENT1 mcreate $file
269 replay_barrier $SINGLEMDS
270 do_node $CLIENT2 chown $RUNAS_ID $file
271 do_node $CLIENT1 chown :$RUNAS_ID $file
272 zconf_umount $CLIENT2 $MOUNT
273 facet_failover $SINGLEMDS
275 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
276 if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
277 error "setattr of GID succeeded unexpectedly"
279 zconf_mount $CLIENT2 $MOUNT
281 run_test 0k "setattr of GID checks versions"
284 local file=$DIR/$tfile
288 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
289 pre=$(get_version $CLIENT1 $file)
290 do_node $CLIENT1 chmod 666 $file
291 post=$(get_version $CLIENT1 $file)
292 if (($pre == $post)); then
293 error "version not changed: pre $pre, post $post"
296 run_test 0l "setattr of permission changes versions"
299 local file=$DIR/$tfile
300 local var=${SINGLEMDS}_svc
302 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
303 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
305 replay_barrier $SINGLEMDS
306 do_node $CLIENT2 chown :$RUNAS_ID $file
307 do_node $CLIENT1 chmod 666 $file
308 zconf_umount $CLIENT2 $MOUNT
309 facet_failover $SINGLEMDS
311 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
312 if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
313 error "setattr of permission succeeded unexpectedly"
315 zconf_mount $CLIENT2 $MOUNT
317 run_test 0m "setattr of permission checks versions"
320 local file=$DIR/$tfile
324 do_node $CLIENT1 mcreate $file
325 pre=$(get_version $CLIENT1 $file)
326 do_node $CLIENT1 chattr +i $file
327 post=$(get_version $CLIENT1 $file)
328 do_node $CLIENT1 chattr -i $file
329 if (($pre == $post)); then
330 error "version not changed: pre $pre, post $post"
333 run_test 0n "setattr of flags changes versions"
341 if ((${#attr} != 1)); then
342 error "checking multiple attributes not implemented yet"
344 do_node $client lsattr $file | cut -d ' ' -f 1 | grep -q $attr
348 local file=$DIR/$tfile
350 local var=${SINGLEMDS}_svc
352 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
353 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
355 replay_barrier $SINGLEMDS
356 do_node $CLIENT2 chmod 666 $file
357 do_node $CLIENT1 chattr +i $file
358 zconf_umount $CLIENT2 $MOUNT
359 facet_failover $SINGLEMDS
361 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
362 checkattr $CLIENT1 i $file
364 do_node $CLIENT1 chattr -i $file
365 if [ $rc -eq 0 ]; then
366 error "setattr of flags succeeded unexpectedly"
368 zconf_mount $CLIENT2 $MOUNT
370 run_test 0o "setattr of flags checks versions"
373 local file=$DIR/$tfile
377 local var=${SINGLEMDS}_svc
379 ad_orig=$(do_facet $SINGLEMDS "$LCTL get_param mdd.${!var}.atime_diff")
380 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
381 do_node $CLIENT1 mcreate $file
382 pre=$(get_version $CLIENT1 $file)
383 do_node $CLIENT1 touch $file
384 post=$(get_version $CLIENT1 $file)
386 # We don't fail MDS in this test. atime_diff shall be
387 # restored to its original value.
389 do_facet $SINGLEMDS "$LCTL set_param $ad_orig"
390 if (($pre != $post)); then
391 error "version changed unexpectedly: pre $pre, post $post"
394 run_test 0p "setattr of times does not change versions"
397 local file=$DIR/$tfile
401 do_node $CLIENT1 mcreate $file
402 pre=$(get_version $CLIENT1 $file)
403 do_node $CLIENT1 truncate $file 1
404 post=$(get_version $CLIENT1 $file)
405 if (($pre != $post)); then
406 error "version changed unexpectedly: pre $pre, post $post"
409 run_test 0q "setattr of size does not change versions"
412 local file=$DIR/$tfile
416 local var=${SINGLEMDS}_svc
418 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
419 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0"
420 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $file
422 replay_barrier $SINGLEMDS
423 do_node $CLIENT2 chmod 666 $file
424 do_node $CLIENT1 truncate $file 1
426 mtime_pre=$(do_node $CLIENT1 stat --format=%Y $file)
427 do_node $CLIENT1 touch $file
428 mtime_post=$(do_node $CLIENT1 stat --format=%Y $file)
429 zconf_umount $CLIENT2 $MOUNT
430 facet_failover $SINGLEMDS
432 client_up $CLIENT1 || error "$CLIENT1 evicted"
433 if (($mtime_pre >= $mtime_post)); then
434 error "time not changed: pre $mtime_pre, post $mtime_post"
436 if ! do_node $CLIENT1 $CHECKSTAT -s 1 $file; then
437 error "setattr of size failed"
439 mtime=$(do_node $CLIENT1 stat --format=%Y $file)
440 if (($mtime != $mtime_post)); then
441 error "setattr of times failed: expected $mtime_post, got $mtime"
443 zconf_mount $CLIENT2 $MOUNT
445 run_test 0r "setattr of times and size does not check versions"
453 do_node $CLIENT1 mcreate $DIR/$tfile
454 do_node $CLIENT1 mkdir -p $DIR/$tdir
455 pre=$(get_version $CLIENT1 $DIR/$tfile)
456 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
457 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
458 post=$(get_version $CLIENT1 $DIR/$tfile)
459 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
460 if (($pre == $post)); then
461 error "version of source not changed: pre $pre, post $post"
463 if (($tp_pre == $tp_post)); then
464 error "version of target parent not changed: pre $tp_pre, post $tp_post"
467 run_test 0s "link changes versions of source and target parent"
470 local var=${SINGLEMDS}_svc
472 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
473 do_node $CLIENT1 mcreate $DIR/$tfile
474 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
476 replay_barrier $SINGLEMDS
477 do_node $CLIENT2 chmod 777 $DIR/$tdir
478 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
479 zconf_umount $CLIENT2 $MOUNT
480 facet_failover $SINGLEMDS
482 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
483 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
484 error "link should fail"
486 zconf_mount $CLIENT2 $MOUNT
488 run_test 0t "link checks version of target parent"
491 local var=${SINGLEMDS}_svc
493 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
494 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile
495 do_node $CLIENT1 mkdir -p $DIR/$tdir
497 replay_barrier $SINGLEMDS
498 do_node $CLIENT2 chmod 666 $DIR/$tfile
499 do_node $CLIENT1 link $DIR/$tfile $DIR/$tdir/$tfile
500 zconf_umount $CLIENT2 $MOUNT
501 facet_failover $SINGLEMDS
503 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
504 if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
505 error "link should fail"
507 zconf_mount $CLIENT2 $MOUNT
509 run_test 0u "link checks version of source"
517 do_node $CLIENT1 mcreate $DIR/$tfile
518 do_node $CLIENT1 mkdir -p $DIR/$tdir
519 sp_pre=$(get_version $CLIENT1 $DIR)
520 tp_pre=$(get_version $CLIENT1 $DIR/$tdir)
521 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
522 sp_post=$(get_version $CLIENT1 $DIR)
523 tp_post=$(get_version $CLIENT1 $DIR/$tdir)
524 if (($sp_pre == $sp_post)); then
525 error "version of source parent not changed: pre $sp_pre, post $sp_post"
527 if (($tp_pre == $tp_post)); then
528 error "version of target parent not changed: pre $tp_pre, post $tp_post"
531 run_test 0v "rename changes versions of source parent and target parent"
537 do_node $CLIENT1 mcreate $DIR/$tfile
538 pre=$(get_version $CLIENT1 $DIR)
539 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tfile-new
540 post=$(get_version $CLIENT1 $DIR)
541 if (($pre == $post)); then
542 error "version of parent not changed: pre $pre, post $post"
545 run_test 0w "rename within same dir changes version of parent"
548 local var=${SINGLEMDS}_svc
550 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
551 do_node $CLIENT1 mcreate $DIR/$tfile
552 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
554 replay_barrier $SINGLEMDS
555 do_node $CLIENT2 chmod 777 $DIR
556 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
557 zconf_umount $CLIENT2 $MOUNT
558 facet_failover $SINGLEMDS
560 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
561 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
562 error "rename should fail"
564 zconf_mount $CLIENT2 $MOUNT
566 run_test 0x "rename checks version of source parent"
569 local var=${SINGLEMDS}_svc
571 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
572 do_node $CLIENT1 mcreate $DIR/$tfile
573 do_node $CLIENT1 mkdir -p -m 755 $DIR/$tdir
575 replay_barrier $SINGLEMDS
576 do_node $CLIENT2 chmod 777 $DIR/$tdir
577 do_node $CLIENT1 mv $DIR/$tfile $DIR/$tdir/$tfile
578 zconf_umount $CLIENT2 $MOUNT
579 facet_failover $SINGLEMDS
581 client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
582 if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
583 error "rename should fail"
585 zconf_mount $CLIENT2 $MOUNT
587 run_test 0y "rename checks version of target parent"
589 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
592 echo "mount client $CLIENT1,$CLIENT2..."
593 zconf_mount_clients $CLIENT1 $DIR
594 zconf_mount_clients $CLIENT2 $DIR
596 do_node $CLIENT2 mkdir -p $DIR/$tdir
597 replay_barrier $SINGLEMDS
598 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
599 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
600 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
601 zconf_umount $CLIENT2 $DIR
603 facet_failover $SINGLEMDS
604 # recovery shouldn't fail due to missing client 2
605 client_up $CLIENT1 || return 1
607 # All 50 files should have been replayed
608 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
609 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
611 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
612 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
614 zconf_umount_clients $CLIENTS $DIR
617 run_test 1a "client during replay doesn't affect another one"
620 zconf_mount_clients $CLIENT1 $DIR
621 zconf_mount_clients $CLIENT2 $DIR
623 do_node $CLIENT2 mkdir -p $DIR/$tdir
624 replay_barrier $SINGLEMDS
625 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
626 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
627 # client1 reads data created by client2, which will be lost
628 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
629 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
630 zconf_umount $CLIENT2 $DIR
632 facet_failover $SINGLEMDS
633 # recovery shouldn't fail due to missing client 2
634 client_up $CLIENT1 || return 1
636 # All 50 files should have been replayed
637 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
638 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
639 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
641 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
643 zconf_umount_clients $CLIENTS $DIR
646 run_test 2a "lost data due to missed REMOTE client during replay"
649 # This test uses three Lustre clients on two hosts.
651 # Lustre Client 1: $CLIENT1:$MOUNT ($DIR)
652 # Lustre Client 2: $CLIENT2:$MOUNT2 ($DIR2)
653 # Lustre Client 3: $CLIENT2:$MOUNT1 ($DIR1)
658 local var=${SINGLEMDS}_svc
660 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
661 zconf_mount $CLIENT1 $MOUNT
662 zconf_mount $CLIENT2 $MOUNT2
663 zconf_mount $CLIENT2 $MOUNT1
664 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-a
665 do_node $CLIENT1 openfile -f O_RDWR:O_CREAT -m 0644 $DIR/$tfile-b
668 # Save an MDT transaction number before recovery.
670 pre=$(get_version $CLIENT1 $DIR/$tfile-a)
673 # Comments on the replay sequence state the expected result
677 # "U" Unable to replay.
680 replay_barrier $SINGLEMDS
681 do_node $CLIENT1 chmod 666 $DIR/$tfile-a # R
682 do_node $CLIENT2 chmod 666 $DIR1/$tfile-b # R
683 do_node $CLIENT2 chown :$RUNAS_ID $DIR2/$tfile-a # U
684 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-a # J
685 do_node $CLIENT2 truncate $DIR2/$tfile-b 1 # U
686 do_node $CLIENT2 chown :$RUNAS_ID $DIR1/$tfile-b # R
687 do_node $CLIENT1 chown $RUNAS_ID $DIR/$tfile-b # R
688 zconf_umount $CLIENT2 $MOUNT2
689 facet_failover $SINGLEMDS
691 client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted"
692 client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted"
695 # Check the MDT epoch. $post must be the first transaction
696 # number assigned after recovery.
698 do_node $CLIENT2 touch $DIR1/$tfile
699 post=$(get_version $CLIENT2 $DIR1/$tfile)
700 if (($(($pre >> 32)) == $((post >> 32)))); then
701 error "epoch not changed: pre $pre, post $post"
703 if (($(($post & 0x00000000ffffffff)) != 1)); then
704 error "transno should restart from one: got $post"
707 do_node $CLIENT2 stat $DIR1/$tfile-a
708 do_node $CLIENT2 stat $DIR1/$tfile-b
710 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$UID -g \\\#$UID \
711 $DIR1/$tfile-a || error "$DIR/$tfile-a: unexpected state"
712 do_node $CLIENT2 $CHECKSTAT -p 0666 -u \\\#$RUNAS_ID -g \\\#$RUNAS_ID \
713 $DIR1/$tfile-b || error "$DIR/$tfile-b: unexpected state"
715 zconf_umount $CLIENT2 $MOUNT1
716 zconf_umount $CLIENT1 $MOUNT
718 run_test 2b "3 clients: some, none, and all reqs replayed"
721 zconf_mount_clients $CLIENT1 $DIR
722 zconf_mount_clients $CLIENT2 $DIR
724 #make sure the time will change
725 local var=${SINGLEMDS}_svc
726 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
727 do_node $CLIENT1 touch $DIR/$tfile
728 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
730 replay_barrier $SINGLEMDS
732 do_node $CLIENT2 touch $DIR/$tfile
733 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
735 do_node $CLIENT1 touch $DIR/$tfile
737 do_node $CLIENT1 rm $DIR/$tfile
738 zconf_umount $CLIENT2 $DIR
740 facet_failover $SINGLEMDS
741 # recovery shouldn't fail due to missing client 2
742 client_up $CLIENT1 || return 1
743 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
745 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
747 zconf_umount_clients $CLIENTS $DIR
751 run_test 3a "setattr of time/size doesn't change version"
754 zconf_mount_clients $CLIENT1 $DIR
755 zconf_mount_clients $CLIENT2 $DIR
757 #make sure the time will change
758 local var=${SINGLEMDS}_svc
759 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
761 do_node $CLIENT1 touch $DIR/$tfile
762 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
764 replay_barrier $SINGLEMDS
766 do_node $CLIENT2 chmod +x $DIR/$tfile
767 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
769 do_node $CLIENT1 chmod -x $DIR/$tfile
770 zconf_umount $CLIENT2 $DIR
772 facet_failover $SINGLEMDS
773 # recovery should fail due to missing client 2
774 client_evicted $CLIENT1 || return 1
776 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
777 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
779 zconf_umount_clients $CLIENTS $DIR
783 run_test 3b "setattr of permissions changes version"
# Deactivate a client: announce it, then set lustre.fail_loc=0x50d on
# $client with sysctl, run through do_node.
# NOTE(review): $client is assigned on a line not shown in this listing —
# presumably from the first argument; confirm.
785 vbr_deactivate_client() {
787 echo "Deactivating client $client";
788 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Re-activate a client: announce it, then reset lustre.fail_loc to 0x0 on
# $client with sysctl, run through do_node.
# NOTE(review): $client is assigned on a line not shown in this listing —
# presumably from the first argument; confirm.
791 vbr_activate_client() {
793 echo "Activating client $client";
794 do_node $client "sysctl -w lustre.fail_loc=0x0"
800 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
801 [ -z "$(do_node $client lctl dl | grep ost)" ]
805 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
807 remote_server $CLIENT2 || \
808 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
810 zconf_mount_clients $CLIENT1 $DIR
811 zconf_mount_clients $CLIENT2 $DIR
813 do_node $CLIENT2 mkdir -p $DIR/$tdir
814 replay_barrier $SINGLEMDS
815 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
816 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
817 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
818 vbr_deactivate_client $CLIENT2
820 facet_failover $SINGLEMDS
821 client_up $CLIENT1 || return 1
823 # All 50 files should have been replayed
824 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
825 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
827 vbr_activate_client $CLIENT2
828 client_up $CLIENT2 || return 4
829 # All 25 files from client2 should have been replayed
830 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
832 zconf_umount_clients $CLIENTS $DIR
835 run_test 4a "fail MDS, delayed recovery"
838 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
840 remote_server $CLIENT2 || \
841 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
843 zconf_mount_clients $CLIENT1 $DIR
844 zconf_mount_clients $CLIENT2 $DIR
846 replay_barrier $SINGLEMDS
847 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
848 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
849 vbr_deactivate_client $CLIENT2
851 facet_failover $SINGLEMDS
852 client_up $CLIENT1 || return 1
854 # create another set of files
855 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
857 vbr_activate_client $CLIENT2
858 client_up $CLIENT2 || return 2
860 # All files from both clients should have been replayed
861 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
862 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
863 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
865 zconf_umount_clients $CLIENTS $DIR
867 run_test 4b "fail MDS, normal operation, delayed open recovery"
870 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
872 remote_server $CLIENT2 || \
873 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
875 zconf_mount_clients $CLIENT1 $DIR
876 zconf_mount_clients $CLIENT2 $DIR
878 replay_barrier $SINGLEMDS
879 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
880 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
881 vbr_deactivate_client $CLIENT2
883 facet_failover $SINGLEMDS
884 client_up $CLIENT1 || return 1
886 # create another set of files
887 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
889 vbr_activate_client $CLIENT2
890 client_up $CLIENT2 || return 2
892 # All files from both clients should have been replayed
893 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
894 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
895 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
897 zconf_umount_clients $CLIENTS $DIR
899 run_test 4c "fail MDS, normal operation, delayed recovery"
902 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
904 remote_server $CLIENT2 || \
905 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
907 zconf_mount_clients $CLIENT1 $DIR
908 zconf_mount_clients $CLIENT2 $DIR
910 replay_barrier $SINGLEMDS
911 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
912 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
913 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
914 vbr_deactivate_client $CLIENT2
916 facet_failover $SINGLEMDS
917 client_evicted $CLIENT1 || return 1
919 vbr_activate_client $CLIENT2
920 client_up $CLIENT2 || return 2
922 # First 25 files should have been replayed
923 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
924 # The third file ($tfile-3-0) is not replayed because client2 is missing
925 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
926 # The file from client2 should exist
927 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
929 zconf_umount_clients $CLIENTS $DIR
931 run_test 5a "fail MDS, delayed recovery should fail"
934 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
936 remote_server $CLIENT2 || \
937 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
939 zconf_mount_clients $CLIENT1 $DIR
940 zconf_mount_clients $CLIENT2 $DIR
942 replay_barrier $SINGLEMDS
943 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
944 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
945 vbr_deactivate_client $CLIENT2
947 facet_failover $SINGLEMDS
948 client_up $CLIENT1 || return 1
949 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
951 # create another set of files
952 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
954 vbr_activate_client $CLIENT2
955 client_evicted $CLIENT2 || return 4
956 # file from client2 should fail
957 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
959 # All 50 files from client 1 should have been replayed
960 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
961 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
963 zconf_umount_clients $CLIENTS $DIR
965 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
968 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
970 remote_server $CLIENT2 || \
971 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
973 zconf_mount_clients $CLIENT1 $DIR
974 zconf_mount_clients $CLIENT2 $DIR
976 do_node $CLIENT2 mkdir -p $DIR/$tdir
977 replay_barrier $SINGLEMDS
978 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
979 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
980 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
981 vbr_deactivate_client $CLIENT2
983 facet_failover $SINGLEMDS
984 # replay only 5 requests
985 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
986 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
987 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
989 # vbr_activate_client $CLIENT2
990 # need way to know that client stops replays
993 facet_failover $SINGLEMDS
994 client_up $CLIENT1 || return 1
996 # All files should have been replayed
997 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
998 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
999 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1001 zconf_umount_clients $CLIENTS $DIR
1004 run_test 6a "fail MDS, delayed recovery, fail MDS"
1007 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1009 remote_server $CLIENT2 || \
1010 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1012 zconf_mount_clients $CLIENT1 $DIR
1013 zconf_mount_clients $CLIENT2 $DIR
1015 do_node $CLIENT2 mkdir -p $DIR/$tdir
1016 replay_barrier $SINGLEMDS
1017 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
1018 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
1019 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
1020 vbr_deactivate_client $CLIENT2
1022 facet_failover $SINGLEMDS
1023 vbr_activate_client $CLIENT2
1024 client_up $CLIENT2 || return 4
1026 facet_failover $SINGLEMDS
1027 client_up $CLIENT1 || return 1
1029 # All files should have been replayed
1030 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
1031 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
1032 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
1034 zconf_umount_clients $CLIENTS $DIR
1037 run_test 7a "fail MDS, delayed recovery, fail MDS"
1040 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1042 remote_server $CLIENT2 || \
1043 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1045 zconf_mount_clients $CLIENT1 $DIR
1046 zconf_mount_clients $CLIENT2 $DIR
1048 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc || return 1
1049 do_node $CLIENT2 rm -f $DIR/$tfile
1050 replay_barrier $SINGLEMDS
1051 rmultiop_stop $CLIENT2 || return 2
1053 vbr_deactivate_client $CLIENT2
1054 facet_failover $SINGLEMDS
1055 client_up $CLIENT1 || return 3
1056 #client1 is back and will try to open orphan
1057 vbr_activate_client $CLIENT2
1058 client_up $CLIENT2 || return 4
1060 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1061 zconf_umount_clients $CLIENTS $DIR
1064 run_test 8a "orphans are kept until delayed recovery"
1067 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1069 remote_server $CLIENT2 || \
1070 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1072 zconf_mount_clients $CLIENT1 $DIR
1073 zconf_mount_clients $CLIENT2 $DIR
1075 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1076 replay_barrier $SINGLEMDS
1077 do_node $CLIENT1 rm -f $DIR/$tfile
1079 vbr_deactivate_client $CLIENT2
1080 facet_failover $SINGLEMDS
1081 client_up $CLIENT1 || return 2
1082 #client1 is back and will try to open orphan
1083 vbr_activate_client $CLIENT2
1084 client_up $CLIENT2 || return 3
1086 rmultiop_stop $CLIENT2 || return 1
1087 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1088 zconf_umount_clients $CLIENTS $DIR
1091 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
1094 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1096 remote_server $CLIENT2 || \
1097 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1099 zconf_mount_clients $CLIENT1 $DIR
1100 zconf_mount_clients $CLIENT2 $DIR
1102 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 1
1103 replay_barrier $SINGLEMDS
1104 do_node $CLIENT1 rm -f $DIR/$tfile
1105 rmultiop_stop $CLIENT2 || return 2
1107 vbr_deactivate_client $CLIENT2
1108 facet_failover $SINGLEMDS
1109 client_up $CLIENT1 || return 3
1110 #client1 is back and will try to open orphan
1111 vbr_activate_client $CLIENT2
1112 client_up $CLIENT2 || return 4
1114 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1115 zconf_umount_clients $CLIENTS $DIR
1118 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
1121 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1123 remote_server $CLIENT2 || \
1124 { skip_env "Client $CLIENT2 is on the server node" && return 0; }
1126 zconf_mount_clients $CLIENT1 $DIR
1127 zconf_mount_clients $CLIENT2 $DIR
1129 rmultiop_start $CLIENT1 $DIR/$tfile O_tSc|| return 1
1130 rmultiop_start $CLIENT2 $DIR/$tfile O_tSc|| return 2
1131 replay_barrier $SINGLEMDS
1132 do_node $CLIENT1 rm -f $DIR/$tfile
1133 rmultiop_stop $CLIENT2 || return 3
1134 rmultiop_stop $CLIENT1 || return 4
1136 vbr_deactivate_client $CLIENT2
1137 facet_failover $SINGLEMDS
1138 client_up $CLIENT1 || return 6
1140 #client1 is back and will try to open orphan
1141 vbr_activate_client $CLIENT2
1142 client_up $CLIENT2 || return 8
1144 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
1145 zconf_umount_clients $CLIENTS $DIR
1148 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
1151 zconf_mount $CLIENT1 $DIR
1152 zconf_mount $CLIENT2 $DIR
1154 do_node $CLIENT1 mcreate $DIR/$tfile
1155 do_node $CLIENT1 mkdir $DIR/$tfile-2
1156 replay_barrier $SINGLEMDS
1157 # missed replay from client1 will lead to recovery by versions
1158 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1159 do_node $CLIENT2 rm $DIR/$tfile || return 1
1160 do_node $CLIENT2 touch $DIR/$tfile || return 2
1162 zconf_umount $CLIENT1 $DIR
1163 facet_failover $SINGLEMDS
1164 client_up $CLIENT2 || return 6
1166 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1167 zconf_umount_clients $CLIENTS $DIR
1170 run_test 8e "create | unlink, create shouldn't fail"
1173 zconf_mount_clients $CLIENT1 $DIR
1174 zconf_mount_clients $CLIENT2 $DIR
1176 do_node $CLIENT1 touch $DIR/$tfile
1177 do_node $CLIENT1 mkdir $DIR/$tfile-2
1178 replay_barrier $SINGLEMDS
1179 # missed replay from client1 will lead to recovery by versions
1180 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1181 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1182 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
1184 zconf_umount $CLIENT1 $DIR
1185 facet_failover $SINGLEMDS
1186 client_up $CLIENT2 || return 6
1188 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
1189 zconf_umount $CLIENT2 $DIR
1192 run_test 8f "create | unlink, create shouldn't fail"
1195 zconf_mount_clients $CLIENT1 $DIR
1196 zconf_mount_clients $CLIENT2 $DIR
1198 do_node $CLIENT1 touch $DIR/$tfile
1199 do_node $CLIENT1 mkdir $DIR/$tfile-2
1200 replay_barrier $SINGLEMDS
1201 # missed replay from client1 will lead to recovery by versions
1202 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
1203 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
1204 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
1206 zconf_umount $CLIENT1 $DIR
1207 facet_failover $SINGLEMDS
1208 client_up $CLIENT2 || return 6
1210 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
1211 zconf_umount $CLIENT2 $DIR
1214 run_test 8g "create | unlink, create shouldn't fail"
1217 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
1219 [ -z "$DBENCH_LIB" ] && skip_env "DBENCH_LIB is not set" && return 0
1221 zconf_mount_clients $CLIENTS $DIR
1223 local duration="-t 60"
1224 local cmd="rundbench 1 $duration "
1226 for CLIENT in ${CLIENTS//,/ }; do
1227 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
1229 echo $PID >pid.$CLIENT
1230 echo "Started load PID=`cat pid.$CLIENT`"
1233 replay_barrier $SINGLEMDS
1234 sleep 3 # give clients a time to do operations
1236 vbr_deactivate_client $CLIENT2
1238 log "$TESTNAME fail $SINGLEMDS 1"
1241 # wait for client to reconnect to MDS
1244 vbr_activate_client $CLIENT2
1245 client_up $CLIENT2 || return 4
1247 for CLIENT in ${CLIENTS//,/ }; do
1248 PID=`cat pid.$CLIENT`
1251 echo "load on ${CLIENT} returned $rc"
1254 zconf_umount_clients $CLIENTS $DIR
1256 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Remount all clients on $DIR (only when CLIENTS is set) before cleanup.
1258 [ "$CLIENTS" ] && zconf_mount_clients $CLIENTS $DIR
# Announce completion and tear down the filesystem.
1260 equals_msg `basename $0`: test complete, cleaning up
1261 check_and_cleanup_lustre
# If the suite log exists, print it and exit 1 when it contains "FAIL";
# in every other case the trailing "|| true" leaves a zero status.
1262 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true