# Suite setup: choose the excluded tests, load the test framework and the
# configuration, and skip the whole suite unless two or more clients are usable.
6 ALWAYS_EXCEPT="2 $REPLAY_VBR_EXCEPT"
# PTLDEBUG defaults to -1 and LUSTRE to the parent of this script's directory
# unless the caller already set them.
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
13 . $LUSTRE/tests/test-framework.sh
# ${CONFIG:=...} assigns the default path to CONFIG when it is unset or empty,
# then the resulting file is sourced.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Unmet client requirements are reported through skip and the script exits 0.
19 [ -n "$CLIENTS" ] || { skip "Need two or more clients" && exit 0; }
20 [ $CLIENTCOUNT -ge 2 ] || \
21 { skip "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
22 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
24 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# NOTE(review): the assignment below sets ALWAYS_EXCEPT to its own value, so it
# has no effect as written.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
# Prepare the filesystem and remove leftovers matching [df][0-9]* under $DIR.
32 check_and_setup_lustre
33 rm -rf $DIR/[df][0-9]*
# The debug daemon is started only when DAEMONFILE is set.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Unmount $DIR on all clients; the test bodies below mount the clients they use.
37 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
# Body of the test registered below as run_test 1: CLIENT2 creates one file
# after the replay barrier and is unmounted before the MDS failover; CLIENT1
# must still recover and be able to unlink all 50 of its own files.
40 echo "mount client $CLIENT1,$CLIENT2..."
41 zconf_mount_clients $CLIENT1 $DIR
42 zconf_mount_clients $CLIENT2 $DIR
44 do_node $CLIENT2 mkdir -p $DIR/$tdir
45 replay_barrier $SINGLEMDS
46 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
47 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
48 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
49 zconf_umount $CLIENT2 $DIR
51 facet_failover $SINGLEMDS
52 # recovery shouldn't fail due to missing client 2
53 do_node $CLIENT1 df $DIR || return 1
55 # All 50 files should have been replayed
56 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
57 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
# After CLIENT2 is mounted again, the file it created before the failover must
# not exist.
# NOTE(review): the -e check runs on the local node, not through do_node.
59 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
60 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
62 zconf_umount_clients $CLIENTS $DIR
65 run_test 1 "lost client doesn't affect another during replay"
# Body of the test registered below as run_test 2: CLIENT1 stats a file that
# CLIENT2 created after the replay barrier, then CLIENT2 is unmounted before
# the MDS failover. CLIENT1's own 50 files must be recovered, while the file
# created by CLIENT2 must no longer be visible.
68 zconf_mount_clients $CLIENT1 $DIR
69 zconf_mount_clients $CLIENT2 $DIR
71 do_node $CLIENT2 mkdir -p $DIR/$tdir
72 replay_barrier $SINGLEMDS
73 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
74 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
75 #client1 read data from client2 which will be lost
76 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
77 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
78 zconf_umount $CLIENT2 $DIR
80 facet_failover $SINGLEMDS
81 # recovery shouldn't fail due to missing client 2
82 do_node $CLIENT1 df $DIR || return 1
84 # All 50 files should have been replayed
85 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
86 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
# Returns 4 when the file created by CLIENT2 can still be stat'ed.
87 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
89 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
91 zconf_umount_clients $CLIENTS $DIR
94 run_test 2 "lost data due to missed REMOTE client during replay"
# Body of the test registered below as run_test 3a: after the replay barrier
# CLIENT2 touches the file, then CLIENT1 touches and removes it; CLIENT2 is
# unmounted before the MDS failover. Recovery on CLIENT1 must succeed and the
# file must be gone afterwards.
97 zconf_mount_clients $CLIENT1 $DIR
98 zconf_mount_clients $CLIENT2 $DIR
100 #make sure the time will change
# ${!var} expands the variable whose name is stored in var (${SINGLEMDS}_svc).
# The bare "return" propagates the exit status of the failed set_param.
101 local var=${SINGLEMDS}_svc
102 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
103 do_node $CLIENT1 touch $DIR/$tfile
104 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
106 replay_barrier $SINGLEMDS
108 do_node $CLIENT2 touch $DIR/$tfile
109 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
111 do_node $CLIENT1 touch $DIR/$tfile
113 do_node $CLIENT1 rm $DIR/$tfile
114 zconf_umount $CLIENT2 $DIR
116 facet_failover $SINGLEMDS
117 # recovery shouldn't fail due to missing client 2
118 do_node $CLIENT1 df $DIR || return 1
# Returns 2 when the removed file can still be stat'ed.
119 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
121 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
123 zconf_umount_clients $CLIENTS $DIR
127 run_test 3a "setattr of time/size doesn't change version"
# Body of the test registered below as run_test 3b: after the replay barrier
# CLIENT2 runs chmod +x and CLIENT1 runs chmod -x on the same file; CLIENT2 is
# unmounted before the MDS failover. Unlike 3a, this test expects df on CLIENT1
# to fail after the failover.
130 zconf_mount_clients $CLIENT1 $DIR
131 zconf_mount_clients $CLIENT2 $DIR
133 #make sure the time will change
# ${!var} expands the variable whose name is stored in var (${SINGLEMDS}_svc).
134 local var=${SINGLEMDS}_svc
135 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
137 do_node $CLIENT1 touch $DIR/$tfile
138 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
140 replay_barrier $SINGLEMDS
142 do_node $CLIENT2 chmod +x $DIR/$tfile
143 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
145 do_node $CLIENT1 chmod -x $DIR/$tfile
146 zconf_umount $CLIENT2 $DIR
148 facet_failover $SINGLEMDS
149 # recovery should fail due to missing client 2
150 do_node $CLIENT1 df $DIR && return 1
# Returns 2 when "$CHECKSTAT -p 755" succeeds on the file.
152 do_node $CLIENT1 $CHECKSTAT -p 755 $DIR/$tfile && return 2
153 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
155 zconf_umount_clients $CLIENTS $DIR
159 run_test 3b "setattr of permissions changes version"
# Announce the client and set lustre.fail_loc=0x50d on it through sysctl.
# NOTE(review): $client is not assigned in the lines visible here; presumably
# it is taken from $1 - confirm.
161 vbr_deactivate_client() {
163 echo "Deactivating client $client";
164 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Announce the client and reset lustre.fail_loc to 0x0 on it through sysctl.
# NOTE(review): $client is not assigned in the lines visible here; presumably
# it is taken from $1 - confirm.
167 vbr_activate_client() {
169 echo "Activating client $client";
170 do_node $client "sysctl -w lustre.fail_loc=0x0"
# Succeeds only when the "lctl dl" output on $client contains neither "mdt" nor
# "ost"; lctl dl is run twice, once for each grep.
# NOTE(review): presumably the body of the remote_server helper that the tests
# below call - the function header is not visible here, confirm.
176 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
177 [ -z "$(do_node $client lctl dl | grep ost)" ]
# Body of the test registered below as run_test 4a: both clients create files
# after the replay barrier, CLIENT2 is deactivated across the MDS failover and
# re-activated afterwards; all files of both clients must then be unlinkable.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
181 local var=${SINGLEMDS}_svc
182 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
183 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
185 remote_server $CLIENT2 || \
186 { skip "Client $CLIENT2 is on the server node" && return 0; }
188 zconf_mount_clients $CLIENT1 $DIR
189 zconf_mount_clients $CLIENT2 $DIR
191 do_node $CLIENT2 mkdir -p $DIR/$tdir
192 replay_barrier $SINGLEMDS
193 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
194 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
195 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
196 vbr_deactivate_client $CLIENT2
198 facet_failover $SINGLEMDS
199 do_node $CLIENT1 df $DIR || return 1
201 # All 50 files should have been replayed
202 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
203 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
205 vbr_activate_client $CLIENT2
206 do_node $CLIENT2 df $DIR || return 4
207 # All 25 files from client2 should have been replayed
208 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
210 zconf_umount_clients $CLIENTS $DIR
213 run_test 4a "fail MDS, delayed recovery"
# Body of the test registered below as run_test 4b: like 4a, but CLIENT1
# creates a second set of files after the failover, before CLIENT2 is
# re-activated. Files are created with "createmany -o".
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
# NOTE(review): no mkdir of $DIR/$tdir is visible in this block although
# CLIENT2 creates files inside it.
216 local var=${SINGLEMDS}_svc
217 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
218 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
220 remote_server $CLIENT2 || \
221 { skip "Client $CLIENT2 is on the server node" && return 0; }
223 zconf_mount_clients $CLIENT1 $DIR
224 zconf_mount_clients $CLIENT2 $DIR
226 replay_barrier $SINGLEMDS
227 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
228 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
229 vbr_deactivate_client $CLIENT2
231 facet_failover $SINGLEMDS
232 do_node $CLIENT1 df $DIR || return 1
234 # create another set of files
235 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
237 vbr_activate_client $CLIENT2
238 do_node $CLIENT2 df $DIR || return 2
240 # All files from both clients should have been replayed
241 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
242 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
243 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
245 zconf_umount_clients $CLIENTS $DIR
247 run_test 4b "fail MDS, normal operation, delayed open recovery"
# Body of the test registered below as run_test 4c: same sequence as 4b, but
# the files are created with "createmany -m" instead of "createmany -o".
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
# NOTE(review): no mkdir of $DIR/$tdir is visible in this block although
# CLIENT2 creates files inside it.
250 local var=${SINGLEMDS}_svc
251 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
252 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
254 remote_server $CLIENT2 || \
255 { skip "Client $CLIENT2 is on the server node" && return 0; }
257 zconf_mount_clients $CLIENT1 $DIR
258 zconf_mount_clients $CLIENT2 $DIR
260 replay_barrier $SINGLEMDS
261 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
262 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
263 vbr_deactivate_client $CLIENT2
265 facet_failover $SINGLEMDS
266 do_node $CLIENT1 df $DIR || return 1
268 # create another set of files
269 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
271 vbr_activate_client $CLIENT2
272 do_node $CLIENT2 df $DIR || return 2
274 # All files from both clients should have been replayed
275 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
276 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
277 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
279 zconf_umount_clients $CLIENTS $DIR
281 run_test 4c "fail MDS, normal operation, delayed recovery"
# Body of the test registered below as run_test 5a: CLIENT1 creates 25 files,
# CLIENT2 creates one, then CLIENT1 creates one more, all in $DIR after the
# replay barrier. CLIENT2 is deactivated across the MDS failover. The test
# expects df on CLIENT1 to fail, CLIENT1's first 25 files to be present,
# $tfile-3-0 to be absent and CLIENT2's file to be present.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
284 local var=${SINGLEMDS}_svc
285 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
286 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
288 remote_server $CLIENT2 || \
289 { skip "Client $CLIENT2 is on the server node" && return 0; }
291 zconf_mount_clients $CLIENT1 $DIR
292 zconf_mount_clients $CLIENT2 $DIR
294 replay_barrier $SINGLEMDS
295 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
296 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
297 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
298 vbr_deactivate_client $CLIENT2
300 facet_failover $SINGLEMDS
# Returns 1 when df on CLIENT1 succeeds.
301 do_node $CLIENT1 df $DIR && return 1
303 vbr_activate_client $CLIENT2
304 do_node $CLIENT2 df $DIR || return 2
306 # First 25 files should have been replayed
307 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
308 # The third create was not replayed because client2 was missing
309 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
310 # file from client2 should exist
311 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
313 zconf_umount_clients $CLIENTS $DIR
315 run_test 5a "fail MDS, delayed recovery should fail"
# Body of the test registered below as run_test 5b: CLIENT1 creates 25 files
# and CLIENT2 one file after the replay barrier; CLIENT2 is deactivated across
# the MDS failover and CLIENT1 creates 25 more files afterwards. The test
# expects CLIENT2's file to be absent, df on the re-activated CLIENT2 to fail,
# and all 50 files of CLIENT1 to be unlinkable.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
318 local var=${SINGLEMDS}_svc
319 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
320 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
322 remote_server $CLIENT2 || \
323 { skip "Client $CLIENT2 is on the server node" && return 0; }
325 zconf_mount_clients $CLIENT1 $DIR
326 zconf_mount_clients $CLIENT2 $DIR
328 replay_barrier $SINGLEMDS
329 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
330 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
331 vbr_deactivate_client $CLIENT2
333 facet_failover $SINGLEMDS
334 do_node $CLIENT1 df $DIR || return 1
335 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
337 # create another set of files
338 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
340 vbr_activate_client $CLIENT2
# Returns 4 when df on CLIENT2 succeeds.
341 do_node $CLIENT2 df $DIR && return 4
342 # file from client2 should not exist
343 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
345 # All 50 files from client 1 should have been replayed
346 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
347 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
349 zconf_umount_clients $CLIENTS $DIR
351 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
# Body of the test registered below as run_test 6a: both clients create files
# after the replay barrier and CLIENT2 is deactivated across a first MDS
# failover. CLIENT2 then gets fail_val=5 and fail_loc=0x2000050e before running
# df, and the MDS is failed over a second time; afterwards all files of both
# clients must be unlinkable.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
354 local var=${SINGLEMDS}_svc
355 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
356 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
358 remote_server $CLIENT2 || \
359 { skip "Client $CLIENT2 is on the server node" && return 0; }
361 zconf_mount_clients $CLIENT1 $DIR
362 zconf_mount_clients $CLIENT2 $DIR
364 do_node $CLIENT2 mkdir -p $DIR/$tdir
365 replay_barrier $SINGLEMDS
366 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
367 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
368 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
369 vbr_deactivate_client $CLIENT2
371 facet_failover $SINGLEMDS
372 # replay only 5 requests
373 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
374 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
375 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
# The exit status of this df is not checked.
376 do_node $CLIENT2 df $DIR
377 # vbr_activate_client $CLIENT2
378 # need way to know that client stops replays
381 facet_failover $SINGLEMDS
382 do_node $CLIENT1 df $DIR || return 1
384 # All files should have been replayed
385 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
386 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
387 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
389 zconf_umount_clients $CLIENTS $DIR
392 run_test 6a "fail MDS, delayed recovery, fail MDS"
# Body of the test registered below as run_test 7a: both clients create files
# after the replay barrier and CLIENT2 is deactivated across a first MDS
# failover. CLIENT2 is re-activated and must pass df, then the MDS is failed
# over a second time; afterwards all files of both clients must be unlinkable.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
395 local var=${SINGLEMDS}_svc
396 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
397 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
399 remote_server $CLIENT2 || \
400 { skip "Client $CLIENT2 is on the server node" && return 0; }
402 zconf_mount_clients $CLIENT1 $DIR
403 zconf_mount_clients $CLIENT2 $DIR
405 do_node $CLIENT2 mkdir -p $DIR/$tdir
406 replay_barrier $SINGLEMDS
407 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
408 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
409 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
410 vbr_deactivate_client $CLIENT2
412 facet_failover $SINGLEMDS
413 vbr_activate_client $CLIENT2
414 do_node $CLIENT2 df $DIR || return 4
416 facet_failover $SINGLEMDS
417 do_node $CLIENT1 df $DIR || return 1
419 # All files should have been replayed
420 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
421 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
422 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
424 zconf_umount_clients $CLIENTS $DIR
427 run_test 7a "fail MDS, delayed recovery, fail MDS"
# Start runmultiop_bg_pause on $client in the background, read the multiop PID
# from a per-script pid file on that node, and export it together with the
# do_node PID as ${client}_multiop_pid and ${client}_do_node_pid.
# NOTE(review): presumably the body of rmultiop_start, which the tests below
# call; $client, $file and $pid are not assigned in the lines visible here.
433 # We need to run do_node in the background, because pdsh does not exit
434 # while a child process of the script it ran still exists.
435 # I.e. pdsh does not exit when runmultiop_bg_pause has exited,
436 # because of multiop_bg_pause -> $MULTIOP_PROG &
437 # For the same reason we need to sleep a bit after do_node starts
438 # to let runmultiop_bg_pause start multiop and
439 # update /tmp/multiop_bg.pid ;
440 # The rm of /tmp/multiop_bg.pid guarantees here that
441 # the /tmp/multiop_bg.pid file we read is the one
442 # written by runmultiop_bg_pause.
# NOTE(review): the comment above names /tmp/multiop_bg.pid, while the code
# uses $TMP/multiop_bg.pid.$$ (suffixed with this shell's PID).
444 local pid_file=$TMP/multiop_bg.pid.$$
445 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file O_tSc" &
449 multiop_pid=$(do_node $client cat $pid_file)
450 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
# eval is used because the exported variable names are built from $client.
451 eval export ${client}_multiop_pid=$multiop_pid
452 eval export ${client}_do_node_pid=$pid
453 local var=${client}_multiop_pid
454 echo client $client multiop_bg started multiop_pid=${!var}
# Send SIGUSR1 to the multiop process recorded for $client and wait for the
# matching background do_node job.
# NOTE(review): presumably the body of rmultiop_stop, which the tests below
# call; $client is not assigned in the lines visible here.
# The two locals hold variable NAMES; ${!name} reads the values stored in
# ${client}_multiop_pid and ${client}_do_node_pid.
460 local multiop_pid=${client}_multiop_pid
461 local do_node_pid=${client}_do_node_pid
463 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
464 do_node $client kill -USR1 ${!multiop_pid}
# "|| true" discards the exit status of the waited-for job.
466 wait ${!do_node_pid} || true
# Body of the test registered below as run_test 8a: CLIENT2 starts a
# background multiop on the file and removes the file, then the replay barrier
# is set and the multiop is stopped. CLIENT2 is deactivated across the MDS
# failover and re-activated; df must succeed on both clients and the file must
# not exist afterwards.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
470 local var=${SINGLEMDS}_svc
471 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
472 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
474 remote_server $CLIENT2 || \
475 { skip "Client $CLIENT2 is on the server node" && return 0; }
477 zconf_mount_clients $CLIENT1 $DIR
478 zconf_mount_clients $CLIENT2 $DIR
480 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
481 do_node $CLIENT2 rm -f $DIR/$tfile
482 replay_barrier $SINGLEMDS
483 rmultiop_stop $CLIENT2 || return 2
485 vbr_deactivate_client $CLIENT2
486 facet_failover $SINGLEMDS
487 do_node $CLIENT1 df $DIR || return 3
488 #client1 is back and will try to open orphan
489 vbr_activate_client $CLIENT2
490 do_node $CLIENT2 df $DIR || return 4
492 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
493 zconf_umount_clients $CLIENTS $DIR
496 run_test 8a "orphans are kept until delayed recovery"
# Body of the test registered below as run_test 8b: CLIENT2 starts a
# background multiop on the file, the replay barrier is set and CLIENT1 removes
# the file. CLIENT2 is deactivated across the MDS failover and re-activated;
# only then is the multiop stopped. The file must not exist afterwards.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
# NOTE(review): return code 1 is used for both rmultiop_start and rmultiop_stop
# failures in this block.
499 local var=${SINGLEMDS}_svc
500 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
501 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
503 remote_server $CLIENT2 || \
504 { skip "Client $CLIENT2 is on the server node" && return 0; }
506 zconf_mount_clients $CLIENT1 $DIR
507 zconf_mount_clients $CLIENT2 $DIR
509 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
510 replay_barrier $SINGLEMDS
511 do_node $CLIENT1 rm -f $DIR/$tfile
513 vbr_deactivate_client $CLIENT2
514 facet_failover $SINGLEMDS
515 do_node $CLIENT1 df $DIR || return 2
516 #client1 is back and will try to open orphan
517 vbr_activate_client $CLIENT2
518 do_node $CLIENT2 df $DIR || return 3
520 rmultiop_stop $CLIENT2 || return 1
521 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
522 zconf_umount_clients $CLIENTS $DIR
525 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
# Body of the test registered below as run_test 8c: CLIENT2 starts a
# background multiop on the file, the replay barrier is set, CLIENT1 removes
# the file and the multiop is stopped before CLIENT2 is deactivated. After the
# MDS failover and re-activation of CLIENT2 the file must not exist.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
528 local var=${SINGLEMDS}_svc
529 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
530 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
532 remote_server $CLIENT2 || \
533 { skip "Client $CLIENT2 is on the server node" && return 0; }
535 zconf_mount_clients $CLIENT1 $DIR
536 zconf_mount_clients $CLIENT2 $DIR
538 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
539 replay_barrier $SINGLEMDS
540 do_node $CLIENT1 rm -f $DIR/$tfile
541 rmultiop_stop $CLIENT2 || return 2
543 vbr_deactivate_client $CLIENT2
544 facet_failover $SINGLEMDS
545 do_node $CLIENT1 df $DIR || return 3
546 #client1 is back and will try to open orphan
547 vbr_activate_client $CLIENT2
548 do_node $CLIENT2 df $DIR || return 4
550 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
551 zconf_umount_clients $CLIENTS $DIR
554 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
# Body of the test registered below as run_test 8d: both clients start a
# background multiop on the same file, the replay barrier is set, CLIENT1
# removes the file and both multiops are stopped (CLIENT2 first). CLIENT2 is
# deactivated across the MDS failover and re-activated; the file must not exist
# afterwards.
# The test is skipped when reading mdd.<svc>.stale_export_age fails or when
# remote_server fails for CLIENT2.
557 local var=${SINGLEMDS}_svc
558 do_facet $SINGLEMDS "$LCTL get_param -n mdd.${!var}.stale_export_age" > /dev/null 2>&1
559 [ $? -ne 0 ] && { skip "No delayed recovery support" && return; }
561 remote_server $CLIENT2 || \
562 { skip "Client $CLIENT2 is on the server node" && return 0; }
564 zconf_mount_clients $CLIENT1 $DIR
565 zconf_mount_clients $CLIENT2 $DIR
567 rmultiop_start $CLIENT1 $DIR/$tfile || return 1
568 rmultiop_start $CLIENT2 $DIR/$tfile || return 2
569 replay_barrier $SINGLEMDS
570 do_node $CLIENT1 rm -f $DIR/$tfile
571 rmultiop_stop $CLIENT2 || return 3
572 rmultiop_stop $CLIENT1 || return 4
574 vbr_deactivate_client $CLIENT2
575 facet_failover $SINGLEMDS
576 do_node $CLIENT1 df $DIR || return 6
578 #client1 is back and will try to open orphan
579 vbr_activate_client $CLIENT2
580 do_node $CLIENT2 df $DIR || return 8
582 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
583 zconf_umount_clients $CLIENTS $DIR
586 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
# Body of the test registered below as run_test 8e: CLIENT1 creates a file
# (mcreate) and a directory, then after the replay barrier touches a file in
# that directory while CLIENT2 removes the first file and recreates it with
# touch. CLIENT1 is unmounted before the MDS failover; CLIENT2 must recover and
# be able to remove the recreated file.
# Unlike the earlier tests, this block mounts with zconf_mount rather than
# zconf_mount_clients.
589 zconf_mount $CLIENT1 $DIR
590 zconf_mount $CLIENT2 $DIR
592 do_node $CLIENT1 mcreate $DIR/$tfile
593 do_node $CLIENT1 mkdir $DIR/$tfile-2
594 replay_barrier $SINGLEMDS
595 # missed replay from client1 will lead to recovery by versions
596 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
597 do_node $CLIENT2 rm $DIR/$tfile || return 1
598 do_node $CLIENT2 touch $DIR/$tfile || return 2
600 zconf_umount $CLIENT1 $DIR
601 facet_failover $SINGLEMDS
602 do_node $CLIENT2 df $DIR || return 6
604 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
605 zconf_umount_clients $CLIENTS $DIR
608 run_test 8e "create | unlink, create shouldn't fail"
# Body of the test registered below as run_test 8f: CLIENT1 creates a file
# (touch) and a directory, then after the replay barrier touches a file in that
# directory while CLIENT2 removes the first file and recreates it with mcreate.
# CLIENT1 is unmounted before the MDS failover; CLIENT2 must recover and be
# able to remove the recreated file.
# Only CLIENT2 is unmounted at the end; CLIENT1 was already unmounted before
# the failover.
611 zconf_mount_clients $CLIENT1 $DIR
612 zconf_mount_clients $CLIENT2 $DIR
614 do_node $CLIENT1 touch $DIR/$tfile
615 do_node $CLIENT1 mkdir $DIR/$tfile-2
616 replay_barrier $SINGLEMDS
617 # missed replay from client1 will lead to recovery by versions
618 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
619 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
620 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
622 zconf_umount $CLIENT1 $DIR
623 facet_failover $SINGLEMDS
624 do_node $CLIENT2 df $DIR || return 6
626 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
627 zconf_umount $CLIENT2 $DIR
630 run_test 8f "create | unlink, create shouldn't fail"
# Body of the test registered below as run_test 8g: same sequence as 8f, except
# that CLIENT2 recreates the removed name as a directory (mkdir) and removes it
# with rmdir after the MDS failover.
633 zconf_mount_clients $CLIENT1 $DIR
634 zconf_mount_clients $CLIENT2 $DIR
636 do_node $CLIENT1 touch $DIR/$tfile
637 do_node $CLIENT1 mkdir $DIR/$tfile-2
638 replay_barrier $SINGLEMDS
639 # missed replay from client1 will lead to recovery by versions
640 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
641 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
642 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
644 zconf_umount $CLIENT1 $DIR
645 facet_failover $SINGLEMDS
646 do_node $CLIENT2 df $DIR || return 6
648 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
649 zconf_umount $CLIENT2 $DIR
652 run_test 8g "create | unlink, create shouldn't fail"
# Body of the test registered below as run_test 10: start a 60-second
# "rundbench 1" load on every client through $PDSH in the background, set the
# replay barrier, deactivate CLIENT2, later re-activate it and require df to
# succeed on it, then report the load result per client.
# NOTE(review): the skip condition here fires when get_param SUCCEEDS, which is
# the opposite of every other test in this file (they skip when it fails) -
# confirm whether this is intended.
# NOTE(review): $PID and $rc are used below, but their assignments are not in
# the lines visible here.
655 local var=${SINGLEMDS}_svc
656 do_facet $SINGLEMDS $LCTL get_param -n mdd.${!var}.stale_export_age && \
657 { skip "No delayed recovery support" && return; }
658 [ -z "$DBENCH_LIB" ] && skip "DBENCH_LIB is not set" && return 0
660 zconf_mount_clients $CLIENTS $DIR
662 local duration="-t 60"
663 local cmd="rundbench 1 $duration "
# ${CLIENTS//,/ } replaces every comma with a space so the list can be iterated.
665 for CLIENT in ${CLIENTS//,/ }; do
666 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
# The PID is stored in a pid.<client> file in the current directory.
668 echo $PID >pid.$CLIENT
669 echo "Started load PID=`cat pid.$CLIENT`"
672 replay_barrier $SINGLEMDS
673 sleep 3 # give clients a time to do operations
675 vbr_deactivate_client $CLIENT2
677 log "$TESTNAME fail $SINGLEMDS 1"
680 # wait for client to reconnect to MDS
683 vbr_activate_client $CLIENT2
684 do_node $CLIENT2 df $DIR || return 4
686 for CLIENT in ${CLIENTS//,/ }; do
687 PID=`cat pid.$CLIENT`
690 echo "load on ${CLIENT} returned $rc"
693 zconf_umount_clients $CLIENTS $DIR
695 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Suite teardown: print the completion banner, clean up the filesystem, and
# derive the exit status from the suite log.
697 equals_msg `basename $0`: test complete, cleaning up
698 #SLEEP=$((`date +%s` - $NOW))
699 #[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
700 check_and_cleanup_lustre
# Exits 1 only when $TESTSUITELOG is a regular file, cat succeeds and the log
# contains "FAIL"; in every other case the list ends with "true".
701 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true