2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Run select tests by setting ONLY, or as arguments to the script.
6 # Skip specific tests by setting EXCEPT.
8 # Run the tests with NOSETUP=true when ltest has already set up the env for us
# Search the script directory (relative to $PWD and as given) and its
# ../utils sibling first, then the inherited PATH, then /sbin.
12 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
15 # bug number for skipped test: LU-4256
16 ALWAYS_EXCEPT="$LRSYNC_EXCEPT 2b"
17 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# With SLOW=no the EXCEPT_SLOW list is cleared.
19 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# Print the combined skip list when ALWAYS_EXCEPT or EXCEPT is non-empty;
# the unquoted inner echo re-joins the entries with single spaces.
21 [ "$ALWAYS_EXCEPT$EXCEPT" ] &&
22 echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`"
# File handed to lustre_rsync through its -l option by the tests below.
27 LREPL_LOG=$TMP/lustre_rsync.log
# Default LUSTRE to the parent of the directory that contains this script.
30 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
31 . $LUSTRE/tests/test-framework.sh
# ":=" also stores the default path in CONFIG when it is unset or empty.
33 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# The whole suite is skipped when FILESET is set.
36 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
38 check_and_setup_lustre
# Look for a generic unprivileged group: "nobody" first, then "nogroup".
# (GROUP is used later with chown — presumably it is set from this probe.)
43 if getent group nobody; then
45 elif getent group nogroup; then
48 error "No generic nobody group"
# Use $LUSTRE/utils/lustre_rsync unless LRSYNC is already set; if that path
# is not a regular file, fall back to whatever "which" finds on PATH.
53 export LRSYNC=${LRSYNC:-"$LUSTRE/utils/lustre_rsync"}
54 [ ! -f "$LRSYNC" ] && export LRSYNC=$(which lustre_rsync)
# These fixed options become part of every $LRSYNC invocation below.
55 export LRSYNC="$LRSYNC -v -c no -d 2"
57 # Number of seconds to run dbench
58 DBENCH_TIME=${DBENCH_TIME:-60}
# MDT0: first field of the first mdc mds_server_uuid value reported by lctl,
# with every "_UUID" removed.
61 MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid |
62 awk '{ gsub(/_UUID/,""); print $1 }' | head -n1)
# Register a changelog user, then read CL_USER from the CL_USERS entry for
# $SINGLEMDS (presumably filled in by changelog_register — confirm in
# test-framework.sh). CL_USER is an array; $CL_USER is its first element.
65 changelog_register || error "changelog_register failed"
66 CL_USER=(${CL_USERS[$SINGLEMDS]})
67 echo $MDT0: Registered changelog user $CL_USER
69 echo "No changelog users present on $SINGLEMDS"
# Remove leftovers of earlier runs from both targets, the source and
# $DIR/tgt; rm's error output is discarded.
78 rm -rf $TGT/$tdir $TGT/d*.lustre_rsync-test 2> /dev/null
79 rm -rf $TGT2/$tdir $TGT2/d*.lustre_rsync-test 2> /dev/null
80 rm -rf ${DIR}/$tdir $DIR/d*.lustre_rsync-test ${DIR}/tgt 2> /dev/null
# Recreate the per-test directory under $DIR, $TGT and $TGT2.
# NOTE(review): the message for $DIR also says "target" although $DIR is
# the -s (source) side in the tests — confirm this wording is intended.
82 mkdir -p ${DIR}/$tdir || error "Failed to create target: " $DIR/$tdir
83 mkdir -p ${TGT}/$tdir || error "Failed to create target: " $TGT/$tdir
84 mkdir -p ${TGT2}/$tdir || error "Failed to create target: " $TGT2/$tdir
93 # Check whether the filesystem supports xattr or not.
95 # "large" - large xattr is supported
96 # "small" - large xattr is unsupported but small xattr is supported
97 # "no" - xattr is unsupported
# Probe value built by generate_string from max_xattr_size.
104 local val="$(generate_string $(max_xattr_size))"
# The large probe only counts when large_xattr_enabled succeeds and
# setfattr accepts the value; setfattr errors are discarded.
105 if large_xattr_enabled &&
106 setfattr -n user.foo -v $val $tgt 2>/dev/null; then
# A successful setfattr with the short value "bar" yields "small".
109 setfattr -n user.foo -v bar $tgt 2>/dev/null && xattr="small"
# Log file that receives the changelog dump when the comparison fails.
117 local changelog_file=$(generate_logname "changelog")
# Compare only when at least one of the two paths exists.
119 if [ -e $1 -o -e $2 ]; then
# Recursive diff that reports only which files differ and ignores
# entries named "dev1".
120 diff -rq -x "dev1" $1 $2
# On a non-zero RC, write the $MDT0 changelog to that file, then fail.
122 if [ $RC -ne 0 ]; then
123 $LFS changelog $MDT0 > $changelog_file
124 error "Failure in replication; differences found."
# Examine the process state that ps reports for each PID in $pids.
129 procs_are_stopped() {
# "-o state=" prints only the state column, without a header line.
133 for state in $(ps -p "$pids" -o state=); do
# "T" is the stopped state; any other value is singled out here.
134 if [[ "$state" != T ]]; then
142 # Send SIGSTOP to PIDs and wait up to 60 seconds for them to show a
143 # stopped process state.
# SECONDS is bash's running seconds counter; "end" is the polling deadline.
149 end=$((SECONDS + 60))
150 while ((SECONDS < end)); do
151 if procs_are_stopped $pids; then
161 # Test 1A - test basic operations
162 test_1A() { # was test_1
# xattr support level as reported by check_xattr for $TGT/foo.
165 local xattr=$(check_xattr $TGT/foo)
# Create files, including one whose name contains spaces.
172 touch $DIR/$tdir/file1
173 cp /etc/hosts $DIR/$tdir/d1/
174 touch $DIR/$tdir/d1/"space in filename"
175 touch $DIR/$tdir/d1/file2
# Move a file into another directory under a new name.
178 mv $DIR/$tdir/d1/file2 $DIR/$tdir/d2/file3
180 # File and directory delete
181 touch $DIR/$tdir/d1/file4
182 mkdir $DIR/$tdir/d1/del
183 touch $DIR/$tdir/d1/del/del1
184 touch $DIR/$tdir/d1/del/del2
185 rm -rf $DIR/$tdir/d1/del
186 rm $DIR/$tdir/d1/file4
# One regular file plus a hard link and a symbolic link to it.
189 cat /etc/hosts > $DIR/$tdir/d1/link1
190 ln $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link2
191 ln -s $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link3
# Device node creation is disabled.
194 #mknod $DIR/$tdir/dev1 b 8 1
# The first pass names the source, both targets, the MDT, the changelog
# user and the $LREPL_LOG file explicitly.
197 local LRSYNC_LOG=$(generate_logname "lrsync_log")
198 echo "Replication #1"
199 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
203 chmod 000 $DIR/$tdir/d2/file3
204 chown nobody:$GROUP $DIR/$tdir/d2/file3
# When xattrs are supported, set user.foo on a new file: a value generated
# from max_xattr_size for "large" support, otherwise "bar".
207 if [[ "$xattr" != "no" ]]; then
209 touch $DIR/$tdir/file5
210 [[ "$xattr" = "large" ]] &&
211 value="$(generate_string $(max_xattr_size))" || value="bar"
212 setfattr -n user.foo -v $value $DIR/$tdir/file5
# The second pass is started with only -l and -D (presumably the remaining
# settings are recovered from $LREPL_LOG — confirm against the lustre_rsync
# documentation).
215 echo "Replication #2"
216 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
# Both targets must return the xattr value that was set on the source.
218 if [[ "$xattr" != "no" ]]; then
219 local xval1=$(get_xattr_value user.foo $TGT/$tdir/file5)
220 local xval2=$(get_xattr_value user.foo $TGT2/$tdir/file5)
222 if [[ "$xval1" != "$value" || "$xval2" != "$value" ]]; then
223 error "Error in replicating xattrs."
227 # Use diff to compare the source and the destination
228 check_diff $DIR/$tdir $TGT/$tdir
229 check_diff $DIR/$tdir $TGT2/$tdir
234 run_test 1A "Simple Replication"
236 # Test 1a - test create/delete operations in ROOT directory
237 test_1a() { # LU-5005
# Clear root-* leftovers on the target and on the source; rm's error
# output is discarded.
238 rm -rf $TGT/root-* 2> /dev/null
239 rm -rf $DIR/root-* 2> /dev/null
248 touch $DIR/root-file2
# Rename root-file2 to root-file3.
251 mv $DIR/root-file2 $DIR/root-file3
253 # File and directory delete
254 touch $DIR/root-file4
257 rm -rf $DIR/root-dir1
# Replicate to $TGT only, then check each operation on the target: the
# created and renamed-to entries must exist, the renamed-from and deleted
# entries must be gone.
260 local LRSYNC_LOG=$(generate_logname "lrsync_log")
262 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG \
266 stat $TGT/root-dir || error "Dir create not replicated"
267 stat $TGT/root-file || error "File create not replicated"
268 stat $TGT/root-file2 && error "Rename not replicated (src)"
269 stat $TGT/root-file3 || error "Rename not replicated (tgt)"
270 stat $TGT/root-dir1 && error "Dir delete not replicated"
271 stat $TGT/root-file4 && error "File delete not replicated"
279 run_test 1a "Replicate create/delete operations in ROOT directory"
281 # Test 2a - Replicate files created by dbench
# dbench runs in the foreground here, so replication starts only after it
# has returned successfully.
287 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME || error "dbench failed"
289 local LRSYNC_LOG=$(generate_logname "lrsync_log")
290 # Replicate the changes to $TGT and $TGT2
291 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
294 # Use diff to compare the source and the destination
295 check_diff $DIR/$tdir $TGT/$tdir
296 check_diff $DIR/$tdir $TGT2/$tdir
302 run_test 2a "Replicate files created by dbench."
304 # Test 2b - Replicate files changed by dbench.
# Test 2b is on the ALWAYS_EXCEPT list set near the top of this script.
# dbench runs in the background so it can be paused around each
# replication pass.
311 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
312 # wait for dbench to start
313 wait_for_function 'child_pid=$(pgrep dbench)' 360
314 # let dbench run for a bit
317 echo PIDs: $child_pid
# Pause dbench before the first replication and comparison.
319 stop_procs $child_pid
321 local LRSYNC_LOG=$(generate_logname "lrsync_log")
322 echo Starting replication
323 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
325 check_diff $DIR/$tdir $TGT/$tdir
# Resume dbench, pause it again, and run a second pass given only -l/-D.
328 $KILL -SIGCONT $child_pid
332 stop_procs $child_pid
334 echo Starting replication
335 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
336 check_diff $DIR/$tdir $TGT/$tdir
338 echo "Wait for dbench to finish"
339 $KILL -SIGCONT $child_pid
342 # Replicate the changes to $TGT
343 echo Starting replication
344 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
# The final comparison covers both targets.
346 check_diff $DIR/$tdir $TGT/$tdir
347 check_diff $DIR/$tdir $TGT2/$tdir
353 run_test 2b "Replicate files changed by dbench."
355 # Test 2c - Replicate files while dbench is running
# dbench runs in the background and is not paused in this test.
361 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
363 local LRSYNC_LOG=$(generate_logname "lrsync_log")
364 # Replicate the changes to $TGT and $TGT2
365 sleep 10 # give dbench a headstart
# Keep running lustre_rsync while quit is at most 1; quit is incremented
# each time the status tested below is non-zero.
# NOTE(review): this test passes ${mds1_svc} to -m where the other tests
# pass $MDT0 — confirm the two name the same MDT.
367 while [ $quit -le 1 ];
369 echo "Running lustre_rsync"
370 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m ${mds1_svc} -u $CL_USER \
371 -l $LREPL_LOG -D $LRSYNC_LOG
374 if [ $? -ne 0 ]; then
375 quit=$(expr $quit + 1)
379 # Use diff to compare the source and the destination
380 check_diff $DIR/$tdir $TGT/$tdir
381 check_diff $DIR/$tdir $TGT2/$tdir
387 run_test 2c "Replicate files while dbench is running."
389 # Test 3a - Replicate files created by createmany
# Populate the source with createmany; the test fails if it fails.
395 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed"
397 local LRSYNC_LOG=$(generate_logname "lrsync_log")
398 # Replicate the changes to $TGT and $TGT2
399 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
401 check_diff $DIR/$tdir $TGT/$tdir
402 check_diff $DIR/$tdir $TGT2/$tdir
408 run_test 3a "Replicate files created by createmany"
410 # Test 3b - Replicate files created by writemany
# Populate the source with writemany, driven by $time and $threads; the
# test fails if it fails.
417 writemany -q -a $DIR/$tdir/$tfile $time $threads ||
418 error "writemany failed"
420 local LRSYNC_LOG=$(generate_logname "lrsync_log")
421 # Replicate the changes to $TGT and $TGT2
422 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
425 check_diff $DIR/$tdir $TGT/$tdir
426 check_diff $DIR/$tdir $TGT2/$tdir
432 run_test 3b "Replicate files created by writemany"
434 # Test 3c - Replicate files created by createmany/unlinkmany
# Create the files and unlink them again before anything is replicated.
440 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed"
441 unlinkmany $DIR/$tdir/$tfile $numfiles || error "unlinkmany failed"
443 local LRSYNC_LOG=$(generate_logname "lrsync_log")
444 # Replicate the changes to $TGT and $TGT2
445 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
447 check_diff $DIR/$tdir $TGT/$tdir
448 check_diff $DIR/$tdir $TGT2/$tdir
454 run_test 3c "Replicate files created by createmany/unlinkmany"
456 # Test 4 - Replicate files created by iozone
# Skip the test when iozone is not found on PATH.
458 which iozone > /dev/null 2>&1
459 [ $? -ne 0 ] && skip "iozone not found"
# Start run_iozone.sh in the background with its run and pid files and its
# MOUNT all under the test directory.
464 END_RUN_FILE=${DIR}/$tdir/run LOAD_PID_FILE=${DIR}/$tdir/pid \
465 MOUNT=${DIR}/$tdir run_iozone.sh &
# Pause iozone before the first replication and comparison.
467 child_pid=$(pgrep iozone)
468 stop_procs $child_pid
470 local LRSYNC_LOG=$(generate_logname "lrsync_log")
471 # Replicate the changes to $TGT and $TGT2
472 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
474 check_diff $DIR/$tdir $TGT/$tdir
475 check_diff $DIR/$tdir $TGT2/$tdir
# Resume iozone, then kill the wrapper script before iozone itself.
477 $KILL -SIGCONT $child_pid
479 $KILL -SIGKILL $(pgrep run_iozone.sh)
480 $KILL -SIGKILL $(pgrep iozone)
482 # After 'run_iozone.sh' is killed, 'iozone' becomes a child of PID 1,
483 # so 'wait' does not wait for it. Killing iozone first would cause
484 # more iozone processes to be spawned, which is not desirable.
485 # So, after sending SIGKILL, the test loops until iozone has cleaned
486 # up and exited.
488 while [ "$(pgrep "iozone")" != "" ];
490 ps -ef | grep iozone | grep -v grep
# Second pass, given only -l/-D, followed by a comparison of both targets.
494 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
495 check_diff $DIR/$tdir $TGT/$tdir
496 check_diff $DIR/$tdir $TGT2/$tdir
502 run_test 4 "Replicate files created by iozone"
504 # Test 5a - Stop / start lustre_rsync
510 createmany -o $DIR/$tdir/$tfile $NUMTEST
# The process in $child_pid is sent SIGHUP, then a second lustre_rsync run
# given only -l/-D is started and both targets are compared.
512 # Replicate the changes to $TGT and $TGT2
513 local LRSYNC_LOG=$(generate_logname "lrsync_log")
514 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
518 $KILL -SIGHUP $child_pid
520 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
522 check_diff $DIR/$tdir $TGT/$tdir
523 check_diff $DIR/$tdir $TGT2/$tdir
529 run_test 5a "Stop / start lustre_rsync"
531 # Test 5b - Kill / restart lustre_rsync
537 createmany -o $DIR/$tdir/$tfile $NUMTEST
# The process in $child_pid is sent SIGKILL, then a second lustre_rsync run
# given only -l/-D is started and both targets are compared.
539 # Replicate the changes to $TGT and $TGT2
540 local LRSYNC_LOG=$(generate_logname "lrsync_log")
541 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
545 $KILL -SIGKILL $child_pid
547 $LRSYNC -l $LREPL_LOG -D $LRSYNC_LOG
549 check_diff $DIR/$tdir $TGT/$tdir
550 check_diff $DIR/$tdir $TGT2/$tdir
556 run_test 5b "Kill / restart lustre_rsync"
558 # Test 6 - lustre_rsync large number of hard links
# link0, link1 .. link(num_links - 2) and the extra name ink0 give the file
# num_links names in total.
566 touch $DIR/$tdir/link0
567 for ((i = 1; i < num_links - 1; i++)); do
568 ln $DIR/$tdir/link0 $DIR/$tdir/link$i
570 # create an extra hard link whose name (ink0) is a suffix of the source name (link0)
571 ln $DIR/$tdir/link0 $DIR/$tdir/ink0
573 local LRSYNC_LOG=$(generate_logname "lrsync_log")
574 # Replicate the changes to $TGT and $TGT2
575 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG \
577 check_diff $DIR/$tdir $TGT/$tdir
578 check_diff $DIR/$tdir $TGT2/$tdir
# %h is the hard link count; both targets must report num_links.
580 local count1=$(stat --format=%h $TGT/$tdir/link0)
581 local count2=$(stat --format=%h $TGT2/$tdir/link0)
582 if ((count1 != num_links || count2 != num_links)); then
583 ls -l $TGT/$tdir/link0 $TGT2/$tdir/link0
584 error "Incorrect no of hard links found $count1, $count2"
591 run_test 6 "lustre_rsync large no of hard links"
593 # Test 7 - lustre_rsync stripesize
# The replication target for this test lives inside the source filesystem.
598 mkdir -p ${DIR}/tgt/$tdir
# Give the source directory a stripe count of $OSTCOUNT before creating
# the files.
601 $LFS setstripe -c $OSTCOUNT $DIR/$tdir ||
602 error "$LFS setstripe failed"
603 createmany -o $DIR/$tdir/$tfile $numfiles
605 # To simulate replication to another lustre filesystem, replicate
606 # the changes to $DIR/tgt. We can't turn off the changelogs
607 # while we are registered, so lustre_rsync had better not try to
608 # replicate the replication steps themselves. It seems OK :)
610 local LRSYNC_LOG=$(generate_logname "lrsync_log")
611 $LRSYNC -s $DIR -t $DIR/tgt -m $MDT0 -u $CL_USER -l $LREPL_LOG \
613 check_diff ${DIR}/$tdir $DIR/tgt/$tdir
# Each replicated file ${tfile}$i must report a stripe_count of $OSTCOUNT.
# NOTE(review): the check compares the stripe count, although the test name
# and the error message say "stripe size".
616 while [ $i -lt $numfiles ];
618 local count=$($LFS getstripe $DIR/tgt/$tdir/${tfile}$i |
619 awk '/stripe_count/ {print $2}')
620 if [ $count -ne $OSTCOUNT ]; then
621 error "Stripe size not replicated"
629 run_test 7 "lustre_rsync stripesize"
631 # Test 8 - Replicate multiple file/directory moves
# For every d$i: create sub-directories d$i$j with "a" files, rename each
# sub-directory to d0$i$j, add "b" files, rename a0 to c0, and finally
# rename d$i itself to d0$i.
636 for i in 1 2 3 4 5 6 7 8 9; do
638 for j in 1 2 3 4 5 6 7 8 9; do
639 mkdir $DIR/$tdir/d$i/d$i$j
640 createmany -o $DIR/$tdir/d$i/d$i$j/a 10 \
642 mv $DIR/$tdir/d$i/d$i$j $DIR/$tdir/d$i/d0$i$j
643 createmany -o $DIR/$tdir/d$i/d0$i$j/b 10 \
645 mv $DIR/$tdir/d$i/d0$i$j/a0 $DIR/$tdir/d$i/d0$i$j/c0
647 mv $DIR/$tdir/d$i $DIR/$tdir/d0$i
# Replicate to $TGT only and compare.
650 local LRSYNC_LOG=$(generate_logname "lrsync_log")
651 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG \
654 check_diff ${DIR}/$tdir $TGT/$tdir
660 run_test 8 "Replicate multiple file/directory moves"
667 touch $DIR/$tdir/foo/a1
# Replicate the populated tree to $TGT and compare.
669 local LRSYNC_LOG=$(generate_logname "lrsync_log")
670 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG \
673 check_diff ${DIR}/$tdir $TGT/$tdir
# Remove the whole foo tree on the source, replicate again with the full
# option set, and compare once more.
675 rm -rf $DIR/$tdir/foo
677 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG \
680 check_diff ${DIR}/$tdir $TGT/$tdir
686 run_test 9 "Replicate recursive directory removal"
# Called after the tests above have been registered with run_test
# (presumably defined in test-framework.sh — confirm).
690 check_and_cleanup_lustre