3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # Run tests with NOSETUP=true when ltest has already set up the environment for us
# Put the test and utils directories (relative to $SRCDIR) ahead of the
# existing PATH, and append /sbin.
10 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
# Tests that are always skipped: whatever $LRSYNC_EXCEPT lists, plus 5a and 5b.
13 ALWAYS_EXCEPT="$LRSYNC_EXCEPT 5a 5b"
14 # bug number for skipped test: - 20878
15 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Announce the combined skip list when ALWAYS_EXCEPT or EXCEPT is non-empty.
17 [ "$ALWAYS_EXCEPT$EXCEPT" ] && \
18 echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`"
# Log file that the tests hand to lustre_rsync through its -l option.
23 LREPL_LOG=$TMP/lustre_rsync.log
# Default LUSTRE to the parent of the directory holding this script, then
# source the shared test framework and the configuration file
# (CONFIG defaults to $LUSTRE/tests/cfg/$NAME.sh).
26 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
27 . $LUSTRE/tests/test-framework.sh
29 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Suite log: $TESTSUITELOG when set, otherwise $TMP/<script name without .sh>.log.
32 REPLLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log}
# Remove a log left by a previous run; the trailing || true keeps the exit
# status of this line at zero.
34 [ "$REPLLOG" ] && rm -f $REPLLOG || true
# Not defined in this section; presumably provided by test-framework.sh.
36 check_and_setup_lustre
# Use the in-tree lustre_rsync unless LRSYNC is preset; when that path is not
# a regular file fall back to the binary found on PATH. It is always run
# with -v.
44 export LRSYNC=${LRSYNC:-"$LUSTRE/utils/lustre_rsync"}
45 [ ! -f "$LRSYNC" ] && export LRSYNC=$(which lustre_rsync)
46 export LRSYNC="$LRSYNC -v" # -a
48 # control the time of tests
49 DBENCH_TIME=${DBENCH_TIME:-60} # Number of seconds to run dbench
# MDT name: first mdc.*.mds_server_uuid value with the string _UUID removed.
52 MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | \
53 awk '{gsub(/_UUID/,""); print $1}' | head -1)
# Register a changelog user on that MDT and keep the id that is printed.
56 CL_USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n)
57 echo $MDT0: Registered changelog user $CL_USER
# Number of changelog users = line count of changelog_users minus 2
# (presumably two header lines -- TODO confirm). A note is printed when this
# test is not the only registered user.
58 CL_USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \
59 mdd.$MDT0.changelog_users | wc -l) - 2 ))
60 [ $CL_USERS -ne 1 ] && \
61 echo "Other changelog users present ($CL_USERS)"
# Remove leftovers of earlier runs from both targets and from the source
# filesystem (including ${DIR}/tgt); error output from rm is discarded.
65 rm -rf $TGT/$tdir $TGT/d*.lustre_rsync-test 2> /dev/null
66 rm -rf $TGT2/$tdir $TGT2/d*.lustre_rsync-test 2> /dev/null
67 rm -rf ${DIR}/$tdir $DIR/d*.lustre_rsync-test ${DIR}/tgt 2> /dev/null
# Failure report for creating $TGT; the command that creates it is not
# visible in this section.
72 error "Failed to create target: " $TGT
# Clear the changelog records held for $CL_USER on $MDT0 (end record 0;
# presumably meaning all records -- confirm), then deregister that user.
83 $LFS changelog_clear $MDT0 $CL_USER 0
84 do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER
# Try to set the user.foo xattr on $tgt, hiding any error output.
91 setfattr -n user.foo -v 'bar' $tgt 2> /dev/null
# Compare the two trees given as $1 and $2 when at least one of them exists;
# entries named dev1 are excluded from the comparison.
100 if [ -e $1 -o -e $2 ]; then
101 diff -rq -x "dev1" $1 $2
# RC is assigned on a line not shown here; presumably the exit status of
# diff -- confirm.
103 if [ $RC -ne 0 ]; then
104 error "Failure in replication; differences found."
109 # Test 1 - test basic operations
# Result of the xattr probe on the target; compared against the string yes
# further down.
113 local xattr=`check_xattr $TGT/foo`
116 mkdir -p ${DIR}/$tdir
# Populate the source tree: plain files, a copy of /etc/hosts and a file
# name that contains spaces.
121 touch $DIR/$tdir/file1
122 cp /etc/hosts $DIR/$tdir/d1/
123 touch $DIR/$tdir/d1/"space in filename"
124 touch $DIR/$tdir/d1/file2
# Rename a file from one directory into another.
127 mv $DIR/$tdir/d1/file2 $DIR/$tdir/d2/file3
129 # File and directory delete
130 touch $DIR/$tdir/d1/file4
131 mkdir $DIR/$tdir/d1/del
132 touch $DIR/$tdir/d1/del/del1
133 touch $DIR/$tdir/d1/del/del2
134 rm -rf $DIR/$tdir/d1/del
135 rm $DIR/$tdir/d1/file4
# A regular file plus a hard link and a symbolic link that point at it.
138 cat /etc/hosts > $DIR/$tdir/d1/link1
139 ln $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link2
140 ln -s $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link3
# Block-device creation is disabled; the matching check below is commented
# out as well.
143 #mknod $DIR/$tdir/dev1 b 8 1
146 echo "Replication #1"
# First run: source, both targets, MDT and changelog user are all passed
# explicitly together with the log file.
147 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
# Mode and ownership changes made after the first replication.
150 chmod 000 $DIR/$tdir/d2/file3
151 chown nobody:nobody $DIR/$tdir/d2/file3
# Only exercise xattrs when the probe reported yes.
154 if [ "$xattr" == "yes" ]; then
155 touch $DIR/$tdir/file5
156 setfattr -n user.foo -v 'bar' $DIR/$tdir/file5
159 echo "Replication #2"
# Second run: only the log file is passed.
160 $LRSYNC -l $LREPL_LOG
# Read user.foo back into xval1 and xval2; the file arguments of these two
# getfattr calls are on continuation lines not shown here.
162 if [ "$xattr" == "yes" ]; then
163 local xval1=$(getfattr -n user.foo --absolute-names --only-values \
165 local xval2=$(getfattr -n user.foo --absolute-names --only-values \
171 # fid2path and path2fid aren't implemented for block devices
172 #if [[ ! -b $TGT/$tdir/dev1 ]] || [[ ! -b $TGT2/$tdir/dev1 ]]; then
173 # ls -l $DIR/$tdir/dev1 $TGT/$tdir/dev1 $TGT2/$tdir/dev1
174 # error "Error replicating block devices"
# With xattr support, both values read back must be bar.
177 if [[ "$xattr" == "yes" ]] &&
178 [[ "$xval1" != "bar" || "$xval2" != "bar" ]]; then
179 error "Error in replicating xattrs. $xval1, $xval2"
183 # Use diff to compare the source and the destination
184 check_diff $DIR/$tdir $TGT/$tdir
185 check_diff $DIR/$tdir $TGT2/$tdir
191 run_test 1 "Simple Replication"
193 # Test 2a - Replicate files created by dbench
# Skipped when SLOW is set to no.
195 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
# Run dbench against $DIR/$tdir for $DBENCH_TIME seconds in the foreground;
# a non-zero exit fails the test.
200 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME || error "dbench failed!"
202 # Replicate the changes to $TGT
203 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
205 # Use diff to compare the source and the destination
206 check_diff $DIR/$tdir $TGT/$tdir
207 check_diff $DIR/$tdir $TGT2/$tdir
213 run_test 2a "Replicate files created by dbench."
216 # Test 2b - Replicate files changed by dbench.
# Skipped when SLOW is set to no.
218 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
# Start dbench in the background for $DBENCH_TIME seconds.
224 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
# Collect the PIDs of every process that pgrep matches for dbench.
227 local child_pid=$(pgrep dbench)
228 echo PIDs: $child_pid
# Pause dbench so the source tree stops changing while it is replicated and
# compared.
230 $KILL -SIGSTOP $child_pid
232 echo Starting replication
233 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
234 check_diff $DIR/$tdir $TGT/$tdir
# Resume dbench, then pause it again before the next replication.
237 $KILL -SIGCONT $child_pid
241 $KILL -SIGSTOP $child_pid
243 echo Starting replication
# Follow-up runs pass only the log file.
244 $LRSYNC -l $LREPL_LOG
245 check_diff $DIR/$tdir $TGT/$tdir
247 echo "Wait for dbench to finish"
248 $KILL -SIGCONT $child_pid
251 # Replicate the changes to $TGT
252 echo Starting replication
253 $LRSYNC -l $LREPL_LOG
# The final comparison covers both targets.
255 check_diff $DIR/$tdir $TGT/$tdir
256 check_diff $DIR/$tdir $TGT2/$tdir
262 run_test 2b "Replicate files changed by dbench."
264 # Test 2c - Replicate files while dbench is running
# Skipped when SLOW is set to no.
266 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
# Start dbench in the background for $DBENCH_TIME seconds.
271 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
273 # Replicate the changes to $TGT
274 sleep 10 # give dbench a headstart
# Keep replicating while quit is at most 1; the initial value of quit is set
# on a line not shown here.
276 while [ $quit -le 1 ];
278 echo "Running lustre_rsync"
# NOTE(review): this call passes ${mds1_svc} to -m while every other test in
# this file passes $MDT0 -- confirm the two are equivalent.
279 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m ${mds1_svc} -u $CL_USER -l $LREPL_LOG
# quit is incremented when the preceding command returned non-zero. Lines
# appear to be missing between the lustre_rsync call and this check, so
# confirm which command the status belongs to.
282 if [ $? -ne 0 ]; then
283 quit=$(expr $quit + 1)
287 # Use diff to compare the source and the destination
288 check_diff $DIR/$tdir $TGT/$tdir
289 check_diff $DIR/$tdir $TGT2/$tdir
295 run_test 2c "Replicate files while dbench is running."
297 # Test 3a - Replicate files created by createmany
# Skipped when SLOW is set to no.
299 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
305 mkdir -p ${DIR}/$tdir
# Create $numfiles files named after $tfile in the test directory; numfiles
# is set on a line not shown here.
306 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!"
308 # Replicate the changes to $TGT
309 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
# Both targets must match the source tree.
310 check_diff $DIR/$tdir $TGT/$tdir
311 check_diff $DIR/$tdir $TGT2/$tdir
317 run_test 3a "Replicate files created by createmany"
320 # Test 3b - Replicate files created by writemany
# Skipped when SLOW is set to no.
322 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
329 mkdir -p ${DIR}/$tdir
# Run writemany on $DIR/$tdir/$tfile with the $time and $threads values set
# on lines not shown here; a non-zero exit fails the test.
330 writemany -q -a $DIR/$tdir/$tfile $time $threads || error "writemany failed!"
332 # Replicate the changes to $TGT
333 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
# Both targets must match the source tree.
335 check_diff $DIR/$tdir $TGT/$tdir
336 check_diff $DIR/$tdir $TGT2/$tdir
342 run_test 3b "Replicate files created by writemany"
344 # Test 3c - Replicate files created by createmany/unlinkmany
# Skipped when SLOW is set to no.
346 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
352 mkdir -p ${DIR}/$tdir
# Create $numfiles files and unlink them again before replicating, so the
# changelog holds both the creations and the removals.
353 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!"
354 unlinkmany $DIR/$tdir/$tfile $numfiles || error "unlinkmany failed!"
356 # Replicate the changes to $TGT
357 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
# Both targets must match the source tree.
358 check_diff $DIR/$tdir $TGT/$tdir
359 check_diff $DIR/$tdir $TGT2/$tdir
365 run_test 3c "Replicate files created by createmany/unlinkmany"
367 # Test 4 - Replicate files created by iozone
# Skipped when SLOW is set to no, or when iozone is not installed.
369 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
371 which iozone > /dev/null 2>&1
372 if [ $? -ne 0 ]; then
373 skip "iozone not found. Skipping test"
380 mkdir -p ${DIR}/$tdir
# Launch run_iozone.sh in the background with the test directory as its
# mount point and its run/pid files kept inside that directory.
381 END_RUN_FILE=${DIR}/$tdir/run LOAD_PID_FILE=${DIR}/$tdir/pid \
382 MOUNT=${DIR}/$tdir run_iozone.sh &
# Keep the PID list local to the test, as test 2b does, so it does not leak
# into the global scope shared by the other tests. Then pause iozone so the
# source stops changing during replication and comparison.
384 local child_pid=$(pgrep iozone)
385 $KILL -SIGSTOP $child_pid
387 # Replicate the changes to $TGT
388 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
389 check_diff $DIR/$tdir $TGT/$tdir
390 check_diff $DIR/$tdir $TGT2/$tdir
# Resume iozone, then kill the wrapper script before iozone itself.
392 $KILL -SIGCONT $child_pid
394 $KILL -SIGKILL $(pgrep run_iozone.sh)
395 $KILL -SIGKILL $(pgrep iozone)
397 # After killing 'run_iozone.sh', process 'iozone' becomes the
398 # child of PID 1. Hence 'wait' does not wait for it. Killing
399 # iozone first, means more iozone processes are spawned off which
400 # is not desirable. So, after sending a sigkill, the test goes
401 # into a wait loop for iozone to cleanup and exit.
403 while [ "$(pgrep "iozone")" != "" ];
405 ps -ef | grep iozone | grep -v grep
# Replicate whatever changed after the first run (only the log file is
# passed) and compare both targets again.
409 $LRSYNC -l $LREPL_LOG
410 check_diff $DIR/$tdir $TGT/$tdir
411 check_diff $DIR/$tdir $TGT2/$tdir
417 run_test 4 "Replicate files created by iozone"
419 # Test 5a - Stop / start lustre_rsync
# Skipped when SLOW is set to no.
421 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
427 mkdir -p ${DIR}/$tdir
# Create $NUMTEST files to replicate; NUMTEST is set on a line not shown here.
428 createmany -o $DIR/$tdir/$tfile $NUMTEST
430 # Replicate the changes to $TGT
# Start lustre_rsync in the background so it can be interrupted.
432 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG &
# Send SIGHUP to $child_pid; its assignment is on a line not shown here.
435 $KILL -SIGHUP $child_pid
# Run lustre_rsync again with only the log file, then verify both targets.
437 $LRSYNC -l $LREPL_LOG
439 check_diff $DIR/$tdir $TGT/$tdir
440 check_diff $DIR/$tdir $TGT2/$tdir
446 run_test 5a "Stop / start lustre_rsync"
448 # Test 5b - Kill / restart lustre_rsync
# Skipped when SLOW is set to no.
450 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
456 mkdir -p ${DIR}/$tdir
# Create $NUMTEST files to replicate; NUMTEST is set on a line not shown here.
457 createmany -o $DIR/$tdir/$tfile $NUMTEST
459 # Replicate the changes to $TGT
# Start lustre_rsync in the background so it can be killed.
461 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG &
# Send SIGKILL to $child_pid; its assignment is on a line not shown here.
464 $KILL -SIGKILL $child_pid
# Run lustre_rsync again with only the log file, then verify both targets.
466 $LRSYNC -l $LREPL_LOG
468 check_diff $DIR/$tdir $TGT/$tdir
469 check_diff $DIR/$tdir $TGT2/$tdir
475 run_test 5b "Kill / restart lustre_rsync"
477 # Test 6 - lustre_rsync large no of hard links
483 mkdir -p ${DIR}/$tdir
# Create one file and add hard links link<i> to it while i is below
# $NUMLINKS; the initial value and the increment of i are on lines not
# shown here.
484 touch $DIR/$tdir/link0
486 while [ $i -lt $NUMLINKS ];
488 ln $DIR/$tdir/link0 $DIR/$tdir/link${i}
492 # Replicate the changes to $TGT
493 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
494 check_diff $DIR/$tdir $TGT/$tdir
495 check_diff $DIR/$tdir $TGT2/$tdir
# Take the link count (second column of ls -l) of link0 on each target.
# count2 must be read from $TGT2: reading $TGT twice left the second target
# unchecked.
497 local count1=$(ls -l $TGT/$tdir/link0 | sed -r 's/ +/ /g' | cut -f 2 -d ' ')
498 local count2=$(ls -l $TGT2/$tdir/link0 | sed -r 's/ +/ /g' | cut -f 2 -d ' ')
# Either target having a link count other than $NUMLINKS is a failure.
499 if [[ $count1 -ne $NUMLINKS ]] || [[ $count2 -ne $NUMLINKS ]]; then
500 ls -l $TGT/$tdir/link0 $TGT2/$tdir/link0
501 error "Incorrect no of hard links found $count1, $count2"
507 run_test 6 "lustre_rsync large no of hard links"
509 # Test 7 - lustre_rsync stripesize
515 mkdir -p ${DIR}/$tdir
# Request a stripe count of 2 on the directory, then create $NUMFILES files
# in it; NUMFILES is set on a line not shown here.
516 lfs setstripe -c 2 ${DIR}/$tdir
517 createmany -o $DIR/$tdir/$tfile $NUMFILES
519 # To simulate replication to another lustre filesystem, replicate
520 # the changes to $DIR/tgt. We can't turn off the changelogs
521 # while we are registered, so lustre_rsync better not try to
522 # replicate the replication steps. It seems ok :)
525 $LRSYNC -s $DIR -t $DIR/tgt -m $MDT0 -u $CL_USER -l $LREPL_LOG
526 check_diff ${DIR}/$tdir $DIR/tgt/$tdir
# Every replicated file must report a stripe_count of 2. The value checked
# is the stripe count, so the failure message names the count and not the
# stripe size.
529 while [ $i -lt $NUMFILES ];
531 local count=$(lfs getstripe $DIR/tgt/$tdir/${tfile}$i | awk '/stripe_count/ {print $2}')
532 if [ $count -ne 2 ]; then
533 error "Stripe count not replicated"
541 run_test 7 "lustre_rsync stripesize"
543 # Test 8 - Replicate multiple file/directory moves
548 mkdir -p ${DIR}/$tdir
# For each i and j in 1..9: create d<i>/d<i><j> with files prefixed a, rename
# the directory to d0<i><j>, add files prefixed b, and rename a0 to c0.
# The continuation lines of the two createmany calls are not shown here.
550 for i in 1 2 3 4 5 6 7 8 9; do
552 for j in 1 2 3 4 5 6 7 8 9; do
553 mkdir $DIR/$tdir/d$i/d$i$j
554 createmany -o $DIR/$tdir/d$i/d$i$j/a 10 \
556 mv $DIR/$tdir/d$i/d$i$j $DIR/$tdir/d$i/d0$i$j
557 createmany -o $DIR/$tdir/d$i/d0$i$j/b 10 \
559 mv $DIR/$tdir/d$i/d0$i$j/a0 $DIR/$tdir/d$i/d0$i$j/c0
# Rename the parent directory d<i> to d0<i> as well.
561 mv $DIR/$tdir/d$i $DIR/$tdir/d0$i
# Replicate to the single target $TGT and compare it with the source.
564 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG
566 check_diff ${DIR}/$tdir $TGT/$tdir
572 run_test 8 "Replicate multiple file/directory moves"
574 log "cleanup: ======================================================"
# Not defined in this section; presumably provided by test-framework.sh.
576 check_and_cleanup_lustre
577 echo '=========================== finished ==============================='
# When the suite log exists, print it and exit 1 if it contains FAIL.
# The trailing || true gives every other outcome a zero status.
578 [ -f "$REPLLOG" ] && cat $REPLLOG && grep -q FAIL $REPLLOG && exit 1 || true