2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Run select tests by setting ONLY, or as arguments to the script.
6 # Skip specific tests by setting EXCEPT.
8 # Run test by setting NOSETUP=true when ltest has setup env for us
12 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
15 [ -n "$ONLY" ] && SLOW=yes
16 ALWAYS_EXCEPT="$LRSYNC_EXCEPT"
17 # bug number for skipped test:
18 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Announce the tests that will be skipped, if any were requested.  The inner
# echo is deliberately unquoted so both lists collapse onto one line.
if [ -n "$ALWAYS_EXCEPT$EXCEPT" ]; then
	echo "Skipping tests: $(echo $ALWAYS_EXCEPT $EXCEPT)"
fi
26 LREPL_LOG=$TMP/lustre_rsync.log
# Default LUSTRE to the parent of this script's directory unless the caller
# already set it.  Everything is quoted so a path containing whitespace
# survives, and pwd only runs when the cd succeeded, so a failed cd can no
# longer hand back an unrelated working directory.
LUSTRE=${LUSTRE:-$(cd "$(dirname "$0")/.." && pwd)}
. "$LUSTRE/tests/test-framework.sh"
32 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
35 check_and_setup_lustre
# Choose the lustre_rsync binary: keep a caller-supplied LRSYNC, otherwise
# default to the copy under $LUSTRE/utils, and fall back to a PATH lookup
# when that file does not exist.
export LRSYNC=${LRSYNC:-"$LUSTRE/utils/lustre_rsync"}
# command -v is a shell builtin, so the lookup does not depend on an
# external `which` being installed.
[ ! -f "$LRSYNC" ] && export LRSYNC=$(command -v lustre_rsync)
# Options appended here apply to every $LRSYNC invocation in this suite.
export LRSYNC="$LRSYNC -v -c no" # -a
47 # control the time of tests
48 DBENCH_TIME=${DBENCH_TIME:-60} # No of seconds to run dbench
51 MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | \
52 awk '{gsub(/_UUID/,""); print $1}' | head -1)
55 CL_USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n)
56 echo $MDT0: Registered changelog user $CL_USER
57 CL_USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \
58 mdd.$MDT0.changelog_users | wc -l) - 2 ))
59 [ $CL_USERS -ne 1 ] && \
60 echo "Other changelog users present ($CL_USERS)"
64 rm -rf $TGT/$tdir $TGT/d*.lustre_rsync-test 2> /dev/null
65 rm -rf $TGT2/$tdir $TGT2/d*.lustre_rsync-test 2> /dev/null
66 rm -rf ${DIR}/$tdir $DIR/d*.lustre_rsync-test ${DIR}/tgt 2> /dev/null
70 mkdir -p ${TGT2}/$tdir
72 error "Failed to create target: " $TGT
83 $LFS changelog_clear $MDT0 $CL_USER 0
84 do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER
87 # Check whether the filesystem supports xattr or not.
89 # "large" - large xattr is supported
90 # "small" - large xattr is unsupported but small xattr is supported
91 # "no" - xattr is unsupported
98 local val="$(generate_string $(max_xattr_size))"
99 if large_xattr_enabled &&
100 setfattr -n user.foo -v $val $tgt 2>/dev/null; then
103 setfattr -n user.foo -v bar $tgt 2>/dev/null && xattr="small"
111 local changelog_file=$LOGDIR/${TESTSUITE}.test_${3}.changelog
113 if [ -e $1 -o -e $2 ]; then
114 diff -rq -x "dev1" $1 $2
116 if [ $RC -ne 0 ]; then
117 $LFS changelog $MDT0 > $changelog_file
118 error "Failure in replication; differences found."
123 # Test 1 - test basic operations
127 local xattr=$(check_xattr $TGT/foo)
134 touch $DIR/$tdir/file1
135 cp /etc/hosts $DIR/$tdir/d1/
136 touch $DIR/$tdir/d1/"space in filename"
137 touch $DIR/$tdir/d1/file2
140 mv $DIR/$tdir/d1/file2 $DIR/$tdir/d2/file3
142 # File and directory delete
143 touch $DIR/$tdir/d1/file4
144 mkdir $DIR/$tdir/d1/del
145 touch $DIR/$tdir/d1/del/del1
146 touch $DIR/$tdir/d1/del/del2
147 rm -rf $DIR/$tdir/d1/del
148 rm $DIR/$tdir/d1/file4
151 cat /etc/hosts > $DIR/$tdir/d1/link1
152 ln $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link2
153 ln -s $DIR/$tdir/d1/link1 $DIR/$tdir/d1/link3
156 #mknod $DIR/$tdir/dev1 b 8 1
159 echo "Replication #1"
160 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
163 chmod 000 $DIR/$tdir/d2/file3
164 chown nobody:nobody $DIR/$tdir/d2/file3
167 if [[ "$xattr" != "no" ]]; then
169 touch $DIR/$tdir/file5
170 [[ "$xattr" = "large" ]] &&
171 value="$(generate_string $(max_xattr_size))" || value="bar"
172 setfattr -n user.foo -v $value $DIR/$tdir/file5
175 echo "Replication #2"
176 $LRSYNC -l $LREPL_LOG
178 if [[ "$xattr" != "no" ]]; then
179 local xval1=$(get_xattr_value user.foo $TGT/$tdir/file5)
180 local xval2=$(get_xattr_value user.foo $TGT2/$tdir/file5)
185 # fid2path and path2fid aren't implemented for block devices
186 #if [[ ! -b $TGT/$tdir/dev1 ]] || [[ ! -b $TGT2/$tdir/dev1 ]]; then
187 # ls -l $DIR/$tdir/dev1 $TGT/$tdir/dev1 $TGT2/$tdir/dev1
188 # error "Error replicating block devices"
191 if [[ "$xattr" != "no" ]] &&
192 [[ "$xval1" != "$value" || "$xval2" != "$value" ]]; then
193 error "Error in replicating xattrs."
197 # Use diff to compare the source and the destination
198 check_diff $DIR/$tdir $TGT/$tdir 1
199 check_diff $DIR/$tdir $TGT2/$tdir 1
205 run_test 1 "Simple Replication"
207 # Test 2a - Replicate files created by dbench
209 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
214 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME || error "dbench failed!"
216 # Replicate the changes to $TGT
217 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
219 # Use diff to compare the source and the destination
220 check_diff $DIR/$tdir $TGT/$tdir 2a
221 check_diff $DIR/$tdir $TGT2/$tdir 2a
227 run_test 2a "Replicate files created by dbench."
230 # Test 2b - Replicate files changed by dbench.
232 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
238 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
241 local child_pid=$(pgrep dbench)
242 echo PIDs: $child_pid
244 $KILL -SIGSTOP $child_pid
246 echo Starting replication
247 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
248 check_diff $DIR/$tdir $TGT/$tdir 2b
251 $KILL -SIGCONT $child_pid
255 $KILL -SIGSTOP $child_pid
257 echo Starting replication
258 $LRSYNC -l $LREPL_LOG
259 check_diff $DIR/$tdir $TGT/$tdir 2b
261 echo "Wait for dbench to finish"
262 $KILL -SIGCONT $child_pid
265 # Replicate the changes to $TGT
266 echo Starting replication
267 $LRSYNC -l $LREPL_LOG
269 check_diff $DIR/$tdir $TGT/$tdir 2b
270 check_diff $DIR/$tdir $TGT2/$tdir 2b
276 run_test 2b "Replicate files changed by dbench."
278 # Test 2c - Replicate files while dbench is running
280 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
285 sh rundbench -C -D $DIR/$tdir 2 -t $DBENCH_TIME &
287 # Replicate the changes to $TGT
288 sleep 10 # give dbench a headstart
290 while [ $quit -le 1 ];
292 echo "Running lustre_rsync"
# Target the MDT that $CL_USER was registered on ($MDT0), exactly as every
# other lustre_rsync invocation in this suite does.
$LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
296 if [ $? -ne 0 ]; then
# Count one more failed check; shell arithmetic avoids forking expr.
quit=$((quit + 1))
301 # Use diff to compare the source and the destination
302 check_diff $DIR/$tdir $TGT/$tdir 2c
303 check_diff $DIR/$tdir $TGT2/$tdir 2c
309 run_test 2c "Replicate files while dbench is running."
311 # Test 3a - Replicate files created by createmany
313 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
319 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!"
321 # Replicate the changes to $TGT
322 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
323 check_diff $DIR/$tdir $TGT/$tdir 3a
324 check_diff $DIR/$tdir $TGT2/$tdir 3a
330 run_test 3a "Replicate files created by createmany"
333 # Test 3b - Replicate files created by writemany
335 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
342 writemany -q -a $DIR/$tdir/$tfile $time $threads || error "writemany failed!"
344 # Replicate the changes to $TGT
345 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
347 check_diff $DIR/$tdir $TGT/$tdir 3b
348 check_diff $DIR/$tdir $TGT2/$tdir 3b
354 run_test 3b "Replicate files created by writemany"
356 # Test 3c - Replicate files created by createmany/unlinkmany
358 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
364 createmany -o $DIR/$tdir/$tfile $numfiles || error "createmany failed!"
365 unlinkmany $DIR/$tdir/$tfile $numfiles || error "unlinkmany failed!"
367 # Replicate the changes to $TGT
368 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
369 check_diff $DIR/$tdir $TGT/$tdir 3c
370 check_diff $DIR/$tdir $TGT2/$tdir 3c
376 run_test 3c "Replicate files created by createmany/unlinkmany"
378 # Test 4 - Replicate files created by iozone
380 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
382 which iozone > /dev/null 2>&1
383 if [ $? -ne 0 ]; then
384 skip "iozone not found. Skipping test"
391 END_RUN_FILE=${DIR}/$tdir/run LOAD_PID_FILE=${DIR}/$tdir/pid \
392 MOUNT=${DIR}/$tdir run_iozone.sh &
394 child_pid=$(pgrep iozone)
395 $KILL -SIGSTOP $child_pid
397 # Replicate the changes to $TGT
398 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
399 check_diff $DIR/$tdir $TGT/$tdir 4
400 check_diff $DIR/$tdir $TGT2/$tdir 4
402 $KILL -SIGCONT $child_pid
404 $KILL -SIGKILL $(pgrep run_iozone.sh)
405 $KILL -SIGKILL $(pgrep iozone)
407 # After killing 'run_iozone.sh', process 'iozone' becomes the
408 # child of PID 1. Hence 'wait' does not wait for it. Killing
409 # iozone first, means more iozone processes are spawned off which
410 # is not desirable. So, after sending a sigkill, the test goes
411 # into a wait loop for iozone to cleanup and exit.
413 while [ "$(pgrep "iozone")" != "" ];
415 ps -ef | grep iozone | grep -v grep
419 $LRSYNC -l $LREPL_LOG
420 check_diff $DIR/$tdir $TGT/$tdir 4
421 check_diff $DIR/$tdir $TGT2/$tdir 4
427 run_test 4 "Replicate files created by iozone"
429 # Test 5a - Stop / start lustre_rsync
431 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
437 createmany -o $DIR/$tdir/$tfile $NUMTEST
439 # Replicate the changes to $TGT
441 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG &
444 $KILL -SIGHUP $child_pid
446 $LRSYNC -l $LREPL_LOG
448 check_diff $DIR/$tdir $TGT/$tdir 5a
449 check_diff $DIR/$tdir $TGT2/$tdir 5a
455 run_test 5a "Stop / start lustre_rsync"
457 # Test 5b - Kill / restart lustre_rsync
459 [ "$SLOW" = "no" ] && skip "Skipping slow test" && return
465 createmany -o $DIR/$tdir/$tfile $NUMTEST
467 # Replicate the changes to $TGT
469 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG &
472 $KILL -SIGKILL $child_pid
474 $LRSYNC -l $LREPL_LOG
476 check_diff $DIR/$tdir $TGT/$tdir 5b
477 check_diff $DIR/$tdir $TGT2/$tdir 5b
483 run_test 5b "Kill / restart lustre_rsync"
485 # Test 6 - lustre_rsync large no of hard links
491 touch $DIR/$tdir/link0
493 while [ $i -lt $NUMLINKS ];
495 ln $DIR/$tdir/link0 $DIR/$tdir/link${i}
499 # Replicate the changes to $TGT
500 $LRSYNC -s $DIR -t $TGT -t $TGT2 -m $MDT0 -u $CL_USER -l $LREPL_LOG
501 check_diff $DIR/$tdir $TGT/$tdir 6
502 check_diff $DIR/$tdir $TGT2/$tdir 6
# Read each replica's hard-link count with stat instead of parsing ls -l.
# The second count must come from $TGT2; reading $TGT twice would leave the
# second target unverified.
local count1=$(stat -c %h "$TGT/$tdir/link0")
local count2=$(stat -c %h "$TGT2/$tdir/link0")
506 if [[ $count1 -ne $NUMLINKS ]] || [[ $count2 -ne $NUMLINKS ]]; then
507 ls -l $TGT/$tdir/link0 $TGT2/$tdir/link0
508 error "Incorrect no of hard links found $count1, $count2"
514 run_test 6 "lustre_rsync large no of hard links"
516 # Test 7 - lustre_rsync stripesize
519 mkdir -p ${DIR}/tgt/$tdir
523 lfs setstripe -c $OSTCOUNT $DIR/$tdir
524 createmany -o $DIR/$tdir/$tfile $NUMFILES
526 # To simulate replication to another lustre filesystem, replicate
527 # the changes to $DIR/tgt. We can't turn off the changelogs
528 # while we are registered, so lustre_rsync better not try to
529 # replicate the replication steps. It seems ok :)
531 $LRSYNC -s $DIR -t $DIR/tgt -m $MDT0 -u $CL_USER -l $LREPL_LOG
532 check_diff ${DIR}/$tdir $DIR/tgt/$tdir 7
535 while [ $i -lt $NUMFILES ];
# Stripe count reported for the replicated file.
local count=$(lfs getstripe "$DIR/tgt/$tdir/${tfile}$i" |
	awk '/stripe_count/ {print $2}')
# If nothing was reported, fall back to 0 so the numeric comparison that
# follows sees a real mismatch instead of erroring out on an empty operand
# and letting the test pass silently.
count=${count:-0}
538 if [ $count -ne $OSTCOUNT ]; then
539 error "Stripe size not replicated"
547 run_test 7 "lustre_rsync stripesize"
549 # Test 8 - Replicate multiple file/directory moves
554 for i in 1 2 3 4 5 6 7 8 9; do
556 for j in 1 2 3 4 5 6 7 8 9; do
557 mkdir $DIR/$tdir/d$i/d$i$j
558 createmany -o $DIR/$tdir/d$i/d$i$j/a 10 \
560 mv $DIR/$tdir/d$i/d$i$j $DIR/$tdir/d$i/d0$i$j
561 createmany -o $DIR/$tdir/d$i/d0$i$j/b 10 \
563 mv $DIR/$tdir/d$i/d0$i$j/a0 $DIR/$tdir/d$i/d0$i$j/c0
565 mv $DIR/$tdir/d$i $DIR/$tdir/d0$i
568 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG
570 check_diff ${DIR}/$tdir $TGT/$tdir 8
576 run_test 8 "Replicate multiple file/directory moves"
583 touch $DIR/$tdir/foo/a1
585 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG
587 check_diff ${DIR}/$tdir $TGT/$tdir 9
589 rm -rf $DIR/$tdir/foo
591 $LRSYNC -s $DIR -t $TGT -m $MDT0 -u $CL_USER -l $LREPL_LOG
593 check_diff ${DIR}/$tdir $TGT/$tdir 9
599 run_test 9 "Replicate recursive directory removal"
602 complete $(basename $0) $SECONDS
603 check_and_cleanup_lustre