X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanityn.sh;h=91f9f9a3aa17b45093c3f4a24ab9c2eb84b1949d;hp=53499c2f1c1eff249892cf71b3ed4287b2e2cbbf;hb=HEAD;hpb=5a6ceb664f07812c351786c1043da71ff5027f8c;ds=sidebyside diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 53499c2..896dce9 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -3,44 +3,37 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 9977/LU-7105 -ALWAYS_EXCEPT=" 28 $SANITYN_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -SRCDIR=$(dirname $0) -PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH SIZE=${SIZE:-40960} -CHECKSTAT=${CHECKSTAT:-"checkstat -v"} OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} -export MULTIOP=${MULTIOP:-multiop} export TMP=${TMP:-/tmp} MOUNT_2=${MOUNT_2:-"yes"} CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -SAVE_PWD=$PWD - -export NAME=${NAME:-local} - -LUSTRE=${LUSTRE:-`dirname $0`/..} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh -CLEANUP=${CLEANUP:-:} -SETUP=${SETUP:-:} init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then -# bug number for skipped test: - ALWAYS_EXCEPT="$ALWAYS_EXCEPT " -# LU-2829 / LU-2887 - make allowances for ZFS slowness +ALWAYS_EXCEPT="$SANITYN_EXCEPT " +# bug number for skipped test: LU-7105 +ALWAYS_EXCEPT+=" 28" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +if [ $mds1_FSTYPE = "zfs" ]; then + # bug number: LU-15757 (test_102() causes crash in umount later) + ALWAYS_EXCEPT+=" 102" + # LU-2829 / LU-2887 - make allowances for ZFS slowness TEST33_NFILES=${TEST33_NFILES:-1000} fi + # 23 (min)" [ "$SLOW" = "no" ] && EXCEPT_SLOW="33a" +build_test_filter + FAIL_ON_ERROR=false SETUP=${SETUP:-:} @@ -61,7 +54,33 @@ dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=1 check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS -build_test_filter +test_0() { + local client2=${CLIENT2:-$HOSTNAME} + local tmp=$(mktemp) + + printf 'a b\n' > $tmp + + do_node_vp "$HOSTNAME" printf 'a b\n' | + diff $tmp - || error "do_node_vp mismatch" + + do_node_vp "$client2" printf 'a b\n' | + diff $tmp - || error "do_node_vp mismatch" + + do_facet_vp mds1 printf 'a b\n' | + diff $tmp - || error "do_facet_vp mismatch" + + printf '%s' 1 2 3 4 5 6 7 8 \ 9 10 ' ' '"' "'" \! \' \( \) $'\n' > $tmp + + do_node_vp "$HOSTNAME" printf '%s' 1 2 3 4 5 6 7 8 \ 9 10 ' ' '"' "'" \! \' \( \) $'\n' | + diff $tmp - || error "do_node_vp mismatch" + + do_node_vp "$client2" printf '%s' 1 2 3 4 5 6 7 8 \ 9 10 ' ' '"' "'" \! \' \( \) $'\n' | + diff $tmp - || error "do_node_vp mismatch" + + do_facet_vp mds1 printf '%s' 1 2 3 4 5 6 7 8 \ 9 10 ' ' '"' "'" \! \' \( \) $'\n' | + diff $tmp - || error "do_facet_vp mismatch" +} +run_test 0 "do_node_vp() and do_facet_vp() do the right thing" test_1() { touch $DIR1/$tfile @@ -288,7 +307,7 @@ test_11() { run_test 11 "execution of file opened for write should return error ====" test_12() { - DIR=$DIR DIR2=$DIR2 sh lockorder.sh + DIR=$DIR DIR2=$DIR2 bash lockorder.sh } run_test 12 "test lock ordering (link, stat, unlink)" @@ -315,73 +334,60 @@ test_14aa() { MULTIPID=$! $DIR2/$tdir/$tfile && error || true - kill -USR1 $MULTIPID - wait $MULTIPID || return 2 + kill $MULTIPID } run_test 14aa "execution of file open for write returns -ETXTBSY" test_14ab() { test_mkdir $DIR1/$tdir - cp -p $(which multiop) $DIR1/$tdir/multiop || error "cp failed" - MULTIOP_PROG=$DIR1/$tdir/multiop multiop_bg_pause $TMP/$tfile O_c || - return 1 - MULTIOP_PID=$! - $MULTIOP $DIR2/$tdir/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIOP_PID || return 2 - wait $MULTIOP_PID || return 3 - rm $TMP/$tfile $DIR1/$tdir/multiop || error "removing multiop" + cp -p $(which sleep) $DIR1/$tdir/sleep || error "cp failed" + $DIR1/$tdir/sleep 60 & + SLEEP_PID=$! + $MULTIOP $DIR2/$tdir/sleep Oc && error "expected error, got success" + kill $SLEEP_PID } run_test 14ab "open(RDWR) of executing file returns -ETXTBSY" test_14b() { # bug 3192, 7040 test_mkdir $DIR1/$tdir - cp -p $(which multiop) $DIR1/$tdir/multiop || error "cp failed" - MULTIOP_PROG=$DIR1/$tdir/multiop multiop_bg_pause $TMP/$tfile O_c || - return 1 - MULTIOP_PID=$! - $TRUNCATE $DIR2/$tdir/multiop 0 && kill -9 $MULTIOP_PID && \ + cp -p $(which sleep) $DIR1/$tdir/sleep || error "cp failed" + $DIR1/$tdir/sleep 60 & + SLEEP_PID=$! + $TRUNCATE $DIR2/$tdir/sleep 60 && kill -9 $SLEEP_PID && \ error "expected truncate error, got success" - kill -USR1 $MULTIOP_PID || return 2 - wait $MULTIOP_PID || return 3 - cmp $(which multiop) $DIR1/$tdir/multiop || error "binary changed" - rm $TMP/$tfile $DIR1/$tdir/multiop || error "removing multiop" + kill $SLEEP_PID + cmp $(which sleep) $DIR1/$tdir/sleep || error "binary changed" } run_test 14b "truncate of executing file returns -ETXTBSY ======" test_14c() { # bug 3430, 7040 test_mkdir $DIR1/$tdir - cp -p $(which multiop) $DIR1/$tdir/multiop || error "cp failed" - MULTIOP_PROG=$DIR1/$tdir/multiop multiop_bg_pause $TMP/$tfile O_c || - return 1 - MULTIOP_PID=$! - cp /etc/hosts $DIR2/$tdir/multiop && error "expected error, got success" - kill -USR1 $MULTIOP_PID || return 2 - wait $MULTIOP_PID || return 3 - cmp $(which multiop) $DIR1/$tdir/multiop || error "binary changed" - rm $TMP/$tfile $DIR1/$tdir/multiop || error "removing multiop" + cp -p $(which sleep) $DIR1/$tdir/sleep || error "cp failed" + $DIR1/$tdir/sleep 60 & + SLEEP_PID=$! + cp /etc/hosts $DIR2/$tdir/sleep && error "expected error, got success" + kill $SLEEP_PID + cmp $(which sleep) $DIR1/$tdir/sleep || error "binary changed" } run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" test_14d() { # bug 10921 test_mkdir $DIR1/$tdir - cp -p $(which multiop) $DIR1/$tdir/multiop || error "cp failed" - MULTIOP_PROG=$DIR1/$tdir/multiop multiop_bg_pause $TMP/$tfile O_c || - return 1 - MULTIOP_PID=$! + cp -p $(which sleep) $DIR1/$tdir/sleep || error "cp failed" + $DIR1/$tdir/sleep 60 & + SLEEP_PID=$! log chmod - chmod 600 $DIR1/$tdir/multiop || error "chmod failed" - kill -USR1 $MULTIOP_PID || return 2 - wait $MULTIOP_PID || return 3 - cmp $(which multiop) $DIR1/$tdir/multiop || error "binary changed" - rm $TMP/$tfile $DIR1/$tdir/multiop || error "removing multiop" + chmod 600 $DIR1/$tdir/sleep || error "chmod failed" + kill $SLEEP_PID + cmp $(which sleep) $DIR1/$tdir/sleep || error "binary changed" } run_test 14d "chmod of executing file is still possible ========" test_15() { # bug 974 - ENOSPC echo "PATH=$PATH" - sh oos2.sh $MOUNT1 $MOUNT2 + bash oos2.sh $MOUNT1 $MOUNT2 wait_delete_completed - grant_error=`dmesg | grep "> available"` + grant_error=$(dmesg | grep "< tot_grant") [ -z "$grant_error" ] || error "$grant_error" } run_test 15 "test out-of-space with multiple writers ===========" @@ -390,7 +396,7 @@ COUNT=${COUNT:-2500} # The FSXNUM reduction for ZFS is needed until ORI-487 is fixed. # We don't want to skip it entirely, but ZFS is VERY slow and cannot # pass a 2500 operation dual-mount run within the time limit. -if [ "$(facet_fstype ost1)" = "zfs" ]; then +if [ "$ost1_FSTYPE" = "zfs" ]; then FSXNUM=$((COUNT / 5)) FSXP=1 elif [ "$SLOW" = "yes" ]; then @@ -407,6 +413,8 @@ test_16a() { local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -414,13 +422,13 @@ test_16a() { rm -f $file1 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 \ - || error "fsx failed" + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + error "fsx failed" rm -f $file1 - # O_DIRECT reads and writes must be aligned to the device block size. - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ - -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed." + # O_DIRECT reads and writes must be aligned to the PAGE_SIZE. + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r $PAGE_SIZE \ + -w $PAGE_SIZE $file1 $file2 || error "fsx with O_DIRECT failed." } run_test 16a "$FSXNUM iterations of dual-mount fsx" @@ -428,28 +436,436 @@ run_test 16a "$FSXNUM iterations of dual-mount fsx" test_16b() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile + local stripe_size=($($LFS getstripe -S $DIR)) + + check_set_fallocate # to allocate grant because it may run out due to test_15. lfs setstripe -c -1 $file1 - dd if=/dev/zero of=$file1 bs=$STRIPE_BYTES count=$OSTCOUNT oflag=sync - dd if=/dev/zero of=$file2 bs=$STRIPE_BYTES count=$OSTCOUNT oflag=sync + dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync || + error "dd failed writing to file=$file1" + dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync || + error "dd failed writing to file=$file2" rm -f $file1 lfs setstripe -c -1 $file1 # b=10919 # -o is set to 8192 because writes < 1 page and between 1 and 2 pages # create a mix of tiny writes & normal writes - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 $file1 \ - $file2 + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ + $file1 $file2 || error "fsx with tiny write failed." } run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" +test_16c() { + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + local stripe_size=$(do_facet $SINGLEMDS \ + "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + + [ "$ost1_FSTYPE" != ldiskfs ] && skip "dio on ldiskfs only" + + check_set_fallocate + + # to allocate grant because it may run out due to test_15. + $LFS setstripe -c -1 $file1 + dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync + dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync + rm -f $file1 + wait_delete_completed + + local list=$(comma_list $(osts_nodes)) + if ! get_osd_param $list '' read_cache_enable >/dev/null; then + skip "not cache-capable obdfilter" + fi + + set_osd_param $list '' read_cache_enable 0 + set_osd_param $list '' writethrough_cache_enable 0 + + $LFS setstripe -c -1 $file1 # b=10919 + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + error "fsx failed" + rm -f $file1 + + set_osd_param $list '' read_cache_enable 1 + set_osd_param $list '' writethrough_cache_enable 1 + + return 0 +} +run_test 16c "verify data consistency on ldiskfs with cache disabled (b=17397)" + +test_16d() { + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + local file3=$DIR1/file + local tmpfile=$(mktemp) + local stripe_size=$(do_facet $SINGLEMDS \ + "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + + # to allocate grant because it may run out due to test_15. + $LFS setstripe -c -1 $file1 + stack_trap "rm -f $file1 $file2 $file3 $tmpfile" + dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync + dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync + rm -f $file1 + + $LFS setstripe -c -1 $file1 # b=10919 + $LCTL set_param ldlm.namespaces.*.lru_size=clear + + # direct write on one client and direct read from another + dd if=/dev/urandom of=$file1 bs=1M count=100 oflag=direct + dd if=$file2 of=$tmpfile iflag=direct bs=1M + diff $file1 $tmpfile || error "file different(1)" + rm -f $file1 + + # buffer write on one client, but direct read from another + dd if=$tmpfile of=$file1 bs=1M count=100 + dd if=$file2 of=$file3 bs=1M iflag=direct count=100 + diff $file3 $tmpfile || error "file different(2)" + + rm -f $file3 $file2 $file1 + # direct write on one client + dd if=$tmpfile of=$file1 bs=1M count=100 oflag=direct + # buffer read from another client + dd if=$file2 of=$file3 bs=1M count=100 + diff $file3 $tmpfile || error "file different(3)" +} +run_test 16d "Verify DIO and buffer IO with two clients" + +test_16e() { # LU-13227 + # issue: LU-14314 + + (( "$MDS1_VERSION" >= $(version_code 2.13.53) )) || + skip "Need MDS version at least 2.13.53" + + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + + # client1 write 10M data + dd if=/dev/zero of=$file1 bs=1M count=10 + # drop locks + cancel_lru_locks osc > /dev/null + # use lockahead to generate one PW lock to keep LVB loaded. + $LFS ladvise -a lockahead --start 0 --length 1M \ + --mode WRITE $file1 + # direct write to extend file size on client2 + dd if=/dev/zero of=$file2 bs=1M seek=20 count=1 \ + oflag=direct conv=notrunc + local filesize=$(stat -c %s $file2) + [ "$filesize" -eq 22020096 ] || + error "expected filesize 22020096 got $filesize" + rm -f $file1 +} +run_test 16e "Verify size consistency for O_DIRECT write" + +test_16f() { # LU-14541 + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + local duration=20 + local status + + (( $MDS1_VERSION > $(version_code 2.15.51) )) || + skip "Need MDS version at least 2.15.51" + + timeout --preserve-status --signal=USR1 $duration \ + rw_seq_cst_vs_drop_caches $file1 $file2 + status=$? + + case $((status & 0x7f)) in + 0) + echo OK # Computers must be fast now. + ;; + 6) # SIGABRT + error "sequential consistency violation detected" + ;; + 10) # SIGUSR1 + echo TIMEOUT # This is fine. + ;; + *) + error "strange status '$status'" + ;; + esac + + rm -f $file1 +} +run_test 16f "rw sequential consistency vs drop_caches" + +test_16g() { + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + local duration=20 + local status + + timeout --preserve-status --signal=USR1 $duration \ + rw_seq_cst_vs_drop_caches -m $file1 $file2 + status=$? + + case $((status & 0x7f)) in + 0) + echo OK # Computers must be fast now. + ;; + 6) # SIGABRT + error "sequential consistency violation detected" + ;; + 10) # SIGUSR1 + echo TIMEOUT # This is fine. + ;; + *) + error "strange status '$status'" + ;; + esac + + rm -f $file1 +} +run_test 16g "mmap rw sequential consistency vs drop_caches" + +test_16h() { + local tf=$DIR/$tdir/$tfile + local tf2=$DIR2/$tdir/$tfile + local cmd="$MMAP_CAT $tf | od -x | tail -q -n4" + local cmd2="$MMAP_CAT $tf2 | od -x | tail -q -n4" + + test_mkdir $DIR/$tdir + + # create file and populate data + cp /etc/passwd $tf || error "cp failed" + + local size=$(stat -c %s $tf) + c1=$(eval $cmd) + c2=$(eval $cmd2) + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch" + fi + + echo " ------- before truncate --------" + echo $c1 + + # truncate file + $TRUNCATE $tf $((size / 2)) || error "truncate file" + + #cancel_lru_locks + echo " ------- after truncate --------" + + # repeat the comparison + c1=$(eval $cmd) + c2=$(eval $cmd2) + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch after truncate" + fi + echo $c2 +} +run_test 16h "mmap read after truncate file" + +test_16i() { + local tf=$DIR/$tdir/$tfile + local tf2=$DIR2/$tdir/$tfile + + test_mkdir $DIR/$tdir + + # create file and populate data + cp /etc/passwd $tf || error "cp failed" + + local size=$(stat -c %s $tf) + + c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4) + c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4) + + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch" + fi + + echo " ------- before truncate --------" + echo $c1 + + # truncate file + $TRUNCATE $tf $((size / 2)) || error "truncate file" + + echo " ------- after truncate --------" + + # repeat the comparison + c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4) + c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4) + + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch after truncate" + fi + echo $c2 +} +run_test 16i "read after truncate file" + +test_16j() +{ + (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs" + + local stripe_size=$((1024 * 1024)) #1 MiB + # Max i/o below is ~ 4 * stripe_size, so this gives ~5 i/os + local file_size=$((25 * stripe_size)) + local bsizes + + $LFS setstripe -c 2 -S $stripe_size $DIR/$tfile.1 + stack_trap "rm -f $DIR/$tfile.1" + + # Just a bit bigger than the largest size in the test set below + dd if=/dev/urandom bs=$file_size count=1 of=$DIR/$tfile.1 || + error "buffered i/o to create file failed" + + if zfs_or_rotational; then + # DIO on ZFS can take up to 2 seconds per IO + # rotational is better, but still slow. + # Limit testing on those media to larger sizes + bsizes="$((stripe_size - PAGE_SIZE)) $stripe_size \ + $((stripe_size + 1024))" + else + bsizes="$((PAGE_SIZE / 4)) $((PAGE_SIZE - 1024)) \ + $((PAGE_SIZE - 1)) $PAGE_SIZE $((PAGE_SIZE + 1024)) \ + $((PAGE_SIZE * 3/2)) $((PAGE_SIZE * 4)) \ + $((stripe_size - 1)) $stripe_size \ + $((stripe_size + 1)) $((stripe_size * 3/2)) \ + $((stripe_size * 4)) $((stripe_size * 4 + 1))" + fi + + # 1 process (BIO or DIO) on each client + for bs in $bsizes; do + $LFS setstripe -c 2 -S $stripe_size $DIR/$tfile.2 + echo "bs: $bs, file_size $file_size" + dd if=$DIR/$tfile.1 bs=$((bs *2 )) of=$DIR/tfile.2 \ + conv=notrunc oflag=direct iflag=direct & + pid_dio1=$! + # Buffered I/O with similar but not the same block size + dd if=$DIR2/$tfile.1 bs=$((bs * 2)) of=$DIR2/$tfile.2 \ + conv=notrunc & + pid_bio2=$! + wait $pid_dio1 + rc1=$? + wait $pid_bio2 + rc2=$? + if (( rc1 != 0 )); then + error "dio copy 1 w/bsize $bs failed: $rc1" + fi + if (( rc2 != 0 )); then + error "buffered copy 2 w/bsize $bs failed: $rc2" + fi + + $CHECKSTAT -t file -s $file_size $DIR/$tfile.2 || + error "size incorrect" + $CHECKSTAT -t file -s $file_size $DIR2/$tfile.2 || + error "size incorrect - mount 2" + cmp --verbose $DIR/$tfile.1 $DIR/$tfile.2 || + error "files differ, bsize $bs" + cmp --verbose $DIR2/$tfile.1 $DIR2/$tfile.2 || + error "files differ, bsize $bs - mount 2" + rm -f $DIR/$tfile.2 + done + + # 2 processes - both DIO and BIO - on each client + for bs in $bsizes; do + $LFS setstripe -c 2 -S $stripe_size $DIR/$tfile.2 + echo "bs: $bs, file_size $file_size" + # Client 1 + dd if=$DIR/$tfile.1 bs=$((bs *2 )) of=$DIR/tfile.2 \ + conv=notrunc oflag=direct iflag=direct & + pid_dio1=$! + # Buffered I/O with similar but not the same block size + dd if=$DIR/$tfile.1 bs=$((bs * 2)) of=$DIR/$tfile.2 \ + conv=notrunc & + pid_bio2=$! + # Client 2 + dd if=$DIR2/$tfile.1 bs=$((bs *2 )) of=$DIR2/tfile.2 \ + conv=notrunc oflag=direct iflag=direct & + pid_dio3=$! + # Buffered I/O with similar but not the same block size + dd if=$DIR2/$tfile.1 bs=$((bs * 2)) of=$DIR2/$tfile.2 \ + conv=notrunc & + pid_bio4=$! + wait $pid_dio1 + rc1=$? + wait $pid_bio2 + rc2=$? + wait $pid_dio3 + rc3=$? + wait $pid_bio4 + rc4=$? + + if (( rc1 != 0 )); then + error "dio copy 1 w/bsize $bs failed: $rc1" + fi + if (( rc2 != 0 )); then + error "buffered copy 2 w/bsize $bs failed: $rc2" + fi + if (( rc3 != 0 )); then + error "dio copy 3 w/bsize $bs failed: $rc1" + fi + if (( rc4 != 0 )); then + error "buffered copy 4 w/bsize $bs failed: $rc2" + fi + + $CHECKSTAT -t file -s $file_size $DIR/$tfile.2 || + error "size incorrect" + $CHECKSTAT -t file -s $file_size $DIR2/$tfile.2 || + error "size incorrect - mount 2" + cmp --verbose $DIR/$tfile.1 $DIR/$tfile.2 || + error "files differ, bsize $bs" + cmp --verbose $DIR2/$tfile.1 $DIR2/$tfile.2 || + error "files differ, bsize $bs - mount 2" + rm -f $DIR/$tfile.2 + done +} +run_test 16j "race dio with buffered i/o" + +test_16k() { + local fsxN=${FSX_NP:-5} + local fsxNops=${FSX_NOPS:-10000} + local fsxNparams=${FSXPARAMS_16k:-""} + local dropsleep=${DROP_SLEEP:-3} + local dpipd + local -a fsxpids + local cmd + + [ "$SLOW" = "no" ] && fsxNops=1000 + + while true; do + echo 3 > /proc/sys/vm/drop_caches + sleep $dropsleep + done & + dpipd=$! + stack_trap "kill -9 $dpipd" + + for ((i = 1; i <= fsxN; i++)); do + cmd="$FSX $fsxNparams -N $fsxNops $DIR/fsxfile.${i} -l $((64 * 1024 * 1024))" + echo "+ $cmd" + eval $cmd & + fsxpids[$i]=$! + done + for ((i = 1; i <= fsxN; i++)); do + wait ${fsxpids[$i]} && echo "+ fsx $i done: rc=$?" || + error "- fsx $i FAILURE! rc=$?" + date + done +} +run_test 16k "Parallel FSX and drop caches should not panic" + test_17() { # bug 3513, 3667 remote_ost_nodsh && skip "remote OST with nodsh" && return lfs setstripe $DIR1/$tfile -i 0 -c 1 cp $SAMPLE_FILE $DIR1/$tfile cancel_lru_locks osc > /dev/null - #define OBD_FAIL_ONCE|OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a + #define CFS_FAIL_ONCE|OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a do_facet ost1 lctl set_param fail_loc=0x8000030a ls -ls $DIR1/$tfile | awk '{ print $1,$6 }' > $DIR1/$tfile-1 & \ ls -ls $DIR2/$tfile | awk '{ print $1,$6 }' > $DIR2/$tfile-2 @@ -469,25 +885,27 @@ test_18() { excepts="$excepts -e $(($(printf %d \'$idx)-96))" done - $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 $excepts + excepts="$excepts -e 7 -e 8 -e 9" + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 $excepts || + error "mmap_sanity test failed" sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" test_19() { # bug3811 local node=$(facet_active_host ost1) + local device="$FSNAME-OST*" - [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) + [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) && + device="$FSNAME-MDT*" # check whether obdfilter is cache capable at all - if ! get_osd_param $node '' read_cache_enable >/dev/null; then - echo "not cache-capable obdfilter" - return 0 - fi + get_osd_param $node $device read_cache_enable >/dev/null || + skip "not cache-capable obdfilter" - local MAX=$(get_osd_param $node '' readcache_max_filesize | \ - head -n 1) - set_osd_param $node '' readcache_max_filesize 4096 + local max=$(get_osd_param $node $device readcache_max_filesize |\ + head -n 1) + set_osd_param $node $device readcache_max_filesize 4096 dd if=/dev/urandom of=$TMP/$tfile bs=512k count=32 local SUM=$(cksum $TMP/$tfile | cut -d" " -f 1,2) cp $TMP/$tfile $DIR1/$tfile @@ -502,22 +920,22 @@ test_19() { # bug3811 [ "$(cat $TMP/sum2)" = "$SUM" ] || \ error "$DIR2/$tfile $(cat $TMP/sum2) != $SUM" done - set_osd_param $node '' readcache_max_filesize $MAX + set_osd_param $node $device readcache_max_filesize $max rm $DIR1/$tfile } run_test 19 "test concurrent uncached read races ===============" test_20() { - test_mkdir $DIR1/d20 - cancel_lru_locks $OSC - CNT=$((`lctl get_param -n llite.*.dump_page_cache | wc -l`)) - $MULTIOP $DIR1/f20 Ow8190c - $MULTIOP $DIR2/f20 Oz8194w8190c - $MULTIOP $DIR1/f20 Oz0r8190c - cancel_lru_locks $OSC - CNTD=$((`lctl get_param -n llite.*.dump_page_cache | wc -l` - $CNT)) - [ $CNTD -gt 0 ] && \ - error $CNTD" page left in cache after lock cancel" || true + test_mkdir $DIR1/$tdir + cancel_lru_locks + CNT=$($LCTL get_param -n llite.*.dump_page_cache | wc -l) + $MULTIOP $DIR1/$tdir/$tfile Ow8190c + $MULTIOP $DIR2/$tdir/$tfile Oz8194w8190c + $MULTIOP $DIR1/$tdir/$tfile Oz0r8190c + cancel_lru_locks + CNT2=$($LCTL get_param -n llite.*.dump_page_cache | wc -l) + [[ $CNT2 == $CNT ]] || + error $((CNT2 - CNT))" page left in cache after lock cancel" } run_test 20 "test extra readahead page left in cache ====" @@ -668,12 +1086,37 @@ test_26b() { } run_test 26b "sync mtime between ost and mds" +test_26c() { + (( $MDS1_VERSION >= $(version_code 2.15.61) )) || + skip "Need MDS version at least 2.15.61" + + multiop_bg_pause $DIR1/$tfile O_c || error "multiop failed" + MULTIPID=$! + touch -am -d @978261179 $DIR2/$tfile + kill -USR1 $MULTIPID + wait $MULTIPID || error "wait for PID $MULTIPID failed" + sleep 1 + + echo + stat $DIR/$tfile + local times="$(stat -c"%X %Y" $DIR/$tfile)" + [[ "$times" == "978261179 978261179" ]] || + error "[am]times are not set in past on $DIR: $times" + echo + stat $DIR2/$tfile + local times="$(stat -c"%X %Y" $DIR2/$tfile)" + [[ "$times" == "978261179 978261179" ]] || + error "[am]times are not set in past on $DIR2: $times" + echo +} +run_test 26c "set-in-past on open file is not lost on close" + test_27() { cancel_lru_locks $OSC lctl clear dd if=/dev/zero of=$DIR2/$tfile bs=$((4096+4))k conv=notrunc count=4 seek=3 & DD2_PID=$! - usleep 50 + sleep 0.5 log "dd 1 started" dd if=/dev/zero of=$DIR1/$tfile bs=$((16384-1024))k conv=notrunc count=1 seek=4 & @@ -690,47 +1133,14 @@ test_27() { run_test 27 "align non-overlapping extent locks from request ===" test_28() { # bug 9977 - ECHO_UUID="ECHO_osc1_UUID" - tOST=$($LCTL dl | awk '/-osc-|OSC.*MNT/ { print $4 }' | head -n1) - - $LFS setstripe $DIR1/$tfile -S 1048576 -i 0 -c 2 - tOBJID=`$LFS getstripe $DIR1/$tfile | awk '$1 == 1 {print $2}'` - dd if=/dev/zero of=$DIR1/$tfile bs=1024k count=2 - - $LCTL <<-EOF - newdev - attach echo_client ECHO_osc1 $ECHO_UUID - setup $tOST - EOF - - tECHOID=`$LCTL dl | grep $ECHO_UUID | awk '{ print $1 }'` - $LCTL --device $tECHOID destroy "${tOBJID}:0" - - $LCTL <<-EOF - cfg_device ECHO_osc1 - cleanup - detach - EOF - - # reading of 1st stripe should pass - dd if=$DIR2/$tfile of=/dev/null bs=1024k count=1 || error "dd failed" - # reading of 2nd stripe should fail (this stripe was destroyed) - dd if=$DIR2/$tfile of=/dev/null bs=1024k count=1 skip=1 && error - - # now, recreating test file - dd if=/dev/zero of=$DIR1/$tfile bs=1024k count=2 || error "dd failed" - # reading of 1st stripe should pass - dd if=$DIR2/$tfile of=/dev/null bs=1024k count=1 || error "dd failed" - # reading of 2nd stripe should pass - dd if=$DIR2/$tfile of=/dev/null bs=1024k count=1 skip=1 || - error "dd failed" + skip "echo_client on osc is no longer supported" } run_test 28 "read/write/truncate file with lost stripes" test_30() { #b=11110, LU-2523 test_mkdir $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash - /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & + bash -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & $DIR1/$tdir/bash -c 'sleep 2; openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?' wait @@ -774,50 +1184,29 @@ test_31b() { } run_test 31b "voluntary OST cancel / blocking ast race==============" -# enable/disable lockless truncate feature, depending on the arg 0/1 -enable_lockless_truncate() { - lctl set_param -n $OSC.*.lockless_truncate $1 -} - -test_32a() { # bug 11270 - local p="$TMP/$TESTSUITE-$TESTNAME.parameters" - local stripe_size=$(do_facet $SINGLEMDS \ - "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") +#LU-14949 - multi-client version of the test 31r in sanity. +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source - save_lustre_params client "$OSC.*.lockless_truncate" > $p - cancel_lru_locks $OSC - enable_lockless_truncate 1 - rm -f $DIR1/$tfile - lfs setstripe -c -1 $DIR1/$tfile - dd if=/dev/zero of=$DIR1/$tfile count=$OSTCOUNT bs=$stripe_size > \ - /dev/null 2>&1 - clear_stats $OSC.*.${OSC}_stats + ls -l $DIR/$tfile.target # cache it for sure - log "checking cached lockless truncate" - $TRUNCATE $DIR1/$tfile 8000000 - $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || - error "cached truncate isn't lockless" + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! - log "checking not cached lockless truncate" - $TRUNCATE $DIR2/$tfile 5000000 - $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || - error "not cached truncate isn't lockless" + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR2/$tfile.source $DIR2/$tfile.target - log "disabled lockless truncate" - enable_lockless_truncate 0 - clear_stats $OSC.*.${OSC}_stats - $TRUNCATE $DIR2/$tfile 3000000 - $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -eq 0 ] || - error "lockless truncate disabling failed" - rm $DIR1/$tfile - # restore lockless_truncate default values - restore_lustre_params < $p - rm -f $p + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi } -run_test 32a "lockless truncate" +run_test 31r "open-rename(replace) race" test_32b() { # bug 11270 remote_ost_nodsh && skip "remote OST with nodsh" && return @@ -870,75 +1259,76 @@ test_32b() { # bug 11270 restore_lustre_params <$p rm -f $p } -run_test 32b "lockless i/o" +# Disable test 32b prior to full removal +#run_test 32b "lockless i/o" print_jbd_stat () { - local dev - local mdts=$(get_facets MDS) - local varcvs - local mds - - local stat=0 - for mds in ${mdts//,/ }; do - varsvc=${mds}_svc - dev=$(basename $(do_facet $mds "lctl get_param -n osd*.${!varsvc}.mntdev|\ - xargs readlink -f" )) - val=$(do_facet $mds "cat /proc/fs/jbd*/${dev}{,:*,-*}/info 2>/dev/null | - head -n1") - val=${val%% *}; - stat=$(( stat + val)) - done - echo $stat + local mdts=$(get_facets MDS) + local stat=0 + local varsvc + local dev + local mds + + for mds in ${mdts//,/ }; do + varsvc=${mds}_svc + + dev=$(basename $(do_facet $mds "lctl get_param -n \ + osd*.${!varsvc}.mntdev | xargs readlink -f")) + val=$(do_facet $mds "cat /proc/fs/jbd*/${dev}{,:*,-*}/info \ + 2>/dev/null | head -n1") + val=${val%% *}; + stat=$((stat + val)) + done + echo $stat } # commit on sharing tests test_33a() { - remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return - [ -z "$CLIENTS" ] && skip "Need two or more clients, have $CLIENTS" && return 0 - [ $CLIENTCOUNT -lt 2 ] && - skip "Need two or more clients, have $CLIENTCOUNT" && return 0 + [ $CLIENTCOUNT -lt 2 ] && + skip "Need two or more clients, have $CLIENTCOUNT" - local nfiles=${TEST33_NFILES:-10000} - local param_file=$TMP/$tfile-params - local fstype=$(facet_fstype $SINGLEMDS) + local nfiles=${TEST33_NFILES:-10000} + local param_file=$TMP/$tfile-params + local COS + local jbdold="N/A" + local jbdnew="N/A" + local jbd save_lustre_params $(get_facets MDS) \ "mdt.*.commit_on_sharing" > $param_file - local COS - local jbdold="N/A" - local jbdnew="N/A" - local jbd - - for COS in 0 1; do - do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=$COS - avgjbd=0 - avgtime=0 - for i in 1 2 3; do - do_nodes $CLIENT1,$CLIENT2 "mkdir -p $DIR1/$tdir-\\\$(hostname)-$i" - - [ $fstype = ldiskfs ] && jbdold=$(print_jbd_stat) - echo "=== START createmany old: $jbdold transaction" - local elapsed=$(do_and_time "do_nodes $CLIENT1,$CLIENT2 createmany -o $DIR1/$tdir-\\\$(hostname)-$i/f- -r$DIR2/$tdir-\\\$(hostname)-$i/f- $nfiles > /dev/null 2>&1") - [ $fstype = ldiskfs ] && jbdnew=$(print_jbd_stat) - [ $fstype = ldiskfs ] && jbd=$(( jbdnew - jbdold )) - echo "=== END createmany new: $jbdnew transaction : $jbd transactions nfiles $nfiles time $elapsed COS=$COS" - [ $fstype = ldiskfs ] && avgjbd=$(( avgjbd + jbd )) - avgtime=$(( avgtime + elapsed )) - done - eval cos${COS}_jbd=$((avgjbd / 3)) - eval cos${COS}_time=$((avgtime / 3)) - done + for COS in 0 1; do + do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=$COS + avgjbd=0 + avgtime=0 + for i in 1 2 3; do + do_nodes $CLIENT1,$CLIENT2 "mkdir -p $DIR1/$tdir-\\\$(hostname)-$i" + + [ "$mds1_FSTYPE" = ldiskfs ] && jbdold=$(print_jbd_stat) + echo "=== START createmany old: $jbdold transaction" + local elapsed=$(do_and_time "do_nodes $CLIENT1,$CLIENT2 createmany -o $DIR1/$tdir-\\\$(hostname)-$i/f- -r$DIR2/$tdir-\\\$(hostname)-$i/f- $nfiles > /dev/null 2>&1") + [ "$mds1_FSTYPE" = ldiskfs ] && jbdnew=$(print_jbd_stat) + [ "$mds1_FSTYPE" = ldiskfs ] && jbd=$(( jbdnew - jbdold )) + echo "=== END createmany new: $jbdnew transaction : $jbd transactions nfiles $nfiles time $elapsed COS=$COS" + [ "$mds1_FSTYPE" = ldiskfs ] && avgjbd=$(( avgjbd + jbd )) + avgtime=$(( avgtime + elapsed )) + done + eval cos${COS}_jbd=$((avgjbd / 3)) + eval cos${COS}_time=$((avgtime / 3)) + done - echo "COS=0 transactions (avg): $cos0_jbd time (avg): $cos0_time" - echo "COS=1 transactions (avg): $cos1_jbd time (avg): $cos1_time" - [ "$cos0_jbd" != 0 ] && echo "COS=1 vs COS=0 jbd: $((((cos1_jbd/cos0_jbd - 1)) * 100 )) %" - [ "$cos0_time" != 0 ] && echo "COS=1 vs COS=0 time: $((((cos1_time/cos0_time - 1)) * 100 )) %" + echo "COS=0 transactions (avg): $cos0_jbd time (avg): $cos0_time" + echo "COS=1 transactions (avg): $cos1_jbd time (avg): $cos1_time" + [ "$cos0_jbd" != 0 ] && + echo "COS=1 vs COS=0 jbd: $((((cos1_jbd/cos0_jbd - 1)) * 100 )) %" + [ "$cos0_time" != 0 ] && + echo "COS=1 vs COS=0 time: $((((cos1_time/cos0_time - 1)) * 100 )) %" - restore_lustre_params < $param_file - rm -f $param_file - return 0 + restore_lustre_params < $param_file + rm -f $param_file + return 0 } run_test 33a "commit on sharing, cross crete/delete, 2 clients, benchmark" @@ -946,7 +1336,6 @@ run_test 33a "commit on sharing, cross crete/delete, 2 clients, benchmark" test_33b() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - [ -n "$CLIENTS" ] || { skip "Need two or more clients" && return 0; } [ $CLIENTCOUNT -ge 2 ] || { skip "Need two or more clients, have $CLIENTCOUNT" && return 0; } @@ -1002,41 +1391,94 @@ test_33b() { } run_test 33b "COS: cross create/delete, 2 clients, benchmark under remote dir" -test_33c() { - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.63) ] && - skip "DNE CoS not supported" && return - +# arg1 is description, arg2 is operations before Sync-on-Lock-Cancel, arg3 is +# the operation that triggers SoLC +op_trigger_solc() { local sync_count + local total=0 + local nodes=$(comma_list $(mdts_nodes)) - mkdir $DIR/$tdir sync_all_data - do_facet mds1 "lctl set_param -n mdt.*.sync_count=0" - # do twice in case transaction is committed before unlock, see LU-8200 + + # trigger CoS twice in case transaction commit before unlock for i in 1 2; do - # remote dir is created on MDT1, which enqueued lock of $tdir on - # MDT0 - $LFS mkdir -i 1 $DIR/$tdir/remote.$i - mkdir $DIR/$tdir/local.$i + bash -c "$2" + do_nodes $nodes "$LCTL set_param -n mdt.*.sync_count=0" + bash -c "$3" + sync_count=$(do_nodes $nodes \ + "lctl get_param -n mdt.*MDT*.sync_count" | calc_sum) + total=$((total + sync_count)); + rm -rf $DIR/$tdir/* + sync_all_data done - sync_count=$(do_facet mds1 "lctl get_param -n mdt.*MDT0000.sync_count") - echo "sync_count $sync_count" - [ $sync_count -eq 0 ] && error "Sync-Lock-Cancel not triggered" + echo $1 + echo " $2" + echo " $3" + echo " SoLC count $total" + (( total > 0 )) || error "$3 didn't trigger SoLC" +} + +test_33_run() { + echo $1 + echo " $2" + eval $2 +} + +test_33c() { + (( MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.7.63) )) || + skip "DNE CoS not supported" + + # LU-13522 + stop mds1 + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "start mds1 failed" + + mkdir_on_mdt0 $DIR/$tdir + sync_all_data + + if (( MDS1_VERSION < $(version_code 2.15.55.204) )); then + op_trigger_solc "create remote dir and local dir" \ + "$LFS mkdir -i 1 $DIR/$tdir/remote" \ + "$LFS mkdir -i 0 $DIR/$tdir/local" + (( MDSCOUNT > 2 )) && + op_trigger_solc "create remote dirs on different MDTs" \ + "$LFS mkdir -i 1 $DIR/$tdir/remote.1" \ + "$LFS mkdir -i 2 $DIR/$tdir/remote.2" + fi + op_trigger_solc "create file on 2nd stripe under striped directory" \ + "$LFS mkdir -i 0 -c 2 $DIR/$tdir/striped" \ + "touch $DIR2/$tdir/striped/subfile" + + echo + echo "Below operations shouldn't trigger Solc:" + $LFS mkdir -i 0 -c 2 $DIR/$tdir/striped sync_all_data do_facet mds1 "lctl set_param -n mdt.*.sync_count=0" - $LFS mkdir -i 1 $DIR/$tdir/remote.3 - # during sleep remote mkdir should have been committed and canceled - # remote lock spontaneously, which shouldn't trigger sync - sleep 6 - mkdir $DIR/$tdir/local.3 + if (( MDS1_VERSION >= $(version_code 2.15.55.204) )); then + test_33_run "create file on 2nd stripe after setattr" \ + "chmod 777 $DIR/$tdir/striped; \ + touch $DIR2/$tdir/striped/subfile" + test_33_run "create remote dir and local dir" \ + "$LFS mkdir -i 1 $DIR/$tdir/remote" \ + "$LFS mkdir -i 0 $DIR/$tdir/local" + (( MDSCOUNT > 2 )) && + test_33_run "create remote dirs on different MDTs" \ + "$LFS mkdir -i 1 $DIR/$tdir/remote.1" \ + "$LFS mkdir -i 2 $DIR/$tdir/remote.2" + fi + test_33_run "create local dir after remote dir creation transaction commit" \ + "$LFS mkdir -i 1 $DIR/$tdir/remote.3; \ + do_facet mds2 $LCTL set_param -n osd*.*MDT0001.force_sync 1; + mkdir $DIR/$tdir/local.3" sync_count=$(do_facet mds1 "lctl get_param -n mdt.*MDT0000.sync_count") - echo "sync_count $sync_count" + echo "Solc count $sync_count" [ $sync_count -eq 0 ] || error "Sync-Lock-Cancel triggered" } -run_test 33c "Cancel cross-MDT lock should trigger Sync-Lock-Cancel" +run_test 33c "Cancel cross-MDT lock should trigger Sync-on-Lock-Cancel" -# arg1 is operations done before CoS, arg2 is the operation that triggers CoS +# arg1 is description, arg2 is operations done before CoS, arg3 is the operation +# that triggers CoS op_trigger_cos() { local commit_nr local total=0 @@ -1046,9 +1488,9 @@ op_trigger_cos() { # trigger CoS twice in case transaction commit before unlock for i in 1 2; do - sh -c "$1" + bash -c "$2" do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0" - sh -c "$2" + bash -c "$3" commit_nr=$(do_nodes $nodes \ "lctl get_param -n mdt.*.async_commit_count" | calc_sum) total=$((total + commit_nr)); @@ -1056,76 +1498,101 @@ op_trigger_cos() { sync_all_data done - echo "CoS count $total" - [ $total -gt 0 ] || error "$2 didn't trigger CoS" + echo $1 + echo " $2" + echo " $3" + echo " CoS count $total" + (( total > 0 )) || error "$3 didn't trigger CoS" } test_33d() { - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.63) ] && - skip "DNE CoS not supported" && return - - # remote directory create - op_trigger_cos "mkdir $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir" - # remote directory unlink - op_trigger_cos "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR/$tdir" - # striped directory create - op_trigger_cos "mkdir $DIR/$tdir" "$LFS mkdir -c 2 $DIR/$tdir/subdir" - # striped directory setattr - op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; touch $DIR/$tdir" \ - "chmod 713 $DIR/$tdir" - # striped directory unlink - op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; touch $DIR/$tdir" \ - "rmdir $DIR/$tdir" - # cross-MDT link - op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; \ + (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.7.63) )) || + skip "DNE CoS not supported" + + if (( $MDS1_VERSION < $(version_code 2.15.55.133) )); then + op_trigger_cos "remote directory unlink" \ + "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR2/$tdir" + op_trigger_cos "striped directory create" "mkdir $DIR/$tdir" \ + "$LFS mkdir -c 2 $DIR2/$tdir/subdir" + op_trigger_cos "striped directory setattr" \ + "$LFS mkdir -c 2 $DIR/$tdir" "chmod 713 $DIR2/$tdir" + op_trigger_cos "striped directory unlink" \ + "$LFS mkdir -c 2 $DIR/$tdir" "rmdir $DIR2/$tdir" + op_trigger_cos "cross-MDT link" \ + "mkdir $DIR/$tdir; \ $LFS mkdir -i 0 $DIR/$tdir/d1; \ $LFS mkdir -i 1 $DIR/$tdir/d2; \ touch $DIR/$tdir/d1/tgt" \ - "ln $DIR/$tdir/d1/tgt $DIR/$tdir/d2/src" - # cross-MDT rename - op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; \ - $LFS mkdir -i 0 $DIR/$tdir/d1; \ - $LFS mkdir -i 1 $DIR/$tdir/d2; \ - touch $DIR/$tdir/d1/src" \ - "mv $DIR/$tdir/d1/src $DIR/$tdir/d2/tgt" - # migrate - op_trigger_cos "$LFS mkdir -i 0 $DIR/$tdir" \ - "$LFS migrate -m 1 $DIR/$tdir" + "ln $DIR2/$tdir/d1/tgt $DIR2/$tdir/d2/src" + fi + + op_trigger_cos "remote directory create" "$LFS mkdir -i 0 $DIR/$tdir" \ + "$LFS mkdir -i 1 $DIR2/$tdir/subdir" + op_trigger_cos "cross-MDT rename" \ + "mkdir $DIR/$tdir; \ + $LFS mkdir -i 0 $DIR/$tdir/d1; \ + $LFS mkdir -i 1 $DIR/$tdir/d2; \ + touch $DIR/$tdir/d1/src" \ + "mv $DIR2/$tdir/d1/src $DIR2/$tdir/d2/tgt" + op_trigger_cos "migrate" \ + "$LFS mkdir -i 0 $DIR/$tdir" \ + "$LFS migrate -m 1 $DIR2/$tdir" return 0 } -run_test 33d "DNE distributed operation should trigger COS" +run_test 33d "dependent transactions should trigger COS" test_33e() { - [ -n "$CLIENTS" ] || { skip "Need two or more clients" && return 0; } - [ $CLIENTCOUNT -ge 2 ] || - { skip "Need two or more clients, have $CLIENTCOUNT" && - return 0; } - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.63) ] && - skip "DNE CoS not supported" && return + (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.7.63) )) || + skip "DNE CoS not supported" - local client2=${CLIENT2:-$(hostname)} - - sync + $LFS mkdir -i 0 $DIR/$tdir + $LFS mkdir -i 0 $DIR/$tdir/d1 + $LFS mkdir -i 1 $DIR/$tdir/d2 local nodes=$(comma_list $(mdts_nodes)) do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0" - $LFS mkdir -c 2 $DIR/$tdir - mkdir $DIR/$tdir/subdir - echo abc > $DIR/$tdir/$tfile - do_node $client2 echo dfg >> $DIR/$tdir/$tfile - do_node $client2 touch $DIR/$tdir/subdir + test_33_run "plain dir creation" "mkdir $DIR2/$tdir/plain" + test_33_run "open file and write" "echo abc > $DIR2/$tdir/$tfile" + test_33_run "append write" "echo dfg >> $DIR2/$tdir/$tfile" + test_33_run "setattr" "touch $DIR2/$tdir/$tfile" + test_33_run "file unlink" "rm $DIR2/$tdir/$tfile" + test_33_run "plain dir unlink" "rmdir $DIR2/$tdir/plain" + if (( MDS1_VERSION >= $(version_code 2.15.55.133) )); then + test_33_run "striped directory creation" \ + "$LFS mkdir -i 0 -c 2 $DIR2/$tdir/striped" + test_33_run "set default LMV to create striped subdir" \ + "$LFS setdirstripe -D -c 2 $DIR/$tdir" + test_33_run "striped subdir creation" \ + "createmany -d $DIR/$tdir/subdir 100" + test_33_run "sub file creation and write" \ + "createmany -o $DIR/$tdir/subfile 100; \ + echo abc > $DIR/$tdir/subfile1" + test_33_run "sub file append write" \ + "echo dfg >> $DIR2/$tdir/subfile2" + test_33_run "subdir setatttr" "touch $DIR2/$tdir/subdir1" + test_33_run "subdir unlink" \ + "unlinkmany -d $DIR/$tdir/subdir 100" + test_33_run "sub file unlink" \ + "unlinkmany $DIR2/$tdir/subfile 100" + test_33_run "sub file creation follows striped dir chmod" \ + "chmod 777 $DIR/$tdir/striped; \ + touch $DIR/$tdir/striped/subfile" + test_33_run "striped directory unlink" \ + "rm -rf $DIR2/$tdir/striped" + fi + + test_33_run "directory unlink" "rm -rf $DIR2/$tdir" local async_commit_count=$(do_nodes $nodes \ "lctl get_param -n mdt.*.async_commit_count" | calc_sum) - [ $async_commit_count -gt 0 ] && error "CoS triggerred" - - return 0 + echo "CoS count $async_commit_count" + (( async_commit_count == 0 )) || error "CoS triggerred" } -run_test 33e "DNE local operation shouldn't trigger COS" +run_test 33e "independent transactions shouldn't trigger COS" # End commit on sharing tests @@ -1250,7 +1717,7 @@ test_35() { # bug 17645 for g in $gen; do if ! test "$g" -eq "${generation[count]}"; then list=$(lctl list_param mdc.$FSNAME-MDT*-mdc-*.import) - local c = 0 + local c=0 for imp in $list; do if [ $c = $count ]; then break @@ -1355,7 +1822,7 @@ test_39a() { if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } -run_test 39a "test from 11063 ==================================" +run_test 39a "file mtime does not change after rename" test_39b() { local client1=${CLIENT1:-`hostname`} @@ -1382,7 +1849,7 @@ test_39b() { if [ $i = 0 ] ; then echo "repeat after cancel_lru_locks"; fi done } -run_test 39b "11063 problem 1 ==================================" +run_test 39b "file mtime the same on clients with/out lock" test_39c() { local client1=${CLIENT1:-`hostname`} @@ -1439,13 +1906,32 @@ test_39d() { # LU-7310 } run_test 39d "sync write should update mtime" +pdo_sched() { + # how long 40-47 take with specific delay + # sleep 0.1 # 78s + # sleep 0.2 # 103s + # sleep 0.3 # 124s + sleep 0.5 # 164s +} + +# for pdo testing, we must cancel MDT-MDT locks as well as client locks to +# avoid unexpected delays due to previous tests +pdo_lru_clear() { + cancel_lru_locks mdc + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param -n ldlm.namespaces.*mdt*.lru_size=clear + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL get_param ldlm.namespaces.*mdt*.lock_unused_count \ + ldlm.namespaces.*mdt*.lock_count | grep -v '=0' +} + # check that pid exists hence second operation wasn't blocked by first one # if it is so then there is no conflict, return 0 # else second operation is conflicting with first one, return 1 check_pdo_conflict() { local pid=$1 local conflict=0 - sleep 1 # to ensure OP1 is finished on client if OP2 is blocked by OP1 + pdo_sched # to ensure OP1 is finished on client if OP2 is blocked by OP1 if [[ `ps --pid $pid | wc -l` == 1 ]]; then conflict=1 echo "Conflict" @@ -1459,120 +1945,139 @@ check_pdo_conflict() { # test 40: check non-blocking operations test_40a() { remote_mds_nodsh && skip "remote MDS with nodsh" && return -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - touch $DIR2 - mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - touch $DIR2/$tfile-2 + + mkdir_on_mdt0 $DIR2/$tdir + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + mkdir $DIR1/$tdir/$tfile & + PID1=$!; pdo_sched + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one check_pdo_conflict $PID1 || error "parallel operation is blocked" + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40a "pdirops: create vs others ==============" test_40b() { remote_mds_nodsh && skip "remote MDS with nodsh" && return -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - touch $DIR1/$tfile & - PID1=$! - sleep 1 + + mkdir_on_mdt0 $DIR2/$tdir + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + touch $DIR1/$tdir/$tfile & + PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one check_pdo_conflict $PID1 || error "parallel operation is blocked" + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40b "pdirops: open|create and others ==============" test_40c() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - link $DIR1/$tfile $DIR1/$tfile-0 & - PID1=$! - sleep 1 + + mkdir_on_mdt0 $DIR2/$tdir + pdo_lru_clear + touch $DIR1/$tdir/$tfile +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + link $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & + PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one check_pdo_conflict $PID1 || error "parallel operation is blocked" + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40c "pdirops: link and others ==============" test_40d() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - rm $DIR1/$tfile & - PID1=$! - sleep 1 + + mkdir_on_mdt0 $DIR2/$tdir + pdo_lru_clear + touch $DIR1/$tdir/$tfile +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + rm $DIR1/$tdir/$tfile & + PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one check_pdo_conflict $PID1 || error "parallel operation is blocked" + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 return 0 } @@ -1580,357 +2085,523 @@ run_test 40d "pdirops: unlink and others ==============" test_40e() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - mv $DIR1/$tfile $DIR1/$tfile-0 & - PID1=$! - sleep 1 + + mkdir_on_mdt0 $DIR2/$tdir + pdo_lru_clear + touch $DIR1/$tdir/$tfile +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + mv $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & + PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-2 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-2 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one check_pdo_conflict $PID1 || error "parallel operation is blocked" + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40e "pdirops: rename and others ==============" # test 41: create blocking operations test_41a() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile && error "mkdir must fail" + PID1=$! ; pdo_sched + mkdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; echo "mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "mkdir must fail" rm -rf $DIR/$tfile* return 0 } run_test 41a "pdirops: create vs mkdir ==============" test_41b() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c && error "create must fail" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "create must fail" rm -rf $DIR/$tfile* return 0 } run_test 41b "pdirops: create vs create ==============" test_41c() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - link $DIR2/$tfile-2 $DIR2/$tfile && error "link must fail" + PID1=$! ; pdo_sched + link $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "link must fail" rm -rf $DIR/$tfile* return 0 } run_test 41c "pdirops: create vs link ==============" test_41d() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - rm $DIR2/$tfile || error "unlink must succeed" + PID1=$! ; pdo_sched + rm $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "unlink must succeed" rm -rf $DIR/$tfile* return 0 } run_test 41d "pdirops: create vs unlink ==============" test_41e() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - mv $DIR2/$tfile-2 $DIR2/$tfile || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 41e "pdirops: create and rename (tgt) ==============" test_41f() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-2 || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-2 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 41f "pdirops: create and rename (src) ==============" test_41g() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null || error "stat must succeed" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" rm -rf $DIR/$tfile* return 0 } run_test 41g "pdirops: create vs getattr ==============" test_41h() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $MULTIOP $DIR1/$tfile oO_CREAT:O_RDWR:c & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } run_test 41h "pdirops: create vs readdir ==============" +sub_test_41i() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + $MULTIOP $DIR1/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID1=$! + sleep 0.2 + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID2=$! + + if ! wait $PID1 && ! wait $PID2; then + echo "Both creates failed (1 should fail, 1 should succeed)" + ret=1 + elif wait $PID1 && wait $PID2; then + echo "Both creates succeeded (1 should fail, 1 should succeed)" + ret=2 + fi + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -f $DIR/$tfile + + return $ret +} + +test_41i() { + (( $MDS1_VERSION >= $(version_code 2.13.56) )) || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_41i "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 41i "reint_open: create vs create" + + # test 42: unlink and blocking operations test_42a() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile && error "mkdir must fail" + PID1=$! ; pdo_sched + mkdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "mkdir must fail" rm -rf $DIR/$tfile* return 0 } run_test 42a "pdirops: mkdir vs mkdir ==============" test_42b() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c && error "create must fail" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "create must fail" rm -rf $DIR/$tfile* return 0 } run_test 42b "pdirops: mkdir vs create ==============" test_42c() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - link $DIR2/$tfile-2 $DIR2/$tfile && error "link must fail" + PID1=$! ; pdo_sched + link $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "link must fail" rm -rf $DIR/$tfile* return 0 } run_test 42c "pdirops: mkdir vs link ==============" test_42d() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - rmdir $DIR2/$tfile || error "unlink must succeed" + PID1=$! ; pdo_sched + rmdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "unlink must succeed" rm -rf $DIR/$tfile* return 0 } run_test 42d "pdirops: mkdir vs unlink ==============" test_42e() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - mv -T $DIR2/$tfile-2 $DIR2/$tfile && error "rename must fail" + PID1=$! ; pdo_sched + mv -T $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "rename must fail" rm -rf $DIR/$tfile* return 0 } run_test 42e "pdirops: mkdir and rename (tgt) ==============" test_42f() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-2 || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-2 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 42f "pdirops: mkdir and rename (src) ==============" test_42g() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null || error "stat must succeed" + mkdir_on_mdt0 $DIR1/$tdir + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" + mkdir $DIR1/$tdir/$tfile & + PID1=$! ; pdo_sched + stat $DIR2/$tdir/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } - rm -rf $DIR/$tfile* - return 0 + wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" + rm -rf $DIR/$tdir } run_test 42g "pdirops: mkdir vs getattr ==============" test_42h() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mkdir $DIR1/$tfile & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } run_test 42h "pdirops: mkdir vs readdir ==============" -# test 43: unlink and blocking operations +# test 43: rmdir,mkdir won't return -EEXIST test_43a() { - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - rm $DIR1/$tfile & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile || error "mkdir must succeed" - check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } - rm -rf $DIR/$tfile* + for i in {1..1000}; do + mkdir $DIR1/$tdir || error "mkdir $tdir failed" + rmdir $DIR2/$tdir || error "rmdir $tdir failed" + done return 0 } -run_test 43a "pdirops: unlink vs mkdir ==============" +run_test 43a "rmdir,mkdir doesn't return -EEXIST ==============" test_43b() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c || error "create must succeed" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "create must succeed" rm -rf $DIR/$tfile* return 0 } run_test 43b "pdirops: unlink vs create ==============" test_43c() { + pdo_lru_clear touch $DIR1/$tfile touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - link $DIR2/$tfile-2 $DIR2/$tfile || error "link must succeed" + PID1=$! ; pdo_sched + link $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "link must succeed" rm -rf $DIR/$tfile* return 0 } run_test 43c "pdirops: unlink vs link ==============" test_43d() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - rm $DIR2/$tfile && error "unlink must fail" + PID1=$! ; pdo_sched + rm $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "unlink must fail" rm -rf $DIR/$tfile* return 0 } run_test 43d "pdirops: unlink vs unlink ==============" test_43e() { + pdo_lru_clear touch $DIR1/$tfile touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - mv -u $DIR2/$tfile-2 $DIR2/$tfile || error "rename must succeed" + PID1=$! ; pdo_sched + mv -u $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 43e "pdirops: unlink and rename (tgt) ==============" test_43f() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-2 && error "rename must fail" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-2 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "rename must fail" rm -rf $DIR/$tfile* return 0 } run_test 43f "pdirops: unlink and rename (src) ==============" test_43g() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null && error "stat must fail" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "stat must fail" rm -rf $DIR/$tfile* return 0 } run_test 43g "pdirops: unlink vs getattr ==============" test_43h() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } @@ -1938,132 +2609,258 @@ run_test 43h "pdirops: unlink vs readdir ==============" test_43i() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" rm $DIR1/$tfile & - PID1=$! - sleep 1 - $LFS mkdir -i 1 $DIR2/$tfile || error "remote mkdir must succeed" + PID1=$! ; pdo_sched + $LFS mkdir -i 1 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "remote mkdir isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "remote mkdir must succeed" rm -rf $DIR/$tfile* return 0 } run_test 43i "pdirops: unlink vs remote mkdir" +test_43j() { + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] && + skip "Need MDS version newer than 2.13.52" + + mkdir_on_mdt0 $DIR1/$tdir + for i in {1..100}; do +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_CREATE_RACE 0x167 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000167 2>/dev/null || + true" + OK=0 + mkdir $DIR1/$tdir/sub & + PID1=$! + mkdir $DIR2/$tdir/sub && ((OK++)) + wait $PID1 && ((OK++)) + (( OK == 1 )) || error "exactly one mkdir should succeed" + + rmdir $DIR1/$tdir/sub || error "rmdir failed" + done + return 0 +} +run_test 43j "racy mkdir return EEXIST ==============" + +sub_test_43k() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a separate directory to be able to unblock server thread in + # cfs_race() if LCK_PW is taken on the parent by mdt_reint_unlink. + test_mkdir $DIR2/$tdir + touch $DIR2/$tdir/$tfile + pdo_lru_clear + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + echo content > $DIR1/$tdir/$tfile & PID1=$! + pdo_sched + multiop $DIR2/$tdir/$tfile u & PID2=$! + + wait $PID1 || + { ret=$?; \ + echo -n "overwriting $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "unlinking $tfile should succeed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_43k() { + (( $MDS1_VERSION >= $(version_code 2.13.56) )) || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_43k "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done + + #Clean + reset_fail_loc + + return 0 +} +run_test 43k "unlink vs create" + # test 44: rename tgt and blocking operations test_44a() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile && error "mkdir must fail" - check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } + PID1=$! ; pdo_sched + mkdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" + check_pdo_conflict $PID1 && { wait $PID1; date;error "mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "mkdir must fail" + date rm -rf $DIR/$tfile* return 0 } run_test 44a "pdirops: rename tgt vs mkdir ==============" test_44b() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c && error "create must fail" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "create must fail" rm -rf $DIR/$tfile* return 0 } run_test 44b "pdirops: rename tgt vs create ==============" test_44c() { + pdo_lru_clear touch $DIR1/$tfile-2 touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - link $DIR2/$tfile-3 $DIR2/$tfile && error "link must fail" + PID1=$! ; pdo_sched + link $DIR2/$tfile-3 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "link must fail" rm -rf $DIR/$tfile* return 0 } run_test 44c "pdirops: rename tgt vs link ==============" test_44d() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - rm $DIR2/$tfile || error "unlink must succeed" + PID1=$! ; pdo_sched + rm $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "unlink must succeed" rm -rf $DIR/$tfile* return 0 } run_test 44d "pdirops: rename tgt vs unlink ==============" test_44e() { + pdo_lru_clear touch $DIR1/$tfile touch $DIR1/$tfile-2 touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile-3 $DIR2/$tfile || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile-3 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 44e "pdirops: rename tgt and rename (tgt) ==============" test_44f() { + pdo_lru_clear touch $DIR1/$tfile-2 touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-3 || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-3 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 44f "pdirops: rename tgt and rename (src) ==============" test_44g() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null || error "stat must succeed" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" rm -rf $DIR/$tfile* return 0 } run_test 44g "pdirops: rename tgt vs getattr ==============" test_44h() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } @@ -2072,130 +2869,166 @@ run_test 44h "pdirops: rename tgt vs readdir ==============" # test 44: rename tgt and blocking operations test_44i() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000146 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK2 0x146 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000146 2>/dev/null || true" mv $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - $LFS mkdir -i 1 $DIR2/$tfile && error "remote mkdir must fail" + PID1=$! ; pdo_sched + $LFS mkdir -i 1 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "remote mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "remote mkdir must fail" rm -rf $DIR/$tfile* return 0 } run_test 44i "pdirops: rename tgt vs remote mkdir" -# test 45: rename src and blocking operations +# test 45: rename,mkdir doesn't fail with -EEXIST test_45a() { - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile || error "mkdir must succeed" - check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } - rm -rf $DIR/$tfile* + for i in {1..1000}; do + mkdir $DIR1/$tdir || error "mkdir $tdir failed" + mrename $DIR2/$tdir $DIR2/$tdir.$i > /dev/null || + error "mrename to $tdir.$i failed" + done + rm -rf $DIR/$tdir* return 0 } -run_test 45a "pdirops: rename src vs mkdir ==============" +run_test 45a "rename,mkdir doesn't return -EEXIST ==============" test_45b() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c || error "create must succeed" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "create must succeed" rm -rf $DIR/$tfile* return 0 } run_test 45b "pdirops: rename src vs create ==============" test_45c() { + pdo_lru_clear touch $DIR1/$tfile touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - link $DIR2/$tfile-3 $DIR2/$tfile || error "link must succeed" + PID1=$! ; pdo_sched + link $DIR2/$tfile-3 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "link must succeed" rm -rf $DIR/$tfile* return 0 } run_test 45c "pdirops: rename src vs link ==============" test_45d() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - rm $DIR2/$tfile && error "unlink must fail" + PID1=$! ; pdo_sched + rm $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "unlink must fail" rm -rf $DIR/$tfile* return 0 } run_test 45d "pdirops: rename src vs unlink ==============" test_45e() { + pdo_lru_clear touch $DIR1/$tfile touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - mv $DIR2/$tfile-3 $DIR2/$tfile || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile-3 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 45e "pdirops: rename src and rename (tgt) ==============" test_45f() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-3 && error "rename must fail" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-3 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "rename must fail" rm -rf $DIR/$tfile* return 0 } run_test 45f "pdirops: rename src and rename (src) ==============" test_45g() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null && error "stat must fail" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "stat must fail" rm -rf $DIR/$tfile* return 0 } run_test 45g "pdirops: rename src vs getattr ==============" test_45h() { + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } @@ -2203,131 +3036,230 @@ run_test 45h "pdirops: unlink vs readdir ==============" test_45i() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! - sleep 1 - $LFS mkdir -i 1 $DIR2/$tfile || error "create remote dir must succeed" + PID1=$! ; pdo_sched + $LFS mkdir -i 1 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create remote dir isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "create remote dir must succeed" rm -rf $DIR/$tfile* return 0 } run_test 45i "pdirops: rename src vs remote mkdir" +sub_test_45j() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a sparate directory to be able to unblock server thread in + # cfs_race if LCK_PW is taken on the parent by mdt_reint_rename. + test_mkdir $DIR2/$tdir + echo file1 > $DIR2/$tdir/$tfile + echo file2 > $DIR2/$tdir/$tfile-2 + pdo_lru_clear + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + cat $DIR1/$tdir/$tfile >/dev/null & + PID1=$! + pdo_sched + mrename $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile > /dev/null & + PID2=$! + + wait $PID1 || + { ret=$?; echo -n "cat $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "mrename $tfile-2 to $tfile failed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_45j() { + (( $MDS1_VERSION >= $(version_code 2.13.56) )) || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_45j "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 45j "read vs rename ==============" + # test 46: link and blocking operations test_46a() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile && error "mkdir must fail" + PID1=$! ; pdo_sched + mkdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "mkdir must fail" rm -rf $DIR/$tfile* return 0 } run_test 46a "pdirops: link vs mkdir ==============" test_46b() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c && error "create must fail" + PID1=$! ; pdo_sched + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "create must fail" rm -rf $DIR/$tfile* return 0 } run_test 46b "pdirops: link vs create ==============" test_46c() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - link $DIR2/$tfile $DIR2/$tfile && error "link must fail" + PID1=$! ; pdo_sched + link $DIR2/$tfile $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "link must fail" rm -rf $DIR/$tfile* return 0 } run_test 46c "pdirops: link vs link ==============" test_46d() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - rm $DIR2/$tfile || error "unlink must succeed" + PID1=$! ; pdo_sched + rm $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "unlink must succeed" rm -rf $DIR/$tfile* return 0 } run_test 46d "pdirops: link vs unlink ==============" test_46e() { + pdo_lru_clear touch $DIR1/$tfile-2 touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile-3 $DIR2/$tfile || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile-3 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 46e "pdirops: link and rename (tgt) ==============" test_46f() { + pdo_lru_clear touch $DIR1/$tfile-2 touch $DIR1/$tfile-3 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-3 || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-3 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } run_test 46f "pdirops: link and rename (src) ==============" test_46g() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null || error "stat must succeed" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" rm -rf $DIR/$tfile* return 0 } run_test 46g "pdirops: link vs getattr ==============" test_46h() { + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - ls -lia $DIR2/ > /dev/null - check_pdo_conflict $PID1 && { wait $PID1; - error "readdir isn't blocked"; } + PID1=$! ; pdo_sched + ls -lia $DIR2/ > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" + check_pdo_conflict $PID1 && { wait $PID1; error "readdir isn't blocked"; } + wait $PID2 rm -rf $DIR/$tfile* return 0 } @@ -2335,15 +3267,20 @@ run_test 46h "pdirops: link vs readdir ==============" test_46i() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" link $DIR1/$tfile-2 $DIR1/$tfile & - PID1=$! - sleep 1 - $LFS mkdir -i 1 $DIR2/$tfile && error "remote mkdir must fail" + PID1=$! ; pdo_sched + $LFS mkdir -i 1 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "remote mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "remote mkdir must fail" rm -rf $DIR/$tfile* return 0 } @@ -2351,29 +3288,40 @@ run_test 46i "pdirops: link vs remote mkdir" # test 47: remote mkdir and blocking operations test_47a() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - mkdir $DIR2/$tfile && error "mkdir must fail" + PID1=$! ; pdo_sched + mkdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "mkdir must fail" rm -rf $DIR/$tfile* return 0 } run_test 47a "pdirops: remote mkdir vs mkdir" test_47b() { -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - multiop $DIR2/$tfile oO_CREAT:O_EXCL:c && error "create must fail" + PID1=$! ; pdo_sched + sleep 1 # please do not remove this sleep, see LU-10754 + multiop $DIR2/$tfile oO_CREAT:O_EXCL:c & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "create isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "create must fail" rm -rf $DIR/$tfile* return 0 } @@ -2381,14 +3329,19 @@ run_test 47b "pdirops: remote mkdir vs create" test_47c() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - link $DIR2/$tfile-2 $DIR2/$tfile && error "link must fail" + PID1=$! ; pdo_sched + link $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "link isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "link must fail" rm -rf $DIR/$tfile* return 0 } @@ -2396,14 +3349,19 @@ run_test 47c "pdirops: remote mkdir vs link" test_47d() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - rmdir $DIR2/$tfile || error "unlink must succeed" + PID1=$! ; pdo_sched + rmdir $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "unlink isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rmdir must succeed" rm -rf $DIR/$tfile* return 0 } @@ -2411,15 +3369,20 @@ run_test 47d "pdirops: remote mkdir vs unlink" test_47e() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + pdo_lru_clear touch $DIR1/$tfile-2 -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - mv -T $DIR2/$tfile-2 $DIR2/$tfile && error "rename must fail" + PID1=$! ; pdo_sched + mv -T $DIR2/$tfile-2 $DIR2/$tfile & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -ne 0 ] || error "rename must fail" rm -rf $DIR/$tfile* return 0 } @@ -2427,14 +3390,19 @@ run_test 47e "pdirops: remote mkdir and rename (tgt)" test_47f() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - mv $DIR2/$tfile $DIR2/$tfile-2 || error "rename must succeed" + PID1=$! ; pdo_sched + mv $DIR2/$tfile $DIR2/$tfile-2 & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "rename isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "rename must succeed" rm -rf $DIR/$tfile* return 0 } @@ -2442,14 +3410,21 @@ run_test 47f "pdirops: remote mkdir and rename (src)" test_47g() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 + sync + sync_all_data + pdo_lru_clear +#define CFS_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" $LFS mkdir -i 1 $DIR1/$tfile & - PID1=$! - sleep 1 - stat $DIR2/$tfile > /dev/null || error "stat must succeed" + PID1=$! ; pdo_sched + stat $DIR2/$tfile > /dev/null & + PID2=$! ; pdo_sched + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } + wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" rm -rf $DIR/$tfile* return 0 } @@ -2502,29 +3477,32 @@ test_51a() { run_test 51a "layout lock: refresh layout should work" test_51b() { - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.59) ]] || - { skip "Need MDS version at least 2.3.59"; return 0; } + (( $MDS1_VERSION >= $(version_code 2.3.59) )) || + skip "Need MDS version at least 2.3.59" local tmpfile=`mktemp` - # create an empty file - $MCREATE $DIR1/$tfile || error "mcreate $DIR1/$tfile failed" + $LFS setstripe -E 1m -S 1M -c 1 -E -1 -c 1 $DIR1/$tfile || + error "Create $DIR1/$tfile failed" + + dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc || + error "dd $DIR1/$tfile failed" # delay glimpse so that layout has changed when glimpse finish #define OBD_FAIL_GLIMPSE_DELAY 0x1404 - $LCTL set_param fail_loc=0x1404 + $LCTL set_param fail_loc=0x1404 fail_val=4 stat -c %s $DIR2/$tfile |tee $tmpfile & local pid=$! - sleep 1 + sleep 0.2 - # create layout of testing file - dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc >/dev/null || + # extend layout of testing file + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=1 seek=2 conv=notrunc || error "dd $DIR1/$tfile failed" wait $pid local fsize=$(cat $tmpfile) - [ x$fsize = x1024 ] || error "file size is $fsize, should be 1024" + [ x$fsize = x3145728 ] || error "file size is $fsize, should be 3145728" rm -f $DIR1/$tfile $tmpfile } @@ -2591,6 +3569,30 @@ test_51d() { } run_test 51d "layout lock: losing layout lock should clean up memory map region" +test_51e() { + (( $MDS1_VERSION >= $(version_code 2.13.54.148) )) || + skip "MDS version must be at least 2.13.54.148" + + local pid + + $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eW_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR2/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" + + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR2/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" +} +run_test 51e "lfs getstripe does not break leases, part 2" + test_54_part1() { echo "==> rename vs getattr vs setxattr should not deadlock" @@ -2660,56 +3662,59 @@ test_54() { run_test 54 "rename locking" test_55a() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 - rm -r $DIR2/d3 + rm -r $DIR2/$tdir/d3 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55a "rename vs unlink target dir" test_55b() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 - rm -r $DIR2/d1 + rm -r $DIR2/$tdir/d1 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d3 + rm -rf $DIR/$tdir } run_test 55b "rename vs unlink source dir" test_55c() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 # while rename is sleeping, open and remove d3 - $MULTIOP $DIR2/d3 D_c & + $MULTIOP $DIR2/$tdir/d3 D_c & PID2=$! sleep 1 - rm -rf $DIR2/d3 + rm -rf $DIR2/$tdir/d3 sleep 5 # while rename is sleeping 2nd time, close d3 @@ -2718,35 +3723,54 @@ test_55c() wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55c "rename vs unlink orphan target dir" test_55d() { - touch $DIR/f1 + mkdir_on_mdt0 $DIR/$tdir + + touch $DIR/$tdir/f1 #define OBD_FAIL_MDS_RENAME3 0x155 do_facet mds1 $LCTL set_param fail_loc=0x155 - mv $DIR/f1 $DIR/$tdir & + mv $DIR/$tdir/f1 $DIR/$tdir/$tdir & PID1=$! sleep 2 # while rename is sleeping, create $tdir, but as a directory - mkdir -p $DIR2/$tdir || error "(1) mkdir failed" + mkdir -p $DIR2/$tdir/$tdir || error "(1) mkdir failed" # link in reverse locking order - ln $DIR2/f1 $DIR2/$tdir/ + ln $DIR2/$tdir/f1 $DIR2/$tdir/$tdir/f1 || error "(2) ln failed" - wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/f1 + ! wait $PID1 || error "(3) mv succeeded" + rm -rf $DIR/$tdir } run_test 55d "rename file vs link" +test_56a() { + $LFS setstripe -c 1 $MOUNT/$tfile || error "creating $MOUNT/$tfile" + stack_trap "rm -f $MOUNT/$tfile" + echo "run_llverdev $MOUNT/$tfile -p -s $((16000000)) -c 4k" + run_llverdev $MOUNT/$tfile -p -s $((16000000)) -c 4k || + error "llverdev failed with rc=$?" +} +run_test 56a "test llverdev with single large stripe" + +test_56b() { + $LFS setstripe -C 2000 $MOUNT/$tfile || error "creating $MOUNT/$tfile" + stack_trap "rm -f $MOUNT/$tfile" + echo "run_llverdev $MOUNT/$tfile -p -s $((16000000 * OSTCOUNT)) -c 4k" + run_llverdev $MOUNT/$tfile -p -s $((16000000 * OSTCOUNT)) -c 4k || + error "llverdev failed with rc=$?" +} +run_test 56b "test llverdev and partial verify of wide stripe file" + test_60() { - local MDSVER=$(lustre_build_version $SINGLEMDS) - [ $(version_code $MDSVER) -lt $(version_code 2.3.0) ] && - skip "MDS version $MDSVER must be >= 2.3.0" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.3.0) ] && + skip "MDS version must be >= 2.3.0" # Create a file test_mkdir $DIR1/$tdir @@ -2832,42 +3856,39 @@ test_70b() { # LU-2781 run_test 70b "remove files after calling rm_entry" test_71a() { - local server_version=$(lustre_version_code $SINGLEMDS) - - [[ $server_version -lt $(version_code 2.1.6) ]] && - skip "Need MDS version at least 2.1.6" && return + [[ "$MDS1_VERSION" -lt $(version_code 2.1.6) ]] && + skip "Need MDS version at least 2.1.6" # Patch not applied to 2.2 and 2.3 branches - [[ $server_version -ge $(version_code 2.2.0) ]] && - [[ $server_version -lt $(version_code 2.4.0) ]] && - skip "Need MDS version earlier than 2.2.0 or at least 2.4.0" && - return + [[ "$MDS1_VERSION" -ge $(version_code 2.2.0) ]] && + [[ "$MDS1_VERSION" -lt $(version_code 2.4.0) ]] && + skip "Need MDS version earlier than 2.2.0 or at least 2.4.0" checkfiemap --test || - { skip "checkfiemap not runnable: $?" && return; } + skip "checkfiemap not runnable: $?" # write data this way: hole - data - hole - data - dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1 - [ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR1/$tfile) + 1)))" = \ + dd if=/dev/urandom of=$DIR1/$tfile bs=64K seek=1 count=1 + [ "$(facet_fstype ost$(($($LFS getstripe -i $DIR1/$tfile) + 1)))" = \ "zfs" ] && skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return 0 - dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1 + dd if=/dev/urandom of=$DIR1/$tfile bs=64K seek=3 count=1 GET_STAT="lctl get_param -n ldlm.services.ldlm_cbd.stats" stat $DIR2/$tfile local can1=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') echo $can1 - checkfiemap $DIR2/$tfile 81920 || + checkfiemap $DIR2/$tfile 131072 || error "data is not flushed from client" local can2=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') echo $can2 # common case of "create file, copy file" on a single node # should not flush data from ost - dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1 - dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1 + dd if=/dev/urandom of=$DIR1/$tfile bs=64K seek=1 count=1 + dd if=/dev/urandom of=$DIR1/$tfile bs=64K seek=3 count=1 stat $DIR1/$tfile local can3=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') echo $can3 - checkfiemap $DIR1/$tfile 81920 || + checkfiemap $DIR1/$tfile 131072 || error 4 local can4=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') echo $can2 @@ -2876,32 +3897,91 @@ test_71a() { run_test 71a "correct file map just after write operation is finished" test_71b() { - local server_version=$(lustre_version_code $SINGLEMDS) - - [[ $server_version -lt $(version_code 2.1.6) ]] && - skip "Need MDS version at least 2.1.6" && return + [[ "$MDS1_VERSION" -lt $(version_code 2.1.6) ]] && + skip "Need MDS version at least 2.1.6" # Patch not applied to 2.2 and 2.3 branches - [[ $server_version -ge $(version_code 2.2.0) ]] && - [[ $server_version -lt $(version_code 2.4.0) ]] && - skip "Need MDS version earlier than 2.2.0 or at least 2.4.0" && - return - [[ $OSTCOUNT -ge 2 ]] || { skip "needs >= 2 OSTs"; return; } + [[ "$MDS1_VERSION" -ge $(version_code 2.2.0) ]] && + [[ "$MDS1_VERSION" -lt $(version_code 2.4.0) ]] && + skip "Need MDS version earlier than 2.2.0 or at least 2.4.0" + [[ $OSTCOUNT -ge 2 ]] || skip "needs >= 2 OSTs" checkfiemap --test || - { skip "error $?: checkfiemap failed" && return; } + skip "error $?: checkfiemap failed" mkdir -p $DIR1/$tdir $LFS setstripe -c -1 $DIR1/$tdir || error "setstripe failed" - dd if=/dev/urandom of=$DIR1/$tdir/$tfile bs=40K count=1 - [ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR1/$tdir/$tfile) + 1)))" = \ + dd if=/dev/urandom of=$DIR1/$tdir/$tfile bs=64K count=1 + [ "$(facet_fstype ost$(($($LFS getstripe -i $DIR1/$tdir/$tfile) + 1)))" = \ "zfs" ] && skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return 0 - checkfiemap $DIR1/$tdir/$tfile 40960 || error "checkfiemap failed" + checkfiemap $DIR1/$tdir/$tfile 65536 || error "checkfiemap failed" } run_test 71b "check fiemap support for stripecount > 1" +_check_last_flag_with_filefrag() +{ + local file=$1 + local count=$2 + local i + local offset + + echo "check last flag for file with $count extents" + rm -f $file + for ((i=0; i<$count; i++)); do + offset=$((i * 256)) + dd if=/dev/zero of=$file bs=4K count=1 seek=$offset 2> /dev/null + done + + filefrag -s -v $file | grep "last" || + error "test file with $i extents failed" + + rm -f $file +} + +test_71c() { + local file="$DIR1/$tdir/$tfile" + + (( $CLIENT_VERSION >= $(version_code 2.15.57) )) || + skip "Need client version >= 2.15.57" + [ $(facet_fstype ost1) = "ldiskfs" ] || + skip "support only ldiskfs ost" + filefrag -V | grep wc || + skip "need whamcloud version of e2fsprogs" + + mkdir -p $DIR1/$tdir + + # filefrag uses u64[2028] buffer to fetch fiemap. The number of extents + # in the buffer is (8 * 2048 - 32) / 56 = 292. Test file with 291, 292 + # and 293 extents + _check_last_flag_with_filefrag $file 291 + _check_last_flag_with_filefrag $file 292 + _check_last_flag_with_filefrag $file 293 +} +run_test 71c "check FIEMAP_EXTENT_LAST flag with different extents number" + +test_71d() { #LU-17110 + checkfiemap --test || + skip "error $?: checkfiemap failed" + + local f=$DIR/$tfile + + # write data this way: hole - data - hole - data + dd if=/dev/urandom of=$f bs=64K count=1 + [[ "$(facet_fstype ost$(($($LFS getstripe -i $f) + 1)))" != "zfs" ]] || + skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" + dd if=/dev/urandom of=$f bs=64K seek=2 count=1 + dd if=/dev/urandom of=$f bs=64K seek=4 count=1 + dd if=/dev/urandom of=$f bs=64K seek=6 count=1 conv=fsync + echo "disk usage: $(du -B1 $f)" + echo "file size: $(du -b $f)" + + checkfiemap --corruption_test $f $((4 * 64 *1024)) || + error "checkfiemap failed" +} +run_test 71d "fiemap corruption test with fm_extent_count=0" + test_72() { local p="$TMP/sanityN-$TESTNAME.parameters" local tlink1 @@ -2948,12 +4028,15 @@ test_73() { clear_stats llite.*.stats # PR lock should be cached by now on both clients getfattr -n user.attr1 $DIR1/$tfile || error "getfattr3 failed" - # 2 hits for getfattr(0)+getfattr(size) - [ $(calc_stats llite.*.stats getxattr_hits) -eq 2 ] || + # At least 2 hits for getfattr(0)+getfattr(size) + # There may be more if auditd has a filesystem-related rule enabled + (( $(calc_stats llite.*.stats getxattr_hits) >= 2 )) || error "not cached in $DIR1" + clear_stats llite.*.stats getfattr -n user.attr1 $DIR2/$tfile || error "getfattr4 failed" - # 4 hits for more getfattr(0)+getfattr(size) - [ $(calc_stats llite.*.stats getxattr_hits) -eq 4 ] || + # At least 2 hits for getfattr(0)+getfattr(size) + # There may be more if auditd has a filesystem-related rule enabled + (( $(calc_stats llite.*.stats getxattr_hits) >= 2 )) || error "not cached in $DIR2" rm -f $DIR2/$tfile @@ -2963,8 +4046,8 @@ test_73() { run_test 73 "getxattr should not cause xattr lock cancellation" test_74() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.93) ] && - skip "Need MDS version at least 2.4.93" && return + [ "$MDS1_VERSION" -lt $(version_code 2.4.93) ] && + skip "Need MDS version at least 2.4.93" dd if=/dev/zero of=$DIR1/$tfile-1 bs=1K count=1 dd if=/dev/zero of=$DIR1/$tfile-2 bs=1K count=1 @@ -2995,10 +4078,10 @@ test_75() { run_test 75 "osc: upcall after unuse lock===================" test_76() { #LU-946 - [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.53) ]] && - skip "Need MDS version at least 2.5.53" && return + [[ "$MDS1_VERSION" -lt $(version_code 2.5.53) ]] && + skip "Need MDS version at least 2.5.53" - remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" local fcount=$((MDSCOUNT * 256)) declare -a fd_list declare -a fid_list @@ -3082,37 +4165,63 @@ nrs_write_read() { local n=16 local dir=$DIR/$tdir local myRUNAS="$1" + local create_as="$2" mkdir $dir || error "mkdir $dir failed" $LFS setstripe -c $OSTCOUNT $dir || error "setstripe to $dir failed" chmod 777 $dir + if [[ -n "$create_as" ]]; then + do_nodes $CLIENTS $create_as "touch $dir/nrs_r_\$HOSTNAME;" || + error "touch failed for $dir/nrs_r_*" + do_nodes $CLIENTS $create_as touch "$dir/nrs_w_\$HOSTNAME" || + error "touch failed for $dir/nrs_w_*" + do_nodes $CLIENTS $create_as "chmod 777 $dir/nrs_*_\$HOSTNAME;" || + error "chmod failed for $dir/nrs_*" + fi + do_nodes $CLIENTS $myRUNAS \ - dd if=/dev/zero of="$dir/nrs_r_$HOSTNAME" bs=1M count=$n || + dd if=/dev/zero of="$dir/nrs_r_\$HOSTNAME" bs=1M count=$n || error "dd at 0 on client failed (1)" - for ((i = 0; i < $n; i++)); do - do_nodes $CLIENTS $myRUNAS dd if=/dev/zero \ - of="$dir/nrs_w_$HOSTNAME" bs=1M seek=$i count=1 || - error "dd at ${i}MB on client failed (2)" & - local pids_w[$i]=$! - done + do_nodes $CLIENTS $myRUNAS \ + "declare -a pids_w; + for ((i = 0; i < $n; i++)); do + dd if=/dev/zero of=$dir/nrs_w_\$HOSTNAME bs=1M \ +seek=\\\$i count=1 conv=notrunc & + pids_w[\\\$i]=\\\$!; + done; + rc_w=0; + for ((i = 0; i < $n; i++)); do + wait \\\${pids_w[\\\$i]}; + newrc=\\\$?; + [ \\\$newrc -gt \\\$rc_w ] && rc_w=\\\$newrc; + done; + exit \\\$rc_w" & + local pid_w=$! do_nodes $CLIENTS sync; cancel_lru_locks osc - for ((i = 0; i < $n; i++)); do - do_nodes $CLIENTS $myRUNAS dd if="$dir/nrs_w_$HOSTNAME" \ - of=/dev/zero bs=1M seek=$i count=1 > /dev/null || - error "dd at ${i}MB on client failed (3)" & - local pids_r[$i]=$! - done + do_nodes $CLIENTS $myRUNAS \ + "declare -a pids_r; + for ((i = 0; i < $n; i++)); do + dd if=$dir/nrs_r_\$HOSTNAME bs=1M of=/dev/null \ +seek=\\\$i count=1 & + pids_r[\\\$i]=\\\$!; + done; + rc_r=0; + for ((i = 0; i < $n; i++)); do + wait \\\${pids_r[\\\$i]}; + newrc=\\\$?; + [ \\\$newrc -gt \\\$rc_r ] && rc_r=\\\$newrc; + done; + exit \\\$rc_r" & + local pid_r=$! cancel_lru_locks osc - for ((i = 0; i < $n; i++)); do - wait ${pids_w[$i]} - wait ${pids_r[$i]} - done - rm -rf $dir || error "rm -rf $dir failed" + wait $pid_w || error "dd (write) failed (2)" + wait $pid_r || error "dd (read) failed (3)" + rm -rvf $dir || error "rm -rf $dir failed" } test_77a() { #LU-3266 @@ -3236,6 +4345,7 @@ tbf_verify() { local dir=$DIR/$tdir local client1=${CLIENT1:-$(hostname)} local myRUNAS="$3" + local create_as="$4" local np=$(check_cpt_number ost1) [ $np -gt 0 ] || error "CPU partitions should not be $np." @@ -3245,6 +4355,11 @@ tbf_verify() { $LFS setstripe -c 1 -i 0 $dir || error "setstripe to $dir failed" chmod 777 $dir + if [[ -n "$create_as" ]]; then + $create_as touch $dir/tbf + chmod 777 $dir/tbf + fi + trap cleanup_tbf_verify EXIT echo "Limited write rate: $1, read rate: $2" echo "Verify the write rate is under TBF control" @@ -3289,7 +4404,7 @@ test_77e() { local idis local rateis - if [ $(lustre_version_code ost1) -ge $(version_code 2.8.54) ]; then + if [ "$OST1_VERSION" -ge $(version_code 2.8.54) ]; then idis="nid=" rateis="rate=" fi @@ -3339,14 +4454,13 @@ test_77f() { [[ $rc -eq 3 ]] && skip "jobid_var not found" && return [[ $rc -ne 0 ]] && error "failed to get param jobid_var" if [ $saved_jobid_var != procname_uid ]; then - set_conf_param_and_check client \ - "$LCTL get_param -n jobid_var" \ - "$FSNAME.sys.jobid_var" procname_uid + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" procname_uid fi local idis local rateis - if [ $(lustre_version_code ost1) -ge $(version_code 2.8.54) ]; then + if [ "$OST1_VERSION" -ge $(version_code 2.8.54) ]; then idis="jobid=" rateis="rate=" fi @@ -3378,9 +4492,8 @@ test_77f() { local current_jobid_var=$($LCTL get_param -n jobid_var) [[ $? -ne 0 ]] && error "failed to get param jobid_var" if [ $saved_jobid_var != $current_jobid_var ]; then - set_conf_param_and_check client \ - "$LCTL get_param -n jobid_var" \ - "$FSNAME.sys.jobid_var" $saved_jobid_var + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" $saved_jobid_var fi return 0 } @@ -3402,7 +4515,7 @@ test_77g() { local idis local rateis - if [ $(lustre_version_code ost1) -ge $(version_code 2.8.54) ]; then + if [ "$OST1_VERSION" -ge $(version_code 2.8.54) ]; then idis="jobid=" rateis="rate=" fi @@ -3419,8 +4532,8 @@ test_77g() { run_test 77g "Change TBF type directly" test_77h() { - [ $(lustre_version_code ost1) -ge $(version_code 2.8.55) ] || - { skip "Need OST version at least 2.8.55"; return 0; } + [ "$OST1_VERSION" -ge $(version_code 2.8.55) ] || + skip "Need OST version at least 2.8.55" local old_policy=$(do_facet ost1 \ lctl get_param ost.OSS.ost_io.nrs_policies) @@ -3474,8 +4587,8 @@ tbf_rule_check() } test_77i() { - [ $(lustre_version_code ost1) -ge $(version_code 2.8.55) ] || - { skip "Need OST version at least 2.8.55"; return 0; } + [ "$OST1_VERSION" -ge $(version_code 2.8.55) ] || + skip "Need OST version at least 2.8.55" for i in $(seq 1 $OSTCOUNT) do @@ -3529,11 +4642,10 @@ run_test 77i "Change rank of TBF rule" test_77j() { local idis local rateis - local ost_version=$(lustre_version_code ost1) - [ $ost_version -ge $(version_code 2.9.53) ] || - { skip "Need OST version at least 2.9.53"; return 0; } - if [ $ost_version -ge $(version_code 2.8.60) ]; then + [ "$OST1_VERSION" -ge $(version_code 2.9.53) ] || + skip "Need OST version at least 2.9.53" + if [ "$OST1_VERSION" -ge $(version_code 2.8.60) ]; then idis="opcode=" rateis="rate=" fi @@ -3564,6 +4676,8 @@ test_id() { local idstr="${1}id" local policy="${idstr}={$2}" local rate="rate=$3" + local runas_args="$4" + local createas_args="${5:-$runas_args}" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param jobid_var=procname_uid \ @@ -3571,8 +4685,8 @@ test_id() { ost.OSS.ost_io.nrs_tbf_rule="start\ ost_${idstr}\ ${policy}\ ${rate}" [ $? -ne 0 ] && error "failed to set tbf ${idstr} policy" - nrs_write_read "runas $4" - tbf_verify $3 $3 "runas $4" + nrs_write_read "runas $runas_args" "runas $createas_args" + tbf_verify $3 $3 "runas $runas_args" "runas $createas_args" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param ost.OSS.ost_io.nrs_tbf_rule="stop\ ost_${idstr}" \ @@ -3585,15 +4699,24 @@ test_id() { } test_77ja(){ - if [ $(lustre_version_code ost1) -lt $(version_code 2.11.50) ]; then + if [ "$OST1_VERSION" -lt $(version_code 2.11.50) ]; then skip "Need OST version at least 2.11.50" - return 0 fi - test_id "u" "500" "5" "-u 500" - test_id "g" "500" "5" "-u 500 -g 500" + + test_id "u" "$RUNAS_ID" "5" "-u $RUNAS_ID" + test_id "g" "$RUNAS_GID" "5" "-u $RUNAS_ID -g $RUNAS_GID" } run_test 77ja "check TBF-UID/GID NRS policy" +test_77jb() { # LU-16077 + (( "$OST1_VERSION" >= $(version_code 2.15.51) )) || + skip "Need OST version at least 2.15.51" + + test_id "u" "$RUNAS_ID" "5" "-u $RUNAS_ID" "-u 0 -g $RUNAS_GID" + test_id "g" "$RUNAS_GID" "5" "-u $RUNAS_ID -g $RUNAS_GID" "-u $RUNAS_ID -g 0" +} +run_test 77jb "check TBF-UID/GID NRS policy on files that don't belong to us" + cleanup_77k() { local rule_lists=$1 @@ -3612,8 +4735,8 @@ cleanup_77k() } test_77k() { - [[ $(lustre_version_code ost1) -ge $(version_code 2.9.53) ]] || - { skip "Need OST version at least 2.9.53"; return 0; } + [[ "$OST1_VERSION" -ge $(version_code 2.9.53) ]] || + skip "Need OST version at least 2.9.53" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param ost.OSS.ost_io.nrs_policies="tbf" \ @@ -3655,39 +4778,39 @@ test_77k() { trap "cleanup_77k \"ext_a ext_b\" \"fifo\"" EXIT - [[ $(lustre_version_code ost1) -ge $(version_code 2.10.58) ]] || - { skip "Need OST version at least 2.10.58"; return 0; } + [[ "$OST1_VERSION" -ge $(version_code 2.10.58) ]] || + skip "Need OST version at least 2.10.58" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param ost.OSS.ost_io.nrs_tbf_rule="stop\ ext_a" \ ost.OSS.ost_io.nrs_tbf_rule="stop\ ext_b" \ - ost.OSS.ost_io.nrs_tbf_rule="start\ ext_ug\ uid={500}\&gid={1000}\ rate=5" - nrs_write_read "runas -u 500 -g 1000" - tbf_verify 5 5 "runas -u 500 -g 1000" + ost.OSS.ost_io.nrs_tbf_rule="start\ ext_ug\ uid={$RUNAS_ID}\&gid={$RUNAS_GID}\ rate=5" + nrs_write_read "runas -u $RUNAS_ID -g $RUNAS_GID" + tbf_verify 5 5 "runas -u $RUNAS_ID -g $RUNAS_GID" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param ost.OSS.ost_io.nrs_tbf_rule="stop\ ext_ug" \ - ost.OSS.ost_io.nrs_tbf_rule="start\ ext_uw\ uid={500}\&opcode={ost_write}\ rate=20" \ - ost.OSS.ost_io.nrs_tbf_rule="start\ ext_ur\ uid={500}\&opcode={ost_read}\ rate=10" + ost.OSS.ost_io.nrs_tbf_rule="start\ ext_uw\ uid={$RUNAS_ID}\&opcode={ost_write}\ rate=20" \ + ost.OSS.ost_io.nrs_tbf_rule="start\ ext_ur\ uid={$RUNAS_ID}\&opcode={ost_read}\ rate=10" - nrs_write_read "runas -u 500" - tbf_verify 20 10 "runas -u 500" + nrs_write_read "runas -u $RUNAS_ID" + tbf_verify 20 10 "runas -u $RUNAS_ID" do_nodes $(comma_list $(osts_nodes)) \ lctl set_param ost.OSS.ost_io.nrs_tbf_rule="stop\ ext_uw" \ ost.OSS.ost_io.nrs_tbf_rule="stop\ ext_ur" \ - ost.OSS.ost_io.nrs_tbf_rule="start\ ext_a\ uid={500},opcode={ost_write}\ rate=20" \ - ost.OSS.ost_io.nrs_tbf_rule="start\ ext_b\ uid={500},opcode={ost_read}\ rate=10" - nrs_write_read "runas -u 500" - tbf_verify 10 10 "runas -u 500" - tbf_verify 20 10 "runas -u 500" + ost.OSS.ost_io.nrs_tbf_rule="start\ ext_a\ uid={$RUNAS_ID},opcode={ost_write}\ rate=20" \ + ost.OSS.ost_io.nrs_tbf_rule="start\ ext_b\ uid={$RUNAS_ID},opcode={ost_read}\ rate=10" + nrs_write_read "runas -u $RUNAS_ID" + tbf_verify 10 10 "runas -u $RUNAS_ID" + tbf_verify 20 10 "runas -u $RUNAS_ID" cleanup_77k "ext_a ext_b" "fifo" } -run_test 77k "check TBF policy with NID/JobID/OPCode expression" +run_test 77k "check TBF policy with UID/GID/JobID/OPCode expression" test_77l() { - [[ $(lustre_version_code ost1) -ge $(version_code 2.10.56) ]] || - { skip "Need OST version at least 2.10.56"; return 0; } + [[ "$OST1_VERSION" -ge $(version_code 2.10.56) ]] || + skip "Need OST version at least 2.10.56" do_facet ost1 lctl set_param ost.OSS.ost_io.nrs_policies="tbf\ nid" do_facet ost1 lctl set_param ost.OSS.ost_io.nrs_policies="tbf" @@ -3706,9 +4829,8 @@ test_77l() { run_test 77l "check the output of NRS policies for generic TBF" test_77m() { - if [ $(lustre_version_code ost1) -lt $(version_code 2.9.54) ]; then + if [ "$OST1_VERSION" -lt $(version_code 2.9.54) ]; then skip "Need OST version at least 2.9.54" - return 0 fi local dir=$DIR/$tdir @@ -3758,17 +4880,15 @@ test_77m() { run_test 77m "check NRS Delay slows write RPC processing" test_77n() { #LU-10802 - if [ $(lustre_version_code ost1) -lt $(version_code 2.10.58) ]; then + if [ "$OST1_VERSION" -lt $(version_code 2.10.58) ]; then skip "Need OST version at least 2.10.58" - return 0 fi # Configure jobid_var local saved_jobid_var=$($LCTL get_param -n jobid_var) if [ $saved_jobid_var != procname_uid ]; then - set_conf_param_and_check client \ - "$LCTL get_param -n jobid_var" \ - "$FSNAME.sys.jobid_var" procname_uid + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" procname_uid fi do_nodes $(comma_list $(osts_nodes)) \ @@ -3794,13 +4914,209 @@ test_77n() { #LU-10802 local current_jobid_var=$($LCTL get_param -n jobid_var) if [ $saved_jobid_var != $current_jobid_var ]; then - set_conf_param_and_check client \ - "$LCTL get_param -n jobid_var" \ - "$FSNAME.sys.jobid_var" $saved_jobid_var + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" $saved_jobid_var fi } run_test 77n "check wildcard support for TBF JobID NRS policy" +test_77o() { + (( $OST1_VERSION > $(version_code 2.14.54) )) || + skip "need OST > 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf\ nid" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ name\ nid={192.168.*.*@tcp}\ rate=10000" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ name1\ nid={192.168.*.*@tcp}\ rate=10000" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="change\ name1\ rank=name" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="stop\ name" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="fifo" +} +run_test 77o "Changing rank should not panic" + +test_77q() { + local i + local gidlist="500 10 33 100 " + local uidlist=" 500 11 3" + + (( $MDS1_VERSION > $(version_code 2.14.54) )) || + skip "need MDS >= 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf" + stack_trap "do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies=fifo" + + # require 44cc782/LU-9859 to support list with unexpected spaces + (( MDS1_VERSION >= $(version_code 2.15.57) )) || + gidlist=$(echo $gidlist) uidlist=$(echo $uidlist) + + for i in {1..50}; do + local pid1 pid2 + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start rule77q_1 uid={$uidlist}&gid={$gidlist} rate=100'" & + pid1=$! + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start rule77q_2 uid={1000}&gid={1000} rate=100'" & + pid2=$! + wait $pid1 || error "$i: Fail to start TBF rule 'rule77q_1'" + wait $pid2 || error "$i: Fail to start TBF rule 'rule77q_2'" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'stop rule77q_1'" & + pid1=$! + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'stop rule77q_2'" & + pid2=$! + wait $pid1 || error "$i: Fail to stop TBF rule 'rule77q_1'" + wait $pid2 || error "$i: Fail to stop TBF rule 'rule77q_2'" + done +} +run_test 77q "Parallel TBF rule definitions should not panic" + +test_77p() { + local c + local -a spec_chars=( + '@' '.' '~' '#' '/' '^' '%' '*' ';' ',' '?' '<' '>' ':' + '+' '=' ')' '(' '{' '}' '|' '[' ']' '!' '&' '\$' '\`' '\\') + + (( $MDS1_VERSION > $(version_code 2.14.54) )) || + skip "need MDS >= 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf" + stack_trap "do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies=fifo" + + # TBF rule name size is 16 bytes + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ test_77p_overflo\ uid={500}\ rate=500" && + error "The length of tbf rule name is not checked" || true + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ \ uid={500}\ rate=500" && + error "The server should not accept empty tbf rule name" || true + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ test_77p_empty" && + error "The server should not accept 'start ' without an expression" || true + + # Test with special chars + for c in "${spec_chars[@]}"; do + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start test77p${c}spec uid={500} rate=500'" && + error "Special char '${c}' should not be accepted in a tbf rule name" || true + done + +} +run_test 77p "Check validity of rule names for TBF policies" + +cleanup_77r() { + local pid=$1 + local saved_jobid=$2 + local current_jobid_var + + echo "cleanup 77r $pid" + + do_facet mds1 $LCTL set_param -n mds.MDS.mdt.nrs_policies=fifo + kill $pid || echo "fail to kill md thread" + + current_jobid_var=$($LCTL get_param -n jobid_var) + if [ $saved_jobid != $current_jobid_var ]; then + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" $saved_jobid + fi + + sleep 2 + rm -rf $DIR1/$tdir +} + +md_thread_run="true" +md_thread_77r() { + local pid + + while $md_thread_run; do + printf '%s\n' {$DIR1,$DIR2}/$tdir/${tfile}-{01..20} | + xargs -P20 -I{} $RUNAS bash -c 'touch {}; rm -f {}' \ + &> /dev/null & pid=$! + trap "echo kill md_thread xargs; md_thread_run=false; kill $pid" INT TERM + wait $pid + done +} + +wait_policy_state() { + local state="$1" + local policy="$2" + local change_pid="$3" + local time + + for time in {1..60}; do + local nbr_started + + nbr_started=$(do_facet mds1 $LCTL get_param mds.MDS.mdt.nrs_policies | + egrep -A2 "name: ${policy}$" | grep -c "state: $state") + + [[ "$nbr_started" != 2 ]] || return 0 + sleep 1 + done + + [[ -z "$change_pid" ]] || kill $change_pid || true + return 1 +} + +test_77r() { #LU-14976 + + (( MDS1_VERSION > $(version_code 2.15.56) )) || + skip "need MDS >= 2.15.56 c098c095 change nrs policies at run time" + + local pid + local -A rules + local -a policies + local saved_jobid_var + + rules["tbf uid"]="start md_rule uid={$RUNAS_ID} rate=1" + rules["tbf gid"]="start md_rule gid={$RUNAS_GID} rate=1" + rules["tbf jobid"]="start md_rule jobid={*.$RUNAS_ID} rate=1" + rules["tbf"]="start md_rule uid={$RUNAS_ID} rate=1" + policies=( + "tbf uid" + "tbf gid" + "tbf jobid" + "tbf" + "fifo" + ) + + test_mkdir -i 0 -c 1 $DIR1/$tdir + chmod 777 $DIR1/$tdir + + # Configure jobid_var + saved_jobid_var=$($LCTL get_param -n jobid_var) + if [ $saved_jobid_var != procname_uid ]; then + set_persistent_param_and_check client \ + "jobid_var" "$FSNAME.sys.jobid_var" procname_uid + fi + + # start md thread + md_thread_77r & pid=$! + stack_trap "cleanup_77r $pid '$saved_jobid_var'" + + local policy + for policy in "${policies[@]}"; do + local change_pid + + # wait to queue requests + sleep 5 + + do_facet mds1 "$LCTL set_param mds.MDS.mdt.nrs_policies='$policy'" & + change_pid=$! + + wait_policy_state "started" "$policy" "$change_pid" || + error "timeout to start '$policy' policy" + + [[ -n "${rules[$policy]}" ]] || continue + + do_facet mds1 "$LCTL set_param mds.MDS.mdt.nrs_tbf_rule='${rules[$policy]}'" || + error "fail to set rule '${rules[$policy]}' to '$policy'" + done + + wait_policy_state "stopped" "tbf" || + error "fail to stop tbf policy" + + echo "check the number of requests in queue:" + local awkcmd='/name: / {last = $3} ' + awkcmd+='/queued: / {printf " %s: %d\n", last, $2;' + awkcmd+=' if (last == "tbf" && $2 > 0) exit 1;}' + do_facet mds1 $LCTL get_param mds.MDS.mdt.nrs_policies | awk "$awkcmd" || + error "request leak in tbf policies" +} +run_test 77r "Change type of tbf policy at run time" + test_78() { #LU-6673 local rc @@ -3859,6 +5175,7 @@ test_80a() { local file local pid + mkdir_on_mdt0 $DIR1/$tdir mkdir -p $DIR1/$tdir/dir createmany -o $DIR1/$tdir/dir/f 10 || error "create files under remote dir failed $i" @@ -3898,125 +5215,83 @@ cleanup_80b() { kill -9 $migrate_pid } +success_count=0 +failure_count=0 + +run_and_count() +{ + eval $@ &>/dev/null && success_count=$((success_count + 1)) || + failure_count=$((failure_count + 1)) +} + test_80b() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - local migrate_dir1=$DIR1/$tdir/migrate_dir - local migrate_dir2=$DIR2/$tdir/migrate_dir - local migrate_run=$LUSTRE/tests/migrate.sh + local migdir1=$DIR1/$tdir/migrate_dir + local migdir2=$DIR2/$tdir/migrate_dir local start_time - local end_time - local show_time=1 local mdt_idx local rc=0 local rc1=0 trap cleanup_80b EXIT - #prepare migrate directory - mkdir -p $migrate_dir1 + # prepare migrate directory + mkdir -p $migdir1 for F in {1,2,3,4,5}; do - echo "$F$F$F$F$F" > $migrate_dir1/file$F + echo "$F$F$F$F$F" > $migdir1/file$F echo "$F$F$F$F$F" > $DIR/$tdir/file$F done - #migrate the directories among MDTs + # migrate the directories among MDTs ( while true; do mdt_idx=$((RANDOM % MDSCOUNT)) - $LFS migrate -m $mdt_idx $migrate_dir1 &>/dev/null || - rc=$? - [ $rc -ne 0 -o $rc -ne 16 ] || break + # migrate may fail + $LFS migrate -m $mdt_idx $migdir1 &>/dev/null done ) & migrate_pid=$! echo "start migration thread $migrate_pid" - #Access the files at the same time - start_time=$(date +%s) - echo "accessing the migrating directory for 5 minutes..." - while true; do - ls $migrate_dir2 > /dev/null || { - echo "read dir fails" - break - } - diff -u $DIR2/$tdir/file1 $migrate_dir2/file1 || { - echo "access file1 fails" - break - } - - cat $migrate_dir2/file2 > $migrate_dir2/file3 || { - echo "access file2/3 fails" - break - } - - echo "aaaaa" > $migrate_dir2/file4 > /dev/null || { - echo "access file4 fails" - break - } - - stat $migrate_dir2/file5 > /dev/null || { - echo "stat file5 fails" - break - } - - touch $migrate_dir2/source_file > /dev/null || rc1=$? - [ $rc1 -ne 0 -o $rc1 -ne 1 ] || { - echo "touch file failed with $rc1" - break; - } - - if [ -e $migrate_dir2/source_file ]; then - ln $migrate_dir2/source_file $migrate_dir2/link_file \ - &>/dev/null || rc1=$? - if [ -e $migrate_dir2/link_file ]; then - rm -rf $migrate_dir2/link_file - fi - - mrename $migrate_dir2/source_file \ - $migrate_dir2/target_file &>/dev/null || rc1=$? - [ $rc1 -ne 0 -o $rc1 -ne 1 ] || { - echo "rename failed with $rc1" - break - } - - if [ -e $migrate_dir2/target_file ]; then - rm -rf $migrate_dir2/target_file &>/dev/null || - rc1=$? - else - rm -rf $migrate_dir2/source_file &>/dev/null || - rc1=$? - fi - [ $rc1 -ne 0 -o $rc1 -ne 1 ] || { - echo "unlink failed with $rc1" - break - } - fi - - end_time=$(date +%s) - duration=$((end_time - start_time)) - if [ $((duration % 10)) -eq 0 ]; then - if [ $show_time -eq 1 ]; then - echo "...$duration seconds" - show_time=0 - fi - else - show_time=1 + # access the files at the same time + start_time=$SECONDS + echo "accessing the migrating directory for 1 minute..." + while ((SECONDS - start_time < 60)); do + run_and_count ls $migdir2 + run_and_count diff -u $DIR2/$tdir/file1 $migdir2/file1 + run_and_count "cat $migdir2/file2 > $migdir2/file3" + run_and_count "echo "aaaaa" > $migdir2/file4" + run_and_count stat $migdir2/file5 + run_and_count touch $migdir2/source_file + if [ -e $migdir2/source_file ]; then + run_and_count ln $migdir2/source_file \ + $migdir2/link_file + + [ -e $migdir2/link_file ] && + rm -rf $migdir2/link_file + + run_and_count mrename $migdir2/source_file \ + $migdir2/target_file + + [ -e $migdir2/target_file ] && + run_and_count rm -rf $migdir2/target_file || + run_and_count rm -rf $migdir2/source_file fi kill -0 $migrate_pid || { echo "migration stopped 1" break } - - [ $duration -ge 300 ] && break done - #check migration are still there + # check migration are still there kill -0 $migrate_pid || error "migration stopped 2" cleanup_80b + # access during migration may fail + echo "concurrent access $failure_count failures, $success_count successes" } run_test 80b "Accessing directory during migration" -test_81() { +test_81a() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return rm -rf $DIR1/$tdir @@ -4042,11 +5317,142 @@ test_81() { return 0 } -run_test 81 "rename and stat under striped directory" +run_test 81a "rename and stat under striped directory" + +test_81b() { + [ $MDSCOUNT -lt 2 ] && + skip "We need at least 2 MDTs for this test" + + local setattr_pid + + $LFS mkdir -c $MDSCOUNT $DIR1/$tdir || error "$LFS mkdir" + createmany -o $DIR1/$tdir/$tfile. $COUNT || error "createmany" + + ( + while true; do + touch $DIR1/$tdir + done + ) & + setattr_pid=$! + + for ((i = 0; i < COUNT; i++)); do + mrename $DIR2/$tdir/$tfile.$i $DIR2/$tdir/$tfile-new.$i \ + > /dev/null + done + + kill -9 $setattr_pid +} +run_test 81b "rename under striped directory doesn't deadlock" + +test_81c() { + [ $MDSCOUNT -lt 4 ] && skip_env "needs >= 4 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.13.52) ] && + skip "Need MDS version at least 2.13.52" + + # source is local, source parent is remote + $LFS mkdir -i 0 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 3 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source is remote, source parent is local + $LFS mkdir -i 3 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 0 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source and source parent are remote + $LFS mkdir -i 0 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + mkdir $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source and source parent are remote, and source is remote object + $LFS mkdir -i 0 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 2 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" || true +} +run_test 81c "rename revoke LOOKUP lock for remote object" + +cleanup_81d() { + for ((mds = 0; mds < $MDSCOUNT; mds++)); do + local d2=$DIR2/$tdir-$mds + + rm -rf $d2 & + done + wait || error "rm failed" +} + +test_81d() { + local setattr_pid + local mdts=$(comma_list $(mdts_nodes)) + + do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear > /dev/null" + + stack_trap cleanup_81d + for ((mds = 0; mds < $MDSCOUNT; mds++)); do + local d1=$DIR1/$tdir-$mds + + $LFS mkdir -i $mds $d1 $d1/_temporary || error "mkdir failed" + createmany -o $d1/_temporary/$tfile. $COUNT || + error "createmany failed for $d1/_temporary" + done + + for ((mds = 0; mds < $MDSCOUNT; mds++)); do + local d1=$DIR1/$tdir-$mds + local d2=$DIR2/$tdir-$mds + + for ((i = 0; i < COUNT; i++)); do + mrename $d1/_temporary/$tfile.$i $d1/$tfile.$i & + ((i++)) + mrename $d2/_temporary/$tfile.$i $d2/$tfile.$i & + done + done + wait || error "rename failed" + + cleanup_81d + local stats=$DIR1/md_stats + local total=$((MDSCOUNT * COUNT)) + + do_nodes $mdts "$LCTL get_param -n mdt.*.md_stats" > $stats + cat $stats + crossdir=$(awk '/crossdir_rename/ {sum+=$2} END {print sum}' $stats) + (( crossdir == total )) || + error "not crossdir: $crossdir != $total" + samedir=$(awk '/samedir_rename/ {sum+=$2} END {print sum}' $stats) + (( samedir == 0 )) || error "considered samedir: $samedir" + pardir=$(awk '/parallel_rename_dir/ {sum+=$2} END {print sum}' $stats) + (( pardir == 0 )) || error "considered directory: $pardir" + + + (( MDS1_VERSION >= $(version_code 2.15.60) )) || + { echo "need MDS >= 2.15.60 for parallel cross-dir"; return 0; } + + parfile=$(awk '/parallel_rename_file/ {sum+=$2} END {print sum}' $stats) + (( parfile == total )) || + error "not considered file: $parfile != $total" +} +run_test 81d "parallel rename file cross-dir on same MDT" test_82() { - [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.6.91) ]] || - { skip "Need MDS version at least 2.6.92"; return 0; } + [[ "$MDS1_VERSION" -gt $(version_code 2.6.91) ]] || + skip "Need MDS version at least 2.6.92" # Client 1 creates a file. multiop_bg_pause $DIR1/$tfile O_ac || error "multiop_bg_pause 1" @@ -4101,6 +5507,36 @@ test_83() { } run_test 83 "access striped directory while it is being created/unlinked" +test_84() { + [ $MDS1_VERSION -lt $(version_code 2.12.55) ] && + skip "lustre < 2.12.55 does not contain LU-12485 fix" + + local mtime + + $MULTIOP $DIR/$tfile oO_RDWR:O_CREAT:O_LOV_DELAY_CREATE:c || + error "create $tfile failed" + mtime=$(stat -c%Y $DIR/$tfile) + mtime=$((mtime + 200)) + + #define OBD_FAIL_OBD_0NLINK_RACE 0x60b + do_facet mds1 $LCTL set_param fail_loc=0x8000060b + + touch -c -m $mtime $DIR/$tfile & + setattr_pid=$! + # sleep a while to let 'touch' run first + sleep 5 + rm -f $DIR2/$tfile || error "unlink $tfile failed" + + # touch may fail + wait $setattr_pid || true +} +run_test 84 "0-nlink race in lu_object_find()" + +test_85() { + llapi_root_test $DIR/$tfile $DIR2/$tfile +} +run_test 85 "Lustre API root cache race" + test_90() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local pid1 @@ -4205,29 +5641,29 @@ test_93() { mkdir -p $DIR1/$tfile-1/ mkdir -p $DIR2/$tfile-2/ - local old_rr=$(do_facet $SINGLEMDS lctl get_param -n \ - 'lod.lustre-MDT*/qos_threshold_rr' | sed -e 's/%//') - do_facet $SINGLEMDS lctl set_param -n \ - 'lod.lustre-MDT*/qos_threshold_rr' 100 + local old_rr=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + lod.$FSNAME-MDT0000-*/qos_threshold_rr" | sed -e 's/%//') + do_facet $SINGLEMDS "$LCTL set_param -n \ + lod.$FSNAME-MDT0000-*/qos_threshold_rr=100" #define OBD_FAIL_MDS_LOV_CREATE_RACE 0x163 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x00000163" + do_facet $SINGLEMDS "$LCTL set_param fail_loc=0x00000163" - $SETSTRIPE -c -1 $DIR1/$tfile-1/file1 & + $LFS setstripe -c -1 $DIR1/$tfile-1/file1 & local PID1=$! sleep 1 - $SETSTRIPE -c -1 $DIR2/$tfile-2/file2 & + $LFS setstripe -c -1 $DIR2/$tfile-2/file2 & local PID2=$! wait $PID2 wait $PID1 - do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" - do_facet $SINGLEMDS "lctl set_param -n \ - 'lod.lustre-MDT*/qos_threshold_rr' $old_rr" + do_facet $SINGLEMDS "$LCTL set_param fail_loc=0x0" + do_facet $SINGLEMDS "$LCTL set_param -n \ + lod.$FSNAME-MDT0000-*/qos_threshold_rr=$old_rr" - $GETSTRIPE $DIR1/$tfile-1/file1 - rc1=$($GETSTRIPE -q $DIR1/$tfile-1/file1 | + $LFS getstripe $DIR1/$tfile-1/file1 + rc1=$($LFS getstripe -q $DIR1/$tfile-1/file1 | awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l) - $GETSTRIPE $DIR2/$tfile-2/file2 - rc2=$($GETSTRIPE -q $DIR2/$tfile-2/file2 | + $LFS getstripe $DIR2/$tfile-2/file2 + rc2=$($LFS getstripe -q $DIR2/$tfile-2/file2 | awk '{if (/[0-9]/) print $1 }' | sort | uniq -d | wc -l) echo "rc1=$rc1 and rc2=$rc2 " [ $rc1 -eq 0 ] && [ $rc2 -eq 0 ] || @@ -4235,11 +5671,110 @@ test_93() { } run_test 93 "alloc_rr should not allocate on same ost" +test_94() { + $LCTL set_param osc.*.idle_timeout=0 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=2 conv=fsync + + local before=$(date +%s) + local evict + + $LCTL mark write +#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312 + $LCTL set_param fail_val=5 fail_loc=0x80000312 + dd if=/dev/zero of=$DIR/$tfile conv=notrunc oflag=append bs=4k count=1 & + local pid=$! + sleep 2 + +#define OBD_FAIL_LDLM_PAUSE_CANCEL_LOCAL 0x329 + $LCTL set_param fail_val=6 fail_loc=0x80000329 + $LCTL mark kill $pid + kill -ALRM $pid + + dd if=/dev/zero of=$DIR2/$tfile conv=notrunc oflag=append bs=4k count=1 + + wait $pid + dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 conv=fsync + + evict=$(do_facet client $LCTL get_param \ + osc.$FSNAME-OST*-osc-*/state | + awk -F"[ [,]" '/EVICTED ]$/ { if (t<$5) {t=$5;} } END { print t }') + + [ -z "$evict" ] || [[ $evict -le $before ]] || + (do_facet client $LCTL get_param \ + osc.$FSNAME-OST*-osc-*/state; + error "eviction happened: $evict before:$before") + $LCTL set_param osc.*.idle_timeout=debug +} +run_test 94 "signal vs CP callback race" + +test_95a() { + local file=$DIR/$tfile + local file2=$DIR2/$tfile + local fast_read_save + local pid + + fast_read_save=$($LCTL get_param -n llite.*.fast_read | head -n 1) + [ -z "$fast_read_save" ] && skip "no fast read support" + + stack_trap "$LCTL set_param llite.*.fast_read=$fast_read_save" EXIT + $LCTL set_param llite.*.fast_read=0 + + $LFS setstripe -c $OSTCOUNT $file || error "failed to setstripe $file" + dd if=/dev/zero of=$file bs=1M count=2 || error "failed to write $file" + cancel_lru_locks $OSC + $MULTIOP $file Oz1048576w4096c || error "failed to write $file" + $MULTIOP $file oz1044480r4096c || error "failed to read $file" + + # OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE 0x1421 + $LCTL set_param fail_loc=0x80001421 fail_val=7 + $MULTIOP $file2 Oz1048576w4096_c & + pid=$! + + sleep 2 + # OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422 + $LCTL set_param fail_loc=0x80001422 fail_val=10 + $MULTIOP $file oz1044480r4096c || error "failed to read $file" + + kill -USR1 $pid && wait $pid || error "wait for PID $pid failed" +} +run_test 95a "Check readpage() on a page that was removed from page cache" + +test_95b() { + local file=$DIR/$tfile + local file2=$DIR2/$tfile + local fast_read_save + local pid + + fast_read_save=$($LCTL get_param -n llite.*.fast_read | head -n 1) + [ -z "$fast_read_save" ] && skip "no fast read support" + + stack_trap "$LCTL set_param llite.*.fast_read=$fast_read_save" EXIT + $LCTL set_param llite.*.fast_read=0 + + $LFS setstripe -c $OSTCOUNT $file || error "failed to setstripe $file" + dd if=/dev/zero of=$file bs=$((PAGE_SIZE * 3)) count=1 || + error "failed to write $file" + + # This does the read from the second mount, so this flushes the pages + # the first mount and creates new ones on the second mount + # OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424 + $LCTL set_param fail_loc=0x80001424 fail_val=5 + $MULTIOP $file2 or${PAGE_SIZE}c & + pid=$! + + sleep 2 + fadvise_dontneed_helper $file2 + $LCTL set_param fail_loc=0 + sleep 4 + wait $pid || error "failed to read file" +} +run_test 95b "Check readpage() on a page that is no longer uptodate" + # Data-on-MDT tests test_100a() { skip "Reserved for glimpse-ahead" && return - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" mkdir -p $DIR/$tdir @@ -4250,21 +5785,20 @@ test_100a() { $CHECKSTAT -t file -s 4096 $DIR/$tdir/dom || error "stat #1" # first stat from server should return size data and save glimpse - local gls=$(lctl get_param -n mdc.*.stats | \ - awk '/ldlm_glimpse/ {print $2}') - [ -z $gls ] || error "Unexpected $gls glimpse RPCs" + local gls=$(lctl get_param -n mdc.*.stats | grep -c ldlm_glimpse) + [ $gls -eq 0 ] || error "Unexpected $gls glimpse RPCs" # second stat to check size is NOT cached on client without IO lock $CHECKSTAT -t file -s 4096 $DIR/$tdir/dom || error "stat #2" - local gls=$(lctl get_param -n mdc.*.stats | grep ldlm_glimpse | wc -l) - [ "1" == "$gls" ] || error "Expect 1 glimpse RPCs but got $gls" + local gls=$(lctl get_param -n mdc.*.stats | grep -c ldlm_glimpse) + [ $gls -ge 1 ] || error "Expect glimpse RPCs but none" rm -f $dom } run_test 100a "DoM: glimpse RPCs for stat without IO lock (DoM only file)" test_100b() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" mkdir -p $DIR/$tdir @@ -4278,17 +5812,16 @@ test_100b() { # second stat to check size is cached on client $CHECKSTAT -t file -s 4096 $DIR/$tdir/dom || error "stat #2" - local gls=$(lctl get_param -n mdc.*.stats | - awk '/ldlm_glimpse/ {print $2}') + local gls=$(lctl get_param -n mdc.*.stats | grep -c ldlm_glimpse) # both stats should cause no glimpse requests - [ -z $gls ] || error "Unexpected $gls glimpse RPCs" + [ $gls == 0 ] || error "Unexpected $gls glimpse RPCs" rm -f $dom } run_test 100b "DoM: no glimpse RPC for stat with IO lock (DoM only file)" test_100c() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" mkdir -p $DIR/$tdir @@ -4302,7 +5835,7 @@ test_100c() { $CHECKSTAT -t file -s 2097152 $DIR/$tdir/dom || error "Wrong size from stat #1" - local gls=$(lctl get_param -n osc.*.stats | grep ldlm_glimpse | wc -l) + local gls=$(lctl get_param -n osc.*.stats | grep -c ldlm_glimpse) [ $gls -eq 0 ] && error "Expect OST glimpse RPCs but got none" rm -f $dom @@ -4310,8 +5843,8 @@ test_100c() { run_test 100c "DoM: write vs stat without IO lock (combined file)" test_100d() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" mkdir -p $DIR/$tdir @@ -4327,91 +5860,886 @@ test_100d() { $CHECKSTAT -t file -s 4096 $DIR/$tdir/dom || error "Wrong size from stat #1" - local gls=$(lctl get_param -n osc.*.stats | grep ldlm_glimpse | wc -l) + local gls=$(lctl get_param -n osc.*.stats | grep -c ldlm_glimpse) [ $gls -eq 0 ] && error "Expect OST glimpse but got none" rm -f $dom } run_test 100d "DoM: write+truncate vs stat without IO lock (combined file)" +test_100e() { + [ "$MDS1_VERSION" -lt $(version_code 2.11.50) ] && + skip "Need MDS version at least 2.11.50" + + local dom=$DIR/$tdir/dom + local dom2=$DIR2/$tdir/dom + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1024K -L mdt $DIR/$tdir + + cancel_lru_locks mdc + dd if=/dev/urandom of=$dom bs=12000 count=1 + $TRUNCATE $dom2 6000 + cancel_lru_locks mdc + lctl set_param -n mdc.*.stats=clear + # expect read-on-open to return all data before write + cat /etc/hosts >> $dom + local read=$(lctl get_param -n mdc.*.stats | grep -c ost_read) + [[ $read -eq 0 ]] || error "Unexpected $read READ RPCs" +} +run_test 100e "DoM: read on open and file size" + test_101a() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" $LFS setstripe -E 1024K -L mdt -E EOF $DIR1/$tfile # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error_noexit "Write fails" # must discard pages lctl set_param -n mdc.*.stats=clear - rm $DIR2/$tfile || error_noexit "Unlink fails" - local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs + rm $DIR2/$tfile || error "Unlink fails" + local writes=$(lctl get_param -n mdc.*.stats | grep -c ost_write) [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101a "Discard DoM data on unlink" test_101b() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" $LFS setstripe -E 1024K -L mdt -E EOF $DIR1/$tfile touch $DIR1/${tfile}_2 # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock - dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || - error_noexit "Write fails" + dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error "Write fails" # must discard pages lctl set_param -n mdc.*.stats=clear - mv $DIR2/${tfile}_2 $DIR2/$tfile || error_noexit "Rename fails" - local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs + mv $DIR2/${tfile}_2 $DIR2/$tfile || error "Rename fails" + + local writes=$(lctl get_param -n mdc.*.stats | grep -c ost_write) [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101b "Discard DoM data on rename" test_101c() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && - skip "Need MDS version at least 2.10.55" && return + [ "$MDS1_VERSION" -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" $LFS setstripe -E 1024K -L mdt -E EOF $DIR1/$tfile # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock - dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || - error_noexit "Write fails" + dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error "Write fails" $MULTIOP $DIR1/$tfile O_c & MULTIOP_PID=$! sleep 1 lctl set_param -n mdc.*.stats=clear - rm $DIR2/$tfile > /dev/null || error_noexit "Unlink fails" - kill -USR1 $MULTIOP_PID && wait $MULTIOP_PID || - error_noexit "multiop failure" - local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs + rm $DIR2/$tfile > /dev/null || error "Unlink fails for opened file" + kill -USR1 $MULTIOP_PID && wait $MULTIOP_PID || error "multiop failure" + + local writes=$(lctl get_param -n mdc.*.stats | grep -c ost_write) [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101c "Discard DoM data on close-unlink" +# test to verify file handle related system calls +# (name_to_handle_at/open_by_handle_at) +# The new system calls are supported in glibc >= 2.14. + +# test to verify we can open by handle an unlinked file from > 1 client +# This test opens the file normally on $DIR1, which is on one mount, and then +# opens it by handle on $DIR2, which is on a different mount. +test_102() { + (( "$MDS1_VERSION" >= $(version_code 2.11.57) )) || + skip "Needs MDS version 2.11.57 or later" + + echo "Test file_handle syscalls" > $DIR/$tfile || + error "write failed" + check_fhandle_syscalls $DIR/$tfile $DIR2 || + error "check_fhandle_syscalls $tfile failed" + + # test this is working on DNE directories also + if (( MDSCOUNT > 1 && MDS1_VERSION >= $(version_code 2.14.52) )); then + $LFS mkdir -i 1 $DIR/$tdir.remote + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote $DIR2 || + error "check_fhandle_syscalls $tdir.remote failed" + $LFS mkdir -c -1 $DIR/$tdir.remote/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.remote/subdir fail" + + $LFS mkdir -c -1 $DIR/$tdir.stripe + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe $DIR2 || + error "check_fhandle_syscalls $tdir.stripe failed" + $LFS mkdir -c -1 $DIR/$tdir.stripe/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.stripe/subdir fail" + fi +} +run_test 102 "Test open by handle of unlinked file" + +# Compare file size between first & second mount, ensuring the client correctly +# glimpses even with unused speculative locks - LU-11670 +test_103() { + [ $OST1_VERSION -lt $(version_code 2.10.50) ] && + skip "Lockahead needs OST version at least 2.10.50" + + local locktest=23 + + test_mkdir -p $DIR/$tdir + + # Force file on to OST0 + $LFS setstripe -i 0 $DIR/$tdir + + # Do not check multiple locks on glimpse + # OBD_FAIL_OSC_NO_SIZE_DATA 0x415 + $LCTL set_param fail_loc=0x415 + + # Delay write commit by 2 seconds to guarantee glimpse wins race + # The same fail_loc is used on client & server so it can work in the + # single node sanity setup + do_facet ost1 $LCTL set_param fail_loc=0x415 fail_val=2 + + echo "Incorrect size expected (no glimpse fix):" + lockahead_test -d $DIR/$tdir -D $DIR2/$tdir -t $locktest -f $tfile + rc=$? + if [ $rc -eq 0 ]; then + echo "This doesn't work 100%, but this is just reproducing the bug, not testing the fix, so OK to not fail test." + fi + + # guarantee write commit timeout has expired + sleep 2 + + # Clear fail_loc on client + $LCTL set_param fail_loc=0 + + # Delay write commit by 2 seconds to guarantee glimpse wins race + # OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 + do_facet ost1 $LCTL set_param fail_loc=0x214 fail_val=2 + + # Write commit is still delayed by 2 seconds + lockahead_test -d $DIR/$tdir -D $DIR2/$tdir -t $locktest -f $tfile + rc=$? + [ $rc -eq 0 ] || error "Lockahead test$locktest failed, $rc" + + # guarantee write commit timeout has expired + sleep 2 + + rm -f $DIR/$tfile || error "unable to delete $DIR/$tfile" +} +run_test 103 "Test size correctness with lockahead" + +get_stat_xtimes() +{ + local xtimes + + xtimes=$(stat -c "%X %Y %Z" $DIR/$tfile) + + echo ${xtimes[*]} +} + +get_mdt_xtimes() +{ + local mdtdev=$1 + local output + local xtimes + + output=$(do_facet mds1 "$DEBUGFS -c -R 'stat ROOT/$tfile' $mdtdev") + ((xtimes[0]=$(awk -F ':' /atime/'{ print $2 }' <<< "$output"))) + ((xtimes[1]=$(awk -F ':' /mtime/'{ print $2 }' <<< "$output"))) + ((xtimes[2]=$(awk -F ':' /ctime/'{ print $2 }' <<< "$output"))) + + echo ${xtimes[*]} +} + +check_mdt_xtimes() +{ + local mdtdev=$1 + local xtimes=($(get_stat_xtimes)) + local mdt_xtimes=($(get_mdt_xtimes $mdtdev)) + + echo "STAT a|m|ctime ${xtimes[*]}" + echo "MDT a|m|ctime ${mdt_xtimes[*]}" + [[ ${xtimes[0]} == ${mdt_xtimes[0]} ]] || + error "$DIR/$tfile atime (${xtimes[0]}:${mdt_xtimes[0]}) diff" + [[ ${xtimes[1]} == ${mdt_xtimes[1]} ]] || + error "$DIR/$tfile mtime (${xtimes[1]}:${mdt_xtimes[1]}) diff" + [[ ${xtimes[2]} == ${mdt_xtimes[2]} ]] || + error "$DIR/$tfile ctime (${xtimes[2]}:${mdt_xtimes[2]}) diff" +} + +test_104() { + [ "$mds1_FSTYPE" == "ldiskfs" ] || skip_env "ldiskfs only test" + [ $MDS1_VERSION -lt $(version_code 2.12.4) ] && + skip "Need MDS version at least 2.12.4" + + local pid + local mdtdev=$(mdsdevname ${SINGLEMDS//mds/}) + local atime_diff=$(do_facet $SINGLEMDS \ + lctl get_param -n mdd.*MDT0000*.atime_diff) + + do_facet $SINGLEMDS \ + lctl set_param -n mdd.*MDT0000*.atime_diff=0 + + stack_trap "do_facet $SINGLEMDS \ + lctl set_param -n mdd.*MDT0000*.atime_diff=$atime_diff" EXIT + + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + check_mdt_xtimes $mdtdev + sleep 2 + + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + check_mdt_xtimes $mdtdev + sleep 2 + $MULTIOP $DIR2/$tfile Oz8192w8192_c & + pid=$! + sleep 2 + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + sleep 2 + kill -USR1 $pid && wait $pid || error "multiop failure" + check_mdt_xtimes $mdtdev + + local xtimes + local mdt_xtimes + + # Verify mtime/ctime is NOT upated on MDS when there is no modification + # on the client side + xtimes=($(get_stat_xtimes)) + $MULTIOP $DIR/$tfile O_c & + pid=$! + sleep 2 + kill -USR1 $pid && wait $pid || error "multiop failure" + mdt_xtimes=($(get_mdt_xtimes $mdtdev)) + [[ ${xtimes[1]} == ${mdt_xtimes[1]} ]] || + error "$DIR/$tfile mtime (${xtimes[1]}:${mdt_xtimes[1]}) diff" + [[ ${xtimes[2]} == ${mdt_xtimes[2]} ]] || + error "$DIR/$tfile ctime (${xtimes[2]}:${mdt_xtimes[2]}) diff" + check_mdt_xtimes $mdtdev + + sleep 2 + # Change ctime via chmod + $MULTIOP $DIR/$tfile o_tc & + pid=$! + sleep 2 + kill -USR1 $pid && wait $pid || error "multiop failure" + check_mdt_xtimes $mdtdev +} +run_test 104 "Verify that MDS stores atime/mtime/ctime during close" + +test_105() { + test_mkdir -p $DIR/$tdir + echo test > $DIR/$tdir/$tfile + $LCTL set_param fail_loc=0x416 + cancel_lru_locks osc & sleep 1 + fsize1=$(stat -c %s $DIR2/$tdir/$tfile) + wait + [[ $fsize1 = 5 ]] || error "Glimpse returned wrong file size $fsize1" +} +run_test 105 "Glimpse and lock cancel race" + +test_106a() { + [ "$mds1_FSTYPE" == "ldiskfs" ] && statx_supported || + skip_env "Test only for ldiskfs and statx() supported" + + local btime + local mdt_btime + local output + local mdtdev=$(mdsdevname ${SINGLEMDS//mds/}) + + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + btime=$($STATX -c %W $DIR/$tfile) + output=$(do_facet mds1 "$DEBUGFS -c -R 'stat ROOT/$tfile' $mdtdev") + echo $output + ((mdt_btime=$(awk -F ':' /crtime/'{ print $2 }' <<< "$output"))) + [[ $btime == $mdt_btime ]] || + error "$DIR/$tfile btime ($btime:$mdt_btime) diff" + +} +run_test 106a "Verify the btime via statx()" + +test_106b() { + statx_supported || skip_env "statx() only test" + + local rpcs_before + local rpcs_after + + $LFS setstripe -c 1 $DIR/$tfile || error "$DIR/$tfile setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + # %n: FILENAME; %i: STATX_INO; %A STATX_MODE; %h STATX_NLINK; + # %u: STATX_UID; %g: STATX_GID; %W STATX_BTIME; %X STATX_ATIME; + # %Z: STATX_CTIME + $STATX -c "%n %i %A %h %u %g %W %X %Z" $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX --cached=always $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %Y $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %Y should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %s $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %s should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %b $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %b should send 1 glimpse RPC to $OSC" +} +run_test 106b "Glimpse RPCs test for statx" + +test_106c() { + statx_supported || skip_env "statx() only test" + + local mask + + touch $DIR/$tfile + # Mask supported in stx_attributes by Lustre is + # STATX_ATTR_IMMUTABLE(0x10) | STATX_ATTR_APPEND(0x20) : (0x30). + mask=$($STATX -c %p $DIR/$tfile) + (( (0x$mask & 0x30) == 0x30 )) || + error "supported stx_attributes: got '0x$mask', expected '0x30' at least" + chattr +i $DIR/$tfile || error "chattr +i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "10" ]] || + error "got immutable flags '$mask', expected '10'" + chattr -i $DIR/$tfile || error "chattr -i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +a $DIR/$tfile || error "chattr +a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "20" ]] || error "got flags '$mask', expected '20'" + chattr -a $DIR/$tfile || error "chattr -a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +ia $DIR/$tfile || error "chattr +ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "30" ]] || error "got flags '$mask', expected '30'" + chattr -ia $DIR/$tfile || error "chattr -ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" +} +run_test 106c "Verify statx attributes mask" + +test_107a() { # LU-1031 + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=10 + local gid1=14091995 + local gid2=16022000 + + $LFS getstripe $DIR1/$tfile + + multiop_bg_pause $DIR1/$tfile OG${gid1}_g${gid1}c || return 1 + local MULTIPID1=$! + multiop_bg_pause $DIR2/$tfile O_G${gid2}r10g${gid2}c || return 2 + local MULTIPID2=$! + kill -USR1 $MULTIPID2 + sleep 2 + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "First grouplock does not block second one" + else + echo "First grouplock blocks second one" + fi + kill -USR1 $MULTIPID1 + wait $MULTIPID1 + wait $MULTIPID2 +} +run_test 107a "Basic grouplock conflict" + +test_107b() { + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=10 + local gid1=14091995 + local gid2=16022000 + + $LFS getstripe $DIR1/$tfile + + multiop_bg_pause $DIR1/$tfile OG${gid1}_g${gid1}c || return 1 + local MULTIPID1=$! + multiop $DIR2/$tfile Or10c & + local MULTIPID2=$! + sleep 2 + + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "Grouplock does not block IO" + else + echo "Grouplock blocks IO" + fi + + multiop $DIR2/$tfile OG${gid2}_g${gid2}c & + local MULTIPID3=$! + sleep 2 + if [[ $(ps h -o comm -p $MULTIPID3) == "" ]]; then + error "First grouplock does not block second one" + else + echo "First grouplock blocks second one" + fi + + kill -USR1 $MULTIPID1 + sleep 2 + + if [[ $(ps h -o comm -p $MULTIPID3) == "" ]]; then + error "Second grouplock thread disappeared" + fi + + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "Second grouplock does not block IO" + else + echo "Second grouplock blocks IO" + fi + + kill -USR1 $MULTIPID3 + wait $MULTIPID1 + wait $MULTIPID2 + wait $MULTIPID3 +} +run_test 107b "Grouplock is added to the head of waiting list" + +test_108a() { + local offset + + $LFS setstripe -E 1M -c 1 -E -1 $DIR1/$tfile || + error "Create $DIR1/$tfile failed" + + dd if=/dev/zero of=$DIR1/$tfile bs=10000 count=1 || + error "dd $DIR1/$tfile failed" + offset=$(lseek_test -d 5000 $DIR2/$tfile) + [[ $offset == 5000 ]] || error "offset $offset != 5000" + + $TRUNCATE $DIR1/$tfile 2000 + offset=$(lseek_test -l 1000 $DIR2/$tfile) + [[ $offset == 2000 ]] || error "offset $offset != 2000" + + #define OBD_FAIL_OSC_DELAY_IO 0x414 + $LCTL set_param fail_val=4 fail_loc=0x80000414 + dd if=/dev/zero of=$DIR1/$tfile count=1 bs=8M conv=notrunc oflag=dsync & + local pid=$! + sleep 2 + + offset=$(lseek_test -l 8000 $DIR2/$tfile) + wait $pid + [[ $offset == 8388608 ]] || error "offset $offset != 8388608" +} +run_test 108a "lseek: parallel updates" + +# LU-14110 +test_109() { + local i + local pid1 pid2 + + ! local_mode || + skip "Clients need to be on different nodes than the servers" + + umount_client $MOUNT + umount_client $MOUNT2 + + echo "Starting race between client mount instances (50 iterations):" + for i in {1..50}; do + log "Iteration $i" + +#define CFS_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT 0x80001417 + $LCTL set_param -n fail_loc=0x80001417 + + mount_client $MOUNT & pid1=$! + mount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Mount $MOUNT fails with $?" + wait $pid2 || error "Mount $MOUNT2 fails with $?" + + umount_client $MOUNT & pid1=$! + umount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Umount $MOUNT fails with $?" + wait $pid2 || error "Umount $MOUNT2 fails with $?" + + $LUSTRE_RMMOD || error "Fail to remove lustre modules" + load_modules + echo + done + + mount_client $MOUNT + mount_client $MOUNT2 +} + +run_test 109 "Race with several mount instances on 1 node" + +test_110() { + local before=$(date +%s) + local evict + + mkdir -p $DIR/$tdir + touch $DIR/$tdir/f1 + touch $DIR/$tfile + + #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525 + do_facet mds1 lctl set_param fail_loc=0x525 fail_val=3 + + # disable last_xid logic by dropping link reply + ln $DIR/$tdir/f1 $DIR/$tdir/f2 & + sleep 1 + + #define OBD_FAIL_PTLRPC_ENQ_RESEND 0x534 + do_facet mds1 lctl set_param fail_loc=0x534 + + # RPC will race with its Resend and the Resend will sleep to let + # the original lock to get granted & cancelled. + # + # AST_SENT is set artificially, so an explicit conflict is not needed + # + # The woken up Resend gets a new lock, but client does not wait for it + stat $DIR/$tfile + sleep $TIMEOUT + do_facet mds1 lctl set_param fail_loc=0 fail_val=0 + + # Take a conflict to wait long enough to see the eviction + touch $DIR2/$tfile + + # let the client reconnect + client_reconnect + evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + + [ -z "$evict" ] || [[ $evict -le $before ]] || + (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state; + error "eviction happened: $evict before:$before") +} +run_test 110 "do not grant another lock on resend" + +test_111() { + [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" + [[ $(facet_active_host mds1) = $(facet_active_host mds2) ]] || + skip "MDT0 and MDT1 should be on the same node" + + mkdir $DIR1/$tdir + $LFS mkdir -i 0 $DIR1/$tdir/mdt0dir + $LFS mkdir -i 1 $DIR1/$tdir/mdt1dir + + mkdir $DIR1/$tdir/mdt0dir/foodir + touch $DIR1/$tdir/mdt0dir/foodir/{file1,file2} + + $MULTIOP $DIR2/$tdir/mdt0dir/foodir/file2 Ow4096_c & + MULTIOP_PID=$! + ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2 + + #define OBD_FAIL_MDS_LINK_RENAME_RACE 0x18a + do_facet mds1 $LCTL set_param fail_loc=0x8000018a + + ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2x & + sleep 1 + + rm $DIR2/$tdir/mdt1dir/file2 + sleep 1 + + mv $DIR2/$tdir/mdt0dir/foodir/file1 $DIR2/$tdir/mdt0dir/foodir/file2 + sleep 1 + + kill $MULTIOP_PID + wait + rm -r $DIR1/$tdir || error "Removing test dir failed" +} +run_test 111 "A racy rename/link an open file should not cause fs corruption" + +test_112() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.14.54) )) || + skip "Need server version at least 2.14.54" + + local rr + local count + + rr=$($LCTL get_param -n lmv.*.qos_threshold_rr | head -n1) + rr=${rr%%%} + stack_trap "$LCTL set_param lmv.*.qos_threshold_rr=$rr > /dev/null" + + mkdir -p $DIR1/$tdir/s1/s2 || error "mkdir s2 failed" + $LFS mkdir -i 0 $DIR1/$tdir/s1/s2/s3 || error "mkdir s3 failed" + $LFS setdirstripe -D -i -1 --max-inherit-rr=0 $DIR1/$tdir/s1/s2/s3 || + error "setdirstripe s3 failed" + $LCTL set_param lmv.*.qos_threshold_rr=90 + mkdir $DIR2/$tdir/s1/s2/s3/d{1..64} + count=$($LFS getstripe -m $DIR2/$tdir/s1/s2/s3/d* | grep ^0 | wc -l) + (( count == 64 )) || error "only $count subdirs created on MDT0" + + $LFS setdirstripe -D -i -1 --max-inherit-rr=3 $DIR1/$tdir/s1/s2/s3 || + error "setdirstripe s3 failed" + mkdir $DIR2/$tdir/s1/s2/s3/s{1..64} + count=$($LFS getstripe -m $DIR2/$tdir/s1/s2/s3/s* | grep ^0 | wc -l) + (( count == 64 / MDSCOUNT )) || error "$count subdirs created on MDT0" +} +run_test 112 "update max-inherit in default LMV" + +test_113 () { + (( MDS1_VERSION >= $(version_code 2.15.50) )) || + skip "Need server version at least 2.15.50" + + local instance + local nid + + instance=$($LFS getname -i $DIR1) || + error "cannot get instance of $DIR1" + + $LFS check osts $DIR1 | grep $instance || + error "cannot find OSTs of instance $instance" + + $LFS check osts $DIR1 | grep -v $instance + if (( $? == 0 )); then + error "find OSTs other than instance $instance" + fi + + $LFS check osts | grep $instance || + error "cannot find other OSTs" + + nid=$(df $DIR2 | tail -1 | sed 's%:/.*%%') || + error "cannot parse nid for $DIR2" + + $LFS check mgts $DIR2 | grep MGC$nid || + error "cannot find mgc of $nid" + + $LFS check mgts $DIR2 | grep -v MGC$nid + if (( $? == 0 )); then + error "find MGTs other than nid $nid" + fi +} +run_test 113 "check servers of specified fs" + +check_default_lmv() { + local dir=$1 + + local enabled + local dmv + local index + local count + local inherit + local inherit_rr + local raw + + enabled=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit) + + dmv=$($LFS getdirstripe -D $dir) + echo $dir $dmv + index=$(echo $dmv | awk '{ print $4 }') + (( index == $2 )) || error "$dir default stripe index $index != $2" + + count=$(echo $dmv | awk '{ print $2 }') + (( count == $3 )) || error "$dir default stripe count $count != $3" + + inherit=$(echo $dmv | awk '{ print $8 }') + (( inherit == $4 )) || error "$dir default max-inherit $inherit != $4" + + if [ $index -eq -1 ]; then + inherit_rr=$(echo $dmv | awk '{ print $10 }') + (( inherit_rr == $5 )) || + error "$dir default max-inherit-rr $inherit_rr != $5" + fi + + # with --raw, print default LMV stored in inode, otherwise print nothing + raw=$($LFS getdirstripe -D --raw $dir) + if (( enabled == 1 )); then + [ -z $raw ] || + error "implicit inherited DMV is printed with --raw" + else + # if disabled, dmv is stored in inode, which will always + # print max-inherit-rr + echo $dir $raw + [[ $raw =~ $dmv.* ]] || error "$dir raw $raw != dmv $dmv" + fi +} + +test_dmv_imp_inherit() { + local dmv + local raw + local index + local count + local inherit + local inherit_rr + + rm -rf $DIR/$tdir || error "rm $tdir failed" + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + + # set dir default LMV + $LFS setdirstripe -D -c1 -X4 --max-inherit-rr 2 $DIR/$tdir || + error "setdirstripe -D $tdir failed" + dmv=$($LFS getdirstripe -D $DIR/$tdir) + raw=$($LFS getdirstripe -D --raw $DIR/$tdir) + [ "$dmv" == "$raw" ] || error "$dmv != $raw" + + mkdir -p $DIR/$tdir/l1/l2/l3 || error "mkdir $DIR/$tdir/l1/l2/l3 failed" + check_default_lmv $DIR/$tdir/l1/l2/l3 -1 1 1 0 + check_default_lmv $DIR2/$tdir/l1/l2/l3 -1 1 1 0 + + # below tests are valid only when this feature is enabled + local enabled=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000*.enable_dmv_implicit_inherit) + + (( enabled == 1 )) || return 0 + + # set l2 default LMV, dmv of l3 should change immediately + $LFS setdirstripe -D -i1 -c2 -X4 $DIR/$tdir/l1/l2 || + error "setdirstripe -D $tdir/l1/l2 failed" + + check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3 + check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3 + + # change tdir default LMV, dmv of l3 should be unchanged because dmv + # of l2 is explicitly set + $LFS setdirstripe -D -i2 -c2 -X3 $DIR/$tdir || + error "setdirstripe -D $tdir failed" + + check_default_lmv $DIR/$tdir/l1 2 2 2 + check_default_lmv $DIR2/$tdir/l1 2 2 2 + check_default_lmv $DIR/$tdir/l1/l2/l3 1 2 3 + check_default_lmv $DIR2/$tdir/l1/l2/l3 1 2 3 +} + +test_114() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.15.55.45) )) || + skip "Need server version at least 2.15.54.45" + + test_dmv_imp_inherit + + # disable dmv_imp_inherit to simulate old client + local mdts=$(comma_list $(mdts_nodes)) + + do_nodes $mdts $LCTL set_param -n \ + mdt.*MDT*.enable_dmv_implicit_inherit=0 + test_dmv_imp_inherit + do_nodes $mdts $LCTL set_param -n \ + mdt.*MDT*.enable_dmv_implicit_inherit=1 +} +run_test 114 "implicit default LMV inherit" + +test_115() { + local td=$DIR/$tdir + + [ "$mds1_FSTYPE" == "ldiskfs" ] || skip_env "ldiskfs only test" + + mkdir_on_mdt0 $td || error "can't mkdir" + # turn it htree (don't really needed) + createmany -m $td/f 3000 || error "can't createmany" + + # here is an example of debugfs output for htree command: + # Entry #0: Hash 0x00000000, block 27 + # Reading directory block 27, phys 16760 + # 938 0x0016fb58-7f3d21f5 (32) f775 834 0x001db8c8-d31a4e0e (32) f671 + # 1085 0x0040cb70-4498abd4 (32) f922 1850 0x0066a1e6-f6f0dc69 (32) f1687 + # 2005 0x006c1a46-ef466058 (32) f1842 2025 0x007e64d4-8b28b734 (32) f1862 + # 642 0x008b53a0-77adc601 (32) f479 447 0x009ec152-af54eea3 (32) f284 + # 1740 0x00c38f56-ed310e61 (32) f1577 2165 0x00cdfd66-f429a93f (32) f2002 + # 930 0x00d7ada4-b80421c9 (32) f767 1946 0x00da6a7a-e8080600 (32) f1783 + # 273 0x00f8ea00-760bf97c (32) f110 1589 0x0103c4ee-94fad5dd (32) f1426 + # 1383 0x01193516-83120b48 (32) f1220 2379 0x01431e3c-e85b5bd9 (32) f2216 + # + # find couple names in a same htree block of the same size + mdt_dev=$(facet_device $SINGLEMDS) + de=( $(do_facet $SINGLEMDS "debugfs -c -R 'htree /ROOT/$tdir' $mdt_dev" | + awk '/Reading directory block/ { getline; print $4,$8; exit; }' )) + local de1=${de[0]} + local de2=${de[1]} + [[ $de1 == "" || $de2 == "" ]] && error "de1=$de1 de2=$de2" + echo "USE: $de1 $de2" + # release one mkdir will lookup + rm $DIR/$tdir/$de2 +#define OBD_FAIL_MDS_PAUSE_CREATE_AFTER_LOOKUP 0x2401 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80002401 fail_val=5 + mkdir $DIR/$tdir/$de2 & + sleep 0.3 + local PID1=$! + # recreate $de2 + mkdir $DIR2/$tdir/$de2 + # release space $de1 (should be enough to save $de2) + rm $DIR2/$tdir/$de1 + # ready to create a dup of $de2 + wait $PID1 + local found=$(ls $DIR/$tdir/|grep "^$de2\$"|wc -l) + (( $found == 1 )) || error "found $found" +} +run_test 115 "ldiskfs doesn't check direntry for uniqueness" + +test_116() { + (( $MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs" + (( $MDS1_VERSION >= $(version_code 2.15.61) )) || + skip "Need MDS version at least 2.15.61 for intent mkdir" + + local mdt_idx + local save="$TMP/$TESTSUITE-$TESTNAME.parameters" + + save_lustre_params client "llite.*.intent_mkdir" > $save + stack_trap "restore_lustre_params < $save; rm -f $save" EXIT + $LCTL set_param llite.*.intent_mkdir=1 + + $LFS mkdir -c$MDSCOUNT -i0 $DIR/$tdir || + error "$LFS mkdir $DIR/$tdir failed" + echo "MD layout $DIR/$tdir:" + $LFS getdirstripe $DIR/$tdir + echo "mkdir $DIR/$tdir/tdir0" + mkdir $DIR/$tdir/tdir0 || error "mkdir tdir0 failed" + echo "setdirstripe -D -i1 $DIR2/$tdir/tdir0" + $LFS setdirstripe -D -i1 $DIR2/$tdir/tdir0 || + error "$LFS setdirstripe $DIR2/$tdir/tdir0 failed" + echo "mkdir $DIR/$tdir/tdir0/tdir11" + mkdir $DIR/$tdir/tdir0/tdir11 || error "mkdir tdir0/tdir11 failed" + $LFS getdirstripe $DIR/$tdir/tdir0 + $LFS getdirstripe $DIR/$tdir/tdir0/tdir11 + + mdt_idx=$($LFS getstripe -m $DIR/$tdir/tdir0/tdir11) + [ $mdt_idx == 1 ] || + error "$DIR/$tdir/tdir0/tdir11 on wrong MDT $mdt_idx" +} +run_test 116 "DNE: Set default LMV layout from a remote client" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script @@ -4420,7 +6748,7 @@ log "cleanup: ======================================================" [ "$(mount | grep $MOUNT2)" ] && wait_update $HOSTNAME "fuser -m $MOUNT2" "" || true -complete $SECONDS +complete_test $SECONDS rm -f $SAMPLE_FILE check_and_cleanup_lustre exit_status