ALWAYS_EXCEPT=" 42a 42b 42c 42d 45 68b $SANITY_EXCEPT"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# with LOD/OSP landing
-# bug number for skipped tests: LU-2036 LU-8139 LU-8411
-ALWAYS_EXCEPT=" 76 101g 407 $ALWAYS_EXCEPT"
+# bug number for skipped tests: LU-2036 LU-8411 LU-9096
+ALWAYS_EXCEPT=" 76 407 253 $ALWAYS_EXCEPT"
is_sles11() # LU-4341
{
. ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh}
init_logging
-# 5 12 (min)"
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="24D 27m 64b 68 71 115 300o"
+# 5 12 (min)"
+[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 64b 68 71 115 300o"
if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then
- # bug number for skipped test: LU-4536 LU-1957
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 65ic 180"
+ # bug number for skipped test: LU-9514 LU-4536 LU-1957
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 51f 65ic 180"
# 13 (min)"
[ "$SLOW" = "no" ] && EXCEPT_SLOW="$EXCEPT_SLOW 51b"
fi
local rc=0
test_mkdir -p $WDIR
- mdt_index=$($LFS getstripe -M $WDIR)
- mdt_index=$((mdt_index+1))
-
touch $WDIR/$tfile
+ mdt_index=$($LFS getstripe -M $WDIR/$tfile)
+ mdt_index=$((mdt_index+1))
+ cancel_lru_locks mdc
#fail mds will wait the failover finish then set
#following fail_loc to avoid interfer the recovery process.
fail mds${mdt_index}
}
simple_cleanup_common() {
+ local rc=0 # exit status of the rm below, handed back to the caller
trap 0 # reset the EXIT trap so this cleanup is not run a second time
+ [ -z "$DIR" -o -z "$tdir" ] && return 0 # nothing to remove if either path part is empty
+
+ local start=$SECONDS # used only to report how long the cleanup took
rm -rf $DIR/$tdir
+ rc=$? # save the rm status before the following commands overwrite $?
wait_delete_completed
+ echo "cleanup time $((SECONDS - start))"
+ return $rc
}
max_pages_per_rpc() {
- $LCTL get_param -n mdc.*.max_pages_per_rpc | head -n1
+ # Print max_pages_per_rpc of the MDC device for one MDT.
+ # $1 - MDT index (decimal), defaults to 0
+ # Only the first match is printed: callers use the result in shell
+ # arithmetic, which breaks on multi-line output when more than one
+ # MDC device matches the pattern.
+ local mdtname="$(printf "MDT%04x" ${1:-0})"
+ $LCTL get_param -n mdc.*$mdtname*.max_pages_per_rpc | head -n1
}
test_24v() {
- local NRFILES=100000
- local FREE_INODES=$(mdt_free_inodes 0)
- [[ $FREE_INODES -lt $NRFILES ]] &&
- skip "not enough free inodes $FREE_INODES required $NRFILES" &&
- return
-
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
- trap simple_cleanup_common EXIT
-
+ local nrfiles=${COUNT:-100000}
# Performance issue on ZFS see LU-4072 (c.f. LU-2887)
- [ $(facet_fstype $SINGLEMDS) = "zfs" ] && NRFILES=10000
+ [ $(facet_fstype $SINGLEMDS) = "zfs" ] && nrfiles=${COUNT:-10000}
- test_mkdir -p $DIR/$tdir
- createmany -m $DIR/$tdir/$tfile $NRFILES
+ local fname="$DIR/$tdir/$tfile"
+ test_mkdir "$(dirname $fname)"
+ # assume MDT0000 has the fewest inodes
+ local stripes=$($LFS getdirstripe -c $(dirname $fname))
+ local free_inodes=$(($(mdt_free_inodes 0) * stripes))
+ [[ $free_inodes -lt $nrfiles ]] && nrfiles=$free_inodes
+
+ trap simple_cleanup_common EXIT
+
+ createmany -m "$fname" $nrfiles
cancel_lru_locks mdc
lctl set_param mdc.*.stats clear
- ls $DIR/$tdir >/dev/null || error "error in listing large dir"
-
+ # was previously test_24D: LU-6101
+ # readdir() returns correct number of entries after cursor reload
+ local num_ls=$(ls $DIR/$tdir | wc -l)
+ local num_uniq=$(ls $DIR/$tdir | sort -u | wc -l)
+ local num_all=$(ls -a $DIR/$tdir | wc -l)
+ if [ $num_ls -ne $nrfiles -o $num_uniq -ne $nrfiles -o \
+ $num_all -ne $((nrfiles + 2)) ]; then
+ error "Expected $nrfiles files, got $num_ls " \
+ "($num_uniq unique $num_all .&..)"
+ fi
# LU-5 large readdir
- # DIRENT_SIZE = 32 bytes for sizeof(struct lu_dirent) +
- # 8 bytes for name(filename is mostly 5 in this test) +
- # 8 bytes for luda_type
+ # dirent_size = 32 bytes for sizeof(struct lu_dirent) +
+ # N bytes for name (len($nrfiles) rounded to 8 bytes) +
+ # 8 bytes for luda_type (4 bytes rounded to 8 bytes)
# take into account of overhead in lu_dirpage header and end mark in
- # each page, plus one in RPC_NUM calculation.
- DIRENT_SIZE=48
- RPC_SIZE=$(($(max_pages_per_rpc) * $(page_size)))
- RPC_NUM=$(((NRFILES * DIRENT_SIZE + RPC_SIZE - 1) / RPC_SIZE + 1))
- mds_readpage=$(lctl get_param mdc.*MDT0000*.stats |
- awk '/^mds_readpage/ {print $2}')
- [[ $mds_readpage -gt $RPC_NUM ]] &&
- error "large readdir doesn't take effect"
+ # each page, plus one in rpc_num calculation.
+ local dirent_size=$((32 + (${#tfile} | 7) + 1 + 8))
+ local page_entries=$((($(page_size) - 24) / dirent_size))
+ local mdt_idx=$($LFS getdirstripe -i $(dirname $fname))
+ local rpc_pages=$(max_pages_per_rpc $mdt_idx)
+ local rpc_max=$((nrfiles / (page_entries * rpc_pages) + stripes))
+ local mds_readpage=$(calc_stats mdc.*.stats mds_readpage)
+ echo "readpages: $mds_readpage rpc_max: $rpc_max"
+ (( $mds_readpage < $rpc_max - 2 || $mds_readpage > $rpc_max + 1)) &&
+ error "large readdir doesn't take effect: " \
+ "$mds_readpage should be about $rpc_max"
simple_cleanup_common
}
-run_test 24v "list directory with large files (handle hash collision, bug: 17560)"
+run_test 24v "list large directory (test hash collision, b=17560)"
test_24w() { # bug21506
SZ1=234852
rm -rf $DIR/$tdir
test_mkdir -p $DIR/$tdir
+ trap simple_cleanup_common EXIT
createmany -m $DIR/$tdir/$tfile $NFILES
local t=$(ls $DIR/$tdir | wc -l)
local u=$(ls $DIR/$tdir | sort -u | wc -l)
error "Expected $NFILES files, got $t ($u unique $v .&..)"
fi
- rm -rf $DIR/$tdir || error "Can not delete directories"
+ simple_cleanup_common || error "Can not delete directories"
}
run_test 24A "readdir() returns correct number of entries."
}
run_test 24C "check .. in striped dir"
-test_24D() { # LU-6101
- local NFILES=50000
-
- rm -rf $DIR/$tdir
- mkdir -p $DIR/$tdir
- createmany -m $DIR/$tdir/$tfile $NFILES
- local t=$(ls $DIR/$tdir | wc -l)
- local u=$(ls $DIR/$tdir | sort -u | wc -l)
- local v=$(ls -ai $DIR/$tdir | sort -u | wc -l)
- if [ $t -ne $NFILES -o $u -ne $NFILES -o $v -ne $((NFILES + 2)) ] ; then
- error "Expected $NFILES files, got $t ($u unique $v .&..)"
- fi
-
- rm -rf $DIR/$tdir || error "Can not delete directories"
-}
-run_test 24D "readdir() returns correct number of entries after cursor reload"
-
test_24E() {
[[ $MDSCOUNT -lt 4 ]] && skip "needs >= 4 MDTs" && return
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
#define OBD_FAIL_MDS_OSC_PRECREATE 0x139
do_nodes $list $LCTL set_param fail_loc=0x139
test_mkdir -p $DIR/$tdir
- rm -rf $DIR/$tdir/*
+ trap simple_cleanup_common EXIT
createmany -o $DIR/$tdir/t- 1000
do_nodes $list $LCTL set_param fail_loc=0
$GETSTRIPE $DIR/$tdir > $TLOG
OBJS=$(awk -vobj=0 '($1 == 0) { obj += 1 } END { print obj; }' $TLOG)
unlinkmany $DIR/$tdir/t- 1000
+ trap 0
[[ $OBJS -gt 0 ]] &&
error "$OBJS objects created on OST-0. See $TLOG" || pass
}
# update too, until that use mount/ll_decode_filter_fid/mount.
# Re-enable when debugfs will understand new filter_fid.
#
- if false && [ $(facet_fstype ost$ost) == ldiskfs ]; then
+ if [ $(facet_fstype ost$ost) == ldiskfs ]; then
ff=$(do_facet ost$ost "$DEBUGFS -c -R 'stat $obj_file' \
$dev 2>/dev/null" | grep "parent=")
- else
+ fi
+ if [ -z "$ff" ]; then
stop ost$ost
mount_fstype ost$ost
ff=$(do_facet ost$ost $LL_DECODE_FILTER_FID \
# /mnt/O/0/d23/23: objid=23 seq=0 parent=[0x200000400:0x1e:0x1]
# fid: objid=23 seq=0 parent=[0x200000400:0x1e:0x0] stripe=1
- local ff_parent=$(echo $ff|sed -e 's/.*parent=.//')
- local ff_pseq=$(echo $ff_parent | cut -d: -f1)
- local ff_poid=$(echo $ff_parent | cut -d: -f2)
+ #
+ # fid: parent=[0x200000400:0x1e:0x0] stripe=1 stripe_count=2 \
+ # stripe_size=1048576 component_id=1 component_start=0 \
+ # component_end=33554432
+ local ff_parent=$(sed -e 's/.*parent=.//' <<<$ff)
+ local ff_pseq=$(cut -d: -f1 <<<$ff_parent)
+ local ff_poid=$(cut -d: -f2 <<<$ff_parent)
local ff_pstripe
- if echo $ff_parent | grep -q 'stripe='; then
- ff_pstripe=$(echo $ff_parent | sed -e 's/.*stripe=//')
+ if grep -q 'stripe=' <<<$ff; then
+ ff_pstripe=$(sed -e 's/.*stripe=//' -e 's/ .*//' <<<$ff)
else
- #
# $LL_DECODE_FILTER_FID does not print "stripe="; look
- # into f_ver in this case. See the comment on
- # ff_parent.
- #
- ff_pstripe=$(echo $ff_parent | cut -d: -f3 |
- sed -e 's/\]//')
+ # into f_ver in this case. See comment on ff_parent.
+ ff_pstripe=$(cut -d: -f3 <<<$ff_parent | sed -e 's/]//')
fi
- # compare lmm_seq and filter_fid->ff_parent.f_seq
- [ $ff_pseq = $lmm_seq ] ||
- error "FF parent SEQ $ff_pseq != $lmm_seq"
- # compare lmm_object_id and filter_fid->ff_parent.f_oid
- [ $ff_poid = $lmm_oid ] ||
- error "FF parent OID $ff_poid != $lmm_oid"
+ if grep -q 'stripe_count=' <<<$ff; then
+ local ff_scnt=$(sed -e 's/.*stripe_count=//' \
+ -e 's/ .*//' <<<$ff)
+ [ $lmm_count = $ff_scnt ] ||
+ error "FF stripe count $lmm_count != $ff_scnt"
+ fi
+ # compare lmm_seq and filter_fid->ff_parent.f_seq
+ [ $ff_pseq = $lmm_seq ] ||
+ error "FF parent SEQ $ff_pseq != $lmm_seq"
+ # compare lmm_object_id and filter_fid->ff_parent.f_oid
+ [ $ff_poid = $lmm_oid ] ||
+ error "FF parent OID $ff_poid != $lmm_oid"
(($ff_pstripe == $stripe_nr)) ||
- error "FF stripe $ff_pstripe != $stripe_nr"
+ error "FF stripe $ff_pstripe != $stripe_nr"
- stripe_nr=$((stripe_nr + 1))
- done
+ stripe_nr=$((stripe_nr + 1))
+ done
}
test_27z() {
pool_add_targets $POOL $ost_range || error "pool_add_targets failed"
local skip27D
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
- skip27D = "-s 29"
+ # Build the list of llapi_layout_test subtests to skip on older MDS.
+ # NB: no blanks around "+=", otherwise bash runs "skip27D" as a command
+ # and the variable stays empty. The leading blank keeps the options
+ # separate words when $skip27D is expanded unquoted below.
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.55) ] &&
+ skip27D+=" -s 29"
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.9.55) ] &&
+ skip27D+=" -s 30,31"
llapi_layout_test -d$DIR/$tdir -p$POOL -o$OSTCOUNT $skip27D ||
error "llapi_layout_test failed"
- cleanup_pools || error "cleanup_pools failed"
+ destroy_test_pools || error "destroy test pools failed"
}
run_test 27D "validate llapi_layout API"
run_test 27E "check that default extended attribute size properly increases"
test_27F() { # LU-5346/LU-7975
-
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.51) ]] &&
skip "Need MDS version at least 2.8.51" && return
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
test_mkdir -p $DIR/$tdir
rm -f $DIR/$tdir/f0
error "$BEFOREWRITES < $AFTERWRITES"
start_writeback
}
-run_test 42a "ensure that we don't flush on close =============="
+run_test 42a "ensure that we don't flush on close"
test_42b() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
# give multiop a chance to open
sleep 1
- $DIR/$tdir/$tfile && error || true
+ $DIR/$tdir/$tfile && error "execute $DIR/$tdir/$tfile succeeded" || true
kill -USR1 $pid
}
run_test 43A "execution of file opened for write should return -ETXTBSY"
test_43a() {
- [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
- test_mkdir -p $DIR/$tdir
- cp -p `which $MULTIOP` $DIR/$tdir/multiop ||
- cp -p multiop $DIR/$tdir/multiop
+ # Run a copy of multiop from the test directory and keep it paused,
+ # then check that opening the running binary for write is refused.
+ test_mkdir $DIR/$tdir
+ cp -p $(which $MULTIOP) $DIR/$tdir/multiop ||
+ cp -p multiop $DIR/$tdir/multiop
MULTIOP_PROG=$DIR/$tdir/multiop multiop_bg_pause $TMP/$tfile.junk O_c ||
- return 1
- MULTIOP_PID=$!
- $MULTIOP $DIR/$tdir/multiop Oc && error "expected error, got success"
- kill -USR1 $MULTIOP_PID || return 2
- wait $MULTIOP_PID || return 3
- rm $TMP/$tfile.junk $DIR/$tdir/multiop
+ error "multiop open $TMP/$tfile.junk failed"
+ MULTIOP_PID=$!
+ $MULTIOP $DIR/$tdir/multiop Oc && error "expected error, got success"
+ kill -USR1 $MULTIOP_PID || error "kill -USR1 PID $MULTIOP_PID failed"
+ wait $MULTIOP_PID || error "wait PID $MULTIOP_PID failed"
+ # the paused multiop created this file in $TMP, outside the test
+ # directory, so it has to be removed explicitly
+ rm -f $TMP/$tfile.junk
}
run_test 43a "open(RDWR) of file being executed should return -ETXTBSY"
test_43b() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
- test_mkdir -p $DIR/$tdir
- cp -p `which $MULTIOP` $DIR/$tdir/multiop ||
- cp -p multiop $DIR/$tdir/multiop
+ # Run a copy of multiop from the test directory and keep it paused,
+ # then check that truncating the running binary is refused.
+ test_mkdir $DIR/$tdir
+ cp -p $(which $MULTIOP) $DIR/$tdir/multiop ||
+ cp -p multiop $DIR/$tdir/multiop
MULTIOP_PROG=$DIR/$tdir/multiop multiop_bg_pause $TMP/$tfile.junk O_c ||
- return 1
- MULTIOP_PID=$!
- $TRUNCATE $DIR/$tdir/multiop 0 && error "expected error, got success"
- kill -USR1 $MULTIOP_PID || return 2
- wait $MULTIOP_PID || return 3
- rm $TMP/$tfile.junk $DIR/$tdir/multiop
+ error "multiop open $TMP/$tfile.junk failed"
+ MULTIOP_PID=$!
+ $TRUNCATE $DIR/$tdir/multiop 0 && error "expected error, got success"
+ kill -USR1 $MULTIOP_PID || error "kill -USR1 PID $MULTIOP_PID failed"
+ wait $MULTIOP_PID || error "wait PID $MULTIOP_PID failed"
+ # the paused multiop created this file in $TMP, outside the test
+ # directory, so it has to be removed explicitly
+ rm -f $TMP/$tfile.junk
}
run_test 43b "truncate of file being executed should return -ETXTBSY"
test_43c() {
local testdir="$DIR/$tdir"
- test_mkdir -p $DIR/$tdir
+ test_mkdir $testdir
cp $SHELL $testdir/ # copy the $SHELL binary into the test directory
- ( cd $(dirname $SHELL) && md5sum $(basename $SHELL) ) | \
- ( cd $testdir && md5sum -c)
+ ( cd $(dirname $SHELL) && md5sum $(basename $SHELL) ) |
+ ( cd $testdir && md5sum -c ) # check the copy against the checksum of the original
}
-run_test 43c "md5sum of copy into lustre========================"
+run_test 43c "md5sum of copy into lustre"
test_44A() { # was test_44
[[ $OSTCOUNT -lt 2 ]] && skip_env "skipping 2-stripe test" && return
dd if=/dev/zero of=$DIR/f1 bs=4k count=1 seek=1023
dd if=$DIR/f1 bs=4k count=1 > /dev/null
}
-run_test 44A "zero length read from a sparse stripe ============="
+run_test 44A "zero length read from a sparse stripe"
test_44a() {
local nstripe=$($LCTL lov_getconfig $DIR | grep default_stripe_count: |
}
run_test 51e "check file nlink limit"
+# Create and keep open as many files in one directory as the free inode
+# count of its MDT and the open file descriptor limit allow, capped at
+# 100000 files.
+test_51f() {
+	test_mkdir $DIR/$tdir
+
+	local max=100000
+	local ulimit_old=$(ulimit -n)
+	local spare=20 # number of spare fd's for scripts/libraries, etc.
+	local mdt=$(lfs getstripe -M $DIR/$tdir)
+	local numfree=$(lfs df -i $DIR/$tdir |
+			awk -v pat="MDT:$mdt" '$0 ~ pat { print $4 }')
+
+	echo "MDT$mdt numfree=$numfree, max=$max"
+	# an empty or multi-line result would silently turn into 0 files or
+	# an arithmetic syntax error further down
+	[[ "$numfree" =~ ^[0-9]+$ ]] ||
+		{ error "bad free inode count '$numfree' for MDT$mdt"; return 1; }
+
+	[[ $numfree -gt $max ]] && numfree=$max || numfree=$((numfree * 7 / 8))
+	if [ $((numfree + spare)) -gt $ulimit_old ]; then
+		while ! ulimit -n $((numfree + spare)); do
+			numfree=$((numfree * 3 / 4))
+			# nothing left to shrink: give up instead of looping
+			# forever on a limit that cannot be changed
+			[ $numfree -gt 0 ] ||
+				{ error "unable to raise ulimit -n"; return 1; }
+		done
+		echo "changed ulimit from $ulimit_old to $((numfree + spare))"
+	else
+		echo "left ulimit at $ulimit_old"
+	fi
+
+	createmany -o -k -t 120 $DIR/$tdir/f $numfree ||
+		error "create+open $numfree files in $DIR/$tdir failed"
+	ulimit -n $ulimit_old
+
+	# if createmany exits at 120s there will be fewer than $numfree files
+	unlinkmany $DIR/$tdir/f $numfree || true
+}
+run_test 51f "check many open files limit"
+
test_52a() {
[ -f $DIR/$tdir/foo ] && chattr -a $DIR/$tdir/foo
test_mkdir -p $DIR/$tdir
run_test 53 "verify that MDS and OSTs agree on pre-creation ===="
test_54a() {
+ perl -MSocket -e ';' || { skip "no Socket perl module installed" && return; }
+
$SOCKETSERVER $DIR/socket ||
error "$SOCKETSERVER $DIR/socket failed: $?"
$SOCKETCLIENT $DIR/socket ||
}
run_test 56b "check $LFS getdirstripe"
+test_56c() {
+ local ost_idx=0
+ local ost_name=$(ostname_from_index $ost_idx)
+
+ local old_status=$(ost_dev_status $ost_idx)
+ [[ -z "$old_status" ]] ||
+ { skip_env "OST $ost_name is in $old_status status"; return 0; }
+
+ do_facet ost1 $LCTL set_param -n obdfilter.$ost_name.degraded=1
+ sleep_maxage
+
+ local new_status=$(ost_dev_status $ost_idx)
+ [[ "$new_status" = "D" ]] ||
+ error "OST $ost_name is in status of '$new_status', not 'D'"
+
+ do_facet ost1 $LCTL set_param -n obdfilter.$ost_name.degraded=0
+ sleep_maxage
+
+ new_status=$(ost_dev_status $ost_idx)
+ [[ -z "$new_status" ]] ||
+ error "OST $ost_name is in status of '$new_status', not ''"
+}
+run_test 56c "check 'lfs df' showing device status"
+
NUMFILES=3
NUMDIRS=3
setup_56() {
test_56s() { # LU-611
TDIR=$DIR/${tdir}s
- setup_56 $NUMFILES $NUMDIRS "-c $OSTCOUNT"
+ #LU-9369
+ setup_56 0 $NUMDIRS
+ for i in $(seq 1 $NUMDIRS); do
+ $SETSTRIPE -c $((OSTCOUNT + 1)) $TDIR/dir$i/$tfile
+ done
+ EXPECTED=$NUMDIRS
+ CMD="$LFIND -c $OSTCOUNT $TDIR"
+ NUMS=$($CMD | wc -l)
+ [ $NUMS -eq $EXPECTED ] || {
+ $GETSTRIPE -R $TDIR
+ error "\"$CMD\" wrong: found $NUMS, expected $EXPECTED"
+ }
+ rm -rf $TDIR
+
+ setup_56 $NUMFILES $NUMDIRS "-c $OSTCOUNT"
if [[ $OSTCOUNT -gt 1 ]]; then
$SETSTRIPE -c 1 $TDIR/$tfile.{0,1,2,3}
ONESTRIPE=4
test_56t() { # LU-611
TDIR=$DIR/${tdir}t
+
+ #LU-9369
+ setup_56 0 $NUMDIRS
+ for i in $(seq 1 $NUMDIRS); do
+ $SETSTRIPE -S 4M $TDIR/dir$i/$tfile
+ done
+ EXPECTED=$NUMDIRS
+ CMD="$LFIND -S 4M $TDIR"
+ NUMS=$($CMD | wc -l)
+ [ $NUMS -eq $EXPECTED ] || {
+ $GETSTRIPE -R $TDIR
+ error "\"$CMD\" wrong: found $NUMS, expected $EXPECTED"
+ }
+ rm -rf $TDIR
+
setup_56 $NUMFILES $NUMDIRS "--stripe-size 512k"
$SETSTRIPE -S 256k $TDIR/$tfile.{0,1,2,3}
start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "start mgs failed"
$pass || error "test failed, see FAILED test_60a messages for specifics"
}
-run_test 60a "llog_test run from kernel module and test llog_reader =========="
+run_test 60a "llog_test run from kernel module and test llog_reader"
+
+test_60aa() {
+ # test old logid format: '$'-prefixed log name, device given as %MGS
+ if [ $(lustre_version_code mgs) -le $(version_code 3.1.53) ]; then # only on an MGS at or below 3.1.53
+ do_facet mgs $LCTL dl | grep MGS
+ do_facet mgs "$LCTL --device %MGS llog_print \\\\\\\$$FSNAME-client" || # NOTE(review): backslashes presumably keep the literal '$' through do_facet's quoting levels - confirm
+ error "old llog_print failed"
+ fi
+
+ # test new logid format: plain log name, device given as MGS
+ if [ $(lustre_version_code mgs) -ge $(version_code 2.9.53) ]; then # only on an MGS at or above 2.9.53
+ do_facet mgs "$LCTL --device MGS llog_print $FSNAME-client" ||
+ error "new llog_print failed"
+ fi
+}
+run_test 60aa "llog_print works with FIDs and simple names"
test_60b() { # bug 6411
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
}
run_test 77b "checksum error on client write, read"
+# Undo the settings changed by test_77c and remove its checksum dump files.
+# Reads check_ost, osc_file_prefix and ost_file_prefix from the caller's
+# scope (they are locals of test_77c). When run from the EXIT trap after
+# test_77c has returned those variables are unset, which is treated as
+# "no OSS check" and "no files to remove".
+# Always returns 0.
+cleanup_77c() {
+	trap 0
+	set_checksums 0
+	$LCTL set_param osc.*osc-[^mM]*.checksum_dump=0
+	if ${check_ost:-false}; then
+		do_facet ost1 $LCTL set_param obdfilter.*-OST*.checksum_dump=0
+	fi
+	# The prefixes must be quoted in the test: with an empty prefix an
+	# unquoted "[ -n ]" is true and the bare "*" glob would remove every
+	# file in the current directory.
+	if [ -n "$osc_file_prefix" ]; then
+		rm -f ${osc_file_prefix}*
+	fi
+	if ${check_ost:-false} && [ -n "$ost_file_prefix" ]; then
+		do_facet ost1 rm -f ${ost_file_prefix}\*
+	fi
+	return 0
+}
+
+test_77c() {
+ [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+ $GSS && skip "could not run with gss" && return
+
+ local bad1
+ local osc_file_prefix
+ local osc_file
+ local check_ost=false
+ local ost_file_prefix
+ local ost_file
+ local orig_cksum
+ local dump_cksum
+ local fid
+
+ # ensure corruption will occur on first OSS/OST
+ $LFS setstripe -i 0 $DIR/$tfile
+
+ [ ! -f $F77_TMP ] && setup_f77
+ dd if=$F77_TMP of=$DIR/$tfile bs=1M count=$F77SZ conv=sync ||
+ error "dd write error: $?"
+ fid=$($LFS path2fid $DIR/$tfile)
+
+ if [ $(lustre_version_code ost1) -ge $(version_code 2.5.42.6) ]
+ then
+ check_ost=true
+ ost_file_prefix=$(do_facet ost1 $LCTL get_param -n debug_path)
+ ost_file_prefix=${ost_file_prefix}-checksum_dump-ost-\\${fid}
+ else
+ echo "OSS do not support bulk pages dump upon error"
+ fi
+
+ osc_file_prefix=$($LCTL get_param -n debug_path)
+ osc_file_prefix=${osc_file_prefix}-checksum_dump-osc-\\${fid}
+
+ trap cleanup_77c EXIT
+
+ set_checksums 1
+ # enable bulk pages dump upon error on Client
+ $LCTL set_param osc.*osc-[^mM]*.checksum_dump=1
+ # enable bulk pages dump upon error on OSS
+ $check_ost &&
+ do_facet ost1 $LCTL set_param obdfilter.*-OST*.checksum_dump=1
+
+ # flush Client cache to allow next read to reach OSS
+ cancel_lru_locks osc
+
+ #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
+ $LCTL set_param fail_loc=0x80000408
+ dd if=$DIR/$tfile of=/dev/null bs=1M || error "dd read error: $?"
+ $LCTL set_param fail_loc=0
+
+ rm -f $DIR/$tfile
+
+ # check cksum dump on Client
+ osc_file=$(ls ${osc_file_prefix}*)
+ [ -n "$osc_file" ] || error "no checksum dump file on Client"
+ # OBD_FAIL_OSC_CHECKSUM_RECEIVE corrupts with "bad1" at start of file
+ bad1=$(dd if=$osc_file bs=1 count=4 2>/dev/null) || error "dd error: $?"
+ [ $bad1 == "bad1" ] || error "unexpected corrupt pattern"
+ orig_cksum=$(dd if=$F77_TMP bs=1 skip=4 count=1048572 2>/dev/null |
+ cksum)
+ dump_cksum=$(dd if=$osc_file bs=1 skip=4 2>/dev/null | cksum)
+ [[ "$orig_cksum" == "$dump_cksum" ]] ||
+ error "dump content does not match on Client"
+
+ $check_ost || skip "No need to check cksum dump on OSS"
+
+ # check cksum dump on OSS
+ ost_file=$(do_facet ost1 ls ${ost_file_prefix}\*)
+ [ -n "$ost_file" ] || error "no checksum dump file on OSS"
+ orig_cksum=$(dd if=$F77_TMP bs=1048576 count=1 2>/dev/null | cksum)
+ dump_cksum=$(do_facet ost1 dd if=$ost_file 2>/dev/null \| cksum)
+ [[ "$orig_cksum" == "$dump_cksum" ]] ||
+ error "dump content does not match on OSS"
+
+ cleanup_77c
+}
+run_test 77c "checksum error on client read with debug"
+
test_77d() { # bug 10889
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
$GSS && skip "could not run with gss" && return
}
run_test 77g "checksum error on OST write, read"
-test_77i() { # bug 13805
- [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
- $GSS && skip "could not run with gss" && return
- #define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
- lctl set_param fail_loc=0x40b
- remount_client $MOUNT
- lctl set_param fail_loc=0
- for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do
- PARAM=`echo ${VALUE[0]} | cut -d "=" -f1`
- algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'`
- [ "$algo" = "adler" ] || error "algo set to $algo instead of adler"
- done
- remount_client $MOUNT
-}
-run_test 77i "client not supporting OSD_CONNECT_CKSUM"
-
test_77j() { # bug 13805
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
$GSS && skip "could not run with gss" && return
lctl set_param fail_loc=0x40c
remount_client $MOUNT
lctl set_param fail_loc=0
- sleep 2 # wait async osc connect to finish
- for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do
- PARAM=`echo ${VALUE[0]} | cut -d "=" -f1`
- algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'`
+ # wait async osc connect to finish and reflect updated state value
+ local i
+ for (( i=0; i < OSTCOUNT; i++ )) ; do
+ wait_osc_import_state client ost$((i+1)) FULL
+ done
+
+ for VALUE in $(lctl get_param osc.*osc-[^mM]*.checksum_type); do
+ PARAM=$(echo ${VALUE[0]} | cut -d "=" -f1)
+ algo=$(lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g')
[ "$algo" = "adler" ] || error "algo set to $algo instead of adler"
done
remount_client $MOUNT
}
run_test 101f "check mmap read performance"
+test_101g_brw_size_test() {
+ local mb=$1
+ local pages=$((mb * 1048576 / $(page_size)))
+
+ $LCTL set_param osc.*.max_pages_per_rpc=${mb}M ||
+ { error "unable to set max_pages_per_rpc=${mb}M"; return 1; }
+ for mp in $($LCTL get_param -n osc.*.max_pages_per_rpc); do
+ [ $mp -ne $pages ] && error "max_pages_per_rpc $mp != $pages" &&
+ return 2
+ done
+
+ $LCTL set_param -n osc.*.rpc_stats=0
+
+ # 10 RPCs should be enough for the test
+ local count=10
+ dd if=/dev/zero of=$DIR/$tfile bs=${mb}M count=$count ||
+ { error "dd write ${mb} MB blocks failed"; return 3; }
+ cancel_lru_locks osc
+ dd of=/dev/null if=$DIR/$tfile bs=${mb}M count=$count ||
+ { error "dd write ${mb} MB blocks failed"; return 4; }
+
+ # calculate number of full-sized read and write RPCs
+ rpcs=($($LCTL get_param -n 'osc.*.rpc_stats' |
+ sed -n '/pages per rpc/,/^$/p' |
+ awk '/'$pages':/ { reads += $2; writes += $5 };' \
+ 'END { print reads,writes }'))
+ [ ${rpcs[0]} -ne $count ] && error "${rpcs[0]} != $count read RPCs" &&
+ return 5
+ [ ${rpcs[1]} -ne $count ] && error "${rpcs[1]} != $count write RPCs" &&
+ return 6
+
+ return 0
+}
+
test_101g() {
local rpcs
local osts=$(get_facets OST)
local list=$(comma_list $(osts_nodes))
local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
+ local brw_size="obdfilter.*.brw_size"
+ # unit suffix for brw_size, only set for OSTs at or above 2.9.52
+ local suffix
- save_lustre_params $osts "obdfilter.*.brw_size" > $p
-
- $LFS setstripe -c 1 $DIR/$tfile
+ $LFS setstripe -i 0 -c 1 $DIR/$tfile
+ local orig_mb=$(do_facet ost1 $LCTL get_param -n $brw_size | head -n 1)
if [ $(lustre_version_code ost1) -ge $(version_code 2.8.52) ]; then
- set_osd_param $list '' brw_size 16M
-
- echo "remount client to enable large RPC size"
- remount_client $MOUNT || error "remount_client failed"
-
- for mp in $($LCTL get_param -n osc.*.max_pages_per_rpc); do
- [ "$mp" -eq 4096 ] ||
- error "max_pages_per_rpc not correctly set"
- done
-
- $LCTL set_param -n osc.*.rpc_stats=0
-
- # 10*16 MiB should be enough for the test
- dd if=/dev/zero of=$DIR/$tfile bs=16M count=10
- cancel_lru_locks osc
- dd of=/dev/null if=$DIR/$tfile bs=16M count=10
+ [ $(lustre_version_code ost1) -ge $(version_code 2.9.52) ] &&
+ suffix="M"
+ # numeric test: "<" inside [[ ]] compares strings, and "4" sorts
+ # after "16", so a 4MB server would never be raised to 16MB
+ if [[ $orig_mb -lt 16 ]]; then
+ save_lustre_params $osts "$brw_size" > $p
+ do_nodes $list $LCTL set_param -n $brw_size=16$suffix ||
+ error "set 16MB RPC size failed"
+
+ echo "remount client to enable new RPC size"
+ remount_client $MOUNT || error "remount_client failed"
+ fi
- # calculate 16 MiB RPCs
- rpcs=$($LCTL get_param 'osc.*.rpc_stats' |
- sed -n '/pages per rpc/,/^$/p' |
- awk 'BEGIN { sum = 0 }; /4096:/ { sum += $2 };
- END { print sum }')
- echo $rpcs RPCs
- [ "$rpcs" -eq 10 ] || error "not all RPCs are 16 MiB BRW rpcs"
+ test_101g_brw_size_test 16 || error "16MB RPC test failed"
+ # should be able to set brw_size=12, but no rpc_stats for that
+ test_101g_brw_size_test 8 || error "8MB RPC test failed"
fi
- echo "set RPC size to 4MB"
-
- $LCTL set_param -n osc.*.max_pages_per_rpc=4M osc.*.rpc_stats=0
- dd if=/dev/zero of=$DIR/$tfile bs=4M count=25
- cancel_lru_locks osc
- dd of=/dev/null if=$DIR/$tfile bs=4M count=25
+ test_101g_brw_size_test 4 || error "4MB RPC test failed"
- # calculate 4 MiB RPCs
- rpcs=$($LCTL get_param 'osc.*.rpc_stats' |
- sed -n '/pages per rpc/,/^$/p' |
- awk 'BEGIN { sum = 0 }; /1024:/ { sum += $2 };
- END { print sum }')
- echo $rpcs RPCs
- [ "$rpcs" -eq 10 ] || error "not all RPCs are 4 MiB BRW rpcs"
-
- restore_lustre_params < $p
- remount_client $MOUNT || error "remount_client failed"
+ # restore only what was saved above; the parameters file does not
+ # exist when the OST is older than 2.8.52
+ if [[ $orig_mb -lt 16 && -f $p ]]; then
+ restore_lustre_params < $p
+ remount_client $MOUNT || error "remount_client restore failed"
+ fi
rm -f $p $DIR/$tfile
}
}
run_test 110 "filename length checking"
+#
+# Purpose: To verify dynamic thread (OSS) creation.
+#
test_115() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
- OSTIO_pre=$(ps -e | grep ll_ost_io | awk '{ print $4 }'| sort -n |
- tail -1 | cut -c11-20)
- [ -z "$OSTIO_pre" ] && skip "no OSS threads" && return
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
+ # Lustre does not stop service threads once they are started.
+ # Reset number of running threads to default.
+ stopall
+ setupall
+
+ local OSTIO_pre
+ local save_params="$TMP/sanity-$TESTNAME.parameters"
+
+ # Get ll_ost_io count before I/O
+ OSTIO_pre=$(do_facet ost1 \
+ "$LCTL get_param ost.OSS.ost_io.threads_started | cut -d= -f2")
+ # Exit if lustre is not running (ll_ost_io not running).
+ [ -z "$OSTIO_pre" ] && error "no OSS threads"
+
echo "Starting with $OSTIO_pre threads"
+ local thread_max=$((OSTIO_pre * 2))
+ local rpc_in_flight=$((thread_max * 2))
+ # Number of I/O Process proposed to be started.
+ local nfiles
+ local facets=$(get_facets OST)
- NUMTEST=20000
- NUMFREE=$(df -i -P $DIR | tail -n 1 | awk '{ print $4 }')
- [[ $NUMFREE -lt $NUMTEST ]] && NUMTEST=$(($NUMFREE - 1000))
- echo "$NUMTEST creates/unlinks"
- test_mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile $NUMTEST
- unlinkmany $DIR/$tdir/$tfile $NUMTEST
+ save_lustre_params client \
+ "osc.*OST*.max_rpcs_in_flight" > $save_params
+ save_lustre_params $facets \
+ "ost.OSS.ost_io.threads_max" >> $save_params
+
+ # Set in_flight to $rpc_in_flight
+ $LCTL set_param osc.*OST*.max_rpcs_in_flight=$rpc_in_flight ||
+ error "Failed to set max_rpcs_in_flight to $rpc_in_flight"
+ nfiles=${rpc_in_flight}
+ # Set ost thread_max to $thread_max
+ do_facet ost1 \
+ "$LCTL set_param ost.OSS.ost_io.threads_max=$thread_max"
+
+ # 5 Minutes should be sufficient for max number of OSS
+ # threads(thread_max) to be created.
+ local timeout=300
+
+ # Start I/O.
+ local WTL=${WTL:-"$LUSTRE/tests/write_time_limit"}
+ mkdir -p $DIR/$tdir
+ for i in $(seq $nfiles); do
+ local file=$DIR/$tdir/${tfile}-$i
+ $LFS setstripe -c -1 -i 0 $file
+ ($WTL $file $timeout)&
+ done
- OSTIO_post=$(ps -e | grep ll_ost_io | awk '{ print $4 }' | sort -n |
- tail -1 | cut -c11-20)
+ # I/O Started - Wait for thread_started to reach thread_max or report
+ # error if thread_started is more than thread_max.
+ echo "Waiting for thread_started to reach thread_max"
+ local thread_started=0
+ local end_time=$((SECONDS + timeout))
+
+ while [ $SECONDS -le $end_time ] ; do
+ echo -n "."
+ # Get ost i/o thread_started count.
+ thread_started=$(do_facet ost1 \
+ "$LCTL get_param \
+ ost.OSS.ost_io.threads_started | cut -d= -f2")
+ # Break out if thread_started is equal/greater than thread_max
+ if [[ $thread_started -ge $thread_max ]]; then
+ echo ll_ost_io thread_started $thread_started, \
+ equal/greater than thread_max $thread_max
+ break
+ fi
+ sleep 1
+ done
- # don't return an error
- [ $OSTIO_post == $OSTIO_pre ] && echo \
- "WARNING: No new ll_ost_io threads were created ($OSTIO_pre)" &&
- echo "This may be fine, depending on what ran before this test" &&
- echo "and how fast this system is." && return
+ # Cleanup - We have the numbers, Kill i/o jobs if running.
+ jobcount=($(jobs -p))
+ for i in $(seq 0 $((${#jobcount[@]}-1)))
+ do
+ kill -9 ${jobcount[$i]}
+ if [ $? -ne 0 ] ; then
+ echo Warning: \
+ Failed to Kill \'WTL\(I/O\)\' with pid ${jobcount[$i]}
+ fi
+ done
- echo "Started with $OSTIO_pre threads, ended with $OSTIO_post"
+ # Cleanup files left by WTL binary.
+ for i in $(seq $nfiles); do
+ local file=$DIR/$tdir/${tfile}-$i
+ rm -rf $file
+ if [ $? -ne 0 ] ; then
+ echo "Warning: Failed to delete file $file"
+ fi
+ done
+
+ restore_lustre_params <$save_params
+ rm -f $save_params || echo "Warning: delete file '$save_params' failed"
+
+ # Error out if no new thread has started or Thread started is greater
+ # than thread max.
+ if [[ $thread_started -le $OSTIO_pre ||
+ $thread_started -gt $thread_max ]]; then
+ error "ll_ost_io: thread_started $thread_started" \
+ "OSTIO_pre $OSTIO_pre, thread_max $thread_max." \
+ "No new thread started or thread started greater " \
+ "than thread_max."
+ fi
}
run_test 115 "verify dynamic thread creation===================="
local canondev
local node
- local LDPROC=/proc/fs/ldiskfs
+ local ldproc=/proc/fs/ldiskfs
local facets=$(get_facets MDS)
for facet in ${facets//,/ }; do
canondev=$(ldiskfs_canon \
*.$(convert_facet2label $facet).mntdev $facet)
- do_facet $facet "test -e $LDPROC/$canondev/max_dir_size" ||
- LDPROC=/sys/fs/ldiskfs
- do_facet $facet "echo $1 >$LDPROC/$canondev/max_dir_size"
- do_facet $facet "test -e $LDPROC/$canondev/warning_dir_size" ||
- LDPROC=/sys/fs/ldiskfs
- do_facet $facet "echo $2 >$LDPROC/$canondev/warning_dir_size"
+ do_facet $facet "test -e $ldproc/$canondev/max_dir_size" ||
+ ldproc=/sys/fs/ldiskfs
+ do_facet $facet "echo $1 >$ldproc/$canondev/max_dir_size"
+ do_facet $facet "echo $2 >$ldproc/$canondev/warning_dir_size"
done
}
}
test_129() {
+ # Limit the MDT directory size to 5 MDT blocks, create files until
+ # a create fails with EFBIG or ENOSPC, and check that the MDS logged
+ # both the "is approaching" and the "has reached" messages.
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.56) ]] ||
+ { skip "Need MDS version with at least 2.5.56"; return 0; }
+
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
if [ "$(facet_fstype $SINGLEMDS)" != ldiskfs ]; then
skip "Only applicable to ldiskfs-based MDTs"
remote_mds_nodsh && skip "remote MDS with nodsh" && return
local ENOSPC=28
local EFBIG=27
- local has_warning=0
+ local has_warning=false
rm -rf $DIR/$tdir
mkdir -p $DIR/$tdir
# block size of mds1
- local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/})
- local MDSBLOCKSIZE=$($LCTL get_param -n mdc.*MDT0000*.blocksize)
- local MAX=$((MDSBLOCKSIZE * 5))
- set_dir_limits $MAX $MAX
- local I=$(stat -c%s "$DIR/$tdir")
- local J=0
- while [[ $I -le $MAX ]]; do
- $MULTIOP $DIR/$tdir/$J Oc
+ local maxsize=$(($($LCTL get_param -n mdc.*MDT0000*.blocksize) * 5))
+ set_dir_limits $maxsize $maxsize
+ local dirsize=$(stat -c%s "$DIR/$tdir")
+ local nfiles=0
+ while [[ $dirsize -le $maxsize ]]; do
+ $MULTIOP $DIR/$tdir/file_base_$nfiles Oc
rc=$?
- if [ $has_warning -eq 0 ]; then
- check_mds_dmesg '"is approaching"' &&
- has_warning=1
+ if ! $has_warning; then
+ check_mds_dmesg '"is approaching"' && has_warning=true
fi
- #check two errors ENOSPC for new version of ext4 max_dir_size patch
- #mainline kernel commit df981d03eeff7971ac7e6ff37000bfa702327ef1
- #and EFBIG for previous versions
+ # check two errors:
+ # ENOSPC for new ext4 max_dir_size (kernel commit df981d03ee)
+ # EFBIG for previous versions included in ldiskfs series
if [ $rc -eq $EFBIG -o $rc -eq $ENOSPC ]; then
set_dir_limits 0 0
echo "return code $rc received as expected"
- createmany -o $DIR/$tdir/$J_file_ 1000 ||
+ createmany -o $DIR/$tdir/file_extra_$nfiles. 5 ||
error_exit "create failed w/o dir size limit"
check_mds_dmesg '"has reached"' ||
- error_exit "has reached message should be output"
+ error_exit "reached message should be output"
- [ $has_warning -eq 0 ] &&
+ # has_warning holds the command name true/false, so run it
+ # instead of comparing it numerically with -eq
+ $has_warning ||
error_exit "warning message should be output"
- I=$(stat -c%s "$DIR/$tdir")
+ dirsize=$(stat -c%s "$DIR/$tdir")
- if [ $(lustre_version_code $SINGLEMDS) -lt \
- $(version_code 2.4.51) ]
- then
- [[ $I -eq $MAX ]] && return 0
- else
- [[ $I -gt $MAX ]] && return 0
- fi
- error_exit "current dir size $I, previous limit $MAX"
+ [[ $dirsize -ge $maxsize ]] && return 0
+ error_exit "current dir size $dirsize, " \
+ "previous limit $maxsize"
elif [ $rc -ne 0 ]; then
set_dir_limits 0 0
- error_exit "return code $rc received instead of expected " \
- "$EFBIG or $ENOSPC, files in dir $I"
+ error_exit "return $rc received instead of expected " \
+ "$EFBIG or $ENOSPC, files in dir $dirsize"
fi
- J=$((J+1))
- I=$(stat -c%s "$DIR/$tdir")
+ nfiles=$((nfiles + 1))
+ dirsize=$(stat -c%s "$DIR/$tdir")
done
set_dir_limits 0 0
- error "exceeded dir size limit $MAX($MDSCOUNT) : $I bytes"
+ error "exceeded dir size limit $maxsize($MDSCOUNT) : $dirsize bytes"
}
run_test 129 "test directory size limit ========================"
run_test 131e "test read hitting hole"
check_stats() {
+ # usage: check_stats <facet> <op> [want]
+ # Print the stats line matching <op> on <facet> and call error if
+ # there is none, or if [want] is > 0 and the line's second field
+ # differs from it.
+ local facet=$1
+ local op=$2
+ local want=${3:-0}
local res
- local count
- case $1 in
- $SINGLEMDS) res=`do_facet $SINGLEMDS $LCTL get_param mdt.$FSNAME-MDT0000.md_stats | grep "$2"`
+
+ case $facet in
+ mds*) res=$(do_facet $facet \
+ $LCTL get_param mdt.$FSNAME-MDT0000.md_stats | grep "$op")
;;
- ost) res=`do_facet ost1 $LCTL get_param obdfilter.$FSNAME-OST0000.stats | grep "$2"`
+ ost*) res=$(do_facet $facet \
+ $LCTL get_param obdfilter.$FSNAME-OST0000.stats | grep "$op")
;;
- *) error "Wrong argument $1" ;;
+ *) error "Wrong facet '$facet'" ;;
esac
echo $res
- [ -z "$res" ] && error "The counter for $2 on $1 was not incremented"
+ [ "$res" ] || error "The counter for $op on $facet was not incremented"
# if the argument $3 is zero, it means any stat increment is ok.
- if [[ $3 -gt 0 ]]; then
- count=$(echo $res | awk '{ print $2 }')
- [[ $count -ne $3 ]] &&
- error "The $2 counter on $1 is wrong - expected $3"
+ if [[ $want -gt 0 ]]; then
+ local count=$(echo $res | awk '{ print $2 }')
+ # written as "match || error" so that a matching count leaves
+ # status 0 as the function's return value
+ [[ $count -eq $want ]] ||
+ error "The $op counter on $facet is $count, not $want"
fi
}
dd if=/dev/zero of=${testdir}/${tfile} conv=notrunc bs=512k count=1 || error "dd failed"
sync
cancel_lru_locks osc
- check_stats ost "write" 1
+ check_stats ost1 "write" 1
dd if=${testdir}/${tfile} of=/dev/null bs=1k count=1 || error "dd failed"
- check_stats ost "read" 1
+ check_stats ost1 "read" 1
> ${testdir}/${tfile} || error "truncate failed"
- check_stats ost "punch" 1
+ check_stats ost1 "punch" 1
rm -f ${testdir}/${tfile} || error "file remove failed"
wait_delete_completed
- check_stats ost "destroy" 1
+ check_stats ost1 "destroy" 1
rm -rf $DIR/${tdir}
}
}
run_test 133e "Verifying OST {read,write}_bytes nid stats ================="
-proc_dirs=""
-for dir in /proc/fs/lustre/ /proc/sys/lnet/ /proc/sys/lustre/ \
- /sys/fs/lustre/ /sys/fs/lnet/ /sys/kernel/debug/lnet/ \
- /sys/kernel/debug/lustre/; do
- [[ -d $dir ]] && proc_dirs+=" $dir"
-done
+proc_regexp="/{proc,sys}/{fs,sys,kernel/debug}/{lustre,lnet}/"
test_133f() {
remote_mds_nodsh && skip "remote MDS with nodsh" && return
remote_ost_nodsh && skip "remote OST with nodsh" && return
# First without trusting modes.
+ local proc_dirs=$(eval \ls -d $proc_regexp 2>/dev/null)
+ echo "proc_dirs='$proc_dirs'"
+ [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME"
find $proc_dirs -exec cat '{}' \; &> /dev/null
# Second verifying readability.
# eventually, this can also be replaced with "lctl get_param -R",
# but not until that option is always available on the server
local facet
- for facet in $SINGLEMDS ost1; do
- do_facet $facet find $proc_dirs \
+ for facet in mds1 ost1; do
+ local facet_proc_dirs=$(do_facet $facet \
+ \\\ls -d $proc_regexp 2>/dev/null)
+ echo "${facet}_proc_dirs='$facet_proc_dirs'"
+ [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet"
+ do_facet $facet find $facet_proc_dirs \
! -name req_history \
-exec cat '{}' \\\; &> /dev/null
- do_facet $facet find $proc_dirs \
+ do_facet $facet find $facet_proc_dirs \
! -name req_history \
-type f \
-exec cat '{}' \\\; &> /dev/null ||
remote_mds_nodsh && skip "remote MDS with nodsh" && return
remote_ost_nodsh && skip "remote OST with nodsh" && return
# Second verifying writability.
+ local proc_dirs=$(eval \ls -d $proc_regexp 2>/dev/null)
+ echo "proc_dirs='$proc_dirs'"
+ [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME"
find $proc_dirs \
-type f \
-not -name force_lbug \
-exec badarea_io '{}' \; &> /dev/null ||
error "find $proc_dirs failed"
- [ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.5.54) ] &&
- skip "Too old lustre on MDS" && return
-
- [ $(lustre_version_code ost1) -le $(version_code 2.5.54) ] &&
- skip "Too old lustre on ost1" && return
-
local facet
- for facet in $SINGLEMDS ost1; do
- do_facet $facet find $proc_dirs \
+ for facet in mds1 ost1; do
+ [ $(lustre_version_code $facet) -le $(version_code 2.5.54) ] &&
+ skip "Too old lustre on $facet" && continue
+ local facet_proc_dirs=$(do_facet $facet \
+ \\\ls -d $proc_regexp 2> /dev/null)
+ echo "${facet}_proc_dirs='$facet_proc_dirs'"
+ [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet"
+ do_facet $facet find $facet_proc_dirs \
-type f \
-not -name force_lbug \
-not -name changelog_mask \
-exec badarea_io '{}' \\\; &> /dev/null ||
- error "$facet find $proc_dirs failed"
-
+ error "$facet find $facet_proc_dirs failed"
done
# remount the FS in case writes/reads /proc break the FS
}
run_test 133g "Check for Oopses on bad io area writes/reads in /proc"
+test_133h() {
+ # For the client, mds1 and ost1: list the directories matched by
+ # $proc_regexp on that facet and report an error naming every
+ # regular file below them that does not end with a newline.
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+ [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.9.54) ]] &&
+ skip "Need MDS version at least 2.9.54" && return
+
+ local facet
+ for facet in client mds1 ost1; do
+ # the ls runs on the facet itself, so the list reflects the
+ # directories that exist on that node
+ local facet_proc_dirs=$(do_facet $facet \
+ \\\ls -d $proc_regexp 2> /dev/null)
+ [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet"
+ echo "${facet}_proc_dirs='$facet_proc_dirs'"
+ # Get the list of files that are missing the terminating newline
+ # awk uses RS='\v\v', presumably so that a whole file is read as
+ # one record; its END rule prints FILENAME when the last field
+ # does not match /.*\n$/.
+ # NOTE(review): the \| \; \\\$ escapes look intended to survive
+ # the local shell and be interpreted on the facet - confirm
+ # against do_facet.
+ local missing=($(do_facet $facet \
+ find ${facet_proc_dirs} -type f \| \
+ while read F\; do \
+ awk -v FS='\v' -v RS='\v\v' \
+ "'END { if(NR>0 && \
+ \\\$NF !~ /.*\\\n\$/) \
+ print FILENAME}'" \
+ '\$F'\; \
+ done 2>/dev/null))
+ [ ${#missing[*]} -eq 0 ] ||
+ error "files do not end with newline: ${missing[*]}"
+ done
+}
+run_test 133h "Proc files should end with newlines"
+
test_134a() {
remote_mds_nodsh && skip "remote MDS with nodsh" && return
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
}
run_test 160d "verify that changelog log catch the migrate event"
+test_160e() {
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
+ # Create a user
+ CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+ changelog_register -n)
+ echo "Registered as changelog user $CL_USER"
+ trap cleanup_changelog EXIT
+
+ # Delete a future user (expect fail)
+ do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister cl77
+ local rc=$?
+
+ if [ $rc -eq 0 ]; then
+ error "Deleted non-existant user cl77"
+ elif [ $rc -ne 2 ]; then
+ error "changelog_deregister failed with $rc, " \
+ "expected 2 (ENOENT)"
+ fi
+
+ # Clear to a bad index (1 billion should be safe)
+ $LFS changelog_clear $MDT0 $CL_USER 1000000000
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
+ error "Successfully cleared to invalid CL index"
+ elif [ $rc -ne 22 ]; then
+ error "changelog_clear failed with $rc, expected 22 (EINVAL)"
+ fi
+}
+run_test 160e "changelog negative testing"
+
test_161a() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
test_mkdir -p -c1 $DIR/$tdir
}
run_test 161c "check CL_RENME[UNLINK] changelog record flags"
+test_161d() {
+ # Pause a file create with a fail_loc, look up the file's FID in the
+ # CREAT changelog record and read it through $MOUNT/.lustre/fid
+ # while the create is still blocked.
+ local user
+ local pid
+ local fid
+ local tempfile
+ local rc
+
+ # cleanup previous run
+ rm -rf $DIR/$tdir/$tfile
+
+ user=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+ changelog_register -n)
+ [[ $? -eq 0 ]] || error "changelog_register failed"
+
+ # work in a standalone dir to avoid locking on $DIR/$MOUNT
+ # interfering with $MOUNT/.lustre/fid/ access
+ mkdir $DIR/$tdir
+ [[ $? -eq 0 ]] || error "mkdir failed"
+
+ #define OBD_FAIL_LLITE_CREATE_NODE_PAUSE 0x140c | OBD_FAIL_ONCE
+ $LCTL set_param fail_loc=0x8000140c
+ # 5s pause
+ $LCTL set_param fail_val=5
+
+ # create file
+ echo foofoo > $DIR/$tdir/$tfile &
+ pid=$!
+
+ # wait for create to be delayed
+ sleep 2
+
+ ps -p $pid
+ [[ $? -eq 0 ]] || error "create should be blocked"
+
+ # declared separately from the assignment so that a mktemp failure
+ # is not hidden by the exit status of "local"
+ tempfile=$(mktemp) || error "mktemp failed"
+ fid=$(changelog_extract_field $MDT0 "CREAT" "$tfile" "t=")
+ cat $MOUNT/.lustre/fid/$fid 2>/dev/null >$tempfile
+ rc=$?
+ # some delay may occur during ChangeLog publishing and file read just
+ # above, that could allow file write to happen finally
+ [[ -s $tempfile ]] && echo "file should be empty"
+ # remove the scratch file before reporting, so no path leaves it behind
+ rm -f $tempfile
+ [[ $rc -eq 0 ]] || error "cat failed"
+
+ $LCTL set_param fail_loc=0
+
+ wait $pid
+ [[ $? -eq 0 ]] || error "create failed"
+
+ $LFS changelog_clear $MDT0 $user 0
+ do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $user
+}
+run_test 161d "create with concurrent .lustre/fid access"
+
check_path() {
local expected=$1
shift
local rc=0
local rmmod_remote=0
- do_facet ost1 "lsmod | grep -q obdecho || " \
- "{ insmod ${LUSTRE}/obdecho/obdecho.ko || " \
- "modprobe obdecho; }" && rmmod_remote=1
+ do_rpc_nodes $(facet_active_host ost1) load_module obdecho/obdecho &&
+ rmmod_remote=true || error "failed to load module obdecho"
target=$(do_facet ost1 $LCTL dl | awk '/obdfilter/ {print $4;exit}')
[[ -n $target ]] && { obdecho_test $target ost1 || rc=1; }
- [ $rmmod_remote -eq 1 ] && do_facet ost1 "rmmod obdecho"
+ $rmmod_remote && do_facet ost1 "rmmod obdecho"
return $rc
}
run_test 180b "test obdecho directly on obdfilter"
error "create $file2 failed"
$OPENFILE -f O_CREAT:O_LOV_DELAY_CREATE $file3 ||
error "create $file3 failed"
- lovea1=$($LFS getstripe $file1 | sed 1d)
+ lovea1=$(get_layout_param $file1)
$LFS swap_layouts $file2 $file3 ||
error "swap $file2 $file3 layouts failed"
$LFS swap_layouts $file1 $file2 ||
error "swap $file1 $file2 layouts failed"
- lovea2=$($LFS getstripe $file2 | sed 1d)
+ lovea2=$(get_layout_param $file2)
+ echo "$lovea1"
+ echo "$lovea2"
[ "$lovea1" == "$lovea2" ] || error "lovea $lovea1 != $lovea2"
lovea1=$(getfattr -n trusted.lov $file1 | grep ^trusted)
break
done
- cleanup_pools
+ destroy_test_pools
return $rc
}
run_test 200 "OST pools"
check_default_stripe_attr() {
ACTUAL=$($GETSTRIPE $* $DIR/$tdir)
case $1 in
- --stripe-count|--count)
+ --stripe-count|-c)
[ -n "$2" ] && EXPECTED=0 || EXPECTED=$(default_attr count);;
- --stripe-size|--size)
+ --stripe-size|-S)
[ -n "$2" ] && EXPECTED=0 || EXPECTED=$(default_attr size);;
- --stripe-index|--index)
+ --stripe-index|-i)
EXPECTED=-1;;
*)
error "unknown getstripe attr '$1'"
}
test_205() { # Job stats
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.7.1) ]] ||
+ { skip "Need MDS version with at least 2.7.1"; return 0; }
+
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
remote_mgs_nodsh && skip "remote MGS with nodsh" && return
remote_mds_nodsh && skip "remote MDS with nodsh" && return
"ldlm.namespaces.filter-*.contended_locks" >> $p
save_lustre_params $facets \
"ldlm.namespaces.filter-*.contention_seconds" >> $p
- clear_osc_stats
+ clear_stats osc.*.osc_stats
- # agressive lockless i/o settings
- for node in $(osts_nodes); do
- do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60'
- done
- lctl set_param -n osc.*.contention_seconds 60
+ # aggressive lockless i/o settings
+ do_nodes $(comma_list $(osts_nodes)) \
+ "lctl set_param -n ldlm.namespaces.*.max_nolock_bytes=2000000 \
+ ldlm.namespaces.filter-*.contended_locks=0 \
+ ldlm.namespaces.filter-*.contention_seconds=60"
+ lctl set_param -n osc.*.contention_seconds=60
- $DIRECTIO write $DIR/$tfile 0 10 4096
- $CHECKSTAT -s 40960 $DIR/$tfile
+ $DIRECTIO write $DIR/$tfile 0 10 4096
+ $CHECKSTAT -s 40960 $DIR/$tfile
- # disable lockless i/o
- for node in $(osts_nodes); do
- do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 0; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 32; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 0'
- done
- lctl set_param -n osc.*.contention_seconds 0
- clear_osc_stats
+ # disable lockless i/o
+ do_nodes $(comma_list $(osts_nodes)) \
+ "lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes=0 \
+ ldlm.namespaces.filter-*.contended_locks=32 \
+ ldlm.namespaces.filter-*.contention_seconds=0"
+ lctl set_param -n osc.*.contention_seconds=0
+ clear_stats osc.*.osc_stats
- dd if=/dev/zero of=$DIR/$tfile count=0
- $CHECKSTAT -s 0 $DIR/$tfile
+ dd if=/dev/zero of=$DIR/$tfile count=0
+ $CHECKSTAT -s 0 $DIR/$tfile
- restore_lustre_params <$p
- rm -f $p
- rm $DIR/$tfile
+ restore_lustre_params <$p
+ rm -f $p
+ rm $DIR/$tfile
}
-run_test 216 "check lockless direct write works and updates file size and kms correctly"
+run_test 216 "check lockless direct write updates file size and kms correctly"
test_217() { # bug 22430
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=-1
#define OBD_FAIL_OST_ENOINO 0x229
do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0x229
- do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME || return 1
+ create_pool $FSNAME.$TESTNAME || return 1
do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $OST || return 2
$SETSTRIPE $DIR/$tdir -i $OSTIDX -c 1 -p $FSNAME.$TESTNAME
remote_mds_nodsh && skip "remote MDS with nodsh" && return
remote_mgs_nodsh && skip "remote MGS with nodsh" && return
- local ost_name=$($LFS osts | grep ${ostidx}": " | \
- awk '{print $2}' | sed -e 's/_UUID$//')
+ local ost_name=$($LFS osts |
+ sed -n 's/^'$ostidx': \(.*\)_UUID .*/\1/p')
# on the mdt's osc
local mdtosc_proc1=$(get_mdtosc_proc_path $SINGLEMDS $ost_name)
do_facet $SINGLEMDS $LCTL get_param -n \
osp.$mdtosc_proc1.reserved_mb_low)
echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l"
- do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME ||
- error "Pool creation failed"
+ create_pool $FSNAME.$TESTNAME || error "Pool creation failed"
do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name ||
error "Adding $ost_name to pool failed"
return 1
}
+percent() {
+ # usage: percent <value> <base>
+ # Print (value - base) * 100 / base, evaluated by bc with scale=2.
+ echo "scale=2; ($1 - $2) * 100 / $2" | bc
+}
+
+# run a random read IO workload
+# usage: random_read_iops <filename> <filesize> <iosize>
+random_read_iops() {
+ local path=$1
+ local bytes=$2
+ local blksz=${3:-4096}
+
+ # sed drops empty lines, then strips everything up to "s, " and the
+ # "MB/s" suffix from what $READS prints
+ $READS -f $path -s $bytes -b $blksz -n $((bytes / blksz)) -t 60 |
+ sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##'
+}
+
+drop_file_oss_cache() {
+ local file="$1"
+ local nodes="$2"
+
+ $LFS ladvise -a dontneed $file 2>/dev/null ||
+ do_nodes $nodes "echo 3 > /proc/sys/vm/drop_caches"
+}
+
ladvise_willread_performance()
{
+ # Over $repeat iterations, measure random read speed of $DIR/$tfile
+ # uncached, from the OSS cache, and after "ladvise willread", then
+ # compare the average speedups in percent. $size is not set here;
+ # it is taken from the caller's scope.
local repeat=10
+ local average_origin=0
local average_cache=0
local average_ladvise=0
for ((i = 1; i <= $repeat; i++)); do
echo "Iter $i/$repeat: reading without willread hint"
cancel_lru_locks osc
- do_nodes $(comma_list $(osts_nodes)) \
- "echo 3 > /proc/sys/vm/drop_caches"
- local speed_origin=$($READS -f $DIR/$tfile -s $size \
- -b 4096 -n $((size / 4096)) -t 60 |
- sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+ drop_file_oss_cache $DIR/$tfile $(comma_list $(osts_nodes))
+ local speed_origin=$(random_read_iops $DIR/$tfile $size)
+ echo "Iter $i/$repeat: uncached speed: $speed_origin"
+ average_origin=$(bc <<<"$average_origin + $speed_origin")
- echo "Iter $i/$repeat: Reading again without willread hint"
cancel_lru_locks osc
- local speed_cache=$($READS -f $DIR/$tfile -s $size \
- -b 4096 -n $((size / 4096)) -t 60 |
- sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+ local speed_cache=$(random_read_iops $DIR/$tfile $size)
+ echo "Iter $i/$repeat: OSS cache speed: $speed_cache"
+ average_cache=$(bc <<<"$average_cache + $speed_cache")
- echo "Iter $i/$repeat: reading with willread hint"
cancel_lru_locks osc
- do_nodes $(comma_list $(osts_nodes)) \
- "echo 3 > /proc/sys/vm/drop_caches"
- lfs ladvise -a willread $DIR/$tfile ||
- error "Ladvise failed"
- local speed_ladvise=$($READS -f $DIR/$tfile -s $size \
- -b 4096 -n $((size / 4096)) -t 60 |
- sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
-
- local cache_speedup=$(echo "scale=2; \
- ($speed_cache-$speed_origin)/$speed_origin*100" | bc)
- cache_speedup=$(echo ${cache_speedup%.*})
- echo "Iter $i/$repeat: cache speedup: $cache_speedup%"
- average_cache=$((average_cache + cache_speedup))
-
- local ladvise_speedup=$(echo "scale=2; \
- ($speed_ladvise-$speed_origin)/$speed_origin*100" | bc)
- ladvise_speedup=$(echo ${ladvise_speedup%.*})
- echo "Iter $i/$repeat: ladvise speedup: $ladvise_speedup%"
- average_ladvise=$((average_ladvise + ladvise_speedup))
- done
- average_cache=$((average_cache / repeat))
- average_ladvise=$((average_ladvise / repeat))
-
- if [ $average_cache -lt 20 ]; then
- echo "Speedup with cache is less than 20% ($average_cache%),"\
- "skipping check of speedup with willread:"\
- "$average_ladvise%"
+ drop_file_oss_cache $DIR/$tfile $(comma_list $(osts_nodes))
+ $LFS ladvise -a willread $DIR/$tfile || error "ladvise failed"
+ local speed_ladvise=$(random_read_iops $DIR/$tfile $size)
+ echo "Iter $i/$repeat: ladvise speed: $speed_ladvise"
+ average_ladvise=$(bc <<<"$average_ladvise + $speed_ladvise")
+ done
+ average_origin=$(bc <<<"scale=2; $average_origin / $repeat")
+ average_cache=$(bc <<<"scale=2; $average_cache / $repeat")
+ average_ladvise=$(bc <<<"scale=2; $average_ladvise / $repeat")
+
+ local speedup_cache=$(percent $average_cache $average_origin)
+ local speedup_ladvise=$(percent $average_ladvise $average_origin)
+
+ echo "Average uncached read: $average_origin"
+ echo "Average speedup with OSS cached read: " \
+ "$average_cache = +$speedup_cache%"
+ echo "Average speedup with ladvise willread: " \
+ "$average_ladvise = +$speedup_ladvise%"
+
+ # [ ] only compares integers: keep the integer part of each bc
+ # result and fall back to 0 when nothing numeric is left, as for
+ # bc output such as ".50" or "-.50"
+ local cache_pct=${speedup_cache%.*}
+ local ladvise_pct=${speedup_ladvise%.*}
+ [[ $cache_pct =~ ^-?[0-9]+$ ]] || cache_pct=0
+ [[ $ladvise_pct =~ ^-?[0-9]+$ ]] || ladvise_pct=0
+
+ # the thresholds are percentages, so they are checked against the
+ # speedup_* values and not against the average speeds
+ local lowest_speedup=20
+ if [ $cache_pct -lt $lowest_speedup ]; then
+ echo "Speedup with OSS cached read less than $lowest_speedup%, " \
+ "got $speedup_cache%. Skipping ladvise willread check."
return 0
fi
- local lowest_speedup=$((average_cache / 2))
- [ $average_ladvise -gt $lowest_speedup ] ||
+ # the test won't work on ZFS until it supports 'ladvise dontneed', but
+ # it is still good to run until then to exercise 'ladvise willread'
+ ! $LFS ladvise -a dontneed $DIR/$tfile &&
+ [ "$(facet_fstype ost1)" = "zfs" ] &&
+ echo "osd-zfs does not support dontneed or drop_caches" &&
+ return 0
+
+ lowest_speedup=$((cache_pct / 2))
+ [ $ladvise_pct -gt $lowest_speedup ] ||
error_not_in_vm "Speedup with willread is less than " \
- "$lowest_speedup%, got $average_ladvise%"
- echo "Speedup with willread ladvise: $average_ladvise%"
- echo "Speedup with cache: $average_cache%"
+ "$lowest_speedup%, got $speedup_ladvise%"
}
test_255a() {
+ [ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
+ skip "lustre < 2.8.54 does not support ladvise " && return
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
lfs setstripe -c -1 -i 0 $DIR/$tfile || error "$tfile failed"
ladvise_no_type willread $DIR/$tfile &&
ladvise_no_ioctl $DIR/$tfile &&
skip "ladvise ioctl is not supported" && return
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
- skip "lustre < 2.8.54 does not support ladvise " && return
-
local size_mb=100
local size=$((size_mb * 1048576))
dd if=/dev/zero of=$DIR/$tfile bs=1048576 count=$size_mb ||
}
test_255b() {
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
+ lfs setstripe -c 1 -i 0 $DIR/$tfile
+
ladvise_no_type dontneed $DIR/$tfile &&
skip "dontneed ladvise is not supported" && return
[ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
skip "lustre < 2.8.54 does not support ladvise" && return
- [ "$(facet_fstype ost1)" = "zfs" ] &&
- skip "zfs-osd does not support dontneed advice" && return
-
- lfs setstripe -c 1 -i 0 $DIR/$tfile
+ ! $LFS ladvise -a dontneed $DIR/$tfile &&
+ [ "$(facet_fstype ost1)" = "zfs" ] &&
+ skip "zfs-osd does not support 'ladvise dontneed'" && return
local size_mb=100
local size=$((size_mb * 1048576))
error "getdirstripe failed"
stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir)
if [ "$stripe_count" != "2" ]; then
- error "stripe_count is $stripe_count, expect 2"
+ error "1:stripe_count is $stripe_count, expect 2"
+ fi
+ stripe_count=$($LFS getdirstripe -T $DIR/$tdir/striped_dir)
+ if [ "$stripe_count" != "2" ]; then
+ error "2:stripe_count is $stripe_count, expect 2"
fi
stripe_index=$($LFS getdirstripe -i $DIR/$tdir/striped_dir)
mkdir $DIR/$tdir/striped_dir/dir_c
$LFS setdirstripe -i 0 -c 2 -t all_char $DIR/$tdir/striped_dir/stp_a ||
- error "set striped dir under striped dir error"
+ error "set striped adir under striped dir error"
- $LFS setdirstripe -i 0 -c 2 -t all_char $DIR/$tdir/striped_dir/stp_b ||
- error "set striped dir under striped dir error"
+ $LFS setdirstripe -i 0 -c 2 -H all_char $DIR/$tdir/striped_dir/stp_b ||
+ error "set striped bdir under striped dir error"
$LFS setdirstripe -i 0 -c 2 -t all_char $DIR/$tdir/striped_dir/stp_c ||
- error "set striped dir under striped dir error"
+ error "set striped cdir under striped dir error"
mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir/dir_b ||
error "rename dir under striped dir fails"
createmany -o $DIR/$tdir/striped_dir/f- 10 ||
error "create files under striped dir failed"
+ $LFS setdirstripe -i0 -c$MDSCOUNT -H all_char $DIR/$tdir/hashdir ||
+ error "set striped hashdir error"
+
+ $LFS setdirstripe -i0 -c$MDSCOUNT -H all_char $DIR/$tdir/hashdir/d0 ||
+ error "create dir0 under hash dir failed"
+ $LFS setdirstripe -i0 -c$MDSCOUNT -H fnv_1a_64 $DIR/$tdir/hashdir/d1 ||
+ error "create dir1 under hash dir failed"
+
# unfortunately, we need to umount to clear dir layout cache for now
# once we fully implement dir layout, we can drop this
umount_client $MOUNT || error "umount failed"
mount_client $MOUNT || error "mount failed"
+ $LFS find -H fnv_1a_64 $DIR/$tdir/hashdir
+ local dircnt=$($LFS find -H fnv_1a_64 $DIR/$tdir/hashdir | wc -l)
+ [ $dircnt -eq 1 ] || error "lfs find striped dir got:$dircnt,except:1"
+
#set the stripe to be unknown hash type
#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901
$LCTL set_param fail_loc=0x1901
skip "Need MDS version at least 2.7.55" && return
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
local stripe_index
local list=$(comma_list $(mdts_nodes))
[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.54) ] &&
skip "lustre < 2.8.54 does not contain LU-4825 fix" && return
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
local old_iused=$($LFS df -i | grep OST0000 | awk '{ print $3 }')
local new_iused
for i in $(seq 120); do
new_iused=$($LFS df -i | grep OST0000 | awk '{ print $3 }')
- [ $((old_iused - new_iused)) -gt 800 ] && break
+ # system may be too busy to destroy all objs in time, use
+ # a somewhat small value to not fail autotest
+ [ $((old_iused - new_iused)) -gt 400 ] && break
sleep 1
done
echo "waited $i sec, old Iused $old_iused, new Iused $new_iused"
- [ $((old_iused - new_iused)) -gt 800 ] ||
+ [ $((old_iused - new_iused)) -gt 400 ] ||
error "objs not destroyed after unlink"
}
run_test 311 "disable OSP precreate, and unlink should destroy objs"
local objid=$2
local vdevdir=$(dirname $(facet_vdevice $ost))
- local cmd="$ZDB -e -p $vdevdir -dddd $(facet_device $ost)"
+ local cmd="$ZDB -e -p $vdevdir -ddddd $(facet_device $ost)"
local zfs_zapid=$(do_facet $ost $cmd |
grep -w "/O/0/d$((objid%32))" -C 5 |
awk '/Object/{getline; print $1}')
}
test_312() { # LU-4856
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
[ $(facet_fstype ost1) = "zfs" ] ||
{ skip "the test only applies to zfs" && return; }
local max_blksz=$(do_facet ost1 \
$ZFS get -p recordsize $(facet_device ost1) |
awk '!/VALUE/{print $3}')
+ local min_blksz=$(getconf PAGE_SIZE)
# to make life a little bit easier
$LFS mkdir -c 1 -i 0 $DIR/$tdir
# block size change by sequential over write
local blksz
- for ((bs=4096; bs <= max_blksz; bs <<= 2)); do
+ for ((bs=$min_blksz; bs <= max_blksz; bs <<= 2)); do
dd if=/dev/zero of=$tf bs=$bs count=1 oflag=sync conv=notrunc
blksz=$(zfs_object_blksz ost1 $zfs_objid)
rm -f $tf
# block size change by sequential append write
- dd if=/dev/zero of=$tf bs=4K count=1 oflag=sync conv=notrunc
+ dd if=/dev/zero of=$tf bs=$min_blksz count=1 oflag=sync conv=notrunc
oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
zfs_objid=$(zfs_oid_to_objid ost1 $oid)
- for ((count = 1; count < $((max_blksz / 4096)); count *= 2)); do
- dd if=/dev/zero of=$tf bs=4K count=$count seek=$count \
+ for ((count = 1; count < $((max_blksz / min_blksz)); count *= 2)); do
+ dd if=/dev/zero of=$tf bs=$min_blksz count=$count seek=$count \
oflag=sync conv=notrunc
blksz=$(zfs_object_blksz ost1 $zfs_objid)
- blksz=$((blksz / 8192)) # in 2*4K unit
- [ $blksz -eq $count ] ||
- error "blksz error(in 8k): $blksz, expected: $count"
+ [ $blksz -eq $((2 * count * min_blksz)) ] ||
+ error "blksz error, actual $blksz, " \
+ "expected: 2 * $count * $min_blksz"
done
rm -f $tf
oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
zfs_objid=$(zfs_oid_to_objid ost1 $oid)
- dd if=/dev/zero of=$tf bs=8K count=1 oflag=sync conv=notrunc
+ dd if=/dev/zero of=$tf bs=1K count=1 oflag=sync conv=notrunc
blksz=$(zfs_object_blksz ost1 $zfs_objid)
- [ $blksz -eq 8192 ] || error "blksz error: $blksz, expected: 8k"
+ [ $blksz -eq $min_blksz ] ||
+ error "blksz error: $blksz, expected: $min_blksz"
dd if=/dev/zero of=$tf bs=64K count=1 oflag=sync conv=notrunc seek=128
blksz=$(zfs_object_blksz ost1 $zfs_objid)
run_test 312 "make sure ZFS adjusts its block size by write pattern"
test_313() {
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
local file=$DIR/$tfile
rm -f $file
$SETSTRIPE -c 1 -i 0 $file || error "setstripe failed"
}
run_test 313 "io should fail after last_rcvd update fail"
-test_399() { # LU-7655 for OST fake write
+test_fake_rw() {
+ # usage: test_fake_rw <read|write>
+ # Time a dd in the given direction on $DIR/$tfile, repeat it with
+ # fail_loc 0x238 set on ost1, and complain if the second (fake)
+ # run is not faster than the first.
+ local read_write=$1
+ if [ "$read_write" = "write" ]; then
+ local dd_cmd="dd if=/dev/zero of=$DIR/$tfile"
+ elif [ "$read_write" = "read" ]; then
+ local dd_cmd="dd of=/dev/null if=$DIR/$tfile"
+ else
+ error "argument error"
+ fi
+
# turn off debug for performance testing
local saved_debug=$($LCTL get_param -n debug)
$LCTL set_param debug=0
local blocks=$((ost1_avail_size/2/1024)) # half avail space by megabytes
[ $blocks -gt 1000 ] && blocks=1000 # 1G in maximum
+ if [ "$read_write" = "read" ]; then
+ # give the file its full size up front so there is data to read
+ truncate -s $((1048576 * blocks)) $DIR/$tfile
+ fi
+
local start_time=$(date +%s.%N)
- dd if=/dev/zero of=$DIR/$tfile bs=1M count=$blocks oflag=sync ||
- error "real dd writing error"
+ $dd_cmd bs=1M count=$blocks oflag=sync ||
+ error "real dd $read_write error"
local duration=$(bc <<< "$(date +%s.%N) - $start_time")
- rm -f $DIR/$tfile
- # define OBD_FAIL_OST_FAKE_WRITE 0x238
+ if [ "$read_write" = "write" ]; then
+ rm -f $DIR/$tfile
+ fi
+
+ # define OBD_FAIL_OST_FAKE_RW 0x238
do_facet ost1 $LCTL set_param fail_loc=0x238
local start_time=$(date +%s.%N)
- dd if=/dev/zero of=$DIR/$tfile bs=1M count=$blocks oflag=sync ||
- error "fake dd writing error"
+ $dd_cmd bs=1M count=$blocks oflag=sync ||
+ error "fake dd $read_write error"
local duration_fake=$(bc <<< "$(date +%s.%N) - $start_time")
- # verify file size
- cancel_lru_locks osc
- $CHECKSTAT -t file -s $((blocks * 1024 * 1024)) $DIR/$tfile ||
- error "$tfile size not $blocks MB"
-
+ if [ "$read_write" = "write" ]; then
+ # verify file size
+ cancel_lru_locks osc
+ $CHECKSTAT -t file -s $((blocks * 1024 * 1024)) $DIR/$tfile ||
+ error "$tfile size not $blocks MB"
+ fi
do_facet ost1 $LCTL set_param fail_loc=0
- echo "fake write $duration_fake vs. normal write $duration in seconds"
+ echo "fake $read_write $duration_fake vs. normal $read_write" \
+ "$duration in seconds"
[ $(bc <<< "$duration_fake < $duration") -eq 1 ] ||
- error_not_in_vm "fake write is slower"
+ error_not_in_vm "fake $read_write is slower"
$LCTL set_param -n debug="$saved_debug"
rm -f $DIR/$tfile
}
-run_test 399 "fake write should not be slower than normal write"
+test_399a() { # LU-7655 for OST fake write
+ remote_ost_nodsh && skip "remote OST with nodsh" && return
+
+ test_fake_rw write
+}
+run_test 399a "fake write should not be slower than normal write"
+
+test_399b() { # LU-8726 for OST fake read
+ if remote_ost_nodsh; then
+ skip "remote OST with nodsh" && return
+ fi
+
+ # the read variant is only run when ost1 is ldiskfs
+ [ "$(facet_fstype ost1)" = "ldiskfs" ] ||
+ { skip "only for ldiskfs" && return 0; }
+
+ test_fake_rw read
+}
+run_test 399b "fake read should not be slower than normal read"
test_400a() { # LU-1606, was conf-sanity test_74
local extra_flags=''
#count the number of parameters by "list_param -R"
local params=$($LCTL list_param -R '*' 2>/dev/null | wc -l)
#count the number of parameters by listing proc files
+ local proc_dirs=$(eval \ls -d $proc_regexp 2>/dev/null)
+ echo "proc_dirs='$proc_dirs'"
+ [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME"
local procs=$(find -L $proc_dirs -mindepth 1 -printf '%P\n' 2>/dev/null|
sort -u | wc -l)
run_test 403 "i_nlink should not drop to zero due to aliasing"
test_404() { # LU-6601
+ local server_version=$(lustre_version_code $SINGLEMDS)
+ [[ $server_version -ge $(version_code 2.8.53) ]] ||
+ { skip "Need server version newer than 2.8.52"; return 0; }
+
remote_mds_nodsh && skip "remote MDS with nodsh" && return
local mosps=$(do_facet $SINGLEMDS $LCTL dl |
awk '/osp .*-osc-MDT/ { print $4}')
test_407() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
-
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.55) ]] &&
skip "Need MDS version at least 2.8.55" && return
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
$LFS mkdir -i 0 -c 1 $DIR/$tdir.0 ||
error "$LFS mkdir -i 0 -c 1 $tdir.0 failed"
}
run_test 409 "Large amount of cross-MDTs hard links on the same file"
+# Common prologue of the 801* write barrier tests.
+#
+# Skips the calling test when either mds1 or ost1 runs a version older
+# than 2.9.55; otherwise calls start_full_debug_logging.
+prep_801() {
+	# The version check has to gate the exit.  The previous form,
+	# 'skip "..." & exit 0', used a single '&': the check ran as a
+	# background job and 'exit 0' ran unconditionally, so no 801* test
+	# ever got past this helper.
+	#
+	# 'exit' (not 'return') is kept on purpose: a 'return' would only
+	# leave this helper and the caller would go on with the test.
+	# NOTE(review): this relies on the test body running in its own
+	# (sub)shell - confirm against run_test.
+	if [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
+	   [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]]; then
+		skip "Need server version at least 2.9.55"
+		exit 0
+	fi
+
+	start_full_debug_logging
+}
+
+# Common epilogue of the 801* write barrier tests: the counterpart of
+# prep_801, calls stop_full_debug_logging.
+post_801() {
+	stop_full_debug_logging
+}
+
+# Walk the write barrier through its states by driving barrier_freeze /
+# barrier_thaw on the MGS and checking the status word printed by
+# barrier_stat after each step: freezing_p1 -> frozen -> expired ->
+# frozen -> thawing -> thawed -> failed.
+test_801a() {
+	# awk program extracting the status word from 'barrier_stat' output
+	local status_awk='/The barrier for/ { print $7 }'
+	local b_status
+	local expired
+	local nap
+
+	prep_801
+
+	# Delay the freeze so that the intermediate state can be observed.
+	#define OBD_FAIL_BARRIER_DELAY		0x2202
+	do_facet mgs $LCTL set_param fail_val=3 fail_loc=0x2202
+	do_facet mgs $LCTL barrier_freeze $FSNAME 10 &
+
+	sleep 1
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'freezing_p1'" ]] ||
+		error "(1) unexpected barrier status $b_status"
+
+	# Drop the delay and let the background freeze finish.
+	do_facet mgs $LCTL set_param fail_val=0 fail_loc=0
+	wait
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'frozen'" ]] ||
+		error "(2) unexpected barrier status $b_status"
+
+	# Sleep past the remaining lifetime reported by barrier_stat.
+	expired=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		  awk '/will be expired/ { print $7 }')
+	nap=$((expired + 3))
+	echo "sleep $nap seconds, then the barrier will be expired"
+	sleep $nap
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'expired'" ]] ||
+		error "(3) unexpected barrier status $b_status"
+
+	# An expired barrier can be frozen again.
+	do_facet mgs $LCTL barrier_freeze $FSNAME 10 ||
+		error "(4) fail to freeze barrier"
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'frozen'" ]] ||
+		error "(5) unexpected barrier status $b_status"
+
+	# Delay the thaw so that the intermediate state can be observed.
+	#define OBD_FAIL_BARRIER_DELAY		0x2202
+	do_facet mgs $LCTL set_param fail_val=3 fail_loc=0x2202
+	do_facet mgs $LCTL barrier_thaw $FSNAME &
+
+	sleep 1
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'thawing'" ]] ||
+		error "(6) unexpected barrier status $b_status"
+
+	do_facet mgs $LCTL set_param fail_val=0 fail_loc=0
+	wait
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'thawed'" ]] ||
+		error "(7) unexpected barrier status $b_status"
+
+	# Inject a failure on the MDS; the freeze result is checked through
+	# barrier_stat rather than through the command's exit status.
+	#define OBD_FAIL_BARRIER_FAILURE	0x2203
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x2203
+	do_facet mgs $LCTL barrier_freeze $FSNAME
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'failed'" ]] ||
+		error "(8) unexpected barrier status $b_status"
+
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+	do_facet mgs $LCTL barrier_thaw $FSNAME
+
+	post_801
+}
+run_test 801a "write barrier user interfaces and stat machine"
+
+# Check that a frozen write barrier blocks modifications (mkdir, touch,
+# ln, mv, rm) while 'stat' still succeeds, and that the blocked commands
+# complete successfully once the barrier is thawed.
+test_801b() {
+	local b_status
+	# PIDs of the background modifications; kept local so that they do
+	# not leak into the rest of the test suite.
+	local mkdir_pid
+	local touch_pid
+	local ln_pid
+	local mv_pid
+	local rm_pid
+
+	prep_801
+
+	# The sub-directories d0 .. d5 and the files below are the targets
+	# of the operations issued while the barrier is frozen.
+	mkdir $DIR/$tdir || error "(1) fail to mkdir"
+	createmany -d $DIR/$tdir/d 6 || error "(2) fail to mkdir"
+	touch $DIR/$tdir/d2/f10 || error "(3) fail to touch"
+	touch $DIR/$tdir/d3/f11 || error "(4) fail to touch"
+	touch $DIR/$tdir/d4/f12 || error "(5) fail to touch"
+
+	cancel_lru_locks mdc
+
+	# 180 seconds should be long enough
+	do_facet mgs $LCTL barrier_freeze $FSNAME 180
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk '/The barrier for/ { print $7 }')
+	[ "$b_status" = "'frozen'" ] ||
+		error "(6) unexpected barrier status $b_status"
+
+	# Each modification is started in the background because it is
+	# expected to block until the barrier is thawed.
+	mkdir $DIR/$tdir/d0/d10 &
+	mkdir_pid=$!
+
+	touch $DIR/$tdir/d1/f13 &
+	touch_pid=$!
+
+	ln $DIR/$tdir/d2/f10 $DIR/$tdir/d2/f14 &
+	ln_pid=$!
+
+	mv $DIR/$tdir/d3/f11 $DIR/$tdir/d3/f15 &
+	mv_pid=$!
+
+	rm -f $DIR/$tdir/d4/f12 &
+	rm_pid=$!
+
+	stat $DIR/$tdir/d5 || error "(7) stat should succeed"
+
+	# To guarantee that the 'stat' is not blocked
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk '/The barrier for/ { print $7 }')
+	[ "$b_status" = "'frozen'" ] ||
+		error "(8) unexpected barrier status $b_status"
+
+	# let above commands to run at background
+	sleep 5
+
+	# A process that is still alive here has not completed, i.e. it is
+	# being held back by the barrier.
+	ps -p $mkdir_pid || error "(9) mkdir should be blocked"
+	ps -p $touch_pid || error "(10) touch should be blocked"
+	ps -p $ln_pid || error "(11) link should be blocked"
+	ps -p $mv_pid || error "(12) rename should be blocked"
+	ps -p $rm_pid || error "(13) unlink should be blocked"
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk '/The barrier for/ { print $7 }')
+	[ "$b_status" = "'frozen'" ] ||
+		error "(14) unexpected barrier status $b_status"
+
+	do_facet mgs $LCTL barrier_thaw $FSNAME
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk '/The barrier for/ { print $7 }')
+	[ "$b_status" = "'thawed'" ] ||
+		error "(15) unexpected barrier status $b_status"
+
+	# After the thaw every blocked command must finish with success.
+	wait $mkdir_pid || error "(16) mkdir should succeed"
+	wait $touch_pid || error "(17) touch should succeed"
+	wait $ln_pid || error "(18) link should succeed"
+	wait $mv_pid || error "(19) rename should succeed"
+	wait $rm_pid || error "(20) unlink should succeed"
+
+	post_801
+}
+run_test 801b "modification will be blocked by write barrier"
+
+# Exercise barrier_rescan: with mds2 stopped a freeze must end up
+# 'expired' or 'failed'; after a rescan freeze/thaw must work again, and a
+# second rescan must succeed once mds2 has been restarted.
+test_801c() {
+	# awk program extracting the status word from 'barrier_stat' output
+	local status_awk='/The barrier for/ { print $7 }'
+	local b_status
+	local devname
+
+	if [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs"; then
+		return
+	fi
+
+	prep_801
+
+	stop mds2 || error "(1) Fail to stop mds2"
+
+	do_facet mgs $LCTL barrier_freeze $FSNAME 30
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	if [[ "$b_status" != "'expired'" && "$b_status" != "'failed'" ]]; then
+		# Do not leave an unexpected barrier behind before failing.
+		do_facet mgs $LCTL barrier_thaw $FSNAME
+		error "(2) unexpected barrier status $b_status"
+	fi
+
+	do_facet mgs $LCTL barrier_rescan $FSNAME ||
+		error "(3) Fail to rescan barrier bitmap"
+
+	do_facet mgs $LCTL barrier_freeze $FSNAME 10
+
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'frozen'" ]] ||
+		error "(4) unexpected barrier status $b_status"
+
+	do_facet mgs $LCTL barrier_thaw $FSNAME
+	b_status=$(do_facet mgs $LCTL barrier_stat $FSNAME |
+		   awk "$status_awk")
+	[[ "$b_status" == "'thawed'" ]] ||
+		error "(5) unexpected barrier status $b_status"
+
+	# Bring mds2 back and rescan once more.
+	devname=$(mdsdevname 2)
+
+	start mds2 $devname $MDS_MOUNT_OPTS || error "(6) Fail to start mds2"
+
+	do_facet mgs $LCTL barrier_rescan $FSNAME ||
+		error "(7) Fail to rescan barrier bitmap"
+
+	post_801
+}
+run_test 801c "rescan barrier bitmap"
+
+# Mount options in effect at this point of the script; cleanup_802()
+# writes them back after test_802 has modified the originals.
+saved_OST_MOUNT_OPTS="$OST_MOUNT_OPTS"
+saved_MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS"
+saved_MGS_MOUNT_OPTS="$MGS_MOUNT_OPTS"
+
+# Undo test_802: clear the EXIT trap, stop everything, put the saved
+# mount options back and set the file system up again.
+cleanup_802() {
+	trap - EXIT
+
+	stopall
+	OST_MOUNT_OPTS="$saved_OST_MOUNT_OPTS"
+	MDS_MOUNT_OPTS="$saved_MDS_MOUNT_OPTS"
+	MGS_MOUNT_OPTS="$saved_MGS_MOUNT_OPTS"
+	setupall
+}
+
+# Restart the servers with the 'rdonly_dev' mount option and check that a
+# client can only mount with 'ro', that modifications are refused and that
+# previously written data can still be read.
+test_802() {
+	# The version check has to gate the skip.  The previous form,
+	# 'skip "..." & exit 0', used a single '&': the check ran as a
+	# background job and 'exit 0' ran unconditionally, so the test body
+	# below was never reached.
+	if [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
+	   [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]]; then
+		skip "Need server version at least 2.9.55"
+		return 0
+	fi
+
+	mkdir $DIR/$tdir || error "(1) fail to mkdir"
+
+	# Reference file compared against its source once read-only.
+	cp $LUSTRE/tests/test-framework.sh $DIR/$tdir/ ||
+		error "(2) Fail to copy"
+
+	# From here on the mount options get modified; make sure they are
+	# put back whichever way the test ends.
+	trap cleanup_802 EXIT
+
+	# sync by force before remount as readonly
+	sync; sync_all_data; sleep 3; sync_all_data
+
+	stopall
+
+	MGS_MOUNT_OPTS=$(csa_add "$MGS_MOUNT_OPTS" -o rdonly_dev)
+	MDS_MOUNT_OPTS=$(csa_add "$MDS_MOUNT_OPTS" -o rdonly_dev)
+	OST_MOUNT_OPTS=$(csa_add "$OST_MOUNT_OPTS" -o rdonly_dev)
+
+	echo "Mount the server as read only"
+	setupall server_only || error "(3) Fail to start servers"
+
+	echo "Mount client without ro should fail"
+	mount_client $MOUNT &&
+		error "(4) Mount client without 'ro' should fail"
+
+	echo "Mount client with ro should succeed"
+	mount_client $MOUNT ro ||
+		error "(5) Mount client with 'ro' should succeed"
+
+	echo "Modify should be refused"
+	touch $DIR/$tdir/guard && error "(6) Touch should fail under ro mode"
+
+	echo "Read should be allowed"
+	diff $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh ||
+		error "(7) Read should succeed under ro mode"
+
+	cleanup_802
+}
+run_test 802 "simulate readonly device"
+
#
# tests that do cleanup/setup should be run at the end
#