Whamcloud - gitweb
LU-9514 test: sanity 51f add to ALWAYS_EXCEPT
[fs/lustre-release.git] / lustre / tests / sanity.sh
index 98d1a05..fb4539d 100755 (executable)
@@ -12,9 +12,8 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT="                42a  42b  42c  42d  45   68b   $SANITY_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
-# with LOD/OSP landing
-# bug number for skipped tests: LU-2036 LU-8411
-ALWAYS_EXCEPT="                 76     407 $ALWAYS_EXCEPT"
+# bug number for skipped tests: LU-2036 LU-8411 LU-9096
+ALWAYS_EXCEPT="                 76     407     253 $ALWAYS_EXCEPT"
 
 is_sles11()                                            # LU-4341
 {
@@ -79,8 +78,8 @@ init_logging
 [ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 64b 68 71 115 300o"
 
 if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then
-       # bug number for skipped test: LU-4536 LU-1957
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT  65ic    180"
+       # bug number for skipped test: LU-9514 LU-4536 LU-1957
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT  51f     65ic    180"
        #                                               13    (min)"
        [ "$SLOW" = "no" ] && EXCEPT_SLOW="$EXCEPT_SLOW 51b"
 fi
@@ -1889,10 +1888,11 @@ check_seq_oid()
                # update too, until that use mount/ll_decode_filter_fid/mount.
                # Re-enable when debugfs will understand new filter_fid.
                #
-               if false && [ $(facet_fstype ost$ost) == ldiskfs ]; then
+               if [ $(facet_fstype ost$ost) == ldiskfs ]; then
                        ff=$(do_facet ost$ost "$DEBUGFS -c -R 'stat $obj_file' \
                                $dev 2>/dev/null" | grep "parent=")
-               else
+               fi
+               if [ -z "$ff" ]; then
                        stop ost$ost
                        mount_fstype ost$ost
                        ff=$(do_facet ost$ost $LL_DECODE_FILTER_FID \
@@ -1908,45 +1908,39 @@ check_seq_oid()
 
                # /mnt/O/0/d23/23: objid=23 seq=0 parent=[0x200000400:0x1e:0x1]
                # fid: objid=23 seq=0 parent=[0x200000400:0x1e:0x0] stripe=1
-               local ff_parent=$(echo $ff|sed -e 's/.*parent=.//')
-               local ff_pseq=$(echo $ff_parent | cut -d: -f1)
-               local ff_poid=$(echo $ff_parent | cut -d: -f2)
+               #
+               # fid: parent=[0x200000400:0x1e:0x0] stripe=1 stripe_count=2 \
+               #       stripe_size=1048576 component_id=1 component_start=0 \
+               #       component_end=33554432
+               local ff_parent=$(sed -e 's/.*parent=.//' <<<$ff)
+               local ff_pseq=$(cut -d: -f1 <<<$ff_parent)
+               local ff_poid=$(cut -d: -f2 <<<$ff_parent)
                local ff_pstripe
-               if echo $ff_parent | grep -q 'stripe='; then
-                       ff_pstripe=$(echo $ff_parent | sed -e 's/.*stripe=//')
-                       if echo $ff_pstripe | grep -q 'stripe_size='; then
-                               ff_pstripe=$(echo $ff_pstripe | cut -d' ' -f1)
-                       fi
+               if grep -q 'stripe=' <<<$ff; then
+                       ff_pstripe=$(sed -e 's/.*stripe=//' -e 's/ .*//' <<<$ff)
                else
-                       #
                        # $LL_DECODE_FILTER_FID does not print "stripe="; look
-                       # into f_ver in this case.  See the comment on
-                       # ff_parent.
-                       #
-                       ff_pstripe=$(echo $ff_parent | cut -d: -f3 |
-                               sed -e 's/\]//')
+                       # into f_ver in this case.  See comment on ff_parent.
+                       ff_pstripe=$(cut -d: -f3 <<<$ff_parent | sed -e 's/]//')
                fi
 
-               if echo $ff_parent | grep -q 'stripe_count='; then
-                       local ff_scnt=$(echo $ff_parent |
-                                       sed -e 's/.*stripe_count=//' |
-                                       cut -d' ' -f1)
-
-                       [ $lmm_count -eq $ff_scnt ] ||
+               if grep -q 'stripe_count=' <<<$ff; then
+                       local ff_scnt=$(sed -e 's/.*stripe_count=//' \
+                                           -e 's/ .*//' <<<$ff)
+                       [ $lmm_count = $ff_scnt ] ||
                                error "FF stripe count $lmm_count != $ff_scnt"
                fi
-
-                # compare lmm_seq and filter_fid->ff_parent.f_seq
-                [ $ff_pseq = $lmm_seq ] ||
-                        error "FF parent SEQ $ff_pseq != $lmm_seq"
-                # compare lmm_object_id and filter_fid->ff_parent.f_oid
-                [ $ff_poid = $lmm_oid ] ||
-                        error "FF parent OID $ff_poid != $lmm_oid"
+               # compare lmm_seq and filter_fid->ff_parent.f_seq
+               [ $ff_pseq = $lmm_seq ] ||
+                       error "FF parent SEQ $ff_pseq != $lmm_seq"
+               # compare lmm_object_id and filter_fid->ff_parent.f_oid
+               [ $ff_poid = $lmm_oid ] ||
+                       error "FF parent OID $ff_poid != $lmm_oid"
                (($ff_pstripe == $stripe_nr)) ||
-                        error "FF stripe $ff_pstripe != $stripe_nr"
+                       error "FF stripe $ff_pstripe != $stripe_nr"
 
-                stripe_nr=$((stripe_nr + 1))
-        done
+               stripe_nr=$((stripe_nr + 1))
+       done
 }
 
 test_27z() {
@@ -4852,8 +4846,22 @@ run_test 56r "check lfs find -size works =========================="
 
 test_56s() { # LU-611
        TDIR=$DIR/${tdir}s
-       setup_56 $NUMFILES $NUMDIRS "-c $OSTCOUNT"
 
+       #LU-9369
+       setup_56 0 $NUMDIRS
+       for i in $(seq 1 $NUMDIRS); do
+               $SETSTRIPE -c $((OSTCOUNT + 1)) $TDIR/dir$i/$tfile
+       done
+       EXPECTED=$NUMDIRS
+       CMD="$LFIND -c $OSTCOUNT $TDIR"
+       NUMS=$($CMD | wc -l)
+       [ $NUMS -eq $EXPECTED ] || {
+               $GETSTRIPE -R $TDIR
+               error "\"$CMD\" wrong: found $NUMS, expected $EXPECTED"
+       }
+       rm -rf $TDIR
+
+       setup_56 $NUMFILES $NUMDIRS "-c $OSTCOUNT"
        if [[ $OSTCOUNT -gt 1 ]]; then
                $SETSTRIPE -c 1 $TDIR/$tfile.{0,1,2,3}
                ONESTRIPE=4
@@ -4906,6 +4914,21 @@ run_test 56s "check lfs find -stripe-count works"
 
 test_56t() { # LU-611
        TDIR=$DIR/${tdir}t
+
+       #LU-9369
+       setup_56 0 $NUMDIRS
+       for i in $(seq 1 $NUMDIRS); do
+               $SETSTRIPE -S 4M $TDIR/dir$i/$tfile
+       done
+       EXPECTED=$NUMDIRS
+       CMD="$LFIND -S 4M $TDIR"
+       NUMS=$($CMD | wc -l)
+       [ $NUMS -eq $EXPECTED ] || {
+               $GETSTRIPE -R $TDIR
+               error "\"$CMD\" wrong: found $NUMS, expected $EXPECTED"
+       }
+       rm -rf $TDIR
+
        setup_56 $NUMFILES $NUMDIRS "--stripe-size 512k"
 
        $SETSTRIPE -S 256k $TDIR/$tfile.{0,1,2,3}
@@ -7701,31 +7724,113 @@ test_110() {
 }
 run_test 110 "filename length checking"
 
+#
+# Purpose: To verify dynamic thread (OSS) creation.
+#
 test_115() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-       OSTIO_pre=$(ps -e | grep ll_ost_io | awk '{ print $4 }'| sort -n |
-               tail -1 | cut -c11-20)
-       [ -z "$OSTIO_pre" ] && skip "no OSS threads" && return
+       remote_ost_nodsh && skip "remote OST with nodsh" && return
+
+       # Lustre does not stop service threads once they are started.
+       # Reset number of running threads to default.
+       stopall
+       setupall
+
+       local OSTIO_pre
+       local save_params="$TMP/sanity-$TESTNAME.parameters"
+
+       # Get ll_ost_io count before I/O
+       OSTIO_pre=$(do_facet ost1 \
+               "$LCTL get_param ost.OSS.ost_io.threads_started | cut -d= -f2")
+       # Exit if lustre is not running (ll_ost_io not running).
+       [ -z "$OSTIO_pre" ] && error "no OSS threads"
+
        echo "Starting with $OSTIO_pre threads"
+       local thread_max=$((OSTIO_pre * 2))
+       local rpc_in_flight=$((thread_max * 2))
+       # Number of I/O processes to start.
+       local nfiles
+       local facets=$(get_facets OST)
 
-       NUMTEST=20000
-       NUMFREE=$(df -i -P $DIR | tail -n 1 | awk '{ print $4 }')
-       [[ $NUMFREE -lt $NUMTEST ]] && NUMTEST=$(($NUMFREE - 1000))
-       echo "$NUMTEST creates/unlinks"
-       test_mkdir -p $DIR/$tdir
-       createmany -o $DIR/$tdir/$tfile $NUMTEST
-       unlinkmany $DIR/$tdir/$tfile $NUMTEST
+       save_lustre_params client \
+               "osc.*OST*.max_rpcs_in_flight" > $save_params
+       save_lustre_params $facets \
+               "ost.OSS.ost_io.threads_max" >> $save_params
+
+       # Set in_flight to $rpc_in_flight
+       $LCTL set_param osc.*OST*.max_rpcs_in_flight=$rpc_in_flight ||
+               error "Failed to set max_rpcs_in_flight to $rpc_in_flight"
+       nfiles=${rpc_in_flight}
+       # Set ost thread_max to $thread_max
+       do_facet ost1 \
+               "$LCTL set_param ost.OSS.ost_io.threads_max=$thread_max"
+
+       # 5 minutes should be sufficient for the maximum number of OSS
+       # threads (thread_max) to be created.
+       local timeout=300
+
+       # Start I/O.
+       local WTL=${WTL:-"$LUSTRE/tests/write_time_limit"}
+       mkdir -p $DIR/$tdir
+       for i in $(seq $nfiles); do
+               local file=$DIR/$tdir/${tfile}-$i
+               $LFS setstripe -c -1 -i 0 $file
+               ($WTL $file $timeout)&
+       done
+
+       # I/O Started - Wait for thread_started to reach thread_max or report
+       # error if thread_started is more than thread_max.
+       echo "Waiting for thread_started to reach thread_max"
+       local thread_started=0
+       local end_time=$((SECONDS + timeout))
+
+       while [ $SECONDS -le $end_time ] ; do
+               echo -n "."
+               # Get ost i/o thread_started count.
+               thread_started=$(do_facet ost1 \
+                       "$LCTL get_param \
+                       ost.OSS.ost_io.threads_started | cut -d= -f2")
+               # Break out if thread_started is equal/greater than thread_max
+               if [[ $thread_started -ge $thread_max ]]; then
+                       echo ll_ost_io thread_started $thread_started, \
+                               equal/greater than thread_max $thread_max
+                       break
+               fi
+               sleep 1
+       done
 
-       OSTIO_post=$(ps -e | grep ll_ost_io | awk '{ print $4 }' | sort -n |
-               tail -1 | cut -c11-20)
+       # Cleanup - we have the numbers; kill I/O jobs if still running.
+       jobcount=($(jobs -p))
+       for i in $(seq 0 $((${#jobcount[@]}-1)))
+       do
+               kill -9 ${jobcount[$i]}
+               if [ $? -ne 0 ] ; then
+                       echo Warning: \
+                       Failed to Kill \'WTL\(I/O\)\' with pid ${jobcount[$i]}
+               fi
+       done
 
-       # don't return an error
-       [ $OSTIO_post == $OSTIO_pre ] && echo \
-           "WARNING: No new ll_ost_io threads were created ($OSTIO_pre)" &&
-           echo "This may be fine, depending on what ran before this test" &&
-           echo "and how fast this system is." && return
+       # Cleanup files left by WTL binary.
+       for i in $(seq $nfiles); do
+               local file=$DIR/$tdir/${tfile}-$i
+               rm -rf $file
+               if [ $? -ne 0 ] ; then
+                       echo "Warning: Failed to delete file $file"
+               fi
+       done
 
-       echo "Started with $OSTIO_pre threads, ended with $OSTIO_post"
+       restore_lustre_params <$save_params
+       rm -f $save_params || echo "Warning: delete file '$save_params' failed"
+
+       # Error out if no new thread has started or thread_started is greater
+       # than thread_max.
+       if [[ $thread_started -le $OSTIO_pre ||
+                       $thread_started -gt $thread_max ]]; then
+               error "ll_ost_io: thread_started $thread_started" \
+                     "OSTIO_pre $OSTIO_pre, thread_max $thread_max." \
+                     "No new thread started or thread started greater " \
+                     "than thread_max."
+       fi
 }
 run_test 115 "verify dynamic thread creation===================="
 
@@ -9075,18 +9180,16 @@ set_dir_limits () {
        local canondev
        local node
 
-       local LDPROC=/proc/fs/ldiskfs
+       local ldproc=/proc/fs/ldiskfs
        local facets=$(get_facets MDS)
 
        for facet in ${facets//,/ }; do
                canondev=$(ldiskfs_canon \
                           *.$(convert_facet2label $facet).mntdev $facet)
-               do_facet $facet "test -e $LDPROC/$canondev/max_dir_size" ||
-                                               LDPROC=/sys/fs/ldiskfs
-               do_facet $facet "echo $1 >$LDPROC/$canondev/max_dir_size"
-               do_facet $facet "test -e $LDPROC/$canondev/warning_dir_size" ||
-                                               LDPROC=/sys/fs/ldiskfs
-               do_facet $facet "echo $2 >$LDPROC/$canondev/warning_dir_size"
+               do_facet $facet "test -e $ldproc/$canondev/max_dir_size" ||
+                       ldproc=/sys/fs/ldiskfs
+               do_facet $facet "echo $1 >$ldproc/$canondev/max_dir_size"
+               do_facet $facet "echo $2 >$ldproc/$canondev/warning_dir_size"
        done
 }
 
@@ -9110,62 +9213,54 @@ test_129() {
        remote_mds_nodsh && skip "remote MDS with nodsh" && return
        local ENOSPC=28
        local EFBIG=27
-       local has_warning=0
+       local has_warning=false
 
        rm -rf $DIR/$tdir
        mkdir -p $DIR/$tdir
 
        # block size of mds1
-       local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/})
-       local MDSBLOCKSIZE=$($LCTL get_param -n mdc.*MDT0000*.blocksize)
-       local MAX=$((MDSBLOCKSIZE * 5))
-       set_dir_limits $MAX $MAX
-       local I=$(stat -c%s "$DIR/$tdir")
-       local J=0
-       while [[ $I -le $MAX ]]; do
-               $MULTIOP $DIR/$tdir/$J Oc
+       local maxsize=$(($($LCTL get_param -n mdc.*MDT0000*.blocksize) * 5))
+       set_dir_limits $maxsize $maxsize
+       local dirsize=$(stat -c%s "$DIR/$tdir")
+       local nfiles=0
+       while [[ $dirsize -le $maxsize ]]; do
+               $MULTIOP $DIR/$tdir/file_base_$nfiles Oc
                rc=$?
-               if [ $has_warning -eq 0 ]; then
-                       check_mds_dmesg '"is approaching"' &&
-                               has_warning=1
+               if ! $has_warning; then
+                       check_mds_dmesg '"is approaching"' && has_warning=true
                fi
-               #check two errors ENOSPC for new version of ext4 max_dir_size patch
-               #mainline kernel commit df981d03eeff7971ac7e6ff37000bfa702327ef1
-               #and EFBIG for previous versions
+               # check two errors:
+               # ENOSPC for new ext4 max_dir_size (kernel commit df981d03ee)
+               # EFBIG for previous versions included in ldiskfs series
                if [ $rc -eq $EFBIG -o $rc -eq $ENOSPC ]; then
                        set_dir_limits 0 0
                        echo "return code $rc received as expected"
 
-                       createmany -o $DIR/$tdir/$J_file_ 1000 ||
+                       createmany -o $DIR/$tdir/file_extra_$nfiles. 5 ||
                                error_exit "create failed w/o dir size limit"
 
                        check_mds_dmesg '"has reached"' ||
-                               error_exit "has reached message should be output"
+                               error_exit "reached message should be output"
 
-                       [ $has_warning -eq 0 ] &&
+                       $has_warning ||         # boolean true/false, not 0/1
                                error_exit "warning message should be output"
 
-                       I=$(stat -c%s "$DIR/$tdir")
+                       dirsize=$(stat -c%s "$DIR/$tdir")
 
-                       if [ $(lustre_version_code $SINGLEMDS) -lt \
-                                       $(version_code 2.4.51) ]
-                       then
-                               [[ $I -eq $MAX ]] && return 0
-                       else
-                               [[ $I -gt $MAX ]] && return 0
-                       fi
-                       error_exit "current dir size $I, previous limit $MAX"
+                       [[ $dirsize -ge $maxsize ]] && return 0
+                       error_exit "current dir size $dirsize, " \
+                                  "previous limit $maxsize"
                elif [ $rc -ne 0 ]; then
                        set_dir_limits 0 0
-                       error_exit "return code $rc received instead of expected " \
-                                  "$EFBIG or $ENOSPC, files in dir $I"
+                       error_exit "return $rc received instead of expected " \
+                                  "$EFBIG or $ENOSPC, files in dir $dirsize"
                fi
-               J=$((J+1))
-               I=$(stat -c%s "$DIR/$tdir")
+               nfiles=$((nfiles + 1))
+               dirsize=$(stat -c%s "$DIR/$tdir")
        done
 
        set_dir_limits 0 0
-       error "exceeded dir size limit $MAX($MDSCOUNT) : $I bytes"
+       error "exceeded dir size limit $maxsize($MDSCOUNT) : $dirsize bytes"
 }
 run_test 129 "test directory size limit ========================"
 
@@ -15673,7 +15768,7 @@ zfs_oid_to_objid()
        local objid=$2
 
        local vdevdir=$(dirname $(facet_vdevice $ost))
-       local cmd="$ZDB -e -p $vdevdir -dddd $(facet_device $ost)"
+       local cmd="$ZDB -e -p $vdevdir -ddddd $(facet_device $ost)"
        local zfs_zapid=$(do_facet $ost $cmd |
                          grep -w "/O/0/d$((objid%32))" -C 5 |
                          awk '/Object/{getline; print $1}')
@@ -15710,6 +15805,7 @@ test_312() { # LU-4856
        local max_blksz=$(do_facet ost1 \
                          $ZFS get -p recordsize $(facet_device ost1) |
                          awk '!/VALUE/{print $3}')
+       local min_blksz=$(getconf PAGE_SIZE)
 
        # to make life a little bit easier
        $LFS mkdir -c 1 -i 0 $DIR/$tdir
@@ -15724,7 +15820,7 @@ test_312() { # LU-4856
 
        # block size change by sequential over write
        local blksz
-       for ((bs=4096; bs <= max_blksz; bs <<= 2)); do
+       for ((bs=$min_blksz; bs <= max_blksz; bs <<= 2)); do
                dd if=/dev/zero of=$tf bs=$bs count=1 oflag=sync conv=notrunc
 
                blksz=$(zfs_object_blksz ost1 $zfs_objid)
@@ -15733,18 +15829,18 @@ test_312() { # LU-4856
        rm -f $tf
 
        # block size change by sequential append write
-       dd if=/dev/zero of=$tf bs=4K count=1 oflag=sync conv=notrunc
+       dd if=/dev/zero of=$tf bs=$min_blksz count=1 oflag=sync conv=notrunc
        oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
        zfs_objid=$(zfs_oid_to_objid ost1 $oid)
 
-       for ((count = 1; count < $((max_blksz / 4096)); count *= 2)); do
-               dd if=/dev/zero of=$tf bs=4K count=$count seek=$count \
+       for ((count = 1; count < $((max_blksz / min_blksz)); count *= 2)); do
+               dd if=/dev/zero of=$tf bs=$min_blksz count=$count seek=$count \
                        oflag=sync conv=notrunc
 
                blksz=$(zfs_object_blksz ost1 $zfs_objid)
-               blksz=$((blksz / 8192)) # in 2*4K unit
-               [ $blksz -eq $count ] ||
-                       error "blksz error(in 8k): $blksz, expected: $count"
+               [ $blksz -eq $((2 * count * min_blksz)) ] ||
+                       error "blksz error, actual $blksz, "    \
+                               "expected: 2 * $count * $min_blksz"
        done
        rm -f $tf
 
@@ -15753,9 +15849,10 @@ test_312() { # LU-4856
        oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
        zfs_objid=$(zfs_oid_to_objid ost1 $oid)
 
-       dd if=/dev/zero of=$tf bs=8K count=1 oflag=sync conv=notrunc
+       dd if=/dev/zero of=$tf bs=1K count=1 oflag=sync conv=notrunc
        blksz=$(zfs_object_blksz ost1 $zfs_objid)
-       [ $blksz -eq 8192 ] || error "blksz error: $blksz, expected: 8k"
+       [ $blksz -eq $min_blksz ] ||
+               error "blksz error: $blksz, expected: $min_blksz"
 
        dd if=/dev/zero of=$tf bs=64K count=1 oflag=sync conv=notrunc seek=128
        blksz=$(zfs_object_blksz ost1 $zfs_objid)