LU-2724 ptlrpc: skip NULL obd_svc_stats in lprocfs_rd_import()

[fs/lustre-release.git] / lustre / tests / sanity.sh
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 0d03287..940e64c 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -17,11 +17,6 @@ ALWAYS_EXCEPT="                27u   42a  42b  42c  42d  45   51d   68b   $SANIT
  ALWAYS_EXCEPT="                 76     $ALWAYS_EXCEPT"
  
  
-# Tests that fail on uml
-CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
-#                                    buffer i/o errs             sock spc runas
-[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 27m 27n 27o 27p 27q 27r 31d 54a  64b 99a 99b 99c 99d 99e 99f 101a"
-
  SRCDIR=$(cd $(dirname $0); echo $PWD)
  export PATH=$PATH:/sbin
  
@@ -65,6 +60,10 @@ init_logging
  
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24v 27m 36f 36g 36h 51b 60c 63 64b 68 71 73 77f 78 101a 103 115 120g 124b"
  
+[ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
+# bug number for skipped test:        LU-2834 LU-1593 LU-2610 LU-2833 LU-1957 LU-2805
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 18      34h     40      48a     180     184c"
+
  FAIL_ON_ERROR=false
  
  cleanup() {
@@ -911,20 +910,26 @@ test_24p() {
  }
  run_test 24p "mkdir .../R12{a,b}; rename .../R12a .../R12b"
  
+# Reset the EXIT trap and send SIGUSR1 to the process whose PID is stored
+# in $MULTIPID. test_24q installs this as its EXIT handler and also calls
+# it directly once its checks are done.
+cleanup_multiop_pause() {
+       trap 0
+       kill -USR1 $MULTIPID
+}
+
  test_24q() {
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
         test_mkdir $DIR/R13a
         test_mkdir $DIR/R13b
-       DIRINO=`ls -lid $DIR/R13a | awk '{ print $1 }'`
-       multiop_bg_pause $DIR/R13b D_c || return 1
+       local DIRINO=$(ls -lid $DIR/R13a | awk '{ print $1 }')
+       multiop_bg_pause $DIR/R13b D_c || error "multiop failed to start"
         MULTIPID=$!
  
+       trap cleanup_multiop_pause EXIT
         mrename $DIR/R13a $DIR/R13b
-       $CHECKSTAT -a $DIR/R13a || error
-       $CHECKSTAT -t dir $DIR/R13b || error
-       DIRINO2=`ls -lid $DIR/R13b | awk '{ print $1 }'`
+       $CHECKSTAT -a $DIR/R13a || error "R13a still exists"
+       $CHECKSTAT -t dir $DIR/R13b || error "R13b does not exist"
+       local DIRINO2=$(ls -lid $DIR/R13b | awk '{ print $1 }')
         [ "$DIRINO" = "$DIRINO2" ] || error "R13a $DIRINO != R13b $DIRINO2"
-       kill -USR1 $MULTIPID
+       cleanup_multiop_pause
         wait $MULTIPID || error "multiop close failed"
  }
  run_test 24q "mkdir .../R13{a,b}; open R13b rename R13a R13b ==="
@@ -1683,8 +1688,13 @@ check_seq_oid()
                  #       { error "mounting $dev as $FSTYPE failed"; return 3; }
                  #local obj_file=$(do_facet ost$ost find $dir/O/$seq -name $oid)
                  #local ff=$(do_facet ost$ost $LL_DECODE_FILTER_FID $obj_file)
-
-                local obj_file="O/$seq/d$((oid %32))/$oid"
+               seq=$(echo $seq | sed -e "s/^0x//g")
+               if [ $seq == 0 ]; then
+                       oid_hex=$(echo $oid)
+               else
+                       oid_hex=$(echo $hex | sed -e "s/^0x//g")
+               fi
+                local obj_file="O/$seq/d$((oid %32))/$oid_hex"
                  local ff=$(do_facet ost$ost "$DEBUGFS -c -R 'stat $obj_file' \
                             $dev 2>/dev/null" | grep "parent=")
  
@@ -1991,6 +2001,22 @@ test_31m() {
  }
  run_test 31m "link to file: the same, non-existing, dir==============="
  
+# Check the link count reported for a file through an open descriptor:
+# it must be 1 while the file exists and 0 after the file is removed.
+# Uses fd 173 and returns early if that descriptor is already in use.
+test_31n() {
+       local nlink
+
+       [ -e /proc/self/fd/173 ] && echo "skipping, fd 173 is in use" && return
+       touch $DIR/$tfile || error "cannot create '$DIR/$tfile'"
+       nlink=$(stat --format=%h $DIR/$tfile)
+       [ ${nlink:--1} -eq 1 ] || error "nlink is $nlink, expected 1"
+       exec 173<$DIR/$tfile
+       # close the descriptor even if the test exits early
+       trap "exec 173<&-" EXIT
+       nlink=$(stat --dereference --format=%h /proc/self/fd/173)
+       [ ${nlink:--1} -eq 1 ] || error "nlink is $nlink, expected 1"
+       rm $DIR/$tfile || error "cannot remove '$DIR/$tfile'"
+       nlink=$(stat --dereference --format=%h /proc/self/fd/173)
+       [ ${nlink:--1} -eq 0 ] || error "nlink is $nlink, expected 0"
+       exec 173<&-
+       # the descriptor is closed, so drop the EXIT handler again, as the
+       # cleanup_* helpers in this file do with "trap 0"
+       trap 0
+}
+run_test 31n "check link count of unlinked file"
+
  cleanup_test32_mount() {
         trap 0
         $UMOUNT $DIR/$tdir/ext2-mountpoint
@@ -2435,6 +2461,8 @@ test_34h() {
         local sz=1000
  
         dd if=/dev/zero of=$DIR/$tfile bs=1M count=10 || error
+       sync # Flush the cache so that multiop below does not block on cache
+            # flush when getting the group lock
         $MULTIOP $DIR/$tfile OG${gid}T${sz}g${gid}c &
         MULTIPID=$!
         sleep 2
@@ -3473,7 +3501,7 @@ run_test 50 "special situations: /proc symlinks  ==============="
  
  test_51a() {   # was test_51
         # bug 1516 - create an empty entry right after ".." then split dir
-       test_mkdir $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
         touch $DIR/$tdir/foo
         $MCREATE $DIR/$tdir/bar
         rm $DIR/$tdir/foo
@@ -3494,6 +3522,9 @@ test_51b() {
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
         local BASE=$DIR/$tdir
  
+       # cleanup the directory
+       rm -fr $BASE
+
         test_mkdir -p $BASE
  
         local mdtidx=$(printf "%04x" $($LFS getstripe -M $BASE))
@@ -5486,7 +5517,9 @@ function get_named_value()
      done
  }
  
-export CACHE_MAX=`$LCTL get_param -n llite.*.max_cached_mb | head -n 1`
+export CACHE_MAX=$($LCTL get_param -n llite.*.max_cached_mb |
+                  awk '/^max_cached_mb/ { print $2 }')
+
  cleanup_101a() {
         $LCTL set_param -n llite.*.max_cached_mb $CACHE_MAX
         trap 0
@@ -5497,7 +5530,6 @@ test_101a() {
         local s
         local discard
         local nreads=10000
-       [ "$CPU" = "UML" ] && nreads=1000
         local cache_limit=32
  
         $LCTL set_param -n osc.*-osc*.rpc_stats 0
@@ -6455,6 +6487,7 @@ test_117() # bug 10891
  }
  run_test 117 "verify fsfilt_extend =========="
  
+NO_SLOW_RESENDCOUNT=4
  export OLD_RESENDCOUNT=""
  set_resend_count () {
         local PROC_RESENDCOUNT="osc.${FSNAME}-OST*-osc-*.resend_count"
@@ -6463,7 +6496,8 @@ set_resend_count () {
         echo resend_count is set to $(lctl get_param -n $PROC_RESENDCOUNT)
  }
  
-[ "$SLOW" = "no" ] && set_resend_count 4 # for reduce test_118* time (bug 14842)
+# use a small resend count to reduce test_118* time (b=14842)
+[ "$SLOW" = "no" ] && set_resend_count $NO_SLOW_RESENDCOUNT
  
  # Reset async IO behavior after error case
  reset_async() {
@@ -6533,6 +6567,10 @@ run_test 118b "Reclaim dirty pages on fatal error =========="
  test_118c()
  {
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+
+       # for 118c, restore the original resend count, LU-1940
+       [ "$SLOW" = "no" ] && [ -n "$OLD_RESENDCOUNT" ] &&
+                               set_resend_count $OLD_RESENDCOUNT
         remote_ost_nodsh && skip "remote OST with nodsh" && return
  
         reset_async
@@ -6575,6 +6613,9 @@ test_118c()
  }
  run_test 118c "Fsync blocks on EROFS until dirty pages are flushed =========="
  
+# continue to use small resend count to reduce test_118* time (b=14842)
+[ "$SLOW" = "no" ] && set_resend_count $NO_SLOW_RESENDCOUNT
+
  test_118d()
  {
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
@@ -7870,7 +7911,7 @@ get_ost_param() {
                  [ x$gl = x"" ] && gl=0
                  gl_sum=$((gl_sum + gl))
          done
-        echo $gl
+        echo $gl_sum
  }
  
  som_mode_switch() {
@@ -8233,6 +8274,36 @@ test_133e() {
  }
  run_test 133e "Verifying OST {read,write}_bytes nid stats ================="
  
+# Read every entry under the Lustre/LNet /proc directories listed in
+# $proc_dirs, first locally and then on the $SINGLEMDS and ost1 facets.
+# The first pass of each pair discards all output and ignores failures;
+# the second pass is limited to readable regular files and reports an
+# error if the find command fails.
+test_133f() {
+       local proc_dirs="/proc/fs/lustre/ /proc/sys/lnet/ /proc/sys/lustre/"
+       local facet
+
+       # First without trusting modes.
+       find $proc_dirs \
+               -exec cat '{}' \; &> /dev/null
+
+       # Second verifying readability.
+       find $proc_dirs \
+               -type f \
+               -readable \
+               -exec cat '{}' \; > /dev/null ||
+                       error "proc file read failed"
+
+       # Same two passes through do_facet; entries named req_history are
+       # excluded here, and the find terminator is written as \\\; instead
+       # of \; for the do_facet invocation.
+       for facet in $SINGLEMDS ost1; do
+               do_facet $facet find $proc_dirs \
+                       -not -name req_history \
+                       -exec cat '{}' \\\; &> /dev/null
+
+           do_facet $facet     find $proc_dirs \
+                       -not -name req_history \
+                       -type f \
+                       -readable \
+                       -exec cat '{}' \\\; > /dev/null ||
+                               error "proc file read failed"
+       done
+}
+run_test 133f "Check for LBUGs/Oopses/unreadable files in /proc"
+
  test_140() { #bug-17379
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
          test_mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir"
@@ -9255,6 +9326,148 @@ test_183() { # LU-2275
  }
  run_test 183 "No crash or request leak in case of strange dispositions ========"
  
+# Volatile file checks (LU-2441): the FID printed by the first multiop
+# run must no longer stat as a file after that run has closed it, the
+# assumed next FID must stat as a file while a second multiop is paused,
+# and the mtime of $DIR/$tdir must be unchanged at the end.
+test_185() { # LU-2441
+       mkdir -p $DIR/$tdir || error "creating dir $DIR/$tdir"
+       touch $DIR/$tdir/spoo
+       local mtime1=$(stat -c "%Y" $DIR/$tdir)
+       # declare first: "local fid=$(cmd) || error" tests the status of
+       # "local", which is always 0, so a multiop failure would be missed
+       local fid
+       fid=$($MULTIOP $DIR/$tdir VFw4096c) ||
+               error "cannot create/write a volatile file"
+       $CHECKSTAT -t file $MOUNT/.lustre/fid/$fid 2>/dev/null &&
+               error "FID is still valid after close"
+
+       # fail early if the background multiop did not start, as test_24q does
+       multiop_bg_pause $DIR/$tdir vVw4096_c ||
+               error "multiop failed to start"
+       local multi_pid=$!
+
+       # split the FID on ":" into its three fields
+       local OLD_IFS=$IFS
+       IFS=":"
+       local fidv=($fid)
+       IFS=$OLD_IFS
+       # assume that the next FID for this client is sequential, since stdout
+       # is unfortunately eaten by multiop_bg_pause
+       local n=$((${fidv[1]} + 1))
+       local next_fid="${fidv[0]}:$(printf "0x%x" $n):${fidv[2]}"
+       $CHECKSTAT -t file $MOUNT/.lustre/fid/$next_fid ||
+               error "FID is missing before close"
+       kill -USR1 $multi_pid
+       # 1 second delay, so if mtime change we will see it
+       sleep 1
+       local mtime2=$(stat -c "%Y" $DIR/$tdir)
+       [[ $mtime1 == $mtime2 ]] || error "mtime has changed"
+}
+run_test 185 "Volatile file support"
+
+# Returns 0, after calling skip, when "layout" is not found in the output
+# of "$LCTL get_param -n llite.*.sbi_flags", and 1 when it is found.
+# Note the sense: 0 means "not supported", which lets the test_184*
+# callers write "check_swap_layouts_support && return 0".
+check_swap_layouts_support()
+{
+       $LCTL get_param -n llite.*.sbi_flags | grep -q layout ||
+               { skip "Does not support layout lock."; return 0; }
+       return 1
+}
+
+# test suite 184 is for LU-2016, LU-2017
+# Basic layout swap: copy /etc/passwd and /etc/group into two files with
+# stripe counts 1 and 2, swap their layouts, then check that both layout
+# generations changed and that the file contents were exchanged.
+test_184a() {
+       check_swap_layouts_support && return 0
+
+       dir0=$DIR/$tdir/$testnum
+       test_mkdir -p $dir0 || error "creating dir $dir0"
+       ref1=/etc/passwd
+       ref2=/etc/group
+       file1=$dir0/f1
+       file2=$dir0/f2
+       $SETSTRIPE -c1 $file1
+       cp $ref1 $file1
+       $SETSTRIPE -c2 $file2
+       cp $ref2 $file2
+       gen1=$($GETSTRIPE -g $file1)
+       gen2=$($GETSTRIPE -g $file2)
+
+       $LFS swap_layouts $file1 $file2 || error "swap of file layout failed"
+       # report through error(); without it the message string itself
+       # would be executed as a command name
+       gen=$($GETSTRIPE -g $file1)
+       [[ $gen1 != $gen ]] ||
+               error "Layout generation on $file1 does not change"
+       gen=$($GETSTRIPE -g $file2)
+       [[ $gen2 != $gen ]] ||
+               error "Layout generation on $file2 does not change"
+
+       # after the swap each file must hold the other reference's data
+       cmp $ref1 $file2 || error "content compare failed ($ref1 != $file2)"
+       cmp $ref2 $file1 || error "content compare failed ($ref2 != $file1)"
+}
+run_test 184a "Basic layout swap"
+
+# Negative tests for "lfs swap_layouts": each swap below is expected to
+# fail, and error() is called if one unexpectedly succeeds.
+test_184b() {
+       check_swap_layouts_support && return 0
+
+       dir0=$DIR/$tdir/$testnum
+       mkdir -p $dir0 || error "creating dir $dir0"
+       file1=$dir0/f1
+       file2=$dir0/f2
+       file3=$dir0/f3
+       dir1=$dir0/d1
+       dir2=$dir0/d2
+       mkdir $dir1 $dir2
+       $SETSTRIPE -c1 $file1
+       $SETSTRIPE -c2 $file2
+       $SETSTRIPE -c1 $file3
+       chown $RUNAS_ID $file3
+       # NOTE(review): gen1 and gen2 are assigned but not read again in
+       # this function
+       gen1=$($GETSTRIPE -g $file1)
+       gen2=$($GETSTRIPE -g $file2)
+
+       $LFS swap_layouts $dir1 $dir2 &&
+               error "swap of directories layouts should fail"
+       $LFS swap_layouts $dir1 $file1 &&
+               error "swap of directory and file layouts should fail"
+       $RUNAS $LFS swap_layouts $file1 $file2 &&
+               error "swap of file we cannot write should fail"
+       $LFS swap_layouts $file1 $file3 &&
+               error "swap of file with different owner should fail"
+       /bin/true # to clear error code
+}
+run_test 184b "Forbidden layout swap (will generate errors)"
+
+# Swap the layouts of two files while a background dd is still writing
+# to one of them, then verify the data on both sides of the swap point.
+test_184c() {
+       check_swap_layouts_support && return 0
+
+       local dir0=$DIR/$tdir/$testnum
+       mkdir -p $dir0 || error "creating dir $dir0"
+
+       local ref1=$dir0/ref1
+       local ref2=$dir0/ref2
+       local file1=$dir0/file1
+       local file2=$dir0/file2
+       # create a file large enough for the concurrent test
+       # (each reference file is between 20 and 69 MiB of random data)
+       dd if=/dev/urandom of=$ref1 bs=1M count=$((RANDOM % 50 + 20))
+       dd if=/dev/urandom of=$ref2 bs=1M count=$((RANDOM % 50 + 20))
+       echo "ref file size: ref1(`stat -c %s $ref1`), ref2(`stat -c %s $ref2`)"
+
+       cp $ref2 $file2
+       dd if=$ref1 of=$file1 bs=16k &
+       local DD_PID=$!
+
+       # let the background dd run for 0.1 to 0.5 seconds before swapping
+       sleep 0.$((RANDOM % 5 + 1))
+
+       $LFS swap_layouts $file1 $file2
+       # save the swap status first; $? is overwritten by the wait below
+       local rc=$?
+       wait $DD_PID
+       [[ $? == 0 ]] || error "concurrent write on $file1 failed"
+       [[ $rc == 0 ]] || error "swap of $file1 and $file2 failed"
+
+       # how many bytes copied before swapping layout
+       local copied=`stat -c %s $file2`
+       local remaining=`stat -c %s $ref1`
+       remaining=$((remaining - copied))
+       echo "Copied $copied bytes before swapping layout..."
+
+       # this first check only fails when cmp prints a "differ" line
+       cmp -n $copied $file1 $ref2 | grep differ &&
+               error "Content mismatch [0, $copied) of ref2 and file1"
+       cmp -n $copied $file2 $ref1 ||
+               error "Content mismatch [0, $copied) of ref1 and file2"
+       cmp -i $copied:$copied -n $remaining $file1 $ref1 ||
+               error "Content mismatch [$copied, EOF) of ref1 and file1"
+
+       # clean up
+       rm -f $ref1 $ref2 $file1 $file2
+}
+run_test 184c "Concurrent write and layout swap"
+
  # OST pools tests
  check_file_in_pool()
  {
@@ -10478,6 +10691,7 @@ test_230b() {
         [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
         local MDTIDX=1
         local remote_dir=$DIR/$tdir/remote_dir
+       local rc=0
  
         mkdir -p $DIR/$tdir
         $LFS mkdir -i $MDTIDX $remote_dir ||
@@ -10486,10 +10700,67 @@ test_230b() {
         $LFS mkdir -i 0 $remote_dir/new_dir &&
                 error "nested remote directory create succeed!"
  
+       do_facet mds$((MDTIDX + 1)) lctl set_param mdt.*.enable_remote_dir=1
+       $LFS mkdir -i 0 $remote_dir/new_dir || rc=$?
+       do_facet mds$((MDTIDX + 1)) lctl set_param mdt.*.enable_remote_dir=0
+
+       [ $rc -ne 0 ] &&
+          error "create remote directory failed after set enable_remote_dir"
+
         rm -r $DIR/$tdir || error "unlink remote directory failed"
  }
  run_test 230b "nested remote directory should be failed"
  
+# Write and then read back one max_pages_per_rpc-sized buffer with
+# direct I/O, and check from the OSC stats that each transfer was
+# counted as exactly one ost_write / ost_read RPC.
+test_231a()
+{
+       # For simplicity this test assumes that max_pages_per_rpc
+       # is the same across all OSCs
+       local max_pages=$($LCTL get_param -n osc.*.max_pages_per_rpc | head -1)
+       local bulk_size=$((max_pages * 4096))
+
+       mkdir -p $DIR/$tdir
+
+       # clear the OSC stats
+       $LCTL set_param osc.*.stats=0 &>/dev/null
+
+       # Client writes $bulk_size - there must be 1 rpc for $max_pages.
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=$bulk_size count=1 \
+               oflag=direct &>/dev/null || error "dd failed"
+
+       local nrpcs=$($LCTL get_param osc.*.stats |awk '/ost_write/ {print $2}')
+       if [ x$nrpcs != "x1" ]; then
+               # report the counter that was actually read ($nrpcs)
+               error "found $nrpcs ost_write RPCs, not 1 as expected"
+       fi
+
+       # Drop the OSC cache, otherwise we will read from it
+       cancel_lru_locks osc
+
+       # clear the OSC stats
+       $LCTL set_param osc.*.stats=0 &>/dev/null
+
+       # Client reads $bulk_size.
+       dd if=$DIR/$tdir/$tfile of=/dev/null bs=$bulk_size count=1 \
+               iflag=direct &>/dev/null || error "dd failed"
+
+       nrpcs=$($LCTL get_param osc.*.stats | awk '/ost_read/ { print $2 }')
+       if [ x$nrpcs != "x1" ]; then
+               error "found $nrpcs ost_read RPCs, not 1 as expected"
+       fi
+}
+run_test 231a "checking that reading/writing of BRW RPC size results in one RPC"
+
+# Write 1024 single 4096-byte blocks to one file, at every second block
+# offset (seek = 0, 2, 4, ... 2046) without truncating, then sync.
+test_231b() {
+       mkdir -p $DIR/$tdir
+       local i
+       for i in {0..1023}; do
+               dd if=/dev/zero of=$DIR/$tdir/$tfile conv=notrunc \
+                       seek=$((2 * i)) bs=4096 count=1 &>/dev/null ||
+                       error "dd of=$DIR/$tdir/$tfile seek=$((2 * i)) failed"
+       done
+       sync
+}
+run_test 231b "must not assert on fully utilized OST request buffer"
+
  #
  # tests that do cleanup/setup should be run at the end
  #