X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanityn.sh;h=e69caf8c21e311580dc49a8d487880bf281d3e7e;hb=41ba272d1904bb9c612f785f608fc2d396763581;hp=63b2aa771ddfdfe6515ca52075835241ec3cae5f;hpb=b00779a40f744c01863f5f2b6a90e6e8cd3ca093;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 63b2aa7..e69caf8 100644 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -40,11 +40,14 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -[ $(facet_fstype $SINGLEMDS) = "zfs" ] && -# bug number for skipped test: LU-2840 LU-2776 - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 21 51a" +if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then +# bug number for skipped test: LU-2840 LU-2189 LU-2776 + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 21 36 51a" +# LU-2829 / LU-2887 - make allowances for ZFS slowness + TEST33_NFILES=${TEST33_NFILES:-5000} +fi -[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 23 33a" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="33a" FAIL_ON_ERROR=false @@ -57,7 +60,7 @@ LOVNAME=$($LCTL get_param -n llite.*.lov.common_name | tail -n 1) OSTCOUNT=$($LCTL get_param -n lov.$LOVNAME.numobd) assert_DIR -rm -rf $DIR1/[df][0-9]* $DIR1/lnk +rm -rf $DIR1/[df][0-9]* $DIR1/lnk $DIR/[df].${TESTSUITE}* SAMPLE_FILE=$TMP/$(basename $0 .sh).junk dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=1 @@ -489,28 +492,27 @@ test_21() { # Bug 5907 run_test 21 " Try to remove mountpoint on another dir ====" test_23() { # Bug 5972 - echo "others should see updated atime while another read" > $DIR1/f23 - + local at_diff=$(do_facet $SINGLEMDS $LCTL get_param -n mdd.*.atime_diff) + echo "atime should be updated while another read" > $DIR1/$tfile + # clear the lock(mode: LCK_PW) gotten from creating operation cancel_lru_locks osc - - time1=`date +%s` - #MAX_ATIME_DIFF 60, we update atime only if older than 60 seconds - sleep 61 - - multiop_bg_pause $DIR1/f23 or20_c || return 1 + time1=$(date +%s) + echo "now is $time1" + sleep $((at_diff + 1)) + + echo "starting reads" + multiop_bg_pause $DIR1/$tfile or20_c || return 1 # with SOM and opencache enabled, we need to close a file and cancel # open lock to get atime propogated to MDS - kill -USR1 $! + kill -USR1 $! || return 2 cancel_lru_locks mdc - time2=`stat -c "%X" $DIR2/f23` - - if (( $time2 <= $time1 )); then - error "atime doesn't update among nodes" - fi + time2=$(stat -c "%X" $DIR/$tfile) + echo "new atime is $time2" - rm -f $DIR1/f23 || error "rm -f $DIR1/f23 failed" + [ $time2 -gt $time1 ] || error "atime was not updated" + rm -f $DIR1/$tfile || error "rm -f $DIR1/$tfile failed" true } run_test 23 " others should see updated atime while another read====" @@ -523,7 +525,7 @@ test_24a() { lfs df -i $DIR2 || error "lfs df -i $DIR2 failed" lfs df $DIR1/$tfile || error "lfs df $DIR1/$tfile failed" lfs df -ih $DIR2/$tfile || error "lfs df -ih $DIR2/$tfile failed" - + OSC=`lctl dl | awk '/-osc-|OSC.*MNT/ {print $4}' | head -n 1` # OSC=`lctl dl | awk '/-osc-/ {print $4}' | head -n 1` lctl --device %$OSC deactivate @@ -687,16 +689,14 @@ test_29() { # bug 10999 #bug 11549 - permanently turn test off in b1_5 run_test 29 "lock put race between glimpse and enqueue =========" -test_30() { #bug #11110 +test_30() { #bug #11110, LU-2523 test_mkdir -p $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & - local err=$($DIR1/$tdir/bash -c 'sleep 2; - openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?') + $DIR1/$tdir/bash -c 'sleep 2; + openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?' wait - [ $err -ne 116 ] && - error_ignore 12900 "return code ($err) != -ESTALE" && return true } @@ -744,8 +744,8 @@ enable_lockless_truncate() { } test_32a() { # bug 11270 - local p="$TMP/sanityN-$TESTNAME.parameters" - save_lustre_params $HOSTNAME osc.*.lockless_truncate > $p + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_lustre_params client "osc.*.lockless_truncate" > $p cancel_lru_locks osc enable_lockless_truncate 1 rm -f $DIR1/$tfile @@ -783,15 +783,19 @@ run_test 32a "lockless truncate" test_32b() { # bug 11270 remote_ost_nodsh && skip "remote OST with nodsh" && return - local node - local p="$TMP/sanityN-$TESTNAME.parameters" - save_lustre_params $HOSTNAME "osc.*.contention_seconds" > $p - for node in $(osts_nodes); do - save_lustre_params $node "ldlm.namespaces.filter-*.max_nolock_bytes" >> $p - save_lustre_params $node "ldlm.namespaces.filter-*.contended_locks" >> $p - save_lustre_params $node "ldlm.namespaces.filter-*.contention_seconds" >> $p - done - clear_osc_stats + local node + local facets=$(get_facets OST) + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + + save_lustre_params client "osc.*.contention_seconds" > $p + save_lustre_params $facets \ + "ldlm.namespaces.filter-*.max_nolock_bytes" >> $p + save_lustre_params $facets \ + "ldlm.namespaces.filter-*.contended_locks" >> $p + save_lustre_params $facets \ + "ldlm.namespaces.filter-*.contention_seconds" >> $p + clear_osc_stats + # agressive lockless i/o settings for node in $(osts_nodes); do do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60' @@ -831,11 +835,10 @@ print_jbd_stat () { local stat=0 for mds in ${mdts//,/ }; do varsvc=${mds}_svc - dev=$(basename $(do_facet $mds lctl get_param -n osd*.${!varsvc}.mntdev)) - val=$(do_facet $mds "procfile=/proc/fs/jbd/$dev/info; -[ -f \\\$procfile ] || procfile=/proc/fs/jbd2/$dev/info; -[ -f \\\$procfile ] || procfile=/proc/fs/jbd2/${dev}\:\\\*/info; -cat \\\$procfile | head -1;") + dev=$(basename $(do_facet $mds "lctl get_param -n osd*.${!varsvc}.mntdev|\ + xargs readlink -f" )) + val=$(do_facet $mds "cat /proc/fs/jbd*/${dev}{,:*,-*}/info 2>/dev/null|\ + head -1") val=${val%% *}; stat=$(( stat + val)) done @@ -846,18 +849,20 @@ cat \\\$procfile | head -1;") test_33a() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - [ -n "$CLIENTS" ] || { skip "Need two or more clients" && return 0; } - [ $CLIENTCOUNT -ge 2 ] || \ - { skip "Need two or more clients, have $CLIENTCOUNT" && return 0; } + [ -z "$CLIENTS" ] && skip "Need two or more clients, have $CLIENTS" && return 0 + [ $CLIENTCOUNT -lt 2 ] && + skip "Need two or more clients, have $CLIENTCOUNT" && return 0 local nfiles=${TEST33_NFILES:-10000} local param_file=$TMP/$tfile-params + local fstype=$(facet_fstype $SINGLEMDS) - save_lustre_params $(comma_list $(mdts_nodes)) "mdt.*.commit_on_sharing" > $param_file + save_lustre_params $(get_facets MDS) \ + "mdt.*.commit_on_sharing" > $param_file local COS - local jbdold - local jbdnew + local jbdold="N/A" + local jbdnew="N/A" local jbd for COS in 0 1; do @@ -867,13 +872,13 @@ test_33a() { for i in 1 2 3; do do_nodes $CLIENT1,$CLIENT2 "mkdir -p $DIR1/$tdir-\\\$(hostname)-$i" - jbdold=$(print_jbd_stat) + [ $fstype = ldiskfs ] && jbdold=$(print_jbd_stat) echo "=== START createmany old: $jbdold transaction" local elapsed=$(do_and_time "do_nodes $CLIENT1,$CLIENT2 createmany -o $DIR1/$tdir-\\\$(hostname)-$i/f- -r $DIR2/$tdir-\\\$(hostname)-$i/f- $nfiles > /dev/null 2>&1") - jbdnew=$(print_jbd_stat) - jbd=$(( jbdnew - jbdold )) + [ $fstype = ldiskfs ] && jbdnew=$(print_jbd_stat) + [ $fstype = ldiskfs ] && jbd=$(( jbdnew - jbdold )) echo "=== END createmany new: $jbdnew transaction : $jbd transactions nfiles $nfiles time $elapsed COS=$COS" - avgjbd=$(( avgjbd + jbd )) + [ $fstype = ldiskfs ] && avgjbd=$(( avgjbd + jbd )) avgtime=$(( avgtime + elapsed )) done eval cos${COS}_jbd=$((avgjbd / 3)) @@ -904,8 +909,9 @@ test_33b() { local nfiles=${TEST33_NFILES:-10000} local param_file=$TMP/$tfile-params - save_lustre_params $(comma_list $(mdts_nodes)) \ - "mdt.*.commit_on_sharing" > $param_file + save_lustre_params $(get_facets MDS) \ + "mdt.*.commit_on_sharing" > $param_file + local COS local jbdold local jbdnew @@ -1084,33 +1090,29 @@ test_36() { #bug 16417 i=0 SIZE=50 let SIZE_B=SIZE*1024*1024 + sync; sleep 5; sync # wait for delete thread while [ $i -le 10 ]; do lctl mark "start test" - local before=$($LFS df | awk '{ if ($1 ~/^filesystem/) \ - { print $5; exit} }') + local before=$(lfs_df $MOUNT1 | awk '/^filesystem/{ print $4; exit }') dd if=/dev/zero of=$DIR1/$tdir/$tfile bs=1M count=$SIZE || error "dd $DIR1/$tdir/$tfile ${SIZE}MB failed" sync # sync data from client cache sync_all_data # sync data from server cache (delayed allocation) - sleep 1 - local after_dd=$($LFS df | awk '{ if ($1 ~/^filesystem/) \ - { print $5; exit} }') + sleep 2 + local after_dd=$(lfs_df $MOUNT1 | awk '/^filesystem/{ print $4; exit }') multiop_bg_pause $DIR2/$tdir/$tfile O_r${SIZE_B}c || return 3 read_pid=$! rm -f $DIR1/$tdir/$tfile kill -USR1 $read_pid wait $read_pid wait_delete_completed - local after=$($LFS df | awk '{ if ($1 ~/^filesystem/) \ - { print $5; exit} }') + local after=$(lfs_df $MOUNT1 | awk '/^filesystem/{ print $4; exit }') echo "*** cycle($i) *** before($before) after_dd($after_dd)" \ "after($after)" # this free space! not used - if [ $after_dd -ge $after ]; then - error "space leaked" - return 1; - fi + (( $after_dd <= $after)) || + error "space leaked after_dd:$after_dd > after:$after" let i=i+1 done } @@ -2301,6 +2303,9 @@ test_51a() { run_test 51a "layout lock: refresh layout should work" test_51b() { + [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.59) ]] || + { skip "Need MDS version at least 2.3.59"; return 0; } + local tmpfile=`mktemp` # create an empty file @@ -2432,6 +2437,39 @@ test_70b() { # LU-2781 } run_test 70b "remove files after calling rm_entry" +test_71() { + checkfiemap --test || + { skip "checkfiemap not runnable: $?" && return; } + # write data this way: hole - data - hole - data + dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1 + [ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR1/$tfile) + 1)))" = \ + "zfs" ] && + skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return 0 + dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1 + GET_STAT="lctl get_param -n ldlm.services.ldlm_cbd.stats" + stat $DIR2/$tfile + local can1=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') + echo $can1 + checkfiemap $DIR2/$tfile 81920 || + error "data is not flushed from client" + local can2=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') + echo $can2 + + # common case of "create file, copy file" on a single node + # should not flush data from ost + dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1 + dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1 + stat $DIR1/$tfile + local can3=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') + echo $can3 + checkfiemap $DIR1/$tfile 81920 || + error 4 + local can4=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}') + echo $can2 + [ $can3 -eq $can4 ] || error $((can2-can1)) "cancel RPC occured." +} +run_test 71 "correct file map just after write operation is finished" + log "cleanup: ======================================================" [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2