X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=4ad3f1435f89cb7b91a25d499761565092de6b74;hp=f3ba16db9d421c0446b5e81f84c376798cf393a6;hb=b924164398e939986e20506ab5d004e64f0b004e;hpb=fbf5870b9848929d352460f1f005b79c0b5ccc5a diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index f3ba16d..4ad3f14 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1003,13 +1003,13 @@ test_27p() { reset_enospc rm -f $DIR/d27/f27p - $MCREATE $DIR/d27/f27p || error - $TRUNCATE $DIR/d27/f27p 80000000 || error - $CHECKSTAT -s 80000000 $DIR/d27/f27p || error + $MCREATE $DIR/d27/f27p || error "mcreate failed" + $TRUNCATE $DIR/d27/f27p 80000000 || error "truncate failed" + $CHECKSTAT -s 80000000 $DIR/d27/f27p || error "checkstat failed" exhaust_precreations 0 0x80000215 - echo foo >> $DIR/d27/f27p || error - $CHECKSTAT -s 80000004 $DIR/d27/f27p || error + echo foo >> $DIR/d27/f27p || error "append failed" + $CHECKSTAT -s 80000004 $DIR/d27/f27p || error "checkstat failed" reset_enospc } @@ -2990,6 +2990,21 @@ test_65i() { # bug6367 } run_test 65i "set non-default striping on root directory (bug 6367)=" +test_65ia() { # bug12836 + $LFS getstripe $MOUNT || error "getstripe $MOUNT failed" +} +run_test 65ia "getstripe on -1 default directory striping" + +test_65ib() { # bug12836 + $LFS getstripe -v $MOUNT || error "getstripe -v $MOUNT failed" +} +run_test 65ib "getstripe -v on -1 default directory striping" + +test_65ic() { # bug12836 + $LFS find -mtime -1 $MOUNT || error "find $MOUNT failed" +} +run_test 65ic "new find on -1 default directory striping" + test_65j() { # bug6367 sync; sleep 1 # if we aren't already remounting for each test, do so for this test @@ -3241,9 +3256,6 @@ test75_prep() { } test_75a() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep cp -p ${F128k} ${FHEAD} @@ -3261,9 +3273,6 @@ test_75a() { run_test 75a "TEST join file ====================================" test_75b() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep cp -p ${F128k} ${FTAIL} @@ -3277,9 +3286,6 @@ test_75b() { run_test 75b "TEST join file 2 ==================================" test_75c() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep cp -p ${F128k} ${FTAIL} @@ -3293,9 +3299,6 @@ test_75c() { run_test 75c "TEST join file 3 ==================================" test_75d() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep cp -p ${F128k} ${FHEAD} @@ -3310,9 +3313,6 @@ test_75d() { run_test 75d "TEST join file 4 ==================================" test_75e() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep rm -rf ${FHEAD} || "delete join file error" @@ -3320,9 +3320,6 @@ test_75e() { run_test 75e "TEST join file 5 (remove joined file) =============" test_75f() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep cp -p ${F128k} ${F75}_join_10_compare @@ -3340,9 +3337,6 @@ test_75f() { run_test 75f "TEST join file 6 (join 10 files) ==================" test_75g() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return $LFS getstripe ${F75}_join_10 @@ -3623,7 +3617,7 @@ test_80() { # bug 10718 dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1M sync; sleep 1; sync BEFORE=`date +%s` - cancel_lru_locks OSC + cancel_lru_locks osc AFTER=`date +%s` DIFF=$((AFTER-BEFORE)) if [ $DIFF -gt 1 ] ; then @@ -3702,13 +3696,17 @@ test_99f() { run_test 99f "cvs commit =======================================" test_100() { + [ "$NETTYPE" = tcp ] || \ + { skip "TCP secure port test, not useful for NETTYPE=$NETTYPE" && \ + return ; } + remote_ost_nodsh && skip "remote OST with nodsh" && return remote_mds_nodsh && skip "remote MDS with nodsh" && return remote_servers || \ { skip "useless for local single node setup" && return; } netstat -tna | ( rc=1; while read PROT SND RCV LOCAL REMOTE STAT; do - [ "$PROT" != "$NETTYPE" ] && continue + [ "$PROT" != "tcp" ] && continue RPORT=$(echo $REMOTE | cut -d: -f2) [ "$RPORT" != "$ACCEPTOR_PORT" ] && continue @@ -3992,17 +3990,6 @@ test_102c() { } run_test 102c "non-root getfattr/setfattr for lustre.lov EAs ===========" -get_stripe_info() { - stripe_size=0 - stripe_count=0 - stripe_offset=0 - local lines=`sed -n '/obdidx/=' $1` - stripe_size=`awk '{if($1~/size/) print $2}' $1` - stripe_count=`awk '{if($1~/count/) print $2}' $1` - lines=`expr $lines + 1` - stripe_offset=`sed -n ${lines}p $1 |awk '{print $1}'` -} - compare_stripe_info1() { for num in 1 2 3 4 do @@ -4012,22 +3999,16 @@ compare_stripe_info1() { do local size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" - local tmp_file=out - $GETSTRIPE -v $file > $tmp_file - get_stripe_info $tmp_file - if test $stripe_size -ne $size - then + get_stripe_info client $file + if [ $stripe_size -ne $size ]; then error "$file: different stripe size" && return fi - if test $stripe_count -ne $count - then + if [ $stripe_count -ne $count ]; then error "$file: different stripe count" && return fi - if test $stripe_offset -ne 0 - then + if [ $stripe_index -ne 0 ]; then error "$file: different stripe offset" && return fi - rm -f $tmp_file done done done @@ -4042,22 +4023,16 @@ compare_stripe_info2() { do local size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" - local tmp_file=out - $GETSTRIPE -v $file > $tmp_file - get_stripe_info $tmp_file - if test $stripe_size -ne $size - then + get_stripe_info client $file + if [ $stripe_size -ne $size ]; then error "$file: different stripe size" && return fi - if test $stripe_count -ne $count - then + if [ $stripe_count -ne $count ]; then error "$file: different stripe count" && return fi - if test $stripe_offset -ne $offset - then + if [ $stripe_index -ne $offset ]; then error "$file: different stripe offset" && return fi - rm -f $tmp_file done done done @@ -5107,8 +5082,10 @@ test_123a() { # was test 123, statahead(bug 11401) SLOWOK=1 fi - remount_client $MOUNT mkdir -p $DIR/$tdir + rm -rf $DIR/$tdir/* + cancel_lru_locks mdc + cancel_lru_locks osc error=0 NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` [ $NUMFREE -gt 100000 ] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) @@ -5186,10 +5163,9 @@ run_test 123b "not panic with network error in statahead enqueue (bug 15027)" test_124a() { [ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \ skip "no lru resize on server" && return 0 - NR=2000 + local NR=2000 mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" - # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" createmany -o $DIR/$tdir/f $NR || error "failed to create $NR files in $DIR/$tdir" @@ -5197,14 +5173,14 @@ test_124a() { cancel_lru_locks mdc ls -l $DIR/$tdir > /dev/null - NSDIR="" - LRU_SIZE=0 + local NSDIR="" + local LRU_SIZE=0 for VALUE in `lctl get_param ldlm.namespaces.*mdc-*.lru_size`; do - PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + local PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` LRU_SIZE=$(lctl get_param -n $PARAM) if [ $LRU_SIZE -gt $(default_lru_size) ]; then NSDIR=$(echo $PARAM | cut -d "." -f1-3) - log "using $(basename $NSDIR) namespace" + log "NS=$(basename $NSDIR)" break fi done @@ -5213,40 +5189,53 @@ test_124a() { skip "Not enough cached locks created!" return 0 fi - log "created $LRU_SIZE lock(s)" - - # we want to sleep 30s to not make test too long - SLEEP=30 - SLEEP_ADD=2 - - # we know that lru resize allows one client to hold $LIMIT locks for 10h - MAX_HRS=10 - - # get the pool limit - LIMIT=`lctl get_param -n $NSDIR.pool.limit` - - # calculate lock volume factor taking into account data set size and the - # rule that number of locks will be getting smaller durring sleep interval - # and we need to additionally enforce LVF to take this into account. - # Use $LRU_SIZE_B here to take into account real number of locks created - # in the case of CMD, LRU_SIZE_B != $NR in most of cases - LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) - LRU_SIZE_B=$LRU_SIZE - log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE lock(s)" - OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` + log "LRU=$LRU_SIZE" + + local SLEEP=30 + + # We know that lru resize allows one client to hold $LIMIT locks + # for 10h. After that locks begin to be killed by client. + local MAX_HRS=10 + local LIMIT=`lctl get_param -n $NSDIR.pool.limit` + + # Make LVF so higher that sleeping for $SLEEP is enough to _start_ + # killing locks. Some time was spent for creating locks. This means + # that up to the moment of sleep finish we must have killed some of + # them (10-100 locks). This depends on how fast ther were created. + # Many of them were touched in almost the same moment and thus will + # be killed in groups. + local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP)) + + # Use $LRU_SIZE_B here to take into account real number of locks + # created in the case of CMD, LRU_SIZE_B != $NR in most of cases + local LRU_SIZE_B=$LRU_SIZE + log "LVF=$LVF" + local OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` lctl set_param -n $NSDIR.pool.lock_volume_factor $LVF - log "sleep for $((SLEEP+SLEEP_ADD))s" - sleep $((SLEEP+SLEEP_ADD)) + + # Let's make sure that we really have some margin. Client checks + # cached locks every 10 sec. + SLEEP=$((SLEEP+20)) + log "Sleep ${SLEEP} sec" + local SEC=0 + while ((SEC<$SLEEP)); do + echo -n "..." + sleep 5 + SEC=$((SEC+5)) + LRU_SIZE=`lctl get_param -n $NSDIR/lru_size` + echo -n "$LRU_SIZE" + done + echo "" lctl set_param -n $NSDIR.pool.lock_volume_factor $OLD_LVF - LRU_SIZE_A=`lctl get_param -n $NSDIR.lru_size` + local LRU_SIZE_A=`lctl get_param -n $NSDIR/lru_size` [ $LRU_SIZE_B -gt $LRU_SIZE_A ] || { - error "No locks dropped in "$((SLEEP+SLEEP_ADD))"s. LRU size: $LRU_SIZE_A" + error "No locks dropped in ${SLEEP}s. LRU size: $LRU_SIZE_A" unlinkmany $DIR/$tdir/f $NR return } - log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in "$((SLEEP+SLEEP_ADD))"s" + log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in ${SLEEP}s" log "unlink $NR files at $DIR/$tdir" unlinkmany $DIR/$tdir/f $NR } @@ -5346,6 +5335,8 @@ run_test 125 "don't return EPROTO when a dir has a non-default striping and ACLs test_126() { # bug 12829/13455 [ -z "$(lctl get_param -n llite.*.client_type | grep local)" ] && skip "must run as local client" && return [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return + $GSS && skip "must run as gss disabled" && return + $RUNAS -u 0 -g 1 touch $DIR/$tfile || error "touch failed" gid=`ls -n $DIR/$tfile | awk '{print $4}'` rm -f $DIR/$tfile @@ -5742,6 +5733,19 @@ test_140() { #bug-17379 } run_test 140 "Check reasonable stack depth (shouldn't LBUG) ====" +test_141() { + local ls + #define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903 + $LCTL set_param fail_loc=0x903 + # cancel_lru_locks mgc - does not work due to lctl set_param syntax + for ls in /proc/fs/lustre/ldlm/namespaces/MGC*/lru_size; do + echo "clear" > $ls + done + FAIL_ON_ERROR=true cleanup + FAIL_ON_ERROR=true setup +} +run_test 141 "umount should not race with any mgc requeue thread" + test_150() { local TF="$TMP/$tfile" @@ -5823,7 +5827,7 @@ test_151() { cancel_lru_locks osc cat $DIR/$tfile >/dev/null AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then + if let "AFTER - BEFORE != 0"; then error "IN CACHE: before: $BEFORE, after: $AFTER" fi