X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fparallel-scale.sh;h=b0dfdce6b7dbbcf7283aaf2b222ba9c1dabb6dc6;hb=675dd06e429ee9551d0f874f3461ac3e5091c039;hp=4cfe52f78b9d773a8c647f645b83e3e82a2e80c2;hpb=386ad7757288705a6d8e32feac108fa5a68cfe22;p=fs%2Flustre-release.git

diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh
index 4cfe52f..b0dfdce 100644
--- a/lustre/tests/parallel-scale.sh
+++ b/lustre/tests/parallel-scale.sh
@@ -6,6 +6,10 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
+
+# bug 20670
+ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
 
 #
 # compilbench
@@ -81,9 +85,20 @@ wdisjoint_THREADS=${wdisjoint_THREADS:-4}
 wdisjoint_REP=${wdisjoint_REP:-10000}
 [ "$SLOW" = "no" ] && wdisjoint_REP=100
 
+#
+# parallel_grouplock
+#
+#
+PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 2> /dev/null || true)}
+parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
+
 build_test_filter
 check_and_setup_lustre
 
+get_mpiuser_id $MPI_USER
+MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
+$GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
+
 print_opts () {
     local var
 
@@ -108,16 +123,16 @@ test_compilebench() {
     print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 
     [ x$cbench_DIR = x ] &&
-        { skip "compilebench not found" && return; }
+        { skip_env "compilebench not found" && return; }
 
     [ -e $cbench_DIR/compilebench ] || \
-        { skip "No compilebench build" && return; }
+        { skip_env "No compilebench build" && return; }
 
     local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
     if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
         cbench_IDIRS=$(( space / 680 / 1024))
         [ $cbench_IDIRS = 0 ] && \
-            skip "Need free space atleast 680 Mb, have $space" && return
+            skip_env "Need free space atleast 680 Mb, have $space" && return
 
         log free space=$space, reducing initial dirs to $cbench_IDIRS
     fi
@@ -146,7 +161,7 @@ run_test compilebench "compilebench"
 
 test_metabench() {
     [ x$METABENCH = x ] &&
-        { skip "metabench not found" && return; }
+        { skip_env "metabench not found" && return; }
 
     local clients=$CLIENTS
     [ -z $clients ] && clients=$(hostname)
@@ -156,8 +171,7 @@ test_metabench() {
 
     # FIXME
     # Need space estimation here.
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     print_opts METABENCH clients mbench_NFILES mbench_THREADS
 
@@ -182,8 +196,13 @@ run_test metabench "metabench"
 
 test_simul() {
+    if [ "$NFSCLIENT" ]; then
+        skip "skipped for NFSCLIENT mode"
+        return
+    fi
+
     [ x$SIMUL = x ] &&
-        { skip "simul not found" && return; }
+        { skip_env "simul not found" && return; }
 
     local clients=$CLIENTS
     [ -z $clients ] && clients=$(hostname)
 
@@ -193,8 +212,7 @@ test_simul() {
 
     # FIXME
     # Need space estimation here.
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     print_opts SIMUL clients simul_REP simul_THREADS
 
@@ -223,10 +241,10 @@ test_connectathon() {
     print_opts cnt_DIR cnt_NRUN
 
     [ x$cnt_DIR = x ] &&
-        { skip "connectathon dir not found" && return; }
+        { skip_env "connectathon dir not found" && return; }
 
     [ -e $cnt_DIR/runtests ] || \
-        { skip "No connectathon runtests found" && return; }
+        { skip_env "No connectathon runtests found" && return; }
 
     local testdir=$DIR/d0.connectathon
     mkdir -p $testdir
@@ -256,7 +274,7 @@ run_test connectathon "connectathon"
 
 test_ior() {
     [ x$IOR = x ] &&
-        { skip "IOR not found" && return; }
+        { skip_env "IOR not found" && return; }
 
     local clients=$CLIENTS
     [ -z $clients ] && clients=$(hostname)
@@ -269,13 +287,12 @@ test_ior() {
         echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
         ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
         [ $ior_blockSize = 0 ] && \
-            skip "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
+            skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
 
         echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
     fi
 
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 
@@ -283,7 +300,13 @@ test_ior() {
     mkdir -p $testdir
     # mpi_run uses mpiuser
     chmod 0777 $testdir
-
+    if [ "$NFSCLIENT" ]; then
+        setstripe_nfsserver $testdir -c -1 ||
+            { error "setstripe on nfsserver failed" && return 1; }
+    else
+        $LFS setstripe $testdir -c -1 ||
+            { error "setstripe failed" && return 2; }
+    fi
     #
     # -b N blockSize -- contiguous bytes to write per task  (e.g.: 8, 4k, 2m, 1g)"
     # -o S testFileName
@@ -306,8 +329,13 @@ run_test ior "ior"
 
 test_cascading_rw() {
+    if [ "$NFSCLIENT" ]; then
+        skip "skipped for NFSCLIENT mode"
+        return
+    fi
+
     [ x$CASC_RW = x ] &&
-        { skip "cascading_rw not found" && return; }
+        { skip_env "cascading_rw not found" && return; }
 
     local clients=$CLIENTS
     [ -z $clients ] && clients=$(hostname)
 
@@ -317,8 +345,7 @@ test_cascading_rw() {
 
     # FIXME
     # Need space estimation here.
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 
@@ -344,9 +371,14 @@ run_test cascading_rw "cascading_rw"
 
 test_write_append_truncate() {
+    if [ "$NFSCLIENT" ]; then
+        skip "skipped for NFSCLIENT mode"
+        return
+    fi
+
     # location is lustre/tests dir
     if ! which write_append_truncate > /dev/null 2>&1 ; then
-        skip "write_append_truncate not found"
+        skip_env "write_append_truncate not found"
         return
     fi
 
@@ -358,8 +390,7 @@ test_write_append_truncate() {
 
     # FIXME
     # Need space estimation here.
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     local testdir=$DIR/d0.write_append_truncate
     local file=$testdir/f0.wat
@@ -385,8 +416,13 @@ run_test write_append_truncate "write_append_truncate"
 
 test_write_disjoint() {
+    if [ "$NFSCLIENT" ]; then
+        skip "skipped for NFSCLIENT mode"
+        return
+    fi
+
     [ x$WRITE_DISJOINT = x ] &&
-        { skip "write_disjoint not found" && return; }
+        { skip_env "write_disjoint not found" && return; }
 
     local clients=$CLIENTS
     [ -z $clients ] && clients=$(hostname)
 
@@ -396,8 +432,7 @@ test_write_disjoint() {
 
     # FIXME
     # Need space estimation here.
-    generate_machine_file $clients $MACHINEFILE || \
-        error "can not generate machinefile $MACHINEFILE"
+    generate_machine_file $clients $MACHINEFILE || return $?
 
     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
     local testdir=$DIR/d0.write_disjoint
@@ -418,6 +453,153 @@
 }
 run_test write_disjoint "write_disjoint"
 
+test_parallel_grouplock() {
+    if [ "$NFSCLIENT" ]; then
+        skip "skipped for NFSCLIENT mode"
+        return
+    fi
+
+    [ x$PARALLEL_GROUPLOCK = x ] &&
+        { skip "PARALLEL_GROUPLOCK not found" && return; }
+
+    local clients=$CLIENTS
+    [ -z $clients ] && clients=$(hostname)
+
+    local num_clients=$(get_node_count ${clients//,/ })
+
+    generate_machine_file $clients $MACHINEFILE || return $?
+
+    print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
+
+    local testdir=$DIR/d0.parallel_grouplock
+    mkdir -p $testdir
+    # mpi_run uses mpiuser
+    chmod 0777 $testdir
+
+    do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
+        error "set_param max_rw_chunk=0 failed "
+
+    local cmd
+    local status=0
+    local subtest
+    for i in $(seq 12); do
+        subtest="-t $i"
+        local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
+        echo "+ $cmd"
+
+        mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd
+        local rc=$?
+        if [ $rc != 0 ] ; then
+            error_noexit "parallel_grouplock subtests $subtest failed! $rc"
+        else
+            echo "parallel_grouplock subtests $subtest PASS"
+        fi
+        let status=$((status + rc))
+        # clear debug to collect one log per one test
+        do_nodes $(comma_list $(nodes_list)) lctl clear
+    done
+    [ $status -eq 0 ] || error "parallel_grouplock status: $status"
+    rm -rf $testdir
+}
+run_test parallel_grouplock "parallel_grouplock"
+
+statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
+statahead_NUMFILES=${statahead_NUMFILES:-500000}
+
+cleanup_statahead () {
+    trap 0
+
+    local clients=$1
+    local mntpt_root=$2
+    local num_mntpts=$3
+
+    for i in $(seq 0 $num_mntpts);do
+        zconf_umount_clients $clients ${mntpt_root}$i ||
+            error_exit "Failed to umount lustre on ${mntpt_root}$i"
+    done
+}
+
+test_statahead () {
+    if [[ -n $NFSCLIENT ]]; then
+        skip "Statahead testing is not supported on NFS clients."
+        return 0
+    fi
+
+    [ x$MDSRATE = x ] &&
+        { skip_env "mdsrate not found" && return; }
+
+    local clients=$CLIENTS
+    [ -z $clients ] && clients=$(hostname)
+
+    local num_clients=$(get_node_count ${clients//,/ })
+
+    generate_machine_file $clients $MACHINEFILE || return $?
+
+    print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
+
+    # create large dir
+
+    # do not use default "d[0-9]*" dir name
+    # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
+    local dir=dstatahead
+    local testdir=$DIR/$dir
+
+    # cleanup only if dir exists
+    # cleanup only $statahead_NUMFILES number of files
+    # ignore the other files created by someone else
+    [ -d $testdir ] &&
+        mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
+
+    mkdir -p $testdir
+    # mpi_run uses mpiuser
+    chmod 0777 $testdir
+
+    local num_files=$statahead_NUMFILES
+
+    local IFree=$(inodes_available)
+    if [ $IFree -lt $num_files ]; then
+        num_files=$IFree
+    fi
+
+    cancel_lru_locks mdc
+
+    local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
+    echo "+ $cmd"
+
+    mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
+
+    local rc=$?
+    if [ $rc != 0 ] ; then
+        error "mdsrate failed to create $rc"
+        return $rc
+    fi
+
+    local num_mntpts=$statahead_NUMMNTPTS
+    local mntpt_root=$TMP/mntpt/lustre
+    mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
+
+    echo "Mounting $num_mntpts lustre clients starts on $clients"
+    trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
+    for i in $(seq 0 $num_mntpts); do
+        zconf_mount_clients $clients ${mntpt_root}$i $mntopts ||
+            error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
+    done
+
+    do_rpc_nodes $clients cancel_lru_locks mdc
+
+    do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
+
+    mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
+
+    # use rm instead of rmdir because of
+    # testdir could contain the files created by someone else,
+    # or by previous run where is num_files prev > num_files current
+    rm -rf $testdir
+    cleanup_statahead $clients $mntpt_root $num_mntpts
+}
+
+run_test statahead "statahead test, multiple clients"
+
 equals_msg `basename $0`: test complete, cleaning up
 check_and_cleanup_lustre
-[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true
+[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true
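
Note (editorial, not part of the patch): the suite is driven entirely by environment variables from test-framework.sh and cfg/$NAME.sh. A minimal invocation sketch under those assumptions; the client host names below are placeholders, ONLY is the standard test-framework filter consumed by build_test_filter, and PARALLEL_SCALE_EXCEPT feeds the ALWAYS_EXCEPT list set at the top of this script:

    # Run only the new statahead test on two clients with a reduced file count.
    # NAME selects a config from lustre/tests/cfg; swap in whatever matches
    # your setup.
    CLIENTS=client1,client2 ONLY=statahead statahead_NUMFILES=100000 \
        NAME=ncli bash lustre/tests/parallel-scale.sh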