X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre-iokit%2Fsgpdd-survey%2Fsgpdd-survey;h=50140240184c2908d6ffa3e02242bbad342f17ba;hb=38555069bf3de77726a1afb9d15911bc4db484b7;hp=85a80b9b4e5bae08610661ecd2ee6b487ef6a8bc;hpb=1add4b9c0b22077d6fba02751eb03d709791f682;p=fs%2Flustre-release.git diff --git a/lustre-iokit/sgpdd-survey/sgpdd-survey b/lustre-iokit/sgpdd-survey/sgpdd-survey index 85a80b9..5014024 100755 --- a/lustre-iokit/sgpdd-survey/sgpdd-survey +++ b/lustre-iokit/sgpdd-survey/sgpdd-survey @@ -3,52 +3,113 @@ ###################################################################### # customize per survey -# the SG device to measure -dev=/dev/sg6 +# CHOOSE EITHER scsidevs or rawdevs +# the SCSI devices to measure - WARNING: will be erased. +# The raw devices to use +# rawdevs=${rawdevs:-"/dev/raw/raw1"} +# scsidevs=`ls /dev/sd[a-z] /dev/sd[a-z][a-z]` # all devices, if you use udev -# result file prefix +# result file prefix. date/time+hostname makes unique # NB ensure the path exists if it includes subdirs -rslt=/tmp/sg_dd_rslt +rslt_loc=${rslt_loc:-"/tmp"} +rslt=${rslt:-"$rslt_loc/sgpdd_survey_`date +%F@%R`_`uname -n`"} # what to do (read or write) -action=write +actions=${actions:-"write read"} -# total size (MBytes) +# total size per device (MBytes) # NB bigger than device cache is good -size=8192 +size=${size:-8192} # record size (KBytes) -rszlo=1024 -rszhi=1024 +rszlo=${rszlo:-1024} +rszhi=${rszhi:-1024} -# Concurrent regions -crglo=1 -crghi=4 +# Concurrent regions per device +crglo=${crglo:-1} +crghi=${crghi:-256} -# total numbers of threads to share between concurrent regions -# NB survey skips over #thr < #regions -thrlo=1 -thrhi=1024 +# boundary blocks between concurrent regions per device +boundary=${boundary:-1024} + +# threads to share between concurrent regions per device +# multiple threads per region simulates a deeper request queue +# NB survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE +thrlo=${thrlo:-1} +thrhi=${thrhi:-4096} ##################################################################### +# leave the rest of this alone unless you know what you're doing... + +# and max # threads one instance will spawn +SG_MAX_QUEUE=16 + +# is the sg module loaded? +sg_is_loaded=$(grep -q "^sg " /proc/modules && echo true || echo false) + +# did we load it? +sg_was_loaded=false -# disk block size (Bytes) -bs=512 +# map given device names into SG device names +i=0 +devs=() +if [ "$scsidevs" ]; then + # we will test for a LUN, the test for a partition + # if the partition number is > 9 this will fail -if [ $action = read ]; then - f1="if=$dev" - f2="of=/dev/null" - skip=skip + # make sure sg kernel module is loaded + if ! $sg_is_loaded; then + echo "loading the sg kernel module" + modprobe sg && sg_was_loaded=true + sg_is_loaded=true + fi + + for d in $scsidevs; do + if [[ -L "$d" ]]; then + echo "Device $d specified by alias. Will 'readlink' for device name" + d=$(readlink -f $d) + fi + devs[$i]=`sg_map | awk "{if (\\\$2 == \"$d\") print \\\$1}"` + if [ -z "${devs[i]}" ]; then + echo "Can't find SG device for $d, testing for partition" + pt=`echo $d | sed 's/[0-9]*$//'` + # Try again + devs[$i]=`sg_map | awk "{if (\\\$2 == \"$pt\") print \\\$1}"` + if [ -z "${devs[i]}" ]; then + echo -e "Can't find SG device $pt.\nDo you have the sg module configured for your kernel?" + exit 1 + fi + fi + i=$((i+1)) + done +elif [ "$rawdevs" ]; then + for r in $rawdevs; do + RES=`raw -q $r` + if [ $? -eq 0 ];then + devs[$i]=$r + i=$((i+1)) + else + echo "Raw device $r not set up" + exit 1 + fi + done else - f1="if=/dev/zero" - f2="of=$dev" - skip=seek + echo "Must specify scsidevs or rawdevs" + exit 1 fi -start=`date +%F@%R` -rsltf=${rslt}_${start}.summary +ndevs=${#devs[@]} + +# determine block size. This should also work for raw devices +# If it fails, set to 512 +bs=$((`sg_readcap -lb ${devs[0]} | awk '{print $2}'`)) +if [ $bs == 0 ];then + echo "sg_readcap failed, setting block size to 512" + bs=512 +fi +rsltf=${rslt}.summary +workf=${rslt}.detail echo -n > $rsltf -workf=${rslt}_${start}.detail echo -n > $workf print_summary () { @@ -61,10 +122,12 @@ print_summary () { echo $minusn "$*" } +print_summary "$(date) sgpdd-survey on $rawdevs$scsidevs from $(hostname)" + for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do for ((crg=$crglo;crg<=$crghi;crg*=2)); do for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do - if ((thr < crg)); then + if ((thr < crg || thr/crg > SG_MAX_QUEUE)); then continue fi # compute parameters @@ -74,49 +137,83 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do # show computed parameters actual_rsz=$((bpt*bs/1024)) actual_size=$((bs*count*crg/1024)) - str=`printf 'total_size %8dK rsz %4d thr %5d crg %3d ' \ - $actual_size $actual_rsz $thr $crg` + str=`printf 'total_size %8dK rsz %4d crg %5d thr %5d ' \ + $((actual_size*ndevs)) $actual_rsz $((crg*ndevs)) $((thr*ndevs))` echo "==============> $str" >> $workf print_summary -n "$str" freemem=`awk < /proc/meminfo '/^MemTotal:/ {printf "%d\n", $2}'` - if (((actual_rsz * thr /crg + 64) * crg > freemem)); then + if (((actual_rsz*thr/crg + 64)*crg*ndevs > freemem)); then print_summary "ENOMEM" continue fi - # start test - t0=`date +%s.%N` - for ((i=0;i ${rslt}_tmp${i} \ - $f1 $f2 ${skip}=$((1024+i*blocks)) \ - thr=$((thr/crg)) count=$count bs=$bs bpt=$bpt time=1& - done - wait - t1=`date +%s.%N` - # collect all results in 1 file - rfile=${rslt}_thr${thr}_crg${crg}_rsz${rsz} - echo > $rfile - ok=0 - for ((i=0;i /dev/null 2>&1; then - ok=$((ok + 1)) + # run tests + for action in $actions; do + print_summary -n "$action " + echo "=====> $action" >> $workf + tmpf=${workf}_tmp + # start test + t0=`date +%s.%N` + for ((i=0;i ${tmpf}_${i}_${j} \ + $inf $outf ${skip}=$((boundary+j*blocks)) \ + thr=$((thr/crg)) count=$count bs=$bs bpt=$bpt time=1& + done + done + wait + t1=`date +%s.%N` + # collect/check individual stats + echo > $tmpf + ok=0 + for ((i=0;i /dev/null 2>&1; then + echo "Error found in $rtmp" + elif grep 'time to transfer data' $rtmp > /dev/null 2>&1; then + ok=$((ok + 1)) + fi + cat ${rtmp} >> $tmpf + cat ${rtmp} >> $workf + rm ${rtmp} + done + done + if ((ok != ndevs*crg)); then + print_summary -n "$((ndevs*crg - ok)) failed " + else + # compute MB/sec from elapsed + bw=`awk "BEGIN {printf \"%7.2f MB/s\", $actual_size * $ndevs / (( $t1 - $t0 ) * 1024); exit}"` + # compute MB/sec from nregions*slowest + check=`awk < $tmpf \ + '/time to transfer data/ {mb=$8/1.048576; if (n == 0 || mb < min) min = mb; n++}\ + END {printf "%5d x %6.2f = %7.2f MB/s", n, min, min * n}'` + print_summary -n "$bw $check " fi - cat ${rslt}_tmp${i} >> $rfile - cat ${rslt}_tmp${i} >> $workf - rm ${rslt}_tmp${i} + rm $tmpf done - if [ $ok -ne $crg ]; then - print_summary `printf "failed %d" $((crg - ok))` - else - # compute MB/sec from elapsed - bw=`awk "BEGIN {printf \"%6.2f MB/s\", $actual_size / (( $t1 - $t0 ) * 1024); exit}"` - # compute MB/sec from nregions*slowest - check=`awk < $rfile \ - '/time to transfer data/ {mb=$8/1.048576; if (n == 0 || mb < min) min = mb; n++}\ - END {printf "%3d x %6.2f = %6.2f MB/s", n, min, min * n}'` - print_summary "$bw $check" - fi - rm $rfile + print_summary "" done done done + +if $sg_was_loaded; then + echo "unloading sg module" + rmmod sg +fi