X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=4f8991336528ce16853ae83d9048ef8133454748;hp=bb85c0bfc7d81805853c37a7da475f7e3df40361;hb=d8d0244bce0afe54e18d43608f2f34b52a58dd7e;hpb=024aea8216330cb77e89d43dcbb758614e58ac37 diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index bb85c0b..4f89913 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -38,7 +38,7 @@ fi # result file prefix (date/time + hostname makes unique) # NB ensure path to it exists -rslt=${rslt:-"/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`"} +rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} # lustre root (if running with own source tree) lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"} @@ -80,7 +80,12 @@ restart_thr=1 restart_nobj=1 # machine's page size (K) -PAGE_SIZE=${PAGE_SIZE:-16} +if [ -z "$PAGE_SIZE" ]; then + if which python >/dev/null; then + PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python` + fi +fi +PAGE_SIZE=${PAGE_SIZE:-4} # max buffer_mem (total_threads * buffer size) # (to avoid lctl ENOMEM problems) @@ -96,10 +101,10 @@ custom_remote_shell () { here=`pwd` # Hop on to the remote node, chdir to 'here' and run the given # commands. One of the following will probably work. - #ssh $host "cd $here; $cmds" + ssh $host "cd $here; $cmds" #rsh $host "cd $here; $cmds" # we have to remove the leading `uname -n`: from pdsh output lines - pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://' + #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://' } ##################################################################### @@ -125,9 +130,11 @@ fi rsltf="${rslt}.summary" workf="${rslt}.detail" cmdsf="${rslt}.script" +vmstatf="${rslt}.vmstat" echo -n > $rsltf echo -n > $workf +declare -a vmstatpids # hide a little trick to unset this from the command line if [ "$lustre_root" == " " ]; then @@ -370,19 +377,27 @@ else done fi +# get vmstat started # disable portals debug and get obdecho loaded on all relevant hosts unique_hosts=(`unique ${host_names[@]}`) +pidcount=0 for host in ${unique_hosts[@]}; do remote_shell $host "echo 0 > /proc/sys/portals/debug" + host_vmstatf=${vmstatf}_${host} + echo -n > $host_vmstatf + remote_shell $host "vmstat 5 >> $host_vmstatf" & + pid=$! + vmstatpids[$pidcount]=$pid + pidcount=$((pidcount+1)) do_unload_obdecho[$host]=0 if obdecho_loaded $host; then - continue + continue fi load_obdecho $host if obdecho_loaded $host; then - do_unload_obdecho[$host]=1 - continue - fi + do_unload_obdecho[$host]=1 + continue + fi echo "Can't load obdecho on $host" 1>&2 exit 1 done @@ -402,7 +417,7 @@ done for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do - if ((thr < nobj)); then + if ((thr % nobj)); then continue fi # restart? @@ -452,6 +467,10 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do done # run tests for test in ${tests[@]}; do + declare -a pidarray + for host in ${unique_hosts[@]}; do + echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host} + done print_summary -n "$test " # create per-host script files for host in ${unique_hosts[@]}; do @@ -462,25 +481,37 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do devno=${devnos[$idx]} tmpfi="${tmpf}_$idx" first_obj=${first_objs[$idx]} + thr_per_obj=$((${thr}/${nobj})) echo >> ${cmdsf}_${host} \ "$lctl > $tmpfi 2>&1 \\ --threads $thr -$snap $devno \\ - test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &" - done - for host in ${unique_hosts[@]}; do - echo "wait" >> ${cmdsf}_${host} - done - # timed run of all the per-host script files - t0=`date +%s.%N` - for host in ${unique_hosts[@]}; do - remote_shell $host bash ${cmdsf}_${host}& - done - wait - t1=`date +%s.%N` - # clean up per-host script files - for host in ${unique_hosts[@]}; do - rm ${cmdsf}_${host} - done + test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &" + done + pidcount=0 + for host in ${unique_hosts[@]}; do + echo "wait" >> ${cmdsf}_${host} + pidarray[$pidcount]=0 + pidcount=$((pidcount+1)) + done + # timed run of all the per-host script files + t0=`date +%s.%N` + pidcount=0 + for host in ${unique_hosts[@]}; do + remote_shell $host bash ${cmdsf}_${host} & + pidarray[$pidcount]=$! + pidcount=$((pidcount+1)) + done + pidcount=0 + for host in ${unique_hosts[@]}; do + wait ${pidarray[$pidcount]} + pidcount=$((pidcount+1)) + done + #wait + t1=`date +%s.%N` + # clean up per-host script files + for host in ${unique_hosts[@]}; do + rm ${cmdsf}_${host} + done # compute bandwidth from total data / elapsed time str=`awk "BEGIN {printf \"%7.2f \",\ $total_size / (( $t1 - $t0 ) * 1024)}"` @@ -538,8 +569,17 @@ for ((i=0; i/dev/null + wait $pid + pidcount=$((pidcount+1)) if ((${do_unload_obdecho[$host]})); then - unload_obdecho $host + unload_obdecho $host fi done + +exit 0