Whamcloud - gitweb
Be more specific so that other lustre* cruft is not picked up.
[fs/lustre-release.git] / lustre-iokit / obdfilter-survey / obdfilter-survey
index bb85c0b..4f89913 100755 (executable)
@@ -38,7 +38,7 @@ fi
 
 # result file prefix (date/time + hostname makes unique)
 # NB ensure path to it exists
-rslt=${rslt:-"/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`"}
+rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"}
 
 # lustre root (if running with own source tree)
 lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"}
@@ -80,7 +80,12 @@ restart_thr=1
 restart_nobj=1
 
 # machine's page size (K)
-PAGE_SIZE=${PAGE_SIZE:-16}
+if [ -z "$PAGE_SIZE" ]; then
+    if which python >/dev/null; then
+       PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python`
+    fi
+fi
+PAGE_SIZE=${PAGE_SIZE:-4}
 
 # max buffer_mem (total_threads * buffer size)
 # (to avoid lctl ENOMEM problems)
@@ -96,10 +101,10 @@ custom_remote_shell () {
     here=`pwd`
     # Hop on to the remote node, chdir to 'here' and run the given
     # commands. One of the following will probably work.
-    #ssh $host "cd $here; $cmds"
+    ssh $host "cd $here; $cmds"
     #rsh $host "cd $here; $cmds"
     # we have to remove the leading `uname -n`: from pdsh output lines
-    pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
+    #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
 }
 
 #####################################################################
@@ -125,9 +130,11 @@ fi
 rsltf="${rslt}.summary"
 workf="${rslt}.detail"
 cmdsf="${rslt}.script"
+vmstatf="${rslt}.vmstat"
 echo -n > $rsltf
 echo -n > $workf
 
+declare -a vmstatpids
 
 # hide a little trick to unset this from the command line
 if [ "$lustre_root" == " " ]; then
@@ -370,19 +377,27 @@ else
     done
 fi
 
+# start background vmstat logging (5s interval) to a per-host file
 # disable portals debug and get obdecho loaded on all relevant hosts
 unique_hosts=(`unique ${host_names[@]}`)
+pidcount=0
 for host in ${unique_hosts[@]}; do
     remote_shell $host "echo 0 > /proc/sys/portals/debug"
+    host_vmstatf=${vmstatf}_${host}
+    echo -n > $host_vmstatf
+    remote_shell $host "vmstat 5 >> $host_vmstatf" &
+    pid=$!
+    vmstatpids[$pidcount]=$pid
+    pidcount=$((pidcount+1))
     do_unload_obdecho[$host]=0
     if obdecho_loaded $host; then
-       continue
+        continue
     fi
     load_obdecho $host
     if obdecho_loaded $host; then
-       do_unload_obdecho[$host]=1
-       continue
-    fi
+        do_unload_obdecho[$host]=1
+        continue
+        fi
     echo "Can't load obdecho on $host" 1>&2
     exit 1
 done
@@ -402,7 +417,7 @@ done
 for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
     for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do 
        for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
-           if ((thr < nobj)); then
+           if ((thr % nobj)); then
                continue
            fi
            # restart?
@@ -452,6 +467,10 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
            done
            # run tests
            for test in ${tests[@]}; do
+                declare -a pidarray
+                for host in ${unique_hosts[@]}; do
+                    echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host}
+                done
                print_summary -n "$test "
                # create per-host script files
                for host in ${unique_hosts[@]}; do
@@ -462,25 +481,37 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
                    devno=${devnos[$idx]}
                    tmpfi="${tmpf}_$idx"
                    first_obj=${first_objs[$idx]}
+                   thr_per_obj=$((${thr}/${nobj}))
                    echo >> ${cmdsf}_${host} \
                        "$lctl > $tmpfi 2>&1 \\
                          --threads $thr -$snap $devno \\
-                        test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &"
-               done
-               for host in ${unique_hosts[@]}; do
-                   echo "wait" >> ${cmdsf}_${host}
-               done
-               # timed run of all the per-host script files
-               t0=`date +%s.%N`
-               for host in ${unique_hosts[@]}; do
-                   remote_shell $host bash ${cmdsf}_${host}&
-               done
-               wait
-               t1=`date +%s.%N`
-               # clean up per-host script files
-               for host in ${unique_hosts[@]}; do
-                   rm ${cmdsf}_${host}
-               done
+                        test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &"
+                done
+                pidcount=0
+                for host in ${unique_hosts[@]}; do
+                    echo "wait" >> ${cmdsf}_${host}
+                    pidarray[$pidcount]=0
+                    pidcount=$((pidcount+1))
+                done
+                # timed run of all the per-host script files
+                t0=`date +%s.%N`
+                pidcount=0
+                for host in ${unique_hosts[@]}; do
+                    remote_shell $host bash ${cmdsf}_${host} &
+                    pidarray[$pidcount]=$!
+                    pidcount=$((pidcount+1))
+                done
+                pidcount=0
+                for host in ${unique_hosts[@]}; do
+                    wait ${pidarray[$pidcount]}
+                    pidcount=$((pidcount+1))
+                done
+                # NB no bare 'wait' here: it would also block on the background vmstat jobs
+                t1=`date +%s.%N`
+                # clean up per-host script files
+                for host in ${unique_hosts[@]}; do
+                    rm ${cmdsf}_${host}
+                done
                # compute bandwidth from total data / elapsed time
                str=`awk "BEGIN {printf \"%7.2f \",\
                         $total_size / (( $t1 - $t0 ) * 1024)}"`
@@ -538,8 +569,17 @@ for ((i=0; i<ndevs; i++)); do
 done
 
 # unload any obdecho modules we loaded
+pidcount=0
 for host in ${unique_hosts[@]}; do
+    remote_shell $host "killall vmstat" &
+    pid=$!
+    kill -term ${vmstatpids[$pidcount]}
+    kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
+    wait $pid
+    pidcount=$((pidcount+1))
     if ((${do_unload_obdecho[$host]})); then
-       unload_obdecho $host
+        unload_obdecho $host
     fi
 done
+
+exit 0