3 # cluster name: the prefix shared by all node names (each node name is expected to be this followed by a number)
6 # client node numbers (individual numbers or inclusive "lo-hi" ranges)
9 # numbers of clients to survey
# clients_iterator is spliced directly after the loop variable in the
# client-count loop, so "+=1" makes the update expression nclnt+=1.
12 clients_iterator="+=1"
14 # numbers of tasks per client to survey
# inclusive upper bound of the tasks-per-client sweep
16 tasks_per_client_hi=16
# update applied to the task count after each iteration: "*=4" multiplies it by 4
17 tasks_per_client_iterator="*=4"
19 # record sizes to survey
24 ## which tests to run (first must be write)
25 # remount) not really a test; just remount to uncache everything
28 #tests=(write rewrite read reread rewrite_again)
29 tests=(write rewrite remount read)
31 # minimum total number of bytes written/read by any one client node
# (a K, M or G suffix is accepted; the value is converted by parse_number later)
32 min_per_client_size=75M
35 # should each task do I/O to its own file?
# NOTE(review): presumably the IOR benchmark executable run on the clients; the
# line that uses $IOR is not visible here -- confirm.
39 IOR="/home/ericb/ior/src/C/IOR"
# mount helper invoked by the $remount command below
45 llmount=/home/ericb/lustre/utils/llmount
47 # where lustre is mounted on the clients
50 # path prefix of the test file(s), under the lustre mount point (used as the -o test file prefix)
51 testfile=${lustre}/ior_survey_testfile
53 # how to unmount and remount the F/S on a client (to clear the cache)
# $lustre and $llmount are expanded here, at assignment time, not when the
# command is eventually run.
54 remount="umount $lustre && $llmount -o nettype=elan mdev6:/ll_mds/client $lustre"
56 # the result file prefix (date/time + hostname makes unique)
# The fixed prefix below is the active one; the commented-out line is the
# date/host-stamped variant.
57 #rslt=/home/ericb/ior_survey_`date +%F@%R`_`uname -n`
58 rslt=/home/ericb/ior_survey
60 # don't spin for MPI completions
# NOTE(review): presumably LIBELAN_WAITTYPE=0 selects a non-spinning wait in the
# Elan library -- confirm against the Elan documentation.
61 export LIBELAN_WAITTYPE=0
63 ################################################################################
64 # don't change stuff below here
# Presumably the body of count_range (its header is not visible here; the name
# is taken from the call sites further down). Prints how many node numbers the
# spec in $1 covers: 1 for a single number, hi - lo + 1 for an inclusive
# "lo-hi" range. Nothing is printed for any other shape of spec.
# Inside the single-quoted awk program, $1 is awk's first field, not the
# shell's positional parameter.
67 echo $1 | awk '{ nvals=split($1, vals, "-");\
68 if (nvals == 1) print 1;\
69 else if (nvals == 2) printf "%d\n", vals[2] - vals[1] + 1;}'
# Presumably the body of base_range (header not visible here). Prints the part
# of the spec in $1 that precedes the first '-', i.e. the range's first number.
73 echo $1 | awk '{ split($1, vals, "-"); print vals[1]; }'
# Fragment of a range-walking helper (header and loop not visible here).
# An empty $range is tested for first; presumably it means the list of range
# specs has been exhausted -- confirm against the elided lines.
80 if [ -z "$range" ]; then
# chunk = number of node numbers covered by this range spec
83 chunk=`count_range $range`
# When this range covers more than n node numbers, its first number is fetched.
84 if ((chunk > n)); then
85 base=`base_range $range`
# Fragment of what appears to be n2noderange (header not visible here): builds
# a node-range string in $nodes from successive range specs.
99 if [ -z "$range" ]; then
102 base=`base_range $range`
103 chunk=`count_range $range`
# Take at most n nodes from this range. NOTE(review): this assigns the literal
# string "n", not n's value. It works only because the arithmetic contexts
# below evaluate a variable's contents as an expression, resolving "n" to the
# current value of n.
104 if ((chunk > n)); then chunk=n; fi
# Append this range's first node number; entries after the first are preceded
# by a comma.
105 nodes="${nodes}${sep}${base}"; sep=","
# When more than one node is taken, extend the entry to the form "base-last".
106 if ((chunk > 1)); then nodes="${nodes}-$((base+chunk-1))"; fi
# Fragment of what appears to be countnodes (header not visible here).
# Each pass asks n2noderange for n+radix nodes out of the range specs in $@ and
# then tests whether the answer is non-empty.
# NOTE(review): the updates to n and radix are on lines not visible here --
# presumably a search for the largest node count the ranges can supply; confirm.
115 while ((radix > 0)); do
116 nodes=`n2noderange $((n+radix)) $@`
117 if [ -n "$nodes" ]; then
# Case arms, presumably of parse_number (the case header is not visible here):
# for a value ending in a unit letter, remove the letter from $str and print
# the number multiplied by the unit. Units are binary: K = 1024, M = 1024^2,
# G = 1024^3. Each sed expression removes only the first matching letter.
128 *G|*g) n=`echo $str | sed 's/[gG]//'`; echo $((n*1024*1024*1024));;
129 *M|*m) n=`echo $str | sed 's/[Mm]//'`; echo $((n*1024*1024));;
130 *K|*k) n=`echo $str | sed 's/[Kk]//'`; echo $((n*1024));;
# Fragment of what appears to be pp_number (header not visible here).
# G = 1024^3; M and K are used below but are defined on lines not visible here.
137 G=$((1024*1024*1024))
# Select the largest unit that divides n exactly. The "n >= unit" guard keeps
# n = 0 (which every unit divides) from matching. The branch bodies are not
# visible here.
140 if ((n%G == 0 && n >= G)); then
142 elif ((n%M == 0 && n >= M)); then
144 elif ((n%K == 0 && n >= K)); then
# Sanity check: the tests array must be non-empty and must start with "write",
# since every test after the first reuses the existing test file(s).
151 if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
152 echo "First test must be 'write'" 1>&2
# Output files, all sharing the $rslt prefix.
156 rsltf="${rslt}.summary"
157 iorcf="${rslt}.script"
158 workf="${rslt}.detail"
# Fragment of what appears to be print_summary (header not visible here):
# a leading "-n" argument is recognised and the text is appended to the
# summary file. NOTE(review): $minusn is set on lines not visible here --
# presumably to "-n" so that echo suppresses the trailing newline; confirm.
165 if [ "$1" = "-n" ]; then
170 echo $minusn "$*" >> $rsltf
# Convert the configured sizes (which may carry K/M/G suffixes) to plain numbers.
174 min_per_client_size=`parse_number $min_per_client_size`
175 min_total_size=`parse_number $min_total_size`
177 rsize_lo=`parse_number $rsize_lo`
178 rsize_hi=`parse_number $rsize_hi`
# Number of client nodes described by the clients array.
180 nclients=`countnodes ${clients[@]}`
# Never survey more clients than are configured.
182 if ((clients_hi > nclients)); then clients_hi=$nclients; fi
# Outermost sweep: record size. The loop's update expression is formed by
# splicing $rsize_iterator directly after the variable name.
184 for ((rsize=rsize_lo; rsize<=rsize_hi; rsize$rsize_iterator)); do
# NOTE(review): pp_rsize is not used in the lines visible here; the header
# string below prints $rsize.
185 pp_rsize=`pp_number $rsize`
# Second sweep: number of clients (update expression spliced from
# $clients_iterator in the same way).
187 for ((nclnt=clients_lo; nclnt<=clients_hi; nclnt$clients_iterator)); do
# Node list for this run: the cluster name followed by the node-range string
# that n2noderange produces for nclnt clients.
188 test_clients="${cluster}`n2noderange $nclnt ${clients[@]}`"
# Split the minimum total size evenly across the clients, but never go below
# the per-client minimum.
190 per_client_size=$((min_total_size/nclnt))
191 if ((per_client_size < min_per_client_size)); then
192 per_client_size=$min_per_client_size
# Total for all clients, formatted by pp_number for the summary header.
194 total_size=`pp_number $((per_client_size * nclnt))`
# Third sweep: tasks per client (update expression spliced from
# $tasks_per_client_iterator).
196 for ((ntask=tasks_per_client_lo; ntask <= tasks_per_client_hi; ntask$tasks_per_client_iterator)); do
# Integer division: any remainder is dropped.
197 per_task_size=$((per_client_size/ntask))
# Start the summary line for this parameter combination (no newline yet; the
# per-test results are appended to the same line).
199 hdrstr=`printf "Total: %5sB rsize: %4s clients: %4d tasks: %3d: " $total_size $rsize $nclnt $ntask`
200 print_summary -n "$hdrstr"
# Innermost loop: run each configured test in order.
202 for ((test_idx=0; test_idx < ${#tests[@]}; test_idx++)); do
203 test=${tests[$test_idx]}
205 print_summary -n "$test "
# Banner in the detail log identifying this run.
206 echo "===========> ${hdrstr} on $test_clients doing $test" >> $workf
# "remount" is handled separately from the other tests: the remount command is
# logged to $tmpf and $pdsh is invoked on the test clients (its remaining
# arguments are on a continuation line not visible here). $status is set on
# lines not visible here.
209 if [ "$test" = "remount" ]; then
210 echo "=> $remount" >> $tmpf
211 $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
214 echo "Completion Status: $status" >> $tmpf
# Mount check: list mounts of type lustre and lustre_lite and keep the lines
# mentioning the mount point. The command is logged and $pdsh is invoked on the
# test clients; the condition that leads to the "NOT mounted" summary message
# is on lines not visible here.
222 cmd="(mount -t lustre; mount -t lustre_lite) | grep $lustre"
223 echo "=> $cmd" >> $tmpf
224 $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
227 echo "Completion Status: $status" >> $tmpf
232 print_summary "Lustre NOT mounted on $lustre somewhere"
# Benchmark options (presumably elements of the cmdline array whose opening
# line is not visible here).
238 -o${testfile} # test file prefix
239 -b${per_task_size} # bytes per task
240 -t${rsize} # record size
241 -e # fsync before close
# The remaining options are appended conditionally; idx is the next free slot
# in cmdline.
247 # keep the test file(s) unless this is the last test
248 ((test_idx < ${#tests[@]}-1)) && cmdline[$((idx++))]="-k"
250 # use the existing test file(s) unless this is the first test
251 ((test_idx > 0)) && cmdline[$((idx++))]="-E"
# -F is added only when file_per_task is non-zero.
254 ((file_per_task)) && cmdline[$((idx++))]="-F"
# Any test whose name contains "write" gets -w; every other test gets -r.
257 *write*) cmdline[$((idx++))]="-w"
259 *) cmdline[$((idx++))]="-r"
# Log the assembled command line, then invoke $pdsh on the test clients with
# $ntask passed to -n (remaining arguments are on a continuation line not
# visible here).
263 echo "=> ${cmdline[@]}" >> $tmpf
265 $pdsh -S -b -Rmqsh -w "$test_clients" -n $ntask >> $tmpf 2>&1 \
269 echo "Completion Status: $status" >> $tmpf
# Extract the result: the third field of the first line in $tmpf that matches
# $awkstr, or "ERROR" when no line matches. The awk program is double-quoted so
# that $awkstr is expanded by the shell; writing "$ 3" with a space keeps the
# shell from expanding it as a positional parameter, while awk still reads it
# as field 3.
274 result=`awk < $tmpf "/$awkstr/ {print $ 3; found=1; exit}\
275 END {if (!found) print \"ERROR\"}"`
# Right-align the result in 8 columns and append it to the summary line.
282 str=`printf "%8s" "$result"`
283 print_summary -n "$str "