3 # cluster name (all node names are this followed by the node number)
6 # client node numbers (individual numbers or inclusive ranges)
9 # numbers of clients to survey
12 clients_iterator="+=1"
14 # numbers of tasks per client to survey
17 tasks_per_client_iterator="*=2"
19 # record sizes to survey
24 ## which tests to run (first must be write)
25 # clear_cache) not really a test; just uncache everything
28 #tests=(write rewrite read reread rewrite_again)
29 tests=(write rewrite clear_cache read reread)
31 # minimum number of bytes written/read by each client node
32 min_per_client_size=4G
35 # should each task do I/O to its own file?
39 IOR=/usr/local/sbin/IOR
43 # the result file prefix (date/time + hostname makes unique)
44 rslt=/tmp/ior_survey_`date +%F@%R`_`uname -n`
46 # where lustre is mounted on the clients
49 # basename of the test file(s)
50 testfile=${lustre}/ior_survey_testfile
52 # pdsh args required to instantiate all instances of IOR in parallel
53 # the chosen module must support '-n <procs-per-node>'
54 # -R<module>, -f<fanout> etc
57 #don't spin for MPI completions
58 export LIBELAN_WAITTYPE=0
60 ################################################################################
61 # don't change stuff below here unless you know what you're doing...
64 echo $1 | awk '{ nvals=split($1, vals, "-");\
65 if (nvals == 1) print 1;\
66 else if (nvals == 2) printf "%d\n", vals[2] - vals[1] + 1;}'
70 echo $1 | awk '{ split($1, vals, "-"); print vals[1]; }'
77 if [ -z "$range" ]; then
80 chunk=`count_range $range`
81 if ((chunk > n)); then
82 base=`base_range $range`
96 if [ -z "$range" ]; then
99 local base=`base_range $range`
100 local chunk=`count_range $range`
101 if ((chunk > n)); then chunk=n; fi
102 local nodes="${nodes}${sep}${base}"; sep=","
103 if ((chunk > 1)); then nodes="${nodes}-$((base+chunk-1))"; fi
112 while ((radix > 0)); do
113 local nodes=`n2noderange $((n+radix)) $@`
114 if [ -n "$nodes" ]; then
125 *G|*g) n=`echo $str | sed 's/[gG]//'`; echo $((n*1024*1024*1024));;
126 *M|*m) n=`echo $str | sed 's/[Mm]//'`; echo $((n*1024*1024));;
127 *K|*k) n=`echo $str | sed 's/[Kk]//'`; echo $((n*1024));;
134 local G=$((1024*1024*1024))
135 local M=$((1024*1024))
137 if ((n%G == 0 && n >= G)); then
139 elif ((n%M == 0 && n >= M)); then
141 elif ((n%K == 0 && n >= K)); then
148 if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
149 echo "First test must be 'write'" 1>&2
153 rsltf="${rslt}.summary"
154 workf="${rslt}.detail"
159 if [ "$1" = "-n" ]; then
164 echo $minusn "$*" >> $rsltf
168 # convert params to actual numbers
169 min_per_client_size=`parse_number $min_per_client_size`
170 min_total_size=`parse_number $min_total_size`
172 rsize_lo=`parse_number $rsize_lo`
173 rsize_hi=`parse_number $rsize_hi`
175 # check on actual numbers of client nodes
176 nclients=`countnodes ${clients[@]}`
177 if ((clients_hi > nclients)); then clients_hi=$nclients; fi
179 for ((rsize=rsize_lo; rsize<=rsize_hi; rsize$rsize_iterator)); do
180 pp_rsize=`pp_number $rsize`
182 for ((nclnt=clients_lo; nclnt<=clients_hi; nclnt$clients_iterator)); do
183 test_clients="${cluster}`n2noderange $nclnt ${clients[@]}`"
185 per_client_size=$((min_total_size/nclnt))
186 if ((per_client_size < min_per_client_size)); then
187 per_client_size=$min_per_client_size
190 for ((ntask=tasks_per_client_lo; ntask <= tasks_per_client_hi; ntask$tasks_per_client_iterator)); do
191 per_task_size=$((per_client_size/ntask))
192 if ((per_task_size%rsize != 0)); then
193 per_task_size=$(((per_task_size/rsize + 1)*rsize))
195 total_size=`pp_number $((per_task_size*nclnt*ntask))`
197 hdrstr=`printf "Total: %5sB rsize: %4sB clients: %4d tasks: %3d: " \
198 $total_size $pp_rsize $nclnt $ntask`
199 print_summary -n "$hdrstr"
201 for ((test_idx=0; test_idx < ${#tests[@]}; test_idx++)); do
202 test=${tests[$test_idx]}
204 print_summary -n "$test "
205 echo "===========> ${hdrstr} on $test_clients doing $test" >> $workf
209 if [ "$test" = "clear_cache" ]; then
# Shell snippet that writes "clear" to every Lustre ldlm namespace lru_size
# file; it is logged below and is presumably the command pdsh runs on the
# test clients (the continuation line is not visible here -- confirm).
# There must be no ";" directly after "do": the shell rejects "do;" as a
# syntax error, which would stop the loop from ever running.
210 clear_cache='for LRU in /proc/fs/lustre/ldlm/namespaces/*/lru_size; do echo clear > $LRU; done'
211 echo "=> $clear_cache" >> $tmpf
212 $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
215 echo "Completion Status: $status" >> $tmpf
223 # check lustre is mounted everywhere it's needed
224 cmd="(mount -t lustre; mount -t lustre_lite) | grep $lustre"
225 echo "=> Mount Check: $cmd" >> $tmpf
226 $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
229 echo "Completion Status: $status" >> $tmpf
233 print_summary "Lustre NOT mounted on $lustre somewhere"
239 -o${testfile} # test file prefix
240 -b${per_task_size} # bytes per task
241 -t${rsize} # record size
242 -e # fsync before close
248 # keep the test file(s) unless this is the last test
249 ((test_idx < ${#tests[@]}-1)) && cmdline[$((idx++))]="-k"
251 # use the existing test file(s) unless this is the first test
252 ((test_idx > 0)) && cmdline[$((idx++))]="-E"
255 ((file_per_task)) && cmdline[$((idx++))]="-F"
258 *write*) cmdline[$((idx++))]="-w"
260 *) cmdline[$((idx++))]="-r"
264 echo "=> ${cmdline[@]}" >> $tmpf
266 $pdsh -S -b $pdsh_mpiargs -w "$test_clients" -n $ntask >> $tmpf 2>&1 \
270 echo "Completion Status: $status" >> $tmpf
275 result=`awk < $tmpf "/$awkstr/ {print $ 3; found=1; exit}\
276 END {if (!found) print \"ERROR\"}"`
283 str=`printf "%8s" "$result"`
284 print_summary -n "$str "