3 # This file is provided under a dual BSD/GPLv2 license. When using or
4 # redistributing this file, you may do so under either license.
8 # Copyright(c) 2016 Intel Corporation.
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of version 2 of the GNU General Public License as
12 # published by the Free Software Foundation.
14 # This program is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
19 # Contact Information:
20 # Cong Xu, cong.xu@intel.com
24 # Copyright(c) 2016 Intel Corporation.
26 # Redistribution and use in source and binary forms, with or without
27 # modification, are permitted provided that the following conditions
30 # * Redistributions of source code must retain the above copyright
31 # notice, this list of conditions and the following disclaimer.
32 # * Redistributions in binary form must reproduce the above copyright
33 # notice, this list of conditions and the following disclaimer in
34 # the documentation and/or other materials provided with the
36 # * Neither the name of Intel Corporation nor the names of its
37 # contributors may be used to endorse or promote products derived
38 # from this software without specific prior written permission.
40 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 Usage: $0 [-a] [-d] [-l] [-h] [-m] [-n] [-o] [-u]
58 -a command to launch application
59 -d shared nfs directory to store LIOProf logs
60 -l lowest Lustre OSS node [Hostname]
61 -h highest Lustre OSS node [Hostname]
62 -m lowest Lustre Client [Hostname]
63 -n highest Lustre Client [Hostname]
64 -o use Obdfilter-survey to measure Lustre bandwidth
71 while getopts ":a:d:l:h:m:n:ou:" arg; do
86 o="Obdfilter-survey";;
95 if [ -n "${o}" ]; then
96 # Launch OBDfilter-survey to measure Lustre bandwidth
97 if [ -n "${a}" ] || [ -z "${d}" ] || [ -z "${l}" ] || [ -z "${h}" ] \
98 || [ -z "${u}" ]; then
103 if [ -z "${a}" ] || [ -z "${d}" ] || [ -z "${l}" ] || [ -z "${h}" ] \
104 || [ -z "${m}" ] || [ -z "${n}" ] || [ -z "${u}" ]; then
#######################################
# Print the part of a hostname that precedes its last hyphen.
# Arguments: $1 - hostname, expected to look like <cluster>-<index>
# Outputs:   the cluster prefix on stdout; the whole name when it contains
#            no hyphen
#######################################
_lioprof_host_prefix() {
  printf '%s\n' "${1%-*}"
}

#######################################
# Print the part of a hostname that follows its last hyphen.
# Arguments: $1 - hostname, expected to look like <cluster>-<index>
# Outputs:   the node index on stdout; the whole name when it contains
#            no hyphen
#######################################
_lioprof_host_index() {
  printf '%s\n' "${1##*-}"
}

# Split on the LAST hyphen so that a cluster prefix which itself contains
# hyphens (e.g. lustre-oss-12) still yields "lustre-oss" and "12".
cluster_name=$(_lioprof_host_prefix "${l}")

# Index range of the Lustre OSS nodes (lowest from ${l}, highest from ${h})
OSS_MIN=$(_lioprof_host_index "${l}")
OSS_MAX=$(_lioprof_host_index "${h}")

# Index range of the Lustre clients (lowest from ${m}, highest from ${n});
# both are empty when ${m}/${n} are empty.
CLIENT_MIN=$(_lioprof_host_index "${m}")
CLIENT_MAX=$(_lioprof_host_index "${n}")
# Commands information
pdsh_cmd=/usr/bin/pdsh

# Job ID (derived from the launch time as printed by `date +%s`)
job_id="job-$(date +%s)"
echo "Launch ${job_id}"
133 if [ -n "${o}" ]; then
134 # OBDfilter-survey (Obtain maximum available bandwidth of Lustre)
135 echo "Running OBDfilter-survey in the background"
# Directory under ${d} that holds this job's obdfilter-survey output.
# All expansions are quoted so a path containing whitespace or glob
# characters is passed through as a single argument.
HOMEOBDFILTER="${d}/${job_id}/obdfilter"
sudo -u "${USER_NAME}" mkdir -p -- "${HOMEOBDFILTER}"
# NOTE(review): mode 777 is applied recursively to the whole job directory,
# presumably so processes on the remote nodes can write into it; confirm that
# world-writable is really required.
sudo -u "${USER_NAME}" chmod -R 777 -- "${d}/${job_id}"
140 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
141 size=65536 nobjlo=1 nobjhi=2 thrlo=32 thrhi=64 \
142 obdfilter-survey > ${HOMEOBDFILTER}/\`hostname -s\` & \
# Per-job log locations for the rpc, brw and iostat logs.
# Each kind of log gets one directory under /lioprof_loc and a matching
# directory under ${d}.
local_job_dir="/lioprof_loc/${job_id}"
shared_job_dir="${d}/${job_id}"

# Directories below /lioprof_loc
LOCALRPC="${local_job_dir}/rpc"
LOCALBRW="${local_job_dir}/brw"
LOCALIOSTAT="${local_job_dir}/iostat"

# Directories below ${d}
HOMERPC="${shared_job_dir}/rpc"
HOMEBRW="${shared_job_dir}/brw"
HOMEIOSTAT="${shared_job_dir}/iostat"
157 # Create logs directories
158 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
159 mkdir -p ${LOCALRPC} ${LOCALBRW} ${LOCALIOSTAT}; \
# Create the shared rpc/brw/iostat log directories as ${USER_NAME}, then make
# the whole job directory mode 777 recursively.
# Expansions are quoted so each path stays a single argument even if it
# contains whitespace or glob characters.
sudo -u "${USER_NAME}" mkdir -p -- "${HOMERPC}" "${HOMEBRW}" "${HOMEIOSTAT}"
sudo -u "${USER_NAME}" chmod -R 777 -- "${d}/${job_id}"
# Set the Lustre "debug" parameter to rpctrace on the OSS node range.
# The host-range argument is quoted on purpose: left unquoted, "[min-max]" is
# a shell bracket glob and would be replaced by any matching file name found
# in the current working directory before pdsh ever sees it.
"${pdsh_cmd}" -R ssh -w "${cluster_name}-[${OSS_MIN}-${OSS_MAX}]" \
  'lctl set_param debug=rpctrace'
170 # Evaluate Performance
173 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$CLIENT_MAX] " \
174 echo 3 > /proc/sys/vm/drop_caches; echo 0 > /proc/sys/vm/drop_caches;
177 # Start RPC log service and brw_stats
178 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
179 echo > /proc/fs/lustre/obdfilter/*/brw_stats; \
180 lctl clear; lctl debug_daemon start ${LOCALRPC}/rpc.log 1024; \
184 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
185 iostat 1 > ${LOCALIOSTAT}/iostat.log&
189 ######################## Launch Application ########################
# Run the application command held in ${a} and capture its stdout in the job
# directory. ${a} is deliberately left unquoted so the string is word-split
# into a command and its arguments (quote characters embedded in the string
# are not interpreted). The redirection target is quoted so a path containing
# whitespace does not fail with "ambiguous redirect".
${a} > "${d}/${job_id}/job-output"
192 ####################################################################
194 # Collect Lustre RPC and brw_stats logs
195 ${pdsh_cmd} -R ssh -w $cluster_name-[$CLIENT_MIN-$CLIENT_MAX] " \
196 lctl set_param ldlm.namespaces.*.lru_size=clear
199 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
200 lctl debug_daemon stop; \
201 cat /proc/fs/lustre/obdfilter/*/brw_stats > \
202 ${HOMEBRW}/brw-\`hostname -s\`; \
203 lctl debug_file ${LOCALRPC}/rpc.log ${HOMERPC}/rpc-\`hostname -s\`; \
206 # Stop iostat and collect data
207 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
208 pkill iostat; cp -r ${LOCALIOSTAT}/iostat.log \
209 ${HOMEIOSTAT}/iostat-\`hostname -s\` \
# Change log file mode
# Set mode 755 recursively on everything inside the three shared log
# directories. ${VAR:?} aborts the script if a directory variable is empty or
# unset; without it the pattern would collapse to "/*" and root would chmod
# the top of the filesystem recursively.
sudo -u root chmod -R 755 -- \
  "${HOMERPC:?}"/* "${HOMEBRW:?}"/* "${HOMEIOSTAT:?}"/*
216 ###################################################################
217 #### Warning! Be very careful with all rm commands run as root ####
218 ###################################################################
219 # Clear local history logs
221 LOCAL_LIOPROF=/lioprof_loc
222 ${pdsh_cmd} -R ssh -w $cluster_name-[$OSS_MIN-$OSS_MAX] " \
224 rm -rf ${LOCAL_LIOPROF}; \