3 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
6 # lustre_start - start or stop multiple Lustre servers from a CSV file
8 # This script is used to parse each line of a CSV (Comma-Separated Value) file
9 # and execute a remote command to start/stop the service on every Lustre server
10 # target that will be part of the Lustre cluster.
12 ################################################################################
18 Usage: $(basename $0) [options] <-a|-w|-x> <CSV file>
20 This script is used to start or stop multiple Lustre servers from a
24 -a select all the nodes from the CSV file to operate on
25 -w hostname,hostname,...
26 select the specified list of nodes (separated by commas) to
27 operate on rather than all the nodes in the CSV file
28 -x hostname,hostname,...
29 exclude the specified list of nodes (separated by commas)
30 -n no net - don't verify network connectivity and hostnames
32 -m pass "mount options" item in the CSV file to mount command line
33 -k stop the services on Lustre server targets
36 CSV file a comma-separated value file that contains configuration
37 parameters for each target in a Lustre cluster
39 Please refer to "lustre_config -h" for the description of CSV file formats.
44 # Get the library of functions
45 . @scriptlibdir@/lc_common
49 # Get and check the positional parameters
50 while getopts "aw:x:nmkhv" OPTION; do
53 [ -z "$SPECIFIED_NODELIST" ] && [ -z "$EXCLUDED_NODELIST" ] \
55 NODELIST_OPT="$NODELIST_OPT -a"
59 SPECIFIED_NODELIST=$OPTARG
60 NODELIST_OPT="$NODELIST_OPT -w $SPECIFIED_NODELIST"
64 EXCLUDED_NODELIST=$OPTARG
65 NODELIST_OPT="$NODELIST_OPT -x $EXCLUDED_NODELIST"
91 # Toss out the parameters we've already processed
94 # Here we expect the CSV file
96 error_output "Missing CSV file!"
103 # get_fstab_mntopts host_name device_name mount_point
104 # Get the mount options from the /etc/fstab file
105 get_fstab_mntopts() {
112 if [ -z "$host_name" -o -z "$dev_name" -o -z "$mnt_pnt" ]; then
113 echo "get_fstab_mntopts(): Missing argument!"
117 # Execute a remote command to get the mount options from the /etc/fstab file
118 mnt_opts=$($REMOTE $host_name "grep -w ^$dev_name /etc/fstab | \
119 grep -w $mnt_pnt | awk '{print \$4}'" 2>/dev/null)
121 mnt_opts=${mnt_opts//$host_name: /}
127 # Start the service on one Lustre server target
131 local extra_mntopts="$*"
135 if $SPECIFY_MNTOPTS; then
136 # Use the "mount options" item from the CSV file
137 [ -n "${MOUNT_OPTIONS[i]}" ] && mntopts=${MOUNT_OPTIONS[i]}
139 # Do not use the "mount options" item from the CSV file
140 mntopts=$(get_fstab_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]} \
142 [ ${PIPESTATUS[0]} -ne 0 ] && error_output "$mntopts" && return 1
145 [ -n "$mntopts" ] && mntopts="-o $mntopts"
146 [ -n "$extra_mntopts" ] && mntopts="$mntopts $extra_mntopts"
147 # Strip off any leading space
150 # Execute a remote command to start the service
151 verbose_output "Mounting Lustre ${DEVICE_TYPE[i]} target"\
152 "${DEVICE_NAME[i]} (opts: $mntopts) on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..."
153 $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
154 error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; }
155 mkdir -p ${MOUNT_POINT[i]} || \\
156 error \${PIPESTATUS[0]} \"failed to mkdir ${MOUNT_POINT[i]}\"
157 mount -t $FS_TYPE $mntopts ${DEVICE_NAME[i]} ${MOUNT_POINT[i]} || \\
158 error \${PIPESTATUS[0]} \\
159 \"failed to mount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\""
160 return ${PIPESTATUS[0]}
163 # Stop the service on one Lustre server target
167 # Execute a remote command to stop the service
168 verbose_output "Unmounting Lustre ${DEVICE_TYPE[i]} target"\
169 "${DEVICE_NAME[i]} on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..."
170 $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
171 error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; }
172 if grep -q \" ${MOUNT_POINT[i]} \" /proc/mounts; then
173 umount -d -f ${MOUNT_POINT[i]} || \\
174 error \${PIPESTATUS[0]} \\
175 \"failed to unmount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\"
177 echo \"${DEVICE_NAME[i]} was not mounted on\"\\
178 \"${HOST_NAME[i]}:${MOUNT_POINT[i]}\"
180 return ${PIPESTATUS[0]}
183 # mass_op op_type target_type
184 # Start/stop the services on Lustre server targets in parallel
192 declare -a REMOTE_PID
195 if [ -z "$op_type" -o -z "$target_type" ]; then
196 error_output "mass_op(): Missing argument!"
201 "start") op_func=start_service;;
202 "stop") op_func=stop_service;;
203 *) error_output "mass_op(): Invalid op type \"$op_type\"!" && return 1;;
206 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
207 if [ "${DEVICE_TYPE[i]}" = "$target_type" ] \
208 && [[ "${FORMAT_OPTIONS[i]}" != *noformat* ]]; then
210 REMOTE_PID[$pid_num]=$!
211 let pid_num=$pid_num+1
215 for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
216 wait ${REMOTE_PID[${pid_num}]}
217 local RC1=${PIPESTATUS[0]}
218 [ $RC1 -ne 0 ] && RC=$RC1
221 [ $RC -ne 0 ] && return $RC
226 # Unload the modules to clean up
232 host_list=$(comma_list "${HOST_NAME[@]}")
233 [ -z "$host_list" ] && return 0
235 command="PATH=\$PATH:/sbin:/usr/sbin
236 if grep -q libcfs /proc/modules; then
237 lctl net down 1>/dev/null 2>&1
242 $REMOTE $host_list "$command"
244 for host in ${host_list//,/ }; do
245 $REMOTE $host "$command"
250 # Start the services on Lustre server targets
253 local combo_mgs_mdt=false
255 if [ ${#HOST_NAME[@]} -eq 0 ]; then
256 verbose_output "There are no Lustre targets specified."
260 # Start lnet network on the MGS node
261 start_mgs_lnet || return ${PIPESTATUS[0]}
263 local checked_hosts=""
264 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
265 host_in_hostlist ${HOST_NAME[i]} $checked_hosts && continue
266 if ! is_mgs_node ${HOST_NAME[i]}; then
267 # Add module options to the module configuration file
268 add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]}
270 # Check lnet networks
271 check_lnet $i || return ${PIPESTATUS[0]}
273 checked_hosts="$checked_hosts,${HOST_NAME[i]}"
277 # Start MGS or the MGS service on combo MGS/MDT (with "-o nosvc -n" options)
278 if [ -n "${MGS_NODENAME[0]}" ]; then
279 local idx=${MGS_IDX[0]}
280 if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then
283 start_service ${MGS_IDX[0]} "-o nosvc -n" || return ${PIPESTATUS[0]}
285 start_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
290 mass_op "start" "ost" || return ${PIPESTATUS[0]}
292 # Start the MDT service on combo MGS/MDT (with "-o nomgs" option)
293 if $combo_mgs_mdt; then
294 start_service ${MGS_IDX[0]} "-o nomgs" || return ${PIPESTATUS[0]}
298 mass_op "start" "mdt" || return ${PIPESTATUS[0]}
300 verbose_output "Success on all Lustre targets!"
304 # Stop the services on Lustre server targets
308 if [ ${#HOST_NAME[@]} -eq 0 ]; then
309 verbose_output "There are no Lustre targets specified."
314 mass_op "stop" "mdt" || return ${PIPESTATUS[0]}
316 # Stop the MDT service on combo MGS/MDT
317 if [ -n "${MGS_NODENAME[0]}" ]; then
318 local idx=${MGS_IDX[0]}
319 if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then
321 stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
326 mass_op "stop" "ost" || return ${PIPESTATUS[0]}
328 # Stop MGS or the MGS service on combo MGS/MDT
329 if [ -n "${MGS_NODENAME[0]}" ]; then
330 stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
338 #********************************* Main Flow **********************************#
341 check_file $CSV_FILE || exit ${PIPESTATUS[0]}
343 # Get the list of nodes to be operated on
344 NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"
346 # Check the node list
347 check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]}
349 # Check the network connectivity and hostnames
350 if $VERIFY_CONNECT; then
351 verbose_output "Checking the cluster network connectivity and hostnames..."
352 $VERIFY_CLUSTER_NET $NODELIST_OPT $VERBOSE_OPT $CSV_FILE || \
353 exit ${PIPESTATUS[0]}
354 verbose_output "Check the cluster network connectivity and hostnames OK!"
357 # Configure the Lustre cluster
358 echo "$(basename $0): ******** Lustre cluster configuration BEGIN ********"
360 get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]}
362 check_mgs || exit ${PIPESTATUS[0]}
364 if ! $STOP_SERVICE; then
365 mass_start || exit ${PIPESTATUS[0]}
367 mass_stop || exit ${PIPESTATUS[0]}
370 echo "$(basename $0): ******** Lustre cluster configuration END **********"