Details : Initialize RPC XID from clock at startup (randomly if clock is
bad).
+Severity : enhancement
+Bugzilla : 14095
+Description: Add lustre_start utility to start or stop multiple Lustre servers
+ from a CSV file.
+
--------------------------------------------------------------------------------
2007-08-10 Cluster File Systems, Inc. <info@clusterfs.com>
sbinscripts = lc_servip lustre_up14 lustre_rmmod
# These are scripts that are generated from .in files
-genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv lc_md lc_lvm
+genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv \
+ lc_md lc_lvm lustre_start
sbin_SCRIPTS = $(genscripts) $(sbinscripts)
bin_SCRIPTS = lustre_req_history
-#
+#!/bin/bash
+
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
#
-# lc_common - This file contains functions to be used by most or all
+# lc_common - This file contains common variables and functions to be used by
# Lustre cluster config scripts.
#
################################################################################
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
-export REMOTE
+#****************************** Common Variables ******************************#
+export PATH=$PATH:/sbin:/usr/sbin
-# Lustre utilities
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-LCTL=${LCTL:-"$CMD_PATH/lctl"}
+# Remote command
+export REMOTE=${REMOTE:-"ssh -x -q"}
+#export REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
-EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"}
+# Lustre utilities
+export MKFS=${MKFS:-"mkfs.lustre"}
+export TUNEFS=${TUNEFS:-"tunefs.lustre"}
+export LCTL=${LCTL:-"lctl"}
-# Raid command path
-RAID_CMD_PATH=${RAID_CMD_PATH:-"/sbin"}
-MDADM=${MDADM:-"$RAID_CMD_PATH/mdadm"}
+# Software RAID command
+export MDADM=${MDADM:-"mdadm"}
# Some scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
-MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe
-VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net
-GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb
-GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip
-SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon
-SCRIPT_CONFIG_MD=${SCRIPTS_PATH}/lc_md
-SCRIPT_CONFIG_LVM=${SCRIPTS_PATH}/lc_lvm
+export MODULE_CONFIG=${MODULE_CONFIG:-"lc_modprobe"}
+export VERIFY_CLUSTER_NET=${VERIFY_CLUSTER_NET:-"lc_net"}
+export GEN_HB_CONFIG=${GEN_HB_CONFIG:-"lc_hb"}
+export GEN_CLUMGR_CONFIG=${GEN_CLUMGR_CONFIG:-"lc_cluman"}
+export SCRIPT_VERIFY_SRVIP=${SCRIPT_VERIFY_SRVIP:-"lc_servip"}
+export SCRIPT_GEN_MONCF=${SCRIPT_GEN_MONCF:-"lc_mon"}
+export SCRIPT_CONFIG_MD=${SCRIPT_CONFIG_MD:-"lc_md"}
+export SCRIPT_CONFIG_LVM=${SCRIPT_CONFIG_LVM:-"lc_lvm"}
# Variables of HA software
HBVER_HBV1="hbv1" # Heartbeat version 1
FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files
# Marker of the MD device line
-MD_MARKER=${MD_MARKER:-"MD"}
+export MD_MARKER=${MD_MARKER:-"MD"}
# Marker of the LVM device line
-PV_MARKER=${PV_MARKER:-"PV"}
-VG_MARKER=${VG_MARKER:-"VG"}
-LV_MARKER=${LV_MARKER:-"LV"}
+export PV_MARKER=${PV_MARKER:-"PV"}
+export VG_MARKER=${VG_MARKER:-"VG"}
+export LV_MARKER=${LV_MARKER:-"LV"}
-declare -a CONFIG_ITEM # Items in each line of the csv file
+declare -a CONFIG_ITEM # Items in each line of the CSV file
declare -a NODE_NAME # Hostnames of nodes have been configured
-# Nodelist variables
-USE_ALLNODES=false # default is not to operate on all the nodes
-SPECIFIED_NODELIST="" # specified list of nodes to be operated on
-EXCLUDED_NODELIST="" # list of nodes to be excluded
+declare -a MGS_NODENAME # Node names of the MGS servers
+declare -a MGS_IDX # Indexes of MGSs in the global arrays
+declare -i MGS_NUM # Number of MGS servers in the cluster
+declare -i INIT_IDX
+
+# All of the Lustre target items in the CSV file
+declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
+declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
-export PATH=$PATH:$CMD_PATH:$SCRIPTS_PATH:$CLUMAN_TOOLS_PATH:$RAID_CMD_PATH:/sbin:/usr/sbin
+# Heartbeat software requires that node names in the configuration directive
+# must (normally) match the "uname -n" of that machine. Since the value of the
+# "failover nids" field in the CSV file is the NID(s) of failover partner node,
+# we have to figure out the corresponding hostname of that node.
+declare -a FAILOVERS_NAMES
+export VERIFY_CONNECT=true # Verify network connectivity by default
+export USE_ALLNODES=false # Not operating on all the nodes by default
+export SPECIFIED_NODELIST="" # Specified list of nodes to be operated on
+export EXCLUDED_NODELIST="" # Specified list of nodes to be excluded
+export NODES_TO_USE="" # Defacto list of nodes to be operated on
+export NODELIST_OPT=""
+export VERBOSE_OUTPUT=false
+export VERBOSE_OPT=""
+
+
+#****************************** Common Functions ******************************#
# verbose_output string
# Output verbose information $string
return 0
}
+# error_output string
+# Output error string to stderr, prefixing with ERROR
+# for easy error parsing from the rest of the output.
+# Arguments: $* - message text (joined with single spaces).
+# Always returns 0 so callers can chain it without tripping error checks.
+error_output() {
+ echo >&2 "$(basename $0): ERROR: $*"
+ return 0
+}
+
+# error_exit rc string
+# Output error to stderr via error_output and exit with rc.
+# Arguments: $1 - exit code; remaining arguments - message text.
+error_exit() {
+ local rc=$1
+ shift
+
+ # NOTE(review): $* is passed unquoted; runs of whitespace in the
+ # message collapse to single spaces.
+ error_output $*
+ exit $rc
+}
+
# Check whether the remote command is pdsh
is_pdsh() {
if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
check_file() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: check_file() error: Missing csv file!"
+ error_output "check_file(): Missing CSV file!"
return 1
fi
- CSV_FILE=$1
+ local CSV_FILE=$1
if [ ! -s ${CSV_FILE} ]; then
- echo >&2 "`basename $0`: check_file() error: ${CSV_FILE}"\
+ error_output "check_file(): ${CSV_FILE}"\
"does not exist or is empty!"
return 1
fi
}
# parse_line line
-# Parse a line in the csv file
+# Parse a line in the CSV file
parse_line() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: parse_line() error: Missing argument!"
+ error_output "parse_line(): Missing argument!"
return 1
fi
declare -i i=0 # Index of the CONFIG_ITEM array
- declare -i length=0
+ declare -i length=0
declare -i idx=0
- declare -i s_quote_flag=0 # Flag of the single quote character
+ declare -i s_quote_flag=0 # Flag of the single quote character
declare -i d_quote_flag=0 # Flag of the double quotes character
local TMP_LETTER LINE
-
+
LINE="$*"
# Initialize the CONFIG_ITEM array
ret_str=$*
if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then
- echo >&2 "`basename $0`: ${fn_name}() error: ${ret_str}"
+ error_output "${fn_name}(): ${ret_str}"
return 0
fi
if [ -z "${ret_str}" ]; then
- echo >&2 "`basename $0`: ${fn_name}() error:" \
+ error_output "${fn_name}():" \
"No results from remote!" \
"Check network connectivity between the local host and ${host_addr}!"
return 0
echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!"
return 1
fi
-
+
case "${nettype}" in
lo*) host_name=`hostname`;;
elan*) # QsNet
echo "${host_name}"
return 1
fi
-
+
nid=${host_name}@${nettype}
;;
esac
OUTLIST="$OUTLIST,$ITEM"
fi
done
-
+
# strip leading comma
echo ${OUTLIST#,}
}
# get_csv_nodelist csv_file
-# Get the comma-separated list of all the nodes from the csv file
+# Get the comma-separated list of all the nodes from the CSV file
get_csv_nodelist() {
local csv_file=$1
local all_nodelist
- # Check the csv file
+ # Check the CSV file
! check_file ${csv_file} 2>&1 && return 1
all_nodelist=$(egrep -v "([[:space:]]|^)#" ${csv_file} | cut -d, -f 1)
get_nodelist() {
local ALL_NODELIST
- # Get the list of all the nodes in the csv file
+ # Get the list of all the nodes in the CSV file
ALL_NODELIST=$(get_csv_nodelist ${CSV_FILE})
[ ${PIPESTATUS[0]} -ne 0 ] && echo "${ALL_NODELIST}" && return 1
local nodes_to_use=$1
if [ -z "${nodes_to_use}" ]; then
- echo "`basename $0`: There are no hosts to be operated on."\
+ error_output "There are no nodes to be operated on."\
"Check the node selection options (-a, -w or -x)."
- usage
+ usage 1>&2
+ return 1
else
verbose_output "Operating on the following nodes: ${nodes_to_use}"
fi
# get_mgs_nids mgs_hostname mgs_nids
# Get the corresponding NID(s) of the MGS node ${mgs_hostname} from the
-# "mgs nids" field of one lustre target in the csv file
+# "mgs nids" field of one lustre target in the CSV file
get_mgs_nids() {
local mgs_node="$1"
local all_mgs_nids="$2"
done
# Let's use lctl to get the real nids from the mgs node
- ret_str=$(${REMOTE} ${mgs_node} "${LCTL} list_nids" 2>&1 </dev/null)
+ ret_str=$($REMOTE $mgs_node "PATH=\$PATH:/sbin:/usr/sbin
+$LCTL list_nids" 2>&1 </dev/null)
if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then
echo "$(basename $0): get_mgs_nids() error:" \
"remote command to ${mgs_node} error: ${ret_str}"
return 1
}
+
+# Check the items required for OSTs, MDTs and MGS
+#
+# When formatting an OST, the following items: hostname,
+# device name, device type and mgs nids, cannot have null value.
+#
+# When formatting an MDT or MGS, the following items: hostname,
+# device name and device type, cannot have null value.
+#
+# Argument: $1 - index into the global per-target arrays parsed
+# from the CSV file (HOST_NAME, DEVICE_NAME, DEVICE_TYPE, ...).
+# Returns 0 when all required items are present, 1 otherwise.
+check_lustre_item() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ error_output "check_lustre_item(): Missing argument"\
+ "for function check_lustre_item()!"
+ return 1
+ fi
+
+ declare -i i=$1
+
+ # Check hostname, device name and device type
+ if [ -z "${HOST_NAME[i]}" ] || \
+ [ -z "${DEVICE_NAME[i]}" ] || [ -z "${DEVICE_TYPE[i]}" ]; then
+ error_output "check_lustre_item(): Some required"\
+ "item has null value! Check hostname,"\
+ "device name and device type!"
+ return 1
+ fi
+
+ # Check mgs nids
+ # ("]&&[" is tokenized by the shell as "] && [", so this is a valid AND)
+ if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
+ error_output "check_lustre_item(): OST's mgs nids"\
+ "item has null value!"
+ return 1
+ fi
+
+ # Check mount point
+ if [ -z "${MOUNT_POINT[i]}" ]; then
+ error_output "check_lustre_item(): mount"\
+ "point item of target ${DEVICE_NAME[i]} has null value!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Get the number of MGS nodes in the cluster
+# Sets the globals INIT_IDX and MGS_NUM; when the primary slot
+# MGS_NODENAME[0] is empty, both are bumped by one so loops start
+# at the first backup entry.
+get_mgs_num() {
+ INIT_IDX=0
+ MGS_NUM=${#MGS_NODENAME[@]}
+ [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
+ && let "MGS_NUM += 1"
+}
+
+# is_mgs_node hostname
+# Verify whether @hostname is a MGS node
+# Returns 0 if @hostname appears in the MGS_NODENAME array, 1 otherwise.
+is_mgs_node() {
+ local host_name=$1
+ declare -i i
+
+ get_mgs_num
+ for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+ [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
+ done
+
+ return 1
+}
+
+# Check whether the MGS nodes are in the same failover group
+# Uses the globals MGS_NODENAME/MGS_IDX (set by check_mgs) and
+# FAILOVERS_NAMES; returns 1 if any MGS node is missing from another
+# MGS node's failover-partner list, 0 otherwise.
+check_mgs_group() {
+ declare -i i
+ declare -i j
+ declare -i idx
+ local mgs_node
+
+ get_mgs_num
+ for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+ mgs_node=${MGS_NODENAME[i]}
+ for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
+ [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
+
+ idx=${MGS_IDX[j]}
+ # substring test: true when $mgs_node does NOT occur in
+ # FAILOVERS_NAMES[idx]
+ if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ]
+ then
+ error_output "check_mgs_group():"\
+ "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
+ "failover group!"
+ return 1
+ fi
+ done
+ done
+
+ return 0
+}
+
+# Get and check MGS servers.
+# There should be no more than one MGS specified in the entire CSV file.
+# Fills the globals MGS_NODENAME and MGS_IDX: slot 0 holds the primary
+# MGS (a target without "--noformat" in its format options), later slots
+# hold backups. Returns 1 on any conflict (mixed explicit/implicit MGS
+# targets, duplicate primaries, more than one MGS target on a node, or
+# bad failover grouping).
+check_mgs() {
+ declare -i i
+ declare -i j
+ declare -i exp_idx # Index of explicit MGS servers
+ declare -i imp_idx # Index of implicit MGS servers
+ local is_exp_mgs is_imp_mgs
+ local mgs_node
+
+ # Initialize the MGS_NODENAME and MGS_IDX arrays
+ unset MGS_NODENAME
+ unset MGS_IDX
+
+ exp_idx=1
+ imp_idx=1
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ is_exp_mgs=false
+ is_imp_mgs=false
+
+ # Check whether this node is an explicit MGS node
+ # or an implicit one
+ # (substring test: DEVICE_TYPE[i] contains "mgs" => explicit MGS)
+ if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
+ verbose_output "Explicit MGS target" \
+ "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+ is_exp_mgs=true
+ fi
+
+ # An MDT with no mgs nids acts as an implicit (co-located) MGS
+ if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
+ verbose_output "Implicit MGS target" \
+ "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+ is_imp_mgs=true
+ fi
+
+ # Get and check MGS servers
+ if ${is_exp_mgs} || ${is_imp_mgs}; then
+ # Check whether more than one MGS target in one MGS node
+ if is_mgs_node ${HOST_NAME[i]}; then
+ error_output "check_mgs():"\
+ "More than one MGS target in the same node -"\
+ "\"${HOST_NAME[i]}\"!"
+ return 1
+ fi
+
+ # Get and check primary MGS server and backup MGS server
+ # (no "--noformat" in the format options => primary MGS server)
+ if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
+ then
+ # Primary MGS server
+ if [ -z "${MGS_NODENAME[0]}" ]; then
+ if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+ || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+ error_output "check_mgs():"\
+ "There exist both explicit and implicit MGS"\
+ "targets in the CSV file!"
+ return 1
+ fi
+ MGS_NODENAME[0]=${HOST_NAME[i]}
+ MGS_IDX[0]=$i
+ else
+ mgs_node=${MGS_NODENAME[0]}
+ if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ]
+ then
+ error_output "check_mgs():"\
+ "More than one primary MGS nodes in the CSV" \
+ "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
+ else
+ error_output "check_mgs():"\
+ "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
+ "are failover pair, one of them should use"\
+ "\"--noformat\" in the format options item!"
+ fi
+ return 1
+ fi
+ else # Backup MGS server
+ if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+ || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+ error_output "check_mgs():"\
+ "There exist both explicit and implicit MGS"\
+ "targets in the CSV file!"
+ return 1
+ fi
+
+ if ${is_exp_mgs}; then # Explicit MGS
+ MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
+ MGS_IDX[exp_idx]=$i
+ exp_idx=$(( exp_idx + 1 ))
+ else # Implicit MGS
+ MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
+ MGS_IDX[imp_idx]=$i
+ imp_idx=$(( imp_idx + 1 ))
+ fi
+ fi
+ fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
+ done
+
+ # Check whether the MGS nodes are in the same failover group
+ if ! check_mgs_group; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Execute remote command to add module options to
+# the module configuration file
+# Arguments: $1 - index into the MODULE_OPTS array; $2 - target hostname.
+# A target with empty MODULE_OPTS is silently skipped (returns 0).
+add_module_options() {
+ declare -i i=$1
+ local hostname=$2
+
+ if [ -z "$hostname" ]; then
+ error_output "add_module_options(): Missing hostname!"
+ return 1
+ fi
+
+ [ -z "${MODULE_OPTS[i]}" ] && return 0
+
+ # Execute remote command to add module options to
+ # the module configuration file
+ verbose_output "Adding module options to $hostname"
+ $REMOTE $hostname "PATH=\$PATH:/sbin:/usr/sbin
+echo \"${MODULE_OPTS[i]}\" | $MODULE_CONFIG"
+ local RC=${PIPESTATUS[0]}
+ if [ $RC -ne 0 ]; then
+ error_output "add_module_options():"\
+ "Failed to add module options to $hostname!"
+ return $RC
+ fi
+
+ return 0
+}
+
+# check_lnet_connect hostname_index mgs_hostname
+# Check whether the target node can contact the MGS node @mgs_hostname
+# If @mgs_hostname is null, then it means the primary MGS node
+# Reads the globals HOST_NAME, MGS_NIDS, MGS_NUM, REMOTE and LCTL.
+check_lnet_connect() {
+ declare -i i=$1
+ local mgs_node=$2
+
+ local mgs_prim_nids
+ local nids_str=
+ local mgs_nid
+ local ping_mgs
+ local try
+
+ # Execute remote command to check that
+ # this node can contact the MGS node
+ verbose_output "Checking lnet connectivity between" \
+ "${HOST_NAME[i]} and the MGS node ${mgs_node}"
+ # primary nids = first colon-separated group in the "mgs nids" item
+ mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'`
+
+ if [ -z "${mgs_node}" -o $MGS_NUM -eq 1 ]; then
+ nids_str=${mgs_prim_nids} # nids of primary MGS node
+ if [ -z "${nids_str}" ]; then
+ error_output "check_lnet_connect():"\
+ "Check the mgs nids item of host ${HOST_NAME[i]}!"\
+ "Missing nids of the primary MGS node!"
+ return 1
+ fi
+ else
+ # Get the corresponding NID(s) of the MGS node ${mgs_node}
+ # from the "mgs nids" field
+ nids_str=$(get_mgs_nids ${mgs_node} ${MGS_NIDS[i]})
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ error_output "${nids_str}"
+ return 1
+ fi
+ fi
+
+ # Try each NID up to 6 times; NOTE(review): the outer loop still
+ # probes the remaining NIDs even after one of them succeeds.
+ ping_mgs=false
+ for mgs_nid in ${nids_str//,/ }
+ do
+ for try in $(seq 0 5); do
+ $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
+$LCTL ping $mgs_nid 5 1>/dev/null"
+ if [ ${PIPESTATUS[0]} -eq 0 ]; then
+ # This node can contact the MGS node
+ verbose_output "${HOST_NAME[i]} can contact the MGS" \
+ "node $mgs_node by using nid \"$mgs_nid\"!"
+ ping_mgs=true
+ break
+ fi
+ done
+ done
+
+ if ! ${ping_mgs}; then
+ error_output "check_lnet_connect():" \
+ "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\
+ "with nids - \"${nids_str}\"! Check ${LCTL} command!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Start lnet network in the cluster node and check that
+# this node can contact the MGS node
+# Argument: $1 - index into HOST_NAME. Honors $VERIFY_CONNECT (no-op
+# when false). MGS nodes themselves skip the connectivity probe.
+check_lnet() {
+ if ! $VERIFY_CONNECT; then
+ return 0
+ fi
+
+ # Check argument
+ if [ $# -eq 0 ]; then
+ error_output "check_lnet(): Missing argument!"
+ return 1
+ fi
+
+ declare -i i=$1
+ declare -i j
+ local ret_str
+
+ # Execute remote command to start lnet network
+ verbose_output "Starting lnet network on ${HOST_NAME[i]}"
+ ret_str=$($REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
+modprobe lnet && $LCTL network up" 2>&1)
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ error_output "check_lnet(): start lnet network on" \
+ "${HOST_NAME[i]} error: $ret_str"
+ return 1
+ fi
+
+ if is_mgs_node ${HOST_NAME[i]}; then
+ return 0
+ fi
+
+ # Execute remote command to check that
+ # this node can contact the MGS node
+ for ((j = 0; j < ${MGS_NUM}; j++)); do
+ if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# Start lnet network in the MGS node
+# Iterates the MGS_NODENAME/MGS_IDX globals set by check_mgs; pushes
+# module options and brings lnet up on each MGS node. Succeeds (with a
+# verbose note) when no MGS target exists in scope.
+start_mgs_lnet() {
+ declare -i i
+ declare -i idx
+
+ if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
+ if ${USE_ALLNODES}; then
+ verbose_output "There is no MGS target in the ${CSV_FILE} file."
+ else
+ verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"."
+ fi
+ return 0
+ fi
+
+ for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+ # Execute remote command to add lnet options lines to
+ # the MGS node's modprobe.conf/modules.conf
+ idx=${MGS_IDX[i]}
+ add_module_options $idx ${MGS_NODENAME[i]} || return ${PIPESTATUS[0]}
+
+ # Start lnet network in the MGS node
+ check_lnet $idx || return ${PIPESTATUS[0]}
+ done
+
+ return 0
+}
+
+# Get all the Lustre target items in the CSV file and do some checks.
+# Argument: $1 - CSV file path. Fills the global per-target arrays
+# (HOST_NAME, MODULE_OPTS, DEVICE_NAME, MOUNT_POINT, DEVICE_TYPE,
+# FS_NAME, MGS_NIDS, INDEX, FORMAT_OPTIONS, MKFS_OPTIONS,
+# MOUNT_OPTIONS, FAILOVERS) plus FAILOVERS_NAMES. Returns 1 on the
+# first malformed or incomplete line.
+get_lustre_items() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ error_output "get_lustre_items(): Missing argument"\
+ "for function get_lustre_items()!"
+ return 1
+ fi
+
+ local CSV_FILE=$1
+ local LINE
+ local marker
+ local hostname
+ declare -i line_num=0
+ declare -i idx=0
+
+ # Read via fd 9 — presumably to keep stdin free for commands run
+ # inside the loop (TODO confirm)
+ exec 9< ${CSV_FILE}
+ while read -u 9 -r LINE; do
+ # line_num is declare -i, so "old+1" is evaluated arithmetically
+ line_num=${line_num}+1
+ # verbose_output "Parsing line ${line_num}: $LINE"
+
+ # Get rid of the empty line
+ [ -z "`echo ${LINE} | awk '/[[:alnum:]]/ {print $0}'`" ] && continue
+
+ # Get rid of the comment line
+ [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue
+
+ # Skip the Linux MD/LVM line
+ marker=$(echo ${LINE} | cut -d, -f 2)
+ if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \
+ || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then
+ continue
+ fi
+
+ # Skip the host which is not specified in the host list
+ if ! ${USE_ALLNODES}; then
+ hostname=$(echo ${LINE} | cut -d, -f 1)
+ ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue
+ fi
+
+ # Parse the config line into CONFIG_ITEM
+ if ! parse_line "$LINE"; then
+ error_output "parse_line(): Occurred"\
+ "on line ${line_num} in ${CSV_FILE}: $LINE"
+ return 1
+ fi
+
+ # Copy the parsed fields into the per-target global arrays
+ HOST_NAME[idx]=${CONFIG_ITEM[0]}
+ MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
+ DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
+ MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
+ DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
+ FS_NAME[idx]=${CONFIG_ITEM[5]}
+ MGS_NIDS[idx]=${CONFIG_ITEM[6]}
+ INDEX[idx]=${CONFIG_ITEM[7]}
+ FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
+ MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
+ MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
+ FAILOVERS[idx]=${CONFIG_ITEM[11]}
+
+ # Escape double quotes so the options survive the remote shell
+ MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'`
+
+ # Convert IP addresses in NIDs to hostnames
+ FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]})
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ error_output "${FAILOVERS_NAMES[idx]}"
+ return 1
+ fi
+
+ # Check some required items for formatting target
+ if ! check_lustre_item $idx; then
+ error_output "check_lustre_item():"\
+ "Occurred on line ${line_num} in ${CSV_FILE}."
+ return 1
+ fi
+
+ idx=${idx}+1
+ done
+
+ return 0
+}
#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
#
# lc_hb - script for generating the Heartbeat HA software's
# configuration files
HBVER_OPT=$OPTARG
if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
&& [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat software" \
+ error_output "Invalid Heartbeat software" \
"version - ${HBVER_OPT}!"
usage
fi
HOSTNAME_OPT=$OPTARG
PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
if [ -z "${PRIM_NODENAME}" ]; then
- echo >&2 $"`basename $0`: Missing primary nodename!"
+ error_output "Missing primary nodename!"
usage
fi
HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
if [ ${HOSTNAME_NUM} -lt 2 ]; then
- echo >&2 $"`basename $0`: Missing failover nodenames!"
+ error_output "Missing failover nodenames!"
usage
fi
if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
then
- echo >&2 $"`basename $0`: Heartbeat version 1 can" \
+ error_output "Heartbeat version 1 can" \
"only support 2 nodes!"
usage
fi
TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing target device name!"
+ error_output "Missing target device name!"
usage
fi
if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing mount point for target"\
+ error_output "Missing mount point for target"\
"${TARGET_DEVNAMES[TARGET_NUM]}!"
usage
fi
# Check the required parameters
if [ -z "${HBVER_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -r option!"
+ error_output "Missing -r option!"
usage
fi
if [ -z "${HOSTNAME_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -n option!"
+ error_output "Missing -n option!"
usage
fi
if [ -z "${DEVICE_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -d option!"
+ error_output "Missing -d option!"
usage
fi
local file_name=$2
if [ -z "${host_name}" ]; then
- echo >&2 "`basename $0`: check_remote_file() error:"\
+ error_output "check_remote_file():"\
"Missing hostname!"
return 1
fi
if [ -z "${file_name}" ]; then
- echo >&2 "`basename $0`: check_remote_file() error:"\
+ error_output "check_remote_file():"\
"Missing file name!"
return 1
fi
# Execute remote command to check the file
${REMOTE} ${host_name} "[ -e ${file_name} ]"
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: check_remote_file() error:"\
+ error_output "check_remote_file():"\
"${file_name} does not exist in host ${host_name}!"
return 1
fi
ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
if [ $? -ne 0 ]; then
if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
- echo >&2 "`basename $0`: hb_running() error:"\
+ error_output "hb_running():"\
"remote command to ${host_name} error: ${ret_str}!"
return 2
else
local host_name=$1
local ret_str
- ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
+ ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
+service heartbeat stop < /dev/null" 2>&1)
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: stop_heartbeat() error:"\
+ error_output "stop_heartbeat():"\
"remote command to ${host_name} error: ${ret_str}!"
return 1
fi
for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
# Check Heartbeat configuration directory
if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat() error:"\
+ error_output "check_heartbeat():"\
"Is Heartbeat package installed?"
return 1
fi
if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
# Check mon configuration directory
if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat()"\
- "error: Is mon package installed?"
+ error_output "check_heartbeat():"\
+ "Is mon package installed?"
return 1
fi
fi
if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
# Check crm directory
if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat()"\
- "error: Is Heartbeat v2 package installed?"
+ error_output "check_heartbeat():"\
+ "Is Heartbeat v2 package installed?"
return 1
fi
fi
local ret_str
# Execute remote command to get the target server name
- ret_str=`${REMOTE} ${host_name} \
- "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
+ ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
+${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1)
if [ $? -ne 0 ]; then
echo "`basename $0`: get_srvname() error:" \
"from host ${host_name} - ${ret_str}"
TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
${TARGET_DEVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_SRVNAMES[i]}"
+ error_output "${TARGET_SRVNAMES[i]}"
return 1
fi
done
touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp ha.cf file"\
+ error_output "Failed to scp ha.cf file"\
"to node ${NODE_NAMES[idx]}!"
return 1
fi
python ${CIB_GEN_SCRIPT} --stdout \
${HARES_LUSTRE} > ${CIB_LUSTRE}
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate cib.xml file"\
+ error_output "Failed to generate cib.xml file"\
"for node ${PRIM_NODENAME}!"
return 1
fi
/bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp haresources file"\
+ error_output "Failed to scp haresources file"\
"to node ${NODE_NAMES[idx]}!"
return 1
fi
if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp cib.xml"\
+ error_output "Failed to scp cib.xml"\
"file to node ${NODE_NAMES[idx]}!"
return 1
fi
touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp authkeys file"\
+ error_output "Failed to scp authkeys file"\
"to node ${NODE_NAMES[idx]}!"
return 1
fi
${SCRIPT_GEN_MONCF} ${params}
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate mon.cf file"\
+ error_output "Failed to generate mon.cf file"\
"by using ${SCRIPT_GEN_MONCF}!"
return 1
fi
scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp mon.cf file"\
+ error_output "Failed to scp mon.cf file"\
"to node ${NODE_NAMES[idx]}!"
return 1
fi
# Here we expect the csv file
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: Missing csv file!"
+ error_output "Missing csv file!"
usage
fi
+CSV_FILE=$1
+
# check_lvm_item index
#
# Check the items required for managing LVM device ${LVM_NAME[index]}
check_lvm_item() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"Missing argument!"
return 1
fi
# Check hostname
if [ -z "${HOST_NAME[i]}" ]; then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"hostname item has null value!"
return 1
fi
if [ -z "${LVM_NAME[i]}" ] \
&& [ "${LINE_MARKER[i]}" != "${LV_MARKER}" -a "${OP_MODE[i]}" != "remove" ]
then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"LVM component name item has null value!"
return 1
fi
if [ -n "${OP_MODE[i]}" ] \
&& [ "${OP_MODE[i]}" != "create" -a "${OP_MODE[i]}" != "remove" ]
then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"Invalid operation mode item - \"${OP_MODE[i]}\"!"
return 1
fi
if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
if [ "${LINE_MARKER[i]}" = "${VG_MARKER}" -a -z "${SIXTH_ITEM[i]}" ]
then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"pv paths item of vg ${LVM_NAME[i]} has null value!"
return 1
fi
if [ "${LINE_MARKER[i]}" = "${LV_MARKER}" ]; then
if [ -z "${SIXTH_ITEM[i]}" ]; then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"lv size item has null value!"
return 1
fi
if [ -z "${SEVENTH_ITEM[i]}" ]; then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"vg name item has null value!"
return 1
fi
get_lvm_items() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: get_lvm_items() error: Missing csv file!"
+ error_output "get_lvm_items(): Missing csv file!"
return 1
fi
- CSV_FILE=$1
+ local CSV_FILE=$1
local LINE line_marker
local hostname
declare -i line_num=0
# Check some required items
if ! check_lvm_item $idx; then
- echo >&2 "`basename $0`: check_lvm_item() error:"\
+ error_output "check_lvm_item():"\
"Occurred on line ${line_num} in ${CSV_FILE}."
return 1
fi
fi
;;
*)
- echo >&2 "`basename $0`: construct_lvm_cmdline() error:"\
+ error_output "construct_lvm_cmdline():"\
"Invalid operation mode - \"${OP_MODE[i]}\"!"
return 1
;;
verbose_output "Configuring LVM devices in host ${host_name}..."
verbose_output "Configure command line is: \"${LVM_CMDLINE}\""
REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${LVM_CMDLINE}\""
- ${REMOTE} ${host_name} "(${EXPORT_PATH} ${LVM_CMDLINE})" >&2 &
+ $REMOTE $host_name "export PATH=\$PATH:/sbin:/usr/sbin; $LVM_CMDLINE" &
REMOTE_PID[pid_num]=$!
let "pid_num += 1"
for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
wait ${REMOTE_PID[${pid_num}]}
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: config_lvm() error: Failed"\
+ error_output "config_lvm(): Failed"\
"to execute \"${REMOTE_CMD[${pid_num}]}\"!"
failed_status=true
fi
# Main flow
# Check the csv file
-if ! check_file $1; then
- exit 1
-fi
+check_file $CSV_FILE || exit ${PIPESTATUS[0]}
# Get the list of nodes to be operated on
-NODES_TO_USE=$(get_nodelist)
-[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1
+NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"
# Check the node list
check_nodelist ${NODES_TO_USE} || exit 1
# Here we expect the csv file
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: Missing csv file!"
+ error_output "Missing csv file!"
usage
fi
+CSV_FILE=$1
+
# check_md_item index
#
# Check the items required for managing MD device ${MD_NAME[index]}
check_md_item() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"Missing argument!"
return 1
fi
# Check hostname
if [ -z "${HOST_NAME[i]}" ]; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"hostname item has null value!"
return 1
fi
if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
# Check MD device name
if [ -z "${MD_NAME[i]}" ]; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"md name item has null value!"
return 1
fi
if [ -z "${RAID_LEVEL[i]}" ]; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"raid level item of MD device ${MD_NAME[i]} has null value!"
return 1
fi
if [ -z "${MD_DEVS[i]}" ]; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"component devices item of ${MD_NAME[i]} has null value!"
return 1
fi
get_md_items() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 "`basename $0`: get_md_items() error: Missing csv file!"
+ error_output "get_md_items(): Missing csv file!"
return 1
fi
- CSV_FILE=$1
+ local CSV_FILE=$1
local LINE
local hostname
declare -i line_num=0
# Check some required items
if ! check_md_item $idx; then
- echo >&2 "`basename $0`: check_md_item() error:"\
+ error_output "check_md_item():"\
"Occurred on line ${line_num} in ${CSV_FILE}."
return 1
fi
ret_str=$(${REMOTE} ${host_name} "${cmd}" 2>&1)
if [ ${PIPESTATUS[0]} -ne 0 ]; then
if [ -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: md_is_active() error:"\
+ error_output "md_is_active():"\
"remote command to ${host_name} error: ${ret_str}!"
return 2 # Error occurred
else
# Construct the create command line
mdadm_cmd=$(construct_mdadm_create_cmdline ${i})
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${mdadm_cmd}"
+ error_output "${mdadm_cmd}"
return 1
fi
verbose_output "Configuring MD devices in host ${host_name}..."
verbose_output "Configure command line is: \"${MDADM_CMDLINE}\""
REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${MDADM_CMDLINE}\""
- ${REMOTE} ${host_name} "${MDADM_CMDLINE}" >&2 &
+ $REMOTE $host_name "export PATH=\$PATH:/sbin:/usr/sbin; $MDADM_CMDLINE" &
REMOTE_PID[pid_num]=$!
let "pid_num += 1"
sleep 1
for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
wait ${REMOTE_PID[${pid_num}]}
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: config_md() error: Failed"\
+ error_output "config_md(): Failed"\
"to execute \"${REMOTE_CMD[${pid_num}]}\"!"
failed_status=true
fi
# Main flow
# Check the csv file
-if ! check_file $1; then
- exit 1
-fi
+check_file $CSV_FILE || exit ${PIPESTATUS[0]}
# Get the list of nodes to be operated on
-NODES_TO_USE=$(get_nodelist)
-[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1
+NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"
# Check the node list
check_nodelist ${NODES_TO_USE} || exit 1
#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
#
# lc_net - script for Lustre cluster network verification
#
# Here we expect the csv file
if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: Missing csv file!"
+ error_output "Missing csv file!"
usage
fi
# Get the list of nodes to be operated on
NODES_TO_USE=$(get_nodelist)
- [ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && return 1
+ [ ${PIPESTATUS[0]} -ne 0 ] && error_output "${NODES_TO_USE}" && return 1
# Check the node list
if [ -z "${NODES_TO_USE}" ]; then
# and get the IP address of this host from ping
HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]})
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${HOST_IPADDRS[i]}"
+ error_output "${HOST_IPADDRS[i]}"
return 1
fi
cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
ret_str=$(${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1)
if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: remote_check() error:"\
+ error_output "remote_check():"\
"remote to ${HOST_NAMES[i]} error: ${ret_str}!"
return 1
fi
if [ -z "${ret_str}" ]; then
- echo >&2 "`basename $0`: remote_check() error:"\
+ error_output "remote_check():"\
"No results from ${HOST_NAMES[i]}! Check the network"\
"connectivity between local host and ${HOST_NAMES[i]}!"
return 1
# Check whether ${HOST_NAMES[i]} agrees with the local host
# about what its name is resolved to.
if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
- echo >&2 "`basename $0`: remote_check() error:"\
+ error_output "remote_check():"\
"Local host resolves ${HOST_NAMES[i]} to IP address"\
"\"${HOST_IPADDRS[i]}\", while its own resolution is"\
"\"${ip_addr}\". They are not the same!"
# Usage
usage() {
- cat >&2 <<EOF
+ cat <<EOF
-Usage: `basename $0` [options] <csv file>
+Usage: $(basename $0) [options] <-a|-w|-x> <csv file>
This script is used to format and set up multiple lustre servers from a
csv file.
(separated by commas) for each target in a Lustre cluster
EOF
- exit 1
}
# Samples
. @scriptlibdir@/lc_common
#***************************** Global variables *****************************#
-declare -a MGS_NODENAME # node names of the MGS servers
-declare -a MGS_IDX # indexes of MGSs in the global arrays
-declare -i MGS_NUM # number of MGS servers in the cluster
-declare -i INIT_IDX
-
declare -a NODE_NAMES # node names in the failover group
declare -a TARGET_OPTS # target services in one failover group
-# All the items in the csv file
-declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
-declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
-
-# Heartbeat software requires that node names in the configuration directive
-# must (normally) match the "uname -n" of that machine. Since the value of the
-# "failover nids" field in the csv file is the NID(s) of failover partner node,
-# we have to figure out the corresponding hostname of that node.
-declare -a FAILOVERS_NAMES
-
-VERIFY_CONNECT=true
CONFIG_MD_LVM=false
MODIFY_FSTAB=true
UPGRADE_TARGET=false
-VERBOSE_OUTPUT=false
# Get and check the positional parameters
while getopts "aw:x:t:ndfmuhv" OPTION; do
case $OPTION in
if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
&& [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
&& [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
- echo >&2 $"`basename $0`: Invalid HA software type" \
+ error_output "Invalid HA software type" \
"- ${HATYPE_OPT}!"
- usage
+ usage 1>&2
+ exit 1
fi
;;
n)
UPGRADE_TARGET=true
;;
h)
+ usage
sample
;;
v)
VERBOSE_OUTPUT=true
;;
?)
- usage
+ usage 1>&2
+ exit 1
+ ;;
esac
done
# Here we expect the csv file
if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: Missing csv file!"
- usage
+ error_output "Missing csv file!"
+ usage 1>&2
+ exit 1
fi
-# Check the items required for OSTs, MDTs and MGS
-#
-# When formatting an OST, the following items: hostname, module_opts,
-# device name, device type and mgs nids, cannot have null value.
-#
-# When formatting an MDT or MGS, the following items: hostname,
-# module_opts, device name and device type, cannot have null value.
-check_item() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_item() error: Missing argument"\
- "for function check_item()!"
- return 1
- fi
-
- declare -i i=$1
-
- # Check hostname, module_opts, device name and device type
- if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
- ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: Some required"\
- "item has null value! Check hostname, module_opts,"\
- "device name and device type!"
- return 1
- fi
-
- # Check mgs nids
- if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
- "item has null value!"
- return 1
- fi
-
- # Check mount point
- if [ -z "${MOUNT_POINT[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: mount"\
- "point item of target ${DEVICE_NAME[i]} has null value!"
- return 1
- fi
-
- return 0
-}
-
-# Get the number of MGS nodes in the cluster
-get_mgs_num() {
- INIT_IDX=0
- MGS_NUM=${#MGS_NODENAME[@]}
- [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
- && let "MGS_NUM += 1"
-}
-
-# is_mgs_node hostname
-# Verify whether @hostname is a MGS node
-is_mgs_node() {
- local host_name=$1
- declare -i i
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
- done
-
- return 1
-}
-
-# Check whether the MGS nodes are in the same failover group
-check_mgs_group() {
- declare -i i
- declare -i j
- declare -i idx
- local mgs_node
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- mgs_node=${MGS_NODENAME[i]}
- for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
- [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
-
- idx=${MGS_IDX[j]}
- if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs_group() error:"\
- "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
- "failover group!"
- return 1
- fi
- done
- done
-
- return 0
-}
-
-# Get and check MGS servers.
-# There should be no more than one MGS specified in the entire csv file.
-check_mgs() {
- declare -i i
- declare -i j
- declare -i exp_idx # Index of explicit MGS servers
- declare -i imp_idx # Index of implicit MGS servers
- local is_exp_mgs is_imp_mgs
- local mgs_node
-
- # Initialize the MGS_NODENAME and MGS_IDX arrays
- unset MGS_NODENAME
- unset MGS_IDX
-
- exp_idx=1
- imp_idx=1
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- is_exp_mgs=false
- is_imp_mgs=false
-
- # Check whether this node is an explicit MGS node
- # or an implicit one
- if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
- verbose_output "Explicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_exp_mgs=true
- fi
-
- if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
- verbose_output "Implicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_imp_mgs=true
- fi
-
- # Get and check MGS servers
- if ${is_exp_mgs} || ${is_imp_mgs}; then
- # Check whether more than one MGS target in one MGS node
- if is_mgs_node ${HOST_NAME[i]}; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one MGS target in the same node -"\
- "\"${HOST_NAME[i]}\"!"
- return 1
- fi
-
- # Get and check primary MGS server and backup MGS server
- if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
- then
- # Primary MGS server
- if [ -z "${MGS_NODENAME[0]}" ]; then
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
- MGS_NODENAME[0]=${HOST_NAME[i]}
- MGS_IDX[0]=$i
- else
- mgs_node=${MGS_NODENAME[0]}
- if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one primary MGS nodes in the csv" \
- "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
- else
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
- "are failover pair, one of them should use"\
- "\"--noformat\" in the format options item!"
- fi
- return 1
- fi
- else # Backup MGS server
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
-
- if ${is_exp_mgs}; then # Explicit MGS
- MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
- MGS_IDX[exp_idx]=$i
- exp_idx=$(( exp_idx + 1 ))
- else # Implicit MGS
- MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
- MGS_IDX[imp_idx]=$i
- imp_idx=$(( imp_idx + 1 ))
- fi
- fi
- fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
- done
-
- # Check whether the MGS nodes are in the same failover group
- if ! check_mgs_group; then
- return 1
- fi
-
- return 0
-}
+CSV_FILE=$1
# Construct the command line of mkfs.lustre
construct_mkfs_cmdline() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+ error_output "construct_mkfs_cmdline():"\
"Missing argument for function construct_mkfs_cmdline()!"
return 1
fi
MKFS_CMD="$MKFS_CMD --mgs --mdt"
;;
*)
- echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+ error_output "construct_mkfs_cmdline():"\
"Invalid device type - \"${DEVICE_TYPE[i]}\"!"
return 1
;;
get_nodenames() {
# Check argument
if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
+ error_output "get_nodenames(): Missing"\
"argument for function get_nodenames()!"
return 1
fi
do
NODE_NAMES[idx]=$(nids2hostname ${nids})
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${NODE_NAMES[idx]}"
+ error_output "${NODE_NAMES[idx]}"
return 1
fi
HOSTNAME_OPT=${HOST_NAME[i]}
if ! get_nodenames $i; then
- echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\
+ error_output "gen_ha_config(): Can not get the"\
"failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\
"the \"${HOST_NAME[i]}\" failover group!"
return 1
return 0
}
-# Get all the items in the csv file and do some checks.
-get_items() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: get_items() error: Missing argument"\
- "for function get_items()!"
- return 1
- fi
-
- CSV_FILE=$1
- local LINE
- local marker
- local hostname
- declare -i line_num=0
- declare -i idx=0
-
- exec 9< ${CSV_FILE}
- while read -u 9 -r LINE; do
- line_num=${line_num}+1
- # verbose_output "Parsing line ${line_num}: $LINE"
-
- # Get rid of the empty line
- if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then
- continue
- fi
-
- # Get rid of the comment line
- if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]
- then
- continue
- fi
-
- # Skip the Linux MD/LVM line
- marker=$(echo ${LINE} | cut -d, -f 2)
- if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \
- || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then
- continue
- fi
-
- # Skip the host which is not specified in the host list
- if ! ${USE_ALLNODES}; then
- hostname=$(echo ${LINE} | cut -d, -f 1)
- ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue
- fi
-
- # Parse the config line into CONFIG_ITEM
- if ! parse_line "$LINE"; then
- echo >&2 $"`basename $0`: parse_line() error: Occurred"\
- "on line ${line_num} in ${CSV_FILE}: $LINE"
- return 1
- fi
-
- HOST_NAME[idx]=${CONFIG_ITEM[0]}
- MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
- DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
- MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
- DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
- FS_NAME[idx]=${CONFIG_ITEM[5]}
- MGS_NIDS[idx]=${CONFIG_ITEM[6]}
- INDEX[idx]=${CONFIG_ITEM[7]}
- FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
- MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
- MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
- FAILOVERS[idx]=${CONFIG_ITEM[11]}
-
- MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'`
-
- # Convert IP addresses in NIDs to hostnames
- FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]})
- if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${FAILOVERS_NAMES[idx]}"
- return 1
- fi
-
- # Check some required items for formatting target
- if ! check_item $idx; then
- echo >&2 $"`basename $0`: check_item() error:"\
- "Occurred on line ${line_num} in ${CSV_FILE}."
- return 1
- fi
-
- idx=${idx}+1
- done
-
- return 0
-}
-
-# check_lnet_connect hostname_index mgs_hostname
-# Check whether the target node can contact the MGS node @mgs_hostname
-# If @mgs_hostname is null, then it means the primary MGS node
-check_lnet_connect() {
- declare -i i=$1
- local mgs_node=$2
-
- local COMMAND RET_STR
- local mgs_prim_nids
- local nids_str=
- local mgs_nid
- local ping_mgs
-
- # Execute remote command to check that
- # this node can contact the MGS node
- verbose_output "Checking lnet connectivity between" \
- "${HOST_NAME[i]} and the MGS node ${mgs_node}"
- mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'`
-
- if [ -z "${mgs_node}" -o $MGS_NUM -eq 1 ]; then
- nids_str=${mgs_prim_nids} # nids of primary MGS node
- if [ -z "${nids_str}" ]; then
- echo >&2 $"`basename $0`: check_lnet_connect() error:"\
- "Check the mgs nids item of host ${HOST_NAME[i]}!"\
- "Missing nids of the primary MGS node!"
- return 1
- fi
- else
- # Get the corresponding NID(s) of the MGS node ${mgs_node}
- # from the "mgs nids" field
- nids_str=$(get_mgs_nids ${mgs_node} ${MGS_NIDS[i]})
- if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${nids_str}"
- return 1
- fi
- fi
-
- ping_mgs=false
- for mgs_nid in ${nids_str//,/ }
- do
- COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1"
- RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1)
- if [ ${PIPESTATUS[0]} -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
- then
- # This node can contact the MGS node
- verbose_output "${HOST_NAME[i]} can contact the MGS" \
- "node ${mgs_node} by using nid \"${mgs_nid}\"!"
- ping_mgs=true
- break
- fi
- done
-
- if ! ${ping_mgs}; then
- echo >&2 "`basename $0`: check_lnet_connect() error:" \
- "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\
- "with nids - \"${nids_str}\"! Check ${LCTL} command!"
- return 1
- fi
-
- return 0
-}
-
-# Start lnet network in the cluster node and check that
-# this node can contact the MGS node
-check_lnet() {
- if ! ${VERIFY_CONNECT}; then
- return 0
- fi
-
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_lnet() error: Missing"\
- "argument for function check_lnet()!"
- return 1
- fi
-
- declare -i i=$1
- declare -i j
- local COMMAND RET_STR
-
- # Execute remote command to start lnet network
- verbose_output "Starting lnet network in ${HOST_NAME[i]}"
- COMMAND="PATH=\$PATH:/sbin:/usr/sbin modprobe lnet; ${LCTL} network up 2>&1"
- RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1)
- if [ ${PIPESTATUS[0]} -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
- then
- echo >&2 "`basename $0`: check_lnet() error: remote" \
- "${HOST_NAME[i]} error: ${RET_STR}"
- return 1
- fi
-
- if is_mgs_node ${HOST_NAME[i]}; then
- return 0
- fi
-
- # Execute remote command to check that
- # this node can contact the MGS node
- for ((j = 0; j < ${MGS_NUM}; j++)); do
- if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
- return 1
- fi
- done
-
- return 0
-}
-
-# Start lnet network in the MGS node
-start_mgs_lnet() {
- declare -i i
- declare -i idx
- local COMMAND
-
- if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
- if ${USE_ALLNODES}; then
- verbose_output "There is no MGS target in the ${CSV_FILE} file."
- else
- verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"."
- fi
- return 0
- fi
-
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- # Execute remote command to add lnet options lines to
- # the MGS node's modprobe.conf/modules.conf
- idx=${MGS_IDX[i]}
- COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}"
- verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}"
- ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2
- if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: start_mgs_lnet() error:"\
- "Failed to execute remote command to" \
- "add module options to ${MGS_NODENAME[i]}!"\
- "Check ${MODULE_CONFIG}!"
- return 1
- fi
-
- # Start lnet network in the MGS node
- if ! check_lnet ${idx}; then
- return 1
- fi
- done
-
- return 0
-}
-
# Execute remote command to add lnet options lines to remote nodes'
# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
mass_config() {
declare -a REMOTE_CMD
declare -i pid_num=0
declare -i i=0
+ local checked_hosts=""
if [ ${#HOST_NAME[@]} -eq 0 ]; then
- verbose_output "There are no lustre targets specified."
+ verbose_output "There are no Lustre targets specified."
return 0
fi
"${HOST_NAME[i]}"
${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error:"\
+ error_output "mass_config():"\
"Failed to execute remote command to"\
"create the mountpoint on ${HOST_NAME[i]}!"
return 1
fi
- if ! $UPGRADE_TARGET && ! is_mgs_node ${HOST_NAME[i]}; then
+ if ! $UPGRADE_TARGET && ! is_mgs_node ${HOST_NAME[i]} && \
+ ! host_in_hostlist ${HOST_NAME[i]} $checked_hosts; then
# Execute remote command to add lnet options lines to
# modprobe.conf/modules.conf
- COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
- verbose_output "Adding lnet module options to" \
- "${HOST_NAME[i]}"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error:"\
- "Failed to execute remote command to"\
- "add module options to ${HOST_NAME[i]}!"
- return 1
- fi
+ add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]}
# Check lnet networks
- if ! check_lnet $i; then
- return 1
- fi
+ check_lnet $i || return ${PIPESTATUS[0]}
+
+ checked_hosts="$checked_hosts,${HOST_NAME[i]}"
fi
# Execute remote command to format or upgrade Lustre target
$UPGRADE_TARGET && OP="Upgrading" || OP="Formatting"
verbose_output "$OP Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..."
- COMMAND="$EXPORT_PATH $MKFS_CMD"
+ COMMAND="export PATH=\$PATH:/sbin:/usr/sbin; $MKFS_CMD"
REMOTE_CMD[$pid_num]="$REMOTE ${HOST_NAME[i]} \"$COMMAND\""
verbose_output "$OP command line is: ${REMOTE_CMD[$pid_num]}"
for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
wait ${REMOTE_PID[${pid_num}]}
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error: Failed"\
+ error_output "mass_config(): Failed"\
"to execute \"${REMOTE_CMD[${pid_num}]}\"!"
fail_exit_status=true
fi
# Get mount options
if [ -n "${MOUNT_OPTIONS[i]}" ]; then
# The mount options already specified in the csv file.
- mntopts=${MOUNT_OPTIONS[i]}
+ mntopts="${MOUNT_OPTIONS[i]}"
else
mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
${FAILOVERS[i]})
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "${mntopts}"
+ error_output "${mntopts}"
return 1
fi
fi
echo -e \"${mntent}\" >> \$(fcanon /etc/fstab)"
${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
if [ ${PIPESTATUS[0]} -ne 0 ]; then
- echo >&2 "`basename $0`: modify_fstab() error:"\
+ error_output "modify_fstab():"\
"Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\
"to add Lustre target ${DEVICE_NAME[i]}!"
return 1
return 0
}
-# Main flow
+#********************************* Main Flow **********************************#
+
# Check the csv file
-if ! check_file $1; then
- exit 1
-fi
+check_file $CSV_FILE || exit ${PIPESTATUS[0]}
# Get the list of nodes to be operated on
-NODES_TO_USE=$(get_nodelist)
-[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1
+NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"
# Check the node list
-check_nodelist ${NODES_TO_USE} || exit 1
+check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]}
if ${VERIFY_CONNECT}; then
# Check the network connectivity and hostnames
fi
# Configure the Lustre cluster
-echo "`basename $0`: ******** Lustre cluster configuration START ********"
-if ! get_items ${CSV_FILE}; then
- exit 1
-fi
+echo "`basename $0`: ******** Lustre cluster configuration BEGIN ********"
-if ! check_mgs; then
- exit 1
-fi
+get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]}
-if ! mass_config; then
- exit 1
-fi
+check_mgs || exit ${PIPESTATUS[0]}
-if ! modify_fstab; then
- exit 1
-fi
+# Format or upgrade Lustre server targets
+mass_config || exit ${PIPESTATUS[0]}
+
+# Modify /etc/fstab to add the new Lustre server targets
+modify_fstab || exit ${PIPESTATUS[0]}
# Produce HA software's configuration files
if ! config_ha; then
#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
#
# lustre_createcsv - generate a csv file from a running lustre cluster
#
# Usage
usage() {
- cat >&2 <<EOF
+ cat <<EOF
Usage: `basename $0` [-t HAtype] [-d] [-h] [-v] [-f csv_filename]
Default is lustre_config.csv.
EOF
- exit 1
}
# Get the library of functions
if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
&& [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
&& [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
- echo >&2 "`basename $0`: Invalid HA software type" \
+ error_output "Invalid HA software type" \
"- ${HATYPE_OPT}!"
- usage
+ usage 1>&2
+ exit 1
fi
;;
d) GET_MDLVM_INFO=true;;
- h) usage;;
+ h) usage && exit 0;;
v) VERBOSE_OUTPUT=true;;
f) LUSTRE_CSV_FILE=$OPTARG;;
- ?) usage
+ ?) usage 1>&2 && exit 1;;
esac
done
# Verify the local host is the MGS node
mgs_node() {
if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \
+ error_output "${LUSTRE_PROC_DEVICES} does" \
"not exist. Lustre kernel modules may not be loaded!"
return 1
fi
if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \
+ error_output "${LUSTRE_PROC_DEVICES} is" \
"empty. Lustre services may not be started!"
return 1
fi
if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: This node is not a MGS node." \
+ error_output "This node is not a MGS node." \
"The script should be run on the MGS node!"
return 1
fi
fi
if [ ! -e ${LNET_PROC_PEERS} ]; then
- echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \
+ error_output "${LNET_PROC_PEERS} does not" \
"exist. LNET kernel modules may not be loaded" \
"or LNET network may not be up!"
return 1
# Get the hostnames of the nodes
for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do
if [ -z "${HOST_NIDS[idx]}" ]; then
- echo >&2 "`basename $0`: get_hostnames() error:" \
+ error_output "get_hostnames():" \
"Invalid nid - \"${HOST_NIDS[idx]}\"!"
return 1
fi
HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]})
if [ $? -ne 0 ]; then
- echo >&2 "${HOST_NAMES[i]}"
+ error_output "${HOST_NAMES[i]}"
return 1
fi
if [ "${first_item}" != "${first_item#devices=}" ]; then
MD_DEVS[j]=`echo "${line}" | sed -e 's/devices=//' -e 's/,/ /g'`
fi
- done < <(${REMOTE} ${host_name} "${MDADM} --detail --scan --verbose")
+ done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
+ ${MDADM} --detail --scan --verbose")
if [ $i -eq 0 ]; then
verbose_output "There are no active MD devices" \
local cmd ret_str
# Execute remote command to get all the PV informations.
- cmd="${EXPORT_PATH} pvdisplay -c | awk -F: '{print \$1}' | xargs"
+ cmd="PATH=\$PATH:/sbin:/usr/sbin \
+pvdisplay -c | awk -F: '{print \$1}' | xargs"
ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
if [ $? -ne 0 ]; then
if [ -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: get_pv_configs() error:" \
+ error_output "get_pv_configs():" \
"remote command to ${host_name} error: ${ret_str}"
else
remote_error "get_pv_configs" ${host_name}
local cmd ret_str
# Execute remote command to get the PV names.
- cmd="${EXPORT_PATH} vgdisplay -v ${vg_name} 2>/dev/null\
+ cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay -v ${vg_name} 2>/dev/null\
| grep \"PV Name\" | awk '{print \$3}' | xargs"
ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
if [ $? -ne 0 ]; then
unset VG_PVNAMES
# Execute remote command to get all the VG names.
- cmd="${EXPORT_PATH} vgdisplay \
+ cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay \
| grep \"VG Name\" | awk '{print \$3}' | xargs"
ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
if [ $? -ne 0 ]; then
if [ -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: get_vg_configs() error:" \
+ error_output "get_vg_configs():" \
"remote command to ${host_name} error: ${ret_str}"
else
remote_error "get_vg_configs" ${host_name}
VG_NAME[i]=${vg_name}
VG_PVNAMES[i]=$(get_vg_pvnames ${host_name} ${VG_NAME[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${VG_PVNAMES[i]}"
+ error_output "${VG_PVNAMES[i]}"
return 1
fi
let "i += 1"
LV_SIZE[i]=`echo "${line}" | awk -F: '{print $7}' | sed -e 's/.*/&K/'`
let "i += 1"
- done < <(${REMOTE} ${host_name} "${EXPORT_PATH} lvdisplay -c")
+ done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin lvdisplay -c")
if [ $i -eq 0 ]; then
verbose_output "There are no LVs in the host ${host_name}"
# Execute remote command to get the kernel version
ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: get_module_opts() error:" \
+ error_output "get_module_opts():" \
"remote command error: ${ret_str}"
return 1
fi
ret_str=`${REMOTE} ${host_name} \
"grep ${target_svname} ${res_file}" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: is_ha_target() error:" \
+ error_output "is_ha_target():" \
"remote command error: ${ret_str}"
return 1
fi
done < <(${REMOTE} ${host_name} "cat ${HA_CF}")
if [ -z "${HB_CHANNELS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error:" \
+ error_output "get_hb_configs():" \
"There are no heartbeat channel configs in ${HA_CF}" \
"of host ${host_name} or ${HA_CF} does not exist!"
return 0
done < <(${REMOTE} ${host_name} "cat ${HA_RES}")
if [ -z "${SRV_IPADDRS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error: There"\
+ error_output "get_hb_configs(): There"\
"are no service address in ${HA_RES} of host"\
"${host_name} or ${HA_RES} does not exist!"
return 0
# Execute remote command to get Heartbeat channel
HB_CHANNELS=$(get_cluman_channel ${host_name})
if [ $? -ne 0 ]; then
- echo >&2 "${HB_CHANNELS}"
+ error_output "${HB_CHANNELS}"
fi
# Execute remote command to get service IP address
SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \
${TARGET_SVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${SRV_IPADDRS}"
+ error_output "${SRV_IPADDRS}"
return 0
fi
let "i += 1"
let "j += 1"
else
- echo >&2 "`basename $0`: get_svnames() error: Invalid"\
+ error_output "get_svnames(): Invalid"\
"line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\
"- \"${line}\"!"
return 1
if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
# Execute remote command to get the device name of mgs target
ret_str=`${REMOTE} ${host_name} \
- "/sbin/findfs LABEL=${target_svname}" 2>&1`
+ "PATH=\$PATH:/sbin:/usr/sbin findfs LABEL=${target_svname}" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]
then
# Execute remote command to get the device size
ret_str=`${REMOTE} ${host_name} \
- "/sbin/blockdev --getsize ${target_devname}" 2>&1`
+ "PATH=\$PATH:/sbin:/usr/sbin blockdev --getsize ${target_devname}" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_devsize() error:" \
"remote command error: ${ret_str}"
# Execute remote command to get the real device name
ret_str=`${REMOTE} ${host_name} \
- "/sbin/losetup ${loop_dev}" 2>&1`
+ "PATH=\$PATH:/sbin:/usr/sbin losetup ${loop_dev}" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_realdevname() error:" \
"remote command error: ${ret_str}"
TARGET_DEVNAMES[i]=$(get_devname ${host_name} \
${TARGET_SVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
+ error_output "${TARGET_DEVNAMES[i]}"
return 1
fi
"target in ${host_name}."
continue
else
- echo >&2 "`basename $0`: get_devname() error:"\
+ error_output "get_devname():"\
"No device corresponding to target" \
"${TARGET_SVNAMES[i]} in ${host_name}!"
return 1
TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \
${TARGET_DEVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_MNTPNTS[i]}"
+ error_output "${TARGET_MNTPNTS[i]}"
return 1
fi
TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
${TARGET_DEVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
+ error_output "${TARGET_DEVSIZES[i]}"
return 1
fi
TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \
${TARGET_DEVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
+ error_output "${TARGET_DEVNAMES[i]}"
return 1
fi
fi
"ost") let "ret = $2 & LDD_F_SV_TYPE_OST";;
"mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";;
"*")
- echo >&2 "`basename $0`: is_target() error: Invalid" \
+ error_output "is_target(): Invalid" \
"target service type - \"$1\"!"
return 1
;;
stripe_count=`echo ${ret_str} | awk '{print $1}'`
fi
- if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
+ if [ "$stripe_count" != "-1" ] && \
+ [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]; then
echo "`basename $0`: get_stripecount() error: can't" \
"get stripe count of ${target_fsname} in ${host_name}!"
return 1
${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null")
if [ -z "${flags}" ]; then
- echo >&2 "`basename $0`: get_ldds() error: Invalid" \
+ error_output "get_ldds(): Invalid" \
"ldd_flags of target ${TARGET_DEVNAMES[i]}" \
"in host ${host_name} - it's value is null!"\
"Check ${TUNEFS} command!"
# Get the lustre target service type
TARGET_DEVTYPES[i]=$(get_devtype ${flags})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVTYPES[i]} From device" \
+ error_output "${TARGET_DEVTYPES[i]} From device" \
"${TARGET_DEVNAMES[i]} in host ${host_name}!"
return 1
fi
# Get failover nids of the lustre target
TARGET_FAILNIDS[i]=$(get_failnids "${params}")
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FAILNIDS[i]} From device" \
+ error_output "${TARGET_FAILNIDS[i]} From device" \
"${TARGET_DEVNAMES[i]} in host ${host_name}!"
return 1
fi
# Get other format options of the lustre target
TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FMTOPTS[i]}"
+ error_output "${TARGET_FMTOPTS[i]}"
return 1
fi
# Get the stripe count option
stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${stripecount_opt}"
+ error_output "${stripecount_opt}"
return 1
fi
local ret_str
# Execute remote command to get the journal inode number
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Journal inode:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stats -h' ${target_devname} | grep 'Journal inode:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_journalsize() error:" \
"remote command error: ${ret_str}"
fi
# Execute remote command to get the journal size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \
- 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_journalsize() error:" \
"remote command error: ${ret_str}"
declare -i journal_size
# Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Block count:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stats -h' ${target_devname} | grep 'Block count:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: figure_journal_size() error:" \
"remote command error: ${ret_str}"
local ret_str
# Execute remote command to get the inode count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode count:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stats -h' ${target_devname} | grep 'Inode count:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_ratio() error:" \
"remote command error: ${ret_str}"
fi
# Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Block count:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stats -h' ${target_devname} | grep 'Block count:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_ratio() error:" \
"remote command error: ${ret_str}"
local ret_str
# Execute remote command to get the inode size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode size:'" 2>&1`
+ ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
+debugfs -R 'stats -h' ${target_devname} | grep 'Inode size:'" 2>&1`
if [ $? -ne 0 -a -n "${ret_str}" ]; then
echo "`basename $0`: get_isize() error:" \
"remote command error: ${ret_str}"
TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
${TARGET_DEVNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
+ error_output "${TARGET_DEVSIZES[i]}"
return 1
fi
fi
journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
${TARGET_DEVSIZES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${journal_opt}"
+ error_output "${journal_opt}"
return 1
fi
ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${ratio_opt}"
+ error_output "${ratio_opt}"
return 1
fi
inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]})
if [ $? -ne 0 ]; then
- echo >&2 "${inode_size_opt}"
+ error_output "${inode_size_opt}"
return 1
fi
get_configs() {
# Check the hostname
if [ -z "$1" ]; then
- echo >&2 "`basename $0`: get_configs() error:" \
+ error_output "get_configs():" \
"Missing hostname!"
return 1
fi
--- /dev/null
+#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# lustre_start - start or stop multiple Lustre servers from a CSV file
+#
+# This script is used to parse each line of a CSV (Comma-Separated Value) file
+# and execute remote commands to start/stop the service on every Lustre server
+# target that will be part of the Lustre cluster.
+#
+################################################################################
+
+# usage: print the command-line usage message for this script to stdout
+usage() {
+ cat <<EOF
+
+Usage: $(basename $0) [options] <-a|-w|-x> <CSV file>
+
+ This script is used to start or stop multiple Lustre servers from a
+ CSV file.
+
+ Options:
+ -a select all the nodes from the CSV file to operate on
+ -w hostname,hostname,...
+ select the specified list of nodes (separated by commas) to
+ operate on rather than all the nodes in the CSV file
+ -x hostname,hostname,...
+ exclude the specified list of nodes (separated by commas)
+ -n no net - don't verify network connectivity and hostnames
+ in the cluster
+ -m pass "mount options" item in the CSV file to mount command line
+ -k stop the services on Lustre server targets
+ -v verbose mode
+ -h help
+ CSV file a comma-separated value file that contains configuration
+ parameters for each target in a Lustre cluster
+
+ Please refer to "lustre_config -h" for the description of CSV file formats.
+
+EOF
+}
+
+# Source the library of common variables and functions
+. @scriptlibdir@/lc_common
+
+SPECIFY_MNTOPTS=false
+STOP_SERVICE=false
+# Parse and check the command line options
+while getopts "aw:x:nmkhv" OPTION; do
+ case $OPTION in
+ a)
+ [ -z "$SPECIFIED_NODELIST" ] && [ -z "$EXCLUDED_NODELIST" ] \
+ && USE_ALLNODES=true
+ NODELIST_OPT="$NODELIST_OPT -a"
+ ;;
+ w)
+ USE_ALLNODES=false
+ SPECIFIED_NODELIST=$OPTARG
+ NODELIST_OPT="$NODELIST_OPT -w $SPECIFIED_NODELIST"
+ ;;
+ x)
+ USE_ALLNODES=false
+ EXCLUDED_NODELIST=$OPTARG
+ NODELIST_OPT="$NODELIST_OPT -x $EXCLUDED_NODELIST"
+ ;;
+ n)
+ VERIFY_CONNECT=false
+ ;;
+ m)
+ SPECIFY_MNTOPTS=true
+ ;;
+ k)
+ STOP_SERVICE=true
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ v)
+ VERBOSE_OPT="-v"
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage 1>&2
+ exit 1
+ ;;
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift $((OPTIND - 1))
+
+# Here we expect the CSV file as the remaining positional argument
+if [ $# -eq 0 ]; then
+ error_output "Missing CSV file!"
+ usage 1>&2
+ exit 1
+fi
+
+CSV_FILE=$1
+
+# get_fstab_mntopts host_name device_name mount_point
+# Look up the mount options for this device/mount point in the remote /etc/fstab
+get_fstab_mntopts() {
+ local host_name=$1
+ local dev_name=$2
+ local mnt_pnt=$3
+
+ local mnt_opts=""
+
+ if [ -z "$host_name" -o -z "$dev_name" -o -z "$mnt_pnt" ]; then
+ echo "get_fstab_mntopts(): Missing argument!"
+ return 1
+ fi
+
+ # Execute remote command to read the options (4th) field from /etc/fstab
+ mnt_opts=$($REMOTE $host_name "grep -w ^$dev_name /etc/fstab | \
+grep -w $mnt_pnt | awk '{print \$4}'" 2>/dev/null)
+
+ mnt_opts=${mnt_opts//$host_name: /}
+
+ echo $mnt_opts
+ return 0
+}
+
+# Start the service on one Lustre server target
+start_service() {
+ declare -i i=$1
+ shift
+ local extra_mntopts="$*"
+ local mntopts=""
+
+ # Determine the mount options to use for this target
+ if $SPECIFY_MNTOPTS; then
+ # Use the "mount options" item from the CSV file
+ [ -n "${MOUNT_OPTIONS[i]}" ] && mntopts=${MOUNT_OPTIONS[i]}
+ else
+ # Do not use the "mount options" item from the CSV file
+ mntopts=$(get_fstab_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]} \
+ ${MOUNT_POINT[i]})
+ [ ${PIPESTATUS[0]} -ne 0 ] && error_output "$mntopts" && return 1
+ fi
+
+ [ -n "$mntopts" ] && mntopts="-o $mntopts"
+ [ -n "$extra_mntopts" ] && mntopts="$mntopts $extra_mntopts"
+ # Strip off any leading space
+ mntopts=${mntopts# }
+
+ # Execute remote command to mount the target, creating the mount point first
+ verbose_output "Mounting Lustre ${DEVICE_TYPE[i]} target"\
+ "${DEVICE_NAME[i]} (opts: $mntopts) on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..."
+ $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
+error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; }
+mkdir -p ${MOUNT_POINT[i]} || \\
+ error \${PIPESTATUS[0]} \"failed to mkdir ${MOUNT_POINT[i]}\"
+mount -t $FS_TYPE $mntopts ${DEVICE_NAME[i]} ${MOUNT_POINT[i]} || \\
+ error \${PIPESTATUS[0]} \\
+ \"failed to mount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\""
+ return ${PIPESTATUS[0]}
+}
+
+# Stop the service on one Lustre server target
+stop_service() {
+ declare -i i=$1
+
+ # Execute remote command to unmount the target (no-op if it is not mounted)
+ verbose_output "Unmounting Lustre ${DEVICE_TYPE[i]} target"\
+ "${DEVICE_NAME[i]} on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..."
+ $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin
+error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; }
+if grep -q \" ${MOUNT_POINT[i]} \" /proc/mounts; then
+ umount -d -f ${MOUNT_POINT[i]} || \\
+ error \${PIPESTATUS[0]} \\
+ \"failed to unmount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\"
+else
+ echo \"${DEVICE_NAME[i]} was not mounted on\"\\
+ \"${HOST_NAME[i]}:${MOUNT_POINT[i]}\"
+fi"
+ return ${PIPESTATUS[0]}
+}
+
+# mass_op op_type target_type
+# Start/stop the services on all targets of the given type in parallel
+mass_op() {
+ local op_type=$1
+ local target_type=$2
+
+ local op_func
+ declare -i i
+ declare -i pid_num=0
+ declare -a REMOTE_PID
+ local RC=0
+
+ if [ -z "$op_type" -o -z "$target_type" ]; then
+ error_output "mass_op(): Missing argument!"
+ return 1
+ fi
+
+ case "$op_type" in
+ "start") op_func=start_service;;
+ "stop") op_func=stop_service;;
+ *) error_output "mass_op(): Invalid op type \"$op_type\"!" && return 1;;
+ esac
+
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ if [ "${DEVICE_TYPE[i]}" = "$target_type" ] \
+ && [[ "${FORMAT_OPTIONS[i]}" != *noformat* ]]; then
+ eval "$op_func $i &"
+ REMOTE_PID[$pid_num]=$!
+ let pid_num=$pid_num+1
+ fi
+ done
+
+ for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+ wait ${REMOTE_PID[${pid_num}]}
+ local RC1=${PIPESTATUS[0]}
+ [ $RC1 -ne 0 ] && RC=$RC1
+ done
+
+ [ $RC -ne 0 ] && return $RC
+
+ return 0
+}
+
+# Unload the Lustre kernel modules from all hosts to clean up
+unload_modules() {
+ local command
+ local host
+ local host_list
+
+ host_list=$(comma_list "${HOST_NAME[@]}")
+ [ -z "$host_list" ] && return 0
+
+ command="PATH=\$PATH:/sbin:/usr/sbin
+if grep -q libcfs /proc/modules; then
+ lctl net down 1>/dev/null 2>&1
+ lustre_rmmod
+fi"
+
+ if is_pdsh; then
+ $REMOTE $host_list "$command"
+ else
+ for host in ${host_list//,/ }; do
+ $REMOTE $host "$command"
+ done
+ fi
+}
+
+# Start the services on Lustre server targets
+mass_start() {
+ declare -i i
+ local combo_mgs_mdt=false
+
+ if [ ${#HOST_NAME[@]} -eq 0 ]; then
+ verbose_output "There are no Lustre targets specified."
+ return 0
+ fi
+
+ # Bring up the LNET network on the MGS node first
+ start_mgs_lnet || return ${PIPESTATUS[0]}
+
+ local checked_hosts=""
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ host_in_hostlist ${HOST_NAME[i]} $checked_hosts && continue
+ if ! is_mgs_node ${HOST_NAME[i]}; then
+ # Add module options to the module configuration file
+ add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]}
+
+ # Check lnet networks
+ check_lnet $i || return ${PIPESTATUS[0]}
+
+ checked_hosts="$checked_hosts,${HOST_NAME[i]}"
+ fi
+ done
+
+ # Start MGS or the MGS service on combo MGS/MDT (with "-o nosvc -n" options)
+ if [ -n "${MGS_NODENAME[0]}" ]; then
+ local idx=${MGS_IDX[0]}
+ if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then
+ # Combo MGS/MDT
+ combo_mgs_mdt=true
+ start_service ${MGS_IDX[0]} "-o nosvc -n" || return ${PIPESTATUS[0]}
+ else
+ start_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
+ fi
+ fi
+
+ # Start OST(s)
+ mass_op "start" "ost" || return ${PIPESTATUS[0]}
+
+ # Start the MDT service on combo MGS/MDT (with "-o nomgs" option)
+ if $combo_mgs_mdt; then
+ start_service ${MGS_IDX[0]} "-o nomgs" || return ${PIPESTATUS[0]}
+ fi
+
+ # Start MDT(s)
+ mass_op "start" "mdt" || return ${PIPESTATUS[0]}
+
+ verbose_output "Success on all Lustre targets!"
+ return 0
+}
+
+# Stop the services on Lustre server targets (reverse order of mass_start)
+mass_stop() {
+ declare -i i
+
+ if [ ${#HOST_NAME[@]} -eq 0 ]; then
+ verbose_output "There are no Lustre targets specified."
+ return 0
+ fi
+
+ # Stop MDT(s)
+ mass_op "stop" "mdt" || return ${PIPESTATUS[0]}
+
+ # Stop the MDT service on combo MGS/MDT
+ if [ -n "${MGS_NODENAME[0]}" ]; then
+ local idx=${MGS_IDX[0]}
+ if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then
+ # Combo MGS/MDT
+ stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
+ fi
+ fi
+
+ # Stop OST(s)
+ mass_op "stop" "ost" || return ${PIPESTATUS[0]}
+
+ # Stop MGS or the MGS service on combo MGS/MDT
+ if [ -n "${MGS_NODENAME[0]}" ]; then
+ stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]}
+ fi
+
+ unload_modules
+
+ return 0
+}
+
+#********************************* Main Flow **********************************#
+
+# Check the CSV file
+check_file $CSV_FILE || exit ${PIPESTATUS[0]}
+
+# Get the list of nodes to be operated on
+NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"
+
+# Check the node list
+check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]}
+
+# Check the network connectivity and hostnames
+if $VERIFY_CONNECT; then
+ verbose_output "Checking the cluster network connectivity and hostnames..."
+ $VERIFY_CLUSTER_NET $NODELIST_OPT $VERBOSE_OPT $CSV_FILE || \
+ exit ${PIPESTATUS[0]}
+ verbose_output "Check the cluster network connectivity and hostnames OK!"
+fi
+
+# Start or stop the Lustre cluster services
+echo "$(basename $0): ******** Lustre cluster configuration BEGIN ********"
+
+get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]}
+
+check_mgs || exit ${PIPESTATUS[0]}
+
+if ! $STOP_SERVICE; then
+ mass_start || exit ${PIPESTATUS[0]}
+else
+ mass_stop || exit ${PIPESTATUS[0]}
+fi
+
+echo "$(basename $0): ******** Lustre cluster configuration END **********"
+
+exit 0