From 29d24e4a400a069ea6bc06f37a04b6860acc03d6 Mon Sep 17 00:00:00 2001 From: yujian Date: Tue, 16 Sep 2008 08:11:22 +0000 Subject: [PATCH] Branch HEAD b=14095 i=nathan.rutman i=brian 1) add lustre_start utility to start or stop multiple Lustre servers from a CSV file 2) fix the utility path issue in bug 14094 3) use 'error_output()' to format error messages 4) move common variables and functions to lc_common 5) fix the "lustre_config -h" issue in bug 14133 6) fix the module_opts issue in bug 14092 7) remove the duplicates from checking lnet connectivity to MGS node --- lustre/ChangeLog | 5 + lustre/scripts/Makefile.am | 3 +- lustre/scripts/lc_common | 583 +++++++++++++++++++++++++++++++++---- lustre/scripts/lc_hb.in | 64 ++-- lustre/scripts/lc_lvm.in | 37 ++- lustre/scripts/lc_md.in | 35 ++- lustre/scripts/lc_net.in | 15 +- lustre/scripts/lustre_config.in | 542 +++------------------------------- lustre/scripts/lustre_createcsv.in | 125 ++++---- lustre/scripts/lustre_start.in | 372 +++++++++++++++++++++++ 10 files changed, 1096 insertions(+), 685 deletions(-) create mode 100644 lustre/scripts/lustre_start.in diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 4ecc54e..cf138ed 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1411,6 +1411,11 @@ Description: xid & resent requests Details : Initialize RPC XID from clock at startup (randomly if clock is bad). +Severity : enhancement +Bugzilla : 14095 +Description: Add lustre_start utility to start or stop multiple Lustre servers + from a CSV file. + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. 
diff --git a/lustre/scripts/Makefile.am b/lustre/scripts/Makefile.am index 5b9b764..15d05c2 100644 --- a/lustre/scripts/Makefile.am +++ b/lustre/scripts/Makefile.am @@ -37,7 +37,8 @@ sbinscripts = lc_servip lustre_up14 lustre_rmmod # These are scripts that are generated from .in files -genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv lc_md lc_lvm +genscripts = lustre_config lc_modprobe lc_net lc_hb lc_cluman lustre_createcsv \ + lc_md lc_lvm lustre_start sbin_SCRIPTS = $(genscripts) $(sbinscripts) bin_SCRIPTS = lustre_req_history diff --git a/lustre/scripts/lc_common b/lustre/scripts/lc_common index 8b1bcbe..2d67971 100644 --- a/lustre/scripts/lc_common +++ b/lustre/scripts/lc_common @@ -1,38 +1,37 @@ -# +#!/bin/bash + # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + # -# lc_common - This file contains functions to be used by most or all +# lc_common - This file contains common variables and functions to be used by # Lustre cluster config scripts. # ################################################################################ -# Remote command -REMOTE=${REMOTE:-"ssh -x -q"} -#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"} -export REMOTE +#****************************** Common Variables ******************************# +export PATH=$PATH:/sbin:/usr/sbin -# Lustre utilities -CMD_PATH=${CMD_PATH:-"/usr/sbin"} -MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"} -TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"} -LCTL=${LCTL:-"$CMD_PATH/lctl"} +# Remote command +export REMOTE=${REMOTE:-"ssh -x -q"} +#export REMOTE=${REMOTE:-"pdsh -S -R ssh -w"} -EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"} +# Lustre utilities +export MKFS=${MKFS:-"mkfs.lustre"} +export TUNEFS=${TUNEFS:-"tunefs.lustre"} +export LCTL=${LCTL:-"lctl"} -# Raid command path -RAID_CMD_PATH=${RAID_CMD_PATH:-"/sbin"} -MDADM=${MDADM:-"$RAID_CMD_PATH/mdadm"} +# Software RAID command +export MDADM=${MDADM:-"mdadm"} # Some scripts to be called -SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd 
`dirname $0`; echo $PWD)"} -MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe -VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net -GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb -GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman -SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip -SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon -SCRIPT_CONFIG_MD=${SCRIPTS_PATH}/lc_md -SCRIPT_CONFIG_LVM=${SCRIPTS_PATH}/lc_lvm +export MODULE_CONFIG=${MODULE_CONFIG:-"lc_modprobe"} +export VERIFY_CLUSTER_NET=${VERIFY_CLUSTER_NET:-"lc_net"} +export GEN_HB_CONFIG=${GEN_HB_CONFIG:-"lc_hb"} +export GEN_CLUMGR_CONFIG=${GEN_CLUMGR_CONFIG:-"lc_cluman"} +export SCRIPT_VERIFY_SRVIP=${SCRIPT_VERIFY_SRVIP:-"lc_servip"} +export SCRIPT_GEN_MONCF=${SCRIPT_GEN_MONCF:-"lc_mon"} +export SCRIPT_CONFIG_MD=${SCRIPT_CONFIG_MD:-"lc_md"} +export SCRIPT_CONFIG_LVM=${SCRIPT_CONFIG_LVM:-"lc_lvm"} # Variables of HA software HBVER_HBV1="hbv1" # Heartbeat version 1 @@ -62,23 +61,42 @@ FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files # Marker of the MD device line -MD_MARKER=${MD_MARKER:-"MD"} +export MD_MARKER=${MD_MARKER:-"MD"} # Marker of the LVM device line -PV_MARKER=${PV_MARKER:-"PV"} -VG_MARKER=${VG_MARKER:-"VG"} -LV_MARKER=${LV_MARKER:-"LV"} +export PV_MARKER=${PV_MARKER:-"PV"} +export VG_MARKER=${VG_MARKER:-"VG"} +export LV_MARKER=${LV_MARKER:-"LV"} -declare -a CONFIG_ITEM # Items in each line of the csv file +declare -a CONFIG_ITEM # Items in each line of the CSV file declare -a NODE_NAME # Hostnames of nodes have been configured -# Nodelist variables -USE_ALLNODES=false # default is not to operate on all the nodes -SPECIFIED_NODELIST="" # specified list of nodes to be operated on -EXCLUDED_NODELIST="" # list of nodes to be excluded +declare -a MGS_NODENAME # Node names of the MGS servers +declare -a MGS_IDX # Indexes of MGSs in the global arrays +declare -i MGS_NUM # Number of MGS servers in the cluster +declare -i INIT_IDX + +# All of the Lustre target items in the CSV 
file +declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME +declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS -export PATH=$PATH:$CMD_PATH:$SCRIPTS_PATH:$CLUMAN_TOOLS_PATH:$RAID_CMD_PATH:/sbin:/usr/sbin +# Heartbeat software requires that node names in the configuration directive +# must (normally) match the "uname -n" of that machine. Since the value of the +# "failover nids" field in the CSV file is the NID(s) of failover partner node, +# we have to figure out the corresponding hostname of that node. +declare -a FAILOVERS_NAMES +export VERIFY_CONNECT=true # Verify network connectivity by default +export USE_ALLNODES=false # Not operating on all the nodes by default +export SPECIFIED_NODELIST="" # Specified list of nodes to be operated on +export EXCLUDED_NODELIST="" # Specified list of nodes to be excluded +export NODES_TO_USE="" # Defacto list of nodes to be operated on +export NODELIST_OPT="" +export VERBOSE_OUTPUT=false +export VERBOSE_OPT="" + + +#****************************** Common Functions ******************************# # verbose_output string # Output verbose information $string @@ -89,6 +107,24 @@ verbose_output() { return 0 } +# error_output string +# Output error string to stderr, prefixing with ERROR +# for easy error parsing from the rest of the output. +error_output() { + echo >&2 "$(basename $0): ERROR: $*" + return 0 +} + +# error_exit rc string +# Output error to stderr via error_output and exit with rc. +error_exit() { + local rc=$1 + shift + + error_output $* + exit $rc +} + # Check whether the reomte command is pdsh is_pdsh() { if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then @@ -103,13 +139,13 @@ is_pdsh() { check_file() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: check_file() error: Missing csv file!" + error_output "check_file(): Missing CSV file!" return 1 fi - CSV_FILE=$1 + local CSV_FILE=$1 if [ ! 
-s ${CSV_FILE} ]; then - echo >&2 "`basename $0`: check_file() error: ${CSV_FILE}"\ + error_output "check_file(): ${CSV_FILE}"\ "does not exist or is empty!" return 1 fi @@ -118,21 +154,21 @@ check_file() { } # parse_line line -# Parse a line in the csv file +# Parse a line in the CSV file parse_line() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: parse_line() error: Missing argument!" + error_output "parse_line(): Missing argument!" return 1 fi declare -i i=0 # Index of the CONFIG_ITEM array - declare -i length=0 + declare -i length=0 declare -i idx=0 - declare -i s_quote_flag=0 # Flag of the single quote character + declare -i s_quote_flag=0 # Flag of the single quote character declare -i d_quote_flag=0 # Flag of the double quotes character local TMP_LETTER LINE - + LINE="$*" # Initialize the CONFIG_ITEM array @@ -239,12 +275,12 @@ remote_error() { ret_str=$* if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then - echo >&2 "`basename $0`: ${fn_name}() error: ${ret_str}" + error_output "${fn_name}(): ${ret_str}" return 0 fi if [ -z "${ret_str}" ]; then - echo >&2 "`basename $0`: ${fn_name}() error:" \ + error_output "${fn_name}():" \ "No results from remote!" \ "Check network connectivity between the local host and ${host_addr}!" return 0 @@ -267,7 +303,7 @@ nid2hostname() { echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!" return 1 fi - + case "${nettype}" in lo*) host_name=`hostname`;; elan*) # QsNet @@ -365,7 +401,7 @@ ip2hostname_single_node() { echo "${host_name}" return 1 fi - + nid=${host_name}@${nettype} ;; esac @@ -449,18 +485,18 @@ exclude_items_from_list() { OUTLIST="$OUTLIST,$ITEM" fi done - + # strip leading comma echo ${OUTLIST#,} } # get_csv_nodelist csv_file -# Get the comma-separated list of all the nodes from the csv file +# Get the comma-separated list of all the nodes from the CSV file get_csv_nodelist() { local csv_file=$1 local all_nodelist - # Check the csv file + # Check the CSV file ! 
check_file ${csv_file} 2>&1 && return 1 all_nodelist=$(egrep -v "([[:space:]]|^)#" ${csv_file} | cut -d, -f 1) @@ -477,7 +513,7 @@ get_csv_nodelist() { get_nodelist() { local ALL_NODELIST - # Get the list of all the nodes in the csv file + # Get the list of all the nodes in the CSV file ALL_NODELIST=$(get_csv_nodelist ${CSV_FILE}) [ ${PIPESTATUS[0]} -ne 0 ] && echo "${ALL_NODELIST}" && return 1 @@ -513,9 +549,10 @@ check_nodelist() { local nodes_to_use=$1 if [ -z "${nodes_to_use}" ]; then - echo "`basename $0`: There are no hosts to be operated on."\ + error_output "There are no nodes to be operated on."\ "Check the node selection options (-a, -w or -x)." - usage + usage 1>&2 + return 1 else verbose_output "Operating on the following nodes: ${nodes_to_use}" fi @@ -548,7 +585,7 @@ nid_in_nidlist() { # get_mgs_nids mgs_hostname mgs_nids # Get the corresponding NID(s) of the MGS node ${mgs_hostname} from the -# "mgs nids" field of one lustre target in the csv file +# "mgs nids" field of one lustre target in the CSV file get_mgs_nids() { local mgs_node="$1" local all_mgs_nids="$2" @@ -565,7 +602,8 @@ get_mgs_nids() { done # Let's use lctl to get the real nids from the mgs node - ret_str=$(${REMOTE} ${mgs_node} "${LCTL} list_nids" 2>&1 &1 /dev/null" + if [ ${PIPESTATUS[0]} -eq 0 ]; then + # This node can contact the MGS node + verbose_output "${HOST_NAME[i]} can contact the MGS" \ + "node $mgs_node by using nid \"$mgs_nid\"!" + ping_mgs=true + break + fi + done + done + + if ! ${ping_mgs}; then + error_output "check_lnet_connect():" \ + "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\ + "with nids - \"${nids_str}\"! Check ${LCTL} command!" + return 1 + fi + + return 0 +} + +# Start lnet network in the cluster node and check that +# this node can contact the MGS node +check_lnet() { + if ! $VERIFY_CONNECT; then + return 0 + fi + + # Check argument + if [ $# -eq 0 ]; then + error_output "check_lnet(): Missing argument!" 
+ return 1 + fi + + declare -i i=$1 + declare -i j + local ret_str + + # Execute remote command to start lnet network + verbose_output "Starting lnet network on ${HOST_NAME[i]}" + ret_str=$($REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin +modprobe lnet && $LCTL network up" 2>&1) + if [ ${PIPESTATUS[0]} -ne 0 ]; then + error_output "check_lnet(): start lnet network on" \ + "${HOST_NAME[i]} error: $ret_str" + return 1 + fi + + if is_mgs_node ${HOST_NAME[i]}; then + return 0 + fi + + # Execute remote command to check that + # this node can contact the MGS node + for ((j = 0; j < ${MGS_NUM}; j++)); do + if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then + return 1 + fi + done + + return 0 +} + +# Start lnet network in the MGS node +start_mgs_lnet() { + declare -i i + declare -i idx + + if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then + if ${USE_ALLNODES}; then + verbose_output "There is no MGS target in the ${CSV_FILE} file." + else + verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"." + fi + return 0 + fi + + for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do + # Execute remote command to add lnet options lines to + # the MGS node's modprobe.conf/modules.conf + idx=${MGS_IDX[i]} + add_module_options $idx ${MGS_NODENAME[i]} || return ${PIPESTATUS[0]} + + # Start lnet network in the MGS node + check_lnet $idx || return ${PIPESTATUS[0]} + done + + return 0 +} + +# Get all the Lustre target items in the CSV file and do some checks. +get_lustre_items() { + # Check argument + if [ $# -eq 0 ]; then + error_output "get_lustre_items(): Missing argument"\ + "for function get_lustre_items()!" 
+ return 1 + fi + + local CSV_FILE=$1 + local LINE + local marker + local hostname + declare -i line_num=0 + declare -i idx=0 + + exec 9< ${CSV_FILE} + while read -u 9 -r LINE; do + line_num=${line_num}+1 + # verbose_output "Parsing line ${line_num}: $LINE" + + # Get rid of the empty line + [ -z "`echo ${LINE} | awk '/[[:alnum:]]/ {print $0}'`" ] && continue + + # Get rid of the comment line + [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue + + # Skip the Linux MD/LVM line + marker=$(echo ${LINE} | cut -d, -f 2) + if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \ + || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then + continue + fi + + # Skip the host which is not specified in the host list + if ! ${USE_ALLNODES}; then + hostname=$(echo ${LINE} | cut -d, -f 1) + ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue + fi + + # Parse the config line into CONFIG_ITEM + if ! parse_line "$LINE"; then + error_output "parse_line(): Occurred"\ + "on line ${line_num} in ${CSV_FILE}: $LINE" + return 1 + fi + + HOST_NAME[idx]=${CONFIG_ITEM[0]} + MODULE_OPTS[idx]=${CONFIG_ITEM[1]} + DEVICE_NAME[idx]=${CONFIG_ITEM[2]} + MOUNT_POINT[idx]=${CONFIG_ITEM[3]} + DEVICE_TYPE[idx]=${CONFIG_ITEM[4]} + FS_NAME[idx]=${CONFIG_ITEM[5]} + MGS_NIDS[idx]=${CONFIG_ITEM[6]} + INDEX[idx]=${CONFIG_ITEM[7]} + FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]} + MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]} + MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]} + FAILOVERS[idx]=${CONFIG_ITEM[11]} + + MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'` + + # Convert IP addresses in NIDs to hostnames + FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]}) + if [ ${PIPESTATUS[0]} -ne 0 ]; then + error_output "${FAILOVERS_NAMES[idx]}" + return 1 + fi + + # Check some required items for formatting target + if ! check_lustre_item $idx; then + error_output "check_lustre_item():"\ + "Occurred on line ${line_num} in ${CSV_FILE}." 
+ return 1 + fi + + idx=${idx}+1 + done + + return 0 +} diff --git a/lustre/scripts/lc_hb.in b/lustre/scripts/lc_hb.in index 0fa1fb0..08a8661 100644 --- a/lustre/scripts/lc_hb.in +++ b/lustre/scripts/lc_hb.in @@ -1,4 +1,7 @@ #!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + # # lc_hb - script for generating the Heartbeat HA software's # configuration files @@ -62,7 +65,7 @@ while getopts "r:n:vd:" OPTION; do HBVER_OPT=$OPTARG if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \ && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then - echo >&2 $"`basename $0`: Invalid Heartbeat software" \ + error_output "Invalid Heartbeat software" \ "version - ${HBVER_OPT}!" usage fi @@ -71,17 +74,17 @@ while getopts "r:n:vd:" OPTION; do HOSTNAME_OPT=$OPTARG PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` if [ -z "${PRIM_NODENAME}" ]; then - echo >&2 $"`basename $0`: Missing primary nodename!" + error_output "Missing primary nodename!" usage fi HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` if [ ${HOSTNAME_NUM} -lt 2 ]; then - echo >&2 $"`basename $0`: Missing failover nodenames!" + error_output "Missing failover nodenames!" usage fi if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ] then - echo >&2 $"`basename $0`: Heartbeat version 1 can" \ + error_output "Heartbeat version 1 can" \ "only support 2 nodes!" usage fi @@ -94,11 +97,11 @@ while getopts "r:n:vd:" OPTION; do TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing target device name!" + error_output "Missing target device name!" usage fi if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing mount point for target"\ + error_output "Missing mount point for target"\ "${TARGET_DEVNAMES[TARGET_NUM]}!" 
usage fi @@ -111,17 +114,17 @@ done # Check the required parameters if [ -z "${HBVER_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -r option!" + error_output "Missing -r option!" usage fi if [ -z "${HOSTNAME_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -n option!" + error_output "Missing -n option!" usage fi if [ -z "${DEVICE_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -d option!" + error_output "Missing -d option!" usage fi @@ -152,13 +155,13 @@ check_remote_file() { local file_name=$2 if [ -z "${host_name}" ]; then - echo >&2 "`basename $0`: check_remote_file() error:"\ + error_output "check_remote_file():"\ "Missing hostname!" return 1 fi if [ -z "${file_name}" ]; then - echo >&2 "`basename $0`: check_remote_file() error:"\ + error_output "check_remote_file():"\ "Missing file name!" return 1 fi @@ -166,7 +169,7 @@ check_remote_file() { # Execute remote command to check the file ${REMOTE} ${host_name} "[ -e ${file_name} ]" if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: check_remote_file() error:"\ + error_output "check_remote_file():"\ "${file_name} does not exist in host ${host_name}!" return 1 fi @@ -184,7 +187,7 @@ hb_running() { ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1` if [ $? -ne 0 ]; then if [ "${ret_str}" = "${ret_str#*stop*}" ]; then - echo >&2 "`basename $0`: hb_running() error:"\ + error_output "hb_running():"\ "remote command to ${host_name} error: ${ret_str}!" return 2 else @@ -202,9 +205,10 @@ stop_heartbeat() { local host_name=$1 local ret_str - ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1` + ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin +service heartbeat stop < /dev/null" 2>&1) if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: stop_heartbeat() error:"\ + error_output "stop_heartbeat():"\ "remote command to ${host_name} error: ${ret_str}!" 
return 1 fi @@ -223,7 +227,7 @@ check_heartbeat() { for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do # Check Heartbeat configuration directory if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then - echo >&2 "`basename $0`: check_heartbeat() error:"\ + error_output "check_heartbeat():"\ "Is Heartbeat package installed?" return 1 fi @@ -231,8 +235,8 @@ check_heartbeat() { if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then # Check mon configuration directory if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then - echo >&2 "`basename $0`: check_heartbeat()"\ - "error: Is mon package installed?" + error_output "check_heartbeat():"\ + "Is mon package installed?" return 1 fi fi @@ -240,8 +244,8 @@ check_heartbeat() { if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then # Check crm directory if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then - echo >&2 "`basename $0`: check_heartbeat()"\ - "error: Is Heartbeat v2 package installed?" + error_output "check_heartbeat():"\ + "Is Heartbeat v2 package installed?" return 1 fi fi @@ -284,8 +288,8 @@ get_srvname() { local ret_str # Execute remote command to get the target server name - ret_str=`${REMOTE} ${host_name} \ - "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` + ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin +${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1) if [ $? -ne 0 ]; then echo "`basename $0`: get_srvname() error:" \ "from host ${host_name} - ${ret_str}" @@ -321,7 +325,7 @@ get_srvnames() { TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_SRVNAMES[i]}" + error_output "${TARGET_SRVNAMES[i]}" return 1 fi done @@ -397,7 +401,7 @@ create_hacf() { touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]} scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/ if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp ha.cf file"\ + error_output "Failed to scp ha.cf file"\ "to node ${NODE_NAMES[idx]}!" 
return 1 fi @@ -444,7 +448,7 @@ create_haresources() { python ${CIB_GEN_SCRIPT} --stdout \ ${HARES_LUSTRE} > ${CIB_LUSTRE} if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to generate cib.xml file"\ + error_output "Failed to generate cib.xml file"\ "for node ${PRIM_NODENAME}!" return 1 fi @@ -455,7 +459,7 @@ create_haresources() { /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]} scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/ if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp haresources file"\ + error_output "Failed to scp haresources file"\ "to node ${NODE_NAMES[idx]}!" return 1 fi @@ -463,7 +467,7 @@ create_haresources() { if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/ if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp cib.xml"\ + error_output "Failed to scp cib.xml"\ "file to node ${NODE_NAMES[idx]}!" return 1 fi @@ -491,7 +495,7 @@ create_authkeys() { touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]} scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/ if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp authkeys file"\ + error_output "Failed to scp authkeys file"\ "to node ${NODE_NAMES[idx]}!" return 1 fi @@ -547,7 +551,7 @@ create_moncf() { ${SCRIPT_GEN_MONCF} ${params} if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to generate mon.cf file"\ + error_output "Failed to generate mon.cf file"\ "by using ${SCRIPT_GEN_MONCF}!" return 1 fi @@ -560,7 +564,7 @@ create_moncf() { scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/ if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp mon.cf file"\ + error_output "Failed to scp mon.cf file"\ "to node ${NODE_NAMES[idx]}!" 
return 1 fi diff --git a/lustre/scripts/lc_lvm.in b/lustre/scripts/lc_lvm.in index 98248d7..3f0d616 100644 --- a/lustre/scripts/lc_lvm.in +++ b/lustre/scripts/lc_lvm.in @@ -162,17 +162,19 @@ shift `expr $OPTIND - 1` # Here we expect the csv file if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: Missing csv file!" + error_output "Missing csv file!" usage fi +CSV_FILE=$1 + # check_lvm_item index # # Check the items required for managing LVM device ${LVM_NAME[index]} check_lvm_item() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "Missing argument!" return 1 fi @@ -181,7 +183,7 @@ check_lvm_item() { # Check hostname if [ -z "${HOST_NAME[i]}" ]; then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "hostname item has null value!" return 1 fi @@ -190,7 +192,7 @@ check_lvm_item() { if [ -z "${LVM_NAME[i]}" ] \ && [ "${LINE_MARKER[i]}" != "${LV_MARKER}" -a "${OP_MODE[i]}" != "remove" ] then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "LVM component name item has null value!" return 1 fi @@ -199,7 +201,7 @@ check_lvm_item() { if [ -n "${OP_MODE[i]}" ] \ && [ "${OP_MODE[i]}" != "create" -a "${OP_MODE[i]}" != "remove" ] then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "Invalid operation mode item - \"${OP_MODE[i]}\"!" return 1 fi @@ -208,20 +210,20 @@ check_lvm_item() { if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then if [ "${LINE_MARKER[i]}" = "${VG_MARKER}" -a -z "${SIXTH_ITEM[i]}" ] then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "pv paths item of vg ${LVM_NAME[i]} has null value!" return 1 fi if [ "${LINE_MARKER[i]}" = "${LV_MARKER}" ]; then if [ -z "${SIXTH_ITEM[i]}" ]; then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "lv size item has null value!" 
return 1 fi if [ -z "${SEVENTH_ITEM[i]}" ]; then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "vg name item has null value!" return 1 fi @@ -237,11 +239,11 @@ check_lvm_item() { get_lvm_items() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: get_lvm_items() error: Missing csv file!" + error_output "get_lvm_items(): Missing csv file!" return 1 fi - CSV_FILE=$1 + local CSV_FILE=$1 local LINE line_marker local hostname declare -i line_num=0 @@ -280,7 +282,7 @@ get_lvm_items() { # Check some required items if ! check_lvm_item $idx; then - echo >&2 "`basename $0`: check_lvm_item() error:"\ + error_output "check_lvm_item():"\ "Occurred on line ${line_num} in ${CSV_FILE}." return 1 fi @@ -473,7 +475,7 @@ construct_lvm_cmdline() { fi ;; *) - echo >&2 "`basename $0`: construct_lvm_cmdline() error:"\ + error_output "construct_lvm_cmdline():"\ "Invalid operation mode - \"${OP_MODE[i]}\"!" return 1 ;; @@ -511,7 +513,7 @@ config_lvm_devs() { verbose_output "Configuring LVM devices in host ${host_name}..." verbose_output "Configure command line is: \"${LVM_CMDLINE}\"" REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${LVM_CMDLINE}\"" - ${REMOTE} ${host_name} "(${EXPORT_PATH} ${LVM_CMDLINE})" >&2 & + $REMOTE $host_name "export PATH=\$PATH:/sbin:/usr/sbin; $LVM_CMDLINE" & REMOTE_PID[pid_num]=$! let "pid_num += 1" @@ -553,7 +555,7 @@ config_lvm() { for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do wait ${REMOTE_PID[${pid_num}]} if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: config_lvm() error: Failed"\ + error_output "config_lvm(): Failed"\ "to execute \"${REMOTE_CMD[${pid_num}]}\"!" failed_status=true fi @@ -569,13 +571,10 @@ config_lvm() { # Main flow # Check the csv file -if ! 
check_file $1; then - exit 1 -fi +check_file $CSV_FILE || exit ${PIPESTATUS[0]} # Get the list of nodes to be operated on -NODES_TO_USE=$(get_nodelist) -[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1 +NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE" # Check the node list check_nodelist ${NODES_TO_USE} || exit 1 diff --git a/lustre/scripts/lc_md.in b/lustre/scripts/lc_md.in index ab741af..0790ebc 100644 --- a/lustre/scripts/lc_md.in +++ b/lustre/scripts/lc_md.in @@ -114,17 +114,19 @@ shift `expr $OPTIND - 1` # Here we expect the csv file if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: Missing csv file!" + error_output "Missing csv file!" usage fi +CSV_FILE=$1 + # check_md_item index # # Check the items required for managing MD device ${MD_NAME[index]} check_md_item() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "Missing argument!" return 1 fi @@ -133,7 +135,7 @@ check_md_item() { # Check hostname if [ -z "${HOST_NAME[i]}" ]; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "hostname item has null value!" return 1 fi @@ -142,19 +144,19 @@ check_md_item() { if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then # Check MD device name if [ -z "${MD_NAME[i]}" ]; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "md name item has null value!" return 1 fi if [ -z "${RAID_LEVEL[i]}" ]; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "raid level item of MD device ${MD_NAME[i]} has null value!" return 1 fi if [ -z "${MD_DEVS[i]}" ]; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "component devices item of ${MD_NAME[i]} has null value!" 
return 1 fi @@ -169,11 +171,11 @@ check_md_item() { get_md_items() { # Check argument if [ $# -eq 0 ]; then - echo >&2 "`basename $0`: get_md_items() error: Missing csv file!" + error_output "get_md_items(): Missing csv file!" return 1 fi - CSV_FILE=$1 + local CSV_FILE=$1 local LINE local hostname declare -i line_num=0 @@ -208,7 +210,7 @@ get_md_items() { # Check some required items if ! check_md_item $idx; then - echo >&2 "`basename $0`: check_md_item() error:"\ + error_output "check_md_item():"\ "Occurred on line ${line_num} in ${CSV_FILE}." return 1 fi @@ -231,7 +233,7 @@ md_is_active() { ret_str=$(${REMOTE} ${host_name} "${cmd}" 2>&1) if [ ${PIPESTATUS[0]} -ne 0 ]; then if [ -n "${ret_str}" ]; then - echo >&2 "`basename $0`: md_is_active() error:"\ + error_output "md_is_active():"\ "remote command to ${host_name} error: ${ret_str}!" return 2 # Error occurred else @@ -365,7 +367,7 @@ construct_mdadm_cmdline() { # Construct the create command line mdadm_cmd=$(construct_mdadm_create_cmdline ${i}) if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${mdadm_cmd}" + error_output "${mdadm_cmd}" return 1 fi @@ -429,7 +431,7 @@ config_md_devs() { verbose_output "Configuring MD devices in host ${host_name}..." verbose_output "Configure command line is: \"${MDADM_CMDLINE}\"" REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${MDADM_CMDLINE}\"" - ${REMOTE} ${host_name} "${MDADM_CMDLINE}" >&2 & + $REMOTE $host_name "export PATH=\$PATH:/sbin:/usr/sbin; $MDADM_CMDLINE" & REMOTE_PID[pid_num]=$! let "pid_num += 1" sleep 1 @@ -471,7 +473,7 @@ config_md() { for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do wait ${REMOTE_PID[${pid_num}]} if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: config_md() error: Failed"\ + error_output "config_md(): Failed"\ "to execute \"${REMOTE_CMD[${pid_num}]}\"!" failed_status=true fi @@ -487,13 +489,10 @@ config_md() { # Main flow # Check the csv file -if ! 
check_file $1; then - exit 1 -fi +check_file $CSV_FILE || exit ${PIPESTATUS[0]} # Get the list of nodes to be operated on -NODES_TO_USE=$(get_nodelist) -[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1 +NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE" # Check the node list check_nodelist ${NODES_TO_USE} || exit 1 diff --git a/lustre/scripts/lc_net.in b/lustre/scripts/lc_net.in index d618c69..16196d3 100644 --- a/lustre/scripts/lc_net.in +++ b/lustre/scripts/lc_net.in @@ -1,4 +1,7 @@ #!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + # # lc_net - script for Lustre cluster network verification # @@ -58,7 +61,7 @@ shift `expr $OPTIND - 1` # Here we expect the csv file if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: Missing csv file!" + error_output "Missing csv file!" usage fi @@ -76,7 +79,7 @@ get_hostnames() { # Get the list of nodes to be operated on NODES_TO_USE=$(get_nodelist) - [ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && return 1 + [ ${PIPESTATUS[0]} -ne 0 ] && error_output "${NODES_TO_USE}" && return 1 # Check the node list if [ -z "${NODES_TO_USE}" ]; then @@ -133,7 +136,7 @@ local_check() { # and get the IP address of this host from ping HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]}) if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${HOST_IPADDRS[i]}" + error_output "${HOST_IPADDRS[i]}" return 1 fi @@ -153,13 +156,13 @@ remote_check() { cmd="ping -c1 ${HOST_NAMES[i]} 2>&1" ret_str=$(${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1) if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then - echo >&2 "`basename $0`: remote_check() error:"\ + error_output "remote_check():"\ "remote to ${HOST_NAMES[i]} error: ${ret_str}!" return 1 fi if [ -z "${ret_str}" ]; then - echo >&2 "`basename $0`: remote_check() error:"\ + error_output "remote_check():"\ "No results from ${HOST_NAMES[i]}! Check the network"\ "connectivity between local host and ${HOST_NAMES[i]}!" 
return 1 @@ -177,7 +180,7 @@ remote_check() { # Check whether ${HOST_NAMES[i]} agrees with the local host # about what its name is resolved to. if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then - echo >&2 "`basename $0`: remote_check() error:"\ + error_output "remote_check():"\ "Local host resolves ${HOST_NAMES[i]} to IP address"\ "\"${HOST_IPADDRS[i]}\", while its own resolution is"\ "\"${ip_addr}\". They are not the same!" diff --git a/lustre/scripts/lustre_config.in b/lustre/scripts/lustre_config.in index 3fb13e8..fcdf45f 100644 --- a/lustre/scripts/lustre_config.in +++ b/lustre/scripts/lustre_config.in @@ -17,9 +17,9 @@ # Usage usage() { - cat >&2 < +Usage: $(basename $0) [options] <-a|-w|-x> This script is used to format and set up multiple lustre servers from a csv file. @@ -53,7 +53,6 @@ Usage: `basename $0` [options] (separated by commas) for each target in a Lustre cluster EOF - exit 1 } # Samples @@ -233,29 +232,12 @@ EOF . @scriptlibdir@/lc_common #***************************** Global variables *****************************# -declare -a MGS_NODENAME # node names of the MGS servers -declare -a MGS_IDX # indexes of MGSs in the global arrays -declare -i MGS_NUM # number of MGS servers in the cluster -declare -i INIT_IDX - declare -a NODE_NAMES # node names in the failover group declare -a TARGET_OPTS # target services in one failover group -# All the items in the csv file -declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME -declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS - -# Heartbeat software requires that node names in the configuration directive -# must (normally) match the "uname -n" of that machine. Since the value of the -# "failover nids" field in the csv file is the NID(s) of failover partner node, -# we have to figure out the corresponding hostname of that node. 
-declare -a FAILOVERS_NAMES - -VERIFY_CONNECT=true CONFIG_MD_LVM=false MODIFY_FSTAB=true UPGRADE_TARGET=false -VERBOSE_OUTPUT=false # Get and check the positional parameters while getopts "aw:x:t:ndfmuhv" OPTION; do case $OPTION in @@ -279,9 +261,10 @@ while getopts "aw:x:t:ndfmuhv" OPTION; do if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \ && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \ && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then - echo >&2 $"`basename $0`: Invalid HA software type" \ + error_output "Invalid HA software type" \ "- ${HATYPE_OPT}!" - usage + usage 1>&2 + exit 1 fi ;; n) @@ -300,6 +283,7 @@ while getopts "aw:x:t:ndfmuhv" OPTION; do UPGRADE_TARGET=true ;; h) + usage sample ;; v) @@ -307,7 +291,9 @@ while getopts "aw:x:t:ndfmuhv" OPTION; do VERBOSE_OUTPUT=true ;; ?) - usage + usage 1>&2 + exit 1 + ;; esac done @@ -316,210 +302,18 @@ shift `expr $OPTIND - 1` # Here we expect the csv file if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: Missing csv file!" - usage + error_output "Missing csv file!" + usage 1>&2 + exit 1 fi -# Check the items required for OSTs, MDTs and MGS -# -# When formatting an OST, the following items: hostname, module_opts, -# device name, device type and mgs nids, cannot have null value. -# -# When formatting an MDT or MGS, the following items: hostname, -# module_opts, device name and device type, cannot have null value. -check_item() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: check_item() error: Missing argument"\ - "for function check_item()!" - return 1 - fi - - declare -i i=$1 - - # Check hostname, module_opts, device name and device type - if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\ - ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: Some required"\ - "item has null value! Check hostname, module_opts,"\ - "device name and device type!" 
- return 1 - fi - - # Check mgs nids - if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\ - "item has null value!" - return 1 - fi - - # Check mount point - if [ -z "${MOUNT_POINT[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: mount"\ - "point item of target ${DEVICE_NAME[i]} has null value!" - return 1 - fi - - return 0 -} - -# Get the number of MGS nodes in the cluster -get_mgs_num() { - INIT_IDX=0 - MGS_NUM=${#MGS_NODENAME[@]} - [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \ - && let "MGS_NUM += 1" -} - -# is_mgs_node hostname -# Verify whether @hostname is a MGS node -is_mgs_node() { - local host_name=$1 - declare -i i - - get_mgs_num - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0 - done - - return 1 -} - -# Check whether the MGS nodes are in the same failover group -check_mgs_group() { - declare -i i - declare -i j - declare -i idx - local mgs_node - - get_mgs_num - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - mgs_node=${MGS_NODENAME[i]} - for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do - [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1 - - idx=${MGS_IDX[j]} - if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ] - then - echo >&2 $"`basename $0`: check_mgs_group() error:"\ - "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\ - "failover group!" - return 1 - fi - done - done - - return 0 -} - -# Get and check MGS servers. -# There should be no more than one MGS specified in the entire csv file. 
-check_mgs() { - declare -i i - declare -i j - declare -i exp_idx # Index of explicit MGS servers - declare -i imp_idx # Index of implicit MGS servers - local is_exp_mgs is_imp_mgs - local mgs_node - - # Initialize the MGS_NODENAME and MGS_IDX arrays - unset MGS_NODENAME - unset MGS_IDX - - exp_idx=1 - imp_idx=1 - for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do - is_exp_mgs=false - is_imp_mgs=false - - # Check whether this node is an explicit MGS node - # or an implicit one - if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then - verbose_output "Explicit MGS target" \ - "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." - is_exp_mgs=true - fi - - if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then - verbose_output "Implicit MGS target" \ - "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." - is_imp_mgs=true - fi - - # Get and check MGS servers - if ${is_exp_mgs} || ${is_imp_mgs}; then - # Check whether more than one MGS target in one MGS node - if is_mgs_node ${HOST_NAME[i]}; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "More than one MGS target in the same node -"\ - "\"${HOST_NAME[i]}\"!" - return 1 - fi - - # Get and check primary MGS server and backup MGS server - if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ] - then - # Primary MGS server - if [ -z "${MGS_NODENAME[0]}" ]; then - if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ - || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "There exist both explicit and implicit MGS"\ - "targets in the csv file!" - return 1 - fi - MGS_NODENAME[0]=${HOST_NAME[i]} - MGS_IDX[0]=$i - else - mgs_node=${MGS_NODENAME[0]} - if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ] - then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "More than one primary MGS nodes in the csv" \ - "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!" 
- else - echo >&2 $"`basename $0`: check_mgs() error:"\ - "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\ - "are failover pair, one of them should use"\ - "\"--noformat\" in the format options item!" - fi - return 1 - fi - else # Backup MGS server - if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ - || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "There exist both explicit and implicit MGS"\ - "targets in the csv file!" - return 1 - fi - - if ${is_exp_mgs}; then # Explicit MGS - MGS_NODENAME[exp_idx]=${HOST_NAME[i]} - MGS_IDX[exp_idx]=$i - exp_idx=$(( exp_idx + 1 )) - else # Implicit MGS - MGS_NODENAME[imp_idx]=${HOST_NAME[i]} - MGS_IDX[imp_idx]=$i - imp_idx=$(( imp_idx + 1 )) - fi - fi - fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}" - done - - # Check whether the MGS nodes are in the same failover group - if ! check_mgs_group; then - return 1 - fi - - return 0 -} +CSV_FILE=$1 # Construct the command line of mkfs.lustre construct_mkfs_cmdline() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + error_output "construct_mkfs_cmdline():"\ "Missing argument for function construct_mkfs_cmdline()!" return 1 fi @@ -548,7 +342,7 @@ construct_mkfs_cmdline() { MKFS_CMD="$MKFS_CMD --mgs --mdt" ;; *) - echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + error_output "construct_mkfs_cmdline():"\ "Invalid device type - \"${DEVICE_TYPE[i]}\"!" return 1 ;; @@ -596,7 +390,7 @@ construct_mkfs_cmdline() { get_nodenames() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: get_nodenames() error: Missing"\ + error_output "get_nodenames(): Missing"\ "argument for function get_nodenames()!" 
return 1 fi @@ -615,7 +409,7 @@ get_nodenames() { do NODE_NAMES[idx]=$(nids2hostname ${nids}) if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${NODE_NAMES[idx]}" + error_output "${NODE_NAMES[idx]}" return 1 fi @@ -645,7 +439,7 @@ gen_ha_config() { HOSTNAME_OPT=${HOST_NAME[i]} if ! get_nodenames $i; then - echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\ + error_output "gen_ha_config(): Can not get the"\ "failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\ "the \"${HOST_NAME[i]}\" failover group!" return 1 @@ -746,238 +540,6 @@ config_ha() { return 0 } -# Get all the items in the csv file and do some checks. -get_items() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: get_items() error: Missing argument"\ - "for function get_items()!" - return 1 - fi - - CSV_FILE=$1 - local LINE - local marker - local hostname - declare -i line_num=0 - declare -i idx=0 - - exec 9< ${CSV_FILE} - while read -u 9 -r LINE; do - line_num=${line_num}+1 - # verbose_output "Parsing line ${line_num}: $LINE" - - # Get rid of the empty line - if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then - continue - fi - - # Get rid of the comment line - if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] - then - continue - fi - - # Skip the Linux MD/LVM line - marker=$(echo ${LINE} | cut -d, -f 2) - if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \ - || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then - continue - fi - - # Skip the host which is not specified in the host list - if ! ${USE_ALLNODES}; then - hostname=$(echo ${LINE} | cut -d, -f 1) - ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue - fi - - # Parse the config line into CONFIG_ITEM - if ! 
parse_line "$LINE"; then - echo >&2 $"`basename $0`: parse_line() error: Occurred"\ - "on line ${line_num} in ${CSV_FILE}: $LINE" - return 1 - fi - - HOST_NAME[idx]=${CONFIG_ITEM[0]} - MODULE_OPTS[idx]=${CONFIG_ITEM[1]} - DEVICE_NAME[idx]=${CONFIG_ITEM[2]} - MOUNT_POINT[idx]=${CONFIG_ITEM[3]} - DEVICE_TYPE[idx]=${CONFIG_ITEM[4]} - FS_NAME[idx]=${CONFIG_ITEM[5]} - MGS_NIDS[idx]=${CONFIG_ITEM[6]} - INDEX[idx]=${CONFIG_ITEM[7]} - FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]} - MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]} - MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]} - FAILOVERS[idx]=${CONFIG_ITEM[11]} - - MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'` - - # Convert IP addresses in NIDs to hostnames - FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]}) - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${FAILOVERS_NAMES[idx]}" - return 1 - fi - - # Check some required items for formatting target - if ! check_item $idx; then - echo >&2 $"`basename $0`: check_item() error:"\ - "Occurred on line ${line_num} in ${CSV_FILE}." - return 1 - fi - - idx=${idx}+1 - done - - return 0 -} - -# check_lnet_connect hostname_index mgs_hostname -# Check whether the target node can contact the MGS node @mgs_hostname -# If @mgs_hostname is null, then it means the primary MGS node -check_lnet_connect() { - declare -i i=$1 - local mgs_node=$2 - - local COMMAND RET_STR - local mgs_prim_nids - local nids_str= - local mgs_nid - local ping_mgs - - # Execute remote command to check that - # this node can contact the MGS node - verbose_output "Checking lnet connectivity between" \ - "${HOST_NAME[i]} and the MGS node ${mgs_node}" - mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'` - - if [ -z "${mgs_node}" -o $MGS_NUM -eq 1 ]; then - nids_str=${mgs_prim_nids} # nids of primary MGS node - if [ -z "${nids_str}" ]; then - echo >&2 $"`basename $0`: check_lnet_connect() error:"\ - "Check the mgs nids item of host ${HOST_NAME[i]}!"\ - "Missing nids of the primary MGS node!" 
- return 1 - fi - else - # Get the corresponding NID(s) of the MGS node ${mgs_node} - # from the "mgs nids" field - nids_str=$(get_mgs_nids ${mgs_node} ${MGS_NIDS[i]}) - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${nids_str}" - return 1 - fi - fi - - ping_mgs=false - for mgs_nid in ${nids_str//,/ } - do - COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1" - RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1) - if [ ${PIPESTATUS[0]} -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ] - then - # This node can contact the MGS node - verbose_output "${HOST_NAME[i]} can contact the MGS" \ - "node ${mgs_node} by using nid \"${mgs_nid}\"!" - ping_mgs=true - break - fi - done - - if ! ${ping_mgs}; then - echo >&2 "`basename $0`: check_lnet_connect() error:" \ - "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\ - "with nids - \"${nids_str}\"! Check ${LCTL} command!" - return 1 - fi - - return 0 -} - -# Start lnet network in the cluster node and check that -# this node can contact the MGS node -check_lnet() { - if ! ${VERIFY_CONNECT}; then - return 0 - fi - - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: check_lnet() error: Missing"\ - "argument for function check_lnet()!" - return 1 - fi - - declare -i i=$1 - declare -i j - local COMMAND RET_STR - - # Execute remote command to start lnet network - verbose_output "Starting lnet network in ${HOST_NAME[i]}" - COMMAND="PATH=\$PATH:/sbin:/usr/sbin modprobe lnet; ${LCTL} network up 2>&1" - RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1) - if [ ${PIPESTATUS[0]} -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ] - then - echo >&2 "`basename $0`: check_lnet() error: remote" \ - "${HOST_NAME[i]} error: ${RET_STR}" - return 1 - fi - - if is_mgs_node ${HOST_NAME[i]}; then - return 0 - fi - - # Execute remote command to check that - # this node can contact the MGS node - for ((j = 0; j < ${MGS_NUM}; j++)); do - if ! 
check_lnet_connect $i ${MGS_NODENAME[j]}; then - return 1 - fi - done - - return 0 -} - -# Start lnet network in the MGS node -start_mgs_lnet() { - declare -i i - declare -i idx - local COMMAND - - if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then - if ${USE_ALLNODES}; then - verbose_output "There is no MGS target in the ${CSV_FILE} file." - else - verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"." - fi - return 0 - fi - - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - # Execute remote command to add lnet options lines to - # the MGS node's modprobe.conf/modules.conf - idx=${MGS_IDX[i]} - COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}" - verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}" - ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2 - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: start_mgs_lnet() error:"\ - "Failed to execute remote command to" \ - "add module options to ${MGS_NODENAME[i]}!"\ - "Check ${MODULE_CONFIG}!" - return 1 - fi - - # Start lnet network in the MGS node - if ! check_lnet ${idx}; then - return 1 - fi - done - - return 0 -} - # Execute remote command to add lnet options lines to remote nodes' # modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets mass_config() { @@ -986,9 +548,10 @@ mass_config() { declare -a REMOTE_CMD declare -i pid_num=0 declare -i i=0 + local checked_hosts="" if [ ${#HOST_NAME[@]} -eq 0 ]; then - verbose_output "There are no lustre targets specified." + verbose_output "There are no Lustre targets specified." return 0 fi @@ -1009,30 +572,22 @@ mass_config() { "${HOST_NAME[i]}" ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error:"\ + error_output "mass_config():"\ "Failed to execute remote command to"\ "create the mountpoint on ${HOST_NAME[i]}!" return 1 fi - if ! $UPGRADE_TARGET && ! is_mgs_node ${HOST_NAME[i]}; then + if ! $UPGRADE_TARGET && ! 
is_mgs_node ${HOST_NAME[i]} && \ + ! host_in_hostlist ${HOST_NAME[i]} $checked_hosts; then # Execute remote command to add lnet options lines to # modprobe.conf/modules.conf - COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}" - verbose_output "Adding lnet module options to" \ - "${HOST_NAME[i]}" - ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error:"\ - "Failed to execute remote command to"\ - "add module options to ${HOST_NAME[i]}!" - return 1 - fi + add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]} # Check lnet networks - if ! check_lnet $i; then - return 1 - fi + check_lnet $i || return ${PIPESTATUS[0]} + + checked_hosts="$checked_hosts,${HOST_NAME[i]}" fi # Execute remote command to format or upgrade Lustre target @@ -1040,7 +595,7 @@ mass_config() { $UPGRADE_TARGET && OP="Upgrading" || OP="Formatting" verbose_output "$OP Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..." - COMMAND="$EXPORT_PATH $MKFS_CMD" + COMMAND="export PATH=\$PATH:/sbin:/usr/sbin; $MKFS_CMD" REMOTE_CMD[$pid_num]="$REMOTE ${HOST_NAME[i]} \"$COMMAND\"" verbose_output "$OP command line is: ${REMOTE_CMD[$pid_num]}" @@ -1056,7 +611,7 @@ mass_config() { for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do wait ${REMOTE_PID[${pid_num}]} if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error: Failed"\ + error_output "mass_config(): Failed"\ "to execute \"${REMOTE_CMD[${pid_num}]}\"!" fail_exit_status=true fi @@ -1122,12 +677,12 @@ modify_fstab() { # Get mount options if [ -n "${MOUNT_OPTIONS[i]}" ]; then # The mount options already specified in the csv file. 
- mntopts=${MOUNT_OPTIONS[i]} + mntopts="${MOUNT_OPTIONS[i]}" else mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\ ${FAILOVERS[i]}) if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${mntopts}" + error_output "${mntopts}" return 1 fi fi @@ -1142,7 +697,7 @@ modify_fstab() { echo -e \"${mntent}\" >> \$(fcanon /etc/fstab)" ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "`basename $0`: modify_fstab() error:"\ + error_output "modify_fstab():"\ "Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\ "to add Lustre target ${DEVICE_NAME[i]}!" return 1 @@ -1152,18 +707,16 @@ modify_fstab() { return 0 } -# Main flow +#********************************* Main Flow **********************************# + # Check the csv file -if ! check_file $1; then - exit 1 -fi +check_file $CSV_FILE || exit ${PIPESTATUS[0]} # Get the list of nodes to be operated on -NODES_TO_USE=$(get_nodelist) -[ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1 +NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE" # Check the node list -check_nodelist ${NODES_TO_USE} || exit 1 +check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]} if ${VERIFY_CONNECT}; then # Check the network connectivity and hostnames @@ -1192,22 +745,17 @@ if $CONFIG_MD_LVM && ! $UPGRADE_TARGET; then fi # Configure the Lustre cluster -echo "`basename $0`: ******** Lustre cluster configuration START ********" -if ! get_items ${CSV_FILE}; then - exit 1 -fi +echo "`basename $0`: ******** Lustre cluster configuration BEGIN ********" -if ! check_mgs; then - exit 1 -fi +get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]} -if ! mass_config; then - exit 1 -fi +check_mgs || exit ${PIPESTATUS[0]} -if ! modify_fstab; then - exit 1 -fi +# Format or upgrade Lustre server targets +mass_config || exit ${PIPESTATUS[0]} + +# Modify /etc/fstab to add the new Lustre server targets +modify_fstab || exit ${PIPESTATUS[0]} # Produce HA software's configuration files if ! 
config_ha; then diff --git a/lustre/scripts/lustre_createcsv.in b/lustre/scripts/lustre_createcsv.in index 752f3cd..37d6ecd 100644 --- a/lustre/scripts/lustre_createcsv.in +++ b/lustre/scripts/lustre_createcsv.in @@ -1,4 +1,7 @@ #!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + # # lustre_createcsv - generate a csv file from a running lustre cluster # @@ -13,7 +16,7 @@ # Usage usage() { - cat >&2 <&2 "`basename $0`: Invalid HA software type" \ + error_output "Invalid HA software type" \ "- ${HATYPE_OPT}!" - usage + usage 1>&2 + exit 1 fi ;; d) GET_MDLVM_INFO=true;; - h) usage;; + h) usage && exit 0;; v) VERBOSE_OUTPUT=true;; f) LUSTRE_CSV_FILE=$OPTARG;; - ?) usage + ?) usage 1>&2 && exit 1;; esac done # Verify the local host is the MGS node mgs_node() { if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then - echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \ + error_output "${LUSTRE_PROC_DEVICES} does" \ "not exist. Lustre kernel modules may not be loaded!" return 1 fi if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then - echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \ + error_output "${LUSTRE_PROC_DEVICES} is" \ "empty. Lustre services may not be started!" return 1 fi if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then - echo >&2 "`basename $0`: error: This node is not a MGS node." \ + error_output "This node is not a MGS node." \ "The script should be run on the MGS node!" return 1 fi @@ -159,7 +162,7 @@ get_hostnames() { fi if [ ! -e ${LNET_PROC_PEERS} ]; then - echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \ + error_output "${LNET_PROC_PEERS} does not" \ "exist. LNET kernel modules may not be loaded" \ "or LNET network may not be up!" 
return 1 @@ -188,14 +191,14 @@ get_hostnames() { # Get the hostnames of the nodes for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do if [ -z "${HOST_NIDS[idx]}" ]; then - echo >&2 "`basename $0`: get_hostnames() error:" \ + error_output "get_hostnames():" \ "Invalid nid - \"${HOST_NIDS[idx]}\"!" return 1 fi HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]}) if [ $? -ne 0 ]; then - echo >&2 "${HOST_NAMES[i]}" + error_output "${HOST_NAMES[i]}" return 1 fi @@ -247,7 +250,8 @@ get_md_configs() { if [ "${first_item}" != "${first_item#devices=}" ]; then MD_DEVS[j]=`echo "${line}" | sed -e 's/devices=//' -e 's/,/ /g'` fi - done < <(${REMOTE} ${host_name} "${MDADM} --detail --scan --verbose") + done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin + ${MDADM} --detail --scan --verbose") if [ $i -eq 0 ]; then verbose_output "There are no active MD devices" \ @@ -265,11 +269,12 @@ get_pv_configs() { local cmd ret_str # Execute remote command to get all the PV informations. - cmd="${EXPORT_PATH} pvdisplay -c | awk -F: '{print \$1}' | xargs" + cmd="PATH=\$PATH:/sbin:/usr/sbin \ +pvdisplay -c | awk -F: '{print \$1}' | xargs" ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1` if [ $? -ne 0 ]; then if [ -n "${ret_str}" ]; then - echo >&2 "`basename $0`: get_pv_configs() error:" \ + error_output "get_pv_configs():" \ "remote command to ${host_name} error: ${ret_str}" else remote_error "get_pv_configs" ${host_name} @@ -295,7 +300,7 @@ get_vg_pvnames() { local cmd ret_str # Execute remote command to get the PV names. - cmd="${EXPORT_PATH} vgdisplay -v ${vg_name} 2>/dev/null\ + cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay -v ${vg_name} 2>/dev/null\ | grep \"PV Name\" | awk '{print \$3}' | xargs" ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1` if [ $? -ne 0 ]; then @@ -333,12 +338,12 @@ get_vg_configs() { unset VG_PVNAMES # Execute remote command to get all the VG names. 
- cmd="${EXPORT_PATH} vgdisplay \ + cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay \ | grep \"VG Name\" | awk '{print \$3}' | xargs" ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1` if [ $? -ne 0 ]; then if [ -n "${ret_str}" ]; then - echo >&2 "`basename $0`: get_vg_configs() error:" \ + error_output "get_vg_configs():" \ "remote command to ${host_name} error: ${ret_str}" else remote_error "get_vg_configs" ${host_name} @@ -357,7 +362,7 @@ get_vg_configs() { VG_NAME[i]=${vg_name} VG_PVNAMES[i]=$(get_vg_pvnames ${host_name} ${VG_NAME[i]}) if [ $? -ne 0 ]; then - echo >&2 "${VG_PVNAMES[i]}" + error_output "${VG_PVNAMES[i]}" return 1 fi let "i += 1" @@ -395,7 +400,7 @@ get_lv_configs() { LV_SIZE[i]=`echo "${line}" | awk -F: '{print $7}' | sed -e 's/.*/&K/'` let "i += 1" - done < <(${REMOTE} ${host_name} "${EXPORT_PATH} lvdisplay -c") + done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin lvdisplay -c") if [ $i -eq 0 ]; then verbose_output "There are no LVs in the host ${host_name}" @@ -439,7 +444,7 @@ get_module_opts() { # Execute remote command to get the kernel version ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo >&2 "`basename $0`: get_module_opts() error:" \ + error_output "get_module_opts():" \ "remote command error: ${ret_str}" return 1 fi @@ -519,7 +524,7 @@ is_ha_target() { ret_str=`${REMOTE} ${host_name} \ "grep ${target_svname} ${res_file}" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo >&2 "`basename $0`: is_ha_target() error:" \ + error_output "is_ha_target():" \ "remote command error: ${ret_str}" return 1 fi @@ -577,7 +582,7 @@ get_hb_configs() { done < <(${REMOTE} ${host_name} "cat ${HA_CF}") if [ -z "${HB_CHANNELS}" ]; then - echo >&2 "`basename $0`: get_hb_configs() error:" \ + error_output "get_hb_configs():" \ "There are no heartbeat channel configs in ${HA_CF}" \ "of host ${host_name} or ${HA_CF} does not exist!" 
return 0 @@ -607,7 +612,7 @@ get_hb_configs() { done < <(${REMOTE} ${host_name} "cat ${HA_RES}") if [ -z "${SRV_IPADDRS}" ]; then - echo >&2 "`basename $0`: get_hb_configs() error: There"\ + error_output "get_hb_configs(): There"\ "are no service address in ${HA_RES} of host"\ "${host_name} or ${HA_RES} does not exist!" return 0 @@ -726,14 +731,14 @@ get_cluman_configs() { # Execute remote command to get Heartbeat channel HB_CHANNELS=$(get_cluman_channel ${host_name}) if [ $? -ne 0 ]; then - echo >&2 "${HB_CHANNELS}" + error_output "${HB_CHANNELS}" fi # Execute remote command to get service IP address SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \ ${TARGET_SVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${SRV_IPADDRS}" + error_output "${SRV_IPADDRS}" return 0 fi @@ -828,7 +833,7 @@ get_svnames(){ let "i += 1" let "j += 1" else - echo >&2 "`basename $0`: get_svnames() error: Invalid"\ + error_output "get_svnames(): Invalid"\ "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\ "- \"${line}\"!" return 1 @@ -869,7 +874,7 @@ get_devname() { if [ "${target_svname}" = "${MGS_SVNAME}" ]; then # Execute remote command to get the device name of mgs target ret_str=`${REMOTE} ${host_name} \ - "/sbin/findfs LABEL=${target_svname}" 2>&1` + "PATH=\$PATH:/sbin:/usr/sbin findfs LABEL=${target_svname}" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ] then @@ -926,7 +931,7 @@ get_devsize() { # Execute remote command to get the device size ret_str=`${REMOTE} ${host_name} \ - "/sbin/blockdev --getsize ${target_devname}" 2>&1` + "PATH=\$PATH:/sbin:/usr/sbin blockdev --getsize ${target_devname}" 2>&1` if [ $? 
-ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_devsize() error:" \ "remote command error: ${ret_str}" @@ -962,7 +967,7 @@ get_realdevname() { # Execute remote command to get the real device name ret_str=`${REMOTE} ${host_name} \ - "/sbin/losetup ${loop_dev}" 2>&1` + "PATH=\$PATH:/sbin:/usr/sbin losetup ${loop_dev}" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_realdevname() error:" \ "remote command error: ${ret_str}" @@ -1038,7 +1043,7 @@ get_devnames(){ TARGET_DEVNAMES[i]=$(get_devname ${host_name} \ ${TARGET_SVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVNAMES[i]}" + error_output "${TARGET_DEVNAMES[i]}" return 1 fi @@ -1048,7 +1053,7 @@ get_devnames(){ "target in ${host_name}." continue else - echo >&2 "`basename $0`: get_devname() error:"\ + error_output "get_devname():"\ "No device corresponding to target" \ "${TARGET_SVNAMES[i]} in ${host_name}!" return 1 @@ -1059,7 +1064,7 @@ get_devnames(){ TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_MNTPNTS[i]}" + error_output "${TARGET_MNTPNTS[i]}" return 1 fi @@ -1070,7 +1075,7 @@ get_devnames(){ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVSIZES[i]}" + error_output "${TARGET_DEVSIZES[i]}" return 1 fi @@ -1078,7 +1083,7 @@ get_devnames(){ TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVNAMES[i]}" + error_output "${TARGET_DEVNAMES[i]}" return 1 fi fi @@ -1095,7 +1100,7 @@ is_target() { "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";; "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";; "*") - echo >&2 "`basename $0`: is_target() error: Invalid" \ + error_output "is_target(): Invalid" \ "target service type - \"$1\"!" 
return 1 ;; @@ -1262,8 +1267,8 @@ get_stripecount() { stripe_count=`echo ${ret_str} | awk '{print $1}'` fi - if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ] - then + if [ "$stripe_count" != "-1" ] && \ + [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]; then echo "`basename $0`: get_stripecount() error: can't" \ "get stripe count of ${target_fsname} in ${host_name}!" return 1 @@ -1359,7 +1364,7 @@ get_ldds(){ ${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null") if [ -z "${flags}" ]; then - echo >&2 "`basename $0`: get_ldds() error: Invalid" \ + error_output "get_ldds(): Invalid" \ "ldd_flags of target ${TARGET_DEVNAMES[i]}" \ "in host ${host_name} - it's value is null!"\ "Check ${TUNEFS} command!" @@ -1376,7 +1381,7 @@ get_ldds(){ # Get the lustre target service type TARGET_DEVTYPES[i]=$(get_devtype ${flags}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVTYPES[i]} From device" \ + error_output "${TARGET_DEVTYPES[i]} From device" \ "${TARGET_DEVNAMES[i]} in host ${host_name}!" return 1 fi @@ -1390,7 +1395,7 @@ get_ldds(){ # Get failover nids of the lustre target TARGET_FAILNIDS[i]=$(get_failnids "${params}") if [ $? -ne 0 ]; then - echo >&2 "${TARGET_FAILNIDS[i]} From device" \ + error_output "${TARGET_FAILNIDS[i]} From device" \ "${TARGET_DEVNAMES[i]} in host ${host_name}!" return 1 fi @@ -1398,7 +1403,7 @@ get_ldds(){ # Get other format options of the lustre target TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}") if [ $? -ne 0 ]; then - echo >&2 "${TARGET_FMTOPTS[i]}" + error_output "${TARGET_FMTOPTS[i]}" return 1 fi @@ -1422,7 +1427,7 @@ get_ldds(){ # Get the stripe count option stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]}) if [ $? 
-ne 0 ]; then - echo >&2 "${stripecount_opt}" + error_output "${stripecount_opt}" return 1 fi @@ -1453,8 +1458,8 @@ get_journalsize() { local ret_str # Execute remote command to get the journal inode number - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Journal inode:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stats -h' ${target_devname} | grep 'Journal inode:'" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_journalsize() error:" \ "remote command error: ${ret_str}" @@ -1471,8 +1476,8 @@ get_journalsize() { fi # Execute remote command to get the journal size - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \ - 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_journalsize() error:" \ "remote command error: ${ret_str}" @@ -1531,8 +1536,8 @@ figure_journal_size() { declare -i journal_size # Execute remote command to get the block count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Block count:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stats -h' ${target_devname} | grep 'Block count:'" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: figure_journal_size() error:" \ "remote command error: ${ret_str}" @@ -1614,8 +1619,8 @@ get_ratio() { local ret_str # Execute remote command to get the inode count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Inode count:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stats -h' ${target_devname} | grep 'Inode count:'" 2>&1` if [ $? 
-ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_ratio() error:" \ "remote command error: ${ret_str}" @@ -1632,8 +1637,8 @@ get_ratio() { fi # Execute remote command to get the block count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Block count:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stats -h' ${target_devname} | grep 'Block count:'" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_ratio() error:" \ "remote command error: ${ret_str}" @@ -1714,8 +1719,8 @@ get_isize() { local ret_str # Execute remote command to get the inode size - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Inode size:'" 2>&1` + ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \ +debugfs -R 'stats -h' ${target_devname} | grep 'Inode size:'" 2>&1` if [ $? -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: get_isize() error:" \ "remote command error: ${ret_str}" @@ -1851,7 +1856,7 @@ get_mkfsopts(){ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVSIZES[i]}" + error_output "${TARGET_DEVSIZES[i]}" return 1 fi fi @@ -1860,7 +1865,7 @@ get_mkfsopts(){ journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \ ${TARGET_DEVSIZES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${journal_opt}" + error_output "${journal_opt}" return 1 fi @@ -1876,7 +1881,7 @@ get_mkfsopts(){ ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \ ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]}) if [ $? -ne 0 ]; then - echo >&2 "${ratio_opt}" + error_output "${ratio_opt}" return 1 fi @@ -1892,7 +1897,7 @@ get_mkfsopts(){ inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \ ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]}) if [ $? 
-ne 0 ]; then - echo >&2 "${inode_size_opt}" + error_output "${inode_size_opt}" return 1 fi @@ -1956,7 +1961,7 @@ get_target_configs() { get_configs() { # Check the hostname if [ -z "$1" ]; then - echo >&2 "`basename $0`: get_configs() error:" \ + error_output "get_configs():" \ "Missing hostname!" return 1 fi diff --git a/lustre/scripts/lustre_start.in b/lustre/scripts/lustre_start.in new file mode 100644 index 0000000..7493bb8 --- /dev/null +++ b/lustre/scripts/lustre_start.in @@ -0,0 +1,372 @@ +#!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + +# +# lustre_start - start or stop multiple Lustre servers from a CSV file +# +# This script is used to parse each line of a CSV (Comma-Separated Value) file +# and execute remote command to start/stop the service on every Lustre server +# target that will be part of the Lustre cluster. +# +################################################################################ + +# Usage +usage() { + cat <<EOF + +Usage: $(basename $0) [options] <-a|-w|-x> <CSV file> + + This script is used to start or stop multiple Lustre servers from a + CSV file. + + Options: + -a select all the nodes from the CSV file to operate on + -w hostname,hostname,... + select the specified list of nodes (separated by commas) to + operate on rather than all the nodes in the CSV file + -x hostname,hostname,... + exclude the specified list of nodes (separated by commas) + -n no net - don't verify network connectivity and hostnames + in the cluster + -m pass "mount options" item in the CSV file to mount command line + -k stop the services on Lustre server targets + -v verbose mode + -h help + CSV file a comma-separated value file that contains configuration + parameters for each target in a Lustre cluster + + Please refer to "lustre_config -h" for the description of CSV file formats. + +EOF +} + +# Get the library of functions +.
@scriptlibdir@/lc_common + +SPECIFY_MNTOPTS=false +STOP_SERVICE=false +# Get and check the positional parameters +while getopts "aw:x:nmkhv" OPTION; do + case $OPTION in + a) + [ -z "$SPECIFIED_NODELIST" ] && [ -z "$EXCLUDED_NODELIST" ] \ + && USE_ALLNODES=true + NODELIST_OPT="$NODELIST_OPT -a" + ;; + w) + USE_ALLNODES=false + SPECIFIED_NODELIST=$OPTARG + NODELIST_OPT="$NODELIST_OPT -w $SPECIFIED_NODELIST" + ;; + x) + USE_ALLNODES=false + EXCLUDED_NODELIST=$OPTARG + NODELIST_OPT="$NODELIST_OPT -x $EXCLUDED_NODELIST" + ;; + n) + VERIFY_CONNECT=false + ;; + m) + SPECIFY_MNTOPTS=true + ;; + k) + STOP_SERVICE=true + ;; + h) + usage + exit 0 + ;; + v) + VERBOSE_OPT="-v" + VERBOSE_OUTPUT=true + ;; + ?) + usage 1>&2 + exit 1 + ;; + esac +done + +# Toss out the parameters we've already processed +shift $((OPTIND - 1)) + +# Here we expect the CSV file +if [ $# -eq 0 ]; then + error_output "Missing CSV file!" + usage 1>&2 + exit 1 +fi + +CSV_FILE=$1 + +# get_fstab_mntopts host_name device_name mount_point +# Get the mount options from the /etc/fstab file +get_fstab_mntopts() { + local host_name=$1 + local dev_name=$2 + local mnt_pnt=$3 + + local mnt_opts="" + + if [ -z "$host_name" -o -z "$dev_name" -o -z "$mnt_pnt" ]; then + echo "get_fstab_mntopts(): Missing argument!" 
+ return 1 + fi + + # Execute remote command to get the mount options from /etc/fstab file + mnt_opts=$($REMOTE $host_name "grep -w ^$dev_name /etc/fstab | \ +grep -w $mnt_pnt | awk '{print \$4}'" 2>/dev/null) + + mnt_opts=${mnt_opts//$host_name: /} + + echo $mnt_opts + return 0 +} + +# Start the service on one Lustre server target +start_service() { + declare -i i=$1 + shift + local extra_mntopts="$*" + local mntopts="" + + # Get mount options + if $SPECIFY_MNTOPTS; then + # Use the "mount options" item from the CSV file + [ -n "${MOUNT_OPTIONS[i]}" ] && mntopts=${MOUNT_OPTIONS[i]} + else + # Do not use the "mount options" item from the CSV file + mntopts=$(get_fstab_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]} \ + ${MOUNT_POINT[i]}) + [ ${PIPESTATUS[0]} -ne 0 ] && error_output "$mntopts" && return 1 + fi + + [ -n "$mntopts" ] && mntopts="-o $mntopts" + [ -n "$extra_mntopts" ] && mntopts="$mntopts $extra_mntopts" + # Strip off any leading space + mntopts=${mntopts# } + + # Execute remote command to start the service + verbose_output "Mounting Lustre ${DEVICE_TYPE[i]} target"\ + "${DEVICE_NAME[i]} (opts: $mntopts) on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..." + $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin +error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; } +mkdir -p ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \"failed to mkdir ${MOUNT_POINT[i]}\" +mount -t $FS_TYPE $mntopts ${DEVICE_NAME[i]} ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \\ + \"failed to mount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\"" + return ${PIPESTATUS[0]} +} + +# Stop the service on one Lustre server target +stop_service() { + declare -i i=$1 + + # Execute remote command to stop the service + verbose_output "Unmounting Lustre ${DEVICE_TYPE[i]} target"\ + "${DEVICE_NAME[i]} on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..."
+ $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin +error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; } +if grep -q \" ${MOUNT_POINT[i]} \" /proc/mounts; then + umount -d -f ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \\ + \"failed to unmount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\" +else + echo \"${DEVICE_NAME[i]} was not mounted on\"\\ + \"${HOST_NAME[i]}:${MOUNT_POINT[i]}\" +fi" + return ${PIPESTATUS[0]} +} + +# mass_op op_type target_type +# Start/stop the services on Lustre server targets in parallel +mass_op() { + local op_type=$1 + local target_type=$2 + + local op_func + declare -i i + declare -i pid_num=0 + declare -a REMOTE_PID + local RC=0 + + if [ -z "$op_type" -o -z "$target_type" ]; then + error_output "mass_op(): Missing argument!" + return 1 + fi + + case "$op_type" in + "start") op_func=start_service;; + "stop") op_func=stop_service;; + *) error_output "mass_op(): Invalid op type \"$op_type\"!" && return 1;; + esac + + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + if [ "${DEVICE_TYPE[i]}" = "$target_type" ] \ + && [[ "${FORMAT_OPTIONS[i]}" != *noformat* ]]; then + eval "$op_func $i &" + REMOTE_PID[$pid_num]=$! 
+ let pid_num=$pid_num+1 + fi + done + + for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do + wait ${REMOTE_PID[${pid_num}]} + local RC1=${PIPESTATUS[0]} + [ $RC1 -ne 0 ] && RC=$RC1 + done + + [ $RC -ne 0 ] && return $RC + + return 0 +} + +# Unload the modules to make cleanup +unload_modules() { + local command + local host + local host_list + + host_list=$(comma_list "${HOST_NAME[@]}") + [ -z "$host_list" ] && return 0 + + command="PATH=\$PATH:/sbin:/usr/sbin +if grep -q libcfs /proc/modules; then + lctl net down 1>/dev/null 2>&1 + lustre_rmmod +fi" + + if is_pdsh; then + $REMOTE $host_list "$command" + else + for host in ${host_list//,/ }; do + $REMOTE $host "$command" + done + fi +} + +# Start the services on Lustre server targets +mass_start() { + declare -i i + local combo_mgs_mdt=false + + if [ ${#HOST_NAME[@]} -eq 0 ]; then + verbose_output "There are no Lustre targets specified." + return 0 + fi + + # Start lnet network on the MGS node + start_mgs_lnet || return ${PIPESTATUS[0]} + + local checked_hosts="" + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + host_in_hostlist ${HOST_NAME[i]} $checked_hosts && continue + if ! 
is_mgs_node ${HOST_NAME[i]}; then + # Add module options to the module configuration file + add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]} + + # Check lnet networks + check_lnet $i || return ${PIPESTATUS[0]} + + checked_hosts="$checked_hosts,${HOST_NAME[i]}" + fi + done + + # Start MGS or the MGS service on combo MGS/MDT (with "-o nosvc -n" options) + if [ -n "${MGS_NODENAME[0]}" ]; then + local idx=${MGS_IDX[0]} + if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then + # Combo MGS/MDT + combo_mgs_mdt=true + start_service ${MGS_IDX[0]} "-o nosvc -n" || return ${PIPESTATUS[0]} + else + start_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + fi + + # Start OST(s) + mass_op "start" "ost" || return ${PIPESTATUS[0]} + + # Start the MDT service on combo MGS/MDT (with "-o nomgs" option) + if $combo_mgs_mdt; then + start_service ${MGS_IDX[0]} "-o nomgs" || return ${PIPESTATUS[0]} + fi + + # Start MDT(s) + mass_op "start" "mdt" || return ${PIPESTATUS[0]} + + verbose_output "Success on all Lustre targets!" + return 0 +} + +# Stop the services on Lustre server targets +mass_stop() { + declare -i i + + if [ ${#HOST_NAME[@]} -eq 0 ]; then + verbose_output "There are no Lustre targets specified." 
+ return 0 + fi + + # Stop MDT(s) + mass_op "stop" "mdt" || return ${PIPESTATUS[0]} + + # Stop the MDT service on combo MGS/MDT + if [ -n "${MGS_NODENAME[0]}" ]; then + local idx=${MGS_IDX[0]} + if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then + # Combo MGS/MDT + stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + fi + + # Stop OST(s) + mass_op "stop" "ost" || return ${PIPESTATUS[0]} + + # Stop MGS or the MGS service on combo MGS/MDT + if [ -n "${MGS_NODENAME[0]}" ]; then + stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + + unload_modules + + return 0 +} + +#********************************* Main Flow **********************************# + +# Check the CSV file +check_file $CSV_FILE || exit ${PIPESTATUS[0]} + +# Get the list of nodes to be operated on +NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE" + +# Check the node list +check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]} + +# Check the network connectivity and hostnames +if $VERIFY_CONNECT; then + verbose_output "Checking the cluster network connectivity and hostnames..." + $VERIFY_CLUSTER_NET $NODELIST_OPT $VERBOSE_OPT $CSV_FILE || \ + exit ${PIPESTATUS[0]} + verbose_output "Check the cluster network connectivity and hostnames OK!" +fi + +# Configure the Lustre cluster +echo "$(basename $0): ******** Lustre cluster configuration BEGIN ********" + +get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]} + +check_mgs || exit ${PIPESTATUS[0]} + +if ! $STOP_SERVICE; then + mass_start || exit ${PIPESTATUS[0]} +else + mass_stop || exit ${PIPESTATUS[0]} +fi + +echo "$(basename $0): ******** Lustre cluster configuration END **********" + +exit 0 -- 1.8.3.1