From 7277e4b80c3ec0b299cf7f3103c0df194a047948 Mon Sep 17 00:00:00 2001 From: yujian Date: Mon, 29 May 2006 13:32:00 +0000 Subject: [PATCH] b=9853 1) add lnet network verification 2) use ssh instead of pdsh 3) add Lustre targets to /etc/fstab 4) generate Heartbeat v1 + Mon and Heartbeat v2 configuration files according to new steps described in bug10474 and bug10370 --- lustre/utils/cluster_scripts/cluster_config.sh | 867 ++++++++++++++++++------- 1 file changed, 643 insertions(+), 224 deletions(-) diff --git a/lustre/utils/cluster_scripts/cluster_config.sh b/lustre/utils/cluster_scripts/cluster_config.sh index 818d8846b..8dce887 100755 --- a/lustre/utils/cluster_scripts/cluster_config.sh +++ b/lustre/utils/cluster_scripts/cluster_config.sh @@ -3,12 +3,12 @@ # cluster_config.sh - configure multiple lustre servers from a csv file # # This script is used to parse each line of a spreadsheet (csv file) and -# execute remote pdsh commands to format (mkfs.lustre) every Lustre target +# execute remote commands to format (mkfs.lustre) every Lustre target # that will be part of the Lustre cluster. # # In addition, it can also verify the network connectivity and hostnames in # the cluster and produce High-Availability software configurations for -# Heartbeat or CluManager +# Heartbeat or CluManager. # ################################################################################ @@ -16,33 +16,46 @@ usage() { cat >&2 < +Usage: `basename $0` [-t HAtype] [-n] [-f] [-m] [-h] [-v] + + This script is used to configure multiple lustre servers from a csv file. -t HAtype produce High-Availability software configurations The argument following -t is used to indicate the High- Availability software type. The HA software types which are currently supported are: hbv1 (Heartbeat v1), hbv2 - (Heartbeat v2) and clumanager (CluManager). + (Heartbeat v2) and cluman (CluManager). -n don't verify network connectivity and hostnames in the cluster -f force-format the Lustre targets using --reformat option + -m modify /etc/fstab to add the new Lustre targets -h help and examples -v verbose mode csv file a spreadsheet that contains configuration parameters (separated by commas) for each target in a Lustre cl- uster + EOF exit 1 } # Samples sample() { - cat >&2 <&2 $"`basename $0`: check_file() error: Lack argument"\ + echo >&2 $"`basename $0`: check_file() error: Missing argument"\ "for function check_file()!" return 1 fi @@ -216,7 +261,7 @@ check_file() { parse_line() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: parse_line() error: Lack argument"\ + echo >&2 $"`basename $0`: parse_line() error: Missing argument"\ "for function parse_line()!" return 1 fi @@ -231,9 +276,7 @@ parse_line() { LINE=$* # Initialize the CONFIG_ITEM array - for ((i = 0; i < ${#CONFIG_ITEM[@]}; i++)); do - CONFIG_ITEM[i]=$"" - done + unset CONFIG_ITEM # Get the length of the line length=${#LINE} @@ -245,7 +288,8 @@ parse_line() { case "${TMP_LETTER}" in ",") - if [ ${s_quote_flag} -eq 1 ] || [ ${d_quote_flag} -eq 1 ]; then + if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ] + then CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER} else i=$i+1 @@ -286,51 +330,94 @@ parse_line() { return 0 } -# Check the elements required for OSTs, MDTs and MGS +# Check the items required for OSTs, MDTs and MGS # -# When formatting an OST, the following elements: hostname, module_opts, +# When formatting an OST, the following items: hostname, module_opts, # device name, device type and mgs nids, cannot have null value. # -# When formatting an MDT or MGS, the following elements: hostname, +# When formatting an MDT or MGS, the following items: hostname, # module_opts, device name and device type, cannot have null value. -check_element() { +check_item() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: check_item() error: Missing argument"\ + "for function check_item()!" + return 1 + fi + + declare -i i=$1 + # Check hostname, module_opts, device name and device type - if [ -z "${HOST_NAME}" ]||[ -z "${MODULE_OPTS}" ]||[ -z "${DEVICE_NAME}" ]\ - ||[ -z "${DEVICE_TYPE}" ]; then - echo >&2 $"`basename $0`: check_element() error: Some required"\ - "element has null value! Check hostname, module_opts,"\ + if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\ + ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: Some required"\ + "item has null value! Check hostname, module_opts,"\ "device name and device type!" return 1 fi # Check mgs nids - if [ "${DEVICE_TYPE}" = "ost" ]&&[ -z "${MGS_NIDS}" ]; then - echo >&2 $"`basename $0`: check_element() error: OST's mgs nids"\ - "element has null value!" + if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\ + "item has null value!" return 1 fi + # Check mount point + if ${MODIFY_FSTAB} && [ -z "${MOUNT_POINT[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: mount"\ + "point item of target ${DEVICE_NAME[i]} has null value!" + return 1 + fi + return 0 } -# Check the elements required for HA configuration -check_ha_element() { +# Check the items required for HA configuration +check_ha_item() { if [ -z "${HATYPE_OPT}" ]; then return 0 fi - # Check service IP element - if [ -z "${SRV_IPADDRS}" ]; then - echo >&2 $"`basename $0`: check_ha_element() error: Service IP"\ - "element has null value!" + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: check_ha_item() error: Missing"\ + "argument for function check_ha_item()!" + return 1 + fi + + declare -i i=$1 + + [ -z "${HB_CHANNELS[i]}" ] && [ -z "${SRV_IPADDRS[i]}" ] \ + && [ -z "${HB_OPTIONS[i]}" ] && return 0 + + # Check mount point + if [ -z "${MOUNT_POINT[i]}" ]; then + echo >&2 $"`basename $0`: check_ha_item() error: mount"\ + "point item of target ${DEVICE_NAME[i]} has null value!" + return 1 + fi + + # Check failover nodes + if [ -z "${FAILOVERS[i]}" ]; then + echo >&2 $"`basename $0`: check_ha_item() error:"\ + "failover item of host ${HOST_NAME[i]} has null value!" + return 1 + fi + + # Check service IP item + if [ "${HATYPE_OPT}" = "${HATYPE_HBV1}" -a -z "${SRV_IPADDRS[i]}" ] + then + echo >&2 $"`basename $0`: check_ha_item() error:"\ + "service IP item of host ${HOST_NAME[i]} has null value!" return 1 fi - # Check heartbeat channel element - if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" -a -z "${HB_CHANNELS}" ] + # Check heartbeat channel item + if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" -a -z "${HB_CHANNELS[i]}" ] then - echo >&2 $"`basename $0`: check_ha_element() error: Heartbeat"\ - "channel element has null value!" + echo >&2 $"`basename $0`: check_ha_item() error: Heartbeat"\ + "channel item of host ${HOST_NAME[i]} has null value!" return 1 fi @@ -340,20 +427,32 @@ check_ha_element() { # Check the number of MGS. # There should be no more than one MGS specified in the entire csv file. check_mgs() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: check_mgs() error: Missing argument"\ + "for function check_mgs()!" + return 1 + fi + + declare -i i=$1 + # Check the number of explicit MGS - if [ "${DEVICE_TYPE#*mgs*}" != "${DEVICE_TYPE}" ]; then - if [ "${EXP_MGS}" = "${HOST_NAME}" ]; then + if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then + if [ "${EXP_MGS}" = "${HOST_NAME[i]}" ]; then echo >&2 $"`basename $0`: check_mgs() error: More than"\ "one explicit MGS in the csv file!" return 1 fi if [ -z "${EXP_MGS}" ]; then - EXP_MGS=${HOST_NAME} + EXP_MGS=${HOST_NAME[i]} + MGS_NODENAME=${EXP_MGS} + MGS_IDX=$i fi - if [ "${EXP_MGS}" != "${HOST_NAME}" ]; then - if [ "${FAILOVERS#*$EXP_MGS*}" = "${FAILOVERS}" ]; then + if [ "${EXP_MGS}" != "${HOST_NAME[i]}" ]; then + if [ "${FAILOVERS[i]#*$EXP_MGS*}" = "${FAILOVERS[i]}" ] + then echo >&2 $"`basename $0`: check_mgs() error:"\ "More than one explicit MGS in the"\ "csv file!" @@ -368,19 +467,22 @@ check_mgs() { fi # Check the number of implicit MGS - if [ "${DEVICE_TYPE}" = "mdt" ]&&[ -z "${MGS_NIDS}" ]; then - if [ "${IMP_MGS}" = "${HOST_NAME}" ]; then + if [ "${DEVICE_TYPE[i]}" = "mdt" ]&&[ -z "${MGS_NIDS[i]}" ]; then + if [ "${IMP_MGS}" = "${HOST_NAME[i]}" ]; then echo >&2 $"`basename $0`: check_mgs() error: More than"\ "one implicit MGS in the csv file!" return 1 fi if [ -z "${IMP_MGS}" ]; then - IMP_MGS=${HOST_NAME} + IMP_MGS=${HOST_NAME[i]} + MGS_NODENAME=${IMP_MGS} + MGS_IDX=$i fi - if [ "${IMP_MGS}" != "${HOST_NAME}" ]; then - if [ "${FAILOVERS#*$IMP_MGS*}" = "${FAILOVERS}" ]; then + if [ "${IMP_MGS}" != "${HOST_NAME[i]}" ]; then + if [ "${FAILOVERS[i]#*$IMP_MGS*}" = "${FAILOVERS[i]}" ] + then echo >&2 $"`basename $0`: check_mgs() error:"\ "More than one implicit MGS in the"\ "csv file!" @@ -405,10 +507,20 @@ check_mgs() { # Construct the command line of mkfs.lustre construct_mkfs_cmdline() { - MKFS_CMD=${CMD_PATH}$"mkfs.lustre " + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + "Missing argument for function"\ + "construct_mkfs_cmdline()!" + return 1 + fi + + declare -i i=$1 + + MKFS_CMD=${MKFS}$" " MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION} - case "${DEVICE_TYPE}" in + case "${DEVICE_TYPE[i]}" in "ost") MKFS_CMD=${MKFS_CMD}$"--ost " ;; @@ -418,92 +530,112 @@ construct_mkfs_cmdline() { "mgs") MKFS_CMD=${MKFS_CMD}$"--mgs " ;; - "mdt|mgs") - MKFS_CMD=${MKFS_CMD}$"--mdt --mgs " - ;; - "mgs|mdt") + "mdt|mgs" | "mgs|mdt") MKFS_CMD=${MKFS_CMD}$"--mdt --mgs " ;; *) echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ - "Invalid device type - \"${DEVICE_TYPE}\"" + "Invalid device type - \"${DEVICE_TYPE[i]}\"!" return 1 ;; esac - if [ -n "${FS_NAME}" ]; then - MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME}$" " + if [ -n "${FS_NAME[i]}" ]; then + MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" " fi - if [ -n "${MGS_NIDS}" ]; then - MGS_NIDS=`echo "${MGS_NIDS}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}$"--mgsnode="${MGS_NIDS}$" " + if [ -n "${MGS_NIDS[i]}" ]; then + MGS_NIDS[i]=`echo "${MGS_NIDS[i]}" | sed 's/^"//' | sed 's/"$//'` + MKFS_CMD=${MKFS_CMD}$"--mgsnode="${MGS_NIDS[i]}$" " fi - if [ -n "${INDEX}" ]; then - MKFS_CMD=${MKFS_CMD}$"--index="${INDEX}$" " + if [ -n "${INDEX[i]}" ]; then + MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" " fi - if [ -n "${FORMAT_OPTIONS}" ]; then - FORMAT_OPTIONS=`echo "${FORMAT_OPTIONS}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS}$" " + if [ -n "${FORMAT_OPTIONS[i]}" ]; then + FORMAT_OPTIONS[i]=`echo "${FORMAT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` + MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" " fi - if [ -n "${MKFS_OPTIONS}" ]; then - MKFS_OPTIONS=`echo "${MKFS_OPTIONS}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS}$"\""$" " + if [ -n "${MKFS_OPTIONS[i]}" ]; then + MKFS_OPTIONS[i]=`echo "${MKFS_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` + MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" " fi - if [ -n "${MOUNT_OPTIONS}" ]; then - MOUNT_OPTIONS=`echo "${MOUNT_OPTIONS}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS}$"\""$" " + if [ -n "${MOUNT_OPTIONS[i]}" ]; then + MOUNT_OPTIONS[i]=`echo "${MOUNT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` + MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" " fi - if [ -n "${FAILOVERS}" ]; then - FAILOVERS=`echo "${FAILOVERS}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}$"--failnode="${FAILOVERS}$" " + if [ -n "${FAILOVERS[i]}" ]; then + FAILOVERS[i]=`echo "${FAILOVERS[i]}" | sed 's/^"//' | sed 's/"$//'` + MKFS_CMD=${MKFS_CMD}$"--failnode="${FAILOVERS[i]}$" " fi - MKFS_CMD=${MKFS_CMD}${DEVICE_NAME} + MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]} return 0 } # Get all the node names in this failover group get_nodenames() { - declare -i idx - local failover_nids failover_nid first_nid + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: get_nodenames() error: Missing"\ + "argument for function get_nodenames()!" + return 1 + fi + + declare -i i=$1 + declare -i idx + local nids_str failover_nids failover_nid first_nid - NODE_NAMES[0]=${HOST_NAME} + # Initialize the NODE_NAMES array + unset NODE_NAMES - failover_nids=`echo ${FAILOVERS}|awk '{split($FAILOVERS, a, ":")}\ - END {for (i in a) print a[i]}'` + NODE_NAMES[0]=${HOST_NAME[i]} - # XXX: Suppose the first nid of one failover node contains the node name + idx=0 + nids_str=${FAILOVERS[i]} + failover_nids=`echo ${nids_str}|awk '{split($nids_str, a, ":")}\ + END {for (idx in a) print a[idx]}'` + + # FIXME: Suppose the first nid of one failover node contains node name idx=1 - for failover_nid in ${failover_nids} - do + for failover_nid in ${failover_nids} + do first_nid=`echo ${failover_nid} | awk -F, '{print $1}'` NODE_NAMES[idx]=${first_nid%@*} idx=$idx+1 - done + done - return 0 + return 0 } -# Produce HA software's configuration files -gen_ha_config() { - local cmd_line - declare -i idx +# Verify whether the format line has HA items +is_ha_line() { + declare -i i=$1 - if [ -z "${HATYPE_OPT}" ]; then - return 0 + if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then + [ -n "${HB_CHANNELS[i]}" ] && return 0 + else + [ -n "${SRV_IPADDRS[i]}" ] && return 0 fi + return 1 +} + +# Produce HA software's configuration files +gen_ha_config() { + declare -i i=$1 + declare -i idx + local cmd_line + # Prepare parameters # Hostnames option - HOSTNAME_OPT=${HOST_NAME} + HOSTNAME_OPT=${HOST_NAME[i]} - if ! get_nodenames; then + if ! get_nodenames $i; then return 1 fi @@ -511,171 +643,441 @@ gen_ha_config() { HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]} done - # Target device option - TARGET_TYPE=${DEVICE_TYPE} - if [ "${TARGET_TYPE}" = "mdt|mgs" -o "${TARGET_TYPE}" = "mgs|mdt" ] - then - TARGET_TYPE=$"mgs_mdt" - fi - TARGET_OPT=${DEVICE_NAME}:${TARGET_TYPE} - # Service IP address option - SRVADDR_OPT=${SRV_IPADDRS} + SRVADDR_OPT=${SRV_IPADDRS[i]} # Heartbeat channels option - HBCHANNEL_OPT=$"\""${HB_CHANNELS}$"\"" + HBCHANNEL_OPT=$"\""${HB_CHANNELS[i]}$"\"" # Heartbeat options option - HBOPT_OPT=$"\""${HB_OPTIONS}$"\"" + HBOPT_OPT=$"\""${HB_OPTIONS[i]}$"\"" + + # Target devices option + DEVICE_OPT=" -d "${TARGET_OPTS[0]} + for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do + DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]} + done # Construct the generation script command line case "${HATYPE_OPT}" in "${HATYPE_HBV1}"|"${HATYPE_HBV2}") # Heartbeat cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}" - cmd_line=${cmd_line}$" -d ${TARGET_OPT} -c ${HBCHANNEL_OPT}" - cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"${VERBOSE_OPT} + cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}"${DEVICE_OPT}${VERBOSE_OPT} - if [ -n "${HB_OPTIONS}" ]; then + if [ -n "${SRV_IPADDRS[i]}" ]; then + cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}" + fi + + if [ -n "${HB_OPTIONS[i]}" ]; then cmd_line=${cmd_line}$" -o ${HBOPT_OPT}" fi ;; "${HATYPE_CLUMGR}") # CluManager cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}" - cmd_line=${cmd_line}$" -d ${TARGET_OPT} -s ${SRVADDR_OPT}" - cmd_line=${cmd_line}${VERBOSE_OPT} + cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"${DEVICE_OPT}${VERBOSE_OPT} if [ -n "${HBCHANNEL_OPT}" ]; then cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}" fi - if [ -n "${HB_OPTIONS}" ]; then + if [ -n "${HB_OPTIONS[i]}" ]; then cmd_line=${cmd_line}$" -o ${HBOPT_OPT}" fi ;; esac # Execute script to generate HA software's configuration files + verbose_output "Generating HA software's configurations in"\ + "${HOST_NAME[i]} failover group..." verbose_output "${cmd_line}" eval $(echo "${cmd_line}") if [ $? -ne 0 ]; then return 1 fi + verbose_output "Generate HA software's configurations in"\ + "${HOST_NAME[i]} failover group OK" return 0 } -# Execute pdsh commands to add lnet options lines to remote nodes' -# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets -# -# If -t option exists, then also to produce the HA software's -# configuration files -mass_config() { +# Configure HA software +config_ha() { + if [ -z "${HATYPE_OPT}" ]; then + return 0 + fi + + declare -i i j k + declare -i prim_idx # Index for PRIM_HOSTNAMES array + declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays + + declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover + # groups in the lustre cluster + declare -a HOST_INDEX # Indices for the same node in all the + # format lines in the csv file + local prim_host + + # Initialize the PRIM_HOSTNAMES array + prim_idx=0 + unset PRIM_HOSTNAMES + + # Get failover groups and generate HA configuration files + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + prim_host=${HOST_NAME[i]} + + for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do + [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2 + done + + target_idx=0 + unset HOST_INDEX + unset TARGET_OPTS + for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do + if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}" + then + HOST_INDEX[target_idx]=$k + TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]} + target_idx=$(( target_idx + 1 )) + fi + done + + if [ ${#TARGET_OPTS[@]} -ne 0 ]; then + PRIM_HOSTNAMES[prim_idx]=${prim_host} + prim_idx=$(( prim_idx + 1 )) + + if ! gen_ha_config ${HOST_INDEX[0]}; then + return 1 + fi + fi + done + + if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then + verbose_output "There are no HA configuration items in the"\ + "csv file. No HA configuration files are generated!" + fi + + rm -rf ${TMP_DIRS} + return 0 +} + + +# Get all the items in the csv file and do some checks. +get_items() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: mass_config() error: Lack argument"\ - "for function mass_config()!" + echo >&2 $"`basename $0`: get_items() error: Missing argument"\ + "for function get_items()!" return 1 fi CSV_FILE=$1 - local LINE COMMAND - declare -a PDSH_PID - declare -a PDSH_CMD - declare -i line_num=1 - declare -i pid_num=0 + local LINE + declare -i line_num=0 + declare -i idx=0 while read -r LINE; do + line_num=${line_num}+1 + # verbose_output "Parsing line ${line_num}: $LINE" + # Get rid of the empty line - if [ -z "`echo ${LINE} | awk '/[[:alnum:]]/{print $0}'`" ]; then - line_num=${line_num}+1 + if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then continue fi # Get rid of the comment line - if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]; then - line_num=${line_num}+1 + if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] + then continue fi # Parse the config line into CONFIG_ITEM if ! parse_line $LINE; then + echo >&2 $"`basename $0`: parse_line() error: Occurred"\ + "on line ${line_num} in ${CSV_FILE}: $LINE" + return 1 + fi + + HOST_NAME[idx]=${CONFIG_ITEM[0]} + MODULE_OPTS[idx]=${CONFIG_ITEM[1]} + DEVICE_NAME[idx]=${CONFIG_ITEM[2]} + MOUNT_POINT[idx]=${CONFIG_ITEM[3]} + DEVICE_TYPE[idx]=${CONFIG_ITEM[4]} + FS_NAME[idx]=${CONFIG_ITEM[5]} + MGS_NIDS[idx]=${CONFIG_ITEM[6]} + INDEX[idx]=${CONFIG_ITEM[7]} + FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]} + MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]} + MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]} + FAILOVERS[idx]=${CONFIG_ITEM[11]} + + HB_CHANNELS[idx]=${CONFIG_ITEM[12]} + SRV_IPADDRS[idx]=${CONFIG_ITEM[13]} + HB_OPTIONS[idx]=${CONFIG_ITEM[14]} + + # Check some required items for formatting target + if ! check_item $idx; then + echo >&2 $"`basename $0`: check_item() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}." return 1 fi - HOST_NAME=${CONFIG_ITEM[0]} - MODULE_OPTS=${CONFIG_ITEM[1]} - DEVICE_NAME=${CONFIG_ITEM[2]} - DEVICE_TYPE=${CONFIG_ITEM[3]} - FS_NAME=${CONFIG_ITEM[4]} - MGS_NIDS=${CONFIG_ITEM[5]} - INDEX=${CONFIG_ITEM[6]} - FORMAT_OPTIONS=${CONFIG_ITEM[7]} - MKFS_OPTIONS=${CONFIG_ITEM[8]} - MOUNT_OPTIONS=${CONFIG_ITEM[9]} - FAILOVERS=${CONFIG_ITEM[10]} - - HB_CHANNELS=${CONFIG_ITEM[11]} - SRV_IPADDRS=${CONFIG_ITEM[12]} - HB_OPTIONS=${CONFIG_ITEM[13]} - - # Check some required elements for formatting target - if ! check_element; then - echo >&2 $"`basename $0`: check_element() error:"\ - "Occurred on line ${line_num} in ${CSV_FILE}" + # Check the items required for HA configuration + if ! check_ha_item $idx; then + echo >&2 $"`basename $0`: check_ha_item() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}." return 1 fi # Check the number of MGS - if ! check_mgs; then + if ! check_mgs $idx; then echo >&2 $"`basename $0`: check_mgs() error:"\ - "Occurred on line ${line_num} in ${CSV_FILE}" + "Occurred on line ${line_num} in ${CSV_FILE}." return 1 fi - # Construct the command line of mkfs.lustre - if ! construct_mkfs_cmdline; then - echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ - "Occurred on line ${line_num} in ${CSV_FILE}" + idx=${idx}+1 + done < ${CSV_FILE} + + return 0 +} + +# Start lnet network in the cluster node and check that +# this node can contact the MGS node +check_lnet() { + if ! ${VERIFY_CONNECT}; then + return 0 + fi + + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: check_lnet() error: Missing"\ + "argument for function check_lnet()!" + return 1 + fi + + declare -i i=$1 + declare -i idx=0 + local COMMAND RET_STR + local nids_str mgs_prim_nids mgs_prim_nid # primary nids of MGS node + local ping_mgs + + # Execute remote command to start lnet network + verbose_output "Starting lnet network in ${HOST_NAME[i]}" + COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1" + RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1` + if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ] + then + echo >&2 "`basename $0`: check_lnet() error: remote" \ + "${HOST_NAME[i]} error: ${RET_STR}" + return 1 + fi + + if [ "${HOST_NAME[i]}" = "${MGS_NODENAME}" ]; then + return 0 + fi + + # Execute remote command to check that + # this node can contact the MGS node + verbose_output "Checking lnet connectivity between" \ + "${HOST_NAME[i]} and the MGS node ${MGS_NODENAME}" + nids_str=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'` + mgs_prim_nids=`echo ${nids_str} | awk '{split($nids_str, a, ",")}\ + END {for (idx in a) print a[idx]}'` + + ping_mgs=false + for mgs_prim_nid in ${mgs_prim_nids} + do + COMMAND=$"${LCTL} ping ${mgs_prim_nid} 5 || echo failed 2>&1" + RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1` + if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ] + then + # This node can contact the MGS node + verbose_output "${HOST_NAME[i]} can contact the MGS" \ + "node ${MGS_NODENAME} by using nid" \ + "${mgs_prim_nid}" + ping_mgs=true + break + fi + done + + if ! ${ping_mgs}; then + echo >&2 "`basename $0`: check_lnet() error: ${HOST_NAME[i]}" \ + "cannot contact the MGS node ${MGS_NODENAME} through"\ + "lnet networks!" + return 1 + fi + + return 0 +} + +# Start lnet network in the MGS node +start_mgs_lnet() { + if [ -z "${MGS_NODENAME}" ]; then + verbose_output "There is no MGS target in the ${CSV_FILE} file." + else + # Execute remote command to add lnet options lines to + # the MGS node's modprobe.conf/modules.conf + COMMAND=$"echo \"${MODULE_OPTS[${MGS_IDX}]}\"|${MODULE_CONFIG}" + verbose_output "Adding lnet module options to ${MGS_NODENAME}" + ${REMOTE} ${MGS_NODENAME} "${COMMAND}" >&2 + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: start_mgs_lnet() error:"\ + "Failed to execute remote command to" \ + "add module options to ${MGS_NODENAME}!" + return 1 + fi + + # Start lnet network in the MGS node + if ! check_lnet ${MGS_IDX}; then return 1 fi + fi - # Produce HA software's configuration files - if ! gen_ha_config; then - return 1 + return 0 +} + +# Execute remote command to add lnet options lines to remote nodes' +# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets +mass_config() { + local COMMAND + declare -a REMOTE_PID + declare -a REMOTE_CMD + declare -i pid_num=0 + declare -i i=0 + + # Start lnet network in the MGS node + if ! start_mgs_lnet; then + return 1 + fi + + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + # Construct the command line of mkfs.lustre + if ! construct_mkfs_cmdline $i; then + return 1 fi - # Execute pdsh command to add lnet options lines to - # modprobe.conf/modules.conf - COMMAND=$"echo \"${MODULE_OPTS}\"|${MODULE_CONFIG}" - verbose_output "Adding module options to ${HOST_NAME}" - verbose_output ${COMMAND} - ${PDSH} -w ${HOST_NAME} ${COMMAND} >&2 & - PDSH_PID[${pid_num}]=$! - PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${COMMAND}" - pid_num=${pid_num}+1 + if [ "${HOST_NAME[i]}" != "${MGS_NODENAME}" ]; then + # Execute remote command to add lnet options lines to + # modprobe.conf/modules.conf + COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}" + verbose_output "Adding lnet module options to" \ + "${HOST_NAME[i]}" + ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: mass_config() error:"\ + "Failed to execute remote command to"\ + "add module options to ${HOST_NAME[i]}!" + return 1 + fi + + # Check lnet networks + if ! check_lnet $i; then + return 1 + fi + fi - # Execute pdsh command to format Lustre target - verbose_output "Formatting Lustre target on ${HOST_NAME}..." + # Execute remote command to format Lustre target + verbose_output "Formatting Lustre target ${DEVICE_NAME[i]}"\ + "on ${HOST_NAME[i]}..." verbose_output "Format command line is: ${MKFS_CMD}" - ${PDSH} -w ${HOST_NAME} ${MKFS_CMD} >&2 & - PDSH_PID[${pid_num}]=$! - PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${MKFS_CMD}" + REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} ${MKFS_CMD}" + ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 & + REMOTE_PID[${pid_num}]=$! pid_num=${pid_num}+1 + sleep 1 + done + + # Wait for the exit status of the background remote command + verbose_output "Waiting for the return of the remote command..." + fail_exit_status=false + for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do + wait ${REMOTE_PID[${pid_num}]} + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: mass_config() error: Failed"\ + "to execute \"${REMOTE_CMD[${pid_num}]}\"!" + fail_exit_status=true + fi + done - line_num=${line_num}+1 - done < ${CSV_FILE} + if ${fail_exit_status}; then + return 1 + fi + + verbose_output "All the Lustre targets are formatted successfully!" + return 0 +} + +# get_mntopts hostname device_name failovers +# Construct the mount options of Lustre target @device_name in host @hostname +get_mntopts() { + local host_name=$1 + local device_name=$2 + local failovers=$3 + local mnt_opts= + local ret_str + + [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults + + # Execute remote command to check whether the device + # is a block device or not + ret_str=`${REMOTE} ${host_name} \ + "[ -b ${device_name} ] && echo block || echo loop" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_mntopts() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + if [ -z "${ret_str}" ]; then + echo "`basename $0`: get_mntopts() error: remote error:" \ + "No results from remote!" \ + "Check network connectivity between the local host"\ + "and ${host_name}!" + return 1 + fi - # Wait for the exit status of the background pdsh command - verbose_output "Waiting for the return of the pdsh command..." - for ((pid_num = 0; pid_num < ${#PDSH_PID[@]}; pid_num++)); do - wait ${PDSH_PID[${pid_num}]} + [ "${ret_str}" != "${ret_str#loop}" ] && mnt_opts=${mnt_opts},loop + + echo ${mnt_opts} + return 0 +} + +# Execute remote command to modify /etc/fstab to add the new Lustre targets +modify_fstab() { + declare -i i + local mntent mntopts device_name + local COMMAND + + if ! ${MODIFY_FSTAB}; then + return 0 + fi + + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\ + "to add Lustre target ${DEVICE_NAME[i]}" + mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE} + mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\ + ${FAILOVERS[i]}) if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error:"\ - "Fail to execute \"${PDSH_CMD[${pid_num}]}\"!" + echo >&2 "${mntopts}" + return 1 fi - done - rm -rf ${TMP_DIRS} + mntent=${mntent}"\t"${mntopts}"\t"0" "0 + + # Execute remote command to modify /etc/fstab + device_name=${DEVICE_NAME[i]//\//\\/} + COMMAND="(sed -i \"/${device_name}/d\" /etc/fstab; \ + echo -e \"${mntent}\" >> /etc/fstab)" + ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: modify_fstab() error:"\ + "Failed to execute remote command to"\ + "modify /etc/fstab of host ${HOST_NAME[i]}"\ + "to add Lustre target ${DEVICE_NAME[i]}!" + return 1 + fi + done + return 0 } @@ -685,21 +1087,38 @@ if ! check_file $1; then exit 1 fi -if [ "${VERIFY_CONNECT}" != "no" ]; then +if ${VERIFY_CONNECT}; then # Check the network connectivity and hostnames - verbose_output "Checking the network connectivity and hostnames..." + echo "`basename $0`: Checking the cluster network connectivity"\ + "and hostnames..." if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then exit 1 fi - verbose_output "Check the network connectivity and hostnames OK!" + echo "`basename $0`: Check the cluster network connectivity"\ + "and hostnames OK!" + echo fi # Configure the Lustre cluster -verbose_output "******** Lustre cluster configuration START ********" -if ! mass_config ${CSV_FILE}; then +echo "`basename $0`: ******** Lustre cluster configuration START ********" +if ! get_items ${CSV_FILE}; then + exit 1 +fi + +if ! mass_config; then + exit 1 +fi + +if ! modify_fstab; then + exit 1 +fi + +# Produce HA software's configuration files +if ! config_ha; then rm -rf ${TMP_DIRS} exit 1 fi -verbose_output "******** Lustre cluster configuration END **********" + +echo "`basename $0`: ******** Lustre cluster configuration END **********" exit 0 -- 1.8.3.1