From: yujian Date: Thu, 9 Feb 2006 10:54:58 +0000 (+0000) Subject: b=9853 X-Git-Tag: v1_8_0_110~486^4~44 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=1a5ca9eb1bfa1bf29dc984f48b64817a856d2827;p=fs%2Flustre-release.git b=9853 add some options, providing the following functionality: - verifying network conectivity and hostnames in the cluster - producing HA software configuration files - using --reformat to format Lustre target - verbose mode --- diff --git a/lustre/utils/mass_config.sh b/lustre/utils/mass_config.sh index c5be5a5..ea2021e 100755 --- a/lustre/utils/mass_config.sh +++ b/lustre/utils/mass_config.sh @@ -2,47 +2,216 @@ # # mass_config.sh - spreadsheet parsing for massive parallel config # -######################################################################## +# This script is used to parse each line of a spreadsheet (csv file) and +# execute remote pdsh commands to format (mkfs.lustre) every Lustre target +# that will be part of the Lustre cluster. +# +# In addition, it can also verify the network connectivity and hostnames in +# the cluster and produce High-Availability software configurations according +# to the csv file. +# +################################################################################ # Usage usage() { - echo -e >&2 $"\nUsage: `basename $0` " cat >&2 < + + -t HAtype produce High-Availability software configurations + + The argument following -t is used to indicate the High- + Availability software type. The HA software types which + are currently supported are: hbv1 (Heartbeat v1), hbv2 + (Heartbeat v2) and clumanager (CluManager). + + -n don't verify network connectivity and hostnames in the + cluster + -f format the Lustre targets using --reformat option + -h show the format of csv file and some samples + -v verbose mode + Causes `basename $0` to print debugging messages + about its progress. + csv file a spreadsheet that contains configuration parameters + (separated by commas) for each target in a Lustre cl- + uster +EOF + exit 1 +} -Sample 1 for csv file: -lustre-mgs,options lnet networks=tcp,/r/tmp/mgmt,mgs,,,,--device_size 10240,-J size=4,,lustre-mgs@tcp0 -lustre-ost,options lnet networks=tcp,/r/tmp/ost1,ost,lustre1,lustre-mgs@tcp0,0001,--device_size 10240,-J size=4,"extents,mballoc",lustre-mgs@tcp0 -lustre-mdt,options lnet networks=tcp,/r/tmp/mdt1,mdt,lustre1,lustre-mgs@tcp0,0001,--device_size 10240,-J size=4,,lustre-mgs@tcp0 +# Samples +sample() { + cat >&2 <&2 $"`basename $0`: Invalid HA software type" \ + "- ${HATYPE_OPT}!" + usage + fi + ;; + n) + VERIFY_CONNECT=$"no" + ;; + f) + REFORMAT_OPTION=$"--reformat " + ;; + h) + sample + ;; + v) + VERBOSE_OPT=$" -v" + ;; + ?) + usage + esac +done + +# Toss out the parameters we've already processed +shift `expr $OPTIND - 1` + +# Here we expect the csv file if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: Lack csv file!" usage fi +# Output verbose informations +verbose_output() { + if [ -n "${VERBOSE_OPT}" ]; then + echo "`basename $0`: $*" + fi + return 0 +} + # Check the csv file check_file() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"check_file() error: Lack argument for function check_file()!" + echo >&2 $"`basename $0`: check_file() error: Lack argument"\ + "for function check_file()!" return 1 fi CSV_FILE=$1 if [ ! -s ${CSV_FILE} ]; then - echo >&2 $"check_file() error: ${CSV_FILE} does not exist or is empty!" + echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}"\ + "does not exist or is empty!" return 1 fi @@ -53,7 +222,8 @@ check_file() { parse_line() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"parse_line() error: Lack argument for function parse_line()!" + echo >&2 $"`basename $0`: parse_line() error: Lack argument"\ + "for function parse_line()!" return 1 fi @@ -133,40 +303,92 @@ check_element() { # Check hostname, networks, device name and device type if [ -z "${HOST_NAME}" ]||[ -z "${NETWORKS}" ]||[ -z "${DEVICE_NAME}" ]\ ||[ -z "${DEVICE_TYPE}" ]; then - echo >&2 $"check_element() error: Some required element has null value!" - echo >&2 $"check_element() info: Check hostname, networks, device name and device type!" + echo >&2 $"`basename $0`: check_element() error: Some required"\ + "element has null value! Check hostname, networks,"\ + "device name and device type!" return 1 fi # Check mgmtnid - if [ "${DEVICE_TYPE}" == "ost" ]&&[ -z "${MGMT_NID}" ]; then - echo >&2 $"check_element() error: OST's mgmtnid element has null value!" + if [ "${DEVICE_TYPE}" = "ost" ]&&[ -z "${MGMT_NID}" ]; then + echo >&2 $"`basename $0`: check_element() error: OST's mgmtnid"\ + "element has null value!" return 1 fi return 0 } +# Check the elements required for HA configuration +check_ha_element() { + if [ -z "${HATYPE_OPT}" ]; then + return 0 + fi + + # Check service IP element + if [ -z "${SRV_IPADDRS}" ]; then + echo >&2 $"`basename $0`: check_ha_element() error: Service IP"\ + "element has null value!" + return 1 + fi + + # Check heartbeat channel element + if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" -a -z "${HB_CHANNELS}" ] + then + echo >&2 $"`basename $0`: check_ha_element() error: Heartbeat"\ + "channel element has null value!" + return 1 + fi + + return 0 +} + # Check the number of MGS. # There should be no more than one MGS specified in the entire csv file. check_mgs() { # Check the number of explicit MGS if [ "${DEVICE_TYPE#*mgs*}" != "${DEVICE_TYPE}" ]; then - ex_mgs_count=${ex_mgs_count}+1 - fi + if [ "${EXP_MGS}" = "${HOST_NAME}" ]; then + echo >&2 $"`basename $0`: check_mgs() error: More than"\ + "one explicit MGS in the csv file!" + return 1 + fi - if [ ${ex_mgs_count} -gt 1 ]; then - echo >&2 $"check_mgs() error: More than one explicit MGS in the csv file!" - return 1 + if [ -z "${EXP_MGS}" ]; then + EXP_MGS=${HOST_NAME} + fi + + if [ "${EXP_MGS}" != "${HOST_NAME}" ] \ + && [ "${FAILOVERS#*$EXP_MGS*}" = "${FAILOVERS}" ]; then + echo >&2 $"`basename $0`: check_mgs() error: More than"\ + "one explicit MGS in the csv file!" + return 1 + fi fi # Check the number of implicit MGS - if [ "${DEVICE_TYPE}" == "mdt" ]&&[ -z "${MGMT_NID}" ]; then - im_mgs_count=${im_mgs_count}+1 + if [ "${DEVICE_TYPE}" = "mdt" ]&&[ -z "${MGMT_NID}" ]; then + if [ "${IMP_MGS}" = "${HOST_NAME}" ]; then + echo >&2 $"`basename $0`: check_mgs() error: More than"\ + "one implicit MGS in the csv file!" + return 1 + fi + + if [ -z "${IMP_MGS}" ]; then + IMP_MGS=${HOST_NAME} + fi + + if [ "${IMP_MGS}" != "${HOST_NAME}" ] \ + && [ "${FAILOVERS#*$IMP_MGS*}" = "${FAILOVERS}" ]; then + echo >&2 $"`basename $0`: check_mgs() error: More than"\ + "one implicit MGS in the csv file!" + return 1 + fi fi - if [ `expr ${im_mgs_count} + ${ex_mgs_count}` -gt 1 ]; then - echo >&2 $"check_mgs() error: More than one MGS in the csv file!" + if [ -n "${EXP_MGS}" -a -n "${IMP_MGS}" ]; then + echo >&2 $"`basename $0`: check_mgs() error: More than one"\ + "MGS in the csv file!" return 1 fi @@ -175,7 +397,7 @@ check_mgs() { # Construct the command line of mkfs.lustre construct_mkfs_cmdline() { - MKFS_CMD=$"mkfs.lustre " + MKFS_CMD=$"mkfs.lustre "${REFORMAT_OPTION} case "${DEVICE_TYPE}" in "ost") @@ -194,7 +416,8 @@ construct_mkfs_cmdline() { MKFS_CMD=${MKFS_CMD}$"--mdt --mgmt " ;; *) - echo >&2 $"construct_mkfs_cmdline() error: Invalid device type - \"${DEVICE_TYPE}\"" + echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + "Invalid device type - \"${DEVICE_TYPE}\"" return 1 ;; esac @@ -204,6 +427,7 @@ construct_mkfs_cmdline() { fi if [ -n "${MGMT_NID}" ]; then + MGMT_NID=`echo "${MGMT_NID}" | sed 's/^"//' | sed 's/"$//'` MKFS_CMD=${MKFS_CMD}$"--mgmtnid="${MGMT_NID}$" " fi @@ -212,6 +436,7 @@ construct_mkfs_cmdline() { fi if [ -n "${FORMAT_OPTIONS}" ]; then + FORMAT_OPTIONS=`echo "${FORMAT_OPTIONS}" | sed 's/^"//' | sed 's/"$//'` MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS}$" " fi @@ -226,6 +451,7 @@ construct_mkfs_cmdline() { fi if [ -n "${FAILOVERS}" ]; then + FAILOVERS=`echo "${FAILOVERS}" | sed 's/^"//' | sed 's/"$//'` MKFS_CMD=${MKFS_CMD}$"--failover="${FAILOVERS}$" " fi @@ -233,27 +459,120 @@ construct_mkfs_cmdline() { return 0 } +# Get all the node names in this failover group +get_nodenames() { + declare -i idx + local failover_nids failover_nid + + NODE_NAMES[0]=${HOST_NAME} + + failover_nids=`echo ${FAILOVERS}|awk '{split($FAILOVERS, a, ",")}\ + END {for (i in a) print a[i]}'` + + idx=1 + for failover_nid in ${failover_nids} + do + NODE_NAMES[idx]=${failover_nid%@*} + idx=$idx+1 + done + + return 0 +} + +# Produce HA software's configuration files +gen_ha_config() { + local cmd_line + declare -i idx + + if [ -z "${HATYPE_OPT}" ]; then + return 0 + fi + + # Prepare parameters + # Hostnames option + HOSTNAME_OPT=${HOST_NAME} + + if ! get_nodenames; then + return 1 + fi + + for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do + HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]} + done + + # Target device option + TARGET_TYPE=${DEVICE_TYPE} + if [ "${TARGET_TYPE}" = "mdt|mgs" -o "${TARGET_TYPE}" = "mgs|mdt" ] + then + TARGET_TYPE=$"mgs_mdt" + fi + TARGET_OPT=${DEVICE_NAME}:${TARGET_TYPE} + + # Service IP address option + SRVADDR_OPT=${SRV_IPADDRS} + + # Heartbeat channels option + HBCHANNEL_OPT=$"\""${HB_CHANNELS}$"\"" + + # Heartbeat options option + HBOPT_OPT=$"\""${HB_OPTIONS}$"\"" + + # Construct the generation script command line + case "${HATYPE_OPT}" in + "${HATYPE_HBV1}"|"${HATYPE_HBV2}") # Heartbeat + cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}" + cmd_line=${cmd_line}$" -d ${TARGET_OPT} -c ${HBCHANNEL_OPT}" + cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"${VERBOSE_OPT} + + if [ -n "${HB_OPTIONS}" ]; then + cmd_line=${cmd_line}$" -o ${HBOPT_OPT}" + fi + ;; + "${HATYPE_CLUMGR}") # CluManager + cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}" + cmd_line=${cmd_line}$" -d ${TARGET_OPT} -s ${SRVADDR_OPT}" + cmd_line=${cmd_line}${VERBOSE_OPT} + + if [ -n "${HBCHANNEL_OPT}" ]; then + cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}" + fi + + if [ -n "${HB_OPTIONS}" ]; then + cmd_line=${cmd_line}$" -o ${HBOPT_OPT}" + fi + ;; + esac + + # Execute script to generate HA software's configuration files + verbose_output "${cmd_line}" + eval $(echo "${cmd_line}") + if [ $? -ne 0 ]; then + return 1 + fi + + return 0 +} + # Execute pdsh commands to add lnet options lines to remote nodes' # modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets +# +# If -t option exists, then also to produce the HA software's +# configuration files mass_config() { # Check argument if [ $# -eq 0 ]; then - echo >&2 $"mass_config() error: Lack argument for function mass_config()!" + echo >&2 $"`basename $0`: mass_config() error: Lack argument"\ + "for function mass_config()!" return 1 fi CSV_FILE=$1 local LINE COMMAND - declare -a CONFIG_ITEM declare -a PDSH_PID declare -a PDSH_CMD - declare -i ex_mgs_count=0 - declare -i im_mgs_count=0 declare -i line_num=1 declare -i pid_num=0 - ADD_LNET_OPTIONS=$"/usr/bin/add_lnet_options.sh" - while read -r LINE; do # Get rid of the empty line if [ -z "`echo ${LINE} | awk '/[[:alnum:]]/{print $0}'`" ]; then @@ -261,6 +580,12 @@ mass_config() { continue fi + # Get rid of the comment line + if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]; then + line_num=${line_num}+1 + continue + fi + # Parse the config line into CONFIG_ITEM if ! parse_line $LINE; then return 1 @@ -278,32 +603,47 @@ mass_config() { MOUNT_OPTIONS=${CONFIG_ITEM[9]} FAILOVERS=${CONFIG_ITEM[10]} - # Check some required elements + HB_CHANNELS=${CONFIG_ITEM[11]} + SRV_IPADDRS=${CONFIG_ITEM[12]} + HB_OPTIONS=${CONFIG_ITEM[13]} + + # Check some required elements for formatting target if ! check_element; then - echo >&2 $"check_element() error: Occurred on line ${line_num}." + echo >&2 $"`basename $0`: check_element() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}" return 1 fi # Check the number of MGS if ! check_mgs; then - echo >&2 $"check_mgs() error: Occurred on line ${line_num}." + echo >&2 $"`basename $0`: check_mgs() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}" + return 1 + fi + + # Construct the command line of mkfs.lustre + if ! construct_mkfs_cmdline; then + echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}" + return 1 + fi + + # Produce HA software's configuration files + if ! gen_ha_config; then return 1 fi # Execute pdsh command to add lnet options lines to modprobe.conf/modules.conf + verbose_output "Adding lnet options to ${HOST_NAME}..." COMMAND=$"echo \"${NETWORKS}\"|${ADD_LNET_OPTIONS}" pdsh -w ${HOST_NAME} ${COMMAND} >&2 & PDSH_PID[${pid_num}]=$! PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${COMMAND}" pid_num=${pid_num}+1 - # Construct the command line of mkfs.lustre - if ! construct_mkfs_cmdline; then - echo >&2 $"construct_mkfs_cmdline() error: Occurred on line ${line_num}." - return 1 - fi - # Execute pdsh command to format Lustre target + verbose_output "Formatting Lustre target on ${HOST_NAME}..." + verbose_output "Format command line is: ${MKFS_CMD}" pdsh -w ${HOST_NAME} ${MKFS_CMD} >&2 & PDSH_PID[${pid_num}]=$! PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${MKFS_CMD}" @@ -313,23 +653,40 @@ mass_config() { done < ${CSV_FILE} # Wait for the exit status of the background pdsh command - echo "Waiting......" + verbose_output "Waiting for the return of the pdsh command..." for ((pid_num = 0; pid_num < ${#PDSH_PID[@]}; pid_num++)); do wait ${PDSH_PID[${pid_num}]} if [ $? -ne 0 ]; then - echo >&2 "mass_config() error: Fail to execute \"${PDSH_CMD[${pid_num}]}\"!" + echo >&2 "`basename $0`: mass_config() error:"\ + "Fail to execute \"${PDSH_CMD[${pid_num}]}\"!" fi done + rm -rf ${TMP_DIRS} return 0 } # Main flow +# Check the csv file if ! check_file $1; then exit 1 fi +if [ "${VERIFY_CONNECT}" != "no" ]; then +# Check the network connectivity and hostnames + verbose_output "Checking the network connectivity and hostnames..." + if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then + exit 1 + fi + verbose_output "Check the network connectivity and hostnames OK!" +fi + +# Configure the Lustre cluster +verbose_output "******** Lustre cluster configuration START ********" if ! mass_config ${CSV_FILE}; then + rm -rf ${TMP_DIRS} exit 1 fi +verbose_output "******** Lustre cluster configuration END **********" + exit 0