# also update lustre/autoconf/lustre-core.m4 AC_CONFIG_FILES
ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \
- mgc doc utils tests conf scripts autoconf contrib
+ mgc doc utils tests scripts autoconf contrib
SERVER_SUBDIRS := ldiskfs obdfilter ost mds mgs
pkgexampledir='${pkgdatadir}/examples'
AC_SUBST(pkgexampledir)
-
-pymoddir='${pkglibdir}/python/Lustre'
-AC_SUBST(pymoddir)
])
#
lustre/autoMakefile
lustre/autoconf/Makefile
lustre/contrib/Makefile
-lustre/conf/Makefile
lustre/doc/Makefile
lustre/include/Makefile
lustre/include/lustre_ver.h
lustre/scripts/Makefile
lustre/scripts/version_tag.pl
lustre/tests/Makefile
-lustre/utils/Lustre/Makefile
-lustre/utils/cluster_scripts/Makefile
lustre/utils/Makefile
])
case $lb_target_os in
{
char *target = NULL;
char *root_driver = "native";
- char *lustre_driver = "llite";
+ char *lustre_driver = "lustre";
char *root_path = "/";
unsigned mntflgs = 0;
int err;
-MODULES := llite
-llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o
+MODULES := lustre
+lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o
ifeq ($(PATCHLEVEL),4)
-llite-objs += rw24.o super.o
+lustre-objs += rw24.o super.o
else
-llite-objs += rw26.o super25.o
+lustre-objs += rw26.o super25.o
endif
@INCLUDE_RULES@
# See the file COPYING in this distribution
if MODULES
-modulefs_DATA = llite$(KMODEXT)
+modulefs_DATA = lustre$(KMODEXT)
endif
-DIST_SOURCES := $(llite-objs:.o=.c) llite_internal.h rw24.c super.c rw26.c super25.c
+DIST_SOURCES := $(lustre-objs:.o=.c) llite_internal.h rw24.c super.c rw26.c super25.c
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
if (!client_fill_super) {
LCONSOLE_ERROR("Nothing registered for client mount!"
- " Is llite module loaded?\n");
+ " Is the 'lustre' module loaded?\n");
rc = -ENODEV;
} else {
rc = lustre_start_mgc(sb);
echo ${START_MARKER} > ${MODLINES_FILE}
echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE}
- echo "alias lustre llite" >> ${MODLINES_FILE}
while true; do
LNET_LINE=${TMP_LINE%%\\n*}
echo ${LNET_LINE} >> ${MODLINES_FILE}
load_module lvfs/fsfilt_ldiskfs
load_module ost/ost
load_module obdfilter/obdfilter
- load_module llite/llite
+ load_module llite/lustre
load_module mgc/mgc
load_module mgs/mgs
# 'mount' doesn't look in $PATH
# Administration utilities Makefile
-SUBDIRS = Lustre cluster_scripts
-
AM_CFLAGS=$(LLCFLAGS)
AM_CPPFLAGS=$(LLCPPFLAGS) -DLUSTRE_UTILS=1
AM_LDFLAGS := -L$(top_builddir)/lnet/utils
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-# combo mdt/mgs
-uml1,options lnet networks=tcp,/r/tmp/mdt,/mnt/mdt,mdt|mgs,,,,--device-size=10240
-
-# ost0
-uml1,options lnet networks=tcp,/r/tmp/ost0,/mnt/ost0,ost,,uml1@tcp0,,--device-size=10240
+++ /dev/null
-sbin_SCRIPTS = cluster_config.sh gen_clumanager_config.sh gen_cluster_config.sh gen_hb_config.sh module_config.sh mon_cf.generator.sh verify_cluster_net.sh verify_serviceIP.sh
-
-EXTRA_DIST = $(sbin_SCRIPTS)
+++ /dev/null
-#!/bin/bash
-#
-# cluster_config.sh - format and set up multiple lustre servers from a csv file
-#
-# This script is used to parse each line of a spreadsheet (csv file) and
-# execute remote commands to format (mkfs.lustre) every Lustre target
-# that will be part of the Lustre cluster.
-#
-# In addition, it can also verify the network connectivity and hostnames in
-# the cluster and produce High-Availability software configurations for
-# Heartbeat or CluManager.
-#
-################################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-t HAtype] [-n] [-f] [-m] [-h] [-v] <csv file>
-
- This script is used to format and set up multiple lustre servers from a
- csv file.
-
- -h help and examples
- -t HAtype produce High-Availability software configurations
-
- The argument following -t is used to indicate the High-
- Availability software type. The HA software types which
- are currently supported are: hbv1 (Heartbeat v1), hbv2
- (Heartbeat v2) and cluman (CluManager).
- -n no net - don't verify network connectivity and
- hostnames in the cluster
- -f force-format the Lustre targets using --reformat option
- -m modify /etc/fstab to add the new Lustre targets
- -v verbose mode
- csv file a spreadsheet that contains configuration parameters
- (separated by commas) for each target in a Lustre cl-
- uster
-
-EOF
- exit 1
-}
-
-# Samples
-sample() {
- cat <<EOF
-
-This script is used to parse each line of a spreadsheet (csv file) and
-execute remote commands to format (mkfs.lustre) every Lustre target
-that will be part of the Lustre cluster.
-
-It can also optionally:
- * verify the network connectivity and hostnames in the cluster
- * modify /etc/modprobe.conf to add Lustre networking info
- * add the Lustre server info to /etc/fstab
- * produce configurations for Heartbeat or CluManager.
-
-Each line in the csv file represents one Lustre target. The format is:
-hostname,module_opts,device name,mount point,device type,fsname,mgs nids,index,
-format options,mkfs options,mount options,failover nids,heartbeat channels,
-service address,heartbeat options
-
-Items left blank will be set to defaults.
-
-Example 1 - Simple, without HA software configuration options:
--------------------------------------------------------------------------------
-# combo mdt/mgs
-lustre-mgs,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240
-
-# ost0
-lustre-ost,options lnet networks=tcp,/tmp/ost0,/mnt/ost0,ost,,lustre-mgs@tcp0,,--device-size=10240
-
-# ost1
-lustre-ost,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,lustre-mgs@tcp0,,--device-size=10240
--------------------------------------------------------------------------------
-
-Example 2 - Separate MGS/MDT, two network interfaces:
--------------------------------------------------------------------------------
-# mgs
-lustre-mgs1,options lnet 'networks="tcp,elan"',/tmp/mgs,/mnt/mgs,mgs,,,,--device-size=10240,-J size=4,,"lustre-mgs2,2@elan"
-
-# mdt
-lustre-mdt1,options lnet 'networks="tcp,elan"',/tmp/mdt,/mnt/mdt,mdt,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--device-size=10240,-J size=4,,lustre-mdt2
-
-# ost
-lustre-ost1,options lnet 'networks="tcp,elan"',/tmp/ost,/mnt/ost,ost,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--device-size=10240,-J size=4,"extents,mballoc",lustre-ost2
--------------------------------------------------------------------------------
-
-Example 3 - with Heartbeat version 1 configuration options:
--------------------------------------------------------------------------------
-# mgs
-lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs,,,,--device-size=10240,,,lustre-mgs2,serial /dev/ttyS0:bcast eth1,192.168.1.170,ping 192.168.1.169:respawn hacluster /usr/lib/heartbeat/ipfail
-
-# mdt
-lustre-mdt1,options lnet networks=tcp,/tmp/mdt,/mnt/mdt,mdt,,"lustre-mgs1:lustre-mgs2",,--device-size=10240,,,lustre-mdt2,bcast eth1,192.168.1.173
-
-# ost
-lustre-ost1,options lnet networks=tcp,/tmp/ost,/mnt/ost,ost,,"lustre-mgs1:lustre-mgs2",,--device-size=10240,,,lustre-ost2,bcast eth1,192.168.1.171
--------------------------------------------------------------------------------
-
-Example 4 - with Heartbeat version 2 configuration options:
--------------------------------------------------------------------------------
-# combo mdt/mgs
-lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240,,,"lustre-mgs2:lustre-mgs3",bcast eth1
-
-# ost1
-lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1:lustre-mgs2:lustre-mgs3",,--device-size=10240,,,lustre-ost2,bcast eth2
-
-# ost2
-lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1:lustre-mgs2:lustre-mgs3",,--device-size=10240,,,lustre-ost1,bcast eth2
--------------------------------------------------------------------------------
-
-Example 5 - with Red Hat Cluster Manager configuration options:
--------------------------------------------------------------------------------
-# mgs
-lustre-mgs1,options lnet networks=tcp,/dev/sda,/mnt/mgs,mgs,,,,,,,lustre-mgs2,broadcast,192.168.1.170,--clumembd --interval=1000000 --tko_count=20
-
-# mdt
-lustre-mdt1,options lnet networks=tcp,/dev/sdb,/mnt/mdt,mdt,,"lustre-mgs1:lustre-mgs2",,,,,lustre-mdt2,multicast 225.0.0.12,192.168.1.173
-
-# ost
-lustre-ost1,options lnet networks=tcp,/dev/sdb,/mnt/ost,ost,,"lustre-mgs1:lustre-mgs2",,,,,lustre-ost2,,192.168.1.171:192.168.1.172
--------------------------------------------------------------------------------
-
-Example 6 - with combo mgs/mdt failover pair and ost failover pair:
--------------------------------------------------------------------------------
-# combo mgs/mdt
-lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240,,,lustre-mgs2@tcp0
-
-# combo mgs/mdt backup (--noformat)
-lustre-mgs2,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240 --noformat,,,lustre-mgs1@tcp0
-
-# ost
-lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240,,,lustre-ost2@tcp0
-
-# ost backup (--noformat) (note different device name)
-lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240 --noformat,,,lustre-ost1@tcp0
--------------------------------------------------------------------------------
-
-EOF
- exit 0
-}
-
-#***************************** Global variables *****************************#
-# Command used to reach the remote nodes; exported for the helper scripts
-: "${REMOTE:=ssh -x -q}"
-export REMOTE
-
-# Where the Lustre utilities live
-: "${CMD_PATH:=/usr/sbin}"
-: "${MKFS:=$CMD_PATH/mkfs.lustre}"
-: "${LCTL:=$CMD_PATH/lctl}"
-
-# PATH assignment placed in front of the remote mkfs command
-: "${EXPORT_PATH:=PATH=\$PATH:/sbin:/usr/sbin;}"
-
-# Helper scripts called by this script
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"/usr/local/sbin"}
-MODULE_CONFIG=${SCRIPTS_PATH}/module_config.sh
-VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/verify_cluster_net.sh
-GEN_HB_CONFIG=${SCRIPTS_PATH}/gen_hb_config.sh
-GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/gen_clumanager_config.sh
-
-# Accepted values of the -t option
-HATYPE_HBV1="hbv1"          # Heartbeat version 1
-HATYPE_HBV2="hbv2"          # Heartbeat version 2
-HATYPE_CLUMGR="cluman"      # Cluster Manager
-
-# Temporary directories of the HA configuration generators
-HB_TMP_DIR="/tmp/heartbeat"
-CLUMGR_TMP_DIR="/tmp/clumanager"
-TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
-
-# Filesystem type
-: "${FS_TYPE:=lustre}"
-
-# MGS bookkeeping: node names, their indexes in the per-line arrays below,
-# the number of MGS servers and the first used slot of MGS_NODENAME
-declare -a MGS_NODENAME MGS_IDX
-declare -i MGS_NUM INIT_IDX
-
-# Scratch arrays: fields of the current csv line, node names of one
-# failover group, target services of one failover group
-declare -a CONFIG_ITEM NODE_NAMES TARGET_OPTS
-
-# One array per csv column, indexed by configuration line
-declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
-declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
-declare -a HB_CHANNELS SRV_IPADDRS HB_OPTIONS
-
-
-# Defaults of the command line switches
-VERIFY_CONNECT=true
-MODIFY_FSTAB=false
-
-# Walk through the command line switches
-while getopts "t:nfmhv" OPTION; do
-    case $OPTION in
-    t)  # HA software type, must be one of the supported types
-        HATYPE_OPT=$OPTARG
-        case "${HATYPE_OPT}" in
-        "${HATYPE_HBV1}"|"${HATYPE_HBV2}"|"${HATYPE_CLUMGR}")
-            ;;
-        *)
-            echo >&2 $"`basename $0`: Invalid HA software type" \
-                "- ${HATYPE_OPT}!"
-            usage
-            ;;
-        esac
-        ;;
-    n)  VERIFY_CONNECT=false ;;
-    f)  REFORMAT_OPTION=$"--reformat " ;;
-    m)  MODIFY_FSTAB=true ;;
-    h)  sample ;;
-    v)  VERBOSE_OPT=$" -v" ;;
-    *)  usage ;;
-    esac
-done
-
-# Drop the switches that have been consumed
-shift $(( OPTIND - 1 ))
-
-# What remains has to be the csv file
-if [ $# -eq 0 ]; then
-    echo >&2 $"`basename $0`: Missing csv file!"
-    usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if [ -n "${VERBOSE_OPT}" ]; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Check the csv file
-#
-# check_file csv_file
-# Store @csv_file in the global CSV_FILE and verify that the file exists
-# and is not empty. Returns 0 on success and 1 otherwise.
-check_file() {
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: check_file() error: Missing argument"\
-            "for function check_file()!"
-        return 1
-    fi
-
-    CSV_FILE=$1
-    # The name must be quoted: unquoted, an empty name reduces the test to
-    # "[ ! -s ]", which is false and lets the check pass, and a name that
-    # contains blanks makes the test fail with a syntax error.
-    if [ ! -s "${CSV_FILE}" ]; then
-        echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}"\
-            "does not exist or is empty!"
-        return 1
-    fi
-
-    return 0
-}
-
-# Parse a line in the csv file
-parse_line() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: parse_line() error: Missing argument"\
- "for function parse_line()!"
- return 1
- fi
-
- declare -i i=0
- declare -i length=0
- declare -i idx=0
- declare -i s_quote_flag=0
- declare -i d_quote_flag=0
- local TMP_LETTER LINE
-
- LINE=$*
-
- # Initialize the CONFIG_ITEM array
- unset CONFIG_ITEM
-
- # Get the length of the line
- length=${#LINE}
-
- i=0
- while [ ${idx} -lt ${length} ]; do
- # Get a letter from the line
- TMP_LETTER=${LINE:${idx}:1}
-
- case "${TMP_LETTER}" in
- ",")
- if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ]
- then
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
- else
- i=$i+1
- fi
- idx=${idx}+1
- continue
- ;;
- "'")
- if [ ${s_quote_flag} -eq 0 ]; then
- s_quote_flag=1
- else
- s_quote_flag=0
- fi
- ;;
- "\"")
- if [ ${d_quote_flag} -eq 0 ]; then
- d_quote_flag=1
- else
- d_quote_flag=0
- fi
-
- if [ ${i} -eq 1 ]; then
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}$"\\"${TMP_LETTER}
- idx=${idx}+1
- continue
- fi
- ;;
- "\r")
- idx=${idx}+1
- continue
- ;;
- *)
- ;;
- esac
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
- idx=${idx}+1
- done
- return 0
-}
-
-# Check the items required for OSTs, MDTs and MGS
-#
-# When formatting an OST, the following items: hostname, module_opts,
-# device name, device type and mgs nids, cannot have null value.
-#
-# When formatting an MDT or MGS, the following items: hostname,
-# module_opts, device name and device type, cannot have null value.
-check_item() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_item() error: Missing argument"\
- "for function check_item()!"
- return 1
- fi
-
- declare -i i=$1
-
- # Check hostname, module_opts, device name and device type
- if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
- ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: Some required"\
- "item has null value! Check hostname, module_opts,"\
- "device name and device type!"
- return 1
- fi
-
- # Check mgs nids
- if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
- "item has null value!"
- return 1
- fi
-
- # Check mount point
- if ${MODIFY_FSTAB} && [ -z "${MOUNT_POINT[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: mount"\
- "point item of target ${DEVICE_NAME[i]} has null value!"
- return 1
- fi
-
- return 0
-}
-
-# Check the items required for HA configuration
-#
-# check_ha_item index
-# Nothing is checked when no HA type was requested or when line @index has
-# none of the three HA items. Otherwise mount point and failover nodes are
-# required, Heartbeat v1 needs a service IP and every type except
-# CluManager needs a heartbeat channel.
-check_ha_item() {
-    [ -z "${HATYPE_OPT}" ] && return 0
-
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: check_ha_item() error: Missing"\
-            "argument for function check_ha_item()!"
-        return 1
-    fi
-
-    declare -i i=$1
-
-    # A line without any HA item is not part of the HA configuration
-    if [ -z "${HB_CHANNELS[i]}${SRV_IPADDRS[i]}${HB_OPTIONS[i]}" ]; then
-        return 0
-    fi
-
-    # Mount point
-    if [ -z "${MOUNT_POINT[i]}" ]; then
-        echo >&2 $"`basename $0`: check_ha_item() error: mount"\
-            "point item of target ${DEVICE_NAME[i]} has null value!"
-        return 1
-    fi
-
-    # Failover nodes
-    if [ -z "${FAILOVERS[i]}" ]; then
-        echo >&2 $"`basename $0`: check_ha_item() error:"\
-            "failover item of host ${HOST_NAME[i]} has null value!"
-        return 1
-    fi
-
-    # Service IP
-    if [ "${HATYPE_OPT}" = "${HATYPE_HBV1}" ] && [ -z "${SRV_IPADDRS[i]}" ]
-    then
-        echo >&2 $"`basename $0`: check_ha_item() error:"\
-            "service IP item of host ${HOST_NAME[i]} has null value!"
-        return 1
-    fi
-
-    # Heartbeat channel
-    if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ] && [ -z "${HB_CHANNELS[i]}" ]
-    then
-        echo >&2 $"`basename $0`: check_ha_item() error: Heartbeat"\
-            "channel item of host ${HOST_NAME[i]} has null value!"
-        return 1
-    fi
-
-    return 0
-}
-
-# Get the number of MGS nodes in the cluster
-get_mgs_num() {
- INIT_IDX=0
- MGS_NUM=${#MGS_NODENAME[@]}
- [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
- && let "MGS_NUM += 1"
-}
-
-# is_mgs_node hostname
-# Verify whether @hostname is a MGS node
-is_mgs_node() {
- local host_name=$1
- declare -i i
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
- done
-
- return 1
-}
-
-# Check whether the MGS nodes are in the same failover group
-check_mgs_group() {
- declare -i i
- declare -i j
- declare -i idx
- local mgs_node
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- mgs_node=${MGS_NODENAME[i]}
- for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
- [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
-
- idx=${MGS_IDX[j]}
- if [ "${FAILOVERS[idx]#*$mgs_node*}" = "${FAILOVERS[idx]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs_group() error:"\
- "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
- "failover group!"
- return 1
- fi
- done
- done
-
- return 0
-}
-
-# Get and check MGS servers.
-# There should be no more than one primary MGS specified in the entire csv
-# file; additional MGS lines must carry "noformat" in their format options
-# and are recorded as backup MGS servers.
-#
-# Fills the global arrays MGS_NODENAME and MGS_IDX: slot 0 holds the
-# primary MGS (host name and csv line index), slots from 1 on hold the
-# backup MGS servers. A line is an explicit MGS when its device type
-# contains "mgs" and an implicit MGS when its device type is "mdt" and its
-# mgs nids item is empty.
-# Returns 1 when a host has more than one MGS target, when explicit and
-# implicit MGS targets are mixed, when there is more than one primary MGS,
-# or when check_mgs_group fails; returns 0 otherwise.
-check_mgs() {
- declare -i i
- declare -i j
- declare -i exp_idx # Index of explicit MGS servers
- declare -i imp_idx # Index of implicit MGS servers
- local is_exp_mgs is_imp_mgs
- local mgs_node
-
- # Initialize the MGS_NODENAME and MGS_IDX arrays
- unset MGS_NODENAME
- unset MGS_IDX
-
- exp_idx=1
- imp_idx=1
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- is_exp_mgs=false
- is_imp_mgs=false
-
- # Check whether this node is an explicit MGS node
- # or an implicit one
- if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
- verbose_output "Explicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_exp_mgs=true
- fi
-
- if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
- verbose_output "Implicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_imp_mgs=true
- fi
-
- # Get and check MGS servers
- if ${is_exp_mgs} || ${is_imp_mgs}; then
- # Check whether more than one MGS target in one MGS node
- if is_mgs_node ${HOST_NAME[i]}; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one MGS target in the same node -"\
- "\"${HOST_NAME[i]}\"!"
- return 1
- fi
-
- # Get and check primary MGS server and backup MGS server
- if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
- then
- # Primary MGS server
- if [ -z "${MGS_NODENAME[0]}" ]; then
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
- MGS_NODENAME[0]=${HOST_NAME[i]}
- MGS_IDX[0]=$i
- else
- mgs_node=${MGS_NODENAME[0]}
- if [ "${FAILOVERS[i]#*$mgs_node*}" = "${FAILOVERS[i]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one primary MGS nodes in the csv" \
- "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
- else
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
- "are failover pair, one of them should use"\
- "\"--noformat\" in the format options item!"
- fi
- return 1
- fi
- else # Backup MGS server
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
-
- if ${is_exp_mgs}; then # Explicit MGS
- MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
- MGS_IDX[exp_idx]=$i
- exp_idx=$(( exp_idx + 1 ))
- else # Implicit MGS
- MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
- MGS_IDX[imp_idx]=$i
- imp_idx=$(( imp_idx + 1 ))
- fi
- fi
- fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
- done
-
- # Check whether the MGS nodes are in the same failover group
- if ! check_mgs_group; then
- return 1
- fi
-
- return 0
-}
-
-# Construct the command line of mkfs.lustre
-#
-# construct_mkfs_cmdline index
-# Assemble the mkfs.lustre command for csv line @index in the global
-# MKFS_CMD. One pair of enclosing double quotes is stripped from the mgs
-# nids, format options, mkfs options, mount options and failover items,
-# and the stripped values are written back to their arrays.
-# Returns 1 on a missing argument or an unknown device type.
-construct_mkfs_cmdline() {
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
-            "Missing argument for function"\
-            "construct_mkfs_cmdline()!"
-        return 1
-    fi
-
-    declare -i i=$1
-
-    # Command name followed by the optional --reformat switch
-    MKFS_CMD="${MKFS} ${REFORMAT_OPTION}"
-
-    # Target type
-    case "${DEVICE_TYPE[i]}" in
-    "ost"|"mdt"|"mgs")
-        MKFS_CMD="${MKFS_CMD}--${DEVICE_TYPE[i]} "
-        ;;
-    "mdt|mgs"|"mgs|mdt")
-        MKFS_CMD="${MKFS_CMD}--mdt --mgs "
-        ;;
-    *)
-        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
-            "Invalid device type - \"${DEVICE_TYPE[i]}\"!"
-        return 1
-        ;;
-    esac
-
-    # Filesystem name
-    if [ -n "${FS_NAME[i]}" ]; then
-        MKFS_CMD="${MKFS_CMD}--fsname=${FS_NAME[i]} "
-    fi
-
-    # MGS nids
-    if [ -n "${MGS_NIDS[i]}" ]; then
-        MGS_NIDS[i]=$(echo "${MGS_NIDS[i]}" | sed -e 's/^"//' -e 's/"$//')
-        MKFS_CMD="${MKFS_CMD}--mgsnode=${MGS_NIDS[i]} "
-    fi
-
-    # Target index
-    if [ -n "${INDEX[i]}" ]; then
-        MKFS_CMD="${MKFS_CMD}--index=${INDEX[i]} "
-    fi
-
-    # Format options are passed through as they are
-    if [ -n "${FORMAT_OPTIONS[i]}" ]; then
-        FORMAT_OPTIONS[i]=$(echo "${FORMAT_OPTIONS[i]}" | sed -e 's/^"//' -e 's/"$//')
-        MKFS_CMD="${MKFS_CMD}${FORMAT_OPTIONS[i]} "
-    fi
-
-    # Options for the backing filesystem's mkfs
-    if [ -n "${MKFS_OPTIONS[i]}" ]; then
-        MKFS_OPTIONS[i]=$(echo "${MKFS_OPTIONS[i]}" | sed -e 's/^"//' -e 's/"$//')
-        MKFS_CMD="${MKFS_CMD}--mkfsoptions=\"${MKFS_OPTIONS[i]}\" "
-    fi
-
-    # Mount options of the backing filesystem
-    if [ -n "${MOUNT_OPTIONS[i]}" ]; then
-        MOUNT_OPTIONS[i]=$(echo "${MOUNT_OPTIONS[i]}" | sed -e 's/^"//' -e 's/"$//')
-        MKFS_CMD="${MKFS_CMD}--mountfsoptions=\"${MOUNT_OPTIONS[i]}\" "
-    fi
-
-    # Failover nids
-    if [ -n "${FAILOVERS[i]}" ]; then
-        FAILOVERS[i]=$(echo "${FAILOVERS[i]}" | sed -e 's/^"//' -e 's/"$//')
-        MKFS_CMD="${MKFS_CMD}--failnode=${FAILOVERS[i]} "
-    fi
-
-    # The device comes last
-    MKFS_CMD="${MKFS_CMD}${DEVICE_NAME[i]}"
-    return 0
-}
-
-# Get all the node names in this failover group
-#
-# get_nodenames index
-# Fill the global NODE_NAMES array: slot 0 is the host name of csv line
-# @index, the following slots hold one name per failover node, in the order
-# in which the nodes appear in the failover item. The failover item is a
-# ":"-separated list of nodes, each node being a ","-separated list of
-# nids; the name is the first nid without its "@network" suffix.
-# Returns 1 when called without arguments, 0 otherwise.
-get_nodenames() {
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
-            "argument for function get_nodenames()!"
-        return 1
-    fi
-
-    declare -i i=$1
-    declare -i idx
-    local failover_nid first_nid
-
-    # Initialize the NODE_NAMES array
-    unset NODE_NAMES
-
-    NODE_NAMES[0]=${HOST_NAME[i]}
-
-    # Split with the shell instead of awk: the former awk script referred
-    # to an awk variable that was never set and printed the pieces with
-    # "for (idx in a)", whose order is unspecified, so the order of the
-    # failover nodes was not guaranteed.
-    # FIXME: Suppose the first nid of one failover node contains node name
-    idx=1
-    for failover_nid in ${FAILOVERS[i]//:/ }
-    do
-        first_nid=${failover_nid%%,*}
-        NODE_NAMES[idx]=${first_nid%@*}
-        idx=$idx+1
-    done
-
-    return 0
-}
-
-# Verify whether the format line has HA items
-is_ha_line() {
- declare -i i=$1
-
- if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
- [ -n "${HB_CHANNELS[i]}" ] && return 0
- else
- [ -n "${SRV_IPADDRS[i]}" ] && return 0
- fi
-
- return 1
-}
-
-# Produce HA software's configuration files
-#
-# gen_ha_config index
-# Build the command line of the Heartbeat or CluManager generation script
-# from the HA items of csv line @index and from the global TARGET_OPTS
-# array, then run it. Sets the globals HOSTNAME_OPT, SRVADDR_OPT,
-# HBCHANNEL_OPT, HBOPT_OPT and DEVICE_OPT.
-# Returns 1 when the node names cannot be determined or the generation
-# script fails, 0 otherwise.
-gen_ha_config() {
-    declare -i i=$1
-    declare -i idx
-    local cmd_line
-
-    # Prepare parameters
-    # Hostnames option
-    HOSTNAME_OPT=${HOST_NAME[i]}
-
-    if ! get_nodenames $i; then
-        return 1
-    fi
-
-    for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
-        HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
-    done
-
-    # Service IP address option
-    SRVADDR_OPT=${SRV_IPADDRS[i]}
-
-    # Heartbeat channels option, wrapped in double quotes for eval
-    HBCHANNEL_OPT=$"\""${HB_CHANNELS[i]}$"\""
-
-    # Heartbeat options option, wrapped in double quotes for eval
-    HBOPT_OPT=$"\""${HB_OPTIONS[i]}$"\""
-
-    # Target devices option
-    DEVICE_OPT=" -d "${TARGET_OPTS[0]}
-    for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do
-        DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]}
-    done
-
-    # Construct the generation script command line
-    case "${HATYPE_OPT}" in
-    "${HATYPE_HBV1}"|"${HATYPE_HBV2}")    # Heartbeat
-        cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
-        cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}"${DEVICE_OPT}${VERBOSE_OPT}
-
-        if [ -n "${SRV_IPADDRS[i]}" ]; then
-            cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"
-        fi
-
-        if [ -n "${HB_OPTIONS[i]}" ]; then
-            cmd_line=${cmd_line}$" -o ${HBOPT_OPT}"
-        fi
-        ;;
-    "${HATYPE_CLUMGR}")                   # CluManager
-        cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
-        cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"${DEVICE_OPT}${VERBOSE_OPT}
-
-        # Test the csv item itself: HBCHANNEL_OPT always contains the two
-        # quote characters and is therefore never empty, which used to
-        # add -c "" to every command line.
-        if [ -n "${HB_CHANNELS[i]}" ]; then
-            cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}"
-        fi
-
-        if [ -n "${HB_OPTIONS[i]}" ]; then
-            cmd_line=${cmd_line}$" -o ${HBOPT_OPT}"
-        fi
-        ;;
-    esac
-
-    # Execute script to generate HA software's configuration files
-    verbose_output "Generating HA software's configurations in"\
-        "${HOST_NAME[i]} failover group..."
-    verbose_output "${cmd_line}"
-    # Hand the string to eval unchanged; passing it through an unquoted
-    # command substitution would expand wildcards and squeeze blanks
-    # inside the quoted options before eval sees them.
-    eval "${cmd_line}"
-    if [ $? -ne 0 ]; then
-        return 1
-    fi
-    verbose_output "Generate HA software's configurations in"\
-        "${HOST_NAME[i]} failover group OK"
-
-    return 0
-}
-
-# Configure HA software
-config_ha() {
- if [ -z "${HATYPE_OPT}" ]; then
- return 0
- fi
-
- declare -i i j k
- declare -i prim_idx # Index for PRIM_HOSTNAMES array
- declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays
-
- declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover
- # groups in the lustre cluster
- declare -a HOST_INDEX # Indices for the same node in all the
- # format lines in the csv file
- local prim_host
-
- # Initialize the PRIM_HOSTNAMES array
- prim_idx=0
- unset PRIM_HOSTNAMES
-
- # Get failover groups and generate HA configuration files
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- prim_host=${HOST_NAME[i]}
-
- for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do
- [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2
- done
-
- target_idx=0
- unset HOST_INDEX
- unset TARGET_OPTS
- for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do
- if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}"
- then
- HOST_INDEX[target_idx]=$k
- TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]}
- target_idx=$(( target_idx + 1 ))
- fi
- done
-
- if [ ${#TARGET_OPTS[@]} -ne 0 ]; then
- PRIM_HOSTNAMES[prim_idx]=${prim_host}
- prim_idx=$(( prim_idx + 1 ))
-
- if ! gen_ha_config ${HOST_INDEX[0]}; then
- return 1
- fi
- fi
- done
-
- if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then
- verbose_output "There are no HA configuration items in the"\
- "csv file. No HA configuration files are generated!"
- fi
-
- rm -rf ${TMP_DIRS}
- return 0
-}
-
-
-# Get all the items in the csv file and do some checks.
-#
-# get_items csv_file
-# Read @csv_file line by line, skipping lines without any alphanumeric
-# character and lines in which a "#" stands at the start or after white
-# space. Every other line is split with parse_line, its fields are stored
-# in the per-column global arrays and checked with check_item and
-# check_ha_item.
-# Returns 1 at the first line that fails to parse or check, 0 otherwise.
-get_items() {
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: get_items() error: Missing argument"\
-            "for function get_items()!"
-        return 1
-    fi
-
-    CSV_FILE=$1
-    local LINE
-    declare -i line_num=0
-    declare -i idx=0
-
-    while read -r LINE; do
-        line_num=${line_num}+1
-        # verbose_output "Parsing line ${line_num}: $LINE"
-
-        # Get rid of the empty line
-        if [ -z "`echo "${LINE}" | awk '/[[:alnum:]]/ {print $0}'`" ]; then
-            continue
-        fi
-
-        # Get rid of the comment line
-        if [ -z "`echo "${LINE}" | grep -E -v '([[:space:]]|^)#'`" ]
-        then
-            continue
-        fi
-
-        # Parse the config line into CONFIG_ITEM.
-        # The line is passed quoted so that the shell neither expands
-        # wildcard characters in it nor squeezes its blanks before the
-        # fields are split.
-        if ! parse_line "$LINE"; then
-            echo >&2 $"`basename $0`: parse_line() error: Occurred"\
-                "on line ${line_num} in ${CSV_FILE}: $LINE"
-            return 1
-        fi
-
-        HOST_NAME[idx]=${CONFIG_ITEM[0]}
-        MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
-        DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
-        MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
-        DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
-        FS_NAME[idx]=${CONFIG_ITEM[5]}
-        MGS_NIDS[idx]=${CONFIG_ITEM[6]}
-        INDEX[idx]=${CONFIG_ITEM[7]}
-        FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
-        MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
-        MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
-        FAILOVERS[idx]=${CONFIG_ITEM[11]}
-
-        HB_CHANNELS[idx]=${CONFIG_ITEM[12]}
-        SRV_IPADDRS[idx]=${CONFIG_ITEM[13]}
-        HB_OPTIONS[idx]=${CONFIG_ITEM[14]}
-
-        # Check some required items for formatting target
-        if ! check_item $idx; then
-            echo >&2 $"`basename $0`: check_item() error:"\
-                "Occurred on line ${line_num} in ${CSV_FILE}."
-            return 1
-        fi
-
-        # Check the items required for HA configuration
-        if ! check_ha_item $idx; then
-            echo >&2 $"`basename $0`: check_ha_item() error:"\
-                "Occurred on line ${line_num} in ${CSV_FILE}."
-            return 1
-        fi
-
-        idx=${idx}+1
-    done < "${CSV_FILE}"
-
-    return 0
-}
-
-# check_lnet_connect hostname_index mgs_hostname
-# Check whether the target node can contact the MGS node @mgs_hostname
-# If @mgs_hostname is null, then it means the primary MGS node
-#
-# The mgs nids item of csv line @hostname_index is a ":"-separated list of
-# MGS nodes, each node being a ","-separated list of nids. The nids of the
-# wanted node are pinged from the target node with "lctl ping", in the
-# order in which they are listed, until one of them answers.
-# Returns 0 when a nid answers; returns 1 when the item has no nids for
-# the wanted node or none of them answers.
-check_lnet_connect() {
-    declare -i i=$1
-    local mgs_node=$2
-
-    local COMMAND RET_STR
-    local mgs_prim_nids all_nids
-    local nids
-    local nids_str=
-    local mgs_nid
-    local ping_mgs
-
-    # Execute remote command to check that
-    # this node can contact the MGS node
-    verbose_output "Checking lnet connectivity between" \
-        "${HOST_NAME[i]} and the MGS node ${mgs_node}"
-    all_nids=${MGS_NIDS[i]}
-
-    # The lists are split with shell expansions instead of awk: the former
-    # awk scripts referred to awk variables that were never set and printed
-    # the pieces with "for (idx in a)", whose order is unspecified.
-    mgs_prim_nids=${all_nids%%:*}
-
-    if [ -z "${mgs_node}" ]; then
-        nids_str=${mgs_prim_nids}    # nids of primary MGS node
-    else
-        for nids in ${all_nids//:/ }; do
-            # FIXME: Suppose the MGS nids contain the node name
-            [ "${nids}" != "${nids#*$mgs_node*}" ] && nids_str=${nids}
-        done
-    fi
-
-    if [ -z "${nids_str}" ]; then
-        echo >&2 $"`basename $0`: check_lnet_connect() error:"\
-            "Check the mgs nids item of host ${HOST_NAME[i]}!"\
-            "Missing nids of the MGS node ${mgs_node}!"
-        return 1
-    fi
-
-    ping_mgs=false
-    for mgs_nid in ${nids_str//,/ }
-    do
-        # "failed" in the output marks an unsuccessful ping
-        COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1"
-        RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
-        if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
-        then
-            # This node can contact the MGS node
-            verbose_output "${HOST_NAME[i]} can contact the MGS" \
-                "node ${mgs_node} by using nid" \
-                "\"${mgs_nid}\"!"
-            ping_mgs=true
-            break
-        fi
-    done
-
-    if ! ${ping_mgs}; then
-        echo >&2 "`basename $0`: check_lnet_connect() error:" \
-            "${HOST_NAME[i]} cannot contact the MGS node"\
-            "${mgs_node} through lnet networks!"
-        return 1
-    fi
-
-    return 0
-}
-
-# Start lnet network in the cluster node and check that
-# this node can contact the MGS node
-#
-# check_lnet index
-# Skipped entirely when connectivity checks were disabled with -n. The
-# lnet network is started on the host of csv line @index; unless that
-# host is an MGS node itself, check_lnet_connect is then run once for
-# each slot of MGS_NODENAME below MGS_NUM.
-check_lnet() {
-    ${VERIFY_CONNECT} || return 0
-
-    # Check argument
-    if [ $# -eq 0 ]; then
-        echo >&2 $"`basename $0`: check_lnet() error: Missing"\
-            "argument for function check_lnet()!"
-        return 1
-    fi
-
-    declare -i i=$1
-    declare -i mgs_slot
-    local COMMAND RET_STR
-
-    # Bring the network up on the remote node; the output has to
-    # contain "LNET configured"
-    verbose_output "Starting lnet network in ${HOST_NAME[i]}"
-    COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1"
-    RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
-    if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
-    then
-        echo >&2 "`basename $0`: check_lnet() error: remote" \
-            "${HOST_NAME[i]} error: ${RET_STR}"
-        return 1
-    fi
-
-    # An MGS node does not have to contact itself
-    is_mgs_node ${HOST_NAME[i]} && return 0
-
-    # Every other node has to reach the MGS nodes
-    mgs_slot=0
-    while [ ${mgs_slot} -lt ${MGS_NUM} ]; do
-        check_lnet_connect $i ${MGS_NODENAME[mgs_slot]} || return 1
-        mgs_slot=$(( mgs_slot + 1 ))
-    done
-
-    return 0
-}
-
-# Start lnet network in the MGS node
-#
-# For each recorded MGS node the module options of its csv line are piped
-# into the module configuration script on that node, then check_lnet is
-# run for the line. Returns 0 right away when no MGS node is recorded in
-# slot 0 or 1 of MGS_NODENAME, and 1 when a remote command or check_lnet
-# fails.
-start_mgs_lnet() {
-    declare -i slot
-    declare -i line_idx
-    local COMMAND
-
-    if [ -z "${MGS_NODENAME[0]}${MGS_NODENAME[1]}" ]; then
-        verbose_output "There is no MGS target in the ${CSV_FILE} file."
-        return 0
-    fi
-
-    for ((slot = ${INIT_IDX}; slot < ${MGS_NUM}; slot++)); do
-        # Add the lnet options lines to the MGS node's
-        # modprobe.conf/modules.conf
-        line_idx=${MGS_IDX[slot]}
-        COMMAND=$"echo \"${MODULE_OPTS[${line_idx}]}\"|${MODULE_CONFIG}"
-        verbose_output "Adding lnet module options to ${MGS_NODENAME[slot]}"
-        if ! ${REMOTE} ${MGS_NODENAME[slot]} "${COMMAND}" >&2; then
-            echo >&2 "`basename $0`: start_mgs_lnet() error:"\
-                "Failed to execute remote command to" \
-                "add module options to ${MGS_NODENAME[slot]}!"\
-                "Check ${MODULE_CONFIG}!"
-            return 1
-        fi
-
-        # Start lnet network in the MGS node
-        check_lnet ${line_idx} || return 1
-    done
-
-    return 0
-}
-
-# Execute remote command to add lnet options lines to remote nodes'
-# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
-mass_config() {
- local COMMAND
- declare -a REMOTE_PID
- declare -a REMOTE_CMD
- declare -i pid_num=0
- declare -i i=0
-
- # Start lnet network in the MGS node
- if ! start_mgs_lnet; then
- return 1
- fi
-
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- # Construct the command line of mkfs.lustre
- if ! construct_mkfs_cmdline $i; then
- return 1
- fi
-
- if ! is_mgs_node ${HOST_NAME[i]}; then
- # Execute remote command to add lnet options lines to
- # modprobe.conf/modules.conf
- COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
- verbose_output "Adding lnet module options to" \
- "${HOST_NAME[i]}"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error:"\
- "Failed to execute remote command to"\
- "add module options to ${HOST_NAME[i]}!"
- return 1
- fi
-
- # Check lnet networks
- if ! check_lnet $i; then
- return 1
- fi
- fi
-
- # Execute remote command to format Lustre target
- verbose_output "Formatting Lustre target ${DEVICE_NAME[i]}"\
- "on ${HOST_NAME[i]}..."
- verbose_output "Format command line is: ${MKFS_CMD}"
- REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} ${MKFS_CMD}"
- ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 &
- REMOTE_PID[${pid_num}]=$!
- pid_num=${pid_num}+1
- sleep 1
- done
-
- # Wait for the exit status of the background remote command
- verbose_output "Waiting for the return of the remote command..."
- fail_exit_status=false
- for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
- wait ${REMOTE_PID[${pid_num}]}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error: Failed"\
- "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
- fail_exit_status=true
- fi
- done
-
- if ${fail_exit_status}; then
- return 1
- fi
-
- verbose_output "All the Lustre targets are formatted successfully!"
- return 0
-}
-
-# get_mntopts hostname device_name failovers
-# Construct the mount options of Lustre target @device_name in host @hostname
-get_mntopts() {
- local host_name=$1
- local device_name=$2
- local failovers=$3
- local mnt_opts=
- local ret_str
-
- [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults
-
- # Execute remote command to check whether the device
- # is a block device or not
- ret_str=`${REMOTE} ${host_name} \
- "[ -b ${device_name} ] && echo block || echo loop" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_mntopts() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if [ -z "${ret_str}" ]; then
- echo "`basename $0`: get_mntopts() error: remote error:" \
- "No results from remote!" \
- "Check network connectivity between the local host"\
- "and ${host_name}!"
- return 1
- fi
-
- [ "${ret_str}" != "${ret_str#loop}" ] && mnt_opts=${mnt_opts},loop
-
- echo ${mnt_opts}
- return 0
-}
-
-# Execute remote command to modify /etc/fstab to add the new Lustre targets
-modify_fstab() {
- declare -i i
- local mntent mntopts device_name
- local COMMAND
-
- if ! ${MODIFY_FSTAB}; then
- return 0
- fi
-
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
- "to add Lustre target ${DEVICE_NAME[i]}"
- mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE}
- mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
- ${FAILOVERS[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${mntopts}"
- return 1
- fi
-
- mntent=${mntent}"\t"${mntopts}"\t"0" "0
-
- # Execute remote command to modify /etc/fstab
- device_name=${DEVICE_NAME[i]//\//\\/}
- COMMAND="(sed -i \"/${device_name}/d\" /etc/fstab; \
- echo -e \"${mntent}\" >> /etc/fstab)"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: modify_fstab() error:"\
- "Failed to execute remote command to"\
- "modify /etc/fstab of host ${HOST_NAME[i]}"\
- "to add Lustre target ${DEVICE_NAME[i]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# Main flow
-# Each stage aborts the script with status 1 on failure.
-
-# Check the csv file
-check_file $1 || exit 1
-
-# Optionally check the network connectivity and hostnames first
-if ${VERIFY_CONNECT}; then
-    echo "$(basename $0): Checking the cluster network connectivity"\
-         "and hostnames..."
-    ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE} || exit 1
-    echo "$(basename $0): Check the cluster network connectivity"\
-         "and hostnames OK!"
-    echo
-fi
-
-# Configure the Lustre cluster
-echo "$(basename $0): ******** Lustre cluster configuration START ********"
-get_items ${CSV_FILE} || exit 1
-check_mgs || exit 1
-mass_config || exit 1
-modify_fstab || exit 1
-
-# Produce HA software's configuration files; the temporary directories are
-# removed when this stage fails
-if ! config_ha; then
-    rm -rf ${TMP_DIRS}
-    exit 1
-fi
-
-echo "$(basename $0): ******** Lustre cluster configuration END **********"
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# gen_clumanager_config.sh - script for generating the Red Hat Cluster Manager
-# HA software's configuration files
-#
-################################################################################
-
-# usage
-#
-# Print the command line synopsis and option descriptions on stderr, then
-# terminate the script with exit status 1. Never returns to the caller.
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` <-n hostnames> <-s service addresses>
- [-c heartbeat channel] [-o heartbeat options] [-v]
- <-d target device> [-d target device...]
-
- -n hostnames the nodenames of the primary node and its fail-
- overs
- Multiple nodenames are separated by colon (:)
- delimeter. The first one is the nodename of the
- primary node, the others are failover nodenames.
- -s service addresses the IP addresses to failover
- Multiple addresses are separated by colon (:)
- delimeter.
- -c heartbeat channel the method to send/rcv heartbeats on
- The default method is multicast, and multicast_
- ipaddress is "225.0.0.11".
- -o heartbeat options a "catchall" for other heartbeat configuration
- options
- Multiple options are separated by colon (:)
- delimeter.
- -v verbose mode
- -d target device the target device name and mount point
- The device name and mount point are separated by
- colon (:) delimeter.
-
-EOF
- exit 1
-}
-
-#****************************** Global variables ******************************#
-# Scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"."}
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/verify_serviceIP.sh
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Lustre utilities path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# CluManager tools
-CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"}
-CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
-
-# Configuration directory
-CLUMAN_DIR="/etc" # CluManager configuration directory
-
-# Service directory and name
-INIT_DIR=${INIT_DIR:-"/etc/init.d"}
-LUSTRE_SRV=${LUSTRE_SRV:-"${INIT_DIR}/lustre"} # service script for lustre
-
-TMP_DIR="/tmp/clumanager" # temporary directory
-
-declare -a NODE_NAMES # node names in the failover group
-declare -a SRV_IPADDRS # service IP addresses
-
-# Lustre target device names, service names and mount points
-declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
-declare -i TARGET_NUM=0 # number of targets
-
-# Get and check the positional parameters
-VERBOSE_OUTPUT=false
-while getopts "n:s:c:o:vd:" OPTION; do
- case $OPTION in
- n)
- HOSTNAME_OPT=$OPTARG
- PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
- if [ -z "${PRIM_NODENAME}" ]; then
- echo >&2 $"`basename $0`: Missing primary nodename!"
- usage
- fi
- HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
- if [ ${HOSTNAME_NUM} -lt 2 ]; then
- echo >&2 $"`basename $0`: Missing failover nodenames!"
- usage
- fi
- ;;
- s)
- SRVADDR_OPT=$OPTARG
- ;;
- c)
- HBCHANNEL_OPT=$OPTARG
- HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
- | sed 's/"$//'`
- if [ -n "${HBCHANNEL_OPT}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
- "- ${HBCHANNEL_OPT}!"
- usage
- fi
- ;;
- o)
- HBOPT_OPT=$OPTARG
- HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
- ;;
- v)
- VERBOSE_OUTPUT=true
- ;;
- d)
- DEVICE_OPT=$OPTARG
- TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
- TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
- if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing target device name!"
- usage
- fi
- if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing mount point for target"\
- "${TARGET_DEVNAMES[TARGET_NUM]}!"
- usage
- fi
- TARGET_NUM=$(( TARGET_NUM + 1 ))
- ;;
-
- ?)
- usage
- esac
-done
-
-# Check the required parameters
-if [ -z "${HOSTNAME_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -n option!"
- usage
-fi
-
-if [ -z "${SRVADDR_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -s option!"
- usage
-fi
-
-if [ -z "${DEVICE_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -d option!"
- usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# get_nodenames
-#
-# Get all the node names in this failover group
-get_nodenames() {
- declare -i idx
- local nodename_str nodename
-
- nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for nodename in ${nodename_str}
- do
- NODE_NAMES[idx]=${nodename}
- idx=$idx+1
- done
-
- return 0
-}
-
-# get_check_srvIPaddrs
-#
-# Split the colon-separated -s argument (SRVADDR_OPT) into the SRV_IPADDRS
-# array, keeping the command line order, then run SCRIPT_VERIFY_SRVIP for
-# every (service IP, node) pair of the failover group.
-#
-# The previous implementation split the list with awk's "for (i in a)",
-# whose iteration order is unspecified; the split is now done by the shell
-# so the addresses keep the order the user gave.
-# Returns 1 as soon as one verification fails, 0 otherwise.
-get_check_srvIPaddrs() {
-    declare -i idx=0
-    declare -i i
-    local srvIPaddr
-
-    # Replace every ':' by a blank and let word splitting do the rest
-    for srvIPaddr in ${SRVADDR_OPT//:/ }
-    do
-        SRV_IPADDRS[idx]=${srvIPaddr}
-        idx=$((idx + 1))
-    done
-
-    for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
-        for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
-            # Check service IP address
-            verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
-                           "real IP of host ${NODE_NAMES[i]} are in the" \
-                           "same subnet..."
-            if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
-            then
-                return 1
-            fi
-            verbose_output "OK"
-        done
-    done
-
-    return 0
-}
-
-# stop_clumanager
-#
-# Run remote command to stop each node's clumanager service
-stop_clumanager() {
- declare -i idx
- local ret_str
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- ret_str=`${REMOTE} ${NODE_NAMES[idx]} \
- "/sbin/service clumanager stop" 2>&1`
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: stop_clumanager() error:"\
- "from host ${NODE_NAMES[idx]} - $ret_str!"
- fi
- done
-
- return 0
-}
-
-# get_srvname hostname target_devname
-#
-# Get the lustre target server name from the node @hostname
-get_srvname() {
- local host_name=$1
- local target_devname=$2
- local target_srvname=
- local ret_str
-
- # Execute remote command to get the target server name
- ret_str=`${REMOTE} ${host_name} \
- "${TUNEFS} --print ${target_devname} | grep Target:" 2>&1`
- if [ $? -ne 0 ]; then
- echo "`basename $0`: get_srvname() error:" \
- "from host ${host_name} - ${ret_str}"
- return 1
- fi
-
- if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
- ret_str=${ret_str#*Target: }
- target_srvname=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "${target_srvname}" ]; then
- echo "`basename $0`: get_srvname() error: Cannot get the"\
- "server name of target ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${target_srvname}
- return 0
-}
-
-# create_service
-#
-# Create service symlinks from /etc/init.d/lustre for Lustre targets
-create_service() {
- declare -i i
- local srv_dir
- local command ret_str
-
- # Initialize the TARGET_SRVNAMES array
- unset TARGET_SRVNAMES
-
- # Get Lustre target service names
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_SRVNAMES[i]}"
- return 1
- fi
- done
-
- # Construct remote command
- command=":"
- for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
- command=${command}";ln -s -f ${LUSTRE_SRV} ${INIT_DIR}/${TARGET_SRVNAMES[i]}"
- done
-
- # Execute remote command to create symlinks
- for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
- ret_str=`${REMOTE} ${NODE_NAMES[i]} "${command}" 2>&1`
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: create_service() error:" \
- "from host ${NODE_NAMES[i]} - ${ret_str}"
- return 1
- fi
- done
-
- return 0
-}
-
-# check_retval retval
-#
-# Check the return value of redhat-config-cluster-cmd
-check_retval() {
- if [ $1 -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
- return 1
- fi
-
- return 0
-}
-
-# add_services
-#
-# Add service tags into the cluster.xml file
-add_services() {
- declare -i idx
- declare -i i
-
- # Add service tag
- for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
- ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
- if ! check_retval $?; then
- return 1
- fi
-
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --userscript=${INIT_DIR}/${TARGET_SRVNAMES[i]}
- if ! check_retval $?; then
- return 1
- fi
-
- for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
- if ! check_retval $?; then
- return 1
- fi
- done
-
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --device=${TARGET_DEVNAMES[i]} \
- --mount \
- --mountpoint=${TARGET_MNTPNTS[i]} \
- --fstype=lustre
- if ! check_retval $?; then
- return 1
- fi
- done
-
- return 0
-}
-
-# gen_cluster_xml
-#
-# Run redhat-config-cluster-cmd to create the cluster.xml file
-gen_cluster_xml() {
- declare -i idx
- declare -i i
- local mcast_IPaddr
- local node_names
- local hbopt_str hbopt
-
- # Run redhat-config-cluster-cmd to generate cluster.xml
- # Add clumembd tag
- if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
- ${CONFIG_CMD} --clumembd --broadcast=yes
- ${CONFIG_CMD} --clumembd --multicast=no
- if ! check_retval $?; then
- return 1
- fi
- elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
- mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
- if [ -n "${mcast_IPaddr}" ]; then
- ${CONFIG_CMD} --clumembd --multicast=yes\
- --multicast_ipaddress=${mcast_IPaddr}
- if ! check_retval $?; then
- return 1
- fi
- fi
- fi
-
- # Add cluster tag
- node_names=
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- node_names=${node_names}"${NODE_NAMES[idx]} "
- done
-
- ${CONFIG_CMD} --cluster --name="${node_names}failover group"
- if ! check_retval $?; then
- return 1
- fi
-
- # Add member tag
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
- if ! check_retval $?; then
- return 1
- fi
- done
-
- # Add service tag
- if ! add_services; then
- return 1
- fi
-
- # Add other tags
- if [ -n "${HBOPT_OPT}"]; then
- hbopt_str=`echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for hbopt in ${hbopt_str}
- do
- ${CONFIG_CMD} ${hbopt}
- if ! check_retval $?; then
- return 1
- fi
- idx=$idx+1
- done
- fi
-
- return 0
-}
-
-# create_config
-#
-# Create the cluster.xml file and scp it to the each node's /etc/
-create_config() {
- CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME}
- declare -i idx
-
- # Create symlinks for Lustre services
- verbose_output "Creating symlinks for lustre target services in"\
- "${PRIM_NODENAME} failover group hosts..."
- if ! create_service; then
- return 1
- fi
- verbose_output "OK"
-
- if [ -s ${CONFIG_PRIMNODE} ]; then
- if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
- then
- verbose_output "${CONFIG_PRIMNODE} already exists."
- return 0
- else
- /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml
-
- # Add services into the cluster.xml file
- if ! add_services; then
- return 1
- fi
- fi
- else
- # Run redhat-config-cluster-cmd to generate cluster.xml
- verbose_output "Creating cluster.xml file for" \
- "${PRIM_NODENAME} failover group hosts..."
- if ! gen_cluster_xml; then
- return 1
- fi
- verbose_output "OK"
- fi
-
- /bin/cp -f ${CLUMAN_DIR}/cluster.xml ${CONFIG_PRIMNODE}
-
- # scp the cluster.xml file to all the nodes
- verbose_output "Remote copying cluster.xml file to" \
- "${PRIM_NODENAME} failover group hosts..."
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- if [ "${PRIM_NODENAME}" != "${NODE_NAMES[idx]}" ]; then
- /bin/cp -f ${CONFIG_PRIMNODE} \
- ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]}
- fi
-
- scp ${CONFIG_PRIMNODE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/cluster.xml
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
- verbose_output "OK"
-
- return 0
-}
-
-# Main flow
-# Each stage aborts the script with status 1 on failure.
-
-# Get all the node names
-get_nodenames || exit 1
-
-# Get and check all the service IP addresses
-get_check_srvIPaddrs || exit 1
-
-# Stop clumanager services
-verbose_output "Stopping clumanager service in the ${PRIM_NODENAME}"\
-               "failover group hosts..."
-stop_clumanager || exit 1
-verbose_output "OK"
-
-# Generate configuration files
-create_config || exit 1
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# gen_cluster_config.sh - generate a csv file from a running lustre cluster
-#
-# This script collects Lustre target information and HA software
-# configuration from a running Lustre cluster and writes it to a csv file.
-# Conversely, that csv file can be parsed by cluster_config.sh to configure
-# multiple Lustre servers in parallel.
-#
-# This script should be run on the MGS node.
-#
-################################################################################
-
-# usage
-#
-# Print the command line synopsis and option descriptions on stderr, then
-# terminate the script with exit status 1. Never returns to the caller.
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-t HAtype] [-h] [-v] [-f csv_filename]
-
- This script is used to collect lustre target informations and HA software
- configurations from a running lustre cluster to generate a csv file. It
- should be run on the MGS node.
-
- -t HAtype collect High-Availability software configurations
- The argument following -t is used to indicate the High-
- Availability software type. The HA software types which
- are currently supported are: hbv1 (Heartbeat v1), hbv2
- (Heartbeat v2) and cluman (CluManager).
- -h help
- -v verbose mode
- -f csv_filename designate a name for the csv file
- Default is cluster_config.csv.
-
-EOF
- exit 1
-}
-
-#**************************** Global variables ****************************#
-# csv file
-CSV_FILE=${CSV_FILE:-"cluster_config.csv"}
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-#REMOTE=${REMOTE:-"pdsh -R ssh -w"}
-
-# Command path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# Lustre proc files
-LUSTRE_PROC=${LUSTRE_PROC:-"/proc/fs/lustre"}
-LUSTRE_PROC_DEVICES=${LUSTRE_PROC}/devices
-
-LNET_PROC=${LNET_PROC:-"/proc/sys/lnet"}
-LNET_PROC_PEERS=${LNET_PROC}/peers
-
-# Default network module options
-DEFAULT_MOD_OPTS=${DEFAULT_MOD_OPTS:-"options lnet networks=tcp"}
-START_MARKER=${START_MARKER:-"# start lustre config"}
-END_MARKER=${END_MARKER:-"# end lustre config"}
-
-# Variables of HA software
-HATYPE_HBV1="hbv1" # Heartbeat version 1
-HATYPE_HBV2="hbv2" # Heartbeat version 2
-HATYPE_CLUMGR="cluman" # Cluster Manager
-
-HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
-CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
-HA_CF=${HA_DIR}/ha.cf # ha.cf file
-HA_RES=${HA_DIR}/haresources # haresources file
-HA_CIB=${CIB_DIR}/cib.xml
-
-CLUMAN_DIR=${CLUMAN_DIR:-"/etc"} # CluManager configuration directory
-CLUMAN_CONFIG=${CLUMAN_DIR}/cluster.xml
-
-# Lustre target obd device types
-MGS_TYPE=${MGS_TYPE:-"mgs"}
-MDT_TYPE=${MDT_TYPE:-"mds"}
-OST_TYPE=${OST_TYPE:-"obdfilter"}
-
-# The obd name of MGS target server
-MGS_SVNAME=${MGS_SVNAME:-"MGS"}
-
-# Hostnames of the lustre cluster nodes
-declare -a HOST_NAMES
-MGS_HOSTNAME=${MGS_HOSTNAME:-"`hostname`"} # Hostname of the MGS node
-
-# Configs of lustre targets in one cluster node
-declare -a TARGET_CONFIGS
-declare -a TARGET_SVNAMES TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS
-declare -a TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
-declare -a TARGET_FMTOPTS TARGET_MKFSOPTS TARGET_MNTOPTS TARGET_FAILNIDS
-declare -a HA_CONFIGS
-
-# Lustre target service types
-let "LDD_F_SV_TYPE_MDT = 0x0001"
-let "LDD_F_SV_TYPE_OST = 0x0002"
-let "LDD_F_SV_TYPE_MGS = 0x0004"
-
-# Permanent mount options for ext3 or ldiskfs
-ALWAYS_MNTOPTS=${ALWAYS_MNTOPTS:-"errors=remount-ro"}
-MDT_MGS_ALWAYS_MNTOPTS=${MDT_MGS_ALWAYS_MNTOPTS:-",iopen_nopriv,user_xattr"}
-OST_ALWAYS_MNTOPTS=${OST_ALWAYS_MNTOPTS:-",asyncdel"}
-
-# User-settable parameter keys
-PARAM_MGSNODE=${PARAM_MGSNODE:-"mgsnode="}
-PARAM_FAILNODE=${PARAM_FAILNODE:-"failnode="}
-
-# Block size
-L_BLOCK_SIZE=4096
-
-
-# Parse and validate the command line options.
-#   -t  HA software type: must be one of hbv1, hbv2 or cluman
-#   -h  print the usage and exit
-#   -v  verbose mode
-#   -f  name of the csv file to generate
-VERBOSE_OUTPUT=false
-while getopts "t:hvf:" OPTION; do
-    case $OPTION in
-    t)
-        HATYPE_OPT=$OPTARG
-        case "${HATYPE_OPT}" in
-        "${HATYPE_HBV1}"|"${HATYPE_HBV2}"|"${HATYPE_CLUMGR}")
-            ;;
-        *)
-            echo >&2 "$(basename $0): Invalid HA software type" \
-                     "- ${HATYPE_OPT}!"
-            usage
-            ;;
-        esac
-        ;;
-    h)
-        usage
-        ;;
-    v)
-        VERBOSE_OUTPUT=true
-        ;;
-    f)
-        CSV_FILE=$OPTARG
-        ;;
-    ?)
-        usage
-        ;;
-    esac
-done
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Verify the local host is the MGS node
-mgs_node() {
- if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \
- "not exist. Lustre kernel modules may not be loaded!"
- return 1
- fi
-
- if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \
- "empty. Lustre services may not be started!"
- return 1
- fi
-
- if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: This node is not a MGS node." \
- "The script should be run on the MGS node!"
- return 1
- fi
-
- return 0
-}
-
-# Check whether the reomte command is pdsh
-is_pdsh() {
- if [ "${REMOTE}" = "${REMOTE#pdsh}" ]; then
- return 1
- fi
-
- return 0
-}
-
-# remote_error fn_name host_addr ret_str
-# Verify the return result from remote command
-remote_error() {
- local fn_name host_addr ret_str
-
- fn_name=$1
- shift
- host_addr=$1
- shift
- ret_str=$*
-
- if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then
- echo "`basename $0`: ${fn_name}() error: remote error:" \
- "${ret_str}"
- return 0
- fi
-
- if [ -z "${ret_str}" ]; then
- echo "`basename $0`: ${fn_name}() error: remote error:" \
- "No results from remote!" \
- "Check network connectivity between the local host"\
- "and ${host_addr}!"
- return 0
- fi
-
- return 1
-}
-
-# nid2hostname nid
-# Convert @nid to hostname of the lustre cluster node
-nid2hostname() {
- local nid=$1
- local host_name=
- local addr nettype ip_addr
- local ret_str
-
- addr=${nid%@*}
- nettype=${nid#*@}
- if [ -z "${addr}" ]; then
- echo "`basename $0`: nid2hostname() error:" \
- "Invalid nid - \"${nid}\"!"
- return 1
- fi
-
- case "${nettype}" in
- lo*) host_name=`hostname`;;
- elan*) # QsNet
- # FIXME: Parse the /etc/elanhosts configuration file to
- # convert ElanID to hostname
- ;;
- gm*) # Myrinet
- # FIXME: Use /usr/sbin/gmlndnid to find the hostname of
- # the specified GM Global node ID
- ;;
- ptl*) # Portals
- # FIXME: Convert portal ID to hostname
- ;;
- *) # tcp, o2ib, cib, openib, iib, vib, ra
- ip_addr=${addr}
-
- # Execute remote command to get the host name
- ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: nid2hostname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1
-
- if is_pdsh; then
- host_name=`echo ${ret_str} | awk '{print $2}'`
- else
- host_name=`echo ${ret_str} | awk '{print $1}'`
- fi
- ;;
- esac
-
- echo ${host_name}
- return 0
-}
-
-# get_hostnames
-# Get lustre cluster node names
-get_hostnames() {
- declare -a HOST_NIDS
- declare -i idx # Index of HOST_NIDS array
- declare -i i # Index of HOST_NAMES array
-
- if ! mgs_node; then
- return 1
- fi
-
- if [ ! -e ${LNET_PROC_PEERS} ]; then
- echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \
- "exist. LNET kernel modules may not be loaded" \
- "or LNET network may not be up!"
- return 1
- fi
-
- HOST_NAMES[0]=${MGS_HOSTNAME} # MGS node
- HOST_NIDS[0]=${HOST_NAMES[0]}
-
- # Get the nids of the nodes which have contacted MGS
- idx=1
- for nid in `cat ${LNET_PROC_PEERS} | awk '{print $1}'`; do
- if [ "${nid}" = "nid" ]; then
- continue
- fi
-
- HOST_NIDS[idx]=${nid}
- let "idx += 1"
- done
-
- if [ ${idx} -eq 1 ]; then
- verbose_output "Only one node running in the lustre cluster." \
- "It's ${HOST_NAMES[0]}."
- return 0
- fi
-
- # Get the hostnames of the nodes
- for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do
- if [ -z "${HOST_NIDS[idx]}" ]; then
- echo >&2 "`basename $0`: get_hostnames() error:" \
- "Invalid nid - \"${HOST_NIDS[idx]}\"!"
- return 1
- fi
-
- HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]})
- if [ $? -ne 0 ]; then
- echo >&2 "${HOST_NAMES[i]}"
- return 1
- fi
-
- if [ "${HOST_NAMES[i]}" = "${HOST_NAMES[0]}" ]; then
- let "i -= 1"
- fi
- done
-
- return 0
-}
-
-#*************************** Network module options ***************************#
-
-# get_module_opts hostname
-# Get the network module options from the node @hostname
-get_module_opts() {
- local host_name=$1
- local ret_str
- local MODULE_CONF KERNEL_VER
- local ret_line line find_options
-
- MODULE_OPTS=${DEFAULT_MOD_OPTS}
-
- # Execute remote command to get the kernel version
- ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: get_module_opts() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1
-
- if is_pdsh; then
- KERNEL_VER=`echo ${ret_str} | awk '{print $2}'`
- else
- KERNEL_VER=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- # Get the module configuration file name
- if [ "${KERNEL_VER:0:3}" = "2.4" ]; then
- MODULE_CONF=/etc/modules.conf
- else
- MODULE_CONF=/etc/modprobe.conf
- fi
-
- # Execute remote command to get the lustre network module options
- find_options=false
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ "${line}" = "${START_MARKER}" ]; then
- find_options=true
- MODULE_OPTS=
- continue
- fi
-
- if ${find_options}; then
- if [ "${line}" = "${END_MARKER}" ]; then
- break
- fi
-
- if [ -z "${MODULE_OPTS}" ]; then
- MODULE_OPTS=${line}
- else
- MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
- fi
- fi
- done < <(${REMOTE} ${host_name} "cat ${MODULE_CONF}")
-
- if [ -z "${MODULE_OPTS}" ]; then
- MODULE_OPTS=${DEFAULT_MOD_OPTS}
- fi
-
- return 0
-}
-
-#************************ HA software configurations ************************#
-# is_ha_target hostname target_svname
-# Check whether the target service @target_svname was made to be high-available
-is_ha_target() {
- local host_name=$1
- local target_svname=$2
- local res_file
- local ret_str
-
- case "${HATYPE_OPT}" in
- "${HATYPE_HBV1}") res_file=${HA_RES};;
- "${HATYPE_HBV2}") res_file=${HA_CIB};;
- "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG};;
- esac
-
- # Execute remote command to check the resource file
- ret_str=`${REMOTE} ${host_name} \
- "grep ${target_svname} ${res_file}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: is_ha_target() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- [ "${ret_str}" = "${ret_str#*${target_svname}*}" ] && return 1
-
- return 0
-}
-
-# get_hb_configs hostname
-# Get the Heartbeat configurations from the node @hostname
-#
-# Reads the remote ha.cf (and, for Heartbeat v1, haresources) and fills the
-# globals HB_CHANNELS, HB_OPTIONS and SRV_IPADDRS, then sets HA_CONFIGS[i]
-# to "channels,service address,options" for every target that is_ha_target
-# reports as highly available.
-# Always returns 0: missing channel or service address information is
-# reported on stderr and leaves HA_CONFIGS empty.
-get_hb_configs() {
- local host_name=$1
- local ret_line line
- declare -i i
-
- unset HA_CONFIGS
- HB_CHANNELS=
- SRV_IPADDRS=
- HB_OPTIONS=
-
- # Execute remote command to get the configs of Heartbeat channels, etc
- while read -r ret_line; do
- # With pdsh the first word of every output line is dropped
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # Get rid of the comment line
- # (an empty line is skipped by the same test)
- [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
-
- # Lines mentioning "serial" or "cast" describe heartbeat channels;
- # several of them are joined with ':'
- if [ "${line}" != "${line#*serial*}" ] \
- || [ "${line}" != "${line#*cast*}" ]; then
- if [ -z "${HB_CHANNELS}" ]; then
- HB_CHANNELS=${line}
- else
- HB_CHANNELS=${HB_CHANNELS}:${line}
- fi
- fi
-
- # Lines mentioning any of these keywords are kept as heartbeat
- # options, also joined with ':'
- if [ "${line}" != "${line#*stonith*}" ] \
- || [ "${line}" != "${line#*ping*}" ] \
- || [ "${line}" != "${line#*respawn*}" ] \
- || [ "${line}" != "${line#*apiauth*}" ] \
- || [ "${line}" != "${line#*compression*}" ]; then
- if [ -z "${HB_OPTIONS}" ]; then
- HB_OPTIONS=${line}
- else
- HB_OPTIONS=${HB_OPTIONS}:${line}
- fi
- fi
- done < <(${REMOTE} ${host_name} "cat ${HA_CF}")
-
- # Without any channel there is nothing to record; this is reported but
- # is not treated as a failure
- if [ -z "${HB_CHANNELS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error:" \
- "There are no heartbeat channel configs in ${HA_CF}" \
- "of host ${host_name} or ${HA_CF} does not exist!"
- return 0
- fi
-
- # Execute remote command to get Heartbeat service address
- # (only done for Heartbeat v1, from the haresources file)
- if [ "${HATYPE_OPT}" = "${HATYPE_HBV1}" ]; then
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # Get rid of the empty line
- [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\
- && continue
-
- # Get rid of the comment line
- [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
-
- # The service address is the second word of the first line that
- # has one
- SRV_IPADDRS=`echo ${line} | awk '{print $2}'`
- [ -n "${SRV_IPADDRS}" ] && break
- done < <(${REMOTE} ${host_name} "cat ${HA_RES}")
-
- if [ -z "${SRV_IPADDRS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error: There"\
- "are no service address in ${HA_RES} of host"\
- "${host_name} or ${HA_RES} does not exist!"
- return 0
- fi
- fi
-
- # Construct HA configuration items
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to check whether this target service
- # was made to be high-available
- if is_ha_target ${host_name} ${TARGET_SVNAMES[i]}; then
- HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
- fi
- done
-
- return 0
-}
-
-# get_cluman_configs hostname
-#
-# Placeholder for collecting the CluManager configuration of node
-# @hostname: for now it only clears HA_CONFIGS and returns 0.
-get_cluman_configs() {
-    unset HA_CONFIGS
-    local host_name=$1
-
-    # FIXME: Get CluManager configurations
-    return 0
-}
-
-# get_ha_configs hostname
-# Get the HA software configurations from the node @hostname
-get_ha_configs() {
- local host_name=$1
-
- unset HA_CONFIGS
-
- if [ -z "${HATYPE_OPT}" ]; then
- return 0
- fi
-
- verbose_output "Collecting HA software configurations from host $1..."
-
- case "${HATYPE_OPT}" in
- "${HATYPE_HBV1}" | "${HATYPE_HBV2}") # Heartbeat
- if ! get_hb_configs ${host_name}; then
- return 1
- fi
- ;;
- "${HATYPE_CLUMGR}") # CluManager
- if ! get_cluman_configs ${host_name}; then
- return 1
- fi
- ;;
- esac
-
- verbose_output "OK"
- return 0
-}
-
-#*********************** Lustre targets configurations ***********************#
-
-# get_svnames hostname
-# Get the lustre target server obd names from the node @hostname
-get_svnames(){
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the TARGET_SVNAMES array
- unset TARGET_SVNAMES
-
- # Execute remote command to the node @hostname and figure out what
- # lustre services are running.
- i=0
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ -z "`echo ${line} | grep ${MGS_TYPE}`" ] \
- && [ -z "`echo ${line} | grep ${MDT_TYPE}`" ] \
- && [ -z "`echo ${line} | grep ${OST_TYPE}`" ]; then
- continue
- fi
-
- # Get target server name
- TARGET_SVNAMES[i]=`echo ${line} | awk '{print $4}'`
- if [ -n "${TARGET_SVNAMES[i]}" ]; then
- let "i += 1"
- else
- echo >&2 "`basename $0`: get_svnames() error: Invalid"\
- "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\
- "- \"${line}\"!"
- return 1
- fi
- done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}")
-
- if [ $i -eq 0 ]; then
- verbose_output "There are no lustre services running" \
- "on the node ${host_name}!"
- fi
-
- return 0
-}
-
-# is_loopdev devname
-# Check whether a device @devname is a loop device or not
-is_loopdev() {
- local devname=$1
-
- if [ -z "${devname}" ] || \
- [ -z "`echo ${devname}|awk '/\/dev\/loop[[:digit:]]/ {print $0}'`" ]
- then
- return 1
- fi
-
- return 0
-}
-
-# get_devname hostname svname
-# Get the device name of lustre target @svname from node @hostname
-get_devname() {
- local host_name=$1
- local target_svname=$2
- local target_devname=
- local ret_str
- local target_type target_obdtype mntdev_file
-
- if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
- # Execute remote command to get the device name of mgs target
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/findfs LABEL=${target_svname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]
- then
- echo "`basename $0`: get_devname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- fi
-
- if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]; then
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devname=`echo ${ret_str} | awk '{print $1}'`
- fi
- fi
- else # Execute remote command to get the device name of mdt/ost target
- target_type=`echo ${target_svname} | cut -d - -f 2`
- target_obdtype=${target_type:0:3}_TYPE
-
- mntdev_file=${LUSTRE_PROC}/${!target_obdtype}/${target_svname}/mntdev
-
- ret_str=`${REMOTE} ${host_name} "cat ${mntdev_file}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_devname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if [ "${ret_str}" != "${ret_str#*No such file*}" ]; then
- echo "`basename $0`: get_devname() error:"\
- "${mntdev_file} does not exist in ${host_name}!"
- return 1
- else
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devname=`echo ${ret_str} | awk '{print $1}'`
- fi
- fi
- fi
-
- echo ${target_devname}
- return 0
-}
-
-# get_devsize hostname target_devname
-# Get the device size (KB) of @target_devname from node @hostname
-get_devsize() {
- local host_name=$1
- local target_devname=$2
- local target_devsize=
- local ret_str
-
- # Execute remote command to get the device size
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/blockdev --getsize ${target_devname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_devsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- target_devsize=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devsize=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_devsize() error: can't" \
- "get device size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let " target_devsize /= 2"
-
- echo ${target_devsize}
- return 0
-}
-
-# get_realdevname hostname loop_dev
-# Get the real device name of loop device @loop_dev from node @hostname
-get_realdevname() {
- local host_name=$1
- local loop_dev=$2
- local target_devname=
- local ret_str
-
- # Execute remote command to get the real device name
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/losetup ${loop_dev}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_realdevname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $4}' \
- | sed 's/^(//' | sed 's/)$//'`
- else
- target_devname=`echo ${ret_str} | awk '{print $3}' \
- | sed 's/^(//' | sed 's/)$//'`
- fi
-
- if [ "${ret_str}" != "${ret_str#*No such*}" ] \
- || [ -z "${target_devname}" ]; then
- echo "`basename $0`: get_realdevname() error: can't" \
- "get info on device ${loop_dev} in ${host_name}!"
- return 1
- fi
-
- echo ${target_devname}
- return 0
-}
-
-# get_mntpnt hostname target_devname
-# Get the lustre target mount point from the node @hostname
-get_mntpnt(){
- local host_name=$1
- local target_devname=$2
- local mnt_point=
- local ret_str
-
- # Execute remote command to get the mount point
- ret_str=`${REMOTE} ${host_name} \
- "cat /etc/mtab | grep ${target_devname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_mntpnt() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- mnt_point=`echo ${ret_str} | awk '{print $3}'`
- else
- mnt_point=`echo ${ret_str} | awk '{print $2}'`
- fi
-
- if [ -z "${mnt_point}" ]; then
- echo "`basename $0`: get_mntpnt() error: can't" \
- "get the mount point of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${mnt_point}
- return 0
-}
-
-# get_devnames hostname
-# Get the lustre target device names, mount points
-# and loop device sizes from the node @hostname
-get_devnames(){
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the arrays
- unset TARGET_DEVNAMES
- unset TARGET_DEVSIZES
- unset TARGET_MNTPNTS
-
- for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do
- TARGET_DEVNAMES[i]=$(get_devname ${host_name} \
- ${TARGET_SVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
- return 1
- fi
-
- if [ -z "${TARGET_DEVNAMES[i]}" ]; then
- if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then
- verbose_output "There exists combo mgs/mdt"\
- "target in ${host_name}."
- continue
- else
- echo >&2 "`basename $0`: get_devname() error:"\
- "No device corresponding to target" \
- "${TARGET_SVNAMES[i]} in ${host_name}!"
- return 1
- fi
- fi
-
- # Get the mount point of the target
- TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_MNTPNTS[i]}"
- return 1
- fi
-
- # The target device is a loop device?
- if [ -n "${TARGET_DEVNAMES[i]}" ] \
- && is_loopdev ${TARGET_DEVNAMES[i]}; then
- # Get the device size
- TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
- return 1
- fi
-
- # Get the real device name
- TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
- return 1
- fi
- fi
- done
-
- return 0
-}
-
-# is_target target_svtype ldd_flags
-# Check the service type of a lustre target
-is_target() {
- case "$1" in
- "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";;
- "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";;
- "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";;
- "*")
- echo >&2 "`basename $0`: is_target() error: Invalid" \
- "target service type - \"$1\"!"
- return 1
- ;;
- esac
-
- if [ ${ret} -eq 0 ]; then
- return 1
- fi
-
- return 0
-}
-
-# get_devtype ldd_flags
-# Get the service type of a lustre target from @ldd_flags
-get_devtype() {
- local target_devtype=
-
- if [ -z "${flags}" ]; then
- echo "`basename $0`: get_devtype() error: Invalid" \
- "ldd_flags - it's value is null!"
- return 1
- fi
-
- if is_target "mgs" $1; then
- if is_target "mdt" $1; then
- target_devtype="mgs|mdt"
- else
- target_devtype="mgs"
- fi
- elif is_target "mdt" $1; then
- target_devtype="mdt"
- elif is_target "ost" $1; then
- target_devtype="ost"
- else
- echo "`basename $0`: get_devtype() error: Invalid" \
- "ldd_flags - \"$1\"!"
- return 1
- fi
-
- echo ${target_devtype}
- return 0
-}
-
-# get_mntopts ldd_mount_opts
-# Get the user-specified lustre target mount options from @ldd_mount_opts
-get_mntopts() {
- local mount_opts=
- local ldd_mount_opts=$1
-
- mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}"
- mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}"
- mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}"
- mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`"
-
- [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \
- || echo ${mount_opts}
-
- return 0
-}
-
-# get_mgsnids ldd_params
-# Get the mgs nids of lustre target from @ldd_params
-get_mgsnids() {
- local mgs_nids=
- local param=
- local ldd_params="$*"
-
- for param in ${ldd_params}; do
- if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then
- if [ -n "${mgs_nids}" ]; then
- mgs_nids=${mgs_nids}:`echo ${param#${PARAM_MGSNODE}}`
- else
- mgs_nids=`echo ${param#${PARAM_MGSNODE}}`
- fi
- fi
- done
-
- [ "${mgs_nids}" != "${mgs_nids#*,*}" ] && echo "\""${mgs_nids}"\"" || echo ${mgs_nids}
-
- return 0
-}
-
-# ip2hostname nids
-# Convert IP addresses in @nids into hostnames
-ip2hostname() {
- local orig_nids=$1
- local nids=
- local nid nids_str
- local nettype
-
- nids_str=`echo ${orig_nids}|awk '{split($orig_nids, a, ",")}\
- END {for (i in a) print a[i]}'`
- for nid in ${nids_str}; do
- nettype=${nid#*@}
-
- case "${nettype}" in
- lo* | elan* | gm* | ptl*) ;;
- *)
- nid=$(nid2hostname ${nid})
- if [ $? -ne 0 ]; then
- echo "${nid}"
- return 1
- fi
-
- nid=${nid}@${nettype}
- ;;
- esac
-
- if [ -z "${nids}" ]; then
- nids=${nid}
- else
- nids=${nids},${nid}
- fi
- done
-
- echo ${nids}
- return 0
-}
-
-# get_failnids ldd_params
-# Get the failover nids of lustre target from @ldd_params
-get_failnids() {
- local fail_nids= # failover nids in one failover node
- local all_fail_nids= # failover nids in all failover nodes
- # of this target
- local param=
- local ldd_params="$*"
-
- for param in ${ldd_params}; do
- if [ -n "`echo ${param}|awk '/failnode=/ {print $0}'`" ]; then
- fail_nids=`echo ${param#${PARAM_FAILNODE}}`
- fail_nids=$(ip2hostname ${fail_nids})
- if [ $? -ne 0 ]; then
- echo >&2 "${fail_nids}"
- return 1
- fi
-
- if [ -n "${all_fail_nids}" ]; then
- all_fail_nids=${all_fail_nids}:${fail_nids}
- else
- all_fail_nids=${fail_nids}
- fi
- fi
- done
-
- [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \
- && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids}
-
- return 0
-}
-
-# get_fmtopts target_devname hostname ldd_params
-# Get other format options of the lustre target @target_devname from @ldd_params
-get_fmtopts() {
- local target_devname=$1
- local host_name=$2
- shift
- shift
- local ldd_params="$*"
- local param=
- local fmt_opts=
-
- for param in ${ldd_params}; do
- [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue
- [ -n "`echo ${param}|awk '/failnode=/ {print $0}'`" ] && continue
-
- if [ -n "${param}" ]; then
- if [ -n "${fmt_opts}" ]; then
- fmt_opts=${fmt_opts}" "${param}
- else
- fmt_opts=${param}
- fi
- fi
- done
-
- echo ${fmt_opts}
- return 0
-}
-
-# get_ldds hostname
-# Get the lustre target disk data from the node @hostname
-get_ldds(){
- declare -i i
- local host_name=$1
- local ret_line line
- local flags mnt_opts params
-
- # Initialize the arrays
- unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
- unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS
-
- # Get lustre target device type, fsname, index, etc.
- # from MOUNT_DATA_FILE. Using tunefs.lustre to read it.
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- flags=
- mnt_opts=
- params=
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to read MOUNT_DATA_FILE
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then
- TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then
- TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then
- flags=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then
- mnt_opts=`echo ${line}|awk '{print $0}'`
- mnt_opts=`echo ${mnt_opts#Persistent mount opts: }`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then
- params=`echo ${line}|awk '{print $0}'`
- params=`echo ${params#Parameters:}`
- break
- fi
- done < <(${REMOTE} ${host_name} "${TUNEFS} --print ${TARGET_DEVNAMES[i]} 2>/dev/null")
-
- if [ -z "${flags}" ]; then
- echo >&2 "`basename $0`: get_ldds() error: Invalid" \
- "ldd_flags of target ${TARGET_DEVNAMES[i]}" \
- "in host ${host_name} - it's value is null!"\
- "Check ${TUNEFS} command!"
- return 1
- fi
-
- if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \
- || is_target "mgs" ${flags}; then
- TARGET_INDEXES[i]=
- fi
-
- [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]=
-
- # Get the lustre target service type
- TARGET_DEVTYPES[i]=$(get_devtype ${flags})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVTYPES[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get the lustre target mount options
- TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}")
-
- # Get mgs nids of the lustre target
- TARGET_MGSNIDS[i]=$(get_mgsnids "${params}")
-
- # Get failover nids of the lustre target
- TARGET_FAILNIDS[i]=$(get_failnids "${params}")
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FAILNIDS[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get other format options of the lustre target
- TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FMTOPTS[i]}"
- return 1
- fi
-
- if [ -n "${TARGET_DEVSIZES[i]}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}"
- else
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}"
- fi
- fi
-
- if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then
- TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\""
- fi
- done
-
- return 0
-}
-
-# get_journalsize target_devname hostname
-# Get the journal size of lustre target @target_devname from @hostname
-get_journalsize() {
- local target_devname=$1
- local host_name=$2
- local journal_inode=
- local journal_size=
- local ret_str
-
- # Execute remote command to get the journal inode number
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Journal inode:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Journal inode:*}}
- journal_inode=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal inode of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the journal size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \
- 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%User:*}}
- journal_size=`echo ${ret_str} | awk '{print $6}'`
- if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "journal_size /= 1024*1024" # MB
-
- echo ${journal_size}
- return 0
-}
-
-# get_defaultjournalsize target_devsize
-# Calculate the default journal size from target device size @target_devsize
-get_defaultjournalsize() {
- declare -i target_devsize=$1
- declare -i journal_size=0
- declare -i max_size base_size
-
- let "base_size = 1024*1024"
- if [ ${target_devsize} -gt ${base_size} ]; then # 1GB
- let "journal_size = target_devsize / 102400"
- let "journal_size *= 4"
- fi
-
- let "max_size = 102400 * L_BLOCK_SIZE"
- let "max_size >>= 20" # 400MB
-
- if [ ${journal_size} -gt ${max_size} ]; then
- let "journal_size = max_size"
- fi
-
- echo ${journal_size}
- return 0
-}
-
-# get_J_opt hostname target_devname target_devsize
-# Get the mkfs -J option of lustre target @target_devname
-# from the node @hostname
-get_J_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devsize=$3
- local journal_size=
- local default_journal_size=
- local journal_opt=
-
- # Get the real journal size of lustre target
- journal_size=$(get_journalsize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${journal_size}"
- return 1
- fi
-
- # Get the default journal size of lustre target
- default_journal_size=$(get_defaultjournalsize ${target_devsize})
- if [ "${default_journal_size}" = "0" ]; then
- let "default_journal_size = L_BLOCK_SIZE/1024"
- fi
-
- if [ "${journal_size}" != "${default_journal_size}" ]; then
- journal_opt="-J size=${journal_size}"
- fi
-
- echo ${journal_opt}
- return 0
-}
-
-# get_ratio target_devname hostname
-# Get the bytes/inode ratio of lustre target @target_devname from @hostname
-get_ratio() {
- local target_devname=$1
- local host_name=$2
- local inode_count=
- local block_count=
- local ratio=
- local ret_str
-
- # Execute remote command to get the inode count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode count:*}}
- inode_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get inode count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Block count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Block count:*}}
- block_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get block count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "ratio = block_count*L_BLOCK_SIZE/inode_count"
-
- echo ${ratio}
- return 0
-}
-
-# get_default_ratio target_devtype target_devsize
-# Calculate the default bytes/inode ratio from target type @target_devtype
-get_default_ratio() {
- local target_devtype=$1
- declare -i target_devsize=$2
- local ratio=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- ratio=4096;;
- "ost")
- [ ${target_devsize} -gt 1000000 ] && ratio=16384;;
- esac
-
- [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE}
-
- echo ${ratio}
- return 0
-}
-
-# get_i_opt hostname target_devname target_devtype target_devsize
-# Get the mkfs -i option of lustre target @target_devname
-# from the node @hostname
-get_i_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devtype=$3
- local target_devsize=$4
- local ratio=
- local default_ratio=
- local ratio_opt=
-
- # Get the real bytes/inode ratio of lustre target
- ratio=$(get_ratio ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${ratio}"
- return 1
- fi
-
- # Get the default bytes/inode ratio of lustre target
- default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize})
-
- if [ "${ratio}" != "${default_ratio}" ]; then
- ratio_opt="-i ${ratio}"
- fi
-
- echo ${ratio_opt}
- return 0
-}
-
-# get_isize target_devname hostname
-# Get the inode size of lustre target @target_devname from @hostname
-get_isize() {
- local target_devname=$1
- local host_name=$2
- local inode_size=
- local ret_str
-
- # Execute remote command to get the inode size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode size:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_isize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode size:*}}
- inode_size=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_isize() error: can't" \
- "get inode size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${inode_size}
- return 0
-}
-
-# get_default_isize target_devtype
-# Calculate the default inode size of lustre target type @target_devtype
-get_default_isize() {
- local target_devtype=$1
- local inode_size=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- # FIXME: How to get the value of "--stripe-count-hint=#N" option
- inode_size=512;;
- "ost")
- inode_size=256;;
- esac
-
- [ -z "${inode_size}" ] && inode_size=128
-
- echo ${inode_size}
- return 0
-}
-
-# get_I_opt hostname target_devname target_devtype
-# Get the mkfs -I option of lustre target @target_devname
-# from the node @hostname
-get_I_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devtype=$3
- local isize=
- local default_isize=
- local isize_opt=
-
- # Get the real inode size of lustre target
- isize=$(get_isize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${isize}"
- return 1
- fi
-
- # Get the default inode size of lustre target
- default_isize=$(get_default_isize ${target_devtype})
-
- if [ "${isize}" != "${default_isize}" ]; then
- isize_opt="-I ${isize}"
- fi
-
- echo ${isize_opt}
- return 0
-}
-
-# get_mkfsopts hostname
-# Get the mkfs options of lustre targets from the node @hostname
-get_mkfsopts(){
- declare -i i
- local host_name=$1
- local journal_opt
- local ratio_opt
- local inode_size_opt
-
- # Initialize the arrays
- unset TARGET_MKFSOPTS
-
- # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- journal_opt=
- ratio_opt=
- inode_size_opt=
-
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- if [ -z "${TARGET_DEVSIZES[i]}" ]; then
- # Get the device size
- TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
- return 1
- fi
- fi
-
- # Get the journal option
- journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVSIZES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${journal_opt}"
- return 1
- fi
-
- if [ -n "${journal_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${journal_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}"
- fi
- fi
-
- # Get the bytes-per-inode ratio option
- ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${ratio_opt}"
- return 1
- fi
-
- if [ -n "${ratio_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${ratio_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}"
- fi
- fi
-
- # Get the inode size option
- inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVTYPES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${inode_size_opt}"
- return 1
- fi
-
- if [ -n "${inode_size_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${inode_size_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}"
- fi
- fi
-
- if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then
- TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\""
- fi
- done
- return 0
-}
-
-# get_target_configs hostname
-# Get the lustre target informations from the node @hostname
-get_target_configs() {
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the arrays
- unset TARGET_CONFIGS
-
- # Get lustre target server names
- if ! get_svnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device names, mount points and loop device sizes
- if ! get_devnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device type, fsname, index, etc.
- if ! get_ldds ${host_name}; then
- return 1
- fi
-
- # Get mkfs options of lustre targets
- if ! get_mkfsopts ${host_name}; then
- return 1
- fi
-
- # Construct lustre target configs
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
- TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]}
- done
-
- return 0
-}
-
-# get_configs hostname
-# Get all the informations needed to generate a csv file from
-# the node @hostname
-get_configs() {
- # Check the hostname
- if [ -z "$1" ]; then
- echo >&2 "`basename $0`: get_configs() error:" \
- "Missing hostname!"
- return 1
- fi
-
- # Get network module options
- verbose_output ""
- verbose_output "Collecting network module options from host $1..."
- if ! get_module_opts $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get lustre target informations
- verbose_output "Collecting Lustre targets informations from host $1..."
- if ! get_target_configs $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get HA software configurations
- if ! get_ha_configs $1; then
- return 1
- fi
-
- return 0
-}
-
-
-# Generate the csv file from the lustre cluster
-gen_csvfile() {
- declare -i idx
- declare -i i
- local line
-
- # Get lustre cluster node names
- verbose_output "Collecting Lustre cluster node names..."
- if ! get_hostnames; then
- return 1
- fi
- verbose_output "OK"
-
- : > ${CSV_FILE}
-
- for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
- # Collect informations
- if ! get_configs ${HOST_NAMES[idx]}; then
- rm -f ${CSV_FILE}
- return 1
- fi
-
- # Append informations to the csv file
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- if [ -z "${HA_CONFIGS[i]}" ]; then
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]}
- else
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]}
- fi
- verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \
- "in host ${HOST_NAMES[idx]} are as follows:"
- verbose_output "${line}"
- echo "" >> ${CSV_FILE}
- echo "${line}" >> ${CSV_FILE}
- done
- done
-
- return 0
-}
-
-# Main flow
-echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} START ********"
-if ! gen_csvfile; then
- exit 1
-fi
-echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} OK **********"
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# gen_hb_config.sh - script for generating the Heartbeat HA software's
-# configuration files
-#
-###############################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` <-r HBver> <-n hostnames> <-c heartbeat channels>
- [-s service address] [-o heartbeat options] [-v]
- <-d target device> [-d target device...]
-
- -r HBver the version of Heartbeat software
- The Heartbeat software versions which are curr-
- ently supported are: hbv1 (Heartbeat version 1)
- and hbv2 (Heartbeat version 2).
- -n hostnames the nodenames of the primary node and its fail-
- overs
- Multiple nodenames are separated by colon (:)
- delimeter. The first one is the nodename of the
- primary node, the others are failover nodenames.
- -c heartbeat channels the methods and devices to send/rcv heartbeats on
- Multiple channels are separated by colon (:)
- delimeter.
- -s service address the IP address to failover, required by hbv1
- -o heartbeat options a "catchall" for other heartbeat configuration
- options
- Multiple options are separated by colon (:)
- delimeter.
- -v verbose mode
- -d target device the target device name and mount point
- The device name and mount point are separated by
- colon (:) delimeter.
-
-EOF
- exit 1
-}
-
-#****************************** Global variables ******************************#
-# Scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"."}
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/verify_serviceIP.sh
-SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/mon_cf.generator.sh # create mon.cf file
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Lustre utilities path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# Heartbeat tools
-HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib/heartbeat"} # Heartbeat tools path
-CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
-
-# Configuration directories
-HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
-MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory
-CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
-
-# Service directories and names
-INIT_DIR=${INIT_DIR:-"/etc/init.d"}
-HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources
-LUSTRE_SRV=${LUSTRE_SRV:-"${INIT_DIR}/lustre"} # service script for lustre
-LUSTRE_RESMON_SCRIPT=${LUSTRE_RESMON_SCRIPT:-"${HARES_DIR}/lustre-resource-monitor"}
-
-TMP_DIR="/tmp/heartbeat" # temporary directory
-HACF_TEMP=${TMP_DIR}/ha.cf.temp
-AUTHKEYS_TEMP=${TMP_DIR}/authkeys.temp
-
-HBVER_HBV1="hbv1" # Heartbeat version 1
-HBVER_HBV2="hbv2" # Heartbeat version 2
-
-declare -a NODE_NAMES # node names in the failover group
-
-# Lustre target device names, service names and mount points
-declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
-declare -i TARGET_NUM=0 # number of targets
-
-
-# Get and check the positional parameters
-VERBOSE_OUTPUT=false
-while getopts "r:n:c:s:o:vd:" OPTION; do
- case $OPTION in
- r)
- HBVER_OPT=$OPTARG
- if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
- && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat software" \
- "version - ${HBVER_OPT}!"
- usage
- fi
- ;;
- n)
- HOSTNAME_OPT=$OPTARG
- PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
- if [ -z "${PRIM_NODENAME}" ]; then
- echo >&2 $"`basename $0`: Missing primary nodename!"
- usage
- fi
- HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
- if [ ${HOSTNAME_NUM} -lt 2 ]; then
- echo >&2 $"`basename $0`: Missing failover nodenames!"
- usage
- fi
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
- then
- echo >&2 $"`basename $0`: Heartbeat version 1 can" \
- "only support 2 nodes!"
- usage
- fi
- ;;
- c)
- HBCHANNEL_OPT=$OPTARG
- HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
- | sed 's/"$//'`
- if [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*serial*}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*bcast*}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*ucast*}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*mcast*}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
- "- \"${HBCHANNEL_OPT}\"!"
- usage
- fi
- ;;
- s)
- SRVADDR_OPT=$OPTARG
- ;;
- o)
- HBOPT_OPT=$OPTARG
- HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
- ;;
- v)
- VERBOSE_OUTPUT=true
- ;;
- d)
- DEVICE_OPT=$OPTARG
- TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
- TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
- if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing target device name!"
- usage
- fi
- if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing mount point for target"\
- "${TARGET_DEVNAMES[TARGET_NUM]}!"
- usage
- fi
- TARGET_NUM=$(( TARGET_NUM + 1 ))
- ;;
- ?)
- usage
- esac
-done
-
-# Check the required parameters
-if [ -z "${HBVER_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -r option!"
- usage
-fi
-
-if [ -z "${HOSTNAME_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -n option!"
- usage
-fi
-
-if [ -z "${HBCHANNEL_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -c option!"
- usage
-fi
-
-if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a -z "${SRVADDR_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -s option!"
- usage
-fi
-
-if [ -z "${DEVICE_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -d option!"
- usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# get_nodenames
-#
-# Get all the node names in this failover group
-get_nodenames() {
- declare -i idx
- local nodename_str nodename
-
- nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for nodename in ${nodename_str}
- do
- NODE_NAMES[idx]=${nodename}
- idx=$idx+1
- done
-
- return 0
-}
-
-# check_srvIPaddr
-#
-# Check service IP address in this failover group
-check_srvIPaddr() {
- declare -i idx
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- # Check service IP address
- verbose_output "Verifying service IP ${SRVADDR_OPT} and" \
- "real IP of host ${NODE_NAMES[idx]} are in the" \
- "same subnet..."
- if ! ${SCRIPT_VERIFY_SRVIP} ${SRVADDR_OPT} ${NODE_NAMES[idx]}
- then
- return 1
- fi
- verbose_output "OK"
- done
-
- return 0
-}
-
-# stop_heartbeat
-#
-# Run remote command to stop each node's heartbeat service
-stop_heartbeat() {
- declare -i idx
- local ret_str
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- ret_str=`${REMOTE} ${NODE_NAMES[idx]} \
- "/sbin/service heartbeat stop" 2>&1`
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: stop_heartbeat() error:"\
- "from host ${NODE_NAMES[idx]} - $ret_str!"
- fi
- done
-
- return 0
-}
-
-# get_srvname hostname target_devname
-#
-# Print the Lustre target server name of @target_devname as reported by
-# the node @hostname. On failure an error text is printed on stdout
-# instead and 1 is returned.
-get_srvname() {
-    local host_name=$1
-    local target_devname=$2
-    local remote_out
-    local srv_name=
-
-    # Fetch the "Target:" line of the tunefs output from the remote node
-    if ! remote_out=`${REMOTE} ${host_name} \
-            "${TUNEFS} --print ${target_devname} | grep Target:" 2>&1`; then
-        echo "`basename $0`: get_srvname() error:" \
-             "from host ${host_name} - ${remote_out}"
-        return 1
-    fi
-
-    # The server name is the first word following "Target: "
-    case "${remote_out}" in
-    *"Target: "*)
-        remote_out=${remote_out#*Target: }
-        srv_name=`echo ${remote_out} | awk '{print $1}'`
-        ;;
-    esac
-
-    if [ -z "${srv_name}" ]; then
-        echo "`basename $0`: get_srvname() error: Cannot get the" \
-             "server name of target ${target_devname} in ${host_name}!"
-        return 1
-    fi
-
-    echo ${srv_name}
-    return 0
-}
-
-# create_service
-#
-# Look up the service name of every target on the primary node, then
-# create on each node of the failover group one symlink per service name
-# pointing to ${LUSTRE_SRV}. For Heartbeat version 1 the resource
-# monitor script is additionally copied as <service name>-mon.
-# Returns 1 on the first failure, 0 otherwise.
-create_service() {
-    declare -i n
-    local link_dir remote_cmd output node
-
-    # Start from an empty TARGET_SRVNAMES array
-    unset TARGET_SRVNAMES
-
-    # Collect the service names; on failure get_srvname prints its
-    # error text on stdout, which is forwarded to stderr here
-    for ((n = 0; n < ${#TARGET_DEVNAMES[@]}; n++)); do
-        if ! TARGET_SRVNAMES[n]=$(get_srvname ${PRIM_NODENAME} \
-                ${TARGET_DEVNAMES[n]}); then
-            echo >&2 "${TARGET_SRVNAMES[n]}"
-            return 1
-        fi
-    done
-
-    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
-        link_dir=${HARES_DIR}
-    else
-        link_dir=${INIT_DIR}
-    fi
-
-    # Build one command string that is run on every node
-    remote_cmd=":"
-    for ((n = 0; n < ${#TARGET_SRVNAMES[@]}; n++)); do
-        remote_cmd="${remote_cmd};ln -s -f ${LUSTRE_SRV} ${link_dir}/${TARGET_SRVNAMES[n]}"
-        if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
-            remote_cmd="${remote_cmd};/bin/cp -f ${LUSTRE_RESMON_SCRIPT} ${HARES_DIR}/${TARGET_SRVNAMES[n]}-mon"
-        fi
-    done
-
-    # Run the command on each node of the failover group
-    for node in "${NODE_NAMES[@]}"; do
-        if ! output=`${REMOTE} ${node} "${remote_cmd}" 2>&1`; then
-            echo >&2 "`basename $0`: create_service() error:" \
-                     "from host ${node} - ${output}"
-            return 1
-        fi
-    done
-
-    return 0
-}
-
-# create_template
-#
-# Create the templates for ha.cf and authkeys files.
-#
-# The ha.cf template is written to ${HACF_TEMP} and is always overwritten;
-# its contents depend on the Heartbeat version selected with -r.
-# The authkeys template ${AUTHKEYS_TEMP} is only written when it does not
-# exist yet or is empty. Always returns 0.
-create_template() {
- /bin/mkdir -p ${TMP_DIR}
-
- # Create the template for ha.cf
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- cat >${HACF_TEMP} <<EOF
-debugfile /var/log/ha-debug
-logfile /var/log/ha-log
-logfacility local0
-keepalive 2
-deadtime 30
-initdead 120
-
-EOF
- elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- cat >${HACF_TEMP} <<EOF
-use_logd yes
-keepalive 1
-deadtime 10
-initdead 60
-
-EOF
- fi
-
- # Create the template for authkeys
- # NOTE(review): the authentication key below is a fixed, publicly known
- # string - consider replacing it with a site specific secret.
- if [ ! -s ${AUTHKEYS_TEMP} ]; then
- cat >${AUTHKEYS_TEMP} <<EOF
-auth 1
-1 sha1 HelloLustre!
-EOF
- fi
-
- return 0
-}
-
-# gen_udpport
-#
-# Generate the UDP port number for Heartbeat bcast/ucast communication.
-# The default value for udpport option in ha.cf is 694. If there are multiple
-# bcast failover groups on the same subnet, this value should be different for
-# each of the failover groups.
-#
-# The port already recorded for the primary node is reused when present.
-# Otherwise the highest port recorded in ${TMP_DIR}/udpport.* determines
-# the new one: 694 if none is recorded, 49152 if the highest is 694, else
-# highest + 1. The chosen port is stored in the udpport file of every node
-# in NODE_NAMES and printed on stdout.
-# Returns 1 if the new port would exceed 65535, 0 otherwise.
-gen_udpport() {
-    local port_file
-    declare -i default_port=694
-    declare -i dynamic_port=49152
-    declare -i port=0
-    declare -i tmp_port
-    declare -i idx
-
-    UDPPORT_PRIMNODE=${TMP_DIR}$"/udpport."${PRIM_NODENAME}
-
-    if [ -s ${UDPPORT_PRIMNODE} ]; then
-        cat ${UDPPORT_PRIMNODE}
-        return 0
-    fi
-
-    # Get the current maximum UDP port number in the cluster.
-    # A glob is used instead of parsing "ls" output; when nothing matches
-    # the pattern stays literal and is skipped by the -f test.
-    for port_file in ${TMP_DIR}/udpport.*; do
-        [ -f "${port_file}" ] || continue
-        tmp_port=$(cat "${port_file}") || break
-
-        if [ ${tmp_port} -gt ${port} ]; then
-            port=${tmp_port}
-        fi
-    done
-
-    # Generate and check a new UDP port number
-    if [ ${port} -eq 0 ]; then
-        port=${default_port}
-    elif [ ${port} -eq ${default_port} ]; then
-        port=${dynamic_port}
-    else
-        port=$(( port + 1 ))
-        if [ ${port} -gt 65535 ]; then
-            echo >&2 $"`basename $0`: Invalid UDP port" \
-                     "- ${port}!"
-            return 1
-        fi
-    fi
-
-    # Add the UDP port number into each failover node's udpport file
-    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-        UDPPORT_NODE=${TMP_DIR}$"/udpport."${NODE_NAMES[idx]}
-        echo ${port} > ${UDPPORT_NODE}
-    done
-
-    echo ${port}
-    return 0
-}
-
-# create_hacf
-#
-# Create the ha.cf file and scp it to each node's ${HA_DIR}.
-#
-# The file is built in ${TMP_DIR}/ha.cf.<primary node> from the template
-# ${HACF_TEMP}. If that file already exists nothing is done.
-# The colon-separated items of the -c (channels) and -o (options)
-# arguments are written one per line, in the order they were given.
-# Returns 1 if no UDP port can be generated or an scp fails, 0 otherwise.
-create_hacf() {
-    HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
-
-    declare -i idx
-
-    if [ -e ${HACF_PRIMNODE} ]; then
-        # The ha.cf file for the primary node has already existed.
-        verbose_output "${HACF_PRIMNODE} already exists."
-        return 0
-    fi
-
-    /bin/cp -f ${HACF_TEMP} ${HACF_PRIMNODE}
-
-    # bcast and ucast channels need a udpport directive
-    if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*bcast*}" ] \
-    || [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*ucast*}" ]; then
-        UDPPORT_OPT=$(gen_udpport)
-        if [ $? -ne 0 ]; then
-            return 1
-        fi
-        echo "udpport ${UDPPORT_OPT}" >> ${HACF_PRIMNODE}
-    fi
-
-    # serial channels need a baud directive
-    if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*serial*}" ]; then
-        echo "baud 19200" >> ${HACF_PRIMNODE}
-    fi
-
-    # One channel directive per line. Walking the fields by index keeps
-    # the command line order; awk's "for (i in a)" order is unspecified.
-    echo ${HBCHANNEL_OPT} | awk -F: '{for (i = 1; i <= NF; i++) print $i}' \
-        >> ${HACF_PRIMNODE}
-
-    # Disable automatic failbacks
-    echo "auto_failback off" >> ${HACF_PRIMNODE}
-
-    [ "${HBVER_OPT}" = "${HBVER_HBV2}" ] && echo "crm yes" >> ${HACF_PRIMNODE}
-
-    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-        echo "node ${NODE_NAMES[idx]}" >> ${HACF_PRIMNODE}
-    done
-
-    # One additional option per line (nothing is written if -o is unset)
-    echo ${HBOPT_OPT} | awk -F: '{for (i = 1; i <= NF; i++) print $i}' \
-        >> ${HACF_PRIMNODE}
-
-    # scp ha.cf file to all the nodes
-    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-        touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
-        scp ${HACF_PRIMNODE} ${NODE_NAMES[idx]}:${HA_DIR}/ha.cf
-        if [ $? -ne 0 ]; then
-            echo >&2 "`basename $0`: Failed to scp ha.cf file" \
-                     "to node ${NODE_NAMES[idx]}!"
-            return 1
-        fi
-    done
-
-    return 0
-}
-
-# create_haresources
-#
-# Create the haresources file and scp it to the each node's ${HA_DIR}.
-#
-# One resource group line (primary node, service IP address, then one
-# entry per target) is appended to ${TMP_DIR}/haresources.<primary node>.
-# For Heartbeat version 2 a cib.xml file is generated from the ha.cf and
-# haresources files with ${CIB_GEN_SCRIPT} and copied to each node's
-# ${CIB_DIR} as well.
-# Returns 1 if the cib.xml generation or an scp fails, 0 otherwise.
-create_haresources() {
- HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
- declare -i idx
- local res_line
-
- if [ -s ${HARES_PRIMNODE} ]; then
- # The haresources file for the primary node has already existed
- # Nothing to do if it already mentions the first target service
- if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
- verbose_output "${HARES_PRIMNODE} already exists."
- return 0
- fi
- fi
-
- # Add the resource group line into the haresources file
- # Each target entry has the form <service name>::<device>::<mount point>
- res_line=${PRIM_NODENAME}" "${SRVADDR_OPT}
- for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
- res_line=${res_line}" "${TARGET_SRVNAMES[idx]}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}
-
- # Heartbeat version 1 also gets a <service name>-mon resource
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
- fi
- done
- echo "${res_line}" >> ${HARES_PRIMNODE}
-
- # Generate the cib.xml file
- if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- # Add group haclient and user hacluster
- # (on the local host only, and only if grep finds no matching line)
- [ -z "`grep haclient /etc/group`" ] && groupadd haclient
- [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster
-
- # HACF_PRIMNODE is not set in this function; it is the global
- # assigned by create_hacf
- CIB_PRIMNODE=${TMP_DIR}$"/cib.xml."${PRIM_NODENAME}
- python ${CIB_GEN_SCRIPT} --stdout -c ${HACF_PRIMNODE} \
- ${HARES_PRIMNODE} > ${CIB_PRIMNODE}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate cib.xml file"\
- "for node ${PRIM_NODENAME}!"
- return 1
- fi
- fi
-
- # scp the haresources file or cib.xml file
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- # Keep a local per-node copy for the non-primary nodes
- if [ "${PRIM_NODENAME}" != "${NODE_NAMES[idx]}" ]; then
- /bin/cp -f ${HARES_PRIMNODE} \
- ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
- fi
-
- scp ${HARES_PRIMNODE} ${NODE_NAMES[idx]}:${HA_DIR}/haresources
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp haresources file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- scp ${CIB_PRIMNODE} ${NODE_NAMES[idx]}:${CIB_DIR}/cib.xml
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp cib.xml"\
- "file to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- fi
- done
-
- return 0
-}
-
-# create_authkeys
-#
-# Create the authkeys file and scp it to the each node's /etc/ha.d/
-create_authkeys() {
- AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
- declare -i idx
-
- if [ -e ${AUTHKEYS_PRIMNODE} ]; then
- verbose_output "${AUTHKEYS_PRIMNODE} already exists."
- return 0
- fi
-
- # scp the authkeys file to all the nodes
- chmod 600 ${AUTHKEYS_TEMP}
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
- scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/authkeys
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp authkeys file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# create_moncf
-#
-# Create the mon.cf file and scp it to the each node's ${MON_DIR}.
-#
-# The file is produced by ${SCRIPT_GEN_MONCF}, which is called with one
-# -n argument per node and one -o argument per target service name. The
-# service names found in a previous mon.cf of the primary node are passed
-# again so that they are kept.
-# Returns 1 if the generation or an scp fails, 0 otherwise.
-create_moncf() {
- MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
- local srv_name params=
- declare -i idx
- declare -a OLD_TARGET_SRVNAMES # targets in other nodes
- # in this failover group
- # Initialize the OLD_TARGET_SRVNAMES array
- unset OLD_TARGET_SRVNAMES
-
- if [ -s ${MONCF_PRIMNODE} ]; then
- # Nothing to do if the file already mentions the first target service
- if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
- then
- verbose_output "${MONCF_PRIMNODE} already exists."
- return 0
- else
- # Get the Lustre target service names
- # from the previous mon.cf file
- # (second word of the "hostgroup" lines that contains "-mon",
- # with "-mon" removed)
- idx=0
- for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
- |awk '$2 ~ /-mon/ {print $2}'|xargs`
- do
- OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
- |sed 's/-mon//g'`
- idx=$(( idx + 1 ))
- done
- fi
- fi
-
- # Construct the parameters to mon.cf generation script
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- params=${params}" -n "${NODE_NAMES[idx]}
- done
-
- for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
- params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
- done
-
- for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
- params=${params}" -o "${TARGET_SRVNAMES[idx]}
- done
-
- ${SCRIPT_GEN_MONCF} ${params}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate mon.cf file"\
- "by using ${SCRIPT_GEN_MONCF}!"
- return 1
- fi
-
- # Pick up the generated file from the current directory
- # NOTE(review): this assumes exactly one *-mon.cfg file exists there;
- # with several matches mv would treat ${MONCF_PRIMNODE} as a directory.
- /bin/mv *-mon.cfg ${MONCF_PRIMNODE}
-
- # scp the mon.cf file to all the nodes
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- # Keep a local per-node copy for the non-primary nodes
- if [ "${PRIM_NODENAME}" != "${NODE_NAMES[idx]}" ]; then
- /bin/cp -f ${MONCF_PRIMNODE} \
- ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
- fi
-
- scp ${MONCF_PRIMNODE} ${NODE_NAMES[idx]}:${MON_DIR}/mon.cf
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp mon.cf file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# generate_config
-#
-# Run all the generation steps in order: service symlinks, templates,
-# ha.cf, haresources, authkeys and, for Heartbeat version 1 only, mon.cf.
-# Returns 1 as soon as one step fails, 0 otherwise.
-generate_config() {
-    # Create symlinks for Lustre services
-    verbose_output "Creating symlinks for lustre target services in" \
-                   "${PRIM_NODENAME} failover group hosts..."
-    create_service || return 1
-    verbose_output "OK"
-
-    create_template || return 1
-
-    verbose_output "Creating and remote copying ha.cf file to" \
-                   "${PRIM_NODENAME} failover group hosts..."
-    create_hacf || return 1
-    verbose_output "OK"
-
-    verbose_output "Creating and remote copying haresources file" \
-                   "to ${PRIM_NODENAME} failover group hosts..."
-    create_haresources || return 1
-    verbose_output "OK"
-
-    verbose_output "Creating and remote copying authkeys file to" \
-                   "${PRIM_NODENAME} failover group hosts..."
-    create_authkeys || return 1
-    verbose_output "OK"
-
-    # mon is only configured for Heartbeat version 1
-    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
-        verbose_output "Creating and remote copying mon.cf file to" \
-                       "${PRIM_NODENAME} failover group hosts..."
-        create_moncf || return 1
-        verbose_output "OK"
-    fi
-
-    return 0
-}
-
-# Main flow
-# Get all the node names
-if ! get_nodenames; then
- exit 1
-fi
-
-# Check service IP address
-if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ] && ! check_srvIPaddr; then
- exit 1
-fi
-
-# Stop heartbeat services
-verbose_output "Stopping heartbeat service in the ${PRIM_NODENAME}"\
- "failover group hosts..."
-if ! stop_heartbeat; then
- exit 1
-fi
-verbose_output "OK"
-
-# Generate configuration files
-if ! generate_config; then
- exit 1
-fi
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# module_config.sh - add lustre options lines into modprobe.conf or
-# modules.conf
-#
-#################################################################################
-
-# Select the module configuration file from the kernel version:
-# 2.4 kernels use modules.conf, anything else uses modprobe.conf
-KERNEL_VERSION=`uname -r`
-KERNEL_VERSION=${KERNEL_VERSION:0:3}
-
-if [ "${KERNEL_VERSION}" = "2.4" ]; then
-    MODULE_CONF=/etc/modules.conf
-else
-    MODULE_CONF=/etc/modprobe.conf
-fi
-
-# The options lines are read from stdin as ONE line in which the
-# individual lines are separated by a literal "\n" (backslash, n)
-read -r NETWORKS
-
-# Scratch file holding the new block of lines. It is created with mktemp
-# so that its name cannot be predicted (a fixed /tmp/modlines<pid>.txt
-# name could be pre-created as a symlink by another local user).
-MODLINES_FILE=`mktemp /tmp/modlines.XXXXXX` || exit 1
-
-# Markers delimiting the block managed by this script
-START_MARKER=$"# start lustre config"
-END_MARKER=$"# end lustre config"
-
-# Generate a temp file contains lnet options lines
-generate_lnet_lines() {
- local LNET_LINE TMP_LINE
-
- TMP_LINE="${NETWORKS}"
-
- echo ${START_MARKER} > ${MODLINES_FILE}
- echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE}
- echo "alias lustre llite" >> ${MODLINES_FILE}
- while true; do
- LNET_LINE=${TMP_LINE%%\\n*}
- echo ${LNET_LINE} >> ${MODLINES_FILE}
-
- TMP_LINE=${TMP_LINE#*\\n}
-
- if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then
- break
- fi
- done
- echo ${END_MARKER} >> ${MODLINES_FILE}
-
- #echo "--------------${MODLINES_FILE}--------------"
- #cat ${MODLINES_FILE}
- #echo -e "------------------------------------------\n"
-
- return 0
-}
-
-# Build the new block of lines in the scratch file
-generate_lnet_lines || exit 1
-
-# Drop the block written by a previous run, if the file exists at all
-[ -e ${MODULE_CONF} ] && sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF}
-
-# Append the new block and remove the scratch file
-cat ${MODLINES_FILE} >> ${MODULE_CONF}
-rm -f ${MODLINES_FILE}
-exit 0
+++ /dev/null
-#!/bin/bash
-# bash is required: the script uses "declare -a" and array variables,
-# which a plain POSIX /bin/sh does not provide.
-
-# Given one or more Lustre objects, create a mon configuration file
-# naming the mon watches based on the Lustre object names.
-# For each Lustre object, the script will create two mon watches.
-# The first watch sets a trap, and the second watch clears the
-# trap if Lustre is healthy.
-
-# This may be more fun in Perl due to the need to support a list
-# of objects
-
-# (plus we could use a Perl format for this goop)
-
-# Directories; both can be overridden from the environment
-MONBASEDIR=${MONBASEDIR:-/usr/local/lib/mon}
-MONCFGDIR=${MONCFGDIR:-/etc/mon}
-# The output file is created in the current directory and is named
-# after the current date
-TD=`date +%y_%m%d_%S`
-TMPMONCFG=${TD}-mon.cfg
-# Determines how often we will check Lustre health
-CHECKINTERVAL="3m"
-# Determines how quickly we must clear the trap
-TRAPINTERVAL="6m"
-# Alert and trap scripts; both can be overridden from the environment
-ALERTSCRIPT=${ALERTSCRIPT:-"fail_lustre.alert"}
-TRAPSCRIPT=${TRAPSCRIPT:-"lustre.mon.trap"}
-
-# print_header
-#
-# Append the global settings section to $TMPMONCFG.
-# We will assume all inputs are Lustre objects.
-# File locations and limits below: correct to taste.
-# The here-document uses "<<-", so leading tab characters of its lines
-# are stripped by the shell.
-print_header() {
- cat >> $TMPMONCFG <<-EOF
- cfbasedir = $MONCFGDIR
- alertdir = $MONBASEDIR/alert.d
- mondir = $MONBASEDIR/mon.d
- statedir = $MONBASEDIR/state.d
- logdir = $MONBASEDIR/log.d
- dtlogfile = $MONBASEDIR/log.d/downtime.log
- maxprocs = 20
- histlength = 100
- randstart = 60s
- authtype = getpwnam
-EOF
-}
-
-# print_trap_rec object
-#
-# Append to $TMPMONCFG the watch "<object>-obj", which waits for a trap
-# (timeout $TRAPINTERVAL) and runs $ALERTSCRIPT as its alert.
-# Tabs should be preserved in the config file
-# $1 object name
-# we do not set a period, it is assumed monitor is always active
-
-print_trap_rec() {
- cat >> $TMPMONCFG <<EOF
-#
-watch ${1}-obj
- service ${1}_ser
- description triggers heartbeat failure if trap springs on $1
- traptimeout $TRAPINTERVAL
- period
- alert $ALERTSCRIPT
-
-# end ${1}-obj
-
-EOF
-
-}
-
-# print_trap_send object
-#
-# Append to $TMPMONCFG the watch "<object>-mon", which runs $TRAPSCRIPT
-# every $CHECKINTERVAL with the arguments <object>-obj, <object>_ser and
-# <object>, and uses $ALERTSCRIPT as its alert.
-# $1 object name
-print_trap_send() {
- cat >> $TMPMONCFG <<EOF
-#
-watch ${1}-mon
- service ${1}_mon_ser
- description clears trap for $1
- interval $CHECKINTERVAL
- monitor $TRAPSCRIPT ${1}-obj ${1}_ser ${1}
- period
- alert $ALERTSCRIPT
-# end ${1}-mon
-EOF
-
-}
-
-# Print a short help text on stdout and exit with status 1
-usage() {
-    printf '%s\n' \
-        "$0 -n <node> -n <node> -o <Lustre object> -o <Lustre object>...." \
-        "Creates the /etc/mon/mon.cf file to monitor Lustre objects"
-    exit 1
-}
-
-
-# Start of script
-
-if [ $# -eq 0 ];then
- usage
-fi
-
-# This script should work for any number of hosts
-#
-HOSTCNT=0
-OBJCNT=0
-
-declare -a HOSTS
-declare -a OBJS
-
-while getopts "n:o:" opt; do
- case $opt in
- n) HOSTS[HOSTCNT]=$OPTARG
- HOSTCNT=$(( HOSTCNT + 1 ))
- ;;
- o) OBJS[OBJCNT]=$OPTARG
- OBJCNT=$(( OBJCNT + 1 ))
- ;;
- *) usage
- ;;
- esac
-done
-
-echo "Found $HOSTCNT hosts"
-echo "Found $OBJCNT Lustre objects"
-
-# First create the host groups
-# we assume
-# each object will have two watches defined
-# each object hostgroup will have all objects
-
-# Create the file with the declared goop
-print_header
-
-for obj in ${OBJS[@]}
-do
- echo "hostgroup ${obj}-obj ${HOSTS[@]}" >> $TMPMONCFG
- echo "hostgroup ${obj}-mon ${HOSTS[@]}" >> $TMPMONCFG
- echo "#" >> $TMPMONCFG
-done
-
-# create the monitors
-
-for obj in ${OBJS[@]}
-do
- print_trap_send $obj
- print_trap_rec $obj
-done
-
-echo "Mon config completed - new mon config is $TMPMONCFG"
-exit 0
\ No newline at end of file
+++ /dev/null
-#!/bin/bash
-#
-# verify_cluster_net.sh - script for Lustre cluster network verification
-#
-###############################################################################
-
-# usage
-#
-# Print the help text on stderr and exit with status 1
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-v] <csv file>
-
- -v verbose mode
- csv file a spreadsheet that contains configuration parameters
- (separated by commas) for each target in a Lustre cl-
- uster, the first field of each line is the host name
- of the cluster node
-
-EOF
- exit 1
-}
-
-# Parse the options; -v is the only one supported
-while getopts "v" OPTION; do
-    case $OPTION in
-    v)
-        VERBOSE_OPT=$"yes"
-        ;;
-    *)
-        usage
-        ;;
-    esac
-done
-
-# Drop the options that were just processed
-shift $(( OPTIND - 1 ))
-
-# The csv file has to be the next argument
-[ $# -gt 0 ] || {
-    echo >&2 $"`basename $0`: Missing csv file!"
-    usage
-}
-
-# Global variables
-CSV_FILE=$1
-declare -a HOST_NAMES
-declare -a HOST_IPADDRS
-
-# Remote command, can be overridden from the environment
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Output verbose informations
-verbose_output() {
- if [ "${VERBOSE_OPT}" = "yes" ]; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Check the csv file
-check_file() {
- if [ ! -s ${CSV_FILE} ]; then
- echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}" \
- "does not exist or is empty!"
- return 1
- fi
-
- return 0
-}
-
-# Fill the global HOST_NAMES array with the distinct host names found in
-# the first field of the csv file; lines matching the comment pattern
-# are ignored.
-# Returns 1 if a line with content has no host name field, 0 otherwise.
-get_hostnames() {
-    local NAME KNOWN BAD_LINES
-
-    # Start from an empty HOST_NAMES array
-    unset HOST_NAMES
-
-    # Lines that contain something but whose first field is blank
-    BAD_LINES=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \
-        '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'`
-    if [ -n "${BAD_LINES}" ]; then
-        echo >&2 $"`basename $0`: get_hostnames() error: Missing" \
-                 "hostname field in the line - ${BAD_LINES}"
-        return 1
-    fi
-
-    for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE} \
-        | awk -F, '/[[:alnum:]]/{print $1}'`
-    do
-        # Skip the names that were already recorded
-        for KNOWN in "${HOST_NAMES[@]}"; do
-            [ "${NAME}" = "${KNOWN}" ] && continue 2
-        done
-
-        HOST_NAMES[${#HOST_NAMES[@]}]=${NAME}
-    done
-
-    return 0
-}
-
-# local_check hostname index
-#
-# Check whether the host name matches the name in the local /etc/hosts table
-# and whether the IP address corresponding to the host name is correct.
-# The address found is stored in HOST_IPADDRS[index].
-# Returns 0 if the check passes, 1 otherwise (with a message on stderr).
-local_check() {
- # Check argument
- if [ $# -ne 2 ]; then
- echo >&2 $"`basename $0`: local_check() error: Missing"\
- "argument for function local_check()!"
- return 1
- fi
-
- local RET_STR REAL_NAME
-
- # Get the corresponding IP address of the host name from /etc/hosts table
- # of the current host
- HOST_IPADDRS[$2]=`egrep "[[:space:]]$1([[:space:]]|$)" /etc/hosts \
- | awk '{print $1}'`
- if [ -z "${HOST_IPADDRS[$2]}" ]; then
- echo >&2 "`basename $0`: local_check() error: $1 does not" \
- "exist in the local /etc/hosts table!"
- return 1
- fi
-
- # A result longer than 15 characters (the length of the longest
- # dotted-quad address) is taken to mean several lines matched
- if [ ${#HOST_IPADDRS[$2]} -gt 15 ]; then
- echo >&2 "`basename $0`: local_check() error: More than one" \
- "IP address line corresponding to $1 in the local" \
- "/etc/hosts table!"
- return 1
- fi
-
- # Execute remote command to get the real host name
- RET_STR=`${REMOTE} ${HOST_IPADDRS[$2]} hostname 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: local_check() error: remote error:" \
- "${RET_STR}"
- return 1
- fi
-
- # No output at all is treated as a connectivity problem
- if [ -z "${RET_STR}" ]; then
- echo >&2 "`basename $0`: local_check() error: remote error: No"\
- "results from remote! Check the network connectivity"\
- "between the local host and ${HOST_IPADDRS[$2]}!"
- return 1
- fi
-
- # The first word of the remote output has to be the expected host name
- REAL_NAME=`echo ${RET_STR} | awk '{print $1}'`
- if [ "$1" != "${REAL_NAME}" ]; then
- echo >&2 "`basename $0`: local_check() error: The real hostname"\
- "of ${HOST_IPADDRS[$2]} is \"${REAL_NAME}\","\
- "not \"$1\"! Check the local /etc/hosts table!"
- return 1
- fi
-
- return 0
-}
-
-# remote_check hostname index
-#
-# Check whether the correct host name and IP address pair matches
-# the one in the remote /etc/hosts tables.
-# The /etc/hosts table of every host in HOST_NAMES is searched for
-# @hostname and the address found is compared with HOST_IPADDRS[index].
-# Returns 0 if all the tables agree, 1 otherwise (with a message on stderr).
-remote_check() {
- # Check argument
- if [ $# -ne 2 ]; then
- echo >&2 $"`basename $0`: remote_check() error: Missing"\
- "argument for function remote_check()!"
- return 1
- fi
-
- declare -i i
- local RET_STR COMMAND IP_ADDR
-
- # Command run on the remote hosts to look up the host name
- COMMAND=$"egrep \"[[:space:]]$1([[:space:]]|$)\" /etc/hosts"
-
- # Execute remote command to check remote /etc/hosts tables
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- RET_STR=`${REMOTE} ${HOST_NAMES[i]} ${COMMAND} 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: remote_check() error:"\
- "remote error: ${RET_STR}"
- return 1
- fi
-
- # The first word of the output is the IP address; no output
- # means the name was not found
- IP_ADDR=`echo ${RET_STR} | awk '{print $1}'`
- if [ -z "${IP_ADDR}" ]; then
- echo >&2 "`basename $0`: remote_check() error:" \
- "$1 does not exist in the ${HOST_NAMES[i]}'s"\
- "/etc/hosts table!"
- return 1
- fi
-
- if [ "${IP_ADDR}" != "${HOST_IPADDRS[$2]}" ]; then
- echo >&2 "`basename $0`: remote_check() error:" \
- "IP address ${IP_ADDR} of $1 in the" \
- "${HOST_NAMES[i]}'s /etc/hosts is incorrect!"
- return 1
- fi
- done
-
- return 0
-}
-
-# network_check hostname
-#
-# Verify forward and reverse network connectivity of the Lustre cluster:
-# from @hostname, run "hostname" on every host in HOST_NAMES through the
-# remote command and compare the answer with the expected host name.
-# Returns 0 if all the hosts answer with their own name, 1 otherwise
-# (with a message on stderr).
-network_check () {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: network_check() error: Missing"\
- "argument for function network_check()!"
- return 1
- fi
-
- declare -i i
- local RET_STR COMMAND REAL_NAME
-
- # Execute remote command to check network connectivity
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- # The remote command is nested: local host -> $1 -> HOST_NAMES[i]
- COMMAND=$"${REMOTE} ${HOST_NAMES[i]} hostname"
- RET_STR=`${REMOTE} $1 ${COMMAND} 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "remote error: ${RET_STR}"
- return 1
- fi
-
- # No output at all is treated as a connectivity problem
- if [ -z "${RET_STR}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "No results from remote! Check the network" \
- "connectivity between \"$1\" and" \
- "\"${HOST_NAMES[i]}\"!"
- return 1
- fi
-
- # The first word of the output has to be the expected host name
- REAL_NAME=`echo ${RET_STR} | awk '{print $1}'`
- if [ "${HOST_NAMES[i]}" != "${REAL_NAME}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "${RET_STR}"
- return 1
- fi
- done
-
- return 0
-}
-
-# Verify forward and reverse network connectivity of the Lustre cluster,
-# and that hostnames match the names in the /etc/hosts tables.
-network_verify() {
- declare -i i
-
- # Initialize the HOST_IPADDRS array
- unset HOST_IPADDRS
-
- # Get all the host names from the csv file
- if ! get_hostnames; then
- return 1
- fi
-
- # Check whether all the host names match the names in
- # all the /etc/hosts tables of the Lustre cluster
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- verbose_output "Verifying IP address of host" \
- "\"${HOST_NAMES[i]}\" in the local /etc/hosts..."
- if ! local_check ${HOST_NAMES[i]} $i; then
- return 1
- fi
- verbose_output "OK"
- done
-
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
- verbose_output "Verifying IP address of host" \
- "\"${HOST_NAMES[i]}\" in the remote /etc/hosts..."
- if ! remote_check ${HOST_NAMES[i]} $i; then
- return 1
- fi
- verbose_output "OK"
- done
-
- # Verify network connectivity of the Lustre cluster
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
- verbose_output "Verifying network connectivity of host" \
- "\"${HOST_NAMES[i]}\" to other hosts..."
- if ! network_check ${HOST_NAMES[i]}; then
- return 1
- fi
- verbose_output "OK"
- done
-
- return 0
-}
-
-# Main flow: validate the csv file, then verify the cluster network
-check_file || exit 1
-network_verify || exit 1
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# verify_serviceIP.sh - script for verifying the service IP and the real
-# interface IP in a remote host are in the same subnet
-#
-###############################################################################
-
-# usage
-#
-# Print the help text on stderr and exit with status 1
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` <service IPaddr> <hostname>
-
- service IPaddr the IP address to failover
- hostname the hostname of the remote node
-
-EOF
- exit 1
-}
-
-# Both the service IP address and the host name are required
-[ $# -ge 2 ] || usage
-
-# Remote command, can be overridden from the environment
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-#
-# inSameIPsubnet serviceIPaddr interfaceIPaddr mask
-#
-# Given two IP addresses and a subnet mask determine if these IP
-# addresses are in the same subnet. If they are, return 0, else return 1.
-#
-inSameIPsubnet() {
- declare -i n
- declare -ia mask
- declare -ia ip1 ip2 # IP addresses given
- declare -i quad1 quad2 # calculated quad words
-
- #
- # Remove '.' characters from dotted decimal notation and save
- # in arrays. i.e.
- #
- # 192.168.1.163 -> array[0] = 192
- # array[1] = 168
- # array[2] = 1
- # array[3] = 163
- #
- let n=0
- for quad in $(echo $1 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
- do
- ip1[n]=$quad
- let n=n+1
- done
-
- let n=0
- for quad in $(echo $2 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
- do
- ip2[n]=$quad
- let n=n+1
- done
-
- let n=0
- for quad in $(echo $3 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
- do
- mask[n]=$quad
- let n=n+1
- done
-
- #
- # For each quad word, logically AND the IP address with the subnet
- # mask to get the network/subnet quad word. If the resulting
- # quad words for both IP addresses are the same they are in the
- # same IP subnet.
- #
- for n in 0 1 2 3
- do
- let $((quad1=${ip1[n]} & ${mask[n]}))
- let $((quad2=${ip2[n]} & ${mask[n]}))
-
- if [ $quad1 != $quad2 ]; then
- echo >&2 $"`basename $0`: Service IP address $1 and"\
- "real interface IP address $2 are in"\
- "different subnets!"
- return 1 # in different subnets
- fi
- done
-
- return 0 # in the same subnet, all quad words matched
-}
-
-#
-# findInterface IPaddr hostname
-#
-# Given a target IP address and a hostname, find the interface in which
-# this address is configured. If found return 0, if not return 1. The
-# interface name is returned to stdout.
-# The interfaces are read from the output of /sbin/ifconfig run on
-# @hostname through the remote command.
-#
-findInterface() {
- declare line
- declare intf
- declare addr
- declare state
-
- declare target=$1
- declare hostname=$2
-
- # Outer loop: the first word of the line read is taken as the
- # interface name
- while read intf line
- do
- # Inner loop: the following lines, up to the next empty line,
- # belong to that interface
- while read line
- do
- if [ "$line" = "" ]; then # go to next interface
- continue 2
- fi
-
- # Split the line into words and look for an "addr:<address>" word
- set - $line
- addr=
- while [ $# -gt 0 ]; do
- case $1 in
- addr:*)
- addr=${1##addr:}
- if [ -n "$addr" -a "$addr" = "$target" ]
- then
- echo $intf
- return 0
- fi
- ;;
- esac
- shift
- done
- done
- done < <(${REMOTE} $hostname /sbin/ifconfig)
-
- echo >&2 "`basename $0`: Cannot find the interface in which" \
- "$target is configured in the host $hostname!"
- return 1
-}
-
-#
-# findNetmask interface hostname
-#
-# Given an interface find the netmask addresses associated with it.
-# Return 0 when found, else return 1. The netmask is returned to stdout.
-# The netmask is taken from the first "Mask:<netmask>" word in the output
-# of "/sbin/ifconfig <interface>" run on @hostname through the remote
-# command.
-#
-findNetmask() {
- declare line
- declare addr
- declare target=$1
- declare hostname=$2
-
- while read line
- do
- # Split the line into words and look for a "Mask:<netmask>" word
- set - $line
-
- while [ $# -gt 0 ]; do
- case $1 in
- Mask:*)
- echo ${1##*:} # return netmask addr
- return 0
- ;;
- esac
- shift
- done
- done < <(${REMOTE} $hostname /sbin/ifconfig $target)
-
- echo >&2 "`basename $0`: Cannot find the netmask associated with" \
- "the interface $target in the host $hostname!"
- return 1
-}
-
-#
-# check_srvIPaddr serviceIPaddr hostname
-#
-# Look up the address of @hostname in the local /etc/hosts table, find
-# the interface of @hostname carrying that address and its netmask, then
-# test whether @serviceIPaddr belongs to the same subnet.
-# Return 0 if it does, else return 1.
-#
-check_srvIPaddr() {
-    local srv_IPaddr=$1
-    local hostname=$2
-    local host_IPaddr host_intf host_mask
-
-    # Address of the host according to the local /etc/hosts table
-    host_IPaddr=`egrep "[[:space:]]$hostname([[:space:]]|$)" /etc/hosts \
-        | awk '{print $1}'`
-    if [ -z "$host_IPaddr" ]; then
-        echo >&2 "`basename $0`: Hostname $hostname does not exist in" \
-                 "the local /etc/hosts table!"
-        return 1
-    fi
-
-    # More than 15 characters cannot be a single dotted-quad address
-    if [ ${#host_IPaddr} -gt 15 ]; then
-        echo >&2 "`basename $0`: More than one IP address line" \
-                 "corresponding to $hostname in the local" \
-                 "/etc/hosts table!"
-        return 1
-    fi
-
-    # Interface carrying that address; anything from the first ':' on
-    # is cut off the interface name
-    host_intf=$(findInterface $host_IPaddr $hostname) || return 1
-    host_intf=${host_intf%%:*}
-
-    # Netmask of that interface
-    host_mask=$(findNetmask $host_intf $hostname) || return 1
-
-    # Finally compare the two addresses under the netmask
-    inSameIPsubnet $srv_IPaddr $host_IPaddr $host_mask || return 1
-
-    return 0
-}
-
-# Check service IP address
-if ! check_srvIPaddr $1 $2; then
- exit 1
-fi
-exit 0