successfully test a mountconf branch with ltest.
.deps
TAGS
version_tag.pl
+lustre_createcsv.sh
+lustre_config.sh
+lc_net.sh
+lc_modprobe.sh
+lc_hb.sh
+lc_cluman.sh
+lc_md.sh
+lc_lvm.sh
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-EXTRA_DIST = license-status maketags.sh lustre lustrefs \
- version_tag.pl.in
+# These are scripts that are generated from .in files
+genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh
-initddir = $(sysconfdir)/init.d
-if UTILS
-initd_SCRIPTS = lustre lustrefs
-endif
+# Generated and static helper scripts are all installed into $(sbindir).
+sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh
+# Ship the .in templates (not the generated scripts) in the tarball.
+EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \
+	$(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \
+	lustre_up14.sh
+
+# lc_common.sh is a function library sourced by the scripts above, so it
+# is installed as data (no exec bit) under $(libdir)/@PACKAGE@.
+scriptlibdir = $(libdir)/@PACKAGE@
+scriptlib_DATA = lc_common.sh
+
+CLEANFILES = $(genscripts)
+
+# Substitute the install location of lc_common.sh into each generated
+# script; @scriptlibdir@ is expanded here at make time (not by configure)
+# so that $(libdir) overrides on the make command line are honoured.
+$(genscripts): %.sh: %.sh.in
+	sed -e 's#@scriptlibdir@#$(scriptlibdir)#' < $< > $@
+	chmod +x $@
--- /dev/null
+#!/bin/bash
+#
+# lc_cluman.sh - script for generating the Red Hat Cluster Manager
+# HA software's configuration files
+#
+################################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-n hostnames> [-s service addresses]
+ [-c heartbeat channel] [-o heartbeat options] [-v]
+ <-d target device> [-d target device...]
+
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -s service addresses the IP addresses to failover
+ Multiple addresses are separated by colon (:)
+ delimeter.
+ -c heartbeat channel the method to send/rcv heartbeats on
+ The default method is multicast, and multicast_
+ ipaddress is "225.0.0.11".
+ -o heartbeat options a "catchall" for other heartbeat configuration
+ options
+ Multiple options are separated by colon (:)
+ delimeter.
+ -v verbose mode
+ -d target device the target device name and mount point
+ The device name and mount point are separated by
+ colon (:) delimeter.
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#****************************** Global variables ******************************#
+TMP_DIR=${CLUMGR_TMP_DIR} # Temporary directory
+
+declare -a NODE_NAMES # Node names in the failover group
+declare -a SRV_IPADDRS # Service IP addresses
+
+# Lustre target device names, service names and mount points
+declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
+declare -i TARGET_NUM=0 # Number of targets
+
+# Get and check the positional parameters
+VERBOSE_OUTPUT=false
+while getopts "n:s:c:o:vd:" OPTION; do
+ case $OPTION in
+ n)
+ HOSTNAME_OPT=$OPTARG
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+ if [ -z "${PRIM_NODENAME}" ]; then
+ echo >&2 $"`basename $0`: Missing primary nodename!"
+ usage
+ fi
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Missing failover nodenames!"
+ usage
+ fi
+ ;;
+ s)
+ SRVADDR_OPT=$OPTARG
+ ;;
+ c)
+ HBCHANNEL_OPT=$OPTARG
+ HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
+ | sed 's/"$//'`
+ if [ -n "${HBCHANNEL_OPT}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
+ "- ${HBCHANNEL_OPT}!"
+ usage
+ fi
+ ;;
+ o)
+ HBOPT_OPT=$OPTARG
+ HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
+ TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
+ if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing target device name!"
+ usage
+ fi
+ if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing mount point for target"\
+ "${TARGET_DEVNAMES[TARGET_NUM]}!"
+ usage
+ fi
+ TARGET_NUM=$(( TARGET_NUM + 1 ))
+ ;;
+
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -d option!"
+ usage
+fi
+
+# get_nodenames
+#
+# Split the colon-separated ${HOSTNAME_OPT} value (-n option) and fill
+# the global NODE_NAMES array with the node names of this failover
+# group.  NODE_NAMES[0] is the primary node, the rest are failovers.
+# Always returns 0.
+get_nodenames() {
+    declare -i idx
+    local nodename_str nodename
+
+    # NOTE(review): $HOSTNAME_OPT is inside single quotes, so awk sees an
+    # uninitialized awk variable (numeric 0) and split($HOSTNAME_OPT,...)
+    # degrades to split($0, ...), splitting the whole input line.  This
+    # happens to produce the intended result -- TODO confirm and simplify.
+    nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+                  END {for (i in a) print a[i]}'`
+    idx=0
+    for nodename in ${nodename_str}
+    do
+        NODE_NAMES[idx]=${nodename}
+        # idx was declared with -i, so the string "idx+1" is evaluated
+        # arithmetically on assignment.
+        idx=$idx+1
+    done
+
+    return 0
+}
+
+# get_check_srvIPaddrs
+#
+# Split the colon-separated ${SRVADDR_OPT} value (-s option) into the
+# global SRV_IPADDRS array, then verify via ${SCRIPT_VERIFY_SRVIP}
+# (lc_servip.sh) that every service IP address is on the same subnet as
+# the real IP of every node in NODE_NAMES.
+# Returns 0 on success, 1 if any verification fails.
+get_check_srvIPaddrs() {
+    declare -i idx
+    declare -i i
+    local srvIPaddr_str srvIPaddr
+
+    # Same awk quirk as get_nodenames(): $SRVADDR_OPT is uninitialized
+    # inside the single-quoted awk program, so split() effectively
+    # operates on the whole input line.
+    srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\
+                  END {for (i in a) print a[i]}'`
+    idx=0
+    for srvIPaddr in ${srvIPaddr_str}
+    do
+        SRV_IPADDRS[idx]=${srvIPaddr}
+        idx=$idx+1
+    done
+
+    # Cross-check each service address against each member node.
+    for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+        for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
+            # Check service IP address
+            verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
+                           "real IP of host ${NODE_NAMES[i]} are in the" \
+                           "same subnet..."
+            if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
+            then
+                return 1
+            fi
+            verbose_output "OK"
+        done
+    done
+
+    return 0
+}
+
+# cluman_running host_name
+#
+# Run remote command to check whether clumanager service is running in @host_name
+# Returns: 0 - service running; 1 - service stopped; 2 - the remote
+# command itself failed (output contains "unrecognized", e.g. because
+# the clumanager init script is not installed on the node).
+cluman_running() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1`
+    if [ $? -ne 0 ]; then
+        # A non-zero status normally just means "stopped"; only treat it
+        # as a hard error when "unrecognized" appears in the output.
+        if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then
+            echo >&2 "`basename $0`: cluman_running() error:"\
+                 "remote command to ${host_name} error: ${ret_str}!"
+            return 2
+        else
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+# stop_cluman host_name
+#
+# Run remote command to stop clumanager service running in @host_name
+# Returns 0 on success (and announces it on stdout), 1 when the remote
+# "service clumanager stop" fails.
+stop_cluman() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1`
+    if [ $? -ne 0 ]; then
+        echo >&2 "`basename $0`: stop_cluman() error:"\
+             "remote command to ${host_name} error: ${ret_str}!"
+        return 1
+    fi
+
+    echo "`basename $0`: Clumanager service is stopped on node ${host_name}."
+    return 0
+}
+
+# check_cluman
+#
+# Run remote command to check each node's clumanager service
+check_cluman() {
+ declare -i idx
+ local OK
+
+ # Get and check all the service IP addresses
+ if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ # Check clumanager service status
+ cluman_running ${NODE_NAMES[idx]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "1" ]; then
+ verbose_output "Clumanager service is stopped on"\
+ "node ${NODE_NAMES[idx]}."
+ elif [ "$rc" -eq "0" ]; then
+ OK=
+ echo -n "`basename $0`: Clumanager service is running on"\
+ "${NODE_NAMES[idx]}, go ahead to stop the service and"\
+ "generate new configurations? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: New Clumanager configurations"\
+ "are not generated."
+ return 2
+ fi
+
+ # Stop clumanager service
+ stop_cluman ${NODE_NAMES[idx]}
+ fi
+ done
+
+ return 0
+}
+
+# get_srvname hostname target_devname
+#
+# Get the lustre target server name from the node @hostname
+get_srvname() {
+ local host_name=$1
+ local target_devname=$2
+ local target_srvname=
+ local ret_str
+
+ # Execute remote command to get the target server name
+ ret_str=`${REMOTE} ${host_name} \
+ "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo "`basename $0`: get_srvname() error:" \
+ "from host ${host_name} - ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
+ ret_str=${ret_str#*Target: }
+ target_srvname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "${target_srvname}" ]; then
+ echo "`basename $0`: get_srvname() error: Cannot get the"\
+ "server name of target ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_srvname}
+ return 0
+}
+
+# get_srvnames
+#
+# Get server names of all the Lustre targets in this failover group
+get_srvnames() {
+ declare -i i
+
+ # Initialize the TARGET_SRVNAMES array
+ unset TARGET_SRVNAMES
+
+ # Get Lustre target service names
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_SRVNAMES[i]}"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# check_retval retval
+#
+# Check the return value of redhat-config-cluster-cmd
+# $1 - exit status of the last ${CONFIG_CMD} invocation.
+# Returns 0 when $1 is 0; otherwise prints an error and returns 1.
+check_retval() {
+    if [ $1 -ne 0 ]; then
+        echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
+        return 1
+    fi
+
+    return 0
+}
+
+# add_services
+#
+# Add service tags into the cluster.xml file
+add_services() {
+ declare -i idx
+ declare -i i
+
+ # Add service tag
+ for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
+ ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --add_device \
+ --name=${TARGET_DEVNAMES[i]}
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --device=${TARGET_DEVNAMES[i]} \
+ --mount \
+ --mountpoint=${TARGET_MNTPNTS[i]} \
+ --fstype=lustre
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# gen_cluster_xml
+#
+# Run redhat-config-cluster-cmd to create the cluster.xml file
+gen_cluster_xml() {
+ declare -i idx
+ declare -i i
+ local mcast_IPaddr
+ local node_names
+ local hbopt
+
+ [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
+
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ # Add clumembd tag
+ if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
+ ${CONFIG_CMD} --clumembd --broadcast=yes
+ ${CONFIG_CMD} --clumembd --multicast=no
+ if ! check_retval $?; then
+ return 1
+ fi
+ elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
+ mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
+ if [ -n "${mcast_IPaddr}" ]; then
+ ${CONFIG_CMD} --clumembd --multicast=yes\
+ --multicast_ipaddress=${mcast_IPaddr}
+ if ! check_retval $?; then
+ return 1
+ fi
+ fi
+ fi
+
+ # Add cluster tag
+ node_names=
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ node_names=${node_names}"${NODE_NAMES[idx]} "
+ done
+
+ ${CONFIG_CMD} --cluster --name="${node_names}failover group"
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ # Add member tag
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ # Add service tag
+ if ! add_services; then
+ return 1
+ fi
+
+ # Add other tags
+ if [ -n "${HBOPT_OPT}" ]; then
+ while read -r hbopt
+ do
+ ${CONFIG_CMD} ${hbopt}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
+ END {for (i in a) print a[i]}')
+ fi
+
+ return 0
+}
+
+# create_config
+#
+# Create the cluster.xml file and scp it to the each node's /etc/
+create_config() {
+ declare -i idx
+
+ /bin/mkdir -p ${TMP_DIR}
+ CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME}
+ CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX}
+
+ # Get server names of Lustre targets
+ if ! get_srvnames; then
+ return 1
+ fi
+
+ if [ -s ${CONFIG_PRIMNODE} ]; then
+ if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
+ then
+ verbose_output "${CONFIG_PRIMNODE} already exists."
+ return 0
+ else
+ [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
+
+ /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml
+
+ # Add services into the cluster.xml file
+ if ! add_services; then
+ return 1
+ fi
+ fi
+ else
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ verbose_output "Creating cluster.xml file for" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! gen_cluster_xml; then
+ return 1
+ fi
+ verbose_output "OK"
+ fi
+
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE}
+ [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml
+
+ # scp the cluster.xml file to all the nodes
+ verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]}
+
+ scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+ verbose_output "OK"
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Check clumanager services
+verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+check_cluman
+rc=$?
+if [ "$rc" -eq "2" ]; then
+ verbose_output "OK"
+ exit 0
+elif [ "$rc" -eq "1" ]; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! create_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_common.sh - This file contains functions to be used by most or all
+# Lustre cluster config scripts.
+#
+################################################################################
+
+# Remote command
+REMOTE=${REMOTE:-"ssh -x -q"}
+#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
+export REMOTE
+
+# Lustre utilities
+CMD_PATH=${CMD_PATH:-"/usr/sbin"}
+MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"}
+TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
+LCTL=${LCTL:-"$CMD_PATH/lctl"}
+
+EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"}
+
+# Raid command path
+RAID_CMD_PATH=${RAID_CMD_PATH:-"/sbin"}
+MDADM=${MDADM:-"$RAID_CMD_PATH/mdadm"}
+
+# Some scripts to be called
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
+MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe.sh
+VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net.sh
+GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb.sh
+GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman.sh
+SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh
+SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon.sh
+SCRIPT_CONFIG_MD=${SCRIPTS_PATH}/lc_md.sh
+SCRIPT_CONFIG_LVM=${SCRIPTS_PATH}/lc_lvm.sh
+
+# Variables of HA software
+HBVER_HBV1="hbv1" # Heartbeat version 1
+HBVER_HBV2="hbv2" # Heartbeat version 2
+HATYPE_CLUMGR="cluman" # Cluster Manager
+
+# Configuration directories and files
+HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
+MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory
+CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
+
+HA_CF=${HA_DIR}/ha.cf # ha.cf file
+HA_RES=${HA_DIR}/haresources # haresources file
+HA_CIB=${CIB_DIR}/cib.xml
+
+CLUMAN_DIR="/etc" # CluManager configuration directory
+CLUMAN_CONFIG=${CLUMAN_DIR}/cluster.xml
+
+CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"} # CluManager tools
+CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
+
+HB_TMP_DIR="/tmp/heartbeat" # Temporary directory
+CLUMGR_TMP_DIR="/tmp/clumanager"
+TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
+
+FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type
+FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files
+
+# Marker of the MD device line
+MD_MARKER=${MD_MARKER:-"MD"}
+
+# Marker of the LVM device line
+PV_MARKER=${PV_MARKER:-"PV"}
+VG_MARKER=${VG_MARKER:-"VG"}
+LV_MARKER=${LV_MARKER:-"LV"}
+
+declare -a CONFIG_ITEM # Items in each line of the csv file
+declare -a NODE_NAME # Hostnames of nodes have been configured
+
+
+# verbose_output string
+# Print "$*" prefixed with the script name, but only when the calling
+# script has set VERBOSE_OUTPUT=true (its -v option).  Always returns 0.
+verbose_output() {
+    if ${VERBOSE_OUTPUT}; then
+        echo "`basename $0`: $*"
+    fi
+    return 0
+}
+
+# is_pdsh
+# Check whether the remote command (${REMOTE}) is pdsh.
+# Returns 0 when ${REMOTE} contains "pdsh", 1 otherwise.  Callers use
+# this because pdsh prefixes each output line with "host:", so remote
+# output must be parsed differently (see nid2hostname).
+is_pdsh() {
+    if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+# check_file csv_file
+# Validate the csv file argument: it must be given, exist and be
+# non-empty.  On success the global CSV_FILE is set to $1 and 0 is
+# returned; otherwise an error is printed and 1 is returned.
+check_file() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 "`basename $0`: check_file() error: Missing csv file!"
+        return 1
+    fi
+
+    CSV_FILE=$1
+    # -s: file exists and has a size greater than zero.
+    if [ ! -s ${CSV_FILE} ]; then
+        echo >&2 "`basename $0`: check_file() error: ${CSV_FILE}"\
+             "does not exist or is empty!"
+        return 1
+    fi
+
+    return 0
+}
+
+# parse_line line
+# Parse a line in the csv file
+parse_line() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: parse_line() error: Missing argument!"
+ return 1
+ fi
+
+ declare -i i=0 # Index of the CONFIG_ITEM array
+ declare -i length=0
+ declare -i idx=0
+ declare -i s_quote_flag=0 # Flag of the single quote character
+ declare -i d_quote_flag=0 # Flag of the double quotes character
+ local TMP_LETTER LINE
+
+ LINE="$*"
+
+ # Initialize the CONFIG_ITEM array
+ unset CONFIG_ITEM
+
+ # Get the length of the line
+ length=${#LINE}
+
+ i=0
+ while [ ${idx} -lt ${length} ]; do
+ # Get a letter from the line
+ TMP_LETTER=${LINE:${idx}:1}
+
+ case "${TMP_LETTER}" in
+ ",")
+ if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ]
+ then
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ else
+ i=$i+1
+ fi
+ idx=${idx}+1
+ continue
+ ;;
+ "'")
+ if [ ${s_quote_flag} -eq 0 ]; then
+ s_quote_flag=1
+ else
+ s_quote_flag=0
+ fi
+ ;;
+ "\"")
+ if [ ${d_quote_flag} -eq 0 ]; then
+ d_quote_flag=1
+ else
+ d_quote_flag=0
+ fi
+ ;;
+ "\r")
+ idx=${idx}+1
+ continue
+ ;;
+ *)
+ ;;
+ esac
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ idx=${idx}+1
+ done
+
+ # Extract the real value of each field
+ # Remove surrounded double-quotes, etc.
+ for ((idx = 0; idx <= $i; idx++)); do
+ # Strip the leading and trailing space-characters
+ CONFIG_ITEM[idx]=`expr "${CONFIG_ITEM[idx]}" : '[[:space:]]*\(.*\)[[:space:]]*$'`
+
+ [ -z "${CONFIG_ITEM[idx]}" ] && continue
+
+ # Remove the surrounded double-quotes
+ while [ -z "`echo "${CONFIG_ITEM[idx]}"|sed -e 's/^".*"$//'`" ]; do
+ CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/^"//' -e 's/"$//'`
+ done
+
+ CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/""/"/g'`
+ done
+
+ return 0
+}
+
+# fcanon name
+# If $name is a symbolic link, print its canonical target
+# (readlink -f); otherwise print $name unchanged.
+fcanon() {
+    local NAME=$1
+
+    if [ -h "$NAME" ]; then
+        readlink -f "$NAME"
+    else
+        echo "$NAME"
+    fi
+}
+
+# configured_host host_name
+#
+# Check whether ${host_name} is already present in the global NODE_NAME
+# array, i.e. whether its devices have been configured by an earlier
+# csv line.  Returns 0 when found, 1 otherwise.
+configured_host() {
+    local host_name=$1
+    declare -i i
+
+    for ((i = 0; i < ${#NODE_NAME[@]}; i++)); do
+        [ "${host_name}" = "${NODE_NAME[i]}" ] && return 0
+    done
+
+    return 1
+}
+
+# remote_error fn_name host_addr ret_str
+# Inspect the captured output of a remote command for failure
+# signatures.  Returns 0 -- meaning "an error WAS detected" -- when
+# ${ret_str} contains "connect:" or is empty; returns 1 when the output
+# looks usable.  Note the inverted sense: callers write
+#     remote_error "fn" ${addr} "${out}" && return 1
+remote_error() {
+    local fn_name host_addr ret_str
+
+    fn_name=$1
+    shift
+    host_addr=$1
+    shift
+    ret_str=$*
+
+    # "connect:" in the output indicates an ssh/pdsh connection failure.
+    if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then
+        echo >&2 "`basename $0`: ${fn_name}() error: ${ret_str}"
+        return 0
+    fi
+
+    # An empty result usually means the remote shell never ran.
+    if [ -z "${ret_str}" ]; then
+        echo >&2 "`basename $0`: ${fn_name}() error:" \
+             "No results from remote!" \
+             "Check network connectivity between the local host and ${host_addr}!"
+        return 0
+    fi
+
+    return 1
+}
+
+# nid2hostname nid
+# Convert $nid (an LNET identifier such as "192.168.0.1@tcp") to the
+# hostname of the lustre cluster node.  For IP-based network types the
+# address is resolved by running "hostname" on the remote node;
+# elan/gm/ptl conversion is not implemented yet (FIXMEs below) and
+# yields an empty result.  Prints the hostname on stdout and returns 0,
+# or prints an error message and returns 1.
+nid2hostname() {
+    local nid=$1
+    local host_name=
+    local addr nettype ip_addr
+    local ret_str
+
+    # Split "addr@nettype"; a nid without "@" defaults to tcp.
+    addr=${nid%@*}
+    [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+    if [ -z "${addr}" ]; then
+        echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!"
+        return 1
+    fi
+
+    case "${nettype}" in
+    lo*)    host_name=`hostname`;;
+    elan*)  # QsNet
+        # FIXME: Parse the /etc/elanhosts configuration file to
+        # convert ElanID to hostname
+        ;;
+    gm*)    # Myrinet
+        # FIXME: Use /usr/sbin/gmlndnid to find the hostname of
+        # the specified GM Global node ID
+        ;;
+    ptl*)   # Portals
+        # FIXME: Convert portal ID to hostname
+        ;;
+    *)  # tcp, o2ib, cib, openib, iib, vib, ra
+        ip_addr=${addr}
+        # Is it IP address or hostname?  The sed deletes a dotted-quad;
+        # anything left over means $addr was already a hostname.
+        if [ -n "`echo ${ip_addr} | sed -e 's/\([0-9]\{1,3\}\.\)\{3,3\}[0-9]\{1,3\}//'`" ]
+        then
+            host_name=${ip_addr}
+            echo ${host_name}
+            return 0
+        fi
+
+        # Execute remote command to get the host name
+        ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1`
+        if [ $? -ne 0 -a -n "${ret_str}" ]; then
+            echo "`basename $0`: nid2hostname() error:" \
+                 "remote command to ${ip_addr} error: ${ret_str}"
+            return 1
+        fi
+        remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1
+
+        # pdsh prefixes each output line with "host:", so the hostname
+        # is the second whitespace-separated field there and the first
+        # otherwise.
+        if is_pdsh; then
+            host_name=`echo ${ret_str} | awk '{print $2}'`
+        else
+            host_name=`echo ${ret_str} | awk '{print $1}'`
+        fi
+        ;;
+    esac
+
+    echo ${host_name}
+    return 0
+}
+
+# nids2hostname nids
+# Get the hostname of the lustre cluster node which has the nids - $nids
+nids2hostname() {
+ local nids=$1
+ local host_name=
+ local nid
+ local nettype
+
+ for nid in ${nids//,/ }; do
+ [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+
+ case "${nettype}" in
+ lo* | elan* | gm* | ptl*) ;;
+ *) # tcp, o2ib, cib, openib, iib, vib, ra
+ host_name=$(nid2hostname ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${host_name}"
+ return 1
+ fi
+ ;;
+ esac
+ done
+
+ if [ -z "${host_name}" ]; then
+ echo "`basename $0`: nids2hostname() error:" \
+ "Can not get the hostname from nids - \"${nids}\"!"
+ return 1
+ fi
+
+ echo ${host_name}
+ return 0
+}
+
+# ip2hostname_single_node nids
+# Convert IP addresses in $nids into hostnames
+# NIDs in $nids are delimited by commas, i.e. all the NIDs belong to one node
+ip2hostname_single_node() {
+ local orig_nids=$1
+ local nids=
+ local nid host_name
+ local nettype
+
+ for nid in ${orig_nids//,/ }; do
+ [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+
+ case "${nettype}" in
+ lo* | elan* | gm* | ptl*) ;;
+ *) # tcp, o2ib, cib, openib, iib, vib, ra
+ host_name=$(nid2hostname ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${host_name}"
+ return 1
+ fi
+
+ nid=${host_name}@${nettype}
+ ;;
+ esac
+
+ [ -z "${nids}" ] && nids=${nid} || nids=${nids},${nid}
+ done
+
+ echo ${nids}
+ return 0
+}
+
+# ip2hostname_multi_node nids
+# Convert IP addresses in $nids into hostnames
+# NIDs belonging to multiple nodes are delimited by colons in $nids
+ip2hostname_multi_node() {
+ local orig_nids=$1
+ local nids=
+ local nid
+
+ for nid in ${orig_nids//:/ }; do
+ nid=$(ip2hostname_single_node ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${nid}"
+ return 1
+ fi
+
+ [ -z "${nids}" ] && nids=${nid} || nids=${nids}:${nid}
+ done
+
+ echo ${nids}
+ return 0
+}
--- /dev/null
+#!/bin/bash
+#
+# lc_hb.sh - script for generating the Heartbeat HA software's
+# configuration files
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
+ <-d target device> [-d target device...]
+
+ -r HBver the version of Heartbeat software
+ The Heartbeat software versions which are curr-
+ ently supported are: hbv1 (Heartbeat version 1)
+ and hbv2 (Heartbeat version 2).
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -v verbose mode
+ -d target device the target device name and mount point
+ The device name and mount point are separated by
+ colon (:) delimeter.
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#****************************** Global variables ******************************#
+# Heartbeat tools
+HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path
+CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
+CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}
+
+# Service directories and names
+HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources
+LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat
+
+TMP_DIR=${HB_TMP_DIR} # Temporary directory
+HACF_TEMP=${TMP_DIR}/ha.cf.temp
+AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}
+
+declare -a NODE_NAMES # Node names in the failover group
+
+# Lustre target device names, service names and mount points
+declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
+declare -i TARGET_NUM=0 # Number of targets
+
+
+# Get and check the positional parameters
+VERBOSE_OUTPUT=false
+while getopts "r:n:vd:" OPTION; do
+ case $OPTION in
+ r)
+ HBVER_OPT=$OPTARG
+ if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
+ && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat software" \
+ "version - ${HBVER_OPT}!"
+ usage
+ fi
+ ;;
+ n)
+ HOSTNAME_OPT=$OPTARG
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+ if [ -z "${PRIM_NODENAME}" ]; then
+ echo >&2 $"`basename $0`: Missing primary nodename!"
+ usage
+ fi
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Missing failover nodenames!"
+ usage
+ fi
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
+ then
+ echo >&2 $"`basename $0`: Heartbeat version 1 can" \
+ "only support 2 nodes!"
+ usage
+ fi
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
+ TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
+ if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing target device name!"
+ usage
+ fi
+ if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing mount point for target"\
+ "${TARGET_DEVNAMES[TARGET_NUM]}!"
+ usage
+ fi
+ TARGET_NUM=$(( TARGET_NUM + 1 ))
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HBVER_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -r option!"
+ usage
+fi
+
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -d option!"
+ usage
+fi
+
+# get_nodenames
+#
+# Get all the node names in this failover group
+get_nodenames() {
+ declare -i idx
+ local nodename_str nodename
+
+ nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for nodename in ${nodename_str}
+ do
+ NODE_NAMES[idx]=${nodename}
+ idx=$idx+1
+ done
+
+ return 0
+}
+
+# check_remote_file host_name file
+#
+# Run remote command to check whether @file exists in @host_name
+# Returns 0 when the file exists, 1 on a missing argument or when the
+# remote existence test fails.
+check_remote_file() {
+    local host_name=$1
+    local file_name=$2
+
+    if [ -z "${host_name}" ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "Missing hostname!"
+        return 1
+    fi
+
+    if [ -z "${file_name}" ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "Missing file name!"
+        return 1
+    fi
+
+    # Execute remote command to check the file
+    ${REMOTE} ${host_name} "[ -e ${file_name} ]"
+    if [ $? -ne 0 ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "${file_name} does not exist in host ${host_name}!"
+        return 1
+    fi
+
+    return 0
+}
+
+# hb_running host_name
+#
+# Run remote command to check whether heartbeat service is running in @host_name
+# Returns: 0 - running; 1 - stopped; 2 - the remote command itself
+# failed (cl_status output without "stop" in it).
+hb_running() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
+    if [ $? -ne 0 ]; then
+        # cl_status exits non-zero both when heartbeat is stopped and on
+        # real errors; the word "stop" in the output distinguishes a
+        # normal "stopped" state from a failure.
+        if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
+            echo >&2 "`basename $0`: hb_running() error:"\
+                 "remote command to ${host_name} error: ${ret_str}!"
+            return 2
+        else
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+# stop_heartbeat host_name
+#
+# Run remote command to stop heartbeat service running in @host_name
+stop_heartbeat() {
+ local host_name=$1
+ local ret_str
+
+ ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: stop_heartbeat() error:"\
+ "remote command to ${host_name} error: ${ret_str}!"
+ return 1
+ fi
+
+ echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
+ return 0
+}
+
+# check_heartbeat
+#
+# Run remote command to check each node's heartbeat service
+check_heartbeat() {
+ declare -i idx
+ local OK
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ # Check Heartbeat configuration directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat() error:"\
+ "Is Heartbeat package installed?"
+ return 1
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ # Check mon configuration directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat()"\
+ "error: Is mon package installed?"
+ return 1
+ fi
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ # Check crm directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat()"\
+ "error: Is Heartbeat v2 package installed?"
+ return 1
+ fi
+ fi
+
+ # Check heartbeat service status
+ hb_running ${NODE_NAMES[idx]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "1" ]; then
+ verbose_output "Heartbeat service is stopped on"\
+ "node ${NODE_NAMES[idx]}."
+ elif [ "$rc" -eq "0" ]; then
+ OK=
+ echo -n "`basename $0`: Heartbeat service is running on"\
+ "${NODE_NAMES[idx]}, go ahead to stop the service and"\
+ "generate new configurations? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: New Heartbeat configurations"\
+ "are not generated."
+ return 2
+ fi
+
+ # Stop heartbeat service
+ stop_heartbeat ${NODE_NAMES[idx]}
+ fi
+ done
+
+ return 0
+}
+
+# get_srvname hostname target_devname
+#
+# Get the lustre target server name from the node @hostname
+get_srvname() {
+ local host_name=$1
+ local target_devname=$2
+ local target_srvname=
+ local ret_str
+
+ # Execute remote command to get the target server name
+ ret_str=`${REMOTE} ${host_name} \
+ "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo "`basename $0`: get_srvname() error:" \
+ "from host ${host_name} - ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
+ ret_str=${ret_str#*Target: }
+ target_srvname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "${target_srvname}" ]; then
+ echo "`basename $0`: get_srvname() error: Cannot get the"\
+ "server name of target ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_srvname}
+ return 0
+}
+
+# get_srvnames
+#
+# Get server names of all the Lustre targets in this failover group
+get_srvnames() {
+ declare -i i
+
+ # Initialize the TARGET_SRVNAMES array
+ unset TARGET_SRVNAMES
+
+ # Get Lustre target service names
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_SRVNAMES[i]}"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_template
+#
+# Create the templates for ha.cf and authkeys files
+# Writes ${HACF_TEMP} under ${TMP_DIR}; the ha.cf layout depends on the
+# Heartbeat version selected in ${HBVER_OPT} (v1 vs. v2/CRM).
+create_template() {
+ /bin/mkdir -p ${TMP_DIR}
+
+ # Create the template for ha.cf
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ cat >${HACF_TEMP} <<EOF
+debugfile /var/log/ha-debug
+logfile /var/log/ha-log
+logfacility local0
+keepalive 2
+deadtime 30
+initdead 120
+
+auto_failback off
+
+EOF
+ elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ cat >${HACF_TEMP} <<EOF
+use_logd yes
+keepalive 1
+deadtime 10
+initdead 60
+
+crm yes
+
+EOF
+ fi
+
+ # Create the template for authkeys
+ # (only when the file is missing or empty - keep an existing one)
+ if [ ! -s ${AUTHKEYS_TEMP} ]; then
+ cat >${AUTHKEYS_TEMP} <<EOF
+auth 1
+1 sha1 HelloLustre!
+EOF
+ fi
+
+ return 0
+}
+
+# create_hacf
+#
+# Create the ha.cf file and scp it to each node's /etc/ha.d/
+# An existing ${TMP_DIR}/ha.cf.<primary> acts as a "already generated"
+# marker; per-node marker files are touched as each node is handled.
+create_hacf() {
+ HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
+ HACF_LUSTRE=${TMP_DIR}$"/ha.cf"${FILE_SUFFIX}
+
+ declare -i idx
+
+ if [ -e ${HACF_PRIMNODE} ]; then
+ # The ha.cf file for the primary node has already existed.
+ verbose_output "${HACF_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # Start from the common template, then append one "node" line per host
+ /bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ echo "node ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
+ done
+
+ # scp ha.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
+ scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp ha.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_haresources
+#
+# Create the haresources file and scp it to the each node's /etc/ha.d/
+# For Heartbeat v2 the haresources file is additionally converted into a
+# cib.xml via ${CIB_GEN_SCRIPT} and copied to ${CIB_DIR} on every node.
+create_haresources() {
+ HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
+ HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX}
+ declare -i idx
+ local res_line
+
+ if [ -s ${HARES_PRIMNODE} ]; then
+ # The haresources file for the primary node has already existed
+ if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
+ verbose_output "${HARES_PRIMNODE} already exists."
+ return 0
+ fi
+ fi
+
+ # Add the resource group line into the haresources file
+ res_line=${PRIM_NODENAME}
+ for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
+ res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}
+
+ # Heartbeat v1 also gets a <server>-mon resource per target
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
+ fi
+ done
+ echo "${res_line}" >> ${HARES_LUSTRE}
+
+ # Generate the cib.xml file
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ # Add group haclient and user hacluster
+ # NOTE(review): runs groupadd/useradd on the local host only -
+ # presumably the remote nodes already have them; confirm
+ [ -z "`grep haclient /etc/group`" ] && groupadd haclient
+ [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster
+
+ CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX}
+ python ${CIB_GEN_SCRIPT} --stdout \
+ ${HARES_LUSTRE} > ${CIB_LUSTRE}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to generate cib.xml file"\
+ "for node ${PRIM_NODENAME}!"
+ return 1
+ fi
+ fi
+
+ # scp the haresources file or cib.xml file
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
+ scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp haresources file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp cib.xml"\
+ "file to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# create_authkeys
+#
+# Create the authkeys file and scp it to the each node's /etc/ha.d/
+create_authkeys() {
+ AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
+ declare -i idx
+
+ if [ -e ${AUTHKEYS_PRIMNODE} ]; then
+ verbose_output "${AUTHKEYS_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # scp the authkeys file to all the nodes
+ # chmod 600 first; scp -p preserves the restrictive mode remotely
+ chmod 600 ${AUTHKEYS_TEMP}
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
+ scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp authkeys file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_moncf
+#
+# Create the mon.cf file and scp it to the each node's /etc/mon/
+# Existing target service names found in a previous mon.cf (hostgroup
+# entries ending in "-mon") are preserved and passed along as well.
+create_moncf() {
+ MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
+ MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
+ local srv_name params=
+ declare -i idx
+ declare -a OLD_TARGET_SRVNAMES # targets in other nodes
+ # in this failover group
+ # Initialize the OLD_TARGET_SRVNAMES array
+ unset OLD_TARGET_SRVNAMES
+
+ if [ -s ${MONCF_PRIMNODE} ]; then
+ if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
+ then
+ verbose_output "${MONCF_PRIMNODE} already exists."
+ return 0
+ else
+ # Get the Lustre target service names
+ # from the previous mon.cf file
+ # (hostgroup names carry a "-mon" suffix - strip it back off)
+ idx=0
+ for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
+ |awk '$2 ~ /-mon/ {print $2}'|xargs`
+ do
+ OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
+ |sed 's/-mon//g'`
+ idx=$(( idx + 1 ))
+ done
+ fi
+ fi
+
+ # Construct the parameters to mon.cf generation script
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ params=${params}" -n "${NODE_NAMES[idx]}
+ done
+
+ for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
+ params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
+ done
+
+ for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
+ params=${params}" -o "${TARGET_SRVNAMES[idx]}
+ done
+
+ ${SCRIPT_GEN_MONCF} ${params}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to generate mon.cf file"\
+ "by using ${SCRIPT_GEN_MONCF}!"
+ return 1
+ fi
+
+ # NOTE(review): assumes ${SCRIPT_GEN_MONCF} writes *-mon.cfg into the
+ # current working directory - confirm
+ /bin/mv *-mon.cfg ${MONCF_LUSTRE}
+
+ # scp the mon.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
+
+ scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp mon.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# generate_config
+#
+# Generate the configuration files for Heartbeat and scp them to all the nodes
+# Heartbeat v1 additionally needs the target server names (for the -mon
+# resources) and a mon.cf file; v2 skips both steps.
+generate_config() {
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ # Get server names of Lustre targets
+ if ! get_srvnames; then
+ return 1
+ fi
+ fi
+
+ if ! create_template; then
+ return 1
+ fi
+
+ verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_hacf; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
+ "to ${PRIM_NODENAME} failover group hosts..."
+ if ! create_haresources; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_authkeys; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_moncf; then
+ return 1
+ fi
+ verbose_output "OK"
+ fi
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Check heartbeat services
+verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+check_heartbeat
+rc=$?
+# Status 2 means no new configuration should be generated - treat as
+# success and stop; status 1 is a hard error; status 0 proceeds.
+if [ "$rc" -eq "2" ]; then
+ verbose_output "OK"
+ exit 0
+elif [ "$rc" -eq "1" ]; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! generate_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_lvm.sh - configure Linux LVM devices from a csv file
+#
+################################################################################
+
+# Usage
+# Print the usage message on stderr and exit with status 1.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-h] [-v] <csv file>
+
+ This script is used to configure Linux LVM devices in a Lustre cluster
+ from a csv file.
+
+ -h help and examples
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each Linux LVM component
+ (PV, VG, LV) to be configured in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+# Print the csv-format description and worked examples on stdout; exit 0.
+sample() {
+ cat <<EOF
+
+This script is used to configure Linux LVM devices in a Lustre cluster
+from a csv file.
+
+LVM is a Logical Volume Manager for the Linux operating system. The
+three-level components of it are PV (Physical Volume), VG (Volume Group)
+and LV (Logical Volume).
+
+Each line marked with "PV" in the csv file represents one or more PVs.
+The format is:
+hostname,PV,pv names,operation mode,options
+
+hostname hostname of the node in the cluster
+PV marker of PV line
+pv names devices or loopback files to be initialized for later
+ use by LVM or to be wiped the label, e.g. /dev/sda
+ Multiple devices or files are separated by space or by
+ using shell expansions, e.g. "/dev/sd{a,b,c}"
+operation mode create or remove, default is create
+options a "catchall" for other pvcreate/pvremove options
+ e.g. "-vv"
+
+Each line marked with "VG" in the csv file represents one VG.
+The format is:
+hostname,VG,vg name,operation mode,options,pv paths
+
+hostname hostname of the node in the cluster
+VG marker of VG line
+vg name name of the volume group, e.g. ost_vg
+operation mode create or remove, default is create
+options a "catchall" for other vgcreate/vgremove options
+ e.g. "-s 32M"
+pv paths physical volumes to construct this VG, required by
+ create mode
+ Multiple PVs are separated by space or by using
+ shell expansions, e.g. "/dev/sd[k-m]1"
+
+Each line marked with "LV" in the csv file represents one LV.
+The format is:
+hostname,LV,lv name,operation mode,options,lv size,vg name
+
+hostname hostname of the node in the cluster
+LV marker of LV line
+lv name name of the logical volume to be created (optional)
+ or path of the logical volume to be removed (required
+ by remove mode)
+operation mode create or remove, default is create
+options a "catchall" for other lvcreate/lvremove options
+ e.g. "-i 2 -I 128"
+lv size size [kKmMgGtT] to be allocated for the new LV
+ Default unit is megabytes.
+vg name name of the VG in which the new LV will be created
+
+Items left blank will be set to defaults.
+
+Example:
+-------------------------------------------------------
+# MD/LVM devices on mgsnode
+# Remove the LVM devices in the mgsnode
+mgsnode,LV,/dev/mgs_vg/mdt1,remove
+mgsnode,LV,/dev/mgs_vg/mdt2,remove
+mgsnode,VG,mgs_vg,remove
+mgsnode,PV,"/dev/sd{a,b}1",remove
+
+# Create MD device in the mgsnode
+mgsnode,MD,/dev/md0,,-q,1,/dev/sda1 /dev/sdb1
+
+
+# MD/LVM devices on ostnode
+# Create MD and LVM devices in the ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c}"
+ostnode,MD,/dev/md1,,-q -c 128,5,"/dev/sd{d,e,f}"
+
+ostnode,PV,/dev/md0 /dev/md1
+ostnode,VG,ost_vg,,-s 32M,"/dev/md{0,1}"
+ostnode,LV,ost0,,-i 2 -I 128,300G,ost_vg
+ostnode,LV,ost1,,-i 2 -I 128,300G,ost_vg
+-------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+# All the LVM device items in the csv file
+# SIXTH_ITEM/SEVENTH_ITEM hold the marker-specific fields: pv paths for
+# a VG line; lv size and vg name for an LV line (unused on PV lines).
+declare -a HOST_NAME LINE_MARKER LVM_NAME OP_MODE OP_OPTS SIXTH_ITEM SEVENTH_ITEM
+
+# Variables related to background executions
+declare -a REMOTE_CMD
+declare -a REMOTE_PID
+declare -i pid_num=0
+
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "hv" OPTION; do
+ case $OPTION in
+ h)
+ sample
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: Missing csv file!"
+ usage
+fi
+
+# check_lvm_item index
+#
+# Check the items required for managing LVM device ${LVM_NAME[index]}
+# Returns 0 when the parsed csv fields at @index are consistent, 1 (with
+# a message on stderr) otherwise.
+check_lvm_item() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Missing argument!"
+ return 1
+ fi
+
+ declare -i i=$1
+
+ # Check hostname
+ if [ -z "${HOST_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "hostname item has null value!"
+ return 1
+ fi
+
+ # Check LVM device name
+ # An empty name is legal only for an LV line (lvcreate can pick the
+ # name) or for a "remove" line (whole-host teardown)
+ if [ -z "${LVM_NAME[i]}" ] \
+ && [ "${LINE_MARKER[i]}" != "${LV_MARKER}" -a "${OP_MODE[i]}" != "remove" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "LVM component name item has null value!"
+ return 1
+ fi
+
+ # Check the operation mode
+ if [ -n "${OP_MODE[i]}" ] \
+ && [ "${OP_MODE[i]}" != "create" -a "${OP_MODE[i]}" != "remove" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Invalid operation mode item - \"${OP_MODE[i]}\"!"
+ return 1
+ fi
+
+ # Check items required by create mode
+ # (empty mode defaults to create)
+ if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
+ if [ "${LINE_MARKER[i]}" = "${VG_MARKER}" -a -z "${SIXTH_ITEM[i]}" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "pv paths item of vg ${LVM_NAME[i]} has null value!"
+ return 1
+ fi
+
+ if [ "${LINE_MARKER[i]}" = "${LV_MARKER}" ]; then
+ if [ -z "${SIXTH_ITEM[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "lv size item has null value!"
+ return 1
+ fi
+
+ if [ -z "${SEVENTH_ITEM[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "vg name item has null value!"
+ return 1
+ fi
+ fi
+ fi
+
+ return 0
+}
+
+# get_lvm_items csv_file
+#
+# Get all the LVM device items in the $csv_file and do some checks.
+# Fills the HOST_NAME/LINE_MARKER/LVM_NAME/OP_MODE/OP_OPTS/SIXTH_ITEM/
+# SEVENTH_ITEM arrays, one slot per PV/VG/LV line.
+get_lvm_items() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: get_lvm_items() error: Missing csv file!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ local LINE line_marker
+ declare -i line_num=0
+ declare -i idx=0
+
+ while read -r LINE; do
+ let "line_num += 1"
+
+ # Skip the comment line
+ [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue
+
+ # Skip the non-LVM line
+ line_marker=`echo ${LINE} | awk -F, '{print $2}'`
+ [ "${line_marker}" != "${PV_MARKER}" ] \
+ && [ "${line_marker}" != "${VG_MARKER}" ] \
+ && [ "${line_marker}" != "${LV_MARKER}" ] && continue
+
+ # Parse the config line into CONFIG_ITEM
+ # (parse_line, presumably from lc_common.sh, splits the csv fields)
+ if ! parse_line "$LINE"; then
+ return 1
+ fi
+
+ HOST_NAME[idx]=${CONFIG_ITEM[0]}
+ LINE_MARKER[idx]=${CONFIG_ITEM[1]}
+ LVM_NAME[idx]=${CONFIG_ITEM[2]}
+ OP_MODE[idx]=${CONFIG_ITEM[3]}
+ OP_OPTS[idx]=${CONFIG_ITEM[4]}
+ SIXTH_ITEM[idx]=${CONFIG_ITEM[5]}
+ SEVENTH_ITEM[idx]=${CONFIG_ITEM[6]}
+
+ # Check some required items
+ if ! check_lvm_item $idx; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}."
+ return 1
+ fi
+
+ let "idx += 1"
+ done < ${CSV_FILE}
+
+ return 0
+}
+
+# construct_lvm_create_cmdline index
+#
+# Construct the creation command line for ${LVM_NAME[index]}
+# Prints the pvcreate/vgcreate/lvcreate command on stdout.
+construct_lvm_create_cmdline() {
+ declare -i i=$1
+ local lvm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${PV_MARKER}")
+ # -ff -y: force re-initialization without prompting
+ lvm_cmd="pvcreate -ff -y ${OP_OPTS[i]} ${LVM_NAME[i]}"
+ ;;
+ "${VG_MARKER}")
+ lvm_cmd="vgcreate ${OP_OPTS[i]} ${LVM_NAME[i]} ${SIXTH_ITEM[i]}"
+ ;;
+ "${LV_MARKER}")
+ # LV name is optional - lvcreate names the volume itself
+ if [ -z "${LVM_NAME[i]}" ]; then
+ lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
+ else
+ lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} -n ${LVM_NAME[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
+ fi
+ ;;
+ esac
+
+ echo ${lvm_cmd}
+ return 0
+}
+
+# cmdline_rm_LVs vg_name
+#
+# Construct command line to remove all the LVs on $vg_name.
+# If $vg_name is null, then remove all the LVs in the host.
+# The command is printed on stdout (to be run remotely later).
+cmdline_rm_LVs() {
+ local vg_name=$1
+ local lvm_rm_cmd
+
+ # Deactivate first, then lvremove every "LV Name" that vgdisplay lists
+ lvm_rm_cmd="vgchange -a n ${vg_name} &&"
+ lvm_rm_cmd=${lvm_rm_cmd}" vgdisplay -v ${vg_name} | grep \"LV Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read lv; do lvremove -f \$lv; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_LV lv_path
+#
+# Construct command line to remove LV $lv_path
+# (deactivate, then force-remove); printed on stdout.
+cmdline_rm_LV() {
+ local lv_path=$1
+ local lvm_rm_cmd
+
+ lvm_rm_cmd="lvchange -a n ${lv_path} && lvremove -f ${lv_path}"
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+
+# cmdline_rm_VG vg_name
+#
+# Construct command line to remove VG $vg_name
+# (its LVs are removed first); printed on stdout.
+cmdline_rm_VG() {
+ local vg_name=$1
+ local lvm_rm_cmd
+
+ # Remove all the LVs on this VG
+ lvm_rm_cmd=$(cmdline_rm_LVs ${vg_name})
+
+ # Remove this VG
+ lvm_rm_cmd=${lvm_rm_cmd}" && vgremove ${vg_name}"
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_VGs
+#
+# Construct command line to remove all the VGs in the host
+# (all LVs are removed first); printed on stdout.
+cmdline_rm_VGs() {
+ local lvm_rm_cmd
+
+ # Remove all the LVs in the host
+ lvm_rm_cmd=$(cmdline_rm_LVs)
+
+ # Remove all the VGs in the host
+ lvm_rm_cmd=${lvm_rm_cmd}" && vgdisplay | grep \"VG Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read vg; do vgremove \$vg; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_PVs
+#
+# Construct command line to remove all the PVs in the host
+# (all LVs and VGs are removed first); printed on stdout.
+cmdline_rm_PVs() {
+ local lvm_rm_cmd
+
+ # Remove all the LVs and VGs in the host
+ lvm_rm_cmd=$(cmdline_rm_VGs)
+
+ # Remove all the PVs in the host
+ lvm_rm_cmd=${lvm_rm_cmd}" && pvdisplay | grep \"PV Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read pv; do pvremove -ff -y \$pv; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_teardown_cmdline index
+#
+# Construct the teardown command line for LVM devices in ${HOST_NAME[index]}
+# The teardown scope depends on the line marker: LV tears down one VG's
+# LVs, VG tears down all VGs, PV tears down everything including PVs.
+construct_lvm_teardown_cmdline() {
+ declare -i i=$1
+ local lvm_rm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${LV_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_LVs ${SEVENTH_ITEM[i]})
+ ;;
+ "${VG_MARKER}")
+ # Remove all the VGs in the host
+ lvm_rm_cmd=$(cmdline_rm_VGs)
+ ;;
+ "${PV_MARKER}")
+ # Remove all the PVs in the host
+ lvm_rm_cmd=$(cmdline_rm_PVs)
+ ;;
+ esac
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_rm_cmdline index
+#
+# Construct the remove command line for LVM device ${LVM_NAME[index]}
+# Unlike the teardown variant, this removes only the named component.
+construct_lvm_rm_cmdline() {
+ declare -i i=$1
+ local lvm_rm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${LV_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_LV ${LVM_NAME[i]})
+ ;;
+ "${VG_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_VG ${LVM_NAME[i]})
+ ;;
+ "${PV_MARKER}")
+ lvm_rm_cmd="pvremove -ff -y ${LVM_NAME[i]}"
+ ;;
+ esac
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_cmdline host_name
+#
+# Construct the command line of LVM utilities to be run in the $host_name
+# The combined command is left in the global LVM_CMDLINE, with the
+# per-line commands joined by " && " in csv order.
+construct_lvm_cmdline() {
+ LVM_CMDLINE=
+ local host_name=$1
+ local lvm_cmd
+ declare -i i
+
+ # Construct command line
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ lvm_cmd=
+ if [ "${host_name}" = "${HOST_NAME[i]}" ]; then
+ case "${OP_MODE[i]}" in
+ "" | create)
+ # Construct the create command line
+ lvm_cmd=$(construct_lvm_create_cmdline ${i})
+ ;;
+ remove)
+ # An empty name means "tear down everything of this
+ # kind on the host" rather than remove one component
+ if [ -z "${LVM_NAME[i]}" ]; then
+ # Construct the teardown command line
+ lvm_cmd=$(construct_lvm_teardown_cmdline ${i})
+ else # Remove instead of teardown
+ # Construct the remove command line
+ lvm_cmd=$(construct_lvm_rm_cmdline ${i})
+ fi
+ ;;
+ *)
+ echo >&2 "`basename $0`: construct_lvm_cmdline() error:"\
+ "Invalid operation mode - \"${OP_MODE[i]}\"!"
+ return 1
+ ;;
+ esac
+
+ if [ -z "${LVM_CMDLINE}" ]; then
+ LVM_CMDLINE=${lvm_cmd}
+ else
+ LVM_CMDLINE=${LVM_CMDLINE}" && "${lvm_cmd}
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# config_lvm_devs host_name
+#
+# Run remote command to configure LVM devices in $host_name
+# The remote command is launched in the background; its command text and
+# pid are recorded in REMOTE_CMD[]/REMOTE_PID[] for config_lvm to reap.
+config_lvm_devs() {
+ local host_name=$1
+
+ # Construct the LVM utilities command line
+ if ! construct_lvm_cmdline ${host_name}; then
+ return 1
+ fi
+
+ if [ -z "${LVM_CMDLINE}" ]; then
+ verbose_output "There are no LVM devices on host ${host_name}"\
+ "needed to be configured."
+ return 0
+ fi
+
+ # Run remote command to configure LVM devices in $host_name
+ verbose_output "Configuring LVM devices in host ${host_name}..."
+ verbose_output "Configure command line is: \"${LVM_CMDLINE}\""
+ REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${LVM_CMDLINE}\""
+ ${REMOTE} ${host_name} "(${EXPORT_PATH} ${LVM_CMDLINE})" >&2 &
+ REMOTE_PID[pid_num]=$!
+ let "pid_num += 1"
+
+ return 0
+}
+
+# Run remote command to configure all the LVM devices specified
+# in the csv file
+# Fans out one background remote command per distinct host, then waits
+# for all of them; returns 1 if any remote command failed.
+config_lvm() {
+ declare -i i=0
+ declare -i idx=0 # Index of NODE_NAME array
+ local host_name
+ local failed_status
+
+ # Initialize the NODE_NAME array
+ unset NODE_NAME
+
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ host_name=${HOST_NAME[i]}
+ # Skip hosts already handled in an earlier iteration
+ # (configured_host presumably checks NODE_NAME - from lc_common.sh)
+ configured_host ${host_name} && continue
+
+ NODE_NAME[idx]=${host_name}
+ let "idx += 1"
+
+ # Run remote command to configure LVM devices in $host_name
+ if ! config_lvm_devs ${host_name}; then
+ return 1
+ fi
+ done
+
+ if [ ${#HOST_NAME[@]} -eq 0 -o ${#REMOTE_PID[@]} -eq 0 ]; then
+ verbose_output "There are no LVM devices to be configured."
+ return 0
+ fi
+
+ # Wait for the exit status of the background remote command
+ failed_status=false
+ verbose_output "Waiting for the return of the remote command..."
+ for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+ wait ${REMOTE_PID[${pid_num}]}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: config_lvm() error: Failed"\
+ "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+ failed_status=true
+ fi
+ done
+
+ if ${failed_status}; then
+ return 1
+ fi
+
+ verbose_output "All the LVM devices are configured successfully!"
+ return 0
+}
+
+# Main flow
+# Check the csv file
+# (check_file presumably validates $1 and sets CSV_FILE - lc_common.sh)
+if ! check_file $1; then
+ exit 1
+fi
+
+# Get all the LVM device items from the csv file
+if ! get_lvm_items ${CSV_FILE}; then
+ exit 1
+fi
+
+# Configure the LVM devices
+if ! config_lvm; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_md.sh - configure Linux MD devices from a csv file
+#
+################################################################################
+
+# Usage
+# Print the usage message on stderr and exit with status 1.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-h] [-v] <csv file>
+
+ This script is used to configure Linux MD devices in a Lustre cluster
+ from a csv file.
+
+ -h help and examples
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each Linux MD device to be
+ configured in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+# Print the csv-format description and worked examples on stdout; exit 0.
+sample() {
+ cat <<EOF
+
+This script is used to configure Linux MD devices in a Lustre cluster
+from a csv file.
+
+Each line marked with "MD" in the csv file represents one MD device.
+The format is:
+hostname,MD,md name,operation mode,options,raid level,component devices
+
+hostname hostname of the node in the cluster
+MD marker of MD device line
+md name MD device name, e.g. /dev/md0
+operation mode create or remove, default is create
+options a "catchall" for other mdadm options, e.g. "-c 128"
+raid level raid level: 0,1,4,5,6,10,linear and multipath
+component devices block devices to be combined into the MD device
+ Multiple devices are separated by space or by using
+ shell expansions, e.g. "/dev/sd{a,b,c}"
+
+Items left blank will be set to defaults.
+
+Example:
+-------------------------------------------------------
+# MD devices on mgsnode
+mgsnode,MD,/dev/md0,,-q -c 32,1,/dev/sda1 /dev/sdb1
+mgsnode,MD,/dev/md1,,-q -c 32,1,/dev/sdc1 /dev/sdd1
+mgsnode,MD,/dev/md2,,-q -c 32,0,/dev/md0 /dev/md1
+
+# MD device on ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c,d,e}"
+-------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+# All the MD device items in the csv file, one slot per "MD" csv line
+declare -a HOST_NAME MD_NAME OP_MODE OP_OPTS RAID_LEVEL MD_DEVS
+
+# Variables related to background executions
+declare -a REMOTE_CMD
+declare -a REMOTE_PID
+declare -i pid_num=0
+
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "hv" OPTION; do
+ case $OPTION in
+ h)
+ sample
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: Missing csv file!"
+ usage
+fi
+
+# check_md_item index
+#
+# Check the items required for managing MD device ${MD_NAME[index]}
+# Returns 0 when the parsed csv fields at @index are consistent, 1 (with
+# a message on stderr) otherwise.
+check_md_item() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "Missing argument!"
+ return 1
+ fi
+
+ declare -i i=$1
+
+ # Check hostname
+ if [ -z "${HOST_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "hostname item has null value!"
+ return 1
+ fi
+
+ # Check items required by create mode
+ # (empty mode defaults to create)
+ if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
+ # Check MD device name
+ if [ -z "${MD_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "md name item has null value!"
+ return 1
+ fi
+
+ if [ -z "${RAID_LEVEL[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "raid level item of MD device ${MD_NAME[i]} has null value!"
+ return 1
+ fi
+
+ if [ -z "${MD_DEVS[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "component devices item of ${MD_NAME[i]} has null value!"
+ return 1
+ fi
+ fi
+
+ return 0
+}
+
+# get_md_items csv_file
+#
+# Get all the MD device items in the $csv_file and do some checks.
+# Fills the HOST_NAME/MD_NAME/OP_MODE/OP_OPTS/RAID_LEVEL/MD_DEVS arrays.
+get_md_items() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: get_md_items() error: Missing csv file!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ local LINE
+ declare -i line_num=0
+ declare -i idx=0
+
+ while read -r LINE; do
+ let "line_num += 1"
+
+ # Skip the comment line
+ [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue
+
+ # Skip the non-MD line
+ [ "`echo ${LINE}|awk -F, '{print $2}'`" != "${MD_MARKER}" ] && continue
+
+ # Parse the config line into CONFIG_ITEM
+ # (parse_line, presumably from lc_common.sh, splits the csv fields)
+ if ! parse_line "$LINE"; then
+ return 1
+ fi
+
+ HOST_NAME[idx]=${CONFIG_ITEM[0]}
+ MD_NAME[idx]=${CONFIG_ITEM[2]}
+ OP_MODE[idx]=${CONFIG_ITEM[3]}
+ OP_OPTS[idx]=${CONFIG_ITEM[4]}
+ RAID_LEVEL[idx]=${CONFIG_ITEM[5]}
+ MD_DEVS[idx]=${CONFIG_ITEM[6]}
+
+ # Check some required items
+ if ! check_md_item $idx; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}."
+ return 1
+ fi
+
+ let "idx += 1"
+ done < ${CSV_FILE}
+
+ return 0
+}
+
+# md_is_active host_name md_name
+#
+# Run remote command to check whether $md_name is active in @host_name
+# Returns 0 = active, 1 = inactive, 2 = remote command failed.
+md_is_active() {
+ local host_name=$1
+ local md_name=$2
+ local cmd ret_str
+
+ # NOTE(review): grep matches a substring of the /proc/mdstat lines,
+ # so "md1" would also match "md10" - confirm this is acceptable
+ cmd="grep -q ${md_name##*/} /proc/mdstat 2>&1"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ # grep -q is silent; any output therefore came from the remote
+ # shell itself, i.e. a transport error rather than "not found"
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: md_is_active() error:"\
+ "remote command to ${host_name} error: ${ret_str}!"
+ return 2 # Error occurred
+ else
+ return 1 # inactive
+ fi
+ fi
+
+ return 0 # active
+}
+
+# construct_mdadm_create_cmdline index
+#
+# Construct the create operation command line of mdadm for ${MD_NAME[index]}
+# Prints the mdadm -C command on stdout; computes -n (raid devices) as
+# the component-device count minus any spare-device count, unless the
+# user already supplied -n/--raid-devices in the options.
+construct_mdadm_create_cmdline() {
+ declare -i i=$1
+ local cmd_line
+ local echo_disk disk line
+ declare -i alldisks=0
+ declare -i raiddisks=0
+ declare -i sparedisks=0
+
+ cmd_line="${MDADM} -C -R ${MD_NAME[i]} ${OP_OPTS[i]} -l ${RAID_LEVEL[i]}"
+
+ # If the user already set the raid-device count, pass through as-is
+ if [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#* -n*}" ]\
+ || [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#*--raid-devices*}" ]; then
+ cmd_line=${cmd_line}" ${MD_DEVS[i]}"
+ echo ${cmd_line}
+ return 0
+ fi
+
+ # FIXME: Get the number of component devices in the array
+ # NOTE(review): $disk expands locally (empty) at assignment time, so
+ # the remote loop echoes one blank line per device - the count below
+ # still comes out right, but the expansion looks unintended; confirm
+ echo_disk="for disk in ${MD_DEVS[i]}; do echo $disk; done"
+ while read line; do
+ let "alldisks += 1"
+ done < <(${REMOTE} ${HOST_NAME[i]} "${echo_disk}")
+
+ if [ ${alldisks} -eq 0 ]; then
+ echo "`basename $0`: construct_mdadm_create_cmdline() error:"\
+ "Failed to execute remote command to get the number of"\
+ "component devices of array ${MD_NAME[i]} from host ${HOST_NAME[i]}!"
+ return 1
+ fi
+
+ # Get the specified number of spare (eXtra) devices
+ if [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#* -x*}" ]; then
+ sparedisks=`echo ${OP_OPTS[i]##* -x}|awk -F" " '{print $1}'`
+ elif [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#*--spare-devices*}" ]; then
+ sparedisks=`echo ${OP_OPTS[i]##*--spare-devices=}|awk -F" " '{print $1}'`
+ fi
+
+ # Get the number of raid devices in the array
+ # The number of raid devices in the array plus the number of spare devices
+ # listed on the command line must equal the number of component devices
+ # (including "missing" devices).
+ let "raiddisks = alldisks - sparedisks"
+
+ if [ ${raiddisks} -lt 1 ]; then
+ echo "`basename $0`: construct_mdadm_create_cmdline() error:"\
+ "Invalid number of raid devices in array ${MD_NAME[i]}: ${raiddisks}!"\
+ "Check the number of spare devices and whether all the component devices"\
+ "\"${MD_DEVS[i]}\" (except \"missing\" devices) exist in host ${HOST_NAME[i]}!"
+ return 1
+ fi
+
+ cmd_line=${cmd_line}" -n ${raiddisks} ${MD_DEVS[i]}"
+
+ echo ${cmd_line}
+ return 0
+}
+
+# construct_mdadm_rm_cmdline index
+#
+# Construct the remove operation command line of mdadm for ${MD_NAME[index]}
+# Prints a command that stops the array and zeroes the superblock of its
+# component devices.
+construct_mdadm_rm_cmdline() {
+ declare -i i=$1
+ local mdadm_cmd
+ local real_devs
+
+ # Deactivate the MD array, releasing all resources
+ mdadm_cmd="${MDADM} -S ${MD_NAME[i]}"
+
+ if [ -n "${MD_DEVS[i]}" ]; then
+ # Remove the "missing" devices from the component devices
+ real_devs=`echo ${MD_DEVS[i]} | sed 's/missing//g'`
+ # Overwrite the superblock with zeros
+ mdadm_cmd=${mdadm_cmd}" && ${MDADM} --zero-superblock ${real_devs}"
+ fi
+
+ echo ${mdadm_cmd}
+ return 0
+}
+
+# construct_mdadm_cmdline host_name
+#
+# Construct the command line of mdadm to be run in $host_name
+# The combined command is left in the global MDADM_CMDLINE, joined with
+# " && ". May prompt the user interactively before destructive actions.
+construct_mdadm_cmdline() {
+ MDADM_CMDLINE=
+ local host_name=$1
+ local mdadm_stop_cmd mdadm_cmd
+ local rc OK
+ declare -i i
+
+ # Construct command line
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ mdadm_stop_cmd=
+ mdadm_cmd=
+ if [ "${host_name}" = "${HOST_NAME[i]}" ]; then
+ case "${OP_MODE[i]}" in
+ "" | create)
+ # Check the status of the MD array
+ md_is_active ${host_name} ${MD_NAME[i]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "0" ]; then
+ # Array already active - ask before replacing it
+ OK=
+ echo -n "`basename $0`: ${MD_NAME[i]} is active on"\
+ "${host_name}, go ahead to deactivate it and create"\
+ "the new array? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: ${MD_NAME[i]} on host"\
+ "${host_name} remains as it is."
+ continue
+ fi
+
+ # Construct the remove command line
+ mdadm_stop_cmd=$(construct_mdadm_rm_cmdline ${i})
+ fi
+
+ # Construct the create command line
+ mdadm_cmd=$(construct_mdadm_create_cmdline ${i})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${mdadm_cmd}"
+ return 1
+ fi
+
+ [ -n "${mdadm_stop_cmd}" ] && mdadm_cmd=${mdadm_stop_cmd}" && "${mdadm_cmd}
+ ;;
+ remove)
+ # An empty name means "stop every array on the host"
+ if [ -z "${MD_NAME[i]}" ]; then
+ OK=
+ echo -n "`basename $0`: Do you really want to remove"\
+ "all the MD devices in the host ${HOST_NAME[i]}? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: MD devices on host"\
+ "${HOST_NAME[i]} remain as they are."
+ continue
+ fi
+
+ # Construct the teardown command line
+ mdadm_cmd="(cat /proc/mdstat | egrep \"^md[[:digit:]]\" |"
+ mdadm_cmd=${mdadm_cmd}" while read md rest; do ${MDADM} -S /dev/\$md; done)"
+ else
+ # Construct the remove command line
+ mdadm_cmd=$(construct_mdadm_rm_cmdline ${i})
+ fi
+ ;;
+ *)
+ # Other operations
+ # (OP_MODE is passed straight through as an mdadm option)
+ mdadm_cmd="${MDADM} ${OP_MODE[i]} ${MD_NAME[i]} ${OP_OPTS[i]} ${MD_DEVS[i]}"
+ ;;
+ esac
+
+ if [ -z "${MDADM_CMDLINE}" ]; then
+ MDADM_CMDLINE=${mdadm_cmd}
+ else
+ MDADM_CMDLINE=${MDADM_CMDLINE}" && "${mdadm_cmd}
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# config_md_devs host_name
+#
+# Run remote command to configure MD devices in $host_name
+# The remote command is launched in the background; its command text and
+# pid are recorded in REMOTE_CMD[]/REMOTE_PID[] for config_md to reap.
+config_md_devs() {
+ local host_name=$1
+
+ # Construct mdadm command line
+ if ! construct_mdadm_cmdline ${host_name}; then
+ return 1
+ fi
+
+ if [ -z "${MDADM_CMDLINE}" ]; then
+ verbose_output "There are no MD devices on host ${host_name}"\
+ "needed to be configured."
+ return 0
+ fi
+
+ # Run remote command to configure MD devices in $host_name
+ verbose_output "Configuring MD devices in host ${host_name}..."
+ verbose_output "Configure command line is: \"${MDADM_CMDLINE}\""
+ REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${MDADM_CMDLINE}\""
+ ${REMOTE} ${host_name} "${MDADM_CMDLINE}" >&2 &
+ REMOTE_PID[pid_num]=$!
+ let "pid_num += 1"
+ # Brief pause between launches before starting the next host
+ sleep 1
+
+ return 0
+}
+
+# Run remote command to configure all the MD devices specified in the csv file
+# Fans out one background remote command per distinct host, then waits
+# for all of them; returns 1 if any remote command failed.
+config_md() {
+ declare -i i=0
+ declare -i idx=0 # Index of NODE_NAME array
+ local host_name
+ local failed_status
+
+ # Initialize the NODE_NAME array
+ unset NODE_NAME
+
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ host_name=${HOST_NAME[i]}
+ # Skip hosts already handled in an earlier iteration
+ configured_host ${host_name} && continue
+
+ NODE_NAME[idx]=${host_name}
+ let "idx += 1"
+
+ # Run remote command to configure MD devices in $host_name
+ if ! config_md_devs ${host_name}; then
+ return 1
+ fi
+ done
+
+ if [ ${#HOST_NAME[@]} -eq 0 -o ${#REMOTE_PID[@]} -eq 0 ]; then
+ verbose_output "There are no MD devices to be configured."
+ return 0
+ fi
+
+ # Wait for the exit status of the background remote command
+ verbose_output "Waiting for the return of the remote command..."
+ failed_status=false
+ for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+ wait ${REMOTE_PID[${pid_num}]}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: config_md() error: Failed"\
+ "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+ failed_status=true
+ fi
+ done
+
+ if ${failed_status}; then
+ return 1
+ fi
+
+ verbose_output "All the MD devices are configured successfully!"
+ return 0
+}
+
+# Main flow
+# check_file, get_md_items and CSV_FILE come from earlier in this script
+# or from lc_common.sh (not visible here) - check_file presumably also
+# sets CSV_FILE from $1.  TODO confirm against the full script.
+# Check the csv file
+if ! check_file $1; then
+ exit 1
+fi
+
+# Get all the MD device items from the csv file
+if ! get_md_items ${CSV_FILE}; then
+ exit 1
+fi
+
+# Configure the MD devices
+if ! config_md; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lc_modprobe.sh - add lustre module options into modprobe.conf or
+# modules.conf
+#
+#################################################################################
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+# Check the kernel version
+# 2.4 kernels use /etc/modules.conf; 2.6 and later use /etc/modprobe.conf.
+KERNEL_VERSION=`uname -r`
+KERNEL_VERSION=${KERNEL_VERSION:0:3}
+
+if [ "${KERNEL_VERSION}" = "2.4" ]; then
+ MODULE_CONF=/etc/modules.conf
+else
+ MODULE_CONF=/etc/modprobe.conf
+fi
+
+# The lnet options lines are passed on stdin as one line with literal
+# "\n" separators between options (see generate_lnet_lines below).
+read -r NETWORKS
+# Use mktemp rather than a predictable /tmp/modlines$$.txt name so a
+# local attacker cannot pre-create or symlink the temp file.
+MODLINES_FILE=`mktemp /tmp/modlines.XXXXXX` || exit 1
+START_MARKER=$"# start lustre config"
+END_MARKER=$"# end lustre config"
+
+# Generate a temp file containing the lnet options lines
+# Splits the single stdin line held in ${NETWORKS} on literal "\n"
+# separators and writes one option per line into ${MODLINES_FILE},
+# bracketed by START_MARKER/END_MARKER so the block can later be
+# removed with a single sed range delete.
+generate_lnet_lines() {
+ local LNET_LINE TMP_LINE
+
+ TMP_LINE="${NETWORKS}"
+
+ echo ${START_MARKER} > ${MODLINES_FILE}
+ echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE}
+ while true; do
+ # First segment before the next literal "\n"
+ LNET_LINE=${TMP_LINE%%\\n*}
+ echo ${LNET_LINE} >> ${MODLINES_FILE}
+
+ # Drop the consumed segment (and its "\n" separator)
+ TMP_LINE=${TMP_LINE#*\\n}
+
+ # When no "\n" remains, neither expansion changes the string,
+ # so the two are equal and the loop terminates.
+ if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then
+ break
+ fi
+ done
+ echo ${END_MARKER} >> ${MODLINES_FILE}
+
+ #echo "--------------${MODLINES_FILE}--------------"
+ #cat ${MODLINES_FILE}
+ #echo -e "------------------------------------------\n"
+
+ return 0
+}
+
+if ! generate_lnet_lines; then
+ exit 1
+fi
+
+# fcanon: presumably canonicalizes the path (follows symlinks); defined
+# in lc_common.sh - TODO confirm
+MODULE_CONF=$(fcanon ${MODULE_CONF})
+# Add lnet options lines to the module configuration file
+if [ -e ${MODULE_CONF} ]; then
+ # Delete the old options
+ # Removing the previous marker-delimited block makes the script
+ # idempotent: re-running replaces rather than duplicates the options.
+ sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF}
+fi
+
+cat ${MODLINES_FILE} >> ${MODULE_CONF}
+rm -f ${MODLINES_FILE}
+exit 0
--- /dev/null
+#!/bin/sh
+
+# Given one or more Lustre objects, create a mon configuration file
+# naming the mon watches based on the Lustre object names
+# For each Lustre object, the script will create two mon watches
+# The first watch sets a trap, and the second watch clears the
+# trap if Lustre is healthy.
+
+# This may be more fun in Perl due to the need to support a list
+# of objects
+
+# (plus we could use a Perl format for this goop)
+
+MONBASEDIR=${MONBASEDIR:-/usr/local/lib/mon}
+MONCFGDIR=${MONCFGDIR:-/etc/mon}
+# Timestamp prefix for the generated config file name.
+# NOTE(review): %y_%m%d_%S has no hour/minute field, so two runs in the
+# same second of different minutes collide - confirm this is intended.
+TD=`date +%y_%m%d_%S`
+# Output file is created in the current working directory.
+TMPMONCFG=${TD}-mon.cfg
+# Determines how often we will check Lustre health
+CHECKINTERVAL="3m"
+# Determines how quickly we must clear the trap
+TRAPINTERVAL="6m"
+ALERTSCRIPT=${ALERTSCRIPT:-"fail_lustre.alert"}
+TRAPSCRIPT=${TRAPSCRIPT:-"lustre.mon.trap"}
+
+# We will assume all inputs are Lustre objects
+# file locations and timeouts correct to taste
+# Correct to taste
+# print_header - append the global mon daemon settings (directories,
+# logging, process limits) to $TMPMONCFG.  The <<-EOF form strips
+# leading tabs (not spaces) from the here-doc body.
+print_header() {
+ cat >> $TMPMONCFG <<-EOF
+ cfbasedir = $MONCFGDIR
+ alertdir = $MONBASEDIR/alert.d
+ mondir = $MONBASEDIR/mon.d
+ statedir = $MONBASEDIR/state.d
+ logdir = $MONBASEDIR/log.d
+ dtlogfile = $MONBASEDIR/log.d/downtime.log
+ maxprocs = 20
+ histlength = 100
+ randstart = 60s
+ authtype = getpwnam
+EOF
+}
+
+# Tabs should be preserved in the config file
+# $1 object name
+# we do not set a period, it is assumed monitor is always active
+
+# print_trap_rec - append the trap-receiving watch for object $1:
+# if the trap is not re-armed within $TRAPINTERVAL, $ALERTSCRIPT fires.
+print_trap_rec() {
+ cat >> $TMPMONCFG <<EOF
+#
+watch ${1}-obj
+ service ${1}_ser
+ description triggers heartbeat failure if trap springs on $1
+ traptimeout $TRAPINTERVAL
+ period
+ alert $ALERTSCRIPT
+
+# end ${1}-obj
+
+EOF
+
+}
+
+# print_trap_send - append the trap-sending watch for object $1:
+# every $CHECKINTERVAL, $TRAPSCRIPT checks health and clears the trap.
+print_trap_send() {
+ cat >> $TMPMONCFG <<EOF
+#
+watch ${1}-mon
+ service ${1}_mon_ser
+ description clears trap for $1
+ interval $CHECKINTERVAL
+ monitor $TRAPSCRIPT ${1}-obj ${1}_ser ${1}
+ period
+ alert $ALERTSCRIPT
+# end ${1}-mon
+EOF
+
+}
+
+# usage - print a short synopsis and exit non-zero.
+usage() {
+ echo "$0 -n <node> -n <node> -o <Lustre object> -o <Lustre object>...."
+ echo "Creates the /etc/mon/mon.cf file to monitor Lustre objects"
+ exit 1
+}
+
+
+# Start of script
+
+if [ $# -eq 0 ];then
+ usage
+fi
+
+# This script should work for any number of hosts
+#
+HOSTCNT=0
+OBJCNT=0
+
+declare -a HOSTS
+declare -a OBJS
+
+# -n may be repeated for each host, -o for each Lustre object.
+while getopts "n:o:" opt; do
+ case $opt in
+ n) HOSTS[HOSTCNT]=$OPTARG
+ HOSTCNT=$(( HOSTCNT + 1 ))
+ ;;
+ o) OBJS[OBJCNT]=$OPTARG
+ OBJCNT=$(( OBJCNT + 1 ))
+ ;;
+ *) usage
+ ;;
+ esac
+done
+
+echo "Found $HOSTCNT hosts"
+echo "Found $OBJCNT Lustre objects"
+
+# First create the host groups
+# we assume
+# each object will have two watches defined
+# each object hostgroup will have all objects
+
+# Create the file with the declared goop
+print_header
+
+# Quote the array expansion so object names survive word splitting
+# and globbing intact.
+for obj in "${OBJS[@]}"
+do
+ echo "hostgroup ${obj}-obj ${HOSTS[@]}" >> $TMPMONCFG
+ echo "hostgroup ${obj}-mon ${HOSTS[@]}" >> $TMPMONCFG
+ echo "#" >> $TMPMONCFG
+done
+
+# create the monitors
+
+for obj in "${OBJS[@]}"
+do
+ print_trap_send $obj
+ print_trap_rec $obj
+done
+
+echo "Mon config completed - new mon config is $TMPMONCFG"
+exit 0
\ No newline at end of file
--- /dev/null
+#!/bin/bash
+#
+# lc_net.sh - script for Lustre cluster network verification
+#
+###############################################################################
+
+# Usage
+# Print the synopsis to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-v] <csv file>
+
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cl-
+ uster, the first field of each line is the host name
+ of the cluster node
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+# VERBOSE_OUTPUT is consumed by verbose_output() from lc_common.sh
+# (presumably - TODO confirm against lc_common.sh).
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "v" OPTION; do
+ case $OPTION in
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: Missing csv file!"
+ usage
+fi
+
+# Global variables
+CSV_FILE=$1
+declare -a HOST_NAMES
+declare -a HOST_IPADDRS
+
+# Get the host names from the csv file
+# Fills the global HOST_NAMES array with the first field of every
+# non-comment, non-blank csv line, skipping duplicates.  Returns 1 if
+# any non-blank line lacks an alphanumeric hostname field.
+get_hostnames() {
+ local NAME CHECK_STR
+ declare -i i
+ declare -i j
+
+ # Initialize the HOST_NAMES array
+ unset HOST_NAMES
+
+ # Find lines that have content but whose first csv field contains
+ # no alphanumeric character, i.e. a missing hostname.
+ CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \
+ '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'`
+ if [ -n "${CHECK_STR}" ]; then
+ echo >&2 $"`basename $0`: get_hostnames() error: Missing"\
+ "hostname field in the line - ${CHECK_STR}"
+ return 1
+ fi
+
+ i=0
+ for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\
+ | awk -F, '/[[:alnum:]]/{print $1}'`
+ do
+ # "continue 2" skips to the next NAME when a duplicate is found.
+ for ((j = 0; j < ${#HOST_NAMES[@]}; j++)); do
+ [ "${NAME}" = "${HOST_NAMES[j]}" ] && continue 2
+ done
+
+ HOST_NAMES[i]=${NAME}
+ # Arithmetic assignment: i was declared with "declare -i".
+ i=$i+1
+ done
+
+ return 0
+}
+
+# ping_host host_name
+# Check whether host $host_name is reachable.
+# If it is, then return the IP address of this host.
+# Note: both the IP address (success) and the error message (failure)
+# are written to stdout; the caller distinguishes them by the return
+# code (see local_check).
+ping_host() {
+ local host_name=$1
+ local ip_addr=
+ local ret_str
+
+ if [ -z "${host_name}" ]; then
+ echo "`basename $0`: ping_host() error: Missing hostname!"
+ return 1
+ fi
+
+ # Run ping command
+ ret_str=`ping -c1 ${host_name} 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo "`basename $0`: ping_host() error: ${ret_str}!"
+ else
+ echo "`basename $0`: ping_host() error:"\
+ "Host ${host_name} does not respond to ping!"
+ fi
+ return 1
+ fi
+
+ # Get the IP address
+ # Field 3 of the first ping line is "(a.b.c.d)"; strip the parens.
+ # Assumes Linux iputils ping output format - TODO confirm
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \
+ sed -e 's/^(//' -e 's/)$//'`
+
+ echo "${ip_addr}"
+ return 0
+}
+
+# local_check index
+# Check the network connectivity between local host and ${HOST_NAMES[index]}.
+# On success stores the resolved IP in HOST_IPADDRS[index]; on failure
+# the captured text is ping_host()'s error message and is echoed to stderr.
+local_check() {
+ declare -i i=$1
+
+ # Check whether ${HOST_NAMES[i]} is reachable
+ # and get the IP address of this host from ping
+ HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${HOST_IPADDRS[i]}"
+ return 1
+ fi
+
+ return 0
+}
+
+# remote_check index
+# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
+# this host agrees with the local host about what its name is resolved to.
+remote_check() {
+ declare -i i=$1
+ local cmd ret_str
+ local ip_addr= # the IP address got from remote ping
+
+ # Execute remote command to check whether ${HOST_NAMES[i]}
+ # can resolve its own name
+ cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
+ ret_str=`${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "remote to ${HOST_NAMES[i]} error: ${ret_str}!"
+ return 1
+ fi
+
+ if [ -z "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "No results from ${HOST_NAMES[i]}! Check the network"\
+ "connectivity between local host and ${HOST_NAMES[i]}!"
+ return 1
+ fi
+
+ # Get the IP address of ${HOST_NAMES[i]} from its own ping
+ # pdsh prefixes every output line with "host: ", shifting the
+ # "(a.b.c.d)" token from field 3 to field 4.
+ if is_pdsh; then
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'`
+ else
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'`
+ fi
+ ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'`
+
+ # Compare IP addresses
+ # Check whether ${HOST_NAMES[i]} agrees with the local host
+ # about what its name is resolved to.
+ if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "Local host resolves ${HOST_NAMES[i]} to IP address"\
+ "\"${HOST_IPADDRS[i]}\", while its own resolution is"\
+ "\"${ip_addr}\". They are not the same!"
+ return 1
+ fi
+
+ return 0
+}
+
+# network_verify
+# Verify name resolution and network connectivity of the Lustre cluster
+# For every host in the csv file (except the local host) run the local
+# reachability check and the remote self-resolution check.  Stops at the
+# first failing host.
+network_verify() {
+ declare -i i
+
+ # Initialize the HOST_IPADDRS array
+ unset HOST_IPADDRS
+
+ # Get all the host names from the csv file
+ ! get_hostnames && return 1
+
+ # Check the network connectivity between local host
+ # and other cluster nodes
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ # Skip the local host itself.
+ [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
+
+ verbose_output "Verifying network connectivity between"\
+ "\"`hostname`\" and \"${HOST_NAMES[i]}\"..."
+ ! local_check $i && return 1
+ ! remote_check $i && return 1
+ verbose_output "OK"
+ done
+
+ return 0
+}
+
+# Main flow
+# check_file is provided by lc_common.sh (sourced above).
+if ! check_file ${CSV_FILE}; then
+ exit 1
+fi
+
+# Cluster network verification
+if ! network_verify; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lc_servip.sh - script for verifying the service IP and the real
+# interface IP in a remote host are in the same subnet
+#
+###############################################################################
+
+# Usage
+# Print the synopsis to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <service IPaddr> <hostname>
+
+ service IPaddr the IP address to failover
+ hostname the hostname of the remote node
+
+EOF
+ exit 1
+}
+
+# Check arguments
+# Both the service IP address and the hostname are required.
+if [ $# -lt 2 ]; then
+ usage
+fi
+
+# Remote command
+# May be overridden from the environment (e.g. to use pdsh).
+REMOTE=${REMOTE:-"ssh -x -q"}
+
+# Check whether the remote command is pdsh
+# Returns 0 if ${REMOTE} contains "pdsh" (pdsh prefixes each output
+# line with the host name, so callers must strip that field), 1 otherwise.
+is_pdsh() {
+ if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# inSameIPsubnet serviceIPaddr interfaceIPaddr mask
+#
+# Given two IP addresses and a subnet mask determine if these IP
+# addresses are in the same subnet. If they are, return 0, else return 1.
+# All three arguments must be IPv4 dotted-decimal quads.
+#
+inSameIPsubnet() {
+ declare -i n
+ declare -ia mask
+ declare -ia ip1 ip2 # IP addresses given
+ declare -i quad1 quad2 # calculated quad words
+
+ #
+ # Remove '.' characters from dotted decimal notation and save
+ # in arrays. i.e.
+ #
+ # 192.168.1.163 -> array[0] = 192
+ # array[1] = 168
+ # array[2] = 1
+ # array[3] = 163
+ #
+ let n=0
+ for quad in $(echo $1 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip1[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $2 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip2[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $3 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ mask[n]=$quad
+ let n=n+1
+ done
+
+ #
+ # For each quad word, logically AND the IP address with the subnet
+ # mask to get the network/subnet quad word. If the resulting
+ # quad words for both IP addresses are the same they are in the
+ # same IP subnet.
+ #
+ for n in 0 1 2 3
+ do
+ # let $((...)) evaluates the assignment arithmetically.
+ let $((quad1=${ip1[n]} & ${mask[n]}))
+ let $((quad2=${ip2[n]} & ${mask[n]}))
+
+ if [ $quad1 != $quad2 ]; then
+ echo >&2 $"`basename $0`: Service IP address $1 and"\
+ "real interface IP address $2 are in"\
+ "different subnets!"
+ return 1 # in different subnets
+ fi
+ done
+
+ return 0 # in the same subnet, all quad words matched
+}
+
+#
+# findInterface IPaddr hostname
+#
+# Given a target IP address and a hostname, find the interface in which
+# this address is configured. If found return 0, if not return 1. The
+# interface name is returned to stdout.
+#
+# Parses remote /sbin/ifconfig output: the outer read consumes an
+# interface header line, the inner read walks that interface's detail
+# lines (a blank line ends each interface stanza).  Assumes legacy
+# net-tools ifconfig format with "addr:" fields - TODO confirm on
+# the target distribution.
+#
+findInterface() {
+ declare ret_line
+ declare line
+ declare intf
+ declare addr
+ declare state
+
+ declare target=$1
+ declare hostname=$2
+
+ while read ret_line
+ do
+ set -- ${ret_line}
+ # pdsh prefixes output lines with "host:"; drop that token.
+ is_pdsh && shift
+ intf="$1"
+ shift
+ line="$*"
+
+ while read line
+ do
+ if [ "$line" = "" ]; then # go to next interface
+ continue 2
+ fi
+
+ set - $line
+ addr=
+ while [ $# -gt 0 ]; do
+ case $1 in
+ addr:*)
+ addr=${1##addr:}
+ if [ -n "$addr" -a "$addr" = "$target" ]
+ then
+ echo $intf
+ return 0
+ fi
+ ;;
+ esac
+ shift
+ done
+ done
+ done < <(${REMOTE} $hostname /sbin/ifconfig)
+
+ echo >&2 "`basename $0`: Cannot find the interface in which" \
+ "$target is configured in the host $hostname!"
+ return 1
+}
+
+#
+# findNetmask interface hostname
+#
+# Given an interface find the netmask addresses associated with it.
+# Return 0 when found, else return 1. The netmask is returned to stdout.
+#
+# Scans remote "ifconfig <interface>" output for the first "Mask:"
+# token (legacy net-tools format).
+#
+findNetmask() {
+ declare ret_line
+ declare line
+ declare addr
+ declare target=$1
+ declare hostname=$2
+
+ while read ret_line
+ do
+ set -- ${ret_line}
+ # pdsh prefixes output lines with "host:"; drop that token.
+ is_pdsh && shift
+ line="$*"
+
+ set - $line
+
+ while [ $# -gt 0 ]; do
+ case $1 in
+ Mask:*)
+ echo ${1##*:} # return netmask addr
+ return 0
+ ;;
+ esac
+ shift
+ done
+ done < <(${REMOTE} $hostname /sbin/ifconfig $target)
+
+ echo >&2 "`basename $0`: Cannot find the netmask associated with" \
+ "the interface $target in the host $hostname!"
+ return 1
+}
+
+#
+# check_srvIPaddr serviceIPaddr hostname
+#
+# Given a service IP address and hostname, check whether the service IP address
+# and the real interface IP address of hostname are in the same subnet.
+# If they are, return 0, else return 1.
+#
+check_srvIPaddr() {
+ declare real_IPaddr
+ declare real_intf
+ declare netmask
+ declare srv_IPaddr=$1
+ declare hostname=$2
+
+ # Get the corresponding IP address of the hostname from /etc/hosts table
+ real_IPaddr=`egrep "[[:space:]]$hostname([[:space:]]|$)" /etc/hosts \
+ | awk '{print $1}'`
+ if [ -z "$real_IPaddr" ]; then
+ echo >&2 "`basename $0`: Hostname $hostname does not exist in" \
+ "the local /etc/hosts table!"
+ return 1
+ fi
+
+ # A single IPv4 dotted quad is at most 15 characters; anything
+ # longer means the egrep above matched more than one line.
+ if [ ${#real_IPaddr} -gt 15 ]; then
+ echo >&2 "`basename $0`: More than one IP address line" \
+ "corresponding to $hostname in the local" \
+ "/etc/hosts table!"
+ return 1
+ fi
+
+ # Get the interface in which the real IP address is configured
+ real_intf=$(findInterface $real_IPaddr $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ # Strip any alias suffix (eth0:1 -> eth0).
+ real_intf=${real_intf%%:*}
+
+ # Get the netmask address associated with the real interface
+ netmask=$(findNetmask $real_intf $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ # Determine if the service IP address and the real IP address
+ # are in the same subnet
+ inSameIPsubnet $srv_IPaddr $real_IPaddr $netmask
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Check service IP address
+if ! check_srvIPaddr $1 $2; then
+ exit 1
+fi
+exit 0
--- /dev/null
+#!/usr/bin/perl
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# convert an lmc batch file to a csv file for lustre_config.sh
+#
+use strict; use warnings;
+
+use Data::Dumper;
+
+# get_arg_val(name, \@args) - find "--name value" in the referenced
+# argument list, splice both tokens out of it, and return the value.
+# Returns undef (falls off the end) when the option is absent.
+sub get_arg_val {
+ my $arg = shift;
+ my ($aref) = @_;
+ for (my $i = 0; $i <= $#$aref; $i++) {
+ if ($$aref[$i] eq "--" . $arg) {
+ # Remove the flag and its value from the caller's list.
+ my @foo = splice(@$aref, $i, 2);
+ return $foo[1];
+ }
+ }
+}
+
+# get_arg(name, \@args) - find the boolean flag "--name", splice it out
+# of the referenced list, and return 1 if present, 0 otherwise.
+sub get_arg {
+ my $arg = shift;
+ my ($aref) = @_;
+ for (my $i = 0; $i <= $#$aref; $i++) {
+ if ($$aref[$i] eq "--" . $arg) {
+ splice(@$aref, $i, 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+# The add_* subs each consume the recognized "--opt value" pairs from
+# @_ via get_arg_val()/get_arg() and return a hashref describing the
+# object.  After the splicing, anything left in @_ is an unrecognized
+# argument; "if (@_)" catches even a single stray token, which the old
+# "$#_ > 0" test (requiring two or more leftovers) silently missed.
+sub add_net {
+ my $net = {};
+ $net->{"node"} = get_arg_val("node", \@_);
+ $net->{"nid"} = get_arg_val("nid", \@_);
+ $net->{"nettype"} = get_arg_val("nettype", \@_);
+ $net->{"port"} = get_arg_val("port", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add net\": @_\n";
+ exit(1);
+ }
+ return $net;
+}
+
+sub add_mds {
+ my $mds = {};
+ $mds->{"node"} = get_arg_val("node", \@_);
+ $mds->{"mds"} = get_arg_val("mds", \@_);
+ $mds->{"fstype"} = get_arg_val("fstype", \@_);
+ $mds->{"dev"} = get_arg_val("dev", \@_);
+ $mds->{"size"} = get_arg_val("size", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add mds\": @_\n";
+ exit(1);
+ }
+ return $mds;
+}
+
+sub add_lov {
+ my $lov = {};
+ $lov->{"lov"} = get_arg_val("lov", \@_);
+ $lov->{"mds"} = get_arg_val("mds", \@_);
+ $lov->{"stripe_sz"} = get_arg_val("stripe_sz", \@_);
+ $lov->{"stripe_cnt"} = get_arg_val("stripe_cnt", \@_);
+ $lov->{"stripe_pattern"} = get_arg_val("stripe_pattern", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add lov\": @_\n";
+ exit(1);
+ }
+ return $lov;
+}
+
+sub add_ost {
+ my $ost = {};
+ $ost->{"node"} = get_arg_val("node", \@_);
+ $ost->{"ost"} = get_arg_val("ost", \@_);
+ $ost->{"fstype"} = get_arg_val("fstype", \@_);
+ $ost->{"dev"} = get_arg_val("dev", \@_);
+ $ost->{"size"} = get_arg_val("size", \@_);
+ $ost->{"lov"} = get_arg_val("lov", \@_);
+ $ost->{"mountfsoptions"} = get_arg_val("mountfsoptions", \@_);
+ $ost->{"failover"} = get_arg("failover", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add ost\": @_\n";
+ exit(1);
+ }
+ return $ost;
+}
+
+sub add_mtpt {
+ my $mtpt = {};
+ $mtpt->{"node"} = get_arg_val("node", \@_);
+ $mtpt->{"path"} = get_arg_val("path", \@_);
+ $mtpt->{"mds"} = get_arg_val("mds", \@_);
+ $mtpt->{"lov"} = get_arg_val("lov", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add mtpt\": @_\n";
+ exit(1);
+ }
+ return $mtpt;
+}
+
+# Symbolic references are needed for the &$subref("add_$type") dispatch
+# in the main loop below.
+no strict 'refs';
+
+# find_obj(type, key, value, @objs) - return the first hashref in @objs
+# whose $key field equals $value; returns undef when no object matches.
+# ($type is informational only and not used in the lookup.)
+sub find_obj {
+ my $type = shift;
+ my $key = shift;
+ my $value = shift;
+ my @objs = @_;
+
+ foreach my $obj (@objs) {
+ if ($obj->{$key} eq $value) {
+ return $obj;
+ }
+ }
+}
+
+# lnet_options(\%net) - build the "options lnet ..." modprobe line for a
+# net object, appending accept_port only when a port was specified.
+sub lnet_options {
+ my $net = shift;
+
+ my $options_str = "options lnet networks=" . $net->{"nettype"} .
+ " accept=all";
+ if (defined($net->{"port"})) {
+ $options_str .= " accept_port=" . $net->{"port"};
+ }
+ return $options_str;
+
+}
+
+# main
+
+# %objs maps object type ("net", "mds", "lov", "ost", "mtpt") to an
+# arrayref of parsed objects; @mgses collects the MGS NIDs so the ost
+# lines can reference them.
+my %objs;
+my @mgses;
+
+my $MOUNTPT = "/mnt";
+if (defined($ENV{"MOUNTPT"})) {
+ $MOUNTPT = $ENV{"MOUNTPT"};
+}
+
+# Parse each lmc batch line: find "--add <type>" and dispatch the rest
+# of the tokens to the matching add_<type> sub (symbolic call; see
+# "no strict 'refs'" above).
+while(<>) {
+ my @args = split;
+
+ for (my $i = 0; $i <= $#args; $i++) {
+ if ($args[$i] eq "--add") {
+ my $type = "$args[$i + 1]";
+ my $subref = "add_$type";
+ splice(@args, $i, 2);
+ push(@{$objs{$type}}, &$subref(@args));
+ last;
+ }
+ if ($i == $#args) {
+ print STDERR "I don't know how to handle @args\n";
+ exit(1);
+ }
+ }
+}
+
+# link lovs to mdses
+foreach my $lov (@{$objs{"lov"}}) {
+ my $mds = find_obj("mds", "mds", $lov->{"mds"}, @{$objs{"mds"}});
+ $mds->{"lov"} = $lov;
+}
+# XXX could find failover pairs of osts and mdts here and link them to
+# one another and then fill in their details in the csv generators below
+foreach my $mds (@{$objs{"mds"}}) {
+ # find the net for this node
+ my $net = find_obj("net", "node", $mds->{"node"}, @{$objs{"net"}});
+ my $lov = $mds->{"lov"};
+ # Translate the lov stripe settings into mkfs.lustre --param options.
+ my $mkfs_options="";
+ if (defined($lov->{"stripe_sz"})) {
+ $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . ",";
+ }
+ if (defined($lov->{"stripe_cnt"})) {
+ $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . ",";
+ }
+ if (defined($lov->{"stripe_pattern"})) {
+ $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . ",";
+ }
+ # Drop the trailing comma left by the concatenations above.
+ chop($mkfs_options);
+ if ($mkfs_options ne "") {
+ $mkfs_options = " --param=\"$mkfs_options\"";
+ }
+
+ # Emit one combo mgs|mdt csv line (lustre_config.sh format).
+ printf "%s,%s,%s,$MOUNTPT/%s,mgs|mdt,,,,--device-size=%s --noformat%s,,noauto\n",
+ $mds->{"node"},
+ lnet_options($net),
+ $mds->{"dev"},
+ $mds->{"mds"},
+ $mds->{"size"},
+ $mkfs_options;
+
+ push(@mgses, $net->{"nid"});
+}
+
+foreach my $ost (@{$objs{"ost"}}) {
+ # find the net for this node
+ my $mount_opts="noauto";
+ if (defined($ost->{"mountfsoptions"})) {
+ $mount_opts .= "," . $ost->{"mountfsoptions"};
+ }
+ my $net = find_obj("net", "node", $ost->{"node"}, @{$objs{"net"}});
+ # Emit one ost csv line, pointing at every MGS NID collected above.
+ printf "%s,%s,%s,$MOUNTPT/%s,ost,,\"%s\",,--device-size=%s --noformat,,\"%s\"\n",
+ $ost->{"node"},
+ lnet_options($net),
+ $ost->{"dev"},
+ $ost->{"ost"},
+ join(",", @mgses),
+ $ost->{"size"},
+ $mount_opts;
+}
: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
: ${LCONF:=/usr/sbin/lconf}
: ${LCTL:=/usr/sbin/lctl}
+# Some distros use modprobe.conf.local
+if [ -f /etc/modprobe.conf.local ]; then
+ : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
+else
+ : ${MODPROBE_CONF:=/etc/modprobe.conf}
+fi
+# Be sure the proper directories are in PATH.
+export PATH="/sbin:$PATH"
+
+case "$SERVICE" in
+ [SK][[:digit:]][[:digit:]]lustre | lustre)
+ SERVICE="lustre"
+ : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+ *)
+ : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+esac
+LOCK=/var/lock/subsys/$SERVICE
case "$SERVICE" in
[SK][[:digit:]][[:digit:]]lustre | lustre)
RETVAL=4 # insufficent privileges
return
fi
- ${LCONF} ${LCONF_START_ARGS}
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ modprobe lnet
+ ${LCTL} network configure
+ else
+ ${LCONF} ${LCONF_START_ARGS}
+ fi
RETVAL=$?
echo $SERVICE
if [ $RETVAL -eq 0 ]; then
RETVAL=4 # insufficent privileges
return
fi
- ${LCONF} ${LCONF_STOP_ARGS}
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
+ if [[ ! -z ${MODULE_LOADED} ]]; then
+ ${LCTL} network unconfigure
+ fi
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+ # do it again, in case we tried to unload ksocklnd too early
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
+
+ else
+ ${LCONF} ${LCONF_STOP_ARGS}
+ fi
RETVAL=$?
echo $SERVICE
rm -f $LOCK
[ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
# check for any configured devices (may indicate partial startup)
- [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
+ if [ -d /proc/fs/lustre ]; then
+ [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
- # check for either a server or a client filesystem
- MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
- OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
- LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
- [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+ # check for either a server or a client filesystem
+ MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+ OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
+ LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+ [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+ else
+ # check if this is a router
+ if [ -d /proc/sys/lnet ]; then
+ ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
+ if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ fi
+ fi
# check for server disconnections
DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
--- /dev/null
+#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# lustre_config.sh - format and set up multiple lustre servers from a csv file
+#
+# This script is used to parse each line of a spreadsheet (csv file) and
+# execute remote commands to format (mkfs.lustre) every Lustre target
+# that will be part of the Lustre cluster.
+#
+# In addition, it can also verify the network connectivity and hostnames in
+# the cluster, configure Linux MD/LVM devices and produce High-Availability
+# software configurations for Heartbeat or CluManager.
+#
+################################################################################
+
+# Usage
+# Print the option summary to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-t HAtype] [-n] [-d] [-f] [-m] [-h] [-v] <csv file>
+
+ This script is used to format and set up multiple lustre servers from a
+ csv file.
+
+ -h help and examples
+ -t HAtype produce High-Availability software configurations
+ The argument following -t is used to indicate the High-
+ Availability software type. The HA software types which
+ are currently supported are: hbv1 (Heartbeat version 1)
+ and hbv2 (Heartbeat version 2).
+ -n no net - don't verify network connectivity and hostnames
+ in the cluster
+ -d configure Linux MD/LVM devices before formatting the
+ Lustre targets
+ -f force-format the Lustre targets using --reformat option
+ -m no fstab change - don't modify /etc/fstab to add the new
+ Lustre targets
+ If using this option, then the value of "mount options"
+ item in the csv file will be passed to mkfs.lustre, else
+ the value will be added into the /etc/fstab.
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+sample() {
+ cat <<EOF
+
+This script is used to parse each line of a spreadsheet (csv file) and
+execute remote commands to format (mkfs.lustre) every Lustre target
+that will be part of the Lustre cluster.
+
+It can also optionally:
+ * verify the network connectivity and hostnames in the cluster
+ * configure Linux MD/LVM devices
+ * modify /etc/modprobe.conf to add Lustre networking info
+ * add the Lustre server info to /etc/fstab
+ * produce configurations for Heartbeat or CluManager.
+
+There are 5 kinds of line formats in the csv file. They represent the following
+targets:
+1) Linux MD device
+The format is:
+hostname,MD,md name,operation mode,options,raid level,component devices
+
+hostname hostname of the node in the cluster
+MD marker of MD device line
+md name MD device name, e.g. /dev/md0
+operation mode create or remove, default is create
+options a "catchall" for other mdadm options, e.g. "-c 128"
+raid level raid level: 0,1,4,5,6,10,linear and multipath
+component devices block devices to be combined into the MD device
+ Multiple devices are separated by space or by using
+ shell expansions, e.g. "/dev/sd{a,b,c}"
+
+2) Linux LVM PV (Physical Volume)
+The format is:
+hostname,PV,pv names,operation mode,options
+
+hostname hostname of the node in the cluster
+PV marker of PV line
+pv names devices or loopback files to be initialized for later
+ use by LVM or to be wiped the label, e.g. /dev/sda
+ Multiple devices or files are separated by space or by
+ using shell expansions, e.g. "/dev/sd{a,b,c}"
+operation mode create or remove, default is create
+options a "catchall" for other pvcreate/pvremove options
+ e.g. "-vv"
+
+3) Linux LVM VG (Volume Group)
+The format is:
+hostname,VG,vg name,operation mode,options,pv paths
+
+hostname hostname of the node in the cluster
+VG marker of VG line
+vg name name of the volume group, e.g. ost_vg
+operation mode create or remove, default is create
+options a "catchall" for other vgcreate/vgremove options
+ e.g. "-s 32M"
+pv paths physical volumes to construct this VG, required by
+ create mode
+ Multiple PVs are separated by space or by using
+ shell expansions, e.g. "/dev/sd[k-m]1"
+
+4) Linux LVM LV (Logical Volume)
+The format is:
+hostname,LV,lv name,operation mode,options,lv size,vg name
+
+hostname hostname of the node in the cluster
+LV marker of LV line
+lv name name of the logical volume to be created (optional)
+ or path of the logical volume to be removed (required
+ by remove mode)
+operation mode create or remove, default is create
+options a "catchall" for other lvcreate/lvremove options
+ e.g. "-i 2 -I 128"
+lv size size [kKmMgGtT] to be allocated for the new LV
+ Default unit is megabytes.
+vg name name of the VG in which the new LV will be created
+
+5) Lustre target
+The format is:
+hostname,module_opts,device name,mount point,device type,fsname,mgs nids,index,
+format options,mkfs options,mount options,failover nids
+
+hostname hostname of the node in the cluster, must match "uname -n"
+module_opts Lustre networking module options
+device name Lustre target (block device or loopback file)
+mount point Lustre target mount point
+device type Lustre target type (mgs, mdt, ost, mgs|mdt, mdt|mgs)
+fsname Lustre filesystem name, should be limited to 8 characters
+ Default is "lustre".
+mgs nids NID(s) of remote mgs node, required for mdt and ost targets
+ If this item is not given for an mdt, it is assumed that
+ the mdt will also be an mgs, according to mkfs.lustre.
+index Lustre target index
+format options a "catchall" contains options to be passed to mkfs.lustre
+ "--device-size", "--param", etc. all goes into this item.
+mkfs options format options to be wrapped with --mkfsoptions="" and
+ passed to mkfs.lustre
+mount options If this script is invoked with "-m" option, then the value of
+ this item will be wrapped with --mountfsoptions="" and passed
+ to mkfs.lustre, else the value will be added into /etc/fstab.
+failover nids NID(s) of failover partner node
+
+All the NIDs in one node are delimited by commas (','). When multiple nodes are
+specified, they are delimited by a colon (':').
+
+Items left blank will be set to defaults.
+
+Example 1 - Simple, with combo MGS/MDT:
+-------------------------------------------------------------------------------
+# combo mdt/mgs
+lustre-mgs,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240
+
+# ost0
+lustre-ost,options lnet networks=tcp,/tmp/ost0,/mnt/ost0,ost,,lustre-mgs@tcp0,,--device-size=10240
+
+# ost1
+lustre-ost,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,lustre-mgs@tcp0,,--device-size=10240
+-------------------------------------------------------------------------------
+
+Example 2 - Separate MGS/MDT, two networks interfaces:
+-------------------------------------------------------------------------------
+# mgs
+lustre-mgs1,options lnet 'networks="tcp,elan"',/dev/sda,/mnt/mgs,mgs,,,,--quiet --param="sys.timeout=50",,"defaults,noauto","lustre-mgs2,2@elan"
+
+# mdt
+lustre-mdt1,options lnet 'networks="tcp,elan"',/dev/sdb,/mnt/mdt,mdt,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet --param="lov.stripe.size=4194304",-J size=16,"defaults,noauto",lustre-mdt2
+
+# ost
+lustre-ost1,options lnet 'networks="tcp,elan"',/dev/sdc,/mnt/ost,ost,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet,-I 512,"defaults,noauto",lustre-ost2
+-------------------------------------------------------------------------------
+
+Example 3 - with combo MGS/MDT failover pair and OST failover pair:
+-------------------------------------------------------------------------------
+# combo mgs/mdt
+lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240,,,lustre-mgs2@tcp0
+
+# combo mgs/mdt backup (--noformat)
+lustre-mgs2,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240 --noformat,,,lustre-mgs1@tcp0
+
+# ost
+lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240,,,lustre-ost2@tcp0
+
+# ost backup (--noformat) (note different device name)
+lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240 --noformat,,,lustre-ost1@tcp0
+-------------------------------------------------------------------------------
+
+Example 4 - Configure Linux MD/LVM devices before formatting Lustre targets:
+-------------------------------------------------------------------------------
+# MD device on mgsnode
+mgsnode,MD,/dev/md0,,-q,1,/dev/sda1 /dev/sdb1
+
+# MD/LVM devices on ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c}"
+ostnode,MD,/dev/md1,,-q -c 128,5,"/dev/sd{d,e,f}"
+ostnode,PV,/dev/md0 /dev/md1
+ostnode,VG,ost_vg,,-s 32M,/dev/md0 /dev/md1
+ostnode,LV,ost0,,-i 2 -I 128,300G,ost_vg
+ostnode,LV,ost1,,-i 2 -I 128,300G,ost_vg
+
+# combo mgs/mdt
+mgsnode,options lnet networks=tcp,/dev/md0,/mnt/mgs,mgs|mdt,,,,--quiet
+
+# ost0
+ostnode,options lnet networks=tcp,/dev/ost_vg/ost0,/mnt/ost0,ost,,mgsnode,,--quiet
+
+# ost1
+ostnode,options lnet networks=tcp,/dev/ost_vg/ost1,/mnt/ost1,ost,,mgsnode,,--quiet
+-------------------------------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+# (lc_common.sh supplies verbose_output, parse_line, check_file, REMOTE,
+# HBVER_HBV1/HBVER_HBV2/HATYPE_CLUMGR and the other helpers used below)
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+declare -a MGS_NODENAME # node names of the MGS servers
+declare -a MGS_IDX # indexes of MGSs in the global arrays
+declare -i MGS_NUM # number of MGS servers in the cluster
+declare -i INIT_IDX
+
+declare -a NODE_NAMES # node names in the failover group
+declare -a TARGET_OPTS # target services in one failover group
+
+# All the items in the csv file
+declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
+declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
+
+# Corresponding to MGS_NIDS and FAILOVERS arrays,
+# IP addresses in which were converted to hostnames
+declare -a MGS_NIDS_NAMES FAILOVERS_NAMES
+
+# Default behaviour, possibly overridden by the options parsed below
+VERIFY_CONNECT=true
+CONFIG_MD_LVM=false
+MODIFY_FSTAB=true
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+# -t HA type, -n skip connectivity checks, -d configure MD/LVM devices,
+# -f pass --reformat to mkfs.lustre, -m leave /etc/fstab alone,
+# -h print the sample csv (heredoc earlier in this script), -v verbose
+while getopts "t:ndfmhv" OPTION; do
+    case $OPTION in
+    t)
+        HATYPE_OPT=$OPTARG
+        # Valid values come from lc_common.sh (hbv1, hbv2, CluManager)
+        if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
+        && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
+        && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
+            echo >&2 $"`basename $0`: Invalid HA software type" \
+                  "- ${HATYPE_OPT}!"
+            usage
+        fi
+        ;;
+    n)
+        VERIFY_CONNECT=false
+        ;;
+    d)
+        CONFIG_MD_LVM=true
+        ;;
+    f)
+        REFORMAT_OPTION=$"--reformat "
+        ;;
+    m)
+        MODIFY_FSTAB=false
+        ;;
+    h)
+        sample
+        ;;
+    v)
+        VERBOSE_OPT=$" -v"
+        VERBOSE_OUTPUT=true
+        ;;
+    ?)
+        usage
+    esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+    echo >&2 $"`basename $0`: Missing csv file!"
+    usage
+fi
+
+# Check the items required for OSTs, MDTs and MGS
+#
+# When formatting an OST, the following items: hostname, module_opts,
+# device name, device type and mgs nids, cannot have null value.
+#
+# When formatting an MDT or MGS, the following items: hostname,
+# module_opts, device name and device type, cannot have null value.
+#
+# check_item index
+# Validates entry @index of the global csv arrays (filled by get_items).
+# Returns 0 when all required fields are present, 1 otherwise.
+check_item() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: check_item() error: Missing argument"\
+              "for function check_item()!"
+        return 1
+    fi
+
+    # declare -i coerces $1 to an integer index into the global arrays
+    declare -i i=$1
+
+    # Check hostname, module_opts, device name and device type
+    if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
+        ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: Some required"\
+              "item has null value! Check hostname, module_opts,"\
+              "device name and device type!"
+        return 1
+    fi
+
+    # Check mgs nids (only an OST strictly requires them; an MDT
+    # without mgs nids is treated as a combined MGS/MDT elsewhere)
+    if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
+              "item has null value!"
+        return 1
+    fi
+
+    # Check mount point
+    if [ -z "${MOUNT_POINT[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: mount"\
+              "point item of target ${DEVICE_NAME[i]} has null value!"
+        return 1
+    fi
+
+    return 0
+}
+
+# Get the number of MGS nodes in the cluster
+# Sets the globals INIT_IDX and MGS_NUM from the MGS_NODENAME array.
+# Slot 0 is reserved for the primary MGS; when it is empty (only backup
+# MGS entries exist, filled from index 1 by check_mgs), iteration must
+# start at 1 and the element count is one short, hence both adjustments.
+get_mgs_num() {
+    INIT_IDX=0
+    MGS_NUM=${#MGS_NODENAME[@]}
+    [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
+    && let "MGS_NUM += 1"
+}
+
+# is_mgs_node hostname
+# Verify whether @hostname is a MGS node
+# Returns 0 when @hostname appears in the MGS_NODENAME array, 1 otherwise.
+is_mgs_node() {
+    local host_name=$1
+    declare -i i
+
+    # Refresh INIT_IDX/MGS_NUM before scanning the array
+    get_mgs_num
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
+    done
+
+    return 1
+}
+
+# Check whether the MGS nodes are in the same failover group
+# Every MGS node must appear in the "failover nids" of every other MGS
+# entry; returns 1 as soon as one pairing is missing.
+check_mgs_group() {
+    declare -i i
+    declare -i j
+    declare -i idx
+    local mgs_node
+
+    get_mgs_num
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        mgs_node=${MGS_NODENAME[i]}
+        for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
+            # Skip comparing a node against itself
+            [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
+
+            idx=${MGS_IDX[j]}
+            # Substring-deletion idiom: if stripping "*$mgs_node*" leaves
+            # the failover list unchanged, $mgs_node is not in that list
+            if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ]
+            then
+                echo >&2 $"`basename $0`: check_mgs_group() error:"\
+                "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
+                "failover group!"
+                return 1
+            fi
+        done
+    done
+
+    return 0
+}
+
+# Get and check MGS servers.
+# There should be no more than one MGS specified in the entire csv file.
+#
+# Fills MGS_NODENAME/MGS_IDX: slot 0 holds the primary MGS (formatted
+# without --noformat); slots 1.. hold backup MGSes (--noformat entries).
+# An "explicit" MGS has mgs in its device type; an "implicit" one is an
+# mdt with empty mgs nids (mkfs.lustre then makes it an MGS too).
+# The csv must not mix explicit and implicit MGS targets.
+check_mgs() {
+    declare -i i
+    declare -i j
+    declare -i exp_idx # Index of explicit MGS servers
+    declare -i imp_idx # Index of implicit MGS servers
+    local is_exp_mgs is_imp_mgs
+    local mgs_node
+
+    # Initialize the MGS_NODENAME and MGS_IDX arrays
+    unset MGS_NODENAME
+    unset MGS_IDX
+
+    exp_idx=1
+    imp_idx=1
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        is_exp_mgs=false
+        is_imp_mgs=false
+
+        # Check whether this node is an explicit MGS node
+        # or an implicit one
+        if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
+            verbose_output "Explicit MGS target" \
+            "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+            is_exp_mgs=true
+        fi
+
+        if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
+            verbose_output "Implicit MGS target" \
+            "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+            is_imp_mgs=true
+        fi
+
+        # Get and check MGS servers
+        if ${is_exp_mgs} || ${is_imp_mgs}; then
+            # Check whether more than one MGS target in one MGS node
+            if is_mgs_node ${HOST_NAME[i]}; then
+                echo >&2 $"`basename $0`: check_mgs() error:"\
+                "More than one MGS target in the same node -"\
+                "\"${HOST_NAME[i]}\"!"
+                return 1
+            fi
+
+            # Get and check primary MGS server and backup MGS server
+            # (an entry without "noformat" in its format options is a
+            # primary; with "noformat" it is a backup)
+            if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
+            then
+                # Primary MGS server
+                if [ -z "${MGS_NODENAME[0]}" ]; then
+                    if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+                    || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "There exist both explicit and implicit MGS"\
+                        "targets in the csv file!"
+                        return 1
+                    fi
+                    MGS_NODENAME[0]=${HOST_NAME[i]}
+                    MGS_IDX[0]=$i
+                else
+                    # A second primary is only legal if it is a failover
+                    # partner of the first — and even then one of the two
+                    # must carry --noformat
+                    mgs_node=${MGS_NODENAME[0]}
+                    if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ]
+                    then
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "More than one primary MGS nodes in the csv" \
+                        "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
+                    else
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
+                        "are failover pair, one of them should use"\
+                        "\"--noformat\" in the format options item!"
+                    fi
+                    return 1
+                fi
+            else # Backup MGS server
+                if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+                || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+                    echo >&2 $"`basename $0`: check_mgs() error:"\
+                    "There exist both explicit and implicit MGS"\
+                    "targets in the csv file!"
+                    return 1
+                fi
+
+                if ${is_exp_mgs}; then # Explicit MGS
+                    MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
+                    MGS_IDX[exp_idx]=$i
+                    exp_idx=$(( exp_idx + 1 ))
+                else # Implicit MGS
+                    MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
+                    MGS_IDX[imp_idx]=$i
+                    imp_idx=$(( imp_idx + 1 ))
+                fi
+            fi
+        fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
+    done
+
+    # Check whether the MGS nodes are in the same failover group
+    if ! check_mgs_group; then
+        return 1
+    fi
+
+    return 0
+}
+
+# Construct the command line of mkfs.lustre
+# construct_mkfs_cmdline index
+# Builds the global MKFS_CMD string for csv entry @index from the global
+# arrays; ${MKFS} and ${REFORMAT_OPTION} come from lc_common.sh and the
+# -f option respectively. Returns 1 on an unknown device type.
+construct_mkfs_cmdline() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+              "Missing argument for function construct_mkfs_cmdline()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    local mgsnids mgsnids_str
+    local failnids failnids_str
+
+    MKFS_CMD=${MKFS}$" "
+    MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION}
+
+    case "${DEVICE_TYPE[i]}" in
+    "ost")
+        MKFS_CMD=${MKFS_CMD}$"--ost "
+        ;;
+    "mdt")
+        MKFS_CMD=${MKFS_CMD}$"--mdt "
+        ;;
+    "mgs")
+        MKFS_CMD=${MKFS_CMD}$"--mgs "
+        ;;
+    "mdt|mgs" | "mgs|mdt")
+        MKFS_CMD=${MKFS_CMD}$"--mdt --mgs "
+        ;;
+    *)
+        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+              "Invalid device type - \"${DEVICE_TYPE[i]}\"!"
+        return 1
+        ;;
+    esac
+
+    if [ -n "${FS_NAME[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" "
+    fi
+
+    # Colon-separated MGS nid groups each become their own --mgsnode
+    if [ -n "${MGS_NIDS[i]}" ]; then
+        mgsnids_str=${MGS_NIDS[i]}
+        for mgsnids in ${mgsnids_str//:/ }; do
+            MKFS_CMD=${MKFS_CMD}$"--mgsnode="${mgsnids}$" "
+        done
+    fi
+
+    if [ -n "${INDEX[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" "
+    fi
+
+    if [ -n "${FORMAT_OPTIONS[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" "
+    fi
+
+    if [ -n "${MKFS_OPTIONS[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" "
+    fi
+
+    # Mount options are only baked into the target when -m was given
+    # (MODIFY_FSTAB=false); otherwise they go to /etc/fstab instead
+    if [ -n "${MOUNT_OPTIONS[i]}" ]; then
+        if ! ${MODIFY_FSTAB}; then
+            MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" "
+        fi
+    fi
+
+    # Colon-separated failover nid groups each become their own --failnode
+    if [ -n "${FAILOVERS[i]}" ]; then
+        failnids_str=${FAILOVERS[i]}
+        for failnids in ${failnids_str//:/ }; do
+            MKFS_CMD=${MKFS_CMD}$"--failnode="${failnids}$" "
+        done
+    fi
+
+    MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]}
+    return 0
+}
+
+# Get all the node names in this failover group
+# get_nodenames index
+# Fills the global NODE_NAMES array: slot 0 is the primary host of csv
+# entry @index, the rest are resolved from its failover nids via
+# nids2hostname (lc_common.sh). Returns 1 when a nid cannot be resolved.
+get_nodenames() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
+              "argument for function get_nodenames()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    declare -i idx
+    local nids
+
+    # Initialize the NODE_NAMES array
+    unset NODE_NAMES
+
+    NODE_NAMES[0]=${HOST_NAME[i]}
+
+    idx=1
+    for nids in ${FAILOVERS_NAMES[i]//:/ }
+    do
+        NODE_NAMES[idx]=$(nids2hostname ${nids})
+        if [ $? -ne 0 ]; then
+            # On failure the captured output is the error message
+            echo >&2 "${NODE_NAMES[idx]}"
+            return 1
+        fi
+
+        # Arithmetic despite the plain "=": idx was declared with -i
+        idx=$idx+1
+    done
+
+    return 0
+}
+
+# Verify whether the format line has HA items
+# is_ha_line index
+# Returns 0 when csv entry @index carries a "failover nids" value, i.e.
+# the target belongs to a failover group needing HA configuration.
+is_ha_line() {
+    declare -i i=$1
+
+    [ -n "${FAILOVERS[i]}" ] && return 0
+
+    return 1
+}
+
+# Produce HA software's configuration files
+# gen_ha_config index
+# Generate the HA configuration files for the failover group whose primary
+# node is ${HOST_NAME[index]}, by invoking the external generation script
+# (GEN_HB_CONFIG for Heartbeat v1/v2, GEN_CLUMGR_CONFIG for CluManager —
+# both from lc_common.sh) with the group's hostnames and the
+# device:mountpoint pairs collected into TARGET_OPTS by config_ha().
+gen_ha_config() {
+    declare -i i=$1
+    declare -i idx
+    local cmd_line
+
+    # Prepare parameters
+    # Hostnames option
+    HOSTNAME_OPT=${HOST_NAME[i]}
+
+    if ! get_nodenames $i; then
+        echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\
+        "failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\
+        "the \"${HOST_NAME[i]}\" failover group!"
+        return 1
+    fi
+
+    for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
+        HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
+    done
+
+    # Target devices option
+    DEVICE_OPT=" -d "${TARGET_OPTS[0]}
+    for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do
+        DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]}
+    done
+
+    # Construct the generation script command line
+    case "${HATYPE_OPT}" in
+    "${HBVER_HBV1}"|"${HBVER_HBV2}") # Heartbeat
+        cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
+        cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
+        ;;
+    "${HATYPE_CLUMGR}") # CluManager
+        cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
+        cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
+        ;;
+    esac
+
+    # Execute script to generate HA software's configuration files
+    verbose_output "Generating HA software's configurations in"\
+        "${HOST_NAME[i]} failover group..."
+    verbose_output "${cmd_line}"
+    # Evaluate the assembled command line directly; the previous form
+    # "eval $(echo ${cmd_line})" added a useless echo plus command
+    # substitution that word-split the command a second time.
+    if ! eval ${cmd_line}; then
+        return 1
+    fi
+    verbose_output "Generate HA software's configurations in"\
+        "${HOST_NAME[i]} failover group OK"
+
+    return 0
+}
+
+# Configure HA software
+# Walks the csv entries, groups the lines that share a primary host and
+# have failover nids into one failover group, and calls gen_ha_config
+# once per group. No-op unless -t <HAtype> was given.
+config_ha() {
+    if [ -z "${HATYPE_OPT}" ]; then
+        return 0
+    fi
+
+    declare -i i j k
+    declare -i prim_idx # Index for PRIM_HOSTNAMES array
+    declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays
+
+    declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover
+                              # groups in the lustre cluster
+    declare -a HOST_INDEX # Indices for the same node in all the
+                          # format lines in the csv file
+    local prim_host
+
+    # Initialize the PRIM_HOSTNAMES array
+    prim_idx=0
+    unset PRIM_HOSTNAMES
+
+    # Get failover groups and generate HA configuration files
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        prim_host=${HOST_NAME[i]}
+
+        # Skip hosts whose failover group was already processed
+        for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do
+            [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2
+        done
+
+        # Collect every HA-enabled csv line that lives on this host
+        target_idx=0
+        unset HOST_INDEX
+        unset TARGET_OPTS
+        for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do
+            if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}"
+            then
+                HOST_INDEX[target_idx]=$k
+                TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]}
+                target_idx=$(( target_idx + 1 ))
+            fi
+        done
+
+        if [ ${#TARGET_OPTS[@]} -ne 0 ]; then
+            PRIM_HOSTNAMES[prim_idx]=${prim_host}
+            prim_idx=$(( prim_idx + 1 ))
+
+            if ! gen_ha_config ${HOST_INDEX[0]}; then
+                return 1
+            fi
+        fi
+    done
+
+    if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then
+        verbose_output "There are no \"failover nids\" items in the"\
+        "csv file. No HA configuration files are generated!"
+    fi
+
+    # TMP_DIRS presumably accumulated by the generation scripts — confirm
+    rm -rf ${TMP_DIRS}
+    return 0
+}
+
+
+# Get all the items in the csv file and do some checks.
+# get_items csv_file
+# Parses every non-empty, non-comment, non-MD/LVM line of @csv_file into
+# the twelve global per-target arrays (HOST_NAME .. FAILOVERS), resolves
+# IP-based nids to hostnames, and validates each entry with check_item.
+get_items() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: get_items() error: Missing argument"\
+              "for function get_items()!"
+        return 1
+    fi
+
+    CSV_FILE=$1
+    local LINE
+    local marker
+    declare -i line_num=0
+    declare -i idx=0
+
+    while read -r LINE; do
+        line_num=${line_num}+1
+        # verbose_output "Parsing line ${line_num}: $LINE"
+
+        # Get rid of the empty line
+        # (awk only prints lines containing at least one alnum char)
+        if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then
+            continue
+        fi
+
+        # Get rid of the comment line
+        if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]
+        then
+            continue
+        fi
+
+        # Skip the Linux MD/LVM line
+        # (those lines are handled by the lc_md/lc_lvm scripts instead;
+        # the marker constants come from lc_common.sh)
+        marker=`echo ${LINE} | awk -F, '{print $2}'`
+        if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \
+        || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then
+            continue
+        fi
+
+        # Parse the config line into CONFIG_ITEM
+        if ! parse_line "$LINE"; then
+            echo >&2 $"`basename $0`: parse_line() error: Occurred"\
+                  "on line ${line_num} in ${CSV_FILE}: $LINE"
+            return 1
+        fi
+
+        HOST_NAME[idx]=${CONFIG_ITEM[0]}
+        MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
+        DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
+        MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
+        DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
+        FS_NAME[idx]=${CONFIG_ITEM[5]}
+        MGS_NIDS[idx]=${CONFIG_ITEM[6]}
+        INDEX[idx]=${CONFIG_ITEM[7]}
+        FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
+        MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
+        MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
+        FAILOVERS[idx]=${CONFIG_ITEM[11]}
+
+        # Escape double quotes so the options survive being embedded in
+        # the remote "echo \"...\"" command later on
+        MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'`
+
+        # Convert IP addresses in NIDs to hostnames
+        MGS_NIDS_NAMES[idx]=$(ip2hostname_multi_node ${MGS_NIDS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${MGS_NIDS_NAMES[idx]}"
+            return 1
+        fi
+
+        FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${FAILOVERS_NAMES[idx]}"
+            return 1
+        fi
+
+        # Check some required items for formatting target
+        if ! check_item $idx; then
+            echo >&2 $"`basename $0`: check_item() error:"\
+                  "Occurred on line ${line_num} in ${CSV_FILE}."
+            return 1
+        fi
+
+        idx=${idx}+1
+    done < ${CSV_FILE}
+
+    return 0
+}
+
+# check_lnet_connect hostname_index mgs_hostname
+# Check whether the target node can contact the MGS node @mgs_hostname
+# If @mgs_hostname is null, then it means the primary MGS node
+# Tries every comma-separated nid of the matching MGS entry with
+# "lctl ping" executed remotely on ${HOST_NAME[hostname_index]};
+# succeeds as soon as one nid answers.
+check_lnet_connect() {
+    declare -i i=$1
+    local mgs_node=$2
+
+    local COMMAND RET_STR
+    local mgs_prim_nids
+    local nids nids_names
+    local nids_str=
+    local mgs_nid
+    local ping_mgs
+
+    # Execute remote command to check that
+    # this node can contact the MGS node
+    verbose_output "Checking lnet connectivity between" \
+    "${HOST_NAME[i]} and the MGS node ${mgs_node}"
+    mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'`
+
+    if [ -z "${mgs_node}" ]; then
+        nids_str=${mgs_prim_nids} # nids of primary MGS node
+        if [ -z "${nids_str}" ]; then
+            echo >&2 $"`basename $0`: check_lnet_connect() error:"\
+                  "Check the mgs nids item of host ${HOST_NAME[i]}!"\
+                  "Missing nids of the primary MGS node!"
+            return 1
+        fi
+    else
+        # Find the colon-separated nid group that resolves to @mgs_node
+        for nids in ${MGS_NIDS[i]//:/ }; do
+            nids_names=$(ip2hostname_single_node ${nids})
+            if [ $? -ne 0 ]; then
+                echo >&2 "${nids_names}"
+                return 1
+            fi
+
+            [ "${nids_names}" != "${nids_names#*$mgs_node*}" ]\
+            && nids_str=${nids} # nids of backup MGS node
+        done
+        if [ -z "${nids_str}" ]; then
+            echo >&2 $"`basename $0`: check_lnet_connect() error:"\
+                  "Check the mgs nids item of host ${HOST_NAME[i]}!"\
+                  "Can not figure out which nids corresponding to the MGS"\
+                  "node ${mgs_node} from \"${MGS_NIDS[i]}\"!"
+            return 1
+        fi
+    fi
+
+    ping_mgs=false
+    for mgs_nid in ${nids_str//,/ }
+    do
+        # Merge lctl's stderr into the captured output before falling
+        # back to "echo failed". Previously the "2>&1" was attached to
+        # the echo (where it is a no-op), so lctl's error text was not
+        # redirected as intended.
+        COMMAND=$"${LCTL} ping ${mgs_nid} 5 2>&1 || echo failed"
+        RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
+        if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
+        then
+            # This node can contact the MGS node
+            verbose_output "${HOST_NAME[i]} can contact the MGS" \
+            "node ${mgs_node} by using nid \"${mgs_nid}\"!"
+            ping_mgs=true
+            break
+        fi
+    done
+
+    if ! ${ping_mgs}; then
+        echo >&2 "`basename $0`: check_lnet_connect() error:" \
+        "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\
+        "with nids - \"${nids_str}\"! Check ${LCTL} command!"
+        return 1
+    fi
+
+    return 0
+}
+
+# Start lnet network in the cluster node and check that
+# this node can contact the MGS node
+# check_lnet index
+# Loads lnet and brings the network up on ${HOST_NAME[index]}; unless the
+# host is itself an MGS node, also verifies connectivity to every MGS.
+# No-op when -n (VERIFY_CONNECT=false) was given.
+check_lnet() {
+    if ! ${VERIFY_CONNECT}; then
+        return 0
+    fi
+
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: check_lnet() error: Missing"\
+              "argument for function check_lnet()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    declare -i j
+    local COMMAND RET_STR
+
+    # Execute remote command to start lnet network
+    verbose_output "Starting lnet network in ${HOST_NAME[i]}"
+    COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1"
+    RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
+    # Success requires both a zero exit and "LNET configured" in the output
+    if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
+    then
+        echo >&2 "`basename $0`: check_lnet() error: remote" \
+        "${HOST_NAME[i]} error: ${RET_STR}"
+        return 1
+    fi
+
+    # An MGS node does not need to ping itself
+    if is_mgs_node ${HOST_NAME[i]}; then
+        return 0
+    fi
+
+    # Execute remote command to check that
+    # this node can contact the MGS node
+    for ((j = 0; j < ${MGS_NUM}; j++)); do
+        if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Start lnet network in the MGS node
+# For every MGS entry: pushes its lnet module options into the remote
+# modprobe.conf/modules.conf (via MODULE_CONFIG from lc_common.sh) and
+# brings lnet up there. No-op when the csv contains no MGS target.
+start_mgs_lnet() {
+    declare -i i
+    declare -i idx
+    local COMMAND
+
+    if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
+        verbose_output "There is no MGS target in the ${CSV_FILE} file."
+        return 0
+    fi
+
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        # Execute remote command to add lnet options lines to
+        # the MGS node's modprobe.conf/modules.conf
+        idx=${MGS_IDX[i]}
+        COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}"
+        verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}"
+        ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: start_mgs_lnet() error:"\
+                 "Failed to execute remote command to" \
+                 "add module options to ${MGS_NODENAME[i]}!"\
+                 "Check ${MODULE_CONFIG}!"
+            return 1
+        fi
+
+        # Start lnet network in the MGS node
+        if ! check_lnet ${idx}; then
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Execute remote command to add lnet options lines to remote nodes'
+# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
+# For each csv entry: creates the mount point, configures lnet (non-MGS
+# nodes only; the MGS was handled by start_mgs_lnet), then launches the
+# mkfs.lustre commands on all nodes in parallel as background jobs and
+# waits for every one of them before reporting success/failure.
+mass_config() {
+    local COMMAND
+    declare -a REMOTE_PID
+    declare -a REMOTE_CMD
+    declare -i pid_num=0
+    declare -i i=0
+
+    if [ ${#HOST_NAME[@]} -eq 0 ]; then
+        verbose_output "There are no Lustre targets to be formatted."
+        return 0
+    fi
+
+    # Start lnet network in the MGS node
+    if ! start_mgs_lnet; then
+        return 1
+    fi
+
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        # Construct the command line of mkfs.lustre
+        if ! construct_mkfs_cmdline $i; then
+            return 1
+        fi
+
+        # create the mount point on the node
+        COMMAND="mkdir -p ${MOUNT_POINT[i]}"
+        verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \
+                   "${HOST_NAME[i]}"
+        ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: mass_config() error:"\
+                 "Failed to execute remote command to"\
+                 "create the mountpoint on ${HOST_NAME[i]}!"
+            return 1
+        fi
+
+        if ! is_mgs_node ${HOST_NAME[i]}; then
+            # Execute remote command to add lnet options lines to
+            # modprobe.conf/modules.conf
+            COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
+            verbose_output "Adding lnet module options to" \
+                       "${HOST_NAME[i]}"
+            ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+            if [ $? -ne 0 ]; then
+                echo >&2 "`basename $0`: mass_config() error:"\
+                     "Failed to execute remote command to"\
+                     "add module options to ${HOST_NAME[i]}!"
+                return 1
+            fi
+
+            # Check lnet networks
+            if ! check_lnet $i; then
+                return 1
+            fi
+        fi
+
+        # Execute remote command to format Lustre target
+        # (run in the background so all nodes format concurrently;
+        # EXPORT_PATH comes from lc_common.sh)
+        verbose_output "Formatting Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..."
+        REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} \"(${EXPORT_PATH} ${MKFS_CMD})\""
+        verbose_output "Format command line is: ${REMOTE_CMD[${pid_num}]}"
+        ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 &
+        REMOTE_PID[${pid_num}]=$!
+        pid_num=${pid_num}+1
+        sleep 1
+    done
+
+    # Wait for the exit status of the background remote command
+    verbose_output "Waiting for the return of the remote command..."
+    fail_exit_status=false
+    for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+        wait ${REMOTE_PID[${pid_num}]}
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: mass_config() error: Failed"\
+                 "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+            fail_exit_status=true
+        fi
+    done
+
+    if ${fail_exit_status}; then
+        return 1
+    fi
+
+    verbose_output "All the Lustre targets are formatted successfully!"
+    return 0
+}
+
+# get_mntopts hostname device_name failovers
+# Construct the mount options of Lustre target @device_name in host @hostname
+# Emits the options string on stdout: "defaults,noauto" for failover
+# targets (so mount at boot is manual), "defaults" otherwise, with ",loop"
+# appended when the remote device is not a block device.
+get_mntopts() {
+    local host_name=$1
+    local device_name=$2
+    local failovers=$3
+    local mnt_opts=
+    local ret_str
+
+    [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults
+
+    # Execute remote command to check whether the device
+    # is a block device or not
+    ret_str=`${REMOTE} ${host_name} \
+        "[ -b ${device_name} ] && echo block || echo loop" 2>&1`
+    if [ $? -ne 0 -a -n "${ret_str}" ]; then
+        echo "`basename $0`: get_mntopts() error:" \
+        "remote command to ${host_name} error: ${ret_str}"
+        return 1
+    fi
+
+    if [ -z "${ret_str}" ]; then
+        echo "`basename $0`: get_mntopts() error: remote error:" \
+        "No results from remote!" \
+        "Check network connectivity between the local host and ${host_name}!"
+        return 1
+    fi
+
+    # Remote said "loop": mount through a loop device
+    [ "${ret_str}" != "${ret_str#*loop}" ] && mnt_opts=${mnt_opts},loop
+
+    echo ${mnt_opts}
+    return 0
+}
+
+# Execute remote command to modify /etc/fstab to add the new Lustre targets
+# For each csv entry, removes any stale fstab line for the device and
+# appends a fresh one (device, mount point, lustre fs type, options).
+# No-op when -m (MODIFY_FSTAB=false) was given.
+modify_fstab() {
+    declare -i i
+    local mntent mntopts device_name
+    local COMMAND
+
+    if ! ${MODIFY_FSTAB}; then
+        return 0
+    fi
+
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
+                   "to add Lustre target ${DEVICE_NAME[i]}"
+        mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE}
+
+        # Get mount options
+        if [ -n "${MOUNT_OPTIONS[i]}" ]; then
+            # The mount options already specified in the csv file.
+            mntopts=${MOUNT_OPTIONS[i]}
+        else
+            mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
+                  ${FAILOVERS[i]})
+            if [ $? -ne 0 ]; then
+                echo >&2 "${mntopts}"
+                return 1
+            fi
+        fi
+
+        mntent=${mntent}"\t"${mntopts}"\t"0" "0
+        verbose_output "`echo -e ${mntent}`"
+
+        # Execute remote command to modify /etc/fstab
+        # (escape "/" in the device path so it can be used inside the
+        # sed address; fcanon from lc_common.sh canonicalizes the path)
+        device_name=${DEVICE_NAME[i]//\//\\/}
+        COMMAND=". @scriptlibdir@/lc_common.sh; \
+            sed -i \"/^${device_name}\t/d\" \$(fcanon /etc/fstab); \
+            echo -e \"${mntent}\" >> \$(fcanon /etc/fstab)"
+        ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: modify_fstab() error:"\
+                 "Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\
+                 "to add Lustre target ${DEVICE_NAME[i]}!"
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Main flow
+# Check the csv file
+# (check_file is from lc_common.sh; it presumably validates $1 and sets
+# the CSV_FILE variable used below — confirm against lc_common.sh)
+if ! check_file $1; then
+    exit 1
+fi
+
+if ${VERIFY_CONNECT}; then
+# Check the network connectivity and hostnames
+    echo "`basename $0`: Checking the cluster network connectivity"\
+         "and hostnames..."
+    if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+    echo "`basename $0`: Check the cluster network connectivity"\
+         "and hostnames OK!"
+    echo
+fi
+
+if ${CONFIG_MD_LVM}; then
+# Configure Linux MD/LVM devices
+    echo "`basename $0`: Configuring Linux MD/LVM devices..."
+    if ! ${SCRIPT_CONFIG_MD} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+
+    if ! ${SCRIPT_CONFIG_LVM} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+    echo "`basename $0`: Configure Linux MD/LVM devices OK!"
+    echo
+fi
+
+# Configure the Lustre cluster
+echo "`basename $0`: ******** Lustre cluster configuration START ********"
+if ! get_items ${CSV_FILE}; then
+    exit 1
+fi
+
+if ! check_mgs; then
+    exit 1
+fi
+
+if ! mass_config; then
+    exit 1
+fi
+
+if ! modify_fstab; then
+    exit 1
+fi
+
+# Produce HA software's configuration files
+if ! config_ha; then
+    rm -rf ${TMP_DIRS}
+    exit 1
+fi
+
+echo "`basename $0`: ******** Lustre cluster configuration END **********"
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lustre_createcsv.sh - generate a csv file from a running lustre cluster
+#
+# This script is used to collect lustre target informations, linux MD/LVM device
+# informations and HA software configurations in a lustre cluster to generate a
+# csv file. In reverse, the csv file could be parsed by lustre_config.sh to
+# configure multiple lustre servers in parallel.
+#
+# This script should be run on the MGS node.
+#
+################################################################################
+
+# Usage
+# Print the help text to stderr and exit non-zero (also used as the
+# error path for bad options).
+usage() {
+    cat >&2 <<EOF
+
+Usage: `basename $0` [-t HAtype] [-d] [-h] [-v] [-f csv_filename]
+
+    This script is used to collect lustre target informations, linux MD/LVM
+    device informations and HA software configurations from a running lustre
+    cluster to generate a csv file. It should be run on the MGS node.
+
+    -t HAtype       collect High-Availability software configurations
+                    The argument following -t is used to indicate the High-
+                    Availability software type. The HA software types which
+                    are currently supported are: hbv1 (Heartbeat version 1)
+                    and hbv2 (Heartbeat version 2).
+    -d              collect linux MD/LVM device informations
+    -h              help
+    -v              verbose mode
+    -f csv_filename designate a name for the csv file
+                    Default is lustre_config.csv.
+
+EOF
+    exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#**************************** Global variables ****************************#
+# csv file
+LUSTRE_CSV_FILE=${LUSTRE_CSV_FILE:-"lustre_config.csv"}
+
+# Lustre proc files
+LUSTRE_PROC=${LUSTRE_PROC:-"/proc/fs/lustre"}
+LUSTRE_PROC_DEVICES=${LUSTRE_PROC}/devices
+
+LNET_PROC=${LNET_PROC:-"/proc/sys/lnet"}
+LNET_PROC_PEERS=${LNET_PROC}/peers
+
+# Default network module options
+DEFAULT_MOD_OPTS=${DEFAULT_MOD_OPTS:-"options lnet networks=tcp"}
+
+# Lustre target obd device types
+MGS_TYPE=${MGS_TYPE:-"mgs"}
+MDT_TYPE=${MDT_TYPE:-"mds"}
+OST_TYPE=${OST_TYPE:-"obdfilter"}
+
+# The obd name of MGS target server
+MGS_SVNAME=${MGS_SVNAME:-"MGS"}
+
+# Hostnames of the lustre cluster nodes
+declare -a HOST_NAMES
+MGS_HOSTNAME=${MGS_HOSTNAME:-"`hostname`"} # Hostname of the MGS node
+
+# Configs of lustre targets in one cluster node
+declare -a TARGET_CONFIGS
+declare -a TARGET_SVNAMES TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS
+declare -a TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
+declare -a TARGET_FMTOPTS TARGET_MKFSOPTS TARGET_MNTOPTS TARGET_FAILNIDS
+declare -a HA_CONFIGS
+declare -a ALL_TARGET_SVNAMES # All the target services in the cluster
+declare -a FAILOVER_FMTOPTS # "--noformat"
+
+# Informations of linux MD/LVM devices in one cluster node
+declare -a MD_NAME MD_LEVEL MD_DEVS # MD
+declare -a VG_NAME VG_PVNAMES # VG
+declare -a LV_NAME LV_SIZE LV_VGNAME # LV
+
+# Lustre target service types
+# (bit flags matching the LDD_F_SV_TYPE_* values of mkfs.lustre)
+let "LDD_F_SV_TYPE_MDT = 0x0001"
+let "LDD_F_SV_TYPE_OST = 0x0002"
+let "LDD_F_SV_TYPE_MGS = 0x0004"
+
+# Permanent mount options for ext3 or ldiskfs
+ALWAYS_MNTOPTS=${ALWAYS_MNTOPTS:-"errors=remount-ro"}
+MDT_MGS_ALWAYS_MNTOPTS=${MDT_MGS_ALWAYS_MNTOPTS:-",iopen_nopriv,user_xattr"}
+OST_ALWAYS_MNTOPTS=${OST_ALWAYS_MNTOPTS:-",asyncdel"}
+OST_DEFAULT_MNTOPTS=${OST_DEFAULT_MNTOPTS:-",extents,mballoc"}
+
+# User-settable parameter keys
+PARAM_MGSNODE=${PARAM_MGSNODE:-"mgsnode="}
+PARAM_FAILNODE=${PARAM_FAILNODE:-"failover.node="}
+
+# Block size
+L_BLOCK_SIZE=4096
+
+# Option string of mkfs.lustre
+OPTSTR_STRIPE_COUNT=${OPTSTR_STRIPE_COUNT:-"--stripe-count-hint="}
+
+
+# Get and check the positional parameters
+# -t HA type, -d collect MD/LVM info, -h help, -v verbose, -f csv name
+VERBOSE_OUTPUT=false
+GET_MDLVM_INFO=false
+while getopts "t:dhvf:" OPTION; do
+    case $OPTION in
+    t)
+        HATYPE_OPT=$OPTARG
+        # Valid values come from lc_common.sh (hbv1, hbv2, CluManager)
+        if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
+        && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
+        && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
+            echo >&2 "`basename $0`: Invalid HA software type" \
+                  "- ${HATYPE_OPT}!"
+            usage
+        fi
+        ;;
+    d) GET_MDLVM_INFO=true;;
+    h) usage;;
+    v) VERBOSE_OUTPUT=true;;
+    f) LUSTRE_CSV_FILE=$OPTARG;;
+    ?) usage
+    esac
+done
+
+# Verify the local host is the MGS node
+# Checks /proc/fs/lustre/devices exists, is non-empty, and lists an obd
+# device of type "mgs". Returns 1 (with a diagnostic) otherwise.
+mgs_node() {
+    if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then
+        echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \
+             "not exist. Lustre kernel modules may not be loaded!"
+        return 1
+    fi
+
+    if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then
+        echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \
+             "empty. Lustre services may not be started!"
+        return 1
+    fi
+
+    if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then
+        echo >&2 "`basename $0`: error: This node is not a MGS node." \
+             "The script should be run on the MGS node!"
+        return 1
+    fi
+
+    return 0
+}
+
+# get_hostnames
+# Get lustre cluster node names
+# Fills the HOST_NAMES array: slot 0 is the local MGS hostname, the rest
+# are resolved (via nid2hostname from lc_common.sh) from the peer nids
+# found in /proc/sys/lnet/peers, skipping duplicates of the MGS itself.
+get_hostnames() {
+    declare -a HOST_NIDS
+    declare -i idx # Index of HOST_NIDS array
+    declare -i i # Index of HOST_NAMES array
+
+    if ! mgs_node; then
+        return 1
+    fi
+
+    if [ ! -e ${LNET_PROC_PEERS} ]; then
+        echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \
+             "exist. LNET kernel modules may not be loaded" \
+             "or LNET network may not be up!"
+        return 1
+    fi
+
+    HOST_NAMES[0]=${MGS_HOSTNAME} # MGS node
+    HOST_NIDS[0]=${HOST_NAMES[0]}
+
+    # Get the nids of the nodes which have contacted MGS
+    # (first column of the peers file; skip its "nid" header row)
+    idx=1
+    for nid in `cat ${LNET_PROC_PEERS} | awk '{print $1}'`; do
+        if [ "${nid}" = "nid" ]; then
+            continue
+        fi
+
+        HOST_NIDS[idx]=${nid}
+        let "idx += 1"
+    done
+
+    if [ ${idx} -eq 1 ]; then
+        verbose_output "Only one node running in the lustre cluster." \
+                   "It's ${HOST_NAMES[0]}."
+        return 0
+    fi
+
+    # Get the hostnames of the nodes
+    for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do
+        if [ -z "${HOST_NIDS[idx]}" ]; then
+            echo >&2 "`basename $0`: get_hostnames() error:" \
+                 "Invalid nid - \"${HOST_NIDS[idx]}\"!"
+            return 1
+        fi
+
+        HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${HOST_NAMES[i]}"
+            return 1
+        fi
+
+        # Drop peers that resolve back to the local MGS node itself
+        if [ "${HOST_NAMES[i]}" = "${HOST_NAMES[0]}" ]; then
+            unset HOST_NAMES[i]
+            let "i -= 1"
+        fi
+    done
+
+    return 0
+}
+
+#********************** Linux MD/LVM device informations **********************#
+# get_md_configs hostname
+# Get all the active MD device informations from the node @hostname
+get_md_configs() {
+ declare -i i=0
+ declare -i j=0
+ local host_name=$1
+ local ret_line line first_item
+
+ # Initialize the arrays
+ unset MD_NAME
+ unset MD_LEVEL
+ unset MD_DEVS
+
+ # Execute remote command to the node ${host_name} and get all the
+ # active MD device informations.
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ first_item=`echo "${line}" | awk '{print $1}'`
+
+ # Get the MD device name and raid level
+ if [ "${first_item}" = "ARRAY" ]; then
+ MD_NAME[i]=`echo "${line}" | awk '{print $2}'`
+ MD_LEVEL[i]=`echo "${line}" | awk '{print $3}' | sed -e 's/level=//'`
+ let "j = i"
+ let "i += 1"
+ fi
+
+ # Get the MD component devices
+ if [ "${first_item}" != "${first_item#devices=}" ]; then
+ MD_DEVS[j]=`echo "${line}" | sed -e 's/devices=//' -e 's/,/ /g'`
+ fi
+ done < <(${REMOTE} ${host_name} "${MDADM} --detail --scan --verbose")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no active MD devices" \
+ "in the host ${host_name}!"
+ fi
+
+ return 0
+}
+
+# get_pv_configs hostname
+# Get all the LVM PV informations from the node @hostname
+get_pv_configs() {
+ PV_NAMES=
+ local host_name=$1
+ local cmd ret_str
+
+ # Execute remote command to get all the PV informations.
+ cmd="${EXPORT_PATH} pvdisplay -c | awk -F: '{print \$1}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_pv_configs() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_pv_configs" ${host_name}
+ fi
+ return 1
+ fi
+
+ PV_NAMES=`echo "${ret_str}" | sed -e 's/^'${host_name}':[[:space:]]//'`
+ if [ -z "${PV_NAMES}" ]; then
+ verbose_output "There are no PVs in the host ${host_name}!"
+ return 0
+ fi
+
+ return 0
+}
+
+# get_vg_pvnames hostname vgname
+# Get the PVs contained in @vgname from the node @hostname
+get_vg_pvnames() {
+ local host_name=$1
+ local vg_name=$2
+ local pv_names=
+ local cmd ret_str
+
+ # Execute remote command to get the PV names.
+ cmd="${EXPORT_PATH} vgdisplay -v ${vg_name} 2>/dev/null\
+ | grep \"PV Name\" | awk '{print \$3}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo "`basename $0`: get_vg_pvnames() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_vg_pvnames" ${host_name}
+ fi
+ return 1
+ fi
+
+ pv_names=`echo "${ret_str}" | sed -e 's/^'${host_name}':[[:space:]]//'`
+ if [ -z "${pv_names}" ]; then
+ echo "`basename $0`: get_vg_pvnames() error:" \
+ "There are no PVs in VG ${vg_name} in the host ${host_name}!"\
+ "Or VG ${vg_name} does not exist."
+ return 1
+ fi
+
+ echo "${pv_names}"
+ return 0
+}
+
+# get_vg_configs hostname
+# Get all the LVM VG informations from the node @hostname
+get_vg_configs() {
+ declare -i i=0
+ local host_name=$1
+ local cmd ret_str
+ local vg_name
+
+ # Initialize the arrays
+ unset VG_NAME
+ unset VG_PVNAMES
+
+ # Execute remote command to get all the VG names.
+ cmd="${EXPORT_PATH} vgdisplay \
+ | grep \"VG Name\" | awk '{print \$3}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_vg_configs() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_vg_configs" ${host_name}
+ fi
+ return 1
+ fi
+
+ if [ -z "${ret_str}" ] \
+ || [ "${ret_str}" != "${ret_str#*No volume groups found*}" ]; then
+ verbose_output "There are no VGs in the host ${host_name}!"
+ return 0
+ fi
+
+ # Get all the VG informations
+ for vg_name in `echo "${ret_str}" | sed -e 's/^'${host_name}'://'`; do
+ VG_NAME[i]=${vg_name}
+ VG_PVNAMES[i]=$(get_vg_pvnames ${host_name} ${VG_NAME[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${VG_PVNAMES[i]}"
+ return 1
+ fi
+ let "i += 1"
+ done
+
+ return 0
+}
+
+# get_lv_configs hostname
+# Get all the LVM LV informations from the node @hostname
+get_lv_configs() {
+ declare -i i=0
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset LV_NAME
+ unset LV_SIZE
+ unset LV_VGNAME
+
+ # Execute remote command to get all the LV informations.
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ [ "${line}" != "${line#*volume group*}" ] && break
+
+ LV_NAME[i]=`echo "${line}" | awk -F: '{print $1}' | sed -e 's/.*\///g'`
+ LV_VGNAME[i]=`echo "${line}" | awk -F: '{print $2}'`
+ LV_SIZE[i]=`echo "${line}" | awk -F: '{print $7}' | sed -e 's/.*/&K/'`
+
+ let "i += 1"
+ done < <(${REMOTE} ${host_name} "${EXPORT_PATH} lvdisplay -c")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no LVs in the host ${host_name}"
+ fi
+
+ return 0
+}
+
+#*************************** Network module options ***************************#
+# last_is_backslash line
+# Check whether the last effective (non-space, non-tab) character of @line is a backslash
+last_is_backslash() {
+ local line="$*"
+ declare -i i
+ declare -i length
+ local letter last_letter
+
+ length=${#line}
+ for ((i = ${length}-1; i >= 0; i--)); do
+ letter=${line:${i}:1}
+ [ "x${letter}" != "x " -a "x${letter}" != "x	" -a -n "${letter}" ]\
+ && last_letter=${letter} && break
+ done
+
+ [ "x${last_letter}" = "x\\" ] && return 0
+
+ return 1
+}
+
+# get_module_opts hostname
+# Get the network module options from the node @hostname
+get_module_opts() {
+ local host_name=$1
+ local ret_str
+ local MODULE_CONF KERNEL_VER
+ local ret_line line find_options
+ local continue_flag
+
+ MODULE_OPTS=${DEFAULT_MOD_OPTS}
+
+ # Execute remote command to get the kernel version
+ ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_module_opts() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+ remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1
+
+ if is_pdsh; then
+ KERNEL_VER=`echo ${ret_str} | awk '{print $2}'`
+ else
+ KERNEL_VER=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ # Get the module configuration file name
+ if [ "${KERNEL_VER:0:3}" = "2.4" ]; then
+ MODULE_CONF=/etc/modules.conf
+ else
+ MODULE_CONF=/etc/modprobe.conf
+ fi
+
+ # Execute remote command to get the lustre network module options
+ continue_flag=false
+ find_options=false
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ if [ "${line}" != "${line#*options lnet*}" ]; then
+ if ! ${find_options}; then
+ find_options=true
+ MODULE_OPTS=${line}
+ else
+ MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
+ fi
+
+ last_is_backslash "${line}" && continue_flag=true \
+ || continue_flag=false
+ continue
+ fi
+
+ if ${continue_flag}; then
+ MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
+ ! last_is_backslash "${line}" && continue_flag=false
+
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${MODULE_CONF}")
+
+ if [ -z "${MODULE_OPTS}" ]; then
+ MODULE_OPTS=${DEFAULT_MOD_OPTS}
+ fi
+
+ return 0
+}
+
+#************************ HA software configurations ************************#
+# is_ha_target hostname target_devname
+# Check whether the target @target_devname was made to be high-available
+is_ha_target() {
+ local host_name=$1
+ local target_svname=$2
+ local res_file
+ local ret_str
+
+ case "${HATYPE_OPT}" in
+ "${HBVER_HBV1}") res_file=${HA_RES};;
+ "${HBVER_HBV2}") res_file=${HA_CIB};;
+ "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG};;
+ esac
+
+ # Execute remote command to check the resource file
+ ret_str=`${REMOTE} ${host_name} \
+ "grep ${target_svname} ${res_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: is_ha_target() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ [ "${ret_str}" = "${ret_str#*${target_svname}*}" ] && return 1
+
+ return 0
+}
+
+# get_hb_configs hostname
+# Get the Heartbeat configurations from the node @hostname
+get_hb_configs() {
+ local host_name=$1
+ local ret_line line
+ declare -i i
+
+ unset HA_CONFIGS
+ HB_CHANNELS=
+ SRV_IPADDRS=
+ HB_OPTIONS=
+
+ # Execute remote command to get the configs of Heartbeat channels, etc
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ if [ "${line}" != "${line#*serial*}" ] \
+ || [ "${line}" != "${line#*cast*}" ]; then
+ if [ -z "${HB_CHANNELS}" ]; then
+ HB_CHANNELS=${line}
+ else
+ HB_CHANNELS=${HB_CHANNELS}:${line}
+ fi
+ fi
+
+ if [ "${line}" != "${line#*stonith*}" ] \
+ || [ "${line}" != "${line#*ping*}" ] \
+ || [ "${line}" != "${line#*respawn*}" ] \
+ || [ "${line}" != "${line#*apiauth*}" ] \
+ || [ "${line}" != "${line#*compression*}" ]; then
+ if [ -z "${HB_OPTIONS}" ]; then
+ HB_OPTIONS=${line}
+ else
+ HB_OPTIONS=${HB_OPTIONS}:${line}
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${HA_CF}")
+
+ if [ -z "${HB_CHANNELS}" ]; then
+ echo >&2 "`basename $0`: get_hb_configs() error:" \
+ "There are no heartbeat channel configs in ${HA_CF}" \
+ "of host ${host_name} or ${HA_CF} does not exist!"
+ return 0
+ fi
+
+ # Execute remote command to get Heartbeat service address
+ if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ]; then
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the empty line
+ [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\
+ && continue
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ SRV_IPADDRS=`echo ${line} | awk '{print $2}'`
+ [ -n "${SRV_IPADDRS}" ] \
+ && [ "`echo ${line} | awk '{print $1}'`" = "${host_name}" ] && break
+ done < <(${REMOTE} ${host_name} "cat ${HA_RES}")
+
+ if [ -z "${SRV_IPADDRS}" ]; then
+ echo >&2 "`basename $0`: get_hb_configs() error: There"\
+ "are no service address in ${HA_RES} of host"\
+ "${host_name} or ${HA_RES} does not exist!"
+ return 0
+ fi
+ fi
+
+ # Construct HA configuration items
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to check whether this target service
+ # was made to be high-available
+ if is_ha_target ${host_name} ${TARGET_DEVNAMES[i]}; then
+ HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
+ fi
+ done
+
+ return 0
+}
+
+# get_cluman_channel hostname
+# Get the Heartbeat channel of CluManager from the node @hostname
+get_cluman_channel() {
+ local host_name=$1
+ local ret_line line
+ local cluman_channel=
+ local mcast_ipaddr
+
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ "${line}" != "${line#*broadcast*}" ] \
+ && [ "`echo ${line}|awk '{print $3}'`" = "yes" ]; then
+ cluman_channel="broadcast"
+ break
+ fi
+
+ if [ "${line}" != "${line#*multicast_ipaddress*}" ]; then
+ mcast_ipaddr=`echo ${line}|awk '{print $3}'`
+ if [ "${mcast_ipaddr}" != "225.0.0.11" ]; then
+ cluman_channel="multicast ${mcast_ipaddr}"
+ break
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd")
+
+ echo ${cluman_channel}
+ return 0
+}
+
+# get_cluman_srvaddr hostname target_svname
+# Get the service IP addresses of @target_svname from the node @hostname
+get_cluman_srvaddr() {
+ local host_name=$1
+ local target_svname=$2
+ local ret_line line
+ local srvaddr cluman_srvaddr=
+
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ "${line}" != "${line#*ipaddress = *}" ]; then
+ srvaddr=`echo ${line}|awk '{print $3}'`
+ if [ -z "${cluman_srvaddr}" ]; then
+ cluman_srvaddr=${srvaddr}
+ else
+ cluman_srvaddr=${cluman_srvaddr}:${srvaddr}
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "${CONFIG_CMD} \
+ --service=${target_svname} --service_ipaddresses")
+
+ if [ -z "${cluman_srvaddr}" ]; then
+ echo "`basename $0`: get_cluman_srvaddr() error: Cannot" \
+ "get the service IP addresses of ${target_svname} in" \
+ "${host_name}! Check ${CONFIG_CMD} command!"
+ return 1
+ fi
+
+ echo ${cluman_srvaddr}
+ return 0
+}
+
+# get_cluman_configs hostname
+# Get the CluManager configurations from the node @hostname
+get_cluman_configs() {
+ local host_name=$1
+ local ret_str
+ declare -i i
+
+ unset HA_CONFIGS
+
+ # Execute remote command to get the configs of CluManager
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ HB_CHANNELS=
+ SRV_IPADDRS=
+ HB_OPTIONS=
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to check whether this target service
+ # was made to be high-available
+ ! is_ha_target ${host_name} ${TARGET_DEVNAMES[i]} && continue
+
+ # Execute remote command to get Heartbeat channel
+ HB_CHANNELS=$(get_cluman_channel ${host_name})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${HB_CHANNELS}"
+ fi
+
+ # Execute remote command to get service IP address
+ SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \
+ ${TARGET_SVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${SRV_IPADDRS}"
+ return 0
+ fi
+
+ HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
+ done
+
+ return 0
+}
+
+# get_ha_configs hostname
+# Get the HA software configurations from the node @hostname
+get_ha_configs() {
+ local host_name=$1
+
+ unset HA_CONFIGS
+
+ if [ -z "${HATYPE_OPT}" ]; then
+ return 0
+ fi
+
+ verbose_output "Collecting HA software configurations from host $1..."
+
+ case "${HATYPE_OPT}" in
+ "${HBVER_HBV1}" | "${HBVER_HBV2}") # Heartbeat
+ if ! get_hb_configs ${host_name}; then
+ return 1
+ fi
+ ;;
+ "${HATYPE_CLUMGR}") # CluManager
+ if ! get_cluman_configs ${host_name}; then
+ return 1
+ fi
+ ;;
+ esac
+
+ return 0
+}
+
+#*********************** Lustre targets configurations ***********************#
+
+# is_failover_service target_svname
+# Check whether a target service @target_svname is a failover service.
+is_failover_service() {
+ local target_svname=$1
+ declare -i i
+
+ for ((i = 0; i < ${#ALL_TARGET_SVNAMES[@]}; i++)); do
+ [ "${target_svname}" = "${ALL_TARGET_SVNAMES[i]}" ] && return 0
+ done
+
+ return 1
+}
+
+# get_svnames hostname
+# Get the lustre target server obd names from the node @hostname
+get_svnames(){
+ declare -i i
+ declare -i j
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the TARGET_SVNAMES array
+ unset TARGET_SVNAMES
+ unset FAILOVER_FMTOPTS
+
+ # Execute remote command to the node @hostname and figure out what
+ # lustre services are running.
+ i=0
+ j=${#ALL_TARGET_SVNAMES[@]}
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ -z "`echo ${line} | grep ${MGS_TYPE}`" ] \
+ && [ -z "`echo ${line} | grep ${MDT_TYPE}`" ] \
+ && [ -z "`echo ${line} | grep ${OST_TYPE}`" ]; then
+ continue
+ fi
+
+ # Get target server name
+ TARGET_SVNAMES[i]=`echo ${line} | awk '{print $4}'`
+ if [ -n "${TARGET_SVNAMES[i]}" ]; then
+ if is_failover_service ${TARGET_SVNAMES[i]}; then
+ FAILOVER_FMTOPTS[i]="--noformat"
+ fi
+ ALL_TARGET_SVNAMES[j]=${TARGET_SVNAMES[i]}
+ let "i += 1"
+ let "j += 1"
+ else
+ echo >&2 "`basename $0`: get_svnames() error: Invalid"\
+ "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\
+ "- \"${line}\"!"
+ return 1
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no lustre services running" \
+ "on the node ${host_name}!"
+ fi
+
+ return 0
+}
+
+# is_loopdev devname
+# Check whether a device @devname is a loop device or not
+is_loopdev() {
+ local devname=$1
+
+ if [ -z "${devname}" ] || \
+ [ -z "`echo ${devname}|awk '/\/dev\/loop[[:digit:]]/ {print $0}'`" ]
+ then
+ return 1
+ fi
+
+ return 0
+}
+
+# get_devname hostname svname
+# Get the device name of lustre target @svname from node @hostname
+get_devname() {
+ local host_name=$1
+ local target_svname=$2
+ local target_devname=
+ local ret_str
+ local target_type target_obdtype mntdev_file
+
+ if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
+ # Execute remote command to get the device name of mgs target
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/findfs LABEL=${target_svname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]
+ then
+ echo "`basename $0`: get_devname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+ fi
+
+ if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]; then
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+ fi
+ else # Execute remote command to get the device name of mdt/ost target
+ target_type=`echo ${target_svname} | cut -d - -f 2`
+ target_obdtype=${target_type:0:3}_TYPE
+
+ mntdev_file=${LUSTRE_PROC}/${!target_obdtype}/${target_svname}/mntdev
+
+ ret_str=`${REMOTE} ${host_name} "cat ${mntdev_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_devname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*No such file*}" ]; then
+ echo "`basename $0`: get_devname() error:"\
+ "${mntdev_file} does not exist in ${host_name}!"
+ return 1
+ else
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+ fi
+ fi
+
+ echo ${target_devname}
+ return 0
+}
+
+# get_devsize hostname target_devname
+# Get the device size (KB) of @target_devname from node @hostname
+get_devsize() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devsize=
+ local ret_str
+
+ # Execute remote command to get the device size
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/blockdev --getsize ${target_devname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_devsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ target_devsize=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devsize=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_devsize() error: can't" \
+ "get device size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let " target_devsize /= 2"
+
+ echo ${target_devsize}
+ return 0
+}
+
+# get_realdevname hostname loop_dev
+# Get the real device name of loop device @loop_dev from node @hostname
+get_realdevname() {
+ local host_name=$1
+ local loop_dev=$2
+ local target_devname=
+ local ret_str
+
+ # Execute remote command to get the real device name
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/losetup ${loop_dev}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_realdevname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $4}' \
+ | sed 's/^(//' | sed 's/)$//'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $3}' \
+ | sed 's/^(//' | sed 's/)$//'`
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*No such*}" ] \
+ || [ -z "${target_devname}" ]; then
+ echo "`basename $0`: get_realdevname() error: can't" \
+ "get info on device ${loop_dev} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_devname}
+ return 0
+}
+
+# get_mntpnt hostname target_devname
+# Get the lustre target mount point from the node @hostname
+get_mntpnt(){
+ local host_name=$1
+ local target_devname=$2
+ local mnt_point=
+ local ret_str
+
+ # Execute remote command to get the mount point
+ ret_str=`${REMOTE} ${host_name} \
+ "cat /etc/mtab | grep ${target_devname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_mntpnt() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ mnt_point=`echo ${ret_str} | awk '{print $3}'`
+ else
+ mnt_point=`echo ${ret_str} | awk '{print $2}'`
+ fi
+
+ if [ -z "${mnt_point}" ]; then
+ echo "`basename $0`: get_mntpnt() error: can't" \
+ "get the mount point of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${mnt_point}
+ return 0
+}
+
+# get_devnames hostname
+# Get the lustre target device names, mount points
+# and loop device sizes from the node @hostname
+get_devnames(){
+ declare -i i
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset TARGET_DEVNAMES
+ unset TARGET_DEVSIZES
+ unset TARGET_MNTPNTS
+
+ for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do
+ TARGET_DEVNAMES[i]=$(get_devname ${host_name} \
+ ${TARGET_SVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVNAMES[i]}"
+ return 1
+ fi
+
+ if [ -z "${TARGET_DEVNAMES[i]}" ]; then
+ if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then
+ verbose_output "There exists combo mgs/mdt"\
+ "target in ${host_name}."
+ continue
+ else
+ echo >&2 "`basename $0`: get_devname() error:"\
+ "No device corresponding to target" \
+ "${TARGET_SVNAMES[i]} in ${host_name}!"
+ return 1
+ fi
+ fi
+
+ # Get the mount point of the target
+ TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_MNTPNTS[i]}"
+ return 1
+ fi
+
+ # The target device is a loop device?
+ if [ -n "${TARGET_DEVNAMES[i]}" ] \
+ && is_loopdev ${TARGET_DEVNAMES[i]}; then
+ # Get the device size
+ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVSIZES[i]}"
+ return 1
+ fi
+
+ # Get the real device name
+ TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVNAMES[i]}"
+ return 1
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# is_target target_svtype ldd_flags
+# Check the service type of a lustre target
+is_target() {
+ case "$1" in
+ "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";;
+ "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";;
+ "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";;
+ *) # default arm must be unquoted; "*" would match only a literal '*'
+ echo >&2 "`basename $0`: is_target() error: Invalid" \
+ "target service type - \"$1\"!"
+ return 1
+ ;;
+ esac
+
+ if [ ${ret} -eq 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# get_devtype ldd_flags
+# Get the service type of a lustre target from @ldd_flags
+get_devtype() {
+ local target_devtype=
+
+ if [ -z "$1" ]; then # validate our own argument, not the caller's ${flags}
+ echo "`basename $0`: get_devtype() error: Invalid" \
+ "ldd_flags - it's value is null!"
+ return 1
+ fi
+
+ if is_target "mgs" $1; then
+ if is_target "mdt" $1; then
+ target_devtype="mgs|mdt"
+ else
+ target_devtype="mgs"
+ fi
+ elif is_target "mdt" $1; then
+ target_devtype="mdt"
+ elif is_target "ost" $1; then
+ target_devtype="ost"
+ else
+ echo "`basename $0`: get_devtype() error: Invalid" \
+ "ldd_flags - \"$1\"!"
+ return 1
+ fi
+
+ echo ${target_devtype}
+ return 0
+}
+
+# get_mntopts ldd_mount_opts
+# Get the user-specified lustre target mount options from @ldd_mount_opts
+get_mntopts() {
+ local mount_opts=
+ local ldd_mount_opts=$1
+
+ mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${OST_DEFAULT_MNTOPTS}}"
+ mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`"
+
+ [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \
+ || echo ${mount_opts}
+
+ return 0
+}
+
+# get_mgsnids ldd_params
+# Get the mgs nids of lustre target from @ldd_params
+get_mgsnids() {
+ local mgs_nids= # mgs nids in one mgs node
+ local all_mgs_nids= # mgs nids in all mgs failover nodes
+ local param=
+ local ldd_params="$*"
+
+ for param in ${ldd_params}; do
+ if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then
+ mgs_nids=`echo ${param#${PARAM_MGSNODE}}`
+
+ if [ -n "${all_mgs_nids}" ]; then
+ all_mgs_nids=${all_mgs_nids}:${mgs_nids}
+ else
+ all_mgs_nids=${mgs_nids}
+ fi
+ fi
+ done
+
+ [ "${all_mgs_nids}" != "${all_mgs_nids#*,*}" ] \
+ && echo "\""${all_mgs_nids}"\"" || echo ${all_mgs_nids}
+
+ return 0
+}
+
+# get_failnids ldd_params
+# Get the failover nids of lustre target from @ldd_params
+get_failnids() {
+ local fail_nids= # failover nids in one failover node
+ local all_fail_nids= # failover nids in all failover nodes
+ # of this target
+ local param=
+ local ldd_params="$*"
+
+ for param in ${ldd_params}; do
+ if [ -n "`echo ${param}|awk '/failover\.node=/ {print $0}'`" ]; then
+ fail_nids=`echo ${param#${PARAM_FAILNODE}}`
+
+ if [ -n "${all_fail_nids}" ]; then
+ all_fail_nids=${all_fail_nids}:${fail_nids}
+ else
+ all_fail_nids=${fail_nids}
+ fi
+ fi
+ done
+
+ [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \
+ && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids}
+
+ return 0
+}
+
+# get_fmtopts target_devname hostname ldd_params
+# Get other format options of the lustre target @target_devname from @ldd_params
+get_fmtopts() {
+ local target_devname=$1
+ local host_name=$2
+ shift
+ shift
+ local ldd_params="$*"
+ local param=
+ local fmt_opts=
+
+ for param in ${ldd_params}; do
+ [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue
+ [ -n "`echo ${param}|awk '/failover\.node=/ {print $0}'`" ] && continue
+
+ if [ -n "${param}" ]; then
+ if [ -n "${fmt_opts}" ]; then
+ fmt_opts=${fmt_opts}" --param=\""${param}"\""
+ else
+ fmt_opts="--param=\""${param}"\""
+ fi
+ fi
+ done
+
+ echo ${fmt_opts}
+ return 0
+}
+
+# get_stripecount host_name target_fsname
+# Get the stripe count for @target_fsname
+get_stripecount() {
+ local host_name=$1
+ local target_fsname=$2
+ local stripe_count=
+ local stripecount_file
+ local ret_str
+
+ # Get the stripe count
+ stripecount_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount
+ ret_str=`${REMOTE} ${host_name} "cat ${stripecount_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_stripecount() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ stripe_count=`echo ${ret_str} | awk '{print $2}'`
+ else
+ stripe_count=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_stripecount() error: can't" \
+ "get stripe count of ${target_fsname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${stripe_count}
+ return 0
+}
+
+# get_stripecount_opt host_name target_fsname
+# Get the stripe count option for lustre mdt target
+get_stripecount_opt() {
+ local host_name=$1
+ local target_fsname=$2
+ local stripe_count=
+ local stripecount_opt=
+
+ # Get the stripe count
+ [ -z "${target_fsname}" ] && target_fsname="lustre"
+ stripe_count=$(get_stripecount ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${stripe_count}"
+ return 1
+ fi
+
+ if [ "${stripe_count}" != "1" ]; then
+ stripecount_opt=${OPTSTR_STRIPE_COUNT}${stripe_count}
+ fi
+
+ echo ${stripecount_opt}
+ return 0
+}
+
+# get_ldds hostname
+# Get the lustre target disk data from the node @hostname
+get_ldds(){
+ declare -i i
+ local host_name=$1
+ local ret_line line
+ local flags mnt_opts params
+ local stripecount_opt
+
+ # Initialize the arrays
+ unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
+ unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS
+
+ # Get lustre target device type, fsname, index, etc.
+ # from MOUNT_DATA_FILE. Using tunefs.lustre to read it.
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ flags=
+ mnt_opts=
+ params=
+ stripecount_opt=
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to read MOUNT_DATA_FILE
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then
+ TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then
+ TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then
+ flags=`echo ${line}|awk '{print $2}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then
+ mnt_opts=`echo ${line}|awk '{print $0}'`
+ mnt_opts=`echo ${mnt_opts#Persistent mount opts: }`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then
+ params=`echo ${line}|awk '{print $0}'`
+ params=`echo ${params#Parameters:}`
+ break
+ fi
+ done < <(${REMOTE} ${host_name} "${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null")
+
+ if [ -z "${flags}" ]; then
+ echo >&2 "`basename $0`: get_ldds() error: Invalid" \
+ "ldd_flags of target ${TARGET_DEVNAMES[i]}" \
+ "in host ${host_name} - it's value is null!"\
+ "Check ${TUNEFS} command!"
+ return 1
+ fi
+
+ if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \
+ || is_target "mgs" ${flags}; then
+ TARGET_INDEXES[i]=
+ fi
+
+ [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]=
+
+ # Get the lustre target service type
+ TARGET_DEVTYPES[i]=$(get_devtype ${flags})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVTYPES[i]} From device" \
+ "${TARGET_DEVNAMES[i]} in host ${host_name}!"
+ return 1
+ fi
+
+ # Get the lustre target mount options
+ TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}")
+
+ # Get mgs nids of the lustre target
+ TARGET_MGSNIDS[i]=$(get_mgsnids "${params}")
+
+ # Get failover nids of the lustre target
+ TARGET_FAILNIDS[i]=$(get_failnids "${params}")
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_FAILNIDS[i]} From device" \
+ "${TARGET_DEVNAMES[i]} in host ${host_name}!"
+ return 1
+ fi
+
+ # Get other format options of the lustre target
+ TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_FMTOPTS[i]}"
+ return 1
+ fi
+
+ if [ -n "${TARGET_DEVSIZES[i]}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}"
+ else
+ TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}"
+ fi
+ fi
+
+ if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]}
+ else
+ TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]}
+ fi
+ fi
+
+ if is_target "mdt" ${flags}; then
+ # Get the stripe count option
+ stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${stripecount_opt}"
+ return 1
+ fi
+
+ if [ -n "${stripecount_opt}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt}
+ else
+ TARGET_FMTOPTS[i]=${stripecount_opt}
+ fi
+ fi
+ fi
+
+ if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then
+ TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\""
+ fi
+ done
+
+ return 0
+}
+
+# get_journalsize target_devname hostname
+# Get the journal size of lustre target @target_devname from @hostname
+# On success, prints the journal size in MB on stdout and returns 0.
+# On failure, prints an error message on stdout (callers capture it via
+# command substitution) and returns 1.
+get_journalsize() {
+ local target_devname=$1
+ local host_name=$2
+ local journal_inode=
+ local journal_size=
+ local ret_str
+
+ # Execute remote command to get the journal inode number
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Journal inode:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_journalsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Strip everything before the "Journal inode:" label, then take the
+ # third whitespace-separated field as the inode number.
+ ret_str=${ret_str#${ret_str%Journal inode:*}}
+ journal_inode=`echo ${ret_str} | awk '{print $3}'`
+ # Sanity check: the parsed value must begin with a digit.
+ if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_journalsize() error: can't" \
+ "get journal inode of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Execute remote command to get the journal size
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \
+ 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_journalsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # The journal file size in bytes is the sixth field of the "User:"
+ # line printed by debugfs "stat".
+ ret_str=${ret_str#${ret_str%User:*}}
+ journal_size=`echo ${ret_str} | awk '{print $6}'`
+ if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_journalsize() error: can't" \
+ "get journal size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let "journal_size /= 1024*1024" # convert bytes to MB
+
+ echo ${journal_size}
+ return 0
+}
+
+# get_defaultjournalsize target_devsize
+# Calculate the default journal size from target device size @target_devsize
+# Prints the computed default journal size in MB (0 for small devices)
+# on stdout and always returns 0.
+# NOTE(review): @target_devsize is presumably in KB (base_size of
+# 1024*1024 is commented as 1GB below) — confirm against callers.
+get_defaultjournalsize() {
+ declare -i target_devsize=$1
+ declare -i journal_size=0
+ declare -i max_size base_size
+
+ # Devices up to 1GB keep journal_size = 0; callers fall back to
+ # figure_journal_size() in that case.
+ let "base_size = 1024*1024"
+ if [ ${target_devsize} -gt ${base_size} ]; then # 1GB
+ let "journal_size = target_devsize / 102400"
+ let "journal_size *= 4"
+ fi
+
+ # Cap the journal at 102400 filesystem blocks, expressed in MB.
+ let "max_size = 102400 * L_BLOCK_SIZE"
+ let "max_size >>= 20" # 400MB
+
+ if [ ${journal_size} -gt ${max_size} ]; then
+ let "journal_size = max_size"
+ fi
+
+ echo ${journal_size}
+ return 0
+}
+
+# figure_journal_size target_devname hostname
+# Find a reasonable journal file size given the number of blocks
+# in the filesystem. This algorithm is derived from figure_journal_size()
+# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm.
+# On success, prints the journal size in MB on stdout and returns 0;
+# on failure, prints an error message and returns 1.
+figure_journal_size() {
+ local target_devname=$1
+ local host_name=$2
+ local ret_str
+ declare -i block_count
+ declare -i journal_blocks
+ declare -i journal_size
+
+ # Execute remote command to get the block count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Block count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: figure_journal_size() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Block count:" line and verify that
+ # it is numeric.
+ ret_str=${ret_str#${ret_str%Block count:*}}
+ block_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: figure_journal_size() error: can't" \
+ "get block count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Step the journal size up with the filesystem size, capped at
+ # 32768 journal blocks (mirrors the e2fsprogs heuristic).
+ if ((block_count < 32768)); then
+ let "journal_blocks = 1024"
+ elif ((block_count < 256*1024)); then
+ let "journal_blocks = 4096"
+ elif ((block_count < 512*1024)); then
+ let "journal_blocks = 8192"
+ elif ((block_count < 1024*1024)); then
+ let "journal_blocks = 16384"
+ else
+ let "journal_blocks = 32768"
+ fi
+
+ # Convert journal blocks to MB.
+ let "journal_size = journal_blocks * L_BLOCK_SIZE / 1048576"
+
+ echo ${journal_size}
+ return 0
+}
+
+# get_J_opt hostname target_devname target_devsize
+# Get the mkfs -J option of lustre target @target_devname
+# from the node @hostname
+# Prints "-J size=<MB>" only when the on-disk journal size differs from
+# the computed default (so default-sized targets get no explicit option);
+# prints an error message and returns 1 on failure.
+get_J_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devsize=$3
+ local journal_size=
+ local default_journal_size=
+ local journal_opt=
+
+ # Get the real journal size of lustre target
+ journal_size=$(get_journalsize ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${journal_size}"
+ return 1
+ fi
+
+ # Get the default journal size of lustre target
+ default_journal_size=$(get_defaultjournalsize ${target_devsize})
+ # A default of 0 means the device is too small for the size-based
+ # heuristic; fall back to the block-count heuristic instead.
+ if [ "${default_journal_size}" = "0" ]; then
+ default_journal_size=$(figure_journal_size ${target_devname} \
+ ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${default_journal_size}"
+ return 1
+ fi
+ fi
+
+ if [ "${journal_size}" != "${default_journal_size}" ]; then
+ journal_opt="-J size=${journal_size}"
+ fi
+
+ echo ${journal_opt}
+ return 0
+}
+
+# get_ratio target_devname hostname
+# Get the bytes/inode ratio of lustre target @target_devname from @hostname
+# Computes block_count * block_size / inode_count from debugfs output;
+# prints the ratio on stdout, or an error message with return code 1.
+get_ratio() {
+ local target_devname=$1
+ local host_name=$2
+ local inode_count=
+ local block_count=
+ local ratio=
+ local ret_str
+
+ # Execute remote command to get the inode count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Inode count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_ratio() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Inode count:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Inode count:*}}
+ inode_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_ratio() error: can't" \
+ "get inode count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Execute remote command to get the block count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Block count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_ratio() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Block count:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Block count:*}}
+ block_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_ratio() error: can't" \
+ "get block count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let "ratio = block_count*L_BLOCK_SIZE/inode_count"
+
+ echo ${ratio}
+ return 0
+}
+
+# get_default_ratio target_devtype target_devsize
+# Calculate the default bytes/inode ratio from target type @target_devtype
+# MDTs default to 4096; large OSTs to 16384; everything else falls back
+# to the filesystem block size. Always returns 0.
+# NOTE(review): the 1000000 threshold suggests @target_devsize is in KB
+# (~1GB) — confirm against callers.
+get_default_ratio() {
+ local target_devtype=$1
+ declare -i target_devsize=$2
+ local ratio=
+
+ case "${target_devtype}" in
+ "mdt" | "mgs|mdt" | "mdt|mgs")
+ ratio=4096;;
+ "ost")
+ [ ${target_devsize} -gt 1000000 ] && ratio=16384;;
+ esac
+
+ # Fallback for small OSTs and unrecognized target types.
+ [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE}
+
+ echo ${ratio}
+ return 0
+}
+
+# get_i_opt hostname target_devname target_devtype target_devsize
+# Get the mkfs -i option of lustre target @target_devname
+# from the node @hostname
+# Prints "-i <ratio>" only when the target's real bytes/inode ratio
+# differs from the default for its type/size; prints an error message
+# and returns 1 on failure.
+get_i_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devtype=$3
+ local target_devsize=$4
+ local ratio=
+ local default_ratio=
+ local ratio_opt=
+
+ # Get the real bytes/inode ratio of lustre target
+ ratio=$(get_ratio ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${ratio}"
+ return 1
+ fi
+
+ # Get the default bytes/inode ratio of lustre target
+ default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize})
+
+ if [ "${ratio}" != "${default_ratio}" ]; then
+ ratio_opt="-i ${ratio}"
+ fi
+
+ echo ${ratio_opt}
+ return 0
+}
+
+# get_isize target_devname hostname
+# Get the inode size of lustre target @target_devname from @hostname
+# Prints the inode size (bytes) parsed from debugfs "stats -h" output,
+# or an error message with return code 1.
+get_isize() {
+ local target_devname=$1
+ local host_name=$2
+ local inode_size=
+ local ret_str
+
+ # Execute remote command to get the inode size
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Inode size:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_isize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Inode size:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Inode size:*}}
+ inode_size=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_isize() error: can't" \
+ "get inode size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_mdt_default_isize host_name target_fsname
+# Calculate the default inode size of lustre mdt target
+# Prints the default MDT inode size (bytes) derived from the
+# filesystem's stripe count, or an error message with return code 1.
+get_mdt_default_isize() {
+ local host_name=$1
+ local target_fsname=$2
+ declare -i stripe_count
+ local inode_size=
+
+ # Get the stripe count
+ stripe_count=$(get_stripecount ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${stripe_count}"
+ return 1
+ fi
+
+ # NOTE(review): the mapping is intentionally non-monotonic — very
+ # wide stripe counts (>77) drop back to 512, presumably because the
+ # striping EA no longer fits in the inode at that point and is
+ # stored externally regardless — confirm against mkfs.lustre logic.
+ if ((stripe_count > 77)); then
+ inode_size=512
+ elif ((stripe_count > 34)); then
+ inode_size=2048
+ elif ((stripe_count > 13)); then
+ inode_size=1024
+ else
+ inode_size=512
+ fi
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_default_isize host_name target_devtype target_fsname
+# Calculate the default inode size of lustre target type @target_devtype
+# MDT defaults come from the stripe count; OSTs default to 256 bytes;
+# anything else falls back to 128. Returns 1 only if the MDT lookup
+# fails (the error message is echoed through).
+get_default_isize() {
+ local host_name=$1
+ local target_devtype=$2
+ local target_fsname=$3
+ local inode_size=
+
+ case "${target_devtype}" in
+ "mdt" | "mgs|mdt" | "mdt|mgs")
+ inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${inode_size}"
+ return 1
+ fi
+ ;;
+ "ost")
+ inode_size=256;;
+ esac
+
+ # Fallback for unrecognized target types (e.g. a standalone mgs).
+ [ -z "${inode_size}" ] && inode_size=128
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_I_opt hostname target_devname target_devtype target_fsname
+# Get the mkfs -I option of lustre target @target_devname
+# from the node @hostname
+# Prints "-I <size>" only when the target's real inode size differs
+# from the default for its type; prints an error message and returns 1
+# on failure.
+get_I_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devtype=$3
+ local target_fsname=$4
+ local isize=
+ local default_isize=
+ local isize_opt=
+
+ # Get the real inode size of lustre target
+ isize=$(get_isize ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${isize}"
+ return 1
+ fi
+
+ # Get the default inode size of lustre target
+ [ -z "${target_fsname}" ] && target_fsname="lustre"
+ default_isize=$(get_default_isize ${host_name} ${target_devtype} \
+ ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${default_isize}"
+ return 1
+ fi
+
+ if [ "${isize}" != "${default_isize}" ]; then
+ isize_opt="-I ${isize}"
+ fi
+
+ echo ${isize_opt}
+ return 0
+}
+
+# get_mkfsopts hostname
+# Get the mkfs options of lustre targets from the node @hostname
+# Fills the global TARGET_MKFSOPTS array (one space-separated option
+# string per target) from the -J, -i and -I probes above; may also fill
+# in missing TARGET_DEVSIZES entries as a side effect. Errors from the
+# helpers are forwarded to stderr and the function returns 1.
+get_mkfsopts(){
+ declare -i i
+ local host_name=$1
+ local journal_opt
+ local ratio_opt
+ local inode_size_opt
+
+ # Initialize the arrays
+ unset TARGET_MKFSOPTS
+
+ # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ journal_opt=
+ ratio_opt=
+ inode_size_opt=
+
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ if [ -z "${TARGET_DEVSIZES[i]}" ]; then
+ # Get the device size
+ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVSIZES[i]}"
+ return 1
+ fi
+ fi
+
+ # Get the journal option
+ journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVSIZES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${journal_opt}"
+ return 1
+ fi
+
+ # Append the option with a space separator if options already exist.
+ if [ -n "${journal_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${journal_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}"
+ fi
+ fi
+
+ # Get the bytes-per-inode ratio option
+ ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${ratio_opt}"
+ return 1
+ fi
+
+ if [ -n "${ratio_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${ratio_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}"
+ fi
+ fi
+
+ # Get the inode size option
+ inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${inode_size_opt}"
+ return 1
+ fi
+
+ if [ -n "${inode_size_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${inode_size_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}"
+ fi
+ fi
+
+ # Quote the field if it contains a comma, so it survives as a
+ # single cell in the generated csv file.
+ if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then
+ TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\""
+ fi
+ done
+ return 0
+}
+
+# get_target_configs hostname
+# Get the lustre target informations from the node @hostname
+# Runs the per-host collection helpers (get_svnames, get_devnames,
+# get_ldds, get_mkfsopts) and assembles one comma-separated config
+# record per target into the global TARGET_CONFIGS array.
+get_target_configs() {
+ declare -i i
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset TARGET_CONFIGS
+
+ # Get lustre target server names
+ if ! get_svnames ${host_name}; then
+ return 1
+ fi
+
+ # Get lustre target device names, mount points and loop device sizes
+ if ! get_devnames ${host_name}; then
+ return 1
+ fi
+
+ # Get lustre target device type, fsname, index, etc.
+ if ! get_ldds ${host_name}; then
+ return 1
+ fi
+
+ # Get mkfs options of lustre targets
+ if ! get_mkfsopts ${host_name}; then
+ return 1
+ fi
+
+ # Construct lustre target configs; skip indices left empty by the
+ # collection helpers.
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+ TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]}
+ done
+
+ return 0
+}
+
+# get_configs hostname
+# Get all the informations needed to generate a csv file from
+# the node @hostname
+# Gathers network module options, lustre target configs and HA
+# software configs in sequence; returns 1 on the first failure.
+get_configs() {
+ # Check the hostname
+ if [ -z "$1" ]; then
+ echo >&2 "`basename $0`: get_configs() error:" \
+ "Missing hostname!"
+ return 1
+ fi
+
+ # Get network module options
+ verbose_output ""
+ verbose_output "Collecting network module options from host $1..."
+ if ! get_module_opts $1; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Get lustre target informations
+ verbose_output "Collecting Lustre targets informations from host $1..."
+ if ! get_target_configs $1; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Get HA software configurations
+ if ! get_ha_configs $1; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Collect linux MD/LVM device informations from the lustre cluster and
+# append them to the csv file
+# For each host in HOST_NAMES: collect MD, PV, VG and LV configs via
+# the get_*_configs helpers, and append one marker-tagged csv line per
+# device to LUSTRE_CSV_FILE. Returns 1 on the first collection failure.
+get_mdlvm_info() {
+ declare -i idx
+ declare -i i
+ local line
+
+ # Collect and append linux MD/LVM informations to the csv file
+ for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
+ [ -z "${HOST_NAMES[idx]}" ] && continue
+
+ # Collect MD device informations
+ ! get_md_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append MD device informations to the csv file
+ for ((i = 0; i < ${#MD_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${MD_MARKER},${MD_NAME[i]},,,${MD_LEVEL[i]},${MD_DEVS[i]}
+ verbose_output "Informations of MD device ${MD_NAME[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+
+ # Collect PV informations
+ ! get_pv_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append PV informations to the csv file (PV_NAMES is a single
+ # aggregated string, unlike the per-device arrays below)
+ if [ -n "${PV_NAMES}" ]; then
+ line=${HOST_NAMES[idx]},${PV_MARKER},${PV_NAMES}
+ verbose_output "Informations of PVs" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ fi
+
+ # Collect VG informations
+ ! get_vg_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append VG informations to the csv file
+ for ((i = 0; i < ${#VG_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${VG_MARKER},${VG_NAME[i]},,,${VG_PVNAMES[i]}
+ verbose_output "Informations of VG ${VG_NAME[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+
+ # Collect LV informations
+ ! get_lv_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append LV informations to the csv file
+ for ((i = 0; i < ${#LV_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${LV_MARKER},${LV_NAME[i]},,,${LV_SIZE[i]},${LV_VGNAME[i]}
+ verbose_output "Informations of LV /dev/${LV_VGNAME[i]}/${LV_NAME[i]}"\
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+ done
+ return 0
+}
+
+# Generate the csv file from the lustre cluster
+# Discovers the cluster hosts, truncates LUSTRE_CSV_FILE, optionally
+# records MD/LVM info (when GET_MDLVM_INFO is true), then appends one
+# csv line per lustre target per host. On a per-host collection
+# failure the partial csv file is removed and 1 is returned.
+gen_csvfile() {
+ declare -i idx
+ declare -i i
+ local line
+
+ # Get lustre cluster node names
+ verbose_output "Collecting Lustre cluster node names..."
+ if ! get_hostnames; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Truncate (or create) the output csv file.
+ : > ${LUSTRE_CSV_FILE}
+
+ ${GET_MDLVM_INFO} && get_mdlvm_info
+
+ # Collect and append lustre target informations to the csv file
+ for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
+ # Collect informations
+ if ! get_configs ${HOST_NAMES[idx]}; then
+ rm -f ${LUSTRE_CSV_FILE}
+ return 1
+ fi
+
+ # Append informations to the csv file
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Only append the HA column when HA configs exist for
+ # this target.
+ if [ -z "${HA_CONFIGS[i]}" ]; then
+ line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]}
+ else
+ line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]}
+ fi
+ verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "" >> ${LUSTRE_CSV_FILE}
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+ done
+
+ return 0
+}
+
+# Main flow
+# Entry point: generate the csv file and exit non-zero on failure.
+echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} START ********"
+if ! gen_csvfile; then
+ exit 1
+fi
+echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} OK **********"
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# Reads old MDS config logs for transferring to a MGS
+#
+# Dumps each log file from the LOGS directory of an MDS device (via
+# debugfs, no mount required), classifies it as a client or MDT0000
+# log by string inspection, and interactively copies it to
+# $TMP/<newfsname>-<type> for later installation on the MGS.
+###############################################################################
+
+TMP=${TMP:-/tmp/logs}
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <mdsdev> <newfsname>
+
+ <mdsdev> the MDS disk device (e.g. /dev/sda1)
+ <newfsname> the name of the new filesystem (e.g. testfs)
+
+ This script will extract old config logs from an MDS device to a
+ temporary location ($TMP). During the upgrade procedure, mount the
+ MGS disk as type ldiskfs (e.g. mount -t ldiskfs /dev/sda
+ /mnt/temp), then copy these logs into the CONFIGS directory on the
+ MGS (e.g. /mnt/temp/CONFIGS). Logs from many MDS's can be added
+ in this way. When done, unmount the MGS, and then re-mount it as
+ type lustre to start the service.
+
+EOF
+ exit 1
+}
+
+if [ $# -lt 2 ]; then
+ usage
+fi
+
+DEV=$1
+FSNAME=$2
+# -c: catastrophic (read-only) mode; -R: run a single debugfs request.
+DEBUGFS="debugfs -c -R"
+mkdir -p $TMP
+
+# List the file names (9th column of "ls -l") in the LOGS directory,
+# keeping only entries that contain a lowercase letter.
+FILES=`$DEBUGFS "ls -l LOGS" $DEV | awk '{print $9}' | awk '/[a-z]/ {print $1}'`
+
+for FILE in ${FILES}; do
+ $DEBUGFS "dump LOGS/$FILE $TMP/temp" $DEV 2> /dev/null
+ # Classify the log: an MDC string means a client log; a lov string
+ # means an MDT log; anything else is skipped.
+ MDC=`strings $TMP/temp | grep MDC`
+ LOV=`strings $TMP/temp | grep lov`
+ if [ -n "$MDC" ]; then
+ TYPE=client
+ else
+ if [ -n "$LOV" ]; then
+ TYPE=MDT0000
+ else
+ echo "Can't determine type for log '$FILE', skipping"
+ continue
+ fi
+ fi
+ # Ask the operator before keeping each log under its new name.
+ echo -n "Copying log '$FILE' to '${FSNAME}-${TYPE}'. Okay [y/n]?"
+ read OK
+ if [ "$OK" = "y" ]; then
+ mv $TMP/temp $TMP/${FSNAME}-${TYPE}
+ else
+ rm $TMP/temp
+ fi
+done
+
+echo ls -l $TMP
+ls -l $TMP
+