b=9853
author     yujian <yujian>    Mon, 29 May 2006 13:56:09 +0000 (13:56 +0000)
committer  yujian <yujian>    Mon, 29 May 2006 13:56:09 +0000 (13:56 +0000)
1) use ssh instead of pdsh
2) modify to support monitoring multiple services in one failover group
3) create service symlinks from /etc/init.d/lustre for Lustre targets
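
For illustration, an invocation of the updated script could look like the following
(the hostnames, service address, devices and mount points are placeholders, not
values taken from this patch):

    # hypothetical example: one failover pair serving two Lustre targets,
    # each target given as -d <device>:<mount point>
    ./gen_clumanager_config.sh -n nodeA:nodeB -s 192.168.1.100 \
        -d /dev/sdb:/mnt/mdt -d /dev/sdc:/mnt/ost0 -v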

lustre/utils/cluster_scripts/gen_clumanager_config.sh

index 8469f7d..0cce008 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# gen_clumanager_config.sh - script for generating the Red Hat's Cluster Manager
+# gen_clumanager_config.sh - script for generating the Red Hat Cluster Manager
 #                           HA software's configuration files
 #
 ################################################################################
@@ -9,70 +9,80 @@
 usage() {
        cat >&2 <<EOF
 
-Usage:  `basename $0` <-n hostnames> <-d target device> <-s service addresses> 
-                     [-c heartbeat channels] [-o heartbeat options] [-v]
+Usage:  `basename $0` <-n hostnames> <-s service addresses> 
+                     [-c heartbeat channel] [-o heartbeat options] [-v]
+                     <-d target device> [-d target device...]
 
        -n hostnames            the nodenames of the primary node and its fail-
                                overs
                                Multiple nodenames are separated by colon (:)
                               delimiter. The first one is the nodename of the
                                primary node, the others are failover nodenames.
-       -d target device        the target device name and type
-                               The name and type are separated by colon (:)
-                               delimeter. The type values are: mgs, mdt, ost or
-                               mgs_mdt.
        -s service addresses    the IP addresses to failover
                                Multiple addresses are separated by colon (:)
                               delimiter.
-       -c heartbeat channels   the methods to send/rcv heartbeats on
+       -c heartbeat channel    the method to send/rcv heartbeats on
                                The default method is multicast, and multicast_
                                ipaddress is "225.0.0.11".
        -o heartbeat options    a "catchall" for other heartbeat configuration 
                                options
+                               Multiple options are separated by colon (:)
+                               delimiter.
        -v                      verbose mode
+       -d target device        the target device name and mount point
+                               The device name and mount point are separated by
+                               colon (:) delimiter.
 
 EOF
        exit 1
 }
 
-# Global variables
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}$"verify_serviceIP.sh"
+#****************************** Global variables ******************************#
+# Scripts to be called
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"."}
+SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/verify_serviceIP.sh
+
+# Remote command
+REMOTE=${REMOTE:-"ssh -x -q"}
+
+# Lustre utilities path
+CMD_PATH=${CMD_PATH:-"/usr/sbin"}
+TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
+
+# CluManager tools
+CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"}
+CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
 
-LUSTRE_SRV_SCRIPT=$"/etc/rc.d/init.d/lustre"   # service script for lustre
+# Configuration directory
+CLUMAN_DIR="/etc"                      # CluManager configuration directory
 
-TMP_DIR=$"/tmp/clumanager/"            # temporary directory
-CLUMGR_DIR=$"/etc/"                    # CluManager configuration directory
+# Service directory and name
+INIT_DIR=${INIT_DIR:-"/etc/init.d"}
+LUSTRE_SRV=${LUSTRE_SRV:-"${INIT_DIR}/lustre"} # service script for lustre
 
-CONFIG_CMD=$"redhat-config-cluster-cmd"
+TMP_DIR="/tmp/clumanager"              # temporary directory
 
 declare -a NODE_NAMES                  # node names in the failover group
 declare -a SRV_IPADDRS                 # service IP addresses
 
+# Lustre target device names, service names and mount points
+declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
+declare -i TARGET_NUM=0                        # number of targets
+
 # Get and check the positional parameters
-while getopts "n:d:s:c:o:v" OPTION; do
+VERBOSE_OUTPUT=false
+while getopts "n:s:c:o:vd:" OPTION; do
        case $OPTION in
         n)
                HOSTNAME_OPT=$OPTARG 
-               HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
-               if [ ${HOSTNAME_NUM} -lt 2 ]; then
-                       echo >&2 $"`basename $0`: Lack failover nodenames!"
-                       usage
-               fi
-               ;;
-        d)
-               DEVICE_OPT=$OPTARG 
-               TARGET_DEV=`echo ${DEVICE_OPT} | awk -F":" '{print $1}'`
-               TARGET_TYPE=`echo ${DEVICE_OPT} | awk -F":" '{print $2}'`
-               if [ -z "${TARGET_TYPE}" ]; then
-                       echo >&2 $"`basename $0`: Lack target device type!"
+               PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+               if [ -z "${PRIM_NODENAME}" ]; then
+                       echo >&2 $"`basename $0`: Missing primary nodename!"
                        usage
                fi
-               if [ "${TARGET_TYPE}" != "mgs" ]&&[ "${TARGET_TYPE}" != "mdt" ]\
-               &&[ "${TARGET_TYPE}" != "ost" ]&&[ "${TARGET_TYPE}" != "mgs_mdt" ]
-               then
-                       echo >&2 $"`basename $0`: Invalid target device type" \
-                                 "- ${TARGET_TYPE}!"
+               HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+               if [ ${HOSTNAME_NUM} -lt 2 ]; then
+                       echo >&2 $"`basename $0`: Missing failover nodenames!"
                        usage
                fi
                ;;
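
Since the new globals are declared with the ${VAR:-default} form, the remote command,
tool paths and scripts directory can all be overridden from the environment rather
than by editing the script; a hypothetical override (the alternative values below are
illustrative only):

    # run once with a non-default remote shell and scripts directory
    REMOTE="ssh -x -q -p 2222" CLUSTER_SCRIPTS_PATH="/opt/lustre/scripts" \
        ./gen_clumanager_config.sh -n nodeA:nodeB -s 192.168.1.100 \
        -d /dev/sdb:/mnt/mdt
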
@@ -96,8 +106,24 @@ while getopts "n:d:s:c:o:v" OPTION; do
                HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
                ;;
        v) 
-               VERBOSE_OPT=$"yes"
+               VERBOSE_OUTPUT=true
+               ;;
+        d)
+               DEVICE_OPT=$OPTARG 
+               TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
+               TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
+               if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
+                       echo >&2 $"`basename $0`: Missing target device name!"
+                       usage
+               fi
+               if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
+                       echo >&2 $"`basename $0`: Missing mount point for target"\
+                                 "${TARGET_DEVNAMES[TARGET_NUM]}!"
+                       usage
+               fi
+               TARGET_NUM=$(( TARGET_NUM + 1 ))
                ;;
+
         ?) 
                usage 
        esac
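
A minimal, self-contained sketch of the repeated -d handling added above; the option
letter and array names mirror the patch, but it uses shell parameter expansion where
the patch uses awk, and it omits the empty-field checks:

    #!/bin/bash
    # collect every -d <device>:<mount point> pair into parallel arrays
    declare -a TARGET_DEVNAMES TARGET_MNTPNTS
    declare -i TARGET_NUM=0

    while getopts "d:" OPTION; do
            case $OPTION in
            d)
                    TARGET_DEVNAMES[TARGET_NUM]=${OPTARG%%:*}   # text before the first colon
                    TARGET_MNTPNTS[TARGET_NUM]=${OPTARG#*:}     # text after the first colon
                    TARGET_NUM=$(( TARGET_NUM + 1 ))
                    ;;
            esac
    done

    echo "parsed ${TARGET_NUM} target(s): ${TARGET_DEVNAMES[*]}"
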
@@ -105,23 +131,23 @@ done
 
 # Check the required parameters
 if [ -z "${HOSTNAME_OPT}" ]; then
-       echo >&2 $"`basename $0`: Lack -n option!"
+       echo >&2 $"`basename $0`: Missing -n option!"
        usage
 fi
 
-if [ -z "${DEVICE_OPT}" ]; then
-       echo >&2 $"`basename $0`: Lack -d option!"
+if [ -z "${SRVADDR_OPT}" ]; then
+       echo >&2 $"`basename $0`: Missing -s option!"
        usage
 fi
 
-if [ -z "${SRVADDR_OPT}" ]; then
-       echo >&2 $"`basename $0`: Lack -s option!"
+if [ -z "${DEVICE_OPT}" ]; then
+       echo >&2 $"`basename $0`: Missing -d option!"
        usage
 fi
 
 # Output verbose information
 verbose_output() {
-       if [ "${VERBOSE_OPT}" = "yes" ]; then
+       if ${VERBOSE_OUTPUT}; then
                echo "`basename $0`: $*"
        fi
        return 0
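
The verbose flag is now the shell builtin true or false instead of the string "yes",
so the variable itself can be run as the test command; a trimmed illustration of the
idiom:

    VERBOSE_OUTPUT=false                 # flipped to true by the -v option

    verbose_output() {
            # true/false are commands, so no string comparison is needed
            if ${VERBOSE_OUTPUT}; then
                    echo "`basename $0`: $*"
            fi
            return 0
    }

    VERBOSE_OUTPUT=true
    verbose_output "Creating cluster.xml..."    # now prints
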
@@ -131,8 +157,6 @@ verbose_output() {
 #
 # Get all the node names in this failover group
 get_nodenames() {
-       PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
-
        declare -i idx
        local nodename_str nodename
 
@@ -184,22 +208,93 @@ get_check_srvIPaddrs() {
 
 # stop_clumanager
 #
-# Run pdsh command to stop each node's clumanager service
+# Run remote command to stop each node's clumanager service
 stop_clumanager() {
        declare -i idx
-       local nodename_str=${PRIM_NODENAME}
+       local ret_str
 
-       for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
-               nodename_str=${nodename_str}$","${NODE_NAMES[idx]}
+       for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+               ret_str=`${REMOTE} ${NODE_NAMES[idx]} \
+                       "/sbin/service clumanager stop" 2>&1`
+               if [ $? -ne 0 ]; then
+                       echo >&2 "`basename $0`: stop_clumanager() error:"\
+                                "from host ${NODE_NAMES[idx]} - $ret_str!"
+               fi
        done
 
-       ${PDSH} -w ${nodename_str} /sbin/service clumanager stop
+       return 0
+}
+
+# get_srvname hostname target_devname
+#
+# Get the lustre target server name from the node @hostname
+get_srvname() {
+       local host_name=$1
+       local target_devname=$2
+       local target_srvname=
+       local ret_str
+
+       # Execute remote command to get the target server name
+       ret_str=`${REMOTE} ${host_name} \
+               "${TUNEFS} --print ${target_devname} | grep Target:" 2>&1`
        if [ $? -ne 0 ]; then
-               echo >&2 "`basename $0`: stop_clumanager() error:"\
-                        "Fail to execute pdsh command!"
+               echo "`basename $0`: get_srvname() error:" \
+                    "from host ${host_name} - ${ret_str}"
+               return 1
+       fi
+
+       if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
+               ret_str=${ret_str#*Target: }
+               target_srvname=`echo ${ret_str} | awk '{print $1}'`
+       fi
+       
+       if [ -z "${target_srvname}" ]; then
+               echo "`basename $0`: get_srvname() error: Cannot get the"\
+                    "server name of target ${target_devname} in ${host_name}!"
                return 1
        fi
 
+       echo ${target_srvname}
+       return 0
+} 
+
+# create_service
+#
+# Create service symlinks from /etc/init.d/lustre for Lustre targets
+create_service() {
+       declare -i i
+       local srv_dir
+       local command ret_str
+
+       # Initialize the TARGET_SRVNAMES array
+       unset TARGET_SRVNAMES
+
+       # Get Lustre target service names
+       for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+               TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
+                                    ${TARGET_DEVNAMES[i]})
+               if [ $? -ne 0 ]; then
+                       echo >&2 "${TARGET_SRVNAMES[i]}"
+                       return 1
+               fi
+       done
+
+       # Construct remote command
+       command=":"
+       for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
+               command=${command}";ln -s -f ${LUSTRE_SRV} ${INIT_DIR}/${TARGET_SRVNAMES[i]}"
+       done
+
+       # Execute remote command to create symlinks
+       for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
+               ret_str=`${REMOTE} ${NODE_NAMES[i]} "${command}" 2>&1`
+               if [ $? -ne 0 ]; then
+                       echo >&2 "`basename $0`: create_service() error:" \
+                                "from host ${NODE_NAMES[i]} - ${ret_str}"
+                       return 1
+               fi
+       done
+
        return 0
 }
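
The server-name extraction in get_srvname() can be exercised in isolation; the
tunefs.lustre output line below is a hypothetical example, while the parsing itself
is the same parameter expansion and awk call used in the patch:

    # hypothetical line captured from "tunefs.lustre --print <device> | grep Target:"
    ret_str="   Target:     lustre-MDT0000"

    if [ "${ret_str}" != "${ret_str#*Target: }" ]; then        # did the pattern match?
            target_srvname=`echo ${ret_str#*Target: } | awk '{print $1}'`
    fi
    echo ${target_srvname}                                      # -> lustre-MDT0000
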
 
@@ -208,25 +303,69 @@ stop_clumanager() {
 # Check the return value of redhat-config-cluster-cmd
 check_retval() {
        if [ $1 -ne 0 ]; then
-               echo >&2 "`basename $0`: Fail to run ${CONFIG_CMD}!"
+               echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
                return 1
        fi
 
        return 0
 }
 
+# add_services
+#
+# Add service tags into the cluster.xml file
+add_services() {
+       declare -i idx
+       declare -i i
+
+       # Add service tag
+       for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
+               ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
+               if ! check_retval $?; then
+                       return 1
+               fi
+
+               ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+                             --userscript=${INIT_DIR}/${TARGET_SRVNAMES[i]}
+               if ! check_retval $?; then
+                       return 1
+               fi
+
+               for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+                       ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+                       --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
+                       if ! check_retval $?; then
+                               return 1
+                       fi
+               done
+
+               ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+                             --device=${TARGET_DEVNAMES[i]} \
+                             --mount \
+                             --mountpoint=${TARGET_MNTPNTS[i]} \
+                             --fstype=lustre
+               if ! check_retval $?; then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
 # gen_cluster_xml
 #
 # Run redhat-config-cluster-cmd to create the cluster.xml file
 gen_cluster_xml() {
        declare -i idx
+       declare -i i
        local mcast_IPaddr
+       local node_names
        local hbopt_str hbopt
 
        # Run redhat-config-cluster-cmd to generate cluster.xml
        # Add clumembd tag
        if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
                ${CONFIG_CMD} --clumembd --broadcast=yes
+               ${CONFIG_CMD} --clumembd --multicast=no
                if ! check_retval $?; then
                        return 1
                fi
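
Expanded for a single target, add_services() issues a redhat-config-cluster-cmd
sequence like the one below; the service name, address, device and mount point are
placeholders, and only flags that already appear in the patch are used:

    CONFIG_CMD=/usr/sbin/redhat-config-cluster-cmd

    ${CONFIG_CMD} --add_service --name=lustre-MDT0000
    ${CONFIG_CMD} --service=lustre-MDT0000 --userscript=/etc/init.d/lustre-MDT0000
    ${CONFIG_CMD} --service=lustre-MDT0000 --add_service_ipaddress \
                  --ipaddress=192.168.1.100
    ${CONFIG_CMD} --service=lustre-MDT0000 --device=/dev/sdb --mount \
                  --mountpoint=/mnt/mdt --fstype=lustre
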
@@ -242,59 +381,29 @@ gen_cluster_xml() {
        fi
 
        # Add cluster tag
-       ${CONFIG_CMD} --cluster --name='${TARGET_TYPE} failover group'
-       if ! check_retval $?; then
-               return 1
-       fi
-
-       # Add member tag
+       node_names=
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-               ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
-               if ! check_retval $?; then
-                       return 1
-               fi
+               node_names=${node_names}"${NODE_NAMES[idx]} "
        done
 
-       # Add failoverdomain tag
-       ${CONFIG_CMD} --add_failoverdomain --name=${TARGET_TYPE}-domain
+       ${CONFIG_CMD} --cluster --name="${node_names}failover group"
        if ! check_retval $?; then
                return 1
        fi
 
+       # Add member tag
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-               ${CONFIG_CMD} --failoverdomain=${TARGET_TYPE}-domain\
-                       --add_failoverdomainnode --name=${NODE_NAMES[idx]}
+               ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
                if ! check_retval $?; then
                        return 1
                fi
        done
 
        # Add service tag
-       ${CONFIG_CMD} --add_service --name=${TARGET_TYPE}-service
-       if ! check_retval $?; then
-               return 1
-       fi
-
-       ${CONFIG_CMD} --service=${TARGET_TYPE}-service \
-               --userscript=${LUSTRE_SRV_SCRIPT}
-       if ! check_retval $?; then
-               return 1
-       fi
-
-       ${CONFIG_CMD} --service=${TARGET_TYPE}-service \
-               --failoverdomain=${TARGET_TYPE}-domain
-       if ! check_retval $?; then
+       if ! add_services; then
                return 1
        fi
 
-       for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
-               ${CONFIG_CMD} --service=mgs-service \
-                       --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
-               if ! check_retval $?; then
-                       return 1
-               fi
-       done
-
        # Add other tags
        if [ -n "${HBOPT_OPT}" ]; then
                hbopt_str=`echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
@@ -317,32 +426,54 @@ gen_cluster_xml() {
 #
 # Create the cluster.xml file and scp it to each node's /etc/
 create_config() {
-       CONFIG_PRIMNODE=${TMP_DIR}$"cluster.xml."${PRIM_NODENAME}
+       CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME}
        declare -i idx
 
-       if [ -e ${CONFIG_PRIMNODE} ]; then
-               verbose_output "${CONFIG_PRIMNODE} already exists."
-               return 0
-       fi
-
-       # Run redhat-config-cluster-cmd to generate cluster.xml
-       verbose_output "Creating cluster.xml file for" \
-                      "${PRIM_NODENAME} failover group hosts..."
-       if ! gen_cluster_xml; then
+       # Create symlinks for Lustre services
+       verbose_output "Creating symlinks for lustre target services in"\
+               "${PRIM_NODENAME} failover group hosts..." 
+       if ! create_service; then
                return 1
        fi
        verbose_output "OK"
 
-       /bin/cp -f ${CLUMGR_DIR}cluster.xml ${CONFIG_PRIMNODE}
+       if [ -s ${CONFIG_PRIMNODE} ]; then
+               if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
+               then
+                       verbose_output "${CONFIG_PRIMNODE} already exists."
+                       return 0
+               else
+                       /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml 
+
+                       # Add services into the cluster.xml file
+                       if ! add_services; then
+                               return 1
+                       fi
+               fi
+       else
+               # Run redhat-config-cluster-cmd to generate cluster.xml
+               verbose_output "Creating cluster.xml file for" \
+                              "${PRIM_NODENAME} failover group hosts..."
+               if ! gen_cluster_xml; then
+                       return 1
+               fi
+               verbose_output "OK"
+       fi
+
+       /bin/cp -f ${CLUMAN_DIR}/cluster.xml ${CONFIG_PRIMNODE}
 
        # scp the cluster.xml file to all the nodes
        verbose_output "Remote copying cluster.xml file to" \
                       "${PRIM_NODENAME} failover group hosts..."
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
-               touch ${TMP_DIR}$"cluster.xml."${NODE_NAMES[idx]}
-               scp ${CONFIG_PRIMNODE} ${NODE_NAMES[idx]}:${CLUMGR_DIR}cluster.xml
+               if [ "${PRIM_NODENAME}" != "${NODE_NAMES[idx]}" ]; then
+                       /bin/cp -f ${CONFIG_PRIMNODE} \
+                       ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]}
+               fi
+
+               scp ${CONFIG_PRIMNODE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/cluster.xml
                if [ $? -ne 0 ]; then
-                       echo >&2 "`basename $0`: Fail to scp cluster.xml file"\
+                       echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
                                 "to node ${NODE_NAMES[idx]}!"
                        return 1
                fi
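
Condensed, the new decision logic in create_config() amounts to the sketch below;
grep -q stands in for the grep-in-test used by the patch, and error handling and
return values are omitted:

    if [ -s ${CONFIG_PRIMNODE} ]; then
            if grep -q "${TARGET_SRVNAMES[0]}" ${CONFIG_PRIMNODE}; then
                    verbose_output "${CONFIG_PRIMNODE} already exists."
                    # the patch returns here, skipping regeneration and copying
            else
                    # reuse the saved config and append the new service tags
                    /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml
                    add_services
            fi
    else
            gen_cluster_xml             # first run: build cluster.xml from scratch
    fi
    /bin/cp -f ${CLUMAN_DIR}/cluster.xml ${CONFIG_PRIMNODE}     # cache per-node copy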