#!/bin/bash # # lc_cluman - script for generating the Red Hat Cluster Manager # HA software's configuration files # ################################################################################ # Usage usage() { cat >&2 < [-s service addresses] [-c heartbeat channel] [-o heartbeat options] [-v] <-d target device> [-d target device...] -n hostnames the nodenames of the primary node and its fail- overs Multiple nodenames are separated by colon (:) delimeter. The first one is the nodename of the primary node, the others are failover nodenames. -s service addresses the IP addresses to failover Multiple addresses are separated by colon (:) delimeter. -c heartbeat channel the method to send/rcv heartbeats on The default method is multicast, and multicast_ ipaddress is "225.0.0.11". -o heartbeat options a "catchall" for other heartbeat configuration options Multiple options are separated by colon (:) delimeter. -v verbose mode -d target device the target device name and mount point The device name and mount point are separated by colon (:) delimeter. EOF exit 1 } # Get the library of functions . @scriptlibdir@/lc_common #****************************** Global variables ******************************# TMP_DIR=${CLUMGR_TMP_DIR} # Temporary directory declare -a NODE_NAMES # Node names in the failover group declare -a SRV_IPADDRS # Service IP addresses # Lustre target device names, service names and mount points declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS declare -i TARGET_NUM=0 # Number of targets # Get and check the positional parameters VERBOSE_OUTPUT=false while getopts "n:s:c:o:vd:" OPTION; do case $OPTION in n) HOSTNAME_OPT=$OPTARG PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` if [ -z "${PRIM_NODENAME}" ]; then echo >&2 $"`basename $0`: Missing primary nodename!" usage fi HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` if [ ${HOSTNAME_NUM} -lt 2 ]; then echo >&2 $"`basename $0`: Missing failover nodenames!" usage fi ;; s) SRVADDR_OPT=$OPTARG ;; c) HBCHANNEL_OPT=$OPTARG HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \ | sed 's/"$//'` if [ -n "${HBCHANNEL_OPT}" ] \ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then echo >&2 $"`basename $0`: Invalid Heartbeat channel" \ "- ${HBCHANNEL_OPT}!" usage fi ;; o) HBOPT_OPT=$OPTARG HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'` ;; v) VERBOSE_OUTPUT=true ;; d) DEVICE_OPT=$OPTARG TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then echo >&2 $"`basename $0`: Missing target device name!" usage fi if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then echo >&2 $"`basename $0`: Missing mount point for target"\ "${TARGET_DEVNAMES[TARGET_NUM]}!" usage fi TARGET_NUM=$(( TARGET_NUM + 1 )) ;; ?) usage esac done # Check the required parameters if [ -z "${HOSTNAME_OPT}" ]; then echo >&2 $"`basename $0`: Missing -n option!" usage fi if [ -z "${DEVICE_OPT}" ]; then echo >&2 $"`basename $0`: Missing -d option!" usage fi # get_nodenames # # Get all the node names in this failover group get_nodenames() { declare -i idx local nodename_str nodename nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ END {for (i in a) print a[i]}'` idx=0 for nodename in ${nodename_str} do NODE_NAMES[idx]=${nodename} idx=$idx+1 done return 0 } # get_check_srvIPaddrs # # Get and check all the service IP addresses in this failover group get_check_srvIPaddrs() { declare -i idx declare -i i local srvIPaddr_str srvIPaddr srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\ END {for (i in a) print a[i]}'` idx=0 for srvIPaddr in ${srvIPaddr_str} do SRV_IPADDRS[idx]=${srvIPaddr} idx=$idx+1 done for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do # Check service IP address verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \ "real IP of host ${NODE_NAMES[i]} are in the" \ "same subnet..." if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]} then return 1 fi verbose_output "OK" done done return 0 } # cluman_running host_name # # Run remote command to check whether clumanager service is running in @host_name cluman_running() { local host_name=$1 local ret_str ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager status" 2>&1` if [ $? -ne 0 ]; then if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then echo >&2 "`basename $0`: cluman_running() error:"\ "remote command to ${host_name} error: ${ret_str}!" return 2 else return 1 fi fi return 0 } # stop_cluman host_name # # Run remote command to stop clumanager service running in @host_name stop_cluman() { local host_name=$1 local ret_str ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1` if [ $? -ne 0 ]; then echo >&2 "`basename $0`: stop_cluman() error:"\ "remote command to ${host_name} error: ${ret_str}!" return 1 fi echo "`basename $0`: Clumanager service is stopped on node ${host_name}." return 0 } # check_cluman # # Run remote command to check each node's clumanager service check_cluman() { declare -i idx local OK # Get and check all the service IP addresses if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then return 1 fi for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do # Check clumanager service status cluman_running ${NODE_NAMES[idx]} rc=$? if [ "$rc" -eq "2" ]; then return 1 elif [ "$rc" -eq "1" ]; then verbose_output "Clumanager service is stopped on"\ "node ${NODE_NAMES[idx]}." elif [ "$rc" -eq "0" ]; then OK= echo -n "`basename $0`: Clumanager service is running on"\ "${NODE_NAMES[idx]}, go ahead to stop the service and"\ "generate new configurations? [y/n]:" read OK if [ "${OK}" = "n" ]; then echo "`basename $0`: New Clumanager configurations"\ "are not generated." return 2 fi # Stop clumanager service stop_cluman ${NODE_NAMES[idx]} fi done return 0 } # get_srvname hostname target_devname # # Get the lustre target server name from the node @hostname get_srvname() { local host_name=$1 local target_devname=$2 local target_srvname= local ret_str # Execute remote command to get the target server name ret_str=`${REMOTE} ${host_name} \ "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` if [ $? -ne 0 ]; then echo "`basename $0`: get_srvname() error:" \ "from host ${host_name} - ${ret_str}" return 1 fi if [ "${ret_str}" != "${ret_str#*Target: }" ]; then ret_str=${ret_str#*Target: } target_srvname=`echo ${ret_str} | awk '{print $1}'` fi if [ -z "${target_srvname}" ]; then echo "`basename $0`: get_srvname() error: Cannot get the"\ "server name of target ${target_devname} in ${host_name}!" return 1 fi echo ${target_srvname} return 0 } # get_srvnames # # Get server names of all the Lustre targets in this failover group get_srvnames() { declare -i i # Initialize the TARGET_SRVNAMES array unset TARGET_SRVNAMES # Get Lustre target service names for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ ${TARGET_DEVNAMES[i]}) if [ $? -ne 0 ]; then echo >&2 "${TARGET_SRVNAMES[i]}" return 1 fi done return 0 } # check_retval retval # # Check the return value of redhat-config-cluster-cmd check_retval() { if [ $1 -ne 0 ]; then echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!" return 1 fi return 0 } # add_services # # Add service tags into the cluster.xml file add_services() { declare -i idx declare -i i # Add service tag for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]} if ! check_retval $?; then return 1 fi for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]} if ! check_retval $?; then return 1 fi done ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ --add_device \ --name=${TARGET_DEVNAMES[i]} if ! check_retval $?; then return 1 fi ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ --device=${TARGET_DEVNAMES[i]} \ --mount \ --mountpoint=${TARGET_MNTPNTS[i]} \ --fstype=lustre if ! check_retval $?; then return 1 fi done return 0 } # gen_cluster_xml # # Run redhat-config-cluster-cmd to create the cluster.xml file gen_cluster_xml() { declare -i idx declare -i i local mcast_IPaddr local node_names local hbopt [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old # Run redhat-config-cluster-cmd to generate cluster.xml # Add clumembd tag if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then ${CONFIG_CMD} --clumembd --broadcast=yes ${CONFIG_CMD} --clumembd --multicast=no if ! check_retval $?; then return 1 fi elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'` if [ -n "${mcast_IPaddr}" ]; then ${CONFIG_CMD} --clumembd --multicast=yes\ --multicast_ipaddress=${mcast_IPaddr} if ! check_retval $?; then return 1 fi fi fi # Add cluster tag node_names= for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do node_names=${node_names}"${NODE_NAMES[idx]} " done ${CONFIG_CMD} --cluster --name="${node_names}failover group" if ! check_retval $?; then return 1 fi # Add member tag for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]} if ! check_retval $?; then return 1 fi done # Add service tag if ! add_services; then return 1 fi # Add other tags if [ -n "${HBOPT_OPT}" ]; then while read -r hbopt do ${CONFIG_CMD} ${hbopt} if ! check_retval $?; then return 1 fi done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\ END {for (i in a) print a[i]}') fi return 0 } # create_config # # Create the cluster.xml file and scp it to the each node's /etc/ create_config() { declare -i idx /bin/mkdir -p ${TMP_DIR} CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME} CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX} # Get server names of Lustre targets if ! get_srvnames; then return 1 fi if [ -s ${CONFIG_PRIMNODE} ]; then if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ] then verbose_output "${CONFIG_PRIMNODE} already exists." return 0 else [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml # Add services into the cluster.xml file if ! add_services; then return 1 fi fi else # Run redhat-config-cluster-cmd to generate cluster.xml verbose_output "Creating cluster.xml file for" \ "${PRIM_NODENAME} failover group hosts..." if ! gen_cluster_xml; then return 1 fi verbose_output "OK" fi /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE} [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \ /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml # scp the cluster.xml file to all the nodes verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \ "${PRIM_NODENAME} failover group hosts..." for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]} scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/ if [ $? -ne 0 ]; then echo >&2 "`basename $0`: Failed to scp cluster.xml file"\ "to node ${NODE_NAMES[idx]}!" return 1 fi done verbose_output "OK" return 0 } # Main flow # Get all the node names if ! get_nodenames; then exit 1 fi # Check clumanager services verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\ "failover group hosts..." check_cluman rc=$? if [ "$rc" -eq "2" ]; then verbose_output "OK" exit 0 elif [ "$rc" -eq "1" ]; then exit 1 fi verbose_output "OK" # Generate configuration files if ! create_config; then exit 1 fi exit 0