From 7bd737657d9f956d7683c7bd82c7d7abc7ef2fa0 Mon Sep 17 00:00:00 2001 From: yujian Date: Thu, 9 Feb 2006 10:45:20 +0000 Subject: [PATCH] A script for generating the Heartbeat HA software's configuration files. --- lustre/utils/gen_hb_config.sh | 623 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 623 insertions(+) create mode 100755 lustre/utils/gen_hb_config.sh diff --git a/lustre/utils/gen_hb_config.sh b/lustre/utils/gen_hb_config.sh new file mode 100755 index 0000000..0177e14 --- /dev/null +++ b/lustre/utils/gen_hb_config.sh @@ -0,0 +1,623 @@ +#!/bin/bash +# +# gen_hb_config.sh - script for generating the Heartbeat HA software's +# configuration files +# +############################################################################### + +# Usage +usage() { + cat >&2 < <-n hostnames> <-d target device> + <-c heartbeat channels> <-s service address> + [-o heartbeat options] [-v] + + -r HBver the version of Heartbeat software + The Heartbeat software versions which are curr- + ently supported are: hbv1 (Heartbeat version 1) + and hbv2 (Heartbeat version 2). + -n hostnames the nodenames of the primary node and its fail- + overs + Multiple nodenames are separated by colon (:) + delimeter. The first one is the nodename of the + primary node, the others are failover nodenames. + -d target device the target device name and type + The name and type are separated by colon (:) + delimeter. The type values are: mgs, mdt, ost or + mgs_mdt. + -c heartbeat channels the methods and devices to send/rcv heartbeats on + -s service address the IP address to failover + -o heartbeat options a "catchall" for other heartbeat configuration + options + -v verbose mode + Causes `basename $0` to print debugging messages + about its progress. +EOF + exit 1 +} + +# Global variables +SCRIPT_PATH=$"./" +SCRIPT_VERIFY_SRVIP=${SCRIPT_PATH}$"verify_serviceIP.sh" + +LUSTRE_SRV_SCRIPT=$"lustre" # service script for lustre +MON_SRV_SCRIPT=$"mon" # service script for mon +LUSTRE_MON_SCRIPT=$"simple.health_check.monitor" +LUSTRE_ALERT_SCRIPT=$"fail_lustre.alert" +CIB_GEN_SCRIPT=$"/usr/lib/heartbeat/cts/haresources2cib.py" + +TMP_DIR=$"/tmp/heartbeat/" # temporary directory +HACF_TEMP=${TMP_DIR}$"ha.cf.temp" +AUTHKEYS_TEMP=${TMP_DIR}$"authkeys.temp" +MONCF_TEMP=${TMP_DIR}$"mon.cf.temp" + +HA_DIR=$"/etc/ha.d/" # Heartbeat configuration directory +MON_DIR=$"/etc/mon/" # mon configuration directory +CIB_DIR=$"/var/lib/heartbeat/crm/" # cib.xml directory + +HBVER_HBV1=$"hbv1" # Heartbeat version 1 +HBVER_HBV2=$"hbv2" # Heartbeat version 2 + +declare -a NODE_NAMES # node names in the failover group + +# Get and check the positional parameters +while getopts "r:n:d:c:s:o:v" OPTION; do + case $OPTION in + r) + HBVER_OPT=$OPTARG + if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \ + && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then + echo >&2 $"`basename $0`: Invalid Heartbeat software" \ + "version - ${HBVER_OPT}!" + usage + fi + ;; + n) + HOSTNAME_OPT=$OPTARG + HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` + if [ ${HOSTNAME_NUM} -lt 2 ]; then + echo >&2 $"`basename $0`: Lack failover nodenames!" + usage + fi + ;; + d) + DEVICE_OPT=$OPTARG + TARGET_DEV=`echo ${DEVICE_OPT} | awk -F":" '{print $1}'` + TARGET_TYPE=`echo ${DEVICE_OPT} | awk -F":" '{print $2}'` + if [ -z "${TARGET_TYPE}" ]; then + echo >&2 $"`basename $0`: Lack target device type!" + usage + fi + if [ "${TARGET_TYPE}" != "mgs" ]&&[ "${TARGET_TYPE}" != "mdt" ]\ + &&[ "${TARGET_TYPE}" != "ost" ]&&[ "${TARGET_TYPE}" != "mgs_mdt" ] + then + echo >&2 $"`basename $0`: Invalid target device type" \ + "- ${TARGET_TYPE}!" + usage + fi + ;; + c) + HBCHANNEL_OPT=$OPTARG + HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \ + | sed 's/"$//'` + if [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*serial*}" ] \ + && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*bcast*}" ] \ + && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*ucast*}" ] \ + && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*mcast*}" ]; then + echo >&2 $"`basename $0`: Invalid Heartbeat channel" \ + "- ${HBCHANNEL_OPT}!" + usage + fi + ;; + s) + SRVADDR_OPT=$OPTARG + ;; + o) + HBOPT_OPT=$OPTARG + HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'` + ;; + v) + VERBOSE_OPT=$"yes" + ;; + ?) + usage + esac +done + +# Check the required parameters +if [ -z "${HBVER_OPT}" ]; then + echo >&2 $"`basename $0`: Lack -r option!" + usage +fi + +if [ -z "${HOSTNAME_OPT}" ]; then + echo >&2 $"`basename $0`: Lack -n option!" + usage +fi + +if [ -z "${DEVICE_OPT}" ]; then + echo >&2 $"`basename $0`: Lack -d option!" + usage +fi + +if [ -z "${HBCHANNEL_OPT}" ]; then + echo >&2 $"`basename $0`: Lack -c option!" + usage +fi + +if [ -z "${SRVADDR_OPT}" ]; then + echo >&2 $"`basename $0`: Lack -s option!" + usage +fi + +if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]; then + echo >&2 $"`basename $0`: Heartbeat version 1 can only support 2 nodes!" + usage +fi + +# Output verbose informations +verbose_output() { + if [ "${VERBOSE_OPT}" = "yes" ]; then + echo "`basename $0`: $*" + fi + return 0 +} + +# Check service IP address +PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` +verbose_output "Verifying service IP ${SRVADDR_OPT} and real IP" \ + "of host ${PRIM_NODENAME} are in the same subnet..." +if ! ${SCRIPT_VERIFY_SRVIP} ${SRVADDR_OPT} ${PRIM_NODENAME}; then + exit 1 +fi +verbose_output "OK" + +# get_nodenames +# +# Get all the node names in this failover group +get_nodenames() { + declare -i idx + local nodename_str nodename + + nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ + END {for (i in a) print a[i]}'` + idx=0 + for nodename in ${nodename_str} + do + NODE_NAMES[idx]=${nodename} + idx=$idx+1 + done + + return 0 +} + +# stop_heartbeat +# +# Run pdsh command to stop each node's heartbeat service +stop_heartbeat() { + declare -i idx + local nodename_str=${PRIM_NODENAME} + + for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do + nodename_str=${nodename_str}$","${NODE_NAMES[idx]} + done + + pdsh -w ${nodename_str} /sbin/service heartbeat stop + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: stop_heartbeat() error:"\ + "Fail to execute pdsh command!" + return 1 + fi + + return 0 +} + +# create_template +# +# Create the templates for ha.cf, authkeys and mon.cf files +create_template() { + /bin/mkdir -p ${TMP_DIR} + + # Create the template for ha.cf + if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then + cat >${HACF_TEMP} <${HACF_TEMP} <${AUTHKEYS_TEMP} <${MONCF_TEMP} <&2 $"`basename $0`: Invalid UDP port" \ + "- ${port}!" + return 1 + fi + fi + + # Add the UDP port number into each failover node's udpport file + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + UDPPORT_NODE=${TMP_DIR}$"udpport."${NODE_NAMES[idx]} + echo ${port} > ${UDPPORT_NODE} + done + + echo ${port} + return 0 +} + +# create_hacf +# +# Create the ha.cf file and scp it to the primary node's /etc/ha.d/ +create_hacf() { + HACF_PRIMNODE=${TMP_DIR}$"ha.cf."${PRIM_NODENAME} + + declare -i idx + + if [ -s ${HACF_PRIMNODE} ]; then + # The ha.cf file for the primary node has already existed. + verbose_output "${HACF_PRIMNODE} already exists." + return 0 + fi + + /bin/cp -f ${HACF_TEMP} ${HACF_PRIMNODE} + + if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*bcast*}" ] \ + || [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*ucast*}" ]; then + UDPPORT_OPT=$(gen_udpport) + if [ $? -ne 0 ]; then + return 1 + fi + echo "udpport ${UDPPORT_OPT}" >> ${HACF_PRIMNODE} + fi + + if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*serial*}" ]; then + echo "baud 19200" >> ${HACF_PRIMNODE} + fi + + echo ${HBCHANNEL_OPT} | awk '{split($HBCHANNEL_OPT, a, ":")} \ + END {for (i in a) print a[i]}' >> ${HACF_PRIMNODE} + + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + echo "node ${NODE_NAMES[idx]}" >> ${HACF_PRIMNODE} + done + + echo ${HBOPT_OPT} | awk '{split($HBOPT_OPT, a, ":")} \ + END {for (i in a) print a[i]}' >> ${HACF_PRIMNODE} + + # scp ha.cf file to the primary node's /etc/ha.d/ + scp ${HACF_PRIMNODE} ${PRIM_NODENAME}:${HA_DIR}ha.cf + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: Fail to scp ha.cf file to" \ + "node ${PRIM_NODENAME}!" + return 1 + fi + + return 0 +} + +# add_resgrp +# +# Add the resource group line into the haresources file +add_resgrp() { + declare -i idx + + echo "${PRIM_NODENAME} ${SRVADDR_OPT} "\ + "${LUSTRE_SRV_SCRIPT}::${TARGET_TYPE}::${TARGET_DEV} "\ + "${MON_SRV_SCRIPT}" >> ${HARES_PRIMNODE} + + for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do + HARES_NODE=${TMP_DIR}$"haresources."${NODE_NAMES[idx]} + /bin/cp -f ${HARES_PRIMNODE} ${HARES_NODE} + done + + return 0 +} + +# create_haresources +# +# Create the haresources file and scp it to the each node's /etc/ha.d/ +create_haresources() { + HARES_PRIMNODE=${TMP_DIR}$"haresources."${PRIM_NODENAME} + declare -i idx + + if [ -s ${HARES_PRIMNODE} ]; then + # The haresources file for the primary node has already existed + verbose_output "${HARES_PRIMNODE} already exists." + + if [ -z "`grep ${SRVADDR_OPT} ${HARES_PRIMNODE}`" ]; then + # Service IP does not exist in the haresources file + # Add the resource group line into the haresources file + if ! add_resgrp; then + echo >&2 "`basename $0`: add_resgrp() error!" + return 1 + fi + fi + else # The haresources file for the primary node does not exist + # Add the resource group line into the haresources file + if ! add_resgrp; then + echo >&2 "`basename $0`: add_resgrp() error!" + return 1 + fi + + fi + + # Add the primary node name into all the nodes' node files + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + NODEFILE_NODE=${TMP_DIR}$"nodefile."${NODE_NAMES[idx]} + touch ${NODEFILE_NODE} + + if [ -z "`grep ${PRIM_NODENAME} ${NODEFILE_NODE}`" ]; then + echo ${PRIM_NODENAME} >> ${NODEFILE_NODE} + fi + done + + # Check whether all the nodes in the failover group are in the node file + # If they are, then we can scp the haresources file to all the nodes; + # Else return 0 + NODEFILE_PRIMNODE=${TMP_DIR}$"nodefile."${PRIM_NODENAME} + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + if [ -z "`grep ${NODE_NAMES[idx]} ${NODEFILE_PRIMNODE}`" ]; then + verbose_output "INFO: not have the information of node"\ + "${NODE_NAMES[idx]}. The haresources"\ + "file is incompleted." + return 0 + fi + done + + # All the nodes in the failover group are in the node file, then we can + # scp the haresources file + + # Generate the cib.xml file + if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then + CIB_PRIMNODE=${TMP_DIR}$"cib.xml."${PRIM_NODENAME} + python ${CIB_GEN_SCRIPT} ${HARES_PRIMNODE} > ${CIB_PRIMNODE} + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: Fail to generate cib.xml file"\ + "for node ${PRIM_NODENAME}!" + return 1 + fi + fi + + # scp the haresources file or cib.xml file + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then + scp ${CIB_PRIMNODE} ${NODE_NAMES[idx]}:${CIB_DIR}cib.xml + else + scp ${HARES_PRIMNODE} ${NODE_NAMES[idx]}:${HA_DIR}haresources + fi + + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: Fail to scp haresources file"\ + "to node ${NODE_NAMES[idx]}!" + return 1 + fi + done + + return 0 +} + +# create_authkeys +# +# Create the authkeys file and scp it to the each node's /etc/ha.d/ +create_authkeys() { + AUTHKEYS_PRIMNODE=${TMP_DIR}$"authkeys."${PRIM_NODENAME} + declare -i idx + + if [ -e ${AUTHKEYS_PRIMNODE} ]; then + verbose_output "${AUTHKEYS_PRIMNODE} already exists." + return 0 + fi + + # scp the authkeys file to all the nodes + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + touch ${TMP_DIR}$"authkeys."${NODE_NAMES[idx]} + scp ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}authkeys + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: Fail to scp authkeys file"\ + "to node ${NODE_NAMES[idx]}!" + return 1 + fi + done + + return 0 +} + +# create_moncf +# +# Create the mon.cf file and scp it to the each node's /etc/mon/ +create_moncf() { + MONCF_PRIMNODE=${TMP_DIR}$"mon.cf."${PRIM_NODENAME} + declare -i idx + local hostgroup_str=$"hostgroup ${TARGET_TYPE}-group" + + if [ -e ${MONCF_PRIMNODE} ]; then + verbose_output "${MONCF_PRIMNODE} already exists." + return 0 + fi + + /bin/cp -f ${MONCF_TEMP} ${MONCF_PRIMNODE} + + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + hostgroup_str=${hostgroup_str}$" "${NODE_NAMES[idx]} + done + + echo ${hostgroup_str} >> ${MONCF_PRIMNODE} + + cat >>${MONCF_PRIMNODE} <&2 "`basename $0`: Fail to scp mon.cf file"\ + "to node ${NODE_NAMES[idx]}!" + return 1 + fi + done + + return 0 +} + +# generate_config +# +# Generate the configuration files for Heartbeat and scp them to all the nodes +generate_config() { + if ! create_template; then + return 1 + fi + + verbose_output "Creating and remote copying ha.cf file to host" \ + "${PRIM_NODENAME}..." + if ! create_hacf; then + return 1 + fi + verbose_output "OK" + + if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then + verbose_output "Creating and remote copying haresources file"\ + "to ${PRIM_NODENAME} failover group hosts..." + else + verbose_output "Creating and remote copying cib.xml file"\ + "to ${PRIM_NODENAME} failover group hosts..." + fi + + if ! create_haresources; then + return 1 + fi + verbose_output "OK" + + verbose_output "Creating and remote copying authkeys file to" \ + "${PRIM_NODENAME} failover group hosts..." + if ! create_authkeys; then + return 1 + fi + verbose_output "OK" + + verbose_output "Creating and remote copying mon.cf file to" \ + "${PRIM_NODENAME} failover group hosts..." + if ! create_moncf; then + return 1 + fi + verbose_output "OK" + + return 0 +} + +# Main flow +# Get all the node names +if ! get_nodenames; then + exit 1 +fi + +# Stop heartbeat services +verbose_output "Stopping heartbeat service in the ${PRIM_NODENAME}"\ + "failover group hosts..." +if ! stop_heartbeat; then + exit 1 +fi +verbose_output "OK" + +# Generate configuration files +if ! generate_config; then + exit 1 +fi + +exit 0 -- 1.8.3.1