#!/bin/bash
#
# lc_hb - script for generating the Heartbeat HA software's
#         configuration files
#
# Generates ha.cf, haresources, authkeys (and, for Heartbeat v1, mon.cf;
# for Heartbeat v2, cib.xml) for one failover group and copies them to
# every node in the group with scp.
#
# Relies on variables and helpers sourced from lc_common below:
# HBVER_HBV1, HBVER_HBV2, HA_DIR, MON_DIR, CIB_DIR, HB_TMP_DIR,
# FILE_SUFFIX, FS_TYPE, REMOTE, TUNEFS, SCRIPT_GEN_MONCF, verbose_output.
###############################################################################

# usage
#
# Print the command-line synopsis to stderr and exit with status 1.
usage() {
	# NOTE(review): this here-document was corrupted in the source file;
	# the text below is reconstructed from the surviving fragments --
	# verify the exact wording against the upstream script.
	cat >&2 <<EOF

Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
                     <-d target device> [-d target device...]

	-r HBver		the version of Heartbeat software
				The Heartbeat software versions which are curr-
				ently supported are: hbv1 (Heartbeat version 1)
				and hbv2 (Heartbeat version 2).
	-n hostnames		the nodenames of the primary node and its fail-
				overs
				Multiple nodenames are separated by colon (:)
				delimiter. The first one is the nodename of the
				primary node, the others are failover nodenames.
	-v			verbose mode
	-d target device	the target device name and mount point
				The device name and mount point are separated by
				colon (:) delimiter.

EOF
	exit 1
}

# Get the library of functions
. @scriptlibdir@/lc_common

#****************************** Global variables ******************************#
# Heartbeat tools
HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"}	# Heartbeat tools path
CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py	# haresources -> cib.xml
CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}		# Heartbeat status tool

# Service directories and names
HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"}	# Heartbeat resources
LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"}	# Service script provided by Heartbeat

TMP_DIR=${HB_TMP_DIR}			# Temporary directory
HACF_TEMP=${TMP_DIR}/ha.cf.temp
AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}

declare -a NODE_NAMES			# Node names in the failover group

# Lustre target device names, service names and mount points
declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
declare -i TARGET_NUM=0			# Number of targets

# Get and check the positional parameters
VERBOSE_OUTPUT=false
while getopts "r:n:vd:" OPTION; do
	case $OPTION in
	r)
		HBVER_OPT=$OPTARG
		if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
		    && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
			echo >&2 $"`basename $0`: Invalid Heartbeat software" \
				  "version - ${HBVER_OPT}!"
			usage
		fi
		;;
	n)
		HOSTNAME_OPT=$OPTARG
		PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
		if [ -z "${PRIM_NODENAME}" ]; then
			echo >&2 $"`basename $0`: Missing primary nodename!"
			usage
		fi
		HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
		if [ ${HOSTNAME_NUM} -lt 2 ]; then
			echo >&2 $"`basename $0`: Missing failover nodenames!"
			usage
		fi
		# NOTE: this check only fires if -r was given before -n
		if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
		then
			echo >&2 $"`basename $0`: Heartbeat version 1 can" \
				  "only support 2 nodes!"
			usage
		fi
		;;
	v)
		VERBOSE_OUTPUT=true
		;;
	d)
		DEVICE_OPT=$OPTARG
		TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
		TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
		if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
			echo >&2 $"`basename $0`: Missing target device name!"
			usage
		fi
		if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
			echo >&2 $"`basename $0`: Missing mount point for target"\
				  "${TARGET_DEVNAMES[TARGET_NUM]}!"
			usage
		fi
		TARGET_NUM=$(( TARGET_NUM + 1 ))
		;;
	?)
		usage
	esac
done

# Check the required parameters
if [ -z "${HBVER_OPT}" ]; then
	echo >&2 $"`basename $0`: Missing -r option!"
	usage
fi

if [ -z "${HOSTNAME_OPT}" ]; then
	echo >&2 $"`basename $0`: Missing -n option!"
	usage
fi

if [ -z "${DEVICE_OPT}" ]; then
	echo >&2 $"`basename $0`: Missing -d option!"
	usage
fi

# get_nodenames
#
# Split the colon-separated ${HOSTNAME_OPT} into the NODE_NAMES array.
# The primary node stays first; order is preserved.
get_nodenames() {
	declare -i idx
	local nodename_str nodename

	# FIX(review): the original used an awk "for (i in a)" loop, whose
	# iteration order is unspecified by POSIX awk and could scramble the
	# node order. tr preserves the order given on the command line.
	nodename_str=`echo ${HOSTNAME_OPT} | tr ':' ' '`
	idx=0
	for nodename in ${nodename_str}
	do
		NODE_NAMES[idx]=${nodename}
		idx=$(( idx + 1 ))
	done

	return 0
}

# check_remote_file host_name file
#
# Run remote command to check whether @file exists in @host_name.
# Returns 0 if it exists, 1 on missing argument or missing file.
check_remote_file() {
	local host_name=$1
	local file_name=$2

	if [ -z "${host_name}" ]; then
		echo >&2 "`basename $0`: check_remote_file() error:"\
			 "Missing hostname!"
		return 1
	fi

	if [ -z "${file_name}" ]; then
		echo >&2 "`basename $0`: check_remote_file() error:"\
			 "Missing file name!"
		return 1
	fi

	# Execute remote command to check the file
	${REMOTE} ${host_name} "[ -e ${file_name} ]"
	if [ $? -ne 0 ]; then
		echo >&2 "`basename $0`: check_remote_file() error:"\
			 "${file_name} does not exist in host ${host_name}!"
		return 1
	fi

	return 0
}

# hb_running host_name
#
# Run remote command to check whether heartbeat service is running in
# @host_name.
# Returns: 0 - running; 1 - stopped; 2 - remote command failed
#          (cl_status reported something other than "stop").
hb_running() {
	local host_name=$1
	local ret_str

	ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
	if [ $? -ne 0 ]; then
		# cl_status failed: distinguish "service stopped" from a real
		# remote-command error by looking for "stop" in the output.
		if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
			echo >&2 "`basename $0`: hb_running() error:"\
				 "remote command to ${host_name} error: ${ret_str}!"
			return 2
		else
			return 1
		fi
	fi

	return 0
}

# stop_heartbeat host_name
#
# Run remote command to stop heartbeat service running in @host_name.
stop_heartbeat() {
	local host_name=$1
	local ret_str

	ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
	if [ $? -ne 0 ]; then
		echo >&2 "`basename $0`: stop_heartbeat() error:"\
			 "remote command to ${host_name} error: ${ret_str}!"
		return 1
	fi

	echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
	return 0
}

# check_heartbeat
#
# Run remote command to check each node's heartbeat service.  If the service
# is running on a node, ask the user whether to stop it; any answer other
# than "n" stops the service so new configurations can be generated.
# Returns: 0 - ok to proceed; 1 - error; 2 - user declined.
check_heartbeat() {
	declare -i idx
	local OK

	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		# Check Heartbeat configuration directory
		if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
			echo >&2 "`basename $0`: check_heartbeat() error:"\
				 "Is Heartbeat package installed?"
			return 1
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
			# Check mon configuration directory
			if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
				echo >&2 "`basename $0`: check_heartbeat()"\
					 "error: Is mon package installed?"
				return 1
			fi
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
			# Check crm directory
			if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
				echo >&2 "`basename $0`: check_heartbeat()"\
					 "error: Is Heartbeat v2 package installed?"
				return 1
			fi
		fi

		# Check heartbeat service status
		hb_running ${NODE_NAMES[idx]}
		rc=$?
		if [ "$rc" -eq "2" ]; then
			return 1
		elif [ "$rc" -eq "1" ]; then
			verbose_output "Heartbeat service is stopped on"\
				       "node ${NODE_NAMES[idx]}."
		elif [ "$rc" -eq "0" ]; then
			OK=
			echo -n "`basename $0`: Heartbeat service is running on"\
				"${NODE_NAMES[idx]}, go ahead to stop the service and"\
				"generate new configurations? [y/n]:"
			read OK
			if [ "${OK}" = "n" ]; then
				echo "`basename $0`: New Heartbeat configurations"\
				     "are not generated."
				return 2
			fi

			# Stop heartbeat service
			stop_heartbeat ${NODE_NAMES[idx]}
		fi
	done

	return 0
}

# get_srvname hostname target_devname
#
# Get the lustre target server name from the node @hostname by running
# tunefs remotely and parsing the "Target:" line.  On success the server
# name is written to stdout; on failure the error message is written to
# stdout (the caller captures it and re-echoes to stderr).
get_srvname() {
	local host_name=$1
	local target_devname=$2
	local target_srvname=
	local ret_str

	# Execute remote command to get the target server name
	ret_str=`${REMOTE} ${host_name} \
		"${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
	if [ $? -ne 0 ]; then
		echo "`basename $0`: get_srvname() error:" \
		     "from host ${host_name} - ${ret_str}"
		return 1
	fi

	if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
		ret_str=${ret_str#*Target: }
		target_srvname=`echo ${ret_str} | awk '{print $1}'`
	fi

	if [ -z "${target_srvname}" ]; then
		echo "`basename $0`: get_srvname() error: Cannot get the"\
		     "server name of target ${target_devname} in ${host_name}!"
		return 1
	fi

	echo ${target_srvname}
	return 0
}

# get_srvnames
#
# Get server names of all the Lustre targets in this failover group.
get_srvnames() {
	declare -i i

	# Initialize the TARGET_SRVNAMES array
	unset TARGET_SRVNAMES

	# Get Lustre target service names
	for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
		TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
				     ${TARGET_DEVNAMES[i]})
		if [ $? -ne 0 ]; then
			# get_srvname put its error text on stdout
			echo >&2 "${TARGET_SRVNAMES[i]}"
			return 1
		fi
	done

	return 0
}

# create_template
#
# Create the templates for ha.cf and authkeys files in ${TMP_DIR}.
create_template() {
	/bin/mkdir -p ${TMP_DIR}

	# NOTE(review): the three here-document bodies below were destroyed
	# when this file was mangled; they are reconstructed from the upstream
	# lc_hb script -- verify the tunables before deploying.

	# Create the template for ha.cf
	if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
		cat >${HACF_TEMP} <<EOF
debugfile /var/log/ha-debug
logfile /var/log/ha-log
logfacility     local0
keepalive 2
deadtime 30
initdead 120

auto_failback off

EOF
	elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
		cat >${HACF_TEMP} <<EOF
use_logd        yes
keepalive 1
deadtime 10
initdead 60

crm yes

EOF
	fi

	# Create the template for authkeys
	if [ ! -s ${AUTHKEYS_TEMP} ]; then
		cat >${AUTHKEYS_TEMP} <<EOF
auth 1
1 sha1 HelloLustre!
EOF
	fi

	return 0
}

# create_hacf
#
# Create the ha.cf file and scp it to each node's /etc/ha.d/.
# NOTE(review): the head of this function was destroyed in the mangled
# source; the HACF_PRIMNODE/HACF_LUSTRE setup and the node-append loop are
# reconstructed from the upstream lc_hb script and the surviving fragments.
create_hacf() {
	HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
	HACF_LUSTRE=${TMP_DIR}$"/ha.cf"${FILE_SUFFIX}

	declare -i idx

	if [ -e ${HACF_PRIMNODE} ]; then
		# The ha.cf file for the primary node already exists
		verbose_output "${HACF_PRIMNODE} already exists."
		return 0
	fi

	/bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}

	# Append one "node" directive per member of the failover group
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		echo "node    ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
	done

	# scp ha.cf file to all the nodes
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
		scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: Failed to scp ha.cf file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	return 0
}

# create_haresources
#
# Create the haresources file and scp it to the each node's /etc/ha.d/.
# For Heartbeat v2, additionally convert it to cib.xml and scp that to
# each node's ${CIB_DIR}.
create_haresources() {
	HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
	HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX}
	declare -i idx
	local res_line

	if [ -s ${HARES_PRIMNODE} ]; then
		# The haresources file for the primary node has already existed
		if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
			verbose_output "${HARES_PRIMNODE} already exists."
			return 0
		fi
	fi

	# Add the resource group line into the haresources file
	res_line=${PRIM_NODENAME}
	for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
		res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}

		if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
			res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
		fi
	done
	echo "${res_line}" >> ${HARES_LUSTRE}

	# Generate the cib.xml file
	if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
		# Add group haclient and user hacluster (required by Heartbeat v2)
		[ -z "`grep haclient /etc/group`" ] && groupadd haclient
		[ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster

		CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX}
		python ${CIB_GEN_SCRIPT} --stdout \
		${HARES_LUSTRE} > ${CIB_LUSTRE}
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: Failed to generate cib.xml file"\
				 "for node ${PRIM_NODENAME}!"
			return 1
		fi
	fi

	# scp the haresources file or cib.xml file
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		/bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
		scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: Failed to scp haresources file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
			scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
			if [ $? -ne 0 ]; then
				echo >&2 "`basename $0`: Failed to scp cib.xml"\
					 "file to node ${NODE_NAMES[idx]}!"
				return 1
			fi
		fi
	done

	return 0
}

# create_authkeys
#
# Create the authkeys file and scp it to the each node's /etc/ha.d/.
create_authkeys() {
	AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
	declare -i idx

	if [ -e ${AUTHKEYS_PRIMNODE} ]; then
		verbose_output "${AUTHKEYS_PRIMNODE} already exists."
		return 0
	fi

	# scp the authkeys file to all the nodes
	# Heartbeat requires authkeys to be mode 600; -p preserves it
	chmod 600 ${AUTHKEYS_TEMP}
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
		scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: Failed to scp authkeys file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	return 0
}

# create_moncf
#
# Create the mon.cf file and scp it to the each node's /etc/mon/.
# Target service names already present in a previous mon.cf are carried
# over so existing monitors are preserved.
create_moncf() {
	MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
	MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
	local srv_name params=
	declare -i idx
	declare -a OLD_TARGET_SRVNAMES	# targets in other nodes
					# in this failover group

	# Initialize the OLD_TARGET_SRVNAMES array
	unset OLD_TARGET_SRVNAMES

	if [ -s ${MONCF_PRIMNODE} ]; then
		if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
		then
			verbose_output "${MONCF_PRIMNODE} already exists."
			return 0
		else
			# Get the Lustre target service names
			# from the previous mon.cf file
			idx=0
			for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
					|awk '$2 ~ /-mon/ {print $2}'|xargs`
			do
				OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
							 |sed 's/-mon//g'`
				idx=$(( idx + 1 ))
			done
		fi
	fi

	# Construct the parameters to mon.cf generation script
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		params=${params}" -n "${NODE_NAMES[idx]}
	done

	for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
		params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
	done

	for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
		params=${params}" -o "${TARGET_SRVNAMES[idx]}
	done

	${SCRIPT_GEN_MONCF} ${params}
	if [ $? -ne 0 ]; then
		echo >&2 "`basename $0`: Failed to generate mon.cf file"\
			 "by using ${SCRIPT_GEN_MONCF}!"
		return 1
	fi

	# The generator writes its output as *-mon.cfg in the cwd
	/bin/mv *-mon.cfg ${MONCF_LUSTRE}

	# scp the mon.cf file to all the nodes
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		/bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
		scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: Failed to scp mon.cf file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	return 0
}

# generate_config
#
# Generate the configuration files for Heartbeat and scp them to all the
# nodes in the failover group.
generate_config() {
	if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
		# Get server names of Lustre targets (needed for mon resources)
		if ! get_srvnames; then
			return 1
		fi
	fi

	if ! create_template; then
		return 1
	fi

	verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
		       "${PRIM_NODENAME} failover group hosts..."
	if ! create_hacf; then
		return 1
	fi
	verbose_output "OK"

	verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
		       "to ${PRIM_NODENAME} failover group hosts..."
	if ! create_haresources; then
		return 1
	fi
	verbose_output "OK"

	verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
		       "${PRIM_NODENAME} failover group hosts..."
	if ! create_authkeys; then
		return 1
	fi
	verbose_output "OK"

	if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
		verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
			       "${PRIM_NODENAME} failover group hosts..."
		if ! create_moncf; then
			return 1
		fi
		verbose_output "OK"
	fi

	return 0
}

# Main flow
# Get all the node names
if ! get_nodenames; then
	exit 1
fi

# Check heartbeat services
verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
	       "failover group hosts..."
check_heartbeat
rc=$?
if [ "$rc" -eq "2" ]; then
	# User declined to stop a running service: nothing to do
	verbose_output "OK"
	exit 0
elif [ "$rc" -eq "1" ]; then
	exit 1
fi
verbose_output "OK"

# Generate configuration files
if ! generate_config; then
	exit 1
fi

exit 0