#!/bin/bash
#
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
#
# lc_hb - script for generating the Heartbeat HA software's configuration files
#
###############################################################################
Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
       <-d target device> [-d target device...]

    -r HBver            the version of Heartbeat software
                        The Heartbeat software versions currently
                        supported are: hbv1 (Heartbeat version 1)
                        and hbv2 (Heartbeat version 2).
    -n hostnames        the nodenames of the primary node and its
                        failover nodes
                        Multiple nodenames are separated by colon (:)
                        delimiter. The first one is the nodename of the
                        primary node, the others are failover nodenames.
    -d target device    the target device name and mount point
                        The device name and mount point are separated by
                        colon (:) delimiter.
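
    A sample invocation (the hostnames, device and mount point below are
    illustrative placeholders only):

        `basename $0` -r hbv1 -n oss01:oss02 -d /dev/sdb1:/mnt/lustre/ost0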
# Get the library of functions
. @scriptlibdir@/lc_common

#****************************** Global variables ******************************#
HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"}  # Heartbeat tools path
CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}

# Service directories and names
HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"}  # Heartbeat resources
LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"}  # Service script provided by Heartbeat

TMP_DIR=${HB_TMP_DIR}  # Temporary directory
HACF_TEMP=${TMP_DIR}/ha.cf.temp
AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}

declare -a NODE_NAMES  # Node names in the failover group

# Lustre target device names, service names and mount points
declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
declare -i TARGET_NUM=0  # Number of targets
# Get and check the positional parameters
while getopts "r:n:vd:" OPTION; do
        if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
            && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
            error_output "Invalid Heartbeat software" \
                         "version - ${HBVER_OPT}!"
        PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
        if [ -z "${PRIM_NODENAME}" ]; then
            error_output "Missing primary nodename!"

        HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
        if [ ${HOSTNAME_NUM} -lt 2 ]; then
            error_output "Missing failover nodenames!"

        if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
        then
            error_output "Heartbeat version 1 can" \
                         "only support 2 nodes!"
            TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT} | awk -F":" '{print $1}'`
            TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT} | awk -F":" '{print $2}'`
            if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
                error_output "Missing target device name!"

            if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
                error_output "Missing mount point for target"\
                             "${TARGET_DEVNAMES[TARGET_NUM]}!"

            TARGET_NUM=$(( TARGET_NUM + 1 ))
# Check the required parameters
if [ -z "${HBVER_OPT}" ]; then
    error_output "Missing -r option!"

if [ -z "${HOSTNAME_OPT}" ]; then
    error_output "Missing -n option!"

if [ -z "${DEVICE_OPT}" ]; then
    error_output "Missing -d option!"
# get_nodenames
#
# Get all the node names in this failover group
get_nodenames() {
    declare -i idx
    local nodename_str nodename

    # Split the colon-separated @HOSTNAME_OPT into a whitespace-separated
    # list of nodenames, preserving the original order
    nodename_str=`echo ${HOSTNAME_OPT} | awk -F":" \
                  '{for (i = 1; i <= NF; i++) print $i}'`
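    # e.g. HOSTNAME_OPT "oss01:oss02" (illustrative names) yields
    # nodename_str "oss01 oss02"; the first name is the primary node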
    idx=0
    for nodename in ${nodename_str}
    do
        NODE_NAMES[idx]=${nodename}
        idx=$(( idx + 1 ))
    done
# check_remote_file host_name file
#
# Run remote command to check whether @file exists on @host_name
check_remote_file() {
    local host_name=$1
    local file_name=$2

    if [ -z "${host_name}" ]; then
        error_output "check_remote_file():"\
                     "Missing hostname!"

    if [ -z "${file_name}" ]; then
        error_output "check_remote_file():"\
                     "Missing file name!"

    # Execute remote command to check the file
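    # ${REMOTE} is supplied by lc_common (typically a remote shell such as
    # ssh), so the test below runs as, e.g.:
    #   ssh oss02 "[ -e /etc/ha.d ]"    (hostname illustrative)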
    ${REMOTE} ${host_name} "[ -e ${file_name} ]"
    if [ $? -ne 0 ]; then
        error_output "check_remote_file():"\
                     "${file_name} does not exist on host ${host_name}!"
# hb_running host_name
#
# Run remote command to check whether the heartbeat service is running
# on @host_name
# Return values: 0 - running; 1 - stopped; 2 - remote command error
hb_running() {
    local host_name=$1
    local ret_str

    ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
    if [ $? -ne 0 ]; then
        # cl_status exits non-zero when heartbeat is stopped; treat the
        # failure as a real error only if the output does not mention "stop"
        if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
            error_output "hb_running():"\
                         "remote command to ${host_name} error: ${ret_str}!"
# stop_heartbeat host_name
#
# Run remote command to stop the heartbeat service running on @host_name
stop_heartbeat() {
    local host_name=$1
    local ret_str

    ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
        service heartbeat stop < /dev/null" 2>&1)
    if [ $? -ne 0 ]; then
        error_output "stop_heartbeat():"\
                     "remote command to ${host_name} error: ${ret_str}!"

    echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
# check_heartbeat
#
# Run remote command to check each node's heartbeat service
check_heartbeat() {
    declare -i idx

    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        # Check Heartbeat configuration directory
        if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
            error_output "check_heartbeat():"\
                         "Is Heartbeat package installed?"
235 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
236 # Check mon configuration directory
237 if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
238 error_output "check_heartbeat():"\
239 "Is mon package installed?"
244 if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
245 # Check crm directory
246 if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
247 error_output "check_heartbeat():"\
248 "Is Heartbeat v2 package installed?"
        # Check heartbeat service status
        hb_running ${NODE_NAMES[idx]}
        rc=$?
        if [ "$rc" -eq "2" ]; then
            return 1
        elif [ "$rc" -eq "1" ]; then
            verbose_output "Heartbeat service is stopped on"\
                           "node ${NODE_NAMES[idx]}."
        elif [ "$rc" -eq "0" ]; then
            echo -n "`basename $0`: Heartbeat service is running on"\
                    "${NODE_NAMES[idx]}, stop the service and generate"\
                    "new configurations? [y/n]:"
            read OK

            if [ "${OK}" = "n" ]; then
                echo "`basename $0`: New Heartbeat configurations"\
                     "are not generated."

            # Stop heartbeat service
            stop_heartbeat ${NODE_NAMES[idx]}
# get_srvname hostname target_devname
#
# Get the Lustre target server name from the node @hostname
get_srvname() {
    local host_name=$1
    local target_devname=$2
    local target_srvname=
    local ret_str

    # Execute remote command to get the target server name
    ret_str=$(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
        ${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1)
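    # On success the grep leaves a single line such as
    #   Target: lustre-OST0000
    # (the target name shown is illustrative)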
    if [ $? -ne 0 ]; then
        echo "`basename $0`: get_srvname() error:" \
             "from host ${host_name} - ${ret_str}"

    # If the output contains "Target: ", strip everything up to and
    # including the tag, then take the first field as the server name
    if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
        ret_str=${ret_str#*Target: }
        target_srvname=`echo ${ret_str} | awk '{print $1}'`

    if [ -z "${target_srvname}" ]; then
        echo "`basename $0`: get_srvname() error: Cannot get the"\
             "server name of target ${target_devname} on ${host_name}!"

    echo ${target_srvname}
# get_srvnames
#
# Get server names of all the Lustre targets in this failover group
get_srvnames() {
    declare -i i

    # Initialize the TARGET_SRVNAMES array
    unset TARGET_SRVNAMES

    # Get Lustre target service names
    for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
        TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
                             ${TARGET_DEVNAMES[i]})
        if [ $? -ne 0 ]; then
            error_output "${TARGET_SRVNAMES[i]}"
# create_template
#
# Create the templates for the ha.cf and authkeys files
create_template() {
    /bin/mkdir -p ${TMP_DIR}

    # Create the template for ha.cf
    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
        cat >${HACF_TEMP} <<EOF
debugfile /var/log/ha-debug
logfile /var/log/ha-log

    elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
        cat >${HACF_TEMP} <<EOF

    # Create the template for authkeys
    if [ ! -s ${AUTHKEYS_TEMP} ]; then
        cat >${AUTHKEYS_TEMP} <<EOF
# create_hacf
#
# Create the ha.cf file and scp it to each node's /etc/ha.d/
create_hacf() {
    HACF_PRIMNODE=${TMP_DIR}/ha.cf.${PRIM_NODENAME}
    HACF_LUSTRE=${TMP_DIR}/ha.cf${FILE_SUFFIX}

    if [ -e ${HACF_PRIMNODE} ]; then
        # The ha.cf file for the primary node already exists
        verbose_output "${HACF_PRIMNODE} already exists."

    /bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}

    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        echo "node ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
    done

    # scp ha.cf file to all the nodes
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        touch ${TMP_DIR}/ha.cf.${NODE_NAMES[idx]}
        scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
        if [ $? -ne 0 ]; then
            error_output "Failed to scp ha.cf file"\
                         "to node ${NODE_NAMES[idx]}!"
# create_haresources
#
# Create the haresources file and scp it to each node's /etc/ha.d/
create_haresources() {
    HARES_PRIMNODE=${TMP_DIR}/haresources.${PRIM_NODENAME}
    HARES_LUSTRE=${TMP_DIR}/haresources${FILE_SUFFIX}

    if [ -s ${HARES_PRIMNODE} ]; then
        # The haresources file for the primary node already exists
        if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
            verbose_output "${HARES_PRIMNODE} already exists."

    # Add the resource group line into the haresources file
    res_line=${PRIM_NODENAME}
    for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
        res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}

        if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
            res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
        fi
    done
    echo "${res_line}" >> ${HARES_LUSTRE}
    # Generate the cib.xml file
    if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
        # Add group haclient and user hacluster
        [ -z "`grep haclient /etc/group`" ] && groupadd haclient
        [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster

        CIB_LUSTRE=${TMP_DIR}/cib.xml${FILE_SUFFIX}
        python ${CIB_GEN_SCRIPT} --stdout \
            ${HARES_LUSTRE} > ${CIB_LUSTRE}
        if [ $? -ne 0 ]; then
            error_output "Failed to generate cib.xml file"\
                         "for node ${PRIM_NODENAME}!"
    # scp the haresources file or cib.xml file
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}/haresources.${NODE_NAMES[idx]}
        scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
        if [ $? -ne 0 ]; then
            error_output "Failed to scp haresources file"\
                         "to node ${NODE_NAMES[idx]}!"

        if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
            scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
            if [ $? -ne 0 ]; then
                error_output "Failed to scp cib.xml"\
                             "file to node ${NODE_NAMES[idx]}!"
# create_authkeys
#
# Create the authkeys file and scp it to each node's /etc/ha.d/
create_authkeys() {
    AUTHKEYS_PRIMNODE=${TMP_DIR}/authkeys.${PRIM_NODENAME}

    if [ -e ${AUTHKEYS_PRIMNODE} ]; then
        verbose_output "${AUTHKEYS_PRIMNODE} already exists."

    # scp the authkeys file to all the nodes
    chmod 600 ${AUTHKEYS_TEMP}
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        touch ${TMP_DIR}/authkeys.${NODE_NAMES[idx]}
        scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
        if [ $? -ne 0 ]; then
            error_output "Failed to scp authkeys file"\
                         "to node ${NODE_NAMES[idx]}!"
# create_moncf
#
# Create the mon.cf file and scp it to each node's /etc/mon/
create_moncf() {
    MONCF_PRIMNODE=${TMP_DIR}/mon.cf.${PRIM_NODENAME}
    MONCF_LUSTRE=${TMP_DIR}/mon.cf${FILE_SUFFIX}
    local srv_name params=

    declare -a OLD_TARGET_SRVNAMES  # targets on the other nodes
                                    # in this failover group
    # Initialize the OLD_TARGET_SRVNAMES array
    unset OLD_TARGET_SRVNAMES

    if [ -s ${MONCF_PRIMNODE} ]; then
        if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
        then
            verbose_output "${MONCF_PRIMNODE} already exists."
        # Get the Lustre target service names
        # from the previous mon.cf file
        idx=0
        for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
                        |awk '$2 ~ /-mon/ {print $2}'|xargs`
        do
            # Strip the "-mon" suffix to recover the service name
            OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
                                      |sed 's/-mon//'`
    # Construct the parameters for the mon.cf generation script
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        params=${params}" -n "${NODE_NAMES[idx]}
    done

    for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
        params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
    done

    for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
        params=${params}" -o "${TARGET_SRVNAMES[idx]}
    done
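    # At this point params is, e.g. (names illustrative):
    #   " -n oss01 -n oss02 -o lustre-OST0000 -o lustre-OST0001"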
    ${SCRIPT_GEN_MONCF} ${params}
    if [ $? -ne 0 ]; then
        error_output "Failed to generate mon.cf file"\
                     "by using ${SCRIPT_GEN_MONCF}!"

    # The generation script writes a *-mon.cfg file into the working directory
    /bin/mv *-mon.cfg ${MONCF_LUSTRE}
    # scp the mon.cf file to all the nodes
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}/mon.cf.${NODE_NAMES[idx]}

        scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
        if [ $? -ne 0 ]; then
            error_output "Failed to scp mon.cf file"\
                         "to node ${NODE_NAMES[idx]}!"
# generate_config
#
# Generate the configuration files for Heartbeat and scp them to all the nodes
generate_config() {
    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
        # Get server names of Lustre targets
        if ! get_srvnames; then

    if ! create_template; then

    verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
                   "${PRIM_NODENAME} failover group hosts..."
    if ! create_hacf; then

    verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
                   "to ${PRIM_NODENAME} failover group hosts..."
    if ! create_haresources; then

    verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
                   "${PRIM_NODENAME} failover group hosts..."
    if ! create_authkeys; then

    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
        verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
                       "${PRIM_NODENAME} failover group hosts..."
        if ! create_moncf; then
# Get all the node names
if ! get_nodenames; then

# Check heartbeat services
verbose_output "Checking heartbeat service on the ${PRIM_NODENAME}"\
               "failover group hosts..."
check_heartbeat
rc=$?
if [ "$rc" -eq "2" ]; then

elif [ "$rc" -eq "1" ]; then

# Generate configuration files
if ! generate_config; then