3 # lc_hb - script for generating the Heartbeat HA software's
6 ###############################################################################
12 Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
13 <-d target device> [-d target device...]
15 -r HBver the version of Heartbeat software
16 The Heartbeat software versions which are curr-
17 ently supported are: hbv1 (Heartbeat version 1)
18 and hbv2 (Heartbeat version 2).
19 -n hostnames the nodenames of the primary node and its fail-
21 Multiple nodenames are separated by colon (:)
22 delimiter. The first one is the nodename of the
23 primary node, the others are failover nodenames.
25 -d target device the target device name and mount point
26 The device name and mount point are separated by
33 # Get the library of functions
34 . @scriptlibdir@/lc_common
36 #****************************** Global variables ******************************#
38 HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path
39 CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
40 CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}
42 # Service directories and names
43 HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources
44 LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat
46 TMP_DIR=${HB_TMP_DIR} # Temporary directory
47 HACF_TEMP=${TMP_DIR}/ha.cf.temp
48 AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}
50 declare -a NODE_NAMES # Node names in the failover group
52 # Lustre target device names, service names and mount points
53 declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
54 declare -i TARGET_NUM=0 # Number of targets
57 # Get and check the positional parameters
59 while getopts "r:n:vd:" OPTION; do
63 if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
64 && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
65 echo >&2 $"`basename $0`: Invalid Heartbeat software" \
66 "version - ${HBVER_OPT}!"
72 PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
73 if [ -z "${PRIM_NODENAME}" ]; then
74 echo >&2 $"`basename $0`: Missing primary nodename!"
77 HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
78 if [ ${HOSTNAME_NUM} -lt 2 ]; then
79 echo >&2 $"`basename $0`: Missing failover nodenames!"
82 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
84 echo >&2 $"`basename $0`: Heartbeat version 1 can" \
85 "only support 2 nodes!"
94 TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
95 TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
96 if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
97 echo >&2 $"`basename $0`: Missing target device name!"
100 if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
101 echo >&2 $"`basename $0`: Missing mount point for target"\
102 "${TARGET_DEVNAMES[TARGET_NUM]}!"
105 TARGET_NUM=$(( TARGET_NUM + 1 ))
112 # Check the required parameters
113 if [ -z "${HBVER_OPT}" ]; then
114 echo >&2 $"`basename $0`: Missing -r option!"
118 if [ -z "${HOSTNAME_OPT}" ]; then
119 echo >&2 $"`basename $0`: Missing -n option!"
123 if [ -z "${DEVICE_OPT}" ]; then
124 echo >&2 $"`basename $0`: Missing -d option!"
130 # Get all the node names in this failover group
133 local nodename_str nodename
135 nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
136 END {for (i in a) print a[i]}'`
138 for nodename in ${nodename_str}
140 NODE_NAMES[idx]=${nodename}
147 # check_remote_file host_name file
149 # Run remote command to check whether @file exists in @host_name
150 check_remote_file() {
154 if [ -z "${host_name}" ]; then
155 echo >&2 "`basename $0`: check_remote_file() error:"\
160 if [ -z "${file_name}" ]; then
161 echo >&2 "`basename $0`: check_remote_file() error:"\
166 # Execute remote command to check the file
167 ${REMOTE} ${host_name} "[ -e ${file_name} ]"
168 if [ $? -ne 0 ]; then
169 echo >&2 "`basename $0`: check_remote_file() error:"\
170 "${file_name} does not exist in host ${host_name}!"
177 # hb_running host_name
179 # Run remote command to check whether heartbeat service is running in @host_name
184 ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
185 if [ $? -ne 0 ]; then
186 if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
187 echo >&2 "`basename $0`: hb_running() error:"\
188 "remote command to ${host_name} error: ${ret_str}!"
198 # stop_heartbeat host_name
200 # Run remote command to stop heartbeat service running in @host_name
205 ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
206 if [ $? -ne 0 ]; then
207 echo >&2 "`basename $0`: stop_heartbeat() error:"\
208 "remote command to ${host_name} error: ${ret_str}!"
212 echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
218 # Run remote command to check each node's heartbeat service
223 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
224 # Check Heartbeat configuration directory
225 if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
226 echo >&2 "`basename $0`: check_heartbeat() error:"\
227 "Is Heartbeat package installed?"
231 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
232 # Check mon configuration directory
233 if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
234 echo >&2 "`basename $0`: check_heartbeat()"\
235 "error: Is mon package installed?"
240 if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
241 # Check crm directory
242 if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
243 echo >&2 "`basename $0`: check_heartbeat()"\
244 "error: Is Heartbeat v2 package installed?"
249 # Check heartbeat service status
250 hb_running ${NODE_NAMES[idx]}
252 if [ "$rc" -eq "2" ]; then
254 elif [ "$rc" -eq "1" ]; then
255 verbose_output "Heartbeat service is stopped on"\
256 "node ${NODE_NAMES[idx]}."
257 elif [ "$rc" -eq "0" ]; then
259 echo -n "`basename $0`: Heartbeat service is running on"\
260 "${NODE_NAMES[idx]}, go ahead to stop the service and"\
261 "generate new configurations? [y/n]:"
263 if [ "${OK}" = "n" ]; then
264 echo "`basename $0`: New Heartbeat configurations"\
269 # Stop heartbeat service
270 stop_heartbeat ${NODE_NAMES[idx]}
277 # get_srvname hostname target_devname
279 # Get the lustre target server name from the node @hostname
282 local target_devname=$2
283 local target_srvname=
286 # Execute remote command to get the target server name
287 ret_str=`${REMOTE} ${host_name} \
288 "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
289 if [ $? -ne 0 ]; then
290 echo "`basename $0`: get_srvname() error:" \
291 "from host ${host_name} - ${ret_str}"
295 if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
296 ret_str=${ret_str#*Target: }
297 target_srvname=`echo ${ret_str} | awk '{print $1}'`
300 if [ -z "${target_srvname}" ]; then
301 echo "`basename $0`: get_srvname() error: Cannot get the"\
302 "server name of target ${target_devname} in ${host_name}!"
306 echo ${target_srvname}
312 # Get server names of all the Lustre targets in this failover group
316 # Initialize the TARGET_SRVNAMES array
317 unset TARGET_SRVNAMES
319 # Get Lustre target service names
320 for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
321 TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
322 ${TARGET_DEVNAMES[i]})
323 if [ $? -ne 0 ]; then
324 echo >&2 "${TARGET_SRVNAMES[i]}"
334 # Create the templates for ha.cf and authkeys files
336 /bin/mkdir -p ${TMP_DIR}
338 # Create the template for ha.cf
339 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
340 cat >${HACF_TEMP} <<EOF
341 debugfile /var/log/ha-debug
342 logfile /var/log/ha-log
351 elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
352 cat >${HACF_TEMP} <<EOF
363 # Create the template for authkeys
364 if [ ! -s ${AUTHKEYS_TEMP} ]; then
365 cat >${AUTHKEYS_TEMP} <<EOF
376 # Create the ha.cf file and scp it to each node's /etc/ha.d/
378 HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
379 HACF_LUSTRE=${TMP_DIR}$"/ha.cf"${FILE_SUFFIX}
383 if [ -e ${HACF_PRIMNODE} ]; then
384 # The ha.cf file for the primary node has already existed.
385 verbose_output "${HACF_PRIMNODE} already exists."
389 /bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}
391 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
392 echo "node ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
395 # scp ha.cf file to all the nodes
396 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
397 touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
398 scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
399 if [ $? -ne 0 ]; then
400 echo >&2 "`basename $0`: Failed to scp ha.cf file"\
401 "to node ${NODE_NAMES[idx]}!"
411 # Create the haresources file and scp it to the each node's /etc/ha.d/
412 create_haresources() {
413 HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
414 HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX}
418 if [ -s ${HARES_PRIMNODE} ]; then
419 # The haresources file for the primary node has already existed
420 if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
421 verbose_output "${HARES_PRIMNODE} already exists."
426 # Add the resource group line into the haresources file
427 res_line=${PRIM_NODENAME}
428 for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
429 res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}
431 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
432 res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
435 echo "${res_line}" >> ${HARES_LUSTRE}
437 # Generate the cib.xml file
438 if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
439 # Add group haclient and user hacluster
440 [ -z "`grep haclient /etc/group`" ] && groupadd haclient
441 [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster
443 CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX}
444 python ${CIB_GEN_SCRIPT} --stdout \
445 ${HARES_LUSTRE} > ${CIB_LUSTRE}
446 if [ $? -ne 0 ]; then
447 echo >&2 "`basename $0`: Failed to generate cib.xml file"\
448 "for node ${PRIM_NODENAME}!"
453 # scp the haresources file or cib.xml file
454 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
455 /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
456 scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
457 if [ $? -ne 0 ]; then
458 echo >&2 "`basename $0`: Failed to scp haresources file"\
459 "to node ${NODE_NAMES[idx]}!"
463 if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
464 scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
465 if [ $? -ne 0 ]; then
466 echo >&2 "`basename $0`: Failed to scp cib.xml"\
467 "file to node ${NODE_NAMES[idx]}!"
478 # Create the authkeys file and scp it to the each node's /etc/ha.d/
480 AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
483 if [ -e ${AUTHKEYS_PRIMNODE} ]; then
484 verbose_output "${AUTHKEYS_PRIMNODE} already exists."
488 # scp the authkeys file to all the nodes
489 chmod 600 ${AUTHKEYS_TEMP}
490 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
491 touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
492 scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
493 if [ $? -ne 0 ]; then
494 echo >&2 "`basename $0`: Failed to scp authkeys file"\
495 "to node ${NODE_NAMES[idx]}!"
505 # Create the mon.cf file and scp it to the each node's /etc/mon/
507 MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
508 MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
509 local srv_name params=
511 declare -a OLD_TARGET_SRVNAMES # targets in other nodes
512 # in this failover group
513 # Initialize the OLD_TARGET_SRVNAMES array
514 unset OLD_TARGET_SRVNAMES
516 if [ -s ${MONCF_PRIMNODE} ]; then
517 if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
519 verbose_output "${MONCF_PRIMNODE} already exists."
522 # Get the Lustre target service names
523 # from the previous mon.cf file
525 for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
526 |awk '$2 ~ /-mon/ {print $2}'|xargs`
528 OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
535 # Construct the parameters to mon.cf generation script
536 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
537 params=${params}" -n "${NODE_NAMES[idx]}
540 for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
541 params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
544 for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
545 params=${params}" -o "${TARGET_SRVNAMES[idx]}
548 ${SCRIPT_GEN_MONCF} ${params}
549 if [ $? -ne 0 ]; then
550 echo >&2 "`basename $0`: Failed to generate mon.cf file"\
551 "by using ${SCRIPT_GEN_MONCF}!"
555 /bin/mv *-mon.cfg ${MONCF_LUSTRE}
557 # scp the mon.cf file to all the nodes
558 for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
559 /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
561 scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
562 if [ $? -ne 0 ]; then
563 echo >&2 "`basename $0`: Failed to scp mon.cf file"\
564 "to node ${NODE_NAMES[idx]}!"
574 # Generate the configuration files for Heartbeat and scp them to all the nodes
576 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
577 # Get server names of Lustre targets
578 if ! get_srvnames; then
583 if ! create_template; then
587 verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
588 "${PRIM_NODENAME} failover group hosts..."
589 if ! create_hacf; then
594 verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
595 "to ${PRIM_NODENAME} failover group hosts..."
596 if ! create_haresources; then
601 verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
602 "${PRIM_NODENAME} failover group hosts..."
603 if ! create_authkeys; then
608 if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
609 verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
610 "${PRIM_NODENAME} failover group hosts..."
611 if ! create_moncf; then
621 # Get all the node names
622 if ! get_nodenames; then
626 # Check heartbeat services
627 verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
628 "failover group hosts..."
631 if [ "$rc" -eq "2" ]; then
634 elif [ "$rc" -eq "1" ]; then
639 # Generate configuration files
640 if ! generate_config; then