successfully test a mountconf branch with ltest.
.deps
TAGS
version_tag.pl
+lustre_createcsv.sh
+lustre_config.sh
+lc_net.sh
+lc_modprobe.sh
+lc_hb.sh
+lc_cluman.sh
+lc_md.sh
+lc_lvm.sh
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-EXTRA_DIST = license-status maketags.sh lustre lustrefs \
- version_tag.pl.in
+# These are scripts that are generated from .in files
+genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh
-initddir = $(sysconfdir)/init.d
-if UTILS
-initd_SCRIPTS = lustre lustrefs
-endif
+# Generated and static helper scripts are all installed into $(sbindir).
+sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh
+# Ship the .in templates (not the generated scripts) in the tarball.
+EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \
+	$(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \
+	lustre_up14.sh
+
+# lc_common.sh is a function library sourced by the scripts above, so it
+# is installed as data (no exec bit) under $(libdir)/@PACKAGE@.
+scriptlibdir = $(libdir)/@PACKAGE@
+scriptlib_DATA = lc_common.sh
+
+CLEANFILES = $(genscripts)
+
+# Substitute the install location of lc_common.sh into each generated
+# script; @scriptlibdir@ is expanded here at make time (not by configure)
+# so that $(libdir) overrides on the make command line are honoured.
+$(genscripts): %.sh: %.sh.in
+	sed -e 's#@scriptlibdir@#$(scriptlibdir)#' < $< > $@
+	chmod +x $@
--- /dev/null
+#!/bin/bash
+#
+# lc_cluman.sh - script for generating the Red Hat Cluster Manager
+# HA software's configuration files
+#
+################################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-n hostnames> [-s service addresses]
+ [-c heartbeat channel] [-o heartbeat options] [-v]
+ <-d target device> [-d target device...]
+
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -s service addresses the IP addresses to failover
+ Multiple addresses are separated by colon (:)
+ delimeter.
+ -c heartbeat channel the method to send/rcv heartbeats on
+ The default method is multicast, and multicast_
+ ipaddress is "225.0.0.11".
+ -o heartbeat options a "catchall" for other heartbeat configuration
+ options
+ Multiple options are separated by colon (:)
+ delimeter.
+ -v verbose mode
+ -d target device the target device name and mount point
+ The device name and mount point are separated by
+ colon (:) delimeter.
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#****************************** Global variables ******************************#
+TMP_DIR=${CLUMGR_TMP_DIR} # Temporary directory
+
+declare -a NODE_NAMES # Node names in the failover group
+declare -a SRV_IPADDRS # Service IP addresses
+
+# Lustre target device names, service names and mount points
+declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
+declare -i TARGET_NUM=0 # Number of targets
+
+# Get and check the positional parameters
+VERBOSE_OUTPUT=false
+while getopts "n:s:c:o:vd:" OPTION; do
+ case $OPTION in
+ n)
+ HOSTNAME_OPT=$OPTARG
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+ if [ -z "${PRIM_NODENAME}" ]; then
+ echo >&2 $"`basename $0`: Missing primary nodename!"
+ usage
+ fi
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Missing failover nodenames!"
+ usage
+ fi
+ ;;
+ s)
+ SRVADDR_OPT=$OPTARG
+ ;;
+ c)
+ HBCHANNEL_OPT=$OPTARG
+ HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
+ | sed 's/"$//'`
+ if [ -n "${HBCHANNEL_OPT}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
+ "- ${HBCHANNEL_OPT}!"
+ usage
+ fi
+ ;;
+ o)
+ HBOPT_OPT=$OPTARG
+ HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
+ TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
+ if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing target device name!"
+ usage
+ fi
+ if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing mount point for target"\
+ "${TARGET_DEVNAMES[TARGET_NUM]}!"
+ usage
+ fi
+ TARGET_NUM=$(( TARGET_NUM + 1 ))
+ ;;
+
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -d option!"
+ usage
+fi
+
+# get_nodenames
+#
+# Split the colon-separated ${HOSTNAME_OPT} value (-n option) and fill
+# the global NODE_NAMES array with the node names of this failover
+# group.  NODE_NAMES[0] is the primary node, the rest are failovers.
+# Always returns 0.
+get_nodenames() {
+    declare -i idx
+    local nodename_str nodename
+
+    # NOTE(review): $HOSTNAME_OPT is inside single quotes, so awk sees an
+    # uninitialized awk variable (numeric 0) and split($HOSTNAME_OPT,...)
+    # degrades to split($0, ...), splitting the whole input line.  This
+    # happens to produce the intended result -- TODO confirm and simplify.
+    nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+                  END {for (i in a) print a[i]}'`
+    idx=0
+    for nodename in ${nodename_str}
+    do
+        NODE_NAMES[idx]=${nodename}
+        # idx was declared with -i, so the string "idx+1" is evaluated
+        # arithmetically on assignment.
+        idx=$idx+1
+    done
+
+    return 0
+}
+
+# get_check_srvIPaddrs
+#
+# Split the colon-separated ${SRVADDR_OPT} value (-s option) into the
+# global SRV_IPADDRS array, then verify via ${SCRIPT_VERIFY_SRVIP}
+# (lc_servip.sh) that every service IP address is on the same subnet as
+# the real IP of every node in NODE_NAMES.
+# Returns 0 on success, 1 if any verification fails.
+get_check_srvIPaddrs() {
+    declare -i idx
+    declare -i i
+    local srvIPaddr_str srvIPaddr
+
+    # Same awk quirk as get_nodenames(): $SRVADDR_OPT is uninitialized
+    # inside the single-quoted awk program, so split() effectively
+    # operates on the whole input line.
+    srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\
+                  END {for (i in a) print a[i]}'`
+    idx=0
+    for srvIPaddr in ${srvIPaddr_str}
+    do
+        SRV_IPADDRS[idx]=${srvIPaddr}
+        idx=$idx+1
+    done
+
+    # Cross-check each service address against each member node.
+    for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+        for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
+            # Check service IP address
+            verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
+                           "real IP of host ${NODE_NAMES[i]} are in the" \
+                           "same subnet..."
+            if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
+            then
+                return 1
+            fi
+            verbose_output "OK"
+        done
+    done
+
+    return 0
+}
+
+# cluman_running host_name
+#
+# Run remote command to check whether clumanager service is running in @host_name
+# Returns: 0 - service running; 1 - service stopped; 2 - the remote
+# command itself failed (output contains "unrecognized", e.g. because
+# the clumanager init script is not installed on the node).
+cluman_running() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1`
+    if [ $? -ne 0 ]; then
+        # A non-zero status normally just means "stopped"; only treat it
+        # as a hard error when "unrecognized" appears in the output.
+        if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then
+            echo >&2 "`basename $0`: cluman_running() error:"\
+                 "remote command to ${host_name} error: ${ret_str}!"
+            return 2
+        else
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+# stop_cluman host_name
+#
+# Run remote command to stop clumanager service running in @host_name
+# Returns 0 on success (and announces it on stdout), 1 when the remote
+# "service clumanager stop" fails.
+stop_cluman() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1`
+    if [ $? -ne 0 ]; then
+        echo >&2 "`basename $0`: stop_cluman() error:"\
+             "remote command to ${host_name} error: ${ret_str}!"
+        return 1
+    fi
+
+    echo "`basename $0`: Clumanager service is stopped on node ${host_name}."
+    return 0
+}
+
+# check_cluman
+#
+# Run remote command to check each node's clumanager service
+check_cluman() {
+ declare -i idx
+ local OK
+
+ # Get and check all the service IP addresses
+ if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ # Check clumanager service status
+ cluman_running ${NODE_NAMES[idx]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "1" ]; then
+ verbose_output "Clumanager service is stopped on"\
+ "node ${NODE_NAMES[idx]}."
+ elif [ "$rc" -eq "0" ]; then
+ OK=
+ echo -n "`basename $0`: Clumanager service is running on"\
+ "${NODE_NAMES[idx]}, go ahead to stop the service and"\
+ "generate new configurations? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: New Clumanager configurations"\
+ "are not generated."
+ return 2
+ fi
+
+ # Stop clumanager service
+ stop_cluman ${NODE_NAMES[idx]}
+ fi
+ done
+
+ return 0
+}
+
+# get_srvname hostname target_devname
+#
+# Get the lustre target server name from the node @hostname
+get_srvname() {
+ local host_name=$1
+ local target_devname=$2
+ local target_srvname=
+ local ret_str
+
+ # Execute remote command to get the target server name
+ ret_str=`${REMOTE} ${host_name} \
+ "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo "`basename $0`: get_srvname() error:" \
+ "from host ${host_name} - ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
+ ret_str=${ret_str#*Target: }
+ target_srvname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "${target_srvname}" ]; then
+ echo "`basename $0`: get_srvname() error: Cannot get the"\
+ "server name of target ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_srvname}
+ return 0
+}
+
+# get_srvnames
+#
+# Get server names of all the Lustre targets in this failover group
+get_srvnames() {
+ declare -i i
+
+ # Initialize the TARGET_SRVNAMES array
+ unset TARGET_SRVNAMES
+
+ # Get Lustre target service names
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_SRVNAMES[i]}"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# check_retval retval
+#
+# Check the return value of redhat-config-cluster-cmd
+# $1 - exit status of the last ${CONFIG_CMD} invocation.
+# Returns 0 when $1 is 0; otherwise prints an error and returns 1.
+check_retval() {
+    if [ $1 -ne 0 ]; then
+        echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
+        return 1
+    fi
+
+    return 0
+}
+
+# add_services
+#
+# Add service tags into the cluster.xml file
+add_services() {
+ declare -i idx
+ declare -i i
+
+ # Add service tag
+ for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
+ ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --add_device \
+ --name=${TARGET_DEVNAMES[i]}
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
+ --device=${TARGET_DEVNAMES[i]} \
+ --mount \
+ --mountpoint=${TARGET_MNTPNTS[i]} \
+ --fstype=lustre
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# gen_cluster_xml
+#
+# Run redhat-config-cluster-cmd to create the cluster.xml file
+gen_cluster_xml() {
+ declare -i idx
+ declare -i i
+ local mcast_IPaddr
+ local node_names
+ local hbopt
+
+ [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
+
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ # Add clumembd tag
+ if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
+ ${CONFIG_CMD} --clumembd --broadcast=yes
+ ${CONFIG_CMD} --clumembd --multicast=no
+ if ! check_retval $?; then
+ return 1
+ fi
+ elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
+ mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
+ if [ -n "${mcast_IPaddr}" ]; then
+ ${CONFIG_CMD} --clumembd --multicast=yes\
+ --multicast_ipaddress=${mcast_IPaddr}
+ if ! check_retval $?; then
+ return 1
+ fi
+ fi
+ fi
+
+ # Add cluster tag
+ node_names=
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ node_names=${node_names}"${NODE_NAMES[idx]} "
+ done
+
+ ${CONFIG_CMD} --cluster --name="${node_names}failover group"
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ # Add member tag
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ # Add service tag
+ if ! add_services; then
+ return 1
+ fi
+
+ # Add other tags
+ if [ -n "${HBOPT_OPT}" ]; then
+ while read -r hbopt
+ do
+ ${CONFIG_CMD} ${hbopt}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
+ END {for (i in a) print a[i]}')
+ fi
+
+ return 0
+}
+
+# create_config
+#
+# Create the cluster.xml file and scp it to the each node's /etc/
+create_config() {
+ declare -i idx
+
+ /bin/mkdir -p ${TMP_DIR}
+ CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME}
+ CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX}
+
+ # Get server names of Lustre targets
+ if ! get_srvnames; then
+ return 1
+ fi
+
+ if [ -s ${CONFIG_PRIMNODE} ]; then
+ if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
+ then
+ verbose_output "${CONFIG_PRIMNODE} already exists."
+ return 0
+ else
+ [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
+
+ /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml
+
+ # Add services into the cluster.xml file
+ if ! add_services; then
+ return 1
+ fi
+ fi
+ else
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ verbose_output "Creating cluster.xml file for" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! gen_cluster_xml; then
+ return 1
+ fi
+ verbose_output "OK"
+ fi
+
+ /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE}
+ [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \
+ /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml
+
+ # scp the cluster.xml file to all the nodes
+ verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]}
+
+ scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+ verbose_output "OK"
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Check clumanager services
+verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+check_cluman
+rc=$?
+if [ "$rc" -eq "2" ]; then
+ verbose_output "OK"
+ exit 0
+elif [ "$rc" -eq "1" ]; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! create_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_common.sh - This file contains functions to be used by most or all
+# Lustre cluster config scripts.
+#
+################################################################################
+
+# Remote command
+REMOTE=${REMOTE:-"ssh -x -q"}
+#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
+export REMOTE
+
+# Lustre utilities
+CMD_PATH=${CMD_PATH:-"/usr/sbin"}
+MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"}
+TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
+LCTL=${LCTL:-"$CMD_PATH/lctl"}
+
+EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"}
+
+# Raid command path
+RAID_CMD_PATH=${RAID_CMD_PATH:-"/sbin"}
+MDADM=${MDADM:-"$RAID_CMD_PATH/mdadm"}
+
+# Some scripts to be called
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
+MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe.sh
+VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net.sh
+GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb.sh
+GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman.sh
+SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh
+SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon.sh
+SCRIPT_CONFIG_MD=${SCRIPTS_PATH}/lc_md.sh
+SCRIPT_CONFIG_LVM=${SCRIPTS_PATH}/lc_lvm.sh
+
+# Variables of HA software
+HBVER_HBV1="hbv1" # Heartbeat version 1
+HBVER_HBV2="hbv2" # Heartbeat version 2
+HATYPE_CLUMGR="cluman" # Cluster Manager
+
+# Configuration directories and files
+HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
+MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory
+CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
+
+HA_CF=${HA_DIR}/ha.cf # ha.cf file
+HA_RES=${HA_DIR}/haresources # haresources file
+HA_CIB=${CIB_DIR}/cib.xml
+
+CLUMAN_DIR="/etc" # CluManager configuration directory
+CLUMAN_CONFIG=${CLUMAN_DIR}/cluster.xml
+
+CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"} # CluManager tools
+CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
+
+HB_TMP_DIR="/tmp/heartbeat" # Temporary directory
+CLUMGR_TMP_DIR="/tmp/clumanager"
+TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
+
+FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type
+FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files
+
+# Marker of the MD device line
+MD_MARKER=${MD_MARKER:-"MD"}
+
+# Marker of the LVM device line
+PV_MARKER=${PV_MARKER:-"PV"}
+VG_MARKER=${VG_MARKER:-"VG"}
+LV_MARKER=${LV_MARKER:-"LV"}
+
+declare -a CONFIG_ITEM # Items in each line of the csv file
+declare -a NODE_NAME # Hostnames of nodes have been configured
+
+
+# verbose_output string
+# Print "$*" prefixed with the script name, but only when the calling
+# script has set VERBOSE_OUTPUT=true (its -v option).  Always returns 0.
+verbose_output() {
+    if ${VERBOSE_OUTPUT}; then
+        echo "`basename $0`: $*"
+    fi
+    return 0
+}
+
+# is_pdsh
+# Check whether the remote command (${REMOTE}) is pdsh.
+# Returns 0 when ${REMOTE} contains "pdsh", 1 otherwise.  Callers use
+# this because pdsh prefixes each output line with "host:", so remote
+# output must be parsed differently (see nid2hostname).
+is_pdsh() {
+    if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+# check_file csv_file
+# Validate the csv file argument: it must be given, exist and be
+# non-empty.  On success the global CSV_FILE is set to $1 and 0 is
+# returned; otherwise an error is printed and 1 is returned.
+check_file() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 "`basename $0`: check_file() error: Missing csv file!"
+        return 1
+    fi
+
+    CSV_FILE=$1
+    # -s: file exists and has a size greater than zero.
+    if [ ! -s ${CSV_FILE} ]; then
+        echo >&2 "`basename $0`: check_file() error: ${CSV_FILE}"\
+             "does not exist or is empty!"
+        return 1
+    fi
+
+    return 0
+}
+
+# parse_line line
+# Parse a line in the csv file
+parse_line() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: parse_line() error: Missing argument!"
+ return 1
+ fi
+
+ declare -i i=0 # Index of the CONFIG_ITEM array
+ declare -i length=0
+ declare -i idx=0
+ declare -i s_quote_flag=0 # Flag of the single quote character
+ declare -i d_quote_flag=0 # Flag of the double quotes character
+ local TMP_LETTER LINE
+
+ LINE="$*"
+
+ # Initialize the CONFIG_ITEM array
+ unset CONFIG_ITEM
+
+ # Get the length of the line
+ length=${#LINE}
+
+ i=0
+ while [ ${idx} -lt ${length} ]; do
+ # Get a letter from the line
+ TMP_LETTER=${LINE:${idx}:1}
+
+ case "${TMP_LETTER}" in
+ ",")
+ if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ]
+ then
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ else
+ i=$i+1
+ fi
+ idx=${idx}+1
+ continue
+ ;;
+ "'")
+ if [ ${s_quote_flag} -eq 0 ]; then
+ s_quote_flag=1
+ else
+ s_quote_flag=0
+ fi
+ ;;
+ "\"")
+ if [ ${d_quote_flag} -eq 0 ]; then
+ d_quote_flag=1
+ else
+ d_quote_flag=0
+ fi
+ ;;
+ "\r")
+ idx=${idx}+1
+ continue
+ ;;
+ *)
+ ;;
+ esac
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ idx=${idx}+1
+ done
+
+ # Extract the real value of each field
+ # Remove surrounded double-quotes, etc.
+ for ((idx = 0; idx <= $i; idx++)); do
+ # Strip the leading and trailing space-characters
+ CONFIG_ITEM[idx]=`expr "${CONFIG_ITEM[idx]}" : '[[:space:]]*\(.*\)[[:space:]]*$'`
+
+ [ -z "${CONFIG_ITEM[idx]}" ] && continue
+
+ # Remove the surrounded double-quotes
+ while [ -z "`echo "${CONFIG_ITEM[idx]}"|sed -e 's/^".*"$//'`" ]; do
+ CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/^"//' -e 's/"$//'`
+ done
+
+ CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/""/"/g'`
+ done
+
+ return 0
+}
+
+# fcanon name
+# If $name is a symbolic link, print its canonical target
+# (readlink -f); otherwise print $name unchanged.
+fcanon() {
+    local NAME=$1
+
+    if [ -h "$NAME" ]; then
+        readlink -f "$NAME"
+    else
+        echo "$NAME"
+    fi
+}
+
+# configured_host host_name
+#
+# Check whether ${host_name} is already present in the global NODE_NAME
+# array, i.e. whether its devices have been configured by an earlier
+# csv line.  Returns 0 when found, 1 otherwise.
+configured_host() {
+    local host_name=$1
+    declare -i i
+
+    for ((i = 0; i < ${#NODE_NAME[@]}; i++)); do
+        [ "${host_name}" = "${NODE_NAME[i]}" ] && return 0
+    done
+
+    return 1
+}
+
+# remote_error fn_name host_addr ret_str
+# Inspect the captured output of a remote command for failure
+# signatures.  Returns 0 -- meaning "an error WAS detected" -- when
+# ${ret_str} contains "connect:" or is empty; returns 1 when the output
+# looks usable.  Note the inverted sense: callers write
+#     remote_error "fn" ${addr} "${out}" && return 1
+remote_error() {
+    local fn_name host_addr ret_str
+
+    fn_name=$1
+    shift
+    host_addr=$1
+    shift
+    ret_str=$*
+
+    # "connect:" in the output indicates an ssh/pdsh connection failure.
+    if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then
+        echo >&2 "`basename $0`: ${fn_name}() error: ${ret_str}"
+        return 0
+    fi
+
+    # An empty result usually means the remote shell never ran.
+    if [ -z "${ret_str}" ]; then
+        echo >&2 "`basename $0`: ${fn_name}() error:" \
+             "No results from remote!" \
+             "Check network connectivity between the local host and ${host_addr}!"
+        return 0
+    fi
+
+    return 1
+}
+
+# nid2hostname nid
+# Convert $nid (an LNET identifier such as "192.168.0.1@tcp") to the
+# hostname of the lustre cluster node.  For IP-based network types the
+# address is resolved by running "hostname" on the remote node;
+# elan/gm/ptl conversion is not implemented yet (FIXMEs below) and
+# yields an empty result.  Prints the hostname on stdout and returns 0,
+# or prints an error message and returns 1.
+nid2hostname() {
+    local nid=$1
+    local host_name=
+    local addr nettype ip_addr
+    local ret_str
+
+    # Split "addr@nettype"; a nid without "@" defaults to tcp.
+    addr=${nid%@*}
+    [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+    if [ -z "${addr}" ]; then
+        echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!"
+        return 1
+    fi
+
+    case "${nettype}" in
+    lo*)    host_name=`hostname`;;
+    elan*)  # QsNet
+        # FIXME: Parse the /etc/elanhosts configuration file to
+        # convert ElanID to hostname
+        ;;
+    gm*)    # Myrinet
+        # FIXME: Use /usr/sbin/gmlndnid to find the hostname of
+        # the specified GM Global node ID
+        ;;
+    ptl*)   # Portals
+        # FIXME: Convert portal ID to hostname
+        ;;
+    *)  # tcp, o2ib, cib, openib, iib, vib, ra
+        ip_addr=${addr}
+        # Is it IP address or hostname?  The sed deletes a dotted-quad;
+        # anything left over means $addr was already a hostname.
+        if [ -n "`echo ${ip_addr} | sed -e 's/\([0-9]\{1,3\}\.\)\{3,3\}[0-9]\{1,3\}//'`" ]
+        then
+            host_name=${ip_addr}
+            echo ${host_name}
+            return 0
+        fi
+
+        # Execute remote command to get the host name
+        ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1`
+        if [ $? -ne 0 -a -n "${ret_str}" ]; then
+            echo "`basename $0`: nid2hostname() error:" \
+                 "remote command to ${ip_addr} error: ${ret_str}"
+            return 1
+        fi
+        remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1
+
+        # pdsh prefixes each output line with "host:", so the hostname
+        # is the second whitespace-separated field there and the first
+        # otherwise.
+        if is_pdsh; then
+            host_name=`echo ${ret_str} | awk '{print $2}'`
+        else
+            host_name=`echo ${ret_str} | awk '{print $1}'`
+        fi
+        ;;
+    esac
+
+    echo ${host_name}
+    return 0
+}
+
+# nids2hostname nids
+# Get the hostname of the lustre cluster node which has the nids - $nids
+nids2hostname() {
+ local nids=$1
+ local host_name=
+ local nid
+ local nettype
+
+ for nid in ${nids//,/ }; do
+ [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+
+ case "${nettype}" in
+ lo* | elan* | gm* | ptl*) ;;
+ *) # tcp, o2ib, cib, openib, iib, vib, ra
+ host_name=$(nid2hostname ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${host_name}"
+ return 1
+ fi
+ ;;
+ esac
+ done
+
+ if [ -z "${host_name}" ]; then
+ echo "`basename $0`: nids2hostname() error:" \
+ "Can not get the hostname from nids - \"${nids}\"!"
+ return 1
+ fi
+
+ echo ${host_name}
+ return 0
+}
+
+# ip2hostname_single_node nids
+# Convert IP addresses in $nids into hostnames
+# NIDs in $nids are delimited by commas, i.e. all the NIDs belong to one node
+ip2hostname_single_node() {
+ local orig_nids=$1
+ local nids=
+ local nid host_name
+ local nettype
+
+ for nid in ${orig_nids//,/ }; do
+ [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp
+
+ case "${nettype}" in
+ lo* | elan* | gm* | ptl*) ;;
+ *) # tcp, o2ib, cib, openib, iib, vib, ra
+ host_name=$(nid2hostname ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${host_name}"
+ return 1
+ fi
+
+ nid=${host_name}@${nettype}
+ ;;
+ esac
+
+ [ -z "${nids}" ] && nids=${nid} || nids=${nids},${nid}
+ done
+
+ echo ${nids}
+ return 0
+}
+
+# ip2hostname_multi_node nids
+# Convert IP addresses in $nids into hostnames
+# NIDs belonging to multiple nodes are delimited by colons in $nids
+ip2hostname_multi_node() {
+ local orig_nids=$1
+ local nids=
+ local nid
+
+ for nid in ${orig_nids//:/ }; do
+ nid=$(ip2hostname_single_node ${nid})
+ if [ $? -ne 0 ]; then
+ echo "${nid}"
+ return 1
+ fi
+
+ [ -z "${nids}" ] && nids=${nid} || nids=${nids}:${nid}
+ done
+
+ echo ${nids}
+ return 0
+}
--- /dev/null
+#!/bin/bash
+#
+# lc_hb.sh - script for generating the Heartbeat HA software's
+# configuration files
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
+ <-d target device> [-d target device...]
+
+ -r HBver the version of Heartbeat software
+ The Heartbeat software versions which are curr-
+ ently supported are: hbv1 (Heartbeat version 1)
+ and hbv2 (Heartbeat version 2).
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -v verbose mode
+ -d target device the target device name and mount point
+ The device name and mount point are separated by
+ colon (:) delimeter.
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#****************************** Global variables ******************************#
+# Heartbeat tools
+HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path
+CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
+CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}
+
+# Service directories and names
+HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources
+LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat
+
+TMP_DIR=${HB_TMP_DIR} # Temporary directory
+HACF_TEMP=${TMP_DIR}/ha.cf.temp
+AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}
+
+declare -a NODE_NAMES # Node names in the failover group
+
+# Lustre target device names, service names and mount points
+declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
+declare -i TARGET_NUM=0 # Number of targets
+
+
+# Get and check the positional parameters
+VERBOSE_OUTPUT=false
+while getopts "r:n:vd:" OPTION; do
+ case $OPTION in
+ r)
+ HBVER_OPT=$OPTARG
+ if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
+ && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat software" \
+ "version - ${HBVER_OPT}!"
+ usage
+ fi
+ ;;
+ n)
+ HOSTNAME_OPT=$OPTARG
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+ if [ -z "${PRIM_NODENAME}" ]; then
+ echo >&2 $"`basename $0`: Missing primary nodename!"
+ usage
+ fi
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Missing failover nodenames!"
+ usage
+ fi
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
+ then
+ echo >&2 $"`basename $0`: Heartbeat version 1 can" \
+ "only support 2 nodes!"
+ usage
+ fi
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
+ TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
+ if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing target device name!"
+ usage
+ fi
+ if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
+ echo >&2 $"`basename $0`: Missing mount point for target"\
+ "${TARGET_DEVNAMES[TARGET_NUM]}!"
+ usage
+ fi
+ TARGET_NUM=$(( TARGET_NUM + 1 ))
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HBVER_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -r option!"
+ usage
+fi
+
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Missing -d option!"
+ usage
+fi
+
+# get_nodenames
+#
+# Get all the node names in this failover group
+get_nodenames() {
+ declare -i idx
+ local nodename_str nodename
+
+ nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for nodename in ${nodename_str}
+ do
+ NODE_NAMES[idx]=${nodename}
+ idx=$idx+1
+ done
+
+ return 0
+}
+
+# check_remote_file host_name file
+#
+# Run remote command to check whether @file exists in @host_name
+# Returns 0 when the file exists, 1 on a missing argument or when the
+# remote existence test fails.
+check_remote_file() {
+    local host_name=$1
+    local file_name=$2
+
+    if [ -z "${host_name}" ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "Missing hostname!"
+        return 1
+    fi
+
+    if [ -z "${file_name}" ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "Missing file name!"
+        return 1
+    fi
+
+    # Execute remote command to check the file
+    ${REMOTE} ${host_name} "[ -e ${file_name} ]"
+    if [ $? -ne 0 ]; then
+        echo >&2 "`basename $0`: check_remote_file() error:"\
+             "${file_name} does not exist in host ${host_name}!"
+        return 1
+    fi
+
+    return 0
+}
+
+# hb_running host_name
+#
+# Run remote command to check whether heartbeat service is running in @host_name
+# Returns: 0 - running; 1 - stopped; 2 - the remote command itself
+# failed (cl_status output without "stop" in it).
+hb_running() {
+    local host_name=$1
+    local ret_str
+
+    ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
+    if [ $? -ne 0 ]; then
+        # cl_status exits non-zero both when heartbeat is stopped and on
+        # real errors; the word "stop" in the output distinguishes a
+        # normal "stopped" state from a failure.
+        if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
+            echo >&2 "`basename $0`: hb_running() error:"\
+                 "remote command to ${host_name} error: ${ret_str}!"
+            return 2
+        else
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+# stop_heartbeat host_name
+#
+# Run remote command to stop heartbeat service running in @host_name
+stop_heartbeat() {
+ local host_name=$1
+ local ret_str
+
+ ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: stop_heartbeat() error:"\
+ "remote command to ${host_name} error: ${ret_str}!"
+ return 1
+ fi
+
+ echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
+ return 0
+}
+
+# check_heartbeat
+#
+# Run remote command to check each node's heartbeat service
+check_heartbeat() {
+ declare -i idx
+ local OK
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ # Check Heartbeat configuration directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat() error:"\
+ "Is Heartbeat package installed?"
+ return 1
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ # Check mon configuration directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat()"\
+ "error: Is mon package installed?"
+ return 1
+ fi
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ # Check crm directory
+ if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
+ echo >&2 "`basename $0`: check_heartbeat()"\
+ "error: Is Heartbeat v2 package installed?"
+ return 1
+ fi
+ fi
+
+ # Check heartbeat service status
+ hb_running ${NODE_NAMES[idx]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "1" ]; then
+ verbose_output "Heartbeat service is stopped on"\
+ "node ${NODE_NAMES[idx]}."
+ elif [ "$rc" -eq "0" ]; then
+ OK=
+ echo -n "`basename $0`: Heartbeat service is running on"\
+ "${NODE_NAMES[idx]}, go ahead to stop the service and"\
+ "generate new configurations? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: New Heartbeat configurations"\
+ "are not generated."
+ return 2
+ fi
+
+ # Stop heartbeat service
+ stop_heartbeat ${NODE_NAMES[idx]}
+ fi
+ done
+
+ return 0
+}
+
+# get_srvname hostname target_devname
+#
+# Get the lustre target server name from the node @hostname
+get_srvname() {
+ local host_name=$1
+ local target_devname=$2
+ local target_srvname=
+ local ret_str
+
+ # Execute remote command to get the target server name
+ ret_str=`${REMOTE} ${host_name} \
+ "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
+ if [ $? -ne 0 ]; then
+ echo "`basename $0`: get_srvname() error:" \
+ "from host ${host_name} - ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
+ ret_str=${ret_str#*Target: }
+ target_srvname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "${target_srvname}" ]; then
+ echo "`basename $0`: get_srvname() error: Cannot get the"\
+ "server name of target ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_srvname}
+ return 0
+}
+
+# get_srvnames
+#
+# Get server names of all the Lustre targets in this failover group
+get_srvnames() {
+ declare -i i
+
+ # Initialize the TARGET_SRVNAMES array
+ unset TARGET_SRVNAMES
+
+ # Get Lustre target service names
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_SRVNAMES[i]}"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_template
+#
+# Create the templates for ha.cf and authkeys files
+# Writes ${HACF_TEMP} under ${TMP_DIR}; the ha.cf layout depends on the
+# Heartbeat version selected in ${HBVER_OPT} (v1 vs. v2/CRM).
+create_template() {
+ /bin/mkdir -p ${TMP_DIR}
+
+ # Create the template for ha.cf
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ cat >${HACF_TEMP} <<EOF
+debugfile /var/log/ha-debug
+logfile /var/log/ha-log
+logfacility local0
+keepalive 2
+deadtime 30
+initdead 120
+
+auto_failback off
+
+EOF
+ elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ cat >${HACF_TEMP} <<EOF
+use_logd yes
+keepalive 1
+deadtime 10
+initdead 60
+
+crm yes
+
+EOF
+ fi
+
+ # Create the template for authkeys
+ # (only when the file is missing or empty - keep an existing one)
+ if [ ! -s ${AUTHKEYS_TEMP} ]; then
+ cat >${AUTHKEYS_TEMP} <<EOF
+auth 1
+1 sha1 HelloLustre!
+EOF
+ fi
+
+ return 0
+}
+
+# create_hacf
+#
+# Create the ha.cf file and scp it to each node's /etc/ha.d/
+# An existing ${TMP_DIR}/ha.cf.<primary> acts as a "already generated"
+# marker; per-node marker files are touched as each node is handled.
+create_hacf() {
+ HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
+ HACF_LUSTRE=${TMP_DIR}$"/ha.cf"${FILE_SUFFIX}
+
+ declare -i idx
+
+ if [ -e ${HACF_PRIMNODE} ]; then
+ # The ha.cf file for the primary node has already existed.
+ verbose_output "${HACF_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # Start from the common template, then append one "node" line per host
+ /bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ echo "node ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
+ done
+
+ # scp ha.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
+ scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp ha.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_haresources
+#
+# Create the haresources file and scp it to the each node's /etc/ha.d/
+# For Heartbeat v2 the haresources file is additionally converted into a
+# cib.xml via ${CIB_GEN_SCRIPT} and copied to ${CIB_DIR} on every node.
+create_haresources() {
+ HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
+ HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX}
+ declare -i idx
+ local res_line
+
+ if [ -s ${HARES_PRIMNODE} ]; then
+ # The haresources file for the primary node has already existed
+ if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
+ verbose_output "${HARES_PRIMNODE} already exists."
+ return 0
+ fi
+ fi
+
+ # Add the resource group line into the haresources file
+ res_line=${PRIM_NODENAME}
+ for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
+ res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}
+
+ # Heartbeat v1 also gets a <server>-mon resource per target
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
+ fi
+ done
+ echo "${res_line}" >> ${HARES_LUSTRE}
+
+ # Generate the cib.xml file
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ # Add group haclient and user hacluster
+ # NOTE(review): runs groupadd/useradd on the local host only -
+ # presumably the remote nodes already have them; confirm
+ [ -z "`grep haclient /etc/group`" ] && groupadd haclient
+ [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster
+
+ CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX}
+ python ${CIB_GEN_SCRIPT} --stdout \
+ ${HARES_LUSTRE} > ${CIB_LUSTRE}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to generate cib.xml file"\
+ "for node ${PRIM_NODENAME}!"
+ return 1
+ fi
+ fi
+
+ # scp the haresources file or cib.xml file
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
+ scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp haresources file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp cib.xml"\
+ "file to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# create_authkeys
+#
+# Create the authkeys file and scp it to the each node's /etc/ha.d/
+create_authkeys() {
+ AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
+ declare -i idx
+
+ if [ -e ${AUTHKEYS_PRIMNODE} ]; then
+ verbose_output "${AUTHKEYS_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # scp the authkeys file to all the nodes
+ # chmod 600 first; scp -p preserves the restrictive mode remotely
+ chmod 600 ${AUTHKEYS_TEMP}
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
+ scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp authkeys file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_moncf
+#
+# Create the mon.cf file and scp it to the each node's /etc/mon/
+# Existing target service names found in a previous mon.cf (hostgroup
+# entries ending in "-mon") are preserved and passed along as well.
+create_moncf() {
+ MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
+ MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
+ local srv_name params=
+ declare -i idx
+ declare -a OLD_TARGET_SRVNAMES # targets in other nodes
+ # in this failover group
+ # Initialize the OLD_TARGET_SRVNAMES array
+ unset OLD_TARGET_SRVNAMES
+
+ if [ -s ${MONCF_PRIMNODE} ]; then
+ if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
+ then
+ verbose_output "${MONCF_PRIMNODE} already exists."
+ return 0
+ else
+ # Get the Lustre target service names
+ # from the previous mon.cf file
+ # (hostgroup names carry a "-mon" suffix - strip it back off)
+ idx=0
+ for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
+ |awk '$2 ~ /-mon/ {print $2}'|xargs`
+ do
+ OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
+ |sed 's/-mon//g'`
+ idx=$(( idx + 1 ))
+ done
+ fi
+ fi
+
+ # Construct the parameters to mon.cf generation script
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ params=${params}" -n "${NODE_NAMES[idx]}
+ done
+
+ for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
+ params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
+ done
+
+ for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
+ params=${params}" -o "${TARGET_SRVNAMES[idx]}
+ done
+
+ ${SCRIPT_GEN_MONCF} ${params}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to generate mon.cf file"\
+ "by using ${SCRIPT_GEN_MONCF}!"
+ return 1
+ fi
+
+ # NOTE(review): assumes ${SCRIPT_GEN_MONCF} writes *-mon.cfg into the
+ # current working directory - confirm
+ /bin/mv *-mon.cfg ${MONCF_LUSTRE}
+
+ # scp the mon.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
+
+ scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Failed to scp mon.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# generate_config
+#
+# Generate the configuration files for Heartbeat and scp them to all the nodes
+# Heartbeat v1 additionally needs the target server names (for the -mon
+# resources) and a mon.cf file; v2 skips both steps.
+generate_config() {
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ # Get server names of Lustre targets
+ if ! get_srvnames; then
+ return 1
+ fi
+ fi
+
+ if ! create_template; then
+ return 1
+ fi
+
+ verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_hacf; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
+ "to ${PRIM_NODENAME} failover group hosts..."
+ if ! create_haresources; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_authkeys; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_moncf; then
+ return 1
+ fi
+ verbose_output "OK"
+ fi
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Check heartbeat services
+verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+check_heartbeat
+rc=$?
+# Status 2 means no new configuration should be generated - treat as
+# success and stop; status 1 is a hard error; status 0 proceeds.
+if [ "$rc" -eq "2" ]; then
+ verbose_output "OK"
+ exit 0
+elif [ "$rc" -eq "1" ]; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! generate_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_lvm.sh - configure Linux LVM devices from a csv file
+#
+################################################################################
+
+# Usage
+# Print the usage message on stderr and exit with status 1.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-h] [-v] <csv file>
+
+ This script is used to configure Linux LVM devices in a Lustre cluster
+ from a csv file.
+
+ -h help and examples
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each Linux LVM component
+ (PV, VG, LV) to be configured in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+# Print the csv-format description and worked examples on stdout; exit 0.
+sample() {
+ cat <<EOF
+
+This script is used to configure Linux LVM devices in a Lustre cluster
+from a csv file.
+
+LVM is a Logical Volume Manager for the Linux operating system. The
+three-level components of it are PV (Physical Volume), VG (Volume Group)
+and LV (Logical Volume).
+
+Each line marked with "PV" in the csv file represents one or more PVs.
+The format is:
+hostname,PV,pv names,operation mode,options
+
+hostname hostname of the node in the cluster
+PV marker of PV line
+pv names devices or loopback files to be initialized for later
+ use by LVM or to be wiped the label, e.g. /dev/sda
+ Multiple devices or files are separated by space or by
+ using shell expansions, e.g. "/dev/sd{a,b,c}"
+operation mode create or remove, default is create
+options a "catchall" for other pvcreate/pvremove options
+ e.g. "-vv"
+
+Each line marked with "VG" in the csv file represents one VG.
+The format is:
+hostname,VG,vg name,operation mode,options,pv paths
+
+hostname hostname of the node in the cluster
+VG marker of VG line
+vg name name of the volume group, e.g. ost_vg
+operation mode create or remove, default is create
+options a "catchall" for other vgcreate/vgremove options
+ e.g. "-s 32M"
+pv paths physical volumes to construct this VG, required by
+ create mode
+ Multiple PVs are separated by space or by using
+ shell expansions, e.g. "/dev/sd[k-m]1"
+
+Each line marked with "LV" in the csv file represents one LV.
+The format is:
+hostname,LV,lv name,operation mode,options,lv size,vg name
+
+hostname hostname of the node in the cluster
+LV marker of LV line
+lv name name of the logical volume to be created (optional)
+ or path of the logical volume to be removed (required
+ by remove mode)
+operation mode create or remove, default is create
+options a "catchall" for other lvcreate/lvremove options
+ e.g. "-i 2 -I 128"
+lv size size [kKmMgGtT] to be allocated for the new LV
+ Default unit is megabytes.
+vg name name of the VG in which the new LV will be created
+
+Items left blank will be set to defaults.
+
+Example:
+-------------------------------------------------------
+# MD/LVM devices on mgsnode
+# Remove the LVM devices in the mgsnode
+mgsnode,LV,/dev/mgs_vg/mdt1,remove
+mgsnode,LV,/dev/mgs_vg/mdt2,remove
+mgsnode,VG,mgs_vg,remove
+mgsnode,PV,"/dev/sd{a,b}1",remove
+
+# Create MD device in the mgsnode
+mgsnode,MD,/dev/md0,,-q,1,/dev/sda1 /dev/sdb1
+
+
+# MD/LVM devices on ostnode
+# Create MD and LVM devices in the ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c}"
+ostnode,MD,/dev/md1,,-q -c 128,5,"/dev/sd{d,e,f}"
+
+ostnode,PV,/dev/md0 /dev/md1
+ostnode,VG,ost_vg,,-s 32M,"/dev/md{0,1}"
+ostnode,LV,ost0,,-i 2 -I 128,300G,ost_vg
+ostnode,LV,ost1,,-i 2 -I 128,300G,ost_vg
+-------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+# All the LVM device items in the csv file
+# SIXTH_ITEM/SEVENTH_ITEM hold the marker-specific fields: pv paths for
+# a VG line; lv size and vg name for an LV line (unused on PV lines).
+declare -a HOST_NAME LINE_MARKER LVM_NAME OP_MODE OP_OPTS SIXTH_ITEM SEVENTH_ITEM
+
+# Variables related to background executions
+declare -a REMOTE_CMD
+declare -a REMOTE_PID
+declare -i pid_num=0
+
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "hv" OPTION; do
+ case $OPTION in
+ h)
+ sample
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: Missing csv file!"
+ usage
+fi
+
+# check_lvm_item index
+#
+# Check the items required for managing LVM device ${LVM_NAME[index]}
+# Returns 0 when the parsed csv fields at @index are consistent, 1 (with
+# a message on stderr) otherwise.
+check_lvm_item() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Missing argument!"
+ return 1
+ fi
+
+ declare -i i=$1
+
+ # Check hostname
+ if [ -z "${HOST_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "hostname item has null value!"
+ return 1
+ fi
+
+ # Check LVM device name
+ # An empty name is legal only for an LV line (lvcreate can pick the
+ # name) or for a "remove" line (whole-host teardown)
+ if [ -z "${LVM_NAME[i]}" ] \
+ && [ "${LINE_MARKER[i]}" != "${LV_MARKER}" -a "${OP_MODE[i]}" != "remove" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "LVM component name item has null value!"
+ return 1
+ fi
+
+ # Check the operation mode
+ if [ -n "${OP_MODE[i]}" ] \
+ && [ "${OP_MODE[i]}" != "create" -a "${OP_MODE[i]}" != "remove" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Invalid operation mode item - \"${OP_MODE[i]}\"!"
+ return 1
+ fi
+
+ # Check items required by create mode
+ # (empty mode defaults to create)
+ if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
+ if [ "${LINE_MARKER[i]}" = "${VG_MARKER}" -a -z "${SIXTH_ITEM[i]}" ]
+ then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "pv paths item of vg ${LVM_NAME[i]} has null value!"
+ return 1
+ fi
+
+ if [ "${LINE_MARKER[i]}" = "${LV_MARKER}" ]; then
+ if [ -z "${SIXTH_ITEM[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "lv size item has null value!"
+ return 1
+ fi
+
+ if [ -z "${SEVENTH_ITEM[i]}" ]; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "vg name item has null value!"
+ return 1
+ fi
+ fi
+ fi
+
+ return 0
+}
+
+# get_lvm_items csv_file
+#
+# Get all the LVM device items in the $csv_file and do some checks.
+# Fills the HOST_NAME/LINE_MARKER/LVM_NAME/OP_MODE/OP_OPTS/SIXTH_ITEM/
+# SEVENTH_ITEM arrays, one slot per PV/VG/LV line.
+get_lvm_items() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: get_lvm_items() error: Missing csv file!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ local LINE line_marker
+ declare -i line_num=0
+ declare -i idx=0
+
+ while read -r LINE; do
+ let "line_num += 1"
+
+ # Skip the comment line
+ [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue
+
+ # Skip the non-LVM line
+ line_marker=`echo ${LINE} | awk -F, '{print $2}'`
+ [ "${line_marker}" != "${PV_MARKER}" ] \
+ && [ "${line_marker}" != "${VG_MARKER}" ] \
+ && [ "${line_marker}" != "${LV_MARKER}" ] && continue
+
+ # Parse the config line into CONFIG_ITEM
+ # (parse_line, presumably from lc_common.sh, splits the csv fields)
+ if ! parse_line "$LINE"; then
+ return 1
+ fi
+
+ HOST_NAME[idx]=${CONFIG_ITEM[0]}
+ LINE_MARKER[idx]=${CONFIG_ITEM[1]}
+ LVM_NAME[idx]=${CONFIG_ITEM[2]}
+ OP_MODE[idx]=${CONFIG_ITEM[3]}
+ OP_OPTS[idx]=${CONFIG_ITEM[4]}
+ SIXTH_ITEM[idx]=${CONFIG_ITEM[5]}
+ SEVENTH_ITEM[idx]=${CONFIG_ITEM[6]}
+
+ # Check some required items
+ if ! check_lvm_item $idx; then
+ echo >&2 "`basename $0`: check_lvm_item() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}."
+ return 1
+ fi
+
+ let "idx += 1"
+ done < ${CSV_FILE}
+
+ return 0
+}
+
+# construct_lvm_create_cmdline index
+#
+# Construct the creation command line for ${LVM_NAME[index]}
+# Prints the pvcreate/vgcreate/lvcreate command on stdout.
+construct_lvm_create_cmdline() {
+ declare -i i=$1
+ local lvm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${PV_MARKER}")
+ # -ff -y: force re-initialization without prompting
+ lvm_cmd="pvcreate -ff -y ${OP_OPTS[i]} ${LVM_NAME[i]}"
+ ;;
+ "${VG_MARKER}")
+ lvm_cmd="vgcreate ${OP_OPTS[i]} ${LVM_NAME[i]} ${SIXTH_ITEM[i]}"
+ ;;
+ "${LV_MARKER}")
+ # LV name is optional - lvcreate names the volume itself
+ if [ -z "${LVM_NAME[i]}" ]; then
+ lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
+ else
+ lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} -n ${LVM_NAME[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
+ fi
+ ;;
+ esac
+
+ echo ${lvm_cmd}
+ return 0
+}
+
+# cmdline_rm_LVs vg_name
+#
+# Construct command line to remove all the LVs on $vg_name.
+# If $vg_name is null, then remove all the LVs in the host.
+# The command is printed on stdout (to be run remotely later).
+cmdline_rm_LVs() {
+ local vg_name=$1
+ local lvm_rm_cmd
+
+ # Deactivate first, then lvremove every "LV Name" that vgdisplay lists
+ lvm_rm_cmd="vgchange -a n ${vg_name} &&"
+ lvm_rm_cmd=${lvm_rm_cmd}" vgdisplay -v ${vg_name} | grep \"LV Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read lv; do lvremove -f \$lv; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_LV lv_path
+#
+# Construct command line to remove LV $lv_path
+# (deactivate, then force-remove); printed on stdout.
+cmdline_rm_LV() {
+ local lv_path=$1
+ local lvm_rm_cmd
+
+ lvm_rm_cmd="lvchange -a n ${lv_path} && lvremove -f ${lv_path}"
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+
+# cmdline_rm_VG vg_name
+#
+# Construct command line to remove VG $vg_name
+# (its LVs are removed first); printed on stdout.
+cmdline_rm_VG() {
+ local vg_name=$1
+ local lvm_rm_cmd
+
+ # Remove all the LVs on this VG
+ lvm_rm_cmd=$(cmdline_rm_LVs ${vg_name})
+
+ # Remove this VG
+ lvm_rm_cmd=${lvm_rm_cmd}" && vgremove ${vg_name}"
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_VGs
+#
+# Construct command line to remove all the VGs in the host
+# (all LVs are removed first); printed on stdout.
+cmdline_rm_VGs() {
+ local lvm_rm_cmd
+
+ # Remove all the LVs in the host
+ lvm_rm_cmd=$(cmdline_rm_LVs)
+
+ # Remove all the VGs in the host
+ lvm_rm_cmd=${lvm_rm_cmd}" && vgdisplay | grep \"VG Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read vg; do vgremove \$vg; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# cmdline_rm_PVs
+#
+# Construct command line to remove all the PVs in the host
+# (all LVs and VGs are removed first); printed on stdout.
+cmdline_rm_PVs() {
+ local lvm_rm_cmd
+
+ # Remove all the LVs and VGs in the host
+ lvm_rm_cmd=$(cmdline_rm_VGs)
+
+ # Remove all the PVs in the host
+ lvm_rm_cmd=${lvm_rm_cmd}" && pvdisplay | grep \"PV Name\" | awk '{print \$3}' |"
+ lvm_rm_cmd=${lvm_rm_cmd}" while read pv; do pvremove -ff -y \$pv; done"
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_teardown_cmdline index
+#
+# Construct the teardown command line for LVM devices in ${HOST_NAME[index]}
+# The teardown scope depends on the line marker: LV tears down one VG's
+# LVs, VG tears down all VGs, PV tears down everything including PVs.
+construct_lvm_teardown_cmdline() {
+ declare -i i=$1
+ local lvm_rm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${LV_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_LVs ${SEVENTH_ITEM[i]})
+ ;;
+ "${VG_MARKER}")
+ # Remove all the VGs in the host
+ lvm_rm_cmd=$(cmdline_rm_VGs)
+ ;;
+ "${PV_MARKER}")
+ # Remove all the PVs in the host
+ lvm_rm_cmd=$(cmdline_rm_PVs)
+ ;;
+ esac
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_rm_cmdline index
+#
+# Construct the remove command line for LVM device ${LVM_NAME[index]}
+# Unlike the teardown variant, this removes only the named component.
+construct_lvm_rm_cmdline() {
+ declare -i i=$1
+ local lvm_rm_cmd
+
+ case "${LINE_MARKER[i]}" in
+ "${LV_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_LV ${LVM_NAME[i]})
+ ;;
+ "${VG_MARKER}")
+ lvm_rm_cmd=$(cmdline_rm_VG ${LVM_NAME[i]})
+ ;;
+ "${PV_MARKER}")
+ lvm_rm_cmd="pvremove -ff -y ${LVM_NAME[i]}"
+ ;;
+ esac
+
+ echo ${lvm_rm_cmd}
+ return 0
+}
+
+# construct_lvm_cmdline host_name
+#
+# Construct the command line of LVM utilities to be run in the $host_name
+# The combined command is left in the global LVM_CMDLINE, with the
+# per-line commands joined by " && " in csv order.
+construct_lvm_cmdline() {
+ LVM_CMDLINE=
+ local host_name=$1
+ local lvm_cmd
+ declare -i i
+
+ # Construct command line
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ lvm_cmd=
+ if [ "${host_name}" = "${HOST_NAME[i]}" ]; then
+ case "${OP_MODE[i]}" in
+ "" | create)
+ # Construct the create command line
+ lvm_cmd=$(construct_lvm_create_cmdline ${i})
+ ;;
+ remove)
+ # An empty name means "tear down everything of this
+ # kind on the host" rather than remove one component
+ if [ -z "${LVM_NAME[i]}" ]; then
+ # Construct the teardown command line
+ lvm_cmd=$(construct_lvm_teardown_cmdline ${i})
+ else # Remove instead of teardown
+ # Construct the remove command line
+ lvm_cmd=$(construct_lvm_rm_cmdline ${i})
+ fi
+ ;;
+ *)
+ echo >&2 "`basename $0`: construct_lvm_cmdline() error:"\
+ "Invalid operation mode - \"${OP_MODE[i]}\"!"
+ return 1
+ ;;
+ esac
+
+ if [ -z "${LVM_CMDLINE}" ]; then
+ LVM_CMDLINE=${lvm_cmd}
+ else
+ LVM_CMDLINE=${LVM_CMDLINE}" && "${lvm_cmd}
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# config_lvm_devs host_name
+#
+# Run remote command to configure LVM devices in $host_name
+# The remote command is launched in the background; its command text and
+# pid are recorded in REMOTE_CMD[]/REMOTE_PID[] for config_lvm to reap.
+config_lvm_devs() {
+ local host_name=$1
+
+ # Construct the LVM utilities command line
+ if ! construct_lvm_cmdline ${host_name}; then
+ return 1
+ fi
+
+ if [ -z "${LVM_CMDLINE}" ]; then
+ verbose_output "There are no LVM devices on host ${host_name}"\
+ "needed to be configured."
+ return 0
+ fi
+
+ # Run remote command to configure LVM devices in $host_name
+ verbose_output "Configuring LVM devices in host ${host_name}..."
+ verbose_output "Configure command line is: \"${LVM_CMDLINE}\""
+ REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${LVM_CMDLINE}\""
+ ${REMOTE} ${host_name} "(${EXPORT_PATH} ${LVM_CMDLINE})" >&2 &
+ REMOTE_PID[pid_num]=$!
+ let "pid_num += 1"
+
+ return 0
+}
+
+# Run remote command to configure all the LVM devices specified
+# in the csv file
+# Fans out one background remote command per distinct host, then waits
+# for all of them; returns 1 if any remote command failed.
+config_lvm() {
+ declare -i i=0
+ declare -i idx=0 # Index of NODE_NAME array
+ local host_name
+ local failed_status
+
+ # Initialize the NODE_NAME array
+ unset NODE_NAME
+
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ host_name=${HOST_NAME[i]}
+ # Skip hosts already handled in an earlier iteration
+ # (configured_host presumably checks NODE_NAME - from lc_common.sh)
+ configured_host ${host_name} && continue
+
+ NODE_NAME[idx]=${host_name}
+ let "idx += 1"
+
+ # Run remote command to configure LVM devices in $host_name
+ if ! config_lvm_devs ${host_name}; then
+ return 1
+ fi
+ done
+
+ if [ ${#HOST_NAME[@]} -eq 0 -o ${#REMOTE_PID[@]} -eq 0 ]; then
+ verbose_output "There are no LVM devices to be configured."
+ return 0
+ fi
+
+ # Wait for the exit status of the background remote command
+ failed_status=false
+ verbose_output "Waiting for the return of the remote command..."
+ for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+ wait ${REMOTE_PID[${pid_num}]}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: config_lvm() error: Failed"\
+ "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+ failed_status=true
+ fi
+ done
+
+ if ${failed_status}; then
+ return 1
+ fi
+
+ verbose_output "All the LVM devices are configured successfully!"
+ return 0
+}
+
+# Main flow
+# Check the csv file
+# (check_file presumably validates $1 and sets CSV_FILE - lc_common.sh)
+if ! check_file $1; then
+ exit 1
+fi
+
+# Get all the LVM device items from the csv file
+if ! get_lvm_items ${CSV_FILE}; then
+ exit 1
+fi
+
+# Configure the LVM devices
+if ! config_lvm; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+#
+# lc_md.sh - configure Linux MD devices from a csv file
+#
+################################################################################
+
+# Usage
+# Print the usage message on stderr and exit with status 1.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-h] [-v] <csv file>
+
+ This script is used to configure Linux MD devices in a Lustre cluster
+ from a csv file.
+
+ -h help and examples
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each Linux MD device to be
+ configured in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+# Print the csv-format description and worked examples on stdout; exit 0.
+sample() {
+ cat <<EOF
+
+This script is used to configure Linux MD devices in a Lustre cluster
+from a csv file.
+
+Each line marked with "MD" in the csv file represents one MD device.
+The format is:
+hostname,MD,md name,operation mode,options,raid level,component devices
+
+hostname hostname of the node in the cluster
+MD marker of MD device line
+md name MD device name, e.g. /dev/md0
+operation mode create or remove, default is create
+options a "catchall" for other mdadm options, e.g. "-c 128"
+raid level raid level: 0,1,4,5,6,10,linear and multipath
+component devices block devices to be combined into the MD device
+ Multiple devices are separated by space or by using
+ shell expansions, e.g. "/dev/sd{a,b,c}"
+
+Items left blank will be set to defaults.
+
+Example:
+-------------------------------------------------------
+# MD devices on mgsnode
+mgsnode,MD,/dev/md0,,-q -c 32,1,/dev/sda1 /dev/sdb1
+mgsnode,MD,/dev/md1,,-q -c 32,1,/dev/sdc1 /dev/sdd1
+mgsnode,MD,/dev/md2,,-q -c 32,0,/dev/md0 /dev/md1
+
+# MD device on ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c,d,e}"
+-------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+# All the MD device items in the csv file, one slot per "MD" csv line
+declare -a HOST_NAME MD_NAME OP_MODE OP_OPTS RAID_LEVEL MD_DEVS
+
+# Variables related to background executions
+declare -a REMOTE_CMD
+declare -a REMOTE_PID
+declare -i pid_num=0
+
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "hv" OPTION; do
+ case $OPTION in
+ h)
+ sample
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: Missing csv file!"
+ usage
+fi
+
+# check_md_item index
+#
+# Check the items required for managing MD device ${MD_NAME[index]}
+# Returns 0 when the parsed csv fields at @index are consistent, 1 (with
+# a message on stderr) otherwise.
+check_md_item() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "Missing argument!"
+ return 1
+ fi
+
+ declare -i i=$1
+
+ # Check hostname
+ if [ -z "${HOST_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "hostname item has null value!"
+ return 1
+ fi
+
+ # Check items required by create mode
+ # (empty mode defaults to create)
+ if [ -z "${OP_MODE[i]}" -o "${OP_MODE[i]}" = "create" ]; then
+ # Check MD device name
+ if [ -z "${MD_NAME[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "md name item has null value!"
+ return 1
+ fi
+
+ if [ -z "${RAID_LEVEL[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "raid level item of MD device ${MD_NAME[i]} has null value!"
+ return 1
+ fi
+
+ if [ -z "${MD_DEVS[i]}" ]; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "component devices item of ${MD_NAME[i]} has null value!"
+ return 1
+ fi
+ fi
+
+ return 0
+}
+
+# get_md_items csv_file
+#
+# Get all the MD device items in the $csv_file and do some checks.
+# Fills the HOST_NAME/MD_NAME/OP_MODE/OP_OPTS/RAID_LEVEL/MD_DEVS arrays.
+get_md_items() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 "`basename $0`: get_md_items() error: Missing csv file!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ local LINE
+ declare -i line_num=0
+ declare -i idx=0
+
+ while read -r LINE; do
+ let "line_num += 1"
+
+ # Skip the comment line
+ [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] && continue
+
+ # Skip the non-MD line
+ [ "`echo ${LINE}|awk -F, '{print $2}'`" != "${MD_MARKER}" ] && continue
+
+ # Parse the config line into CONFIG_ITEM
+ # (parse_line, presumably from lc_common.sh, splits the csv fields)
+ if ! parse_line "$LINE"; then
+ return 1
+ fi
+
+ HOST_NAME[idx]=${CONFIG_ITEM[0]}
+ MD_NAME[idx]=${CONFIG_ITEM[2]}
+ OP_MODE[idx]=${CONFIG_ITEM[3]}
+ OP_OPTS[idx]=${CONFIG_ITEM[4]}
+ RAID_LEVEL[idx]=${CONFIG_ITEM[5]}
+ MD_DEVS[idx]=${CONFIG_ITEM[6]}
+
+ # Check some required items
+ if ! check_md_item $idx; then
+ echo >&2 "`basename $0`: check_md_item() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}."
+ return 1
+ fi
+
+ let "idx += 1"
+ done < ${CSV_FILE}
+
+ return 0
+}
+
+# md_is_active host_name md_name
+#
+# Run remote command to check whether $md_name is active in @host_name
+# Returns 0 = active, 1 = inactive, 2 = remote command failed.
+md_is_active() {
+ local host_name=$1
+ local md_name=$2
+ local cmd ret_str
+
+ # NOTE(review): grep matches a substring of the /proc/mdstat lines,
+ # so "md1" would also match "md10" - confirm this is acceptable
+ cmd="grep -q ${md_name##*/} /proc/mdstat 2>&1"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ # grep -q is silent; any output therefore came from the remote
+ # shell itself, i.e. a transport error rather than "not found"
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: md_is_active() error:"\
+ "remote command to ${host_name} error: ${ret_str}!"
+ return 2 # Error occurred
+ else
+ return 1 # inactive
+ fi
+ fi
+
+ return 0 # active
+}
+
+# construct_mdadm_create_cmdline index
+#
+# Construct the create operation command line of mdadm for ${MD_NAME[index]}
+# Prints the mdadm -C command on stdout; computes -n (raid devices) as
+# the component-device count minus any spare-device count, unless the
+# user already supplied -n/--raid-devices in the options.
+construct_mdadm_create_cmdline() {
+ declare -i i=$1
+ local cmd_line
+ local echo_disk disk line
+ declare -i alldisks=0
+ declare -i raiddisks=0
+ declare -i sparedisks=0
+
+ cmd_line="${MDADM} -C -R ${MD_NAME[i]} ${OP_OPTS[i]} -l ${RAID_LEVEL[i]}"
+
+ # If the user already set the raid-device count, pass through as-is
+ if [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#* -n*}" ]\
+ || [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#*--raid-devices*}" ]; then
+ cmd_line=${cmd_line}" ${MD_DEVS[i]}"
+ echo ${cmd_line}
+ return 0
+ fi
+
+ # FIXME: Get the number of component devices in the array
+ # NOTE(review): $disk expands locally (empty) at assignment time, so
+ # the remote loop echoes one blank line per device - the count below
+ # still comes out right, but the expansion looks unintended; confirm
+ echo_disk="for disk in ${MD_DEVS[i]}; do echo $disk; done"
+ while read line; do
+ let "alldisks += 1"
+ done < <(${REMOTE} ${HOST_NAME[i]} "${echo_disk}")
+
+ if [ ${alldisks} -eq 0 ]; then
+ echo "`basename $0`: construct_mdadm_create_cmdline() error:"\
+ "Failed to execute remote command to get the number of"\
+ "component devices of array ${MD_NAME[i]} from host ${HOST_NAME[i]}!"
+ return 1
+ fi
+
+ # Get the specified number of spare (eXtra) devices
+ if [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#* -x*}" ]; then
+ sparedisks=`echo ${OP_OPTS[i]##* -x}|awk -F" " '{print $1}'`
+ elif [ "${OP_OPTS[i]}" != "${OP_OPTS[i]#*--spare-devices*}" ]; then
+ sparedisks=`echo ${OP_OPTS[i]##*--spare-devices=}|awk -F" " '{print $1}'`
+ fi
+
+ # Get the number of raid devices in the array
+ # The number of raid devices in the array plus the number of spare devices
+ # listed on the command line must equal the number of component devices
+ # (including "missing" devices).
+ let "raiddisks = alldisks - sparedisks"
+
+ if [ ${raiddisks} -lt 1 ]; then
+ echo "`basename $0`: construct_mdadm_create_cmdline() error:"\
+ "Invalid number of raid devices in array ${MD_NAME[i]}: ${raiddisks}!"\
+ "Check the number of spare devices and whether all the component devices"\
+ "\"${MD_DEVS[i]}\" (except \"missing\" devices) exist in host ${HOST_NAME[i]}!"
+ return 1
+ fi
+
+ cmd_line=${cmd_line}" -n ${raiddisks} ${MD_DEVS[i]}"
+
+ echo ${cmd_line}
+ return 0
+}
+
+# construct_mdadm_rm_cmdline index
+#
+# Construct the remove operation command line of mdadm for ${MD_NAME[index]}
+# Prints a command that stops the array and zeroes the superblock of its
+# component devices.
+construct_mdadm_rm_cmdline() {
+ declare -i i=$1
+ local mdadm_cmd
+ local real_devs
+
+ # Deactivate the MD array, releasing all resources
+ mdadm_cmd="${MDADM} -S ${MD_NAME[i]}"
+
+ if [ -n "${MD_DEVS[i]}" ]; then
+ # Remove the "missing" devices from the component devices
+ real_devs=`echo ${MD_DEVS[i]} | sed 's/missing//g'`
+ # Overwrite the superblock with zeros
+ mdadm_cmd=${mdadm_cmd}" && ${MDADM} --zero-superblock ${real_devs}"
+ fi
+
+ echo ${mdadm_cmd}
+ return 0
+}
+
+# construct_mdadm_cmdline host_name
+#
+# Construct the command line of mdadm to be run in $host_name
+# The combined command is left in the global MDADM_CMDLINE, joined with
+# " && ". May prompt the user interactively before destructive actions.
+construct_mdadm_cmdline() {
+ MDADM_CMDLINE=
+ local host_name=$1
+ local mdadm_stop_cmd mdadm_cmd
+ local rc OK
+ declare -i i
+
+ # Construct command line
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ mdadm_stop_cmd=
+ mdadm_cmd=
+ if [ "${host_name}" = "${HOST_NAME[i]}" ]; then
+ case "${OP_MODE[i]}" in
+ "" | create)
+ # Check the status of the MD array
+ md_is_active ${host_name} ${MD_NAME[i]}
+ rc=$?
+ if [ "$rc" -eq "2" ]; then
+ return 1
+ elif [ "$rc" -eq "0" ]; then
+ # Array already active - ask before replacing it
+ OK=
+ echo -n "`basename $0`: ${MD_NAME[i]} is active on"\
+ "${host_name}, go ahead to deactivate it and create"\
+ "the new array? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: ${MD_NAME[i]} on host"\
+ "${host_name} remains as it is."
+ continue
+ fi
+
+ # Construct the remove command line
+ mdadm_stop_cmd=$(construct_mdadm_rm_cmdline ${i})
+ fi
+
+ # Construct the create command line
+ mdadm_cmd=$(construct_mdadm_create_cmdline ${i})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${mdadm_cmd}"
+ return 1
+ fi
+
+ [ -n "${mdadm_stop_cmd}" ] && mdadm_cmd=${mdadm_stop_cmd}" && "${mdadm_cmd}
+ ;;
+ remove)
+ # An empty name means "stop every array on the host"
+ if [ -z "${MD_NAME[i]}" ]; then
+ OK=
+ echo -n "`basename $0`: Do you really want to remove"\
+ "all the MD devices in the host ${HOST_NAME[i]}? [y/n]:"
+ read OK
+ if [ "${OK}" = "n" ]; then
+ echo "`basename $0`: MD devices on host"\
+ "${HOST_NAME[i]} remain as they are."
+ continue
+ fi
+
+ # Construct the teardown command line
+ mdadm_cmd="(cat /proc/mdstat | egrep \"^md[[:digit:]]\" |"
+ mdadm_cmd=${mdadm_cmd}" while read md rest; do ${MDADM} -S /dev/\$md; done)"
+ else
+ # Construct the remove command line
+ mdadm_cmd=$(construct_mdadm_rm_cmdline ${i})
+ fi
+ ;;
+ *)
+ # Other operations
+ # (OP_MODE is passed straight through as an mdadm option)
+ mdadm_cmd="${MDADM} ${OP_MODE[i]} ${MD_NAME[i]} ${OP_OPTS[i]} ${MD_DEVS[i]}"
+ ;;
+ esac
+
+ if [ -z "${MDADM_CMDLINE}" ]; then
+ MDADM_CMDLINE=${mdadm_cmd}
+ else
+ MDADM_CMDLINE=${MDADM_CMDLINE}" && "${mdadm_cmd}
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# config_md_devs host_name
+#
+# Run remote command to configure MD devices in $host_name
+# The remote command is launched in the background; its command text and
+# pid are recorded in REMOTE_CMD[]/REMOTE_PID[] for config_md to reap.
+config_md_devs() {
+ local host_name=$1
+
+ # Construct mdadm command line
+ if ! construct_mdadm_cmdline ${host_name}; then
+ return 1
+ fi
+
+ if [ -z "${MDADM_CMDLINE}" ]; then
+ verbose_output "There are no MD devices on host ${host_name}"\
+ "needed to be configured."
+ return 0
+ fi
+
+ # Run remote command to configure MD devices in $host_name
+ verbose_output "Configuring MD devices in host ${host_name}..."
+ verbose_output "Configure command line is: \"${MDADM_CMDLINE}\""
+ REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${MDADM_CMDLINE}\""
+ ${REMOTE} ${host_name} "${MDADM_CMDLINE}" >&2 &
+ REMOTE_PID[pid_num]=$!
+ let "pid_num += 1"
+ # Brief pause between launches before starting the next host
+ sleep 1
+
+ return 0
+}
+
+# Run remote command to configure all the MD devices specified in the csv file
+# Fans out one background remote command per distinct host, then waits
+# for all of them; returns 1 if any remote command failed.
+config_md() {
+ declare -i i=0
+ declare -i idx=0 # Index of NODE_NAME array
+ local host_name
+ local failed_status
+
+ # Initialize the NODE_NAME array
+ unset NODE_NAME
+
+ for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+ host_name=${HOST_NAME[i]}
+ # Skip hosts already handled in an earlier iteration
+ configured_host ${host_name} && continue
+
+ NODE_NAME[idx]=${host_name}
+ let "idx += 1"
+
+ # Run remote command to configure MD devices in $host_name
+ if ! config_md_devs ${host_name}; then
+ return 1
+ fi
+ done
+
+ if [ ${#HOST_NAME[@]} -eq 0 -o ${#REMOTE_PID[@]} -eq 0 ]; then
+ verbose_output "There are no MD devices to be configured."
+ return 0
+ fi
+
+ # Wait for the exit status of the background remote command
+ verbose_output "Waiting for the return of the remote command..."
+ failed_status=false
+ for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+ wait ${REMOTE_PID[${pid_num}]}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: config_md() error: Failed"\
+ "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+ failed_status=true
+ fi
+ done
+
+ if ${failed_status}; then
+ return 1
+ fi
+
+ verbose_output "All the MD devices are configured successfully!"
+ return 0
+}
+
+# Main flow
+# check_file, get_md_items and CSV_FILE come from earlier in this script
+# or from lc_common.sh (not visible here) - check_file presumably also
+# sets CSV_FILE from $1.  TODO confirm against the full script.
+# Check the csv file
+if ! check_file $1; then
+ exit 1
+fi
+
+# Get all the MD device items from the csv file
+if ! get_md_items ${CSV_FILE}; then
+ exit 1
+fi
+
+# Configure the MD devices
+if ! config_md; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lc_modprobe.sh - add lustre module options into modprobe.conf or
+# modules.conf
+#
+#################################################################################
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+# Check the kernel version
+# 2.4 kernels use /etc/modules.conf; 2.6 and later use /etc/modprobe.conf.
+KERNEL_VERSION=`uname -r`
+KERNEL_VERSION=${KERNEL_VERSION:0:3}
+
+if [ "${KERNEL_VERSION}" = "2.4" ]; then
+ MODULE_CONF=/etc/modules.conf
+else
+ MODULE_CONF=/etc/modprobe.conf
+fi
+
+# The lnet options lines are passed on stdin as one line with literal
+# "\n" separators between options (see generate_lnet_lines below).
+read -r NETWORKS
+# Use mktemp rather than a predictable /tmp/modlines$$.txt name so a
+# local attacker cannot pre-create or symlink the temp file.
+MODLINES_FILE=`mktemp /tmp/modlines.XXXXXX` || exit 1
+START_MARKER=$"# start lustre config"
+END_MARKER=$"# end lustre config"
+
+# Generate a temp file containing the lnet options lines
+# Splits the single stdin line held in ${NETWORKS} on literal "\n"
+# separators and writes one option per line into ${MODLINES_FILE},
+# bracketed by START_MARKER/END_MARKER so the block can later be
+# removed with a single sed range delete.
+generate_lnet_lines() {
+ local LNET_LINE TMP_LINE
+
+ TMP_LINE="${NETWORKS}"
+
+ echo ${START_MARKER} > ${MODLINES_FILE}
+ echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE}
+ while true; do
+ # First segment before the next literal "\n"
+ LNET_LINE=${TMP_LINE%%\\n*}
+ echo ${LNET_LINE} >> ${MODLINES_FILE}
+
+ # Drop the consumed segment (and its "\n" separator)
+ TMP_LINE=${TMP_LINE#*\\n}
+
+ # When no "\n" remains, neither expansion changes the string,
+ # so the two are equal and the loop terminates.
+ if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then
+ break
+ fi
+ done
+ echo ${END_MARKER} >> ${MODLINES_FILE}
+
+ #echo "--------------${MODLINES_FILE}--------------"
+ #cat ${MODLINES_FILE}
+ #echo -e "------------------------------------------\n"
+
+ return 0
+}
+
+if ! generate_lnet_lines; then
+ exit 1
+fi
+
+# fcanon: presumably canonicalizes the path (follows symlinks); defined
+# in lc_common.sh - TODO confirm
+MODULE_CONF=$(fcanon ${MODULE_CONF})
+# Add lnet options lines to the module configuration file
+if [ -e ${MODULE_CONF} ]; then
+ # Delete the old options
+ # Removing the previous marker-delimited block makes the script
+ # idempotent: re-running replaces rather than duplicates the options.
+ sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF}
+fi
+
+cat ${MODLINES_FILE} >> ${MODULE_CONF}
+rm -f ${MODLINES_FILE}
+exit 0
--- /dev/null
+#!/bin/sh
+
+# Given one or more Lustre objects, create a mon configuration file
+# naming the mon watches based on the Lustre object names
+# For each Lustre object, the script will create two mon watches
+# The first watch sets a trap, and the second watch clears the
+# trap if Lustre is healthy.
+
+# This may be more fun in Perl due to the need to support a list
+# of objects
+
+# (plus we could use a Perl format for this goop)
+
+MONBASEDIR=${MONBASEDIR:-/usr/local/lib/mon}
+MONCFGDIR=${MONCFGDIR:-/etc/mon}
+# Timestamp prefix for the generated config file name.
+# NOTE(review): %y_%m%d_%S has no hour/minute field, so two runs in the
+# same second of different minutes collide - confirm this is intended.
+TD=`date +%y_%m%d_%S`
+# Output file is created in the current working directory.
+TMPMONCFG=${TD}-mon.cfg
+# Determines how often we will check Lustre health
+CHECKINTERVAL="3m"
+# Determines how quickly we must clear the trap
+TRAPINTERVAL="6m"
+ALERTSCRIPT=${ALERTSCRIPT:-"fail_lustre.alert"}
+TRAPSCRIPT=${TRAPSCRIPT:-"lustre.mon.trap"}
+
+# We will assume all inputs are Lustre objects
+# file locations and timeouts correct to taste
+# Correct to taste
+# print_header - append the global mon daemon settings (directories,
+# logging, process limits) to $TMPMONCFG.  The <<-EOF form strips
+# leading tabs (not spaces) from the here-doc body.
+print_header() {
+ cat >> $TMPMONCFG <<-EOF
+ cfbasedir = $MONCFGDIR
+ alertdir = $MONBASEDIR/alert.d
+ mondir = $MONBASEDIR/mon.d
+ statedir = $MONBASEDIR/state.d
+ logdir = $MONBASEDIR/log.d
+ dtlogfile = $MONBASEDIR/log.d/downtime.log
+ maxprocs = 20
+ histlength = 100
+ randstart = 60s
+ authtype = getpwnam
+EOF
+}
+
+# Tabs should be preserved in the config file
+# $1 object name
+# we do not set a period, it is assumed monitor is always active
+
+# print_trap_rec - append the trap-receiving watch for object $1:
+# if the trap is not re-armed within $TRAPINTERVAL, $ALERTSCRIPT fires.
+print_trap_rec() {
+ cat >> $TMPMONCFG <<EOF
+#
+watch ${1}-obj
+ service ${1}_ser
+ description triggers heartbeat failure if trap springs on $1
+ traptimeout $TRAPINTERVAL
+ period
+ alert $ALERTSCRIPT
+
+# end ${1}-obj
+
+EOF
+
+}
+
+# print_trap_send - append the trap-sending watch for object $1:
+# every $CHECKINTERVAL, $TRAPSCRIPT checks health and clears the trap.
+print_trap_send() {
+ cat >> $TMPMONCFG <<EOF
+#
+watch ${1}-mon
+ service ${1}_mon_ser
+ description clears trap for $1
+ interval $CHECKINTERVAL
+ monitor $TRAPSCRIPT ${1}-obj ${1}_ser ${1}
+ period
+ alert $ALERTSCRIPT
+# end ${1}-mon
+EOF
+
+}
+
+# usage - print a short synopsis and exit non-zero.
+usage() {
+ echo "$0 -n <node> -n <node> -o <Lustre object> -o <Lustre object>...."
+ echo "Creates the /etc/mon/mon.cf file to monitor Lustre objects"
+ exit 1
+}
+
+
+# Start of script
+
+if [ $# -eq 0 ];then
+ usage
+fi
+
+# This script should work for any number of hosts
+#
+HOSTCNT=0
+OBJCNT=0
+
+declare -a HOSTS
+declare -a OBJS
+
+# -n may be repeated for each host, -o for each Lustre object.
+while getopts "n:o:" opt; do
+ case $opt in
+ n) HOSTS[HOSTCNT]=$OPTARG
+ HOSTCNT=$(( HOSTCNT + 1 ))
+ ;;
+ o) OBJS[OBJCNT]=$OPTARG
+ OBJCNT=$(( OBJCNT + 1 ))
+ ;;
+ *) usage
+ ;;
+ esac
+done
+
+echo "Found $HOSTCNT hosts"
+echo "Found $OBJCNT Lustre objects"
+
+# First create the host groups
+# we assume
+# each object will have two watches defined
+# each object hostgroup will have all objects
+
+# Create the file with the declared goop
+print_header
+
+# Quote the array expansion so object names survive word splitting
+# and globbing intact.
+for obj in "${OBJS[@]}"
+do
+ echo "hostgroup ${obj}-obj ${HOSTS[@]}" >> $TMPMONCFG
+ echo "hostgroup ${obj}-mon ${HOSTS[@]}" >> $TMPMONCFG
+ echo "#" >> $TMPMONCFG
+done
+
+# create the monitors
+
+for obj in "${OBJS[@]}"
+do
+ print_trap_send $obj
+ print_trap_rec $obj
+done
+
+echo "Mon config completed - new mon config is $TMPMONCFG"
+exit 0
\ No newline at end of file
--- /dev/null
+#!/bin/bash
+#
+# lc_net.sh - script for Lustre cluster network verification
+#
+###############################################################################
+
+# Usage
+# Print the synopsis to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-v] <csv file>
+
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cl-
+ uster, the first field of each line is the host name
+ of the cluster node
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+# VERBOSE_OUTPUT is consumed by verbose_output() from lc_common.sh
+# (presumably - TODO confirm against lc_common.sh).
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "v" OPTION; do
+ case $OPTION in
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: Missing csv file!"
+ usage
+fi
+
+# Global variables
+CSV_FILE=$1
+declare -a HOST_NAMES
+declare -a HOST_IPADDRS
+
+# Get the host names from the csv file
+# Fills the global HOST_NAMES array with the first field of every
+# non-comment, non-blank csv line, skipping duplicates.  Returns 1 if
+# any non-blank line lacks an alphanumeric hostname field.
+get_hostnames() {
+ local NAME CHECK_STR
+ declare -i i
+ declare -i j
+
+ # Initialize the HOST_NAMES array
+ unset HOST_NAMES
+
+ # Find lines that have content but whose first csv field contains
+ # no alphanumeric character, i.e. a missing hostname.
+ CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \
+ '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'`
+ if [ -n "${CHECK_STR}" ]; then
+ echo >&2 $"`basename $0`: get_hostnames() error: Missing"\
+ "hostname field in the line - ${CHECK_STR}"
+ return 1
+ fi
+
+ i=0
+ for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\
+ | awk -F, '/[[:alnum:]]/{print $1}'`
+ do
+ # "continue 2" skips to the next NAME when a duplicate is found.
+ for ((j = 0; j < ${#HOST_NAMES[@]}; j++)); do
+ [ "${NAME}" = "${HOST_NAMES[j]}" ] && continue 2
+ done
+
+ HOST_NAMES[i]=${NAME}
+ # Arithmetic assignment: i was declared with "declare -i".
+ i=$i+1
+ done
+
+ return 0
+}
+
+# ping_host host_name
+# Check whether host $host_name is reachable.
+# If it is, then return the IP address of this host.
+# Note: both the IP address (success) and the error message (failure)
+# are written to stdout; the caller distinguishes them by the return
+# code (see local_check).
+ping_host() {
+ local host_name=$1
+ local ip_addr=
+ local ret_str
+
+ if [ -z "${host_name}" ]; then
+ echo "`basename $0`: ping_host() error: Missing hostname!"
+ return 1
+ fi
+
+ # Run ping command
+ ret_str=`ping -c1 ${host_name} 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo "`basename $0`: ping_host() error: ${ret_str}!"
+ else
+ echo "`basename $0`: ping_host() error:"\
+ "Host ${host_name} does not respond to ping!"
+ fi
+ return 1
+ fi
+
+ # Get the IP address
+ # Field 3 of the first ping line is "(a.b.c.d)"; strip the parens.
+ # Assumes Linux iputils ping output format - TODO confirm
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \
+ sed -e 's/^(//' -e 's/)$//'`
+
+ echo "${ip_addr}"
+ return 0
+}
+
+# local_check index
+# Check the network connectivity between local host and ${HOST_NAMES[index]}.
+# On success stores the resolved IP in HOST_IPADDRS[index]; on failure
+# the captured text is ping_host()'s error message and is echoed to stderr.
+local_check() {
+ declare -i i=$1
+
+ # Check whether ${HOST_NAMES[i]} is reachable
+ # and get the IP address of this host from ping
+ HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${HOST_IPADDRS[i]}"
+ return 1
+ fi
+
+ return 0
+}
+
+# remote_check index
+# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
+# this host agrees with the local host about what its name is resolved to.
+remote_check() {
+ declare -i i=$1
+ local cmd ret_str
+ local ip_addr= # the IP address got from remote ping
+
+ # Execute remote command to check whether ${HOST_NAMES[i]}
+ # can resolve its own name
+ cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
+ ret_str=`${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "remote to ${HOST_NAMES[i]} error: ${ret_str}!"
+ return 1
+ fi
+
+ if [ -z "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "No results from ${HOST_NAMES[i]}! Check the network"\
+ "connectivity between local host and ${HOST_NAMES[i]}!"
+ return 1
+ fi
+
+ # Get the IP address of ${HOST_NAMES[i]} from its own ping
+ # pdsh prefixes every output line with "host: ", shifting the
+ # "(a.b.c.d)" token from field 3 to field 4.
+ if is_pdsh; then
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'`
+ else
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'`
+ fi
+ ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'`
+
+ # Compare IP addresses
+ # Check whether ${HOST_NAMES[i]} agrees with the local host
+ # about what its name is resolved to.
+ if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "Local host resolves ${HOST_NAMES[i]} to IP address"\
+ "\"${HOST_IPADDRS[i]}\", while its own resolution is"\
+ "\"${ip_addr}\". They are not the same!"
+ return 1
+ fi
+
+ return 0
+}
+
+# network_verify
+# Verify name resolution and network connectivity of the Lustre cluster
+# For every host in the csv file (except the local host) run the local
+# reachability check and the remote self-resolution check.  Stops at the
+# first failing host.
+network_verify() {
+ declare -i i
+
+ # Initialize the HOST_IPADDRS array
+ unset HOST_IPADDRS
+
+ # Get all the host names from the csv file
+ ! get_hostnames && return 1
+
+ # Check the network connectivity between local host
+ # and other cluster nodes
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ # Skip the local host itself.
+ [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
+
+ verbose_output "Verifying network connectivity between"\
+ "\"`hostname`\" and \"${HOST_NAMES[i]}\"..."
+ ! local_check $i && return 1
+ ! remote_check $i && return 1
+ verbose_output "OK"
+ done
+
+ return 0
+}
+
+# Main flow
+# check_file is provided by lc_common.sh (sourced above).
+if ! check_file ${CSV_FILE}; then
+ exit 1
+fi
+
+# Cluster network verification
+if ! network_verify; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lc_servip.sh - script for verifying the service IP and the real
+# interface IP in a remote host are in the same subnet
+#
+###############################################################################
+
+# Usage
+# Print the synopsis to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <service IPaddr> <hostname>
+
+ service IPaddr the IP address to failover
+ hostname the hostname of the remote node
+
+EOF
+ exit 1
+}
+
+# Check arguments
+# Both the service IP address and the hostname are required.
+if [ $# -lt 2 ]; then
+ usage
+fi
+
+# Remote command
+# May be overridden from the environment (e.g. to use pdsh).
+REMOTE=${REMOTE:-"ssh -x -q"}
+
+# Check whether the remote command is pdsh
+# Returns 0 if ${REMOTE} contains "pdsh" (pdsh prefixes each output
+# line with the host name, so callers must strip that field), 1 otherwise.
+is_pdsh() {
+ if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+#
+# inSameIPsubnet serviceIPaddr interfaceIPaddr mask
+#
+# Given two IP addresses and a subnet mask determine if these IP
+# addresses are in the same subnet. If they are, return 0, else return 1.
+# All three arguments must be IPv4 dotted-decimal quads.
+#
+inSameIPsubnet() {
+ declare -i n
+ declare -ia mask
+ declare -ia ip1 ip2 # IP addresses given
+ declare -i quad1 quad2 # calculated quad words
+
+ #
+ # Remove '.' characters from dotted decimal notation and save
+ # in arrays. i.e.
+ #
+ # 192.168.1.163 -> array[0] = 192
+ # array[1] = 168
+ # array[2] = 1
+ # array[3] = 163
+ #
+ let n=0
+ for quad in $(echo $1 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip1[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $2 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip2[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $3 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ mask[n]=$quad
+ let n=n+1
+ done
+
+ #
+ # For each quad word, logically AND the IP address with the subnet
+ # mask to get the network/subnet quad word. If the resulting
+ # quad words for both IP addresses are the same they are in the
+ # same IP subnet.
+ #
+ for n in 0 1 2 3
+ do
+ # let $((...)) evaluates the assignment arithmetically.
+ let $((quad1=${ip1[n]} & ${mask[n]}))
+ let $((quad2=${ip2[n]} & ${mask[n]}))
+
+ if [ $quad1 != $quad2 ]; then
+ echo >&2 $"`basename $0`: Service IP address $1 and"\
+ "real interface IP address $2 are in"\
+ "different subnets!"
+ return 1 # in different subnets
+ fi
+ done
+
+ return 0 # in the same subnet, all quad words matched
+}
+
+#
+# findInterface IPaddr hostname
+#
+# Given a target IP address and a hostname, find the interface in which
+# this address is configured. If found return 0, if not return 1. The
+# interface name is returned to stdout.
+#
+# Parses remote /sbin/ifconfig output: the outer read consumes an
+# interface header line, the inner read walks that interface's detail
+# lines (a blank line ends each interface stanza).  Assumes legacy
+# net-tools ifconfig format with "addr:" fields - TODO confirm on
+# the target distribution.
+#
+findInterface() {
+ declare ret_line
+ declare line
+ declare intf
+ declare addr
+ declare state
+
+ declare target=$1
+ declare hostname=$2
+
+ while read ret_line
+ do
+ set -- ${ret_line}
+ # pdsh prefixes output lines with "host:"; drop that token.
+ is_pdsh && shift
+ intf="$1"
+ shift
+ line="$*"
+
+ while read line
+ do
+ if [ "$line" = "" ]; then # go to next interface
+ continue 2
+ fi
+
+ set - $line
+ addr=
+ while [ $# -gt 0 ]; do
+ case $1 in
+ addr:*)
+ addr=${1##addr:}
+ if [ -n "$addr" -a "$addr" = "$target" ]
+ then
+ echo $intf
+ return 0
+ fi
+ ;;
+ esac
+ shift
+ done
+ done
+ done < <(${REMOTE} $hostname /sbin/ifconfig)
+
+ echo >&2 "`basename $0`: Cannot find the interface in which" \
+ "$target is configured in the host $hostname!"
+ return 1
+}
+
+#
+# findNetmask interface hostname
+#
+# Given an interface find the netmask addresses associated with it.
+# Return 0 when found, else return 1. The netmask is returned to stdout.
+#
+# Scans remote "ifconfig <interface>" output for the first "Mask:"
+# token (legacy net-tools format).
+#
+findNetmask() {
+ declare ret_line
+ declare line
+ declare addr
+ declare target=$1
+ declare hostname=$2
+
+ while read ret_line
+ do
+ set -- ${ret_line}
+ # pdsh prefixes output lines with "host:"; drop that token.
+ is_pdsh && shift
+ line="$*"
+
+ set - $line
+
+ while [ $# -gt 0 ]; do
+ case $1 in
+ Mask:*)
+ echo ${1##*:} # return netmask addr
+ return 0
+ ;;
+ esac
+ shift
+ done
+ done < <(${REMOTE} $hostname /sbin/ifconfig $target)
+
+ echo >&2 "`basename $0`: Cannot find the netmask associated with" \
+ "the interface $target in the host $hostname!"
+ return 1
+}
+
+#
+# check_srvIPaddr serviceIPaddr hostname
+#
+# Given a service IP address and hostname, check whether the service IP address
+# and the real interface IP address of hostname are in the same subnet.
+# If they are, return 0, else return 1.
+#
+check_srvIPaddr() {
+ declare real_IPaddr
+ declare real_intf
+ declare netmask
+ declare srv_IPaddr=$1
+ declare hostname=$2
+
+ # Get the corresponding IP address of the hostname from /etc/hosts table
+ real_IPaddr=`egrep "[[:space:]]$hostname([[:space:]]|$)" /etc/hosts \
+ | awk '{print $1}'`
+ if [ -z "$real_IPaddr" ]; then
+ echo >&2 "`basename $0`: Hostname $hostname does not exist in" \
+ "the local /etc/hosts table!"
+ return 1
+ fi
+
+ # A single IPv4 dotted quad is at most 15 characters; anything
+ # longer means the egrep above matched more than one line.
+ if [ ${#real_IPaddr} -gt 15 ]; then
+ echo >&2 "`basename $0`: More than one IP address line" \
+ "corresponding to $hostname in the local" \
+ "/etc/hosts table!"
+ return 1
+ fi
+
+ # Get the interface in which the real IP address is configured
+ real_intf=$(findInterface $real_IPaddr $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ # Strip any alias suffix (eth0:1 -> eth0).
+ real_intf=${real_intf%%:*}
+
+ # Get the netmask address associated with the real interface
+ netmask=$(findNetmask $real_intf $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ # Determine if the service IP address and the real IP address
+ # are in the same subnet
+ inSameIPsubnet $srv_IPaddr $real_IPaddr $netmask
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Check service IP address
+if ! check_srvIPaddr $1 $2; then
+ exit 1
+fi
+exit 0
--- /dev/null
+#!/usr/bin/perl
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# convert an lmc batch file to a csv file for lustre_config.sh
+#
+use strict; use warnings;
+
+use Data::Dumper;
+
+# get_arg_val(name, \@args) - find "--name value" in the referenced
+# argument list, splice both tokens out of it, and return the value.
+# Returns undef (falls off the end) when the option is absent.
+sub get_arg_val {
+ my $arg = shift;
+ my ($aref) = @_;
+ for (my $i = 0; $i <= $#$aref; $i++) {
+ if ($$aref[$i] eq "--" . $arg) {
+ # Remove the flag and its value from the caller's list.
+ my @foo = splice(@$aref, $i, 2);
+ return $foo[1];
+ }
+ }
+}
+
+# get_arg(name, \@args) - find the boolean flag "--name", splice it out
+# of the referenced list, and return 1 if present, 0 otherwise.
+sub get_arg {
+ my $arg = shift;
+ my ($aref) = @_;
+ for (my $i = 0; $i <= $#$aref; $i++) {
+ if ($$aref[$i] eq "--" . $arg) {
+ splice(@$aref, $i, 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+# The add_* subs each consume the recognized "--opt value" pairs from
+# @_ via get_arg_val()/get_arg() and return a hashref describing the
+# object.  After the splicing, anything left in @_ is an unrecognized
+# argument; "if (@_)" catches even a single stray token, which the old
+# "$#_ > 0" test (requiring two or more leftovers) silently missed.
+sub add_net {
+ my $net = {};
+ $net->{"node"} = get_arg_val("node", \@_);
+ $net->{"nid"} = get_arg_val("nid", \@_);
+ $net->{"nettype"} = get_arg_val("nettype", \@_);
+ $net->{"port"} = get_arg_val("port", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add net\": @_\n";
+ exit(1);
+ }
+ return $net;
+}
+
+sub add_mds {
+ my $mds = {};
+ $mds->{"node"} = get_arg_val("node", \@_);
+ $mds->{"mds"} = get_arg_val("mds", \@_);
+ $mds->{"fstype"} = get_arg_val("fstype", \@_);
+ $mds->{"dev"} = get_arg_val("dev", \@_);
+ $mds->{"size"} = get_arg_val("size", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add mds\": @_\n";
+ exit(1);
+ }
+ return $mds;
+}
+
+sub add_lov {
+ my $lov = {};
+ $lov->{"lov"} = get_arg_val("lov", \@_);
+ $lov->{"mds"} = get_arg_val("mds", \@_);
+ $lov->{"stripe_sz"} = get_arg_val("stripe_sz", \@_);
+ $lov->{"stripe_cnt"} = get_arg_val("stripe_cnt", \@_);
+ $lov->{"stripe_pattern"} = get_arg_val("stripe_pattern", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add lov\": @_\n";
+ exit(1);
+ }
+ return $lov;
+}
+
+sub add_ost {
+ my $ost = {};
+ $ost->{"node"} = get_arg_val("node", \@_);
+ $ost->{"ost"} = get_arg_val("ost", \@_);
+ $ost->{"fstype"} = get_arg_val("fstype", \@_);
+ $ost->{"dev"} = get_arg_val("dev", \@_);
+ $ost->{"size"} = get_arg_val("size", \@_);
+ $ost->{"lov"} = get_arg_val("lov", \@_);
+ $ost->{"mountfsoptions"} = get_arg_val("mountfsoptions", \@_);
+ $ost->{"failover"} = get_arg("failover", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add ost\": @_\n";
+ exit(1);
+ }
+ return $ost;
+}
+
+sub add_mtpt {
+ my $mtpt = {};
+ $mtpt->{"node"} = get_arg_val("node", \@_);
+ $mtpt->{"path"} = get_arg_val("path", \@_);
+ $mtpt->{"mds"} = get_arg_val("mds", \@_);
+ $mtpt->{"lov"} = get_arg_val("lov", \@_);
+ if (@_) {
+ print STDERR "Unknown arguments to \"--add mtpt\": @_\n";
+ exit(1);
+ }
+ return $mtpt;
+}
+
+# Symbolic references are needed for the &$subref("add_$type") dispatch
+# in the main loop below.
+no strict 'refs';
+
+# find_obj(type, key, value, @objs) - return the first hashref in @objs
+# whose $key field equals $value; returns undef when no object matches.
+# ($type is informational only and not used in the lookup.)
+sub find_obj {
+ my $type = shift;
+ my $key = shift;
+ my $value = shift;
+ my @objs = @_;
+
+ foreach my $obj (@objs) {
+ if ($obj->{$key} eq $value) {
+ return $obj;
+ }
+ }
+}
+
+# lnet_options(\%net) - build the "options lnet ..." modprobe line for a
+# net object, appending accept_port only when a port was specified.
+sub lnet_options {
+ my $net = shift;
+
+ my $options_str = "options lnet networks=" . $net->{"nettype"} .
+ " accept=all";
+ if (defined($net->{"port"})) {
+ $options_str .= " accept_port=" . $net->{"port"};
+ }
+ return $options_str;
+
+}
+
+# main
+
+# %objs maps object type ("net", "mds", "lov", "ost", "mtpt") to an
+# arrayref of parsed objects; @mgses collects the MGS NIDs so the ost
+# lines can reference them.
+my %objs;
+my @mgses;
+
+my $MOUNTPT = "/mnt";
+if (defined($ENV{"MOUNTPT"})) {
+ $MOUNTPT = $ENV{"MOUNTPT"};
+}
+
+# Parse each lmc batch line: find "--add <type>" and dispatch the rest
+# of the tokens to the matching add_<type> sub (symbolic call; see
+# "no strict 'refs'" above).
+while(<>) {
+ my @args = split;
+
+ for (my $i = 0; $i <= $#args; $i++) {
+ if ($args[$i] eq "--add") {
+ my $type = "$args[$i + 1]";
+ my $subref = "add_$type";
+ splice(@args, $i, 2);
+ push(@{$objs{$type}}, &$subref(@args));
+ last;
+ }
+ if ($i == $#args) {
+ print STDERR "I don't know how to handle @args\n";
+ exit(1);
+ }
+ }
+}
+
+# link lovs to mdses
+foreach my $lov (@{$objs{"lov"}}) {
+ my $mds = find_obj("mds", "mds", $lov->{"mds"}, @{$objs{"mds"}});
+ $mds->{"lov"} = $lov;
+}
+# XXX could find failover pairs of osts and mdts here and link them to
+# one another and then fill in their details in the csv generators below
+foreach my $mds (@{$objs{"mds"}}) {
+ # find the net for this node
+ my $net = find_obj("net", "node", $mds->{"node"}, @{$objs{"net"}});
+ my $lov = $mds->{"lov"};
+ # Translate the lov stripe settings into mkfs.lustre --param options.
+ my $mkfs_options="";
+ if (defined($lov->{"stripe_sz"})) {
+ $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . ",";
+ }
+ if (defined($lov->{"stripe_cnt"})) {
+ $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . ",";
+ }
+ if (defined($lov->{"stripe_pattern"})) {
+ $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . ",";
+ }
+ # Drop the trailing comma left by the concatenations above.
+ chop($mkfs_options);
+ if ($mkfs_options ne "") {
+ $mkfs_options = " --param=\"$mkfs_options\"";
+ }
+
+ # Emit one combo mgs|mdt csv line (lustre_config.sh format).
+ printf "%s,%s,%s,$MOUNTPT/%s,mgs|mdt,,,,--device-size=%s --noformat%s,,noauto\n",
+ $mds->{"node"},
+ lnet_options($net),
+ $mds->{"dev"},
+ $mds->{"mds"},
+ $mds->{"size"},
+ $mkfs_options;
+
+ push(@mgses, $net->{"nid"});
+}
+
+foreach my $ost (@{$objs{"ost"}}) {
+ # find the net for this node
+ my $mount_opts="noauto";
+ if (defined($ost->{"mountfsoptions"})) {
+ $mount_opts .= "," . $ost->{"mountfsoptions"};
+ }
+ my $net = find_obj("net", "node", $ost->{"node"}, @{$objs{"net"}});
+ # Emit one ost csv line, pointing at every MGS NID collected above.
+ printf "%s,%s,%s,$MOUNTPT/%s,ost,,\"%s\",,--device-size=%s --noformat,,\"%s\"\n",
+ $ost->{"node"},
+ lnet_options($net),
+ $ost->{"dev"},
+ $ost->{"ost"},
+ join(",", @mgses),
+ $ost->{"size"},
+ $mount_opts;
+}
: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml}
: ${LCONF:=/usr/sbin/lconf}
: ${LCTL:=/usr/sbin/lctl}
+# Some distros use modprobe.conf.local
+if [ -f /etc/modprobe.conf.local ]; then
+ : ${MODPROBE_CONF:=/etc/modprobe.conf.local}
+else
+ : ${MODPROBE_CONF:=/etc/modprobe.conf}
+fi
+# Be sure the proper directories are in PATH.
+export PATH="/sbin:$PATH"
+
+case "$SERVICE" in
+ [SK][[:digit:]][[:digit:]]lustre | lustre)
+ SERVICE="lustre"
+ : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+ *)
+ : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"}
+ : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"}
+ ;;
+esac
+LOCK=/var/lock/subsys/$SERVICE
case "$SERVICE" in
[SK][[:digit:]][[:digit:]]lustre | lustre)
RETVAL=4 # insufficent privileges
return
fi
- ${LCONF} ${LCONF_START_ARGS}
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ modprobe lnet
+ ${LCTL} network configure
+ else
+ ${LCONF} ${LCONF_START_ARGS}
+ fi
RETVAL=$?
echo $SERVICE
if [ $RETVAL -eq 0 ]; then
RETVAL=4 # insufficent privileges
return
fi
- ${LCONF} ${LCONF_STOP_ARGS}
+ # Cat the modprobe file and place all lines that follow a trailing backslash on the same line
+ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"`
+ if [[ ! -z ${ROUTER} ]]; then
+ MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet`
+ if [[ ! -z ${MODULE_LOADED} ]]; then
+ ${LCTL} network unconfigure
+ fi
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+ # do it again, in case we tried to unload ksocklnd too early
+ ${LCTL} modules | awk '{ print $2 }' | xargs rmmod
+
+ else
+ ${LCONF} ${LCONF_STOP_ARGS}
+ fi
RETVAL=$?
echo $SERVICE
rm -f $LOCK
[ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
# check for any configured devices (may indicate partial startup)
- [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
+ if [ -d /proc/fs/lustre ]; then
+ [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
- # check for either a server or a client filesystem
- MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
- OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
- LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
- [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+ # check for either a server or a client filesystem
+ MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
+ OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
+ LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
+ [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
+ else
+ # check if this is a router
+ if [ -d /proc/sys/lnet ]; then
+ ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`"
+ if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then
+ STATE="running"
+ RETVAL=0
+ fi
+ fi
+ fi
# check for server disconnections
DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
--- /dev/null
+#!/bin/bash
+
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# lustre_config.sh - format and set up multiple lustre servers from a csv file
+#
+# This script is used to parse each line of a spreadsheet (csv file) and
+# execute remote commands to format (mkfs.lustre) every Lustre target
+# that will be part of the Lustre cluster.
+#
+# In addition, it can also verify the network connectivity and hostnames in
+# the cluster, configure Linux MD/LVM devices and produce High-Availability
+# software configurations for Heartbeat or CluManager.
+#
+################################################################################
+
+# Usage
+# Print the option summary to stderr and exit non-zero.
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-t HAtype] [-n] [-d] [-f] [-m] [-h] [-v] <csv file>
+
+ This script is used to format and set up multiple lustre servers from a
+ csv file.
+
+ -h help and examples
+ -t HAtype produce High-Availability software configurations
+ The argument following -t is used to indicate the High-
+ Availability software type. The HA software types which
+ are currently supported are: hbv1 (Heartbeat version 1)
+ and hbv2 (Heartbeat version 2).
+ -n no net - don't verify network connectivity and hostnames
+ in the cluster
+ -d configure Linux MD/LVM devices before formatting the
+ Lustre targets
+ -f force-format the Lustre targets using --reformat option
+ -m no fstab change - don't modify /etc/fstab to add the new
+ Lustre targets
+ If using this option, then the value of "mount options"
+ item in the csv file will be passed to mkfs.lustre, else
+ the value will be added into the /etc/fstab.
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cluster
+
+EOF
+ exit 1
+}
+
+# Samples
+sample() {
+ cat <<EOF
+
+This script is used to parse each line of a spreadsheet (csv file) and
+execute remote commands to format (mkfs.lustre) every Lustre target
+that will be part of the Lustre cluster.
+
+It can also optionally:
+ * verify the network connectivity and hostnames in the cluster
+ * configure Linux MD/LVM devices
+ * modify /etc/modprobe.conf to add Lustre networking info
+ * add the Lustre server info to /etc/fstab
+ * produce configurations for Heartbeat or CluManager.
+
+There are 5 kinds of line formats in the csv file. They represent the following
+targets:
+1) Linux MD device
+The format is:
+hostname,MD,md name,operation mode,options,raid level,component devices
+
+hostname hostname of the node in the cluster
+MD marker of MD device line
+md name MD device name, e.g. /dev/md0
+operation mode create or remove, default is create
+options a "catchall" for other mdadm options, e.g. "-c 128"
+raid level raid level: 0,1,4,5,6,10,linear and multipath
+component devices block devices to be combined into the MD device
+ Multiple devices are separated by space or by using
+ shell expansions, e.g. "/dev/sd{a,b,c}"
+
+2) Linux LVM PV (Physical Volume)
+The format is:
+hostname,PV,pv names,operation mode,options
+
+hostname hostname of the node in the cluster
+PV marker of PV line
+pv names devices or loopback files to be initialized for later
+ use by LVM or to be wiped the label, e.g. /dev/sda
+ Multiple devices or files are separated by space or by
+ using shell expansions, e.g. "/dev/sd{a,b,c}"
+operation mode create or remove, default is create
+options a "catchall" for other pvcreate/pvremove options
+ e.g. "-vv"
+
+3) Linux LVM VG (Volume Group)
+The format is:
+hostname,VG,vg name,operation mode,options,pv paths
+
+hostname hostname of the node in the cluster
+VG marker of VG line
+vg name name of the volume group, e.g. ost_vg
+operation mode create or remove, default is create
+options a "catchall" for other vgcreate/vgremove options
+ e.g. "-s 32M"
+pv paths physical volumes to construct this VG, required by
+ create mode
+ Multiple PVs are separated by space or by using
+ shell expansions, e.g. "/dev/sd[k-m]1"
+
+4) Linux LVM LV (Logical Volume)
+The format is:
+hostname,LV,lv name,operation mode,options,lv size,vg name
+
+hostname hostname of the node in the cluster
+LV marker of LV line
+lv name name of the logical volume to be created (optional)
+ or path of the logical volume to be removed (required
+ by remove mode)
+operation mode create or remove, default is create
+options a "catchall" for other lvcreate/lvremove options
+ e.g. "-i 2 -I 128"
+lv size size [kKmMgGtT] to be allocated for the new LV
+ Default unit is megabytes.
+vg name name of the VG in which the new LV will be created
+
+5) Lustre target
+The format is:
+hostname,module_opts,device name,mount point,device type,fsname,mgs nids,index,
+format options,mkfs options,mount options,failover nids
+
+hostname hostname of the node in the cluster, must match "uname -n"
+module_opts Lustre networking module options
+device name Lustre target (block device or loopback file)
+mount point Lustre target mount point
+device type Lustre target type (mgs, mdt, ost, mgs|mdt, mdt|mgs)
+fsname Lustre filesystem name, should be limited to 8 characters
+ Default is "lustre".
+mgs nids NID(s) of remote mgs node, required for mdt and ost targets
+ If this item is not given for an mdt, it is assumed that
+ the mdt will also be an mgs, according to mkfs.lustre.
+index Lustre target index
+format options a "catchall" contains options to be passed to mkfs.lustre
+ "--device-size", "--param", etc. all goes into this item.
+mkfs options format options to be wrapped with --mkfsoptions="" and
+ passed to mkfs.lustre
+mount options If this script is invoked with "-m" option, then the value of
+ this item will be wrapped with --mountfsoptions="" and passed
+ to mkfs.lustre, else the value will be added into /etc/fstab.
+failover nids NID(s) of failover partner node
+
+All the NIDs in one node are delimited by commas (','). When multiple nodes are
+specified, they are delimited by a colon (':').
+
+Items left blank will be set to defaults.
+
+Example 1 - Simple, with combo MGS/MDT:
+-------------------------------------------------------------------------------
+# combo mdt/mgs
+lustre-mgs,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240
+
+# ost0
+lustre-ost,options lnet networks=tcp,/tmp/ost0,/mnt/ost0,ost,,lustre-mgs@tcp0,,--device-size=10240
+
+# ost1
+lustre-ost,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,lustre-mgs@tcp0,,--device-size=10240
+-------------------------------------------------------------------------------
+
+Example 2 - Separate MGS/MDT, two networks interfaces:
+-------------------------------------------------------------------------------
+# mgs
+lustre-mgs1,options lnet 'networks="tcp,elan"',/dev/sda,/mnt/mgs,mgs,,,,--quiet --param="sys.timeout=50",,"defaults,noauto","lustre-mgs2,2@elan"
+
+# mdt
+lustre-mdt1,options lnet 'networks="tcp,elan"',/dev/sdb,/mnt/mdt,mdt,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet --param="lov.stripe.size=4194304",-J size=16,"defaults,noauto",lustre-mdt2
+
+# ost
+lustre-ost1,options lnet 'networks="tcp,elan"',/dev/sdc,/mnt/ost,ost,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet,-I 512,"defaults,noauto",lustre-ost2
+-------------------------------------------------------------------------------
+
+Example 3 - with combo MGS/MDT failover pair and OST failover pair:
+-------------------------------------------------------------------------------
+# combo mgs/mdt
+lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240,,,lustre-mgs2@tcp0
+
+# combo mgs/mdt backup (--noformat)
+lustre-mgs2,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240 --noformat,,,lustre-mgs1@tcp0
+
+# ost
+lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240,,,lustre-ost2@tcp0
+
+# ost backup (--noformat) (note different device name)
+lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240 --noformat,,,lustre-ost1@tcp0
+-------------------------------------------------------------------------------
+
+Example 4 - Configure Linux MD/LVM devices before formatting Lustre targets:
+-------------------------------------------------------------------------------
+# MD device on mgsnode
+mgsnode,MD,/dev/md0,,-q,1,/dev/sda1 /dev/sdb1
+
+# MD/LVM devices on ostnode
+ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c}"
+ostnode,MD,/dev/md1,,-q -c 128,5,"/dev/sd{d,e,f}"
+ostnode,PV,/dev/md0 /dev/md1
+ostnode,VG,ost_vg,,-s 32M,/dev/md0 /dev/md1
+ostnode,LV,ost0,,-i 2 -I 128,300G,ost_vg
+ostnode,LV,ost1,,-i 2 -I 128,300G,ost_vg
+
+# combo mgs/mdt
+mgsnode,options lnet networks=tcp,/dev/md0,/mnt/mgs,mgs|mdt,,,,--quiet
+
+# ost0
+ostnode,options lnet networks=tcp,/dev/ost_vg/ost0,/mnt/ost0,ost,,mgsnode,,--quiet
+
+# ost1
+ostnode,options lnet networks=tcp,/dev/ost_vg/ost1,/mnt/ost1,ost,,mgsnode,,--quiet
+-------------------------------------------------------------------------------
+
+EOF
+ exit 0
+}
+
+# Get the library of functions
+# (lc_common.sh supplies verbose_output, parse_line, check_file, REMOTE,
+# HBVER_HBV1/HBVER_HBV2/HATYPE_CLUMGR and the other helpers used below)
+. @scriptlibdir@/lc_common.sh
+
+#***************************** Global variables *****************************#
+declare -a MGS_NODENAME # node names of the MGS servers
+declare -a MGS_IDX # indexes of MGSs in the global arrays
+declare -i MGS_NUM # number of MGS servers in the cluster
+declare -i INIT_IDX
+
+declare -a NODE_NAMES # node names in the failover group
+declare -a TARGET_OPTS # target services in one failover group
+
+# All the items in the csv file
+declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
+declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
+
+# Corresponding to MGS_NIDS and FAILOVERS arrays,
+# IP addresses in which were converted to hostnames
+declare -a MGS_NIDS_NAMES FAILOVERS_NAMES
+
+# Default behaviour, possibly overridden by the options parsed below
+VERIFY_CONNECT=true
+CONFIG_MD_LVM=false
+MODIFY_FSTAB=true
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+# -t HA type, -n skip connectivity checks, -d configure MD/LVM devices,
+# -f pass --reformat to mkfs.lustre, -m leave /etc/fstab alone,
+# -h print the sample csv (heredoc earlier in this script), -v verbose
+while getopts "t:ndfmhv" OPTION; do
+    case $OPTION in
+    t)
+        HATYPE_OPT=$OPTARG
+        # Valid values come from lc_common.sh (hbv1, hbv2, CluManager)
+        if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
+        && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
+        && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
+            echo >&2 $"`basename $0`: Invalid HA software type" \
+                  "- ${HATYPE_OPT}!"
+            usage
+        fi
+        ;;
+    n)
+        VERIFY_CONNECT=false
+        ;;
+    d)
+        CONFIG_MD_LVM=true
+        ;;
+    f)
+        REFORMAT_OPTION=$"--reformat "
+        ;;
+    m)
+        MODIFY_FSTAB=false
+        ;;
+    h)
+        sample
+        ;;
+    v)
+        VERBOSE_OPT=$" -v"
+        VERBOSE_OUTPUT=true
+        ;;
+    ?)
+        usage
+    esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+    echo >&2 $"`basename $0`: Missing csv file!"
+    usage
+fi
+
+# Check the items required for OSTs, MDTs and MGS
+#
+# When formatting an OST, the following items: hostname, module_opts,
+# device name, device type and mgs nids, cannot have null value.
+#
+# When formatting an MDT or MGS, the following items: hostname,
+# module_opts, device name and device type, cannot have null value.
+#
+# check_item index
+# Validates entry @index of the global csv arrays (filled by get_items).
+# Returns 0 when all required fields are present, 1 otherwise.
+check_item() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: check_item() error: Missing argument"\
+              "for function check_item()!"
+        return 1
+    fi
+
+    # declare -i coerces $1 to an integer index into the global arrays
+    declare -i i=$1
+
+    # Check hostname, module_opts, device name and device type
+    if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
+        ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: Some required"\
+              "item has null value! Check hostname, module_opts,"\
+              "device name and device type!"
+        return 1
+    fi
+
+    # Check mgs nids (only an OST strictly requires them; an MDT
+    # without mgs nids is treated as a combined MGS/MDT elsewhere)
+    if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
+              "item has null value!"
+        return 1
+    fi
+
+    # Check mount point
+    if [ -z "${MOUNT_POINT[i]}" ]; then
+        echo >&2 $"`basename $0`: check_item() error: mount"\
+              "point item of target ${DEVICE_NAME[i]} has null value!"
+        return 1
+    fi
+
+    return 0
+}
+
+# Get the number of MGS nodes in the cluster
+# Sets the globals INIT_IDX and MGS_NUM from the MGS_NODENAME array.
+# Slot 0 is reserved for the primary MGS; when it is empty (only backup
+# MGS entries exist, filled from index 1 by check_mgs), iteration must
+# start at 1 and the element count is one short, hence both adjustments.
+get_mgs_num() {
+    INIT_IDX=0
+    MGS_NUM=${#MGS_NODENAME[@]}
+    [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
+    && let "MGS_NUM += 1"
+}
+
+# is_mgs_node hostname
+# Verify whether @hostname is a MGS node
+# Returns 0 when @hostname appears in the MGS_NODENAME array, 1 otherwise.
+is_mgs_node() {
+    local host_name=$1
+    declare -i i
+
+    # Refresh INIT_IDX/MGS_NUM before scanning the array
+    get_mgs_num
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
+    done
+
+    return 1
+}
+
+# Check whether the MGS nodes are in the same failover group
+# Every MGS node must appear in the "failover nids" of every other MGS
+# entry; returns 1 as soon as one pairing is missing.
+check_mgs_group() {
+    declare -i i
+    declare -i j
+    declare -i idx
+    local mgs_node
+
+    get_mgs_num
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        mgs_node=${MGS_NODENAME[i]}
+        for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
+            # Skip comparing a node against itself
+            [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
+
+            idx=${MGS_IDX[j]}
+            # Substring-deletion idiom: if stripping "*$mgs_node*" leaves
+            # the failover list unchanged, $mgs_node is not in that list
+            if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ]
+            then
+                echo >&2 $"`basename $0`: check_mgs_group() error:"\
+                "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
+                "failover group!"
+                return 1
+            fi
+        done
+    done
+
+    return 0
+}
+
+# Get and check MGS servers.
+# There should be no more than one MGS specified in the entire csv file.
+#
+# Fills MGS_NODENAME/MGS_IDX: slot 0 holds the primary MGS (formatted
+# without --noformat); slots 1.. hold backup MGSes (--noformat entries).
+# An "explicit" MGS has mgs in its device type; an "implicit" one is an
+# mdt with empty mgs nids (mkfs.lustre then makes it an MGS too).
+# The csv must not mix explicit and implicit MGS targets.
+check_mgs() {
+    declare -i i
+    declare -i j
+    declare -i exp_idx # Index of explicit MGS servers
+    declare -i imp_idx # Index of implicit MGS servers
+    local is_exp_mgs is_imp_mgs
+    local mgs_node
+
+    # Initialize the MGS_NODENAME and MGS_IDX arrays
+    unset MGS_NODENAME
+    unset MGS_IDX
+
+    exp_idx=1
+    imp_idx=1
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        is_exp_mgs=false
+        is_imp_mgs=false
+
+        # Check whether this node is an explicit MGS node
+        # or an implicit one
+        if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
+            verbose_output "Explicit MGS target" \
+            "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+            is_exp_mgs=true
+        fi
+
+        if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
+            verbose_output "Implicit MGS target" \
+            "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
+            is_imp_mgs=true
+        fi
+
+        # Get and check MGS servers
+        if ${is_exp_mgs} || ${is_imp_mgs}; then
+            # Check whether more than one MGS target in one MGS node
+            if is_mgs_node ${HOST_NAME[i]}; then
+                echo >&2 $"`basename $0`: check_mgs() error:"\
+                "More than one MGS target in the same node -"\
+                "\"${HOST_NAME[i]}\"!"
+                return 1
+            fi
+
+            # Get and check primary MGS server and backup MGS server
+            # (an entry without "noformat" in its format options is a
+            # primary; with "noformat" it is a backup)
+            if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
+            then
+                # Primary MGS server
+                if [ -z "${MGS_NODENAME[0]}" ]; then
+                    if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+                    || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "There exist both explicit and implicit MGS"\
+                        "targets in the csv file!"
+                        return 1
+                    fi
+                    MGS_NODENAME[0]=${HOST_NAME[i]}
+                    MGS_IDX[0]=$i
+                else
+                    # A second primary is only legal if it is a failover
+                    # partner of the first — and even then one of the two
+                    # must carry --noformat
+                    mgs_node=${MGS_NODENAME[0]}
+                    if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ]
+                    then
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "More than one primary MGS nodes in the csv" \
+                        "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
+                    else
+                        echo >&2 $"`basename $0`: check_mgs() error:"\
+                        "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
+                        "are failover pair, one of them should use"\
+                        "\"--noformat\" in the format options item!"
+                    fi
+                    return 1
+                fi
+            else # Backup MGS server
+                if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
+                || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
+                    echo >&2 $"`basename $0`: check_mgs() error:"\
+                    "There exist both explicit and implicit MGS"\
+                    "targets in the csv file!"
+                    return 1
+                fi
+
+                if ${is_exp_mgs}; then # Explicit MGS
+                    MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
+                    MGS_IDX[exp_idx]=$i
+                    exp_idx=$(( exp_idx + 1 ))
+                else # Implicit MGS
+                    MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
+                    MGS_IDX[imp_idx]=$i
+                    imp_idx=$(( imp_idx + 1 ))
+                fi
+            fi
+        fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
+    done
+
+    # Check whether the MGS nodes are in the same failover group
+    if ! check_mgs_group; then
+        return 1
+    fi
+
+    return 0
+}
+
+# Construct the command line of mkfs.lustre
+# construct_mkfs_cmdline index
+# Builds the global MKFS_CMD string for csv entry @index from the global
+# arrays; ${MKFS} and ${REFORMAT_OPTION} come from lc_common.sh and the
+# -f option respectively. Returns 1 on an unknown device type.
+construct_mkfs_cmdline() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+              "Missing argument for function construct_mkfs_cmdline()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    local mgsnids mgsnids_str
+    local failnids failnids_str
+
+    MKFS_CMD=${MKFS}$" "
+    MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION}
+
+    case "${DEVICE_TYPE[i]}" in
+    "ost")
+        MKFS_CMD=${MKFS_CMD}$"--ost "
+        ;;
+    "mdt")
+        MKFS_CMD=${MKFS_CMD}$"--mdt "
+        ;;
+    "mgs")
+        MKFS_CMD=${MKFS_CMD}$"--mgs "
+        ;;
+    "mdt|mgs" | "mgs|mdt")
+        MKFS_CMD=${MKFS_CMD}$"--mdt --mgs "
+        ;;
+    *)
+        echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+              "Invalid device type - \"${DEVICE_TYPE[i]}\"!"
+        return 1
+        ;;
+    esac
+
+    if [ -n "${FS_NAME[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" "
+    fi
+
+    # Colon-separated MGS nid groups each become their own --mgsnode
+    if [ -n "${MGS_NIDS[i]}" ]; then
+        mgsnids_str=${MGS_NIDS[i]}
+        for mgsnids in ${mgsnids_str//:/ }; do
+            MKFS_CMD=${MKFS_CMD}$"--mgsnode="${mgsnids}$" "
+        done
+    fi
+
+    if [ -n "${INDEX[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" "
+    fi
+
+    if [ -n "${FORMAT_OPTIONS[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" "
+    fi
+
+    if [ -n "${MKFS_OPTIONS[i]}" ]; then
+        MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" "
+    fi
+
+    # Mount options are only baked into the target when -m was given
+    # (MODIFY_FSTAB=false); otherwise they go to /etc/fstab instead
+    if [ -n "${MOUNT_OPTIONS[i]}" ]; then
+        if ! ${MODIFY_FSTAB}; then
+            MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" "
+        fi
+    fi
+
+    # Colon-separated failover nid groups each become their own --failnode
+    if [ -n "${FAILOVERS[i]}" ]; then
+        failnids_str=${FAILOVERS[i]}
+        for failnids in ${failnids_str//:/ }; do
+            MKFS_CMD=${MKFS_CMD}$"--failnode="${failnids}$" "
+        done
+    fi
+
+    MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]}
+    return 0
+}
+
+# Get all the node names in this failover group
+# get_nodenames index
+# Fills the global NODE_NAMES array: slot 0 is the primary host of csv
+# entry @index, the rest are resolved from its failover nids via
+# nids2hostname (lc_common.sh). Returns 1 when a nid cannot be resolved.
+get_nodenames() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
+              "argument for function get_nodenames()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    declare -i idx
+    local nids
+
+    # Initialize the NODE_NAMES array
+    unset NODE_NAMES
+
+    NODE_NAMES[0]=${HOST_NAME[i]}
+
+    idx=1
+    for nids in ${FAILOVERS_NAMES[i]//:/ }
+    do
+        NODE_NAMES[idx]=$(nids2hostname ${nids})
+        if [ $? -ne 0 ]; then
+            # On failure the captured output is the error message
+            echo >&2 "${NODE_NAMES[idx]}"
+            return 1
+        fi
+
+        # Arithmetic despite the plain "=": idx was declared with -i
+        idx=$idx+1
+    done
+
+    return 0
+}
+
+# Verify whether the format line has HA items
+# is_ha_line index
+# Returns 0 when csv entry @index carries a "failover nids" value, i.e.
+# the target belongs to a failover group needing HA configuration.
+is_ha_line() {
+    declare -i i=$1
+
+    [ -n "${FAILOVERS[i]}" ] && return 0
+
+    return 1
+}
+
+# Produce HA software's configuration files
+# gen_ha_config index
+# Generate the HA configuration files for the failover group whose primary
+# node is ${HOST_NAME[index]}, by invoking the external generation script
+# (GEN_HB_CONFIG for Heartbeat v1/v2, GEN_CLUMGR_CONFIG for CluManager —
+# both from lc_common.sh) with the group's hostnames and the
+# device:mountpoint pairs collected into TARGET_OPTS by config_ha().
+gen_ha_config() {
+    declare -i i=$1
+    declare -i idx
+    local cmd_line
+
+    # Prepare parameters
+    # Hostnames option
+    HOSTNAME_OPT=${HOST_NAME[i]}
+
+    if ! get_nodenames $i; then
+        echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\
+        "failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\
+        "the \"${HOST_NAME[i]}\" failover group!"
+        return 1
+    fi
+
+    for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
+        HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
+    done
+
+    # Target devices option
+    DEVICE_OPT=" -d "${TARGET_OPTS[0]}
+    for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do
+        DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]}
+    done
+
+    # Construct the generation script command line
+    case "${HATYPE_OPT}" in
+    "${HBVER_HBV1}"|"${HBVER_HBV2}") # Heartbeat
+        cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
+        cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
+        ;;
+    "${HATYPE_CLUMGR}") # CluManager
+        cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
+        cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
+        ;;
+    esac
+
+    # Execute script to generate HA software's configuration files
+    verbose_output "Generating HA software's configurations in"\
+        "${HOST_NAME[i]} failover group..."
+    verbose_output "${cmd_line}"
+    # Evaluate the assembled command line directly; the previous form
+    # "eval $(echo ${cmd_line})" added a useless echo plus command
+    # substitution that word-split the command a second time.
+    if ! eval ${cmd_line}; then
+        return 1
+    fi
+    verbose_output "Generate HA software's configurations in"\
+        "${HOST_NAME[i]} failover group OK"
+
+    return 0
+}
+
+# Configure HA software
+# Walks the csv entries, groups the lines that share a primary host and
+# have failover nids into one failover group, and calls gen_ha_config
+# once per group. No-op unless -t <HAtype> was given.
+config_ha() {
+    if [ -z "${HATYPE_OPT}" ]; then
+        return 0
+    fi
+
+    declare -i i j k
+    declare -i prim_idx # Index for PRIM_HOSTNAMES array
+    declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays
+
+    declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover
+                              # groups in the lustre cluster
+    declare -a HOST_INDEX # Indices for the same node in all the
+                          # format lines in the csv file
+    local prim_host
+
+    # Initialize the PRIM_HOSTNAMES array
+    prim_idx=0
+    unset PRIM_HOSTNAMES
+
+    # Get failover groups and generate HA configuration files
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        prim_host=${HOST_NAME[i]}
+
+        # Skip hosts whose failover group was already processed
+        for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do
+            [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2
+        done
+
+        # Collect every HA-enabled csv line that lives on this host
+        target_idx=0
+        unset HOST_INDEX
+        unset TARGET_OPTS
+        for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do
+            if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}"
+            then
+                HOST_INDEX[target_idx]=$k
+                TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]}
+                target_idx=$(( target_idx + 1 ))
+            fi
+        done
+
+        if [ ${#TARGET_OPTS[@]} -ne 0 ]; then
+            PRIM_HOSTNAMES[prim_idx]=${prim_host}
+            prim_idx=$(( prim_idx + 1 ))
+
+            if ! gen_ha_config ${HOST_INDEX[0]}; then
+                return 1
+            fi
+        fi
+    done
+
+    if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then
+        verbose_output "There are no \"failover nids\" items in the"\
+        "csv file. No HA configuration files are generated!"
+    fi
+
+    # TMP_DIRS presumably accumulated by the generation scripts — confirm
+    rm -rf ${TMP_DIRS}
+    return 0
+}
+
+
+# Get all the items in the csv file and do some checks.
+# get_items csv_file
+# Parses every non-empty, non-comment, non-MD/LVM line of @csv_file into
+# the twelve global per-target arrays (HOST_NAME .. FAILOVERS), resolves
+# IP-based nids to hostnames, and validates each entry with check_item.
+get_items() {
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: get_items() error: Missing argument"\
+              "for function get_items()!"
+        return 1
+    fi
+
+    CSV_FILE=$1
+    local LINE
+    local marker
+    declare -i line_num=0
+    declare -i idx=0
+
+    while read -r LINE; do
+        line_num=${line_num}+1
+        # verbose_output "Parsing line ${line_num}: $LINE"
+
+        # Get rid of the empty line
+        # (awk only prints lines containing at least one alnum char)
+        if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then
+            continue
+        fi
+
+        # Get rid of the comment line
+        if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]
+        then
+            continue
+        fi
+
+        # Skip the Linux MD/LVM line
+        # (those lines are handled by the lc_md/lc_lvm scripts instead;
+        # the marker constants come from lc_common.sh)
+        marker=`echo ${LINE} | awk -F, '{print $2}'`
+        if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \
+        || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then
+            continue
+        fi
+
+        # Parse the config line into CONFIG_ITEM
+        if ! parse_line "$LINE"; then
+            echo >&2 $"`basename $0`: parse_line() error: Occurred"\
+                  "on line ${line_num} in ${CSV_FILE}: $LINE"
+            return 1
+        fi
+
+        HOST_NAME[idx]=${CONFIG_ITEM[0]}
+        MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
+        DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
+        MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
+        DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
+        FS_NAME[idx]=${CONFIG_ITEM[5]}
+        MGS_NIDS[idx]=${CONFIG_ITEM[6]}
+        INDEX[idx]=${CONFIG_ITEM[7]}
+        FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
+        MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
+        MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
+        FAILOVERS[idx]=${CONFIG_ITEM[11]}
+
+        # Escape double quotes so the options survive being embedded in
+        # the remote "echo \"...\"" command later on
+        MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'`
+
+        # Convert IP addresses in NIDs to hostnames
+        MGS_NIDS_NAMES[idx]=$(ip2hostname_multi_node ${MGS_NIDS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${MGS_NIDS_NAMES[idx]}"
+            return 1
+        fi
+
+        FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${FAILOVERS_NAMES[idx]}"
+            return 1
+        fi
+
+        # Check some required items for formatting target
+        if ! check_item $idx; then
+            echo >&2 $"`basename $0`: check_item() error:"\
+                  "Occurred on line ${line_num} in ${CSV_FILE}."
+            return 1
+        fi
+
+        idx=${idx}+1
+    done < ${CSV_FILE}
+
+    return 0
+}
+
+# check_lnet_connect hostname_index mgs_hostname
+# Check whether the target node can contact the MGS node @mgs_hostname
+# If @mgs_hostname is null, then it means the primary MGS node
+# Tries every comma-separated nid of the matching MGS entry with
+# "lctl ping" executed remotely on ${HOST_NAME[hostname_index]};
+# succeeds as soon as one nid answers.
+check_lnet_connect() {
+    declare -i i=$1
+    local mgs_node=$2
+
+    local COMMAND RET_STR
+    local mgs_prim_nids
+    local nids nids_names
+    local nids_str=
+    local mgs_nid
+    local ping_mgs
+
+    # Execute remote command to check that
+    # this node can contact the MGS node
+    verbose_output "Checking lnet connectivity between" \
+    "${HOST_NAME[i]} and the MGS node ${mgs_node}"
+    mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'`
+
+    if [ -z "${mgs_node}" ]; then
+        nids_str=${mgs_prim_nids} # nids of primary MGS node
+        if [ -z "${nids_str}" ]; then
+            echo >&2 $"`basename $0`: check_lnet_connect() error:"\
+                  "Check the mgs nids item of host ${HOST_NAME[i]}!"\
+                  "Missing nids of the primary MGS node!"
+            return 1
+        fi
+    else
+        # Find the colon-separated nid group that resolves to @mgs_node
+        for nids in ${MGS_NIDS[i]//:/ }; do
+            nids_names=$(ip2hostname_single_node ${nids})
+            if [ $? -ne 0 ]; then
+                echo >&2 "${nids_names}"
+                return 1
+            fi
+
+            [ "${nids_names}" != "${nids_names#*$mgs_node*}" ]\
+            && nids_str=${nids} # nids of backup MGS node
+        done
+        if [ -z "${nids_str}" ]; then
+            echo >&2 $"`basename $0`: check_lnet_connect() error:"\
+                  "Check the mgs nids item of host ${HOST_NAME[i]}!"\
+                  "Can not figure out which nids corresponding to the MGS"\
+                  "node ${mgs_node} from \"${MGS_NIDS[i]}\"!"
+            return 1
+        fi
+    fi
+
+    ping_mgs=false
+    for mgs_nid in ${nids_str//,/ }
+    do
+        # Merge lctl's stderr into the captured output before falling
+        # back to "echo failed". Previously the "2>&1" was attached to
+        # the echo (where it is a no-op), so lctl's error text was not
+        # redirected as intended.
+        COMMAND=$"${LCTL} ping ${mgs_nid} 5 2>&1 || echo failed"
+        RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
+        if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
+        then
+            # This node can contact the MGS node
+            verbose_output "${HOST_NAME[i]} can contact the MGS" \
+            "node ${mgs_node} by using nid \"${mgs_nid}\"!"
+            ping_mgs=true
+            break
+        fi
+    done
+
+    if ! ${ping_mgs}; then
+        echo >&2 "`basename $0`: check_lnet_connect() error:" \
+        "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\
+        "with nids - \"${nids_str}\"! Check ${LCTL} command!"
+        return 1
+    fi
+
+    return 0
+}
+
+# Start lnet network in the cluster node and check that
+# this node can contact the MGS node
+# check_lnet index
+# Loads lnet and brings the network up on ${HOST_NAME[index]}; unless the
+# host is itself an MGS node, also verifies connectivity to every MGS.
+# No-op when -n (VERIFY_CONNECT=false) was given.
+check_lnet() {
+    if ! ${VERIFY_CONNECT}; then
+        return 0
+    fi
+
+    # Check argument
+    if [ $# -eq 0 ]; then
+        echo >&2 $"`basename $0`: check_lnet() error: Missing"\
+              "argument for function check_lnet()!"
+        return 1
+    fi
+
+    declare -i i=$1
+    declare -i j
+    local COMMAND RET_STR
+
+    # Execute remote command to start lnet network
+    verbose_output "Starting lnet network in ${HOST_NAME[i]}"
+    COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1"
+    RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
+    # Success requires both a zero exit and "LNET configured" in the output
+    if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
+    then
+        echo >&2 "`basename $0`: check_lnet() error: remote" \
+        "${HOST_NAME[i]} error: ${RET_STR}"
+        return 1
+    fi
+
+    # An MGS node does not need to ping itself
+    if is_mgs_node ${HOST_NAME[i]}; then
+        return 0
+    fi
+
+    # Execute remote command to check that
+    # this node can contact the MGS node
+    for ((j = 0; j < ${MGS_NUM}; j++)); do
+        if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Start lnet network in the MGS node
+# For every MGS entry: pushes its lnet module options into the remote
+# modprobe.conf/modules.conf (via MODULE_CONFIG from lc_common.sh) and
+# brings lnet up there. No-op when the csv contains no MGS target.
+start_mgs_lnet() {
+    declare -i i
+    declare -i idx
+    local COMMAND
+
+    if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
+        verbose_output "There is no MGS target in the ${CSV_FILE} file."
+        return 0
+    fi
+
+    for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
+        # Execute remote command to add lnet options lines to
+        # the MGS node's modprobe.conf/modules.conf
+        idx=${MGS_IDX[i]}
+        COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}"
+        verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}"
+        ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: start_mgs_lnet() error:"\
+                 "Failed to execute remote command to" \
+                 "add module options to ${MGS_NODENAME[i]}!"\
+                 "Check ${MODULE_CONFIG}!"
+            return 1
+        fi
+
+        # Start lnet network in the MGS node
+        if ! check_lnet ${idx}; then
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Execute remote command to add lnet options lines to remote nodes'
+# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
+# For each csv entry: creates the mount point, configures lnet (non-MGS
+# nodes only; the MGS was handled by start_mgs_lnet), then launches the
+# mkfs.lustre commands on all nodes in parallel as background jobs and
+# waits for every one of them before reporting success/failure.
+mass_config() {
+    local COMMAND
+    declare -a REMOTE_PID
+    declare -a REMOTE_CMD
+    declare -i pid_num=0
+    declare -i i=0
+
+    if [ ${#HOST_NAME[@]} -eq 0 ]; then
+        verbose_output "There are no Lustre targets to be formatted."
+        return 0
+    fi
+
+    # Start lnet network in the MGS node
+    if ! start_mgs_lnet; then
+        return 1
+    fi
+
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        # Construct the command line of mkfs.lustre
+        if ! construct_mkfs_cmdline $i; then
+            return 1
+        fi
+
+        # create the mount point on the node
+        COMMAND="mkdir -p ${MOUNT_POINT[i]}"
+        verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \
+                   "${HOST_NAME[i]}"
+        ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: mass_config() error:"\
+                 "Failed to execute remote command to"\
+                 "create the mountpoint on ${HOST_NAME[i]}!"
+            return 1
+        fi
+
+        if ! is_mgs_node ${HOST_NAME[i]}; then
+            # Execute remote command to add lnet options lines to
+            # modprobe.conf/modules.conf
+            COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
+            verbose_output "Adding lnet module options to" \
+                       "${HOST_NAME[i]}"
+            ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+            if [ $? -ne 0 ]; then
+                echo >&2 "`basename $0`: mass_config() error:"\
+                     "Failed to execute remote command to"\
+                     "add module options to ${HOST_NAME[i]}!"
+                return 1
+            fi
+
+            # Check lnet networks
+            if ! check_lnet $i; then
+                return 1
+            fi
+        fi
+
+        # Execute remote command to format Lustre target
+        # (run in the background so all nodes format concurrently;
+        # EXPORT_PATH comes from lc_common.sh)
+        verbose_output "Formatting Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..."
+        REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} \"(${EXPORT_PATH} ${MKFS_CMD})\""
+        verbose_output "Format command line is: ${REMOTE_CMD[${pid_num}]}"
+        ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 &
+        REMOTE_PID[${pid_num}]=$!
+        pid_num=${pid_num}+1
+        sleep 1
+    done
+
+    # Wait for the exit status of the background remote command
+    verbose_output "Waiting for the return of the remote command..."
+    fail_exit_status=false
+    for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
+        wait ${REMOTE_PID[${pid_num}]}
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: mass_config() error: Failed"\
+                 "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
+            fail_exit_status=true
+        fi
+    done
+
+    if ${fail_exit_status}; then
+        return 1
+    fi
+
+    verbose_output "All the Lustre targets are formatted successfully!"
+    return 0
+}
+
+# get_mntopts hostname device_name failovers
+# Construct the mount options of Lustre target @device_name in host @hostname
+# Emits the options string on stdout: "defaults,noauto" for failover
+# targets (so mount at boot is manual), "defaults" otherwise, with ",loop"
+# appended when the remote device is not a block device.
+get_mntopts() {
+    local host_name=$1
+    local device_name=$2
+    local failovers=$3
+    local mnt_opts=
+    local ret_str
+
+    [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults
+
+    # Execute remote command to check whether the device
+    # is a block device or not
+    ret_str=`${REMOTE} ${host_name} \
+        "[ -b ${device_name} ] && echo block || echo loop" 2>&1`
+    if [ $? -ne 0 -a -n "${ret_str}" ]; then
+        echo "`basename $0`: get_mntopts() error:" \
+        "remote command to ${host_name} error: ${ret_str}"
+        return 1
+    fi
+
+    if [ -z "${ret_str}" ]; then
+        echo "`basename $0`: get_mntopts() error: remote error:" \
+        "No results from remote!" \
+        "Check network connectivity between the local host and ${host_name}!"
+        return 1
+    fi
+
+    # Remote said "loop": mount through a loop device
+    [ "${ret_str}" != "${ret_str#*loop}" ] && mnt_opts=${mnt_opts},loop
+
+    echo ${mnt_opts}
+    return 0
+}
+
+# Execute remote command to modify /etc/fstab to add the new Lustre targets
+# For each csv entry, removes any stale fstab line for the device and
+# appends a fresh one (device, mount point, lustre fs type, options).
+# No-op when -m (MODIFY_FSTAB=false) was given.
+modify_fstab() {
+    declare -i i
+    local mntent mntopts device_name
+    local COMMAND
+
+    if ! ${MODIFY_FSTAB}; then
+        return 0
+    fi
+
+    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
+        verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
+                   "to add Lustre target ${DEVICE_NAME[i]}"
+        mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE}
+
+        # Get mount options
+        if [ -n "${MOUNT_OPTIONS[i]}" ]; then
+            # The mount options already specified in the csv file.
+            mntopts=${MOUNT_OPTIONS[i]}
+        else
+            mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
+                  ${FAILOVERS[i]})
+            if [ $? -ne 0 ]; then
+                echo >&2 "${mntopts}"
+                return 1
+            fi
+        fi
+
+        mntent=${mntent}"\t"${mntopts}"\t"0" "0
+        verbose_output "`echo -e ${mntent}`"
+
+        # Execute remote command to modify /etc/fstab
+        # (escape "/" in the device path so it can be used inside the
+        # sed address; fcanon from lc_common.sh canonicalizes the path)
+        device_name=${DEVICE_NAME[i]//\//\\/}
+        COMMAND=". @scriptlibdir@/lc_common.sh; \
+            sed -i \"/^${device_name}\t/d\" \$(fcanon /etc/fstab); \
+            echo -e \"${mntent}\" >> \$(fcanon /etc/fstab)"
+        ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
+        if [ $? -ne 0 ]; then
+            echo >&2 "`basename $0`: modify_fstab() error:"\
+                 "Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\
+                 "to add Lustre target ${DEVICE_NAME[i]}!"
+            return 1
+        fi
+    done
+
+    return 0
+}
+
+# Main flow
+# Check the csv file
+# (check_file is from lc_common.sh; it presumably validates $1 and sets
+# the CSV_FILE variable used below — confirm against lc_common.sh)
+if ! check_file $1; then
+    exit 1
+fi
+
+if ${VERIFY_CONNECT}; then
+# Check the network connectivity and hostnames
+    echo "`basename $0`: Checking the cluster network connectivity"\
+         "and hostnames..."
+    if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+    echo "`basename $0`: Check the cluster network connectivity"\
+         "and hostnames OK!"
+    echo
+fi
+
+if ${CONFIG_MD_LVM}; then
+# Configure Linux MD/LVM devices
+    echo "`basename $0`: Configuring Linux MD/LVM devices..."
+    if ! ${SCRIPT_CONFIG_MD} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+
+    if ! ${SCRIPT_CONFIG_LVM} ${VERBOSE_OPT} ${CSV_FILE}; then
+        exit 1
+    fi
+    echo "`basename $0`: Configure Linux MD/LVM devices OK!"
+    echo
+fi
+
+# Configure the Lustre cluster
+echo "`basename $0`: ******** Lustre cluster configuration START ********"
+if ! get_items ${CSV_FILE}; then
+    exit 1
+fi
+
+if ! check_mgs; then
+    exit 1
+fi
+
+if ! mass_config; then
+    exit 1
+fi
+
+if ! modify_fstab; then
+    exit 1
+fi
+
+# Produce HA software's configuration files
+if ! config_ha; then
+    rm -rf ${TMP_DIRS}
+    exit 1
+fi
+
+echo "`basename $0`: ******** Lustre cluster configuration END **********"
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# lustre_createcsv.sh - generate a csv file from a running lustre cluster
+#
+# This script is used to collect lustre target informations, linux MD/LVM device
+# informations and HA software configurations in a lustre cluster to generate a
+# csv file. In reverse, the csv file could be parsed by lustre_config.sh to
+# configure multiple lustre servers in parallel.
+#
+# This script should be run on the MGS node.
+#
+################################################################################
+
+# Usage
+# Print the help text to stderr and exit non-zero (also used as the
+# error path for bad options).
+usage() {
+    cat >&2 <<EOF
+
+Usage: `basename $0` [-t HAtype] [-d] [-h] [-v] [-f csv_filename]
+
+    This script is used to collect lustre target informations, linux MD/LVM
+    device informations and HA software configurations from a running lustre
+    cluster to generate a csv file. It should be run on the MGS node.
+
+    -t HAtype       collect High-Availability software configurations
+                    The argument following -t is used to indicate the High-
+                    Availability software type. The HA software types which
+                    are currently supported are: hbv1 (Heartbeat version 1)
+                    and hbv2 (Heartbeat version 2).
+    -d              collect linux MD/LVM device informations
+    -h              help
+    -v              verbose mode
+    -f csv_filename designate a name for the csv file
+                    Default is lustre_config.csv.
+
+EOF
+    exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common.sh
+
+#**************************** Global variables ****************************#
+# csv file
+LUSTRE_CSV_FILE=${LUSTRE_CSV_FILE:-"lustre_config.csv"}
+
+# Lustre proc files
+LUSTRE_PROC=${LUSTRE_PROC:-"/proc/fs/lustre"}
+LUSTRE_PROC_DEVICES=${LUSTRE_PROC}/devices
+
+LNET_PROC=${LNET_PROC:-"/proc/sys/lnet"}
+LNET_PROC_PEERS=${LNET_PROC}/peers
+
+# Default network module options
+DEFAULT_MOD_OPTS=${DEFAULT_MOD_OPTS:-"options lnet networks=tcp"}
+
+# Lustre target obd device types
+MGS_TYPE=${MGS_TYPE:-"mgs"}
+MDT_TYPE=${MDT_TYPE:-"mds"}
+OST_TYPE=${OST_TYPE:-"obdfilter"}
+
+# The obd name of MGS target server
+MGS_SVNAME=${MGS_SVNAME:-"MGS"}
+
+# Hostnames of the lustre cluster nodes
+declare -a HOST_NAMES
+MGS_HOSTNAME=${MGS_HOSTNAME:-"`hostname`"} # Hostname of the MGS node
+
+# Configs of lustre targets in one cluster node
+declare -a TARGET_CONFIGS
+declare -a TARGET_SVNAMES TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS
+declare -a TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
+declare -a TARGET_FMTOPTS TARGET_MKFSOPTS TARGET_MNTOPTS TARGET_FAILNIDS
+declare -a HA_CONFIGS
+declare -a ALL_TARGET_SVNAMES # All the target services in the cluster
+declare -a FAILOVER_FMTOPTS # "--noformat"
+
+# Informations of linux MD/LVM devices in one cluster node
+declare -a MD_NAME MD_LEVEL MD_DEVS # MD
+declare -a VG_NAME VG_PVNAMES # VG
+declare -a LV_NAME LV_SIZE LV_VGNAME # LV
+
+# Lustre target service types
+# (bit flags matching the LDD_F_SV_TYPE_* values of mkfs.lustre)
+let "LDD_F_SV_TYPE_MDT = 0x0001"
+let "LDD_F_SV_TYPE_OST = 0x0002"
+let "LDD_F_SV_TYPE_MGS = 0x0004"
+
+# Permanent mount options for ext3 or ldiskfs
+ALWAYS_MNTOPTS=${ALWAYS_MNTOPTS:-"errors=remount-ro"}
+MDT_MGS_ALWAYS_MNTOPTS=${MDT_MGS_ALWAYS_MNTOPTS:-",iopen_nopriv,user_xattr"}
+OST_ALWAYS_MNTOPTS=${OST_ALWAYS_MNTOPTS:-",asyncdel"}
+OST_DEFAULT_MNTOPTS=${OST_DEFAULT_MNTOPTS:-",extents,mballoc"}
+
+# User-settable parameter keys
+PARAM_MGSNODE=${PARAM_MGSNODE:-"mgsnode="}
+PARAM_FAILNODE=${PARAM_FAILNODE:-"failover.node="}
+
+# Block size
+L_BLOCK_SIZE=4096
+
+# Option string of mkfs.lustre
+OPTSTR_STRIPE_COUNT=${OPTSTR_STRIPE_COUNT:-"--stripe-count-hint="}
+
+
+# Get and check the positional parameters
+# -t HA type, -d collect MD/LVM info, -h help, -v verbose, -f csv name
+VERBOSE_OUTPUT=false
+GET_MDLVM_INFO=false
+while getopts "t:dhvf:" OPTION; do
+    case $OPTION in
+    t)
+        HATYPE_OPT=$OPTARG
+        # Valid values come from lc_common.sh (hbv1, hbv2, CluManager)
+        if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
+        && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
+        && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
+            echo >&2 "`basename $0`: Invalid HA software type" \
+                  "- ${HATYPE_OPT}!"
+            usage
+        fi
+        ;;
+    d) GET_MDLVM_INFO=true;;
+    h) usage;;
+    v) VERBOSE_OUTPUT=true;;
+    f) LUSTRE_CSV_FILE=$OPTARG;;
+    ?) usage
+    esac
+done
+
+# Verify the local host is the MGS node
+# Checks /proc/fs/lustre/devices exists, is non-empty, and lists an obd
+# device of type "mgs". Returns 1 (with a diagnostic) otherwise.
+mgs_node() {
+    if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then
+        echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \
+             "not exist. Lustre kernel modules may not be loaded!"
+        return 1
+    fi
+
+    if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then
+        echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \
+             "empty. Lustre services may not be started!"
+        return 1
+    fi
+
+    if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then
+        echo >&2 "`basename $0`: error: This node is not a MGS node." \
+             "The script should be run on the MGS node!"
+        return 1
+    fi
+
+    return 0
+}
+
+# get_hostnames
+# Get lustre cluster node names
+# Fills the HOST_NAMES array: slot 0 is the local MGS hostname, the rest
+# are resolved (via nid2hostname from lc_common.sh) from the peer nids
+# found in /proc/sys/lnet/peers, skipping duplicates of the MGS itself.
+get_hostnames() {
+    declare -a HOST_NIDS
+    declare -i idx # Index of HOST_NIDS array
+    declare -i i # Index of HOST_NAMES array
+
+    if ! mgs_node; then
+        return 1
+    fi
+
+    if [ ! -e ${LNET_PROC_PEERS} ]; then
+        echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \
+             "exist. LNET kernel modules may not be loaded" \
+             "or LNET network may not be up!"
+        return 1
+    fi
+
+    HOST_NAMES[0]=${MGS_HOSTNAME} # MGS node
+    HOST_NIDS[0]=${HOST_NAMES[0]}
+
+    # Get the nids of the nodes which have contacted MGS
+    # (first column of the peers file; skip its "nid" header row)
+    idx=1
+    for nid in `cat ${LNET_PROC_PEERS} | awk '{print $1}'`; do
+        if [ "${nid}" = "nid" ]; then
+            continue
+        fi
+
+        HOST_NIDS[idx]=${nid}
+        let "idx += 1"
+    done
+
+    if [ ${idx} -eq 1 ]; then
+        verbose_output "Only one node running in the lustre cluster." \
+                   "It's ${HOST_NAMES[0]}."
+        return 0
+    fi
+
+    # Get the hostnames of the nodes
+    for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do
+        if [ -z "${HOST_NIDS[idx]}" ]; then
+            echo >&2 "`basename $0`: get_hostnames() error:" \
+                 "Invalid nid - \"${HOST_NIDS[idx]}\"!"
+            return 1
+        fi
+
+        HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]})
+        if [ $? -ne 0 ]; then
+            echo >&2 "${HOST_NAMES[i]}"
+            return 1
+        fi
+
+        # Drop peers that resolve back to the local MGS node itself
+        if [ "${HOST_NAMES[i]}" = "${HOST_NAMES[0]}" ]; then
+            unset HOST_NAMES[i]
+            let "i -= 1"
+        fi
+    done
+
+    return 0
+}
+
+#********************** Linux MD/LVM device informations **********************#
+# get_md_configs hostname
+# Get all the active MD device informations from the node @hostname
+get_md_configs() {
+ declare -i i=0
+ declare -i j=0
+ local host_name=$1
+ local ret_line line first_item
+
+ # Initialize the arrays
+ unset MD_NAME
+ unset MD_LEVEL
+ unset MD_DEVS
+
+ # Execute remote command to the node ${host_name} and get all the
+ # active MD device informations.
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ first_item=`echo "${line}" | awk '{print $1}'`
+
+ # Get the MD device name and raid level
+ if [ "${first_item}" = "ARRAY" ]; then
+ MD_NAME[i]=`echo "${line}" | awk '{print $2}'`
+ MD_LEVEL[i]=`echo "${line}" | awk '{print $3}' | sed -e 's/level=//'`
+ let "j = i"
+ let "i += 1"
+ fi
+
+ # Get the MD component devices
+ if [ "${first_item}" != "${first_item#devices=}" ]; then
+ MD_DEVS[j]=`echo "${line}" | sed -e 's/devices=//' -e 's/,/ /g'`
+ fi
+ done < <(${REMOTE} ${host_name} "${MDADM} --detail --scan --verbose")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no active MD devices" \
+ "in the host ${host_name}!"
+ fi
+
+ return 0
+}
+
+# get_pv_configs hostname
+# Get all the LVM PV informations from the node @hostname
+get_pv_configs() {
+ PV_NAMES=
+ local host_name=$1
+ local cmd ret_str
+
+ # Execute remote command to get all the PV informations.
+ cmd="${EXPORT_PATH} pvdisplay -c | awk -F: '{print \$1}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_pv_configs() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_pv_configs" ${host_name}
+ fi
+ return 1
+ fi
+
+ PV_NAMES=`echo "${ret_str}" | sed -e 's/^'${host_name}':[[:space:]]//'`
+ if [ -z "${PV_NAMES}" ]; then
+ verbose_output "There are no PVs in the host ${host_name}!"
+ return 0
+ fi
+
+ return 0
+}
+
+# get_vg_pvnames hostname vgname
+# Get the PVs contained in @vgname from the node @hostname
+get_vg_pvnames() {
+ local host_name=$1
+ local vg_name=$2
+ local pv_names=
+ local cmd ret_str
+
+ # Execute remote command to get the PV names.
+ cmd="${EXPORT_PATH} vgdisplay -v ${vg_name} 2>/dev/null\
+ | grep \"PV Name\" | awk '{print \$3}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo "`basename $0`: get_vg_pvnames() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_vg_pvnames" ${host_name}
+ fi
+ return 1
+ fi
+
+ pv_names=`echo "${ret_str}" | sed -e 's/^'${host_name}':[[:space:]]//'`
+ if [ -z "${pv_names}" ]; then
+ echo "`basename $0`: get_vg_pvnames() error:" \
+ "There are no PVs in VG ${vg_name} in the host ${host_name}!"\
+ "Or VG ${vg_name} does not exist."
+ return 1
+ fi
+
+ echo "${pv_names}"
+ return 0
+}
+
+# get_vg_configs hostname
+# Get all the LVM VG informations from the node @hostname
+get_vg_configs() {
+ declare -i i=0
+ local host_name=$1
+ local cmd ret_str
+ local vg_name
+
+ # Initialize the arrays
+ unset VG_NAME
+ unset VG_PVNAMES
+
+ # Execute remote command to get all the VG names.
+ cmd="${EXPORT_PATH} vgdisplay \
+ | grep \"VG Name\" | awk '{print \$3}' | xargs"
+ ret_str=`${REMOTE} ${host_name} "${cmd}" 2>&1`
+ if [ $? -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_vg_configs() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ else
+ remote_error "get_vg_configs" ${host_name}
+ fi
+ return 1
+ fi
+
+ if [ -z "${ret_str}" ] \
+ || [ "${ret_str}" != "${ret_str#*No volume groups found*}" ]; then
+ verbose_output "There are no VGs in the host ${host_name}!"
+ return 0
+ fi
+
+ # Get all the VG informations
+ for vg_name in `echo "${ret_str}" | sed -e 's/^'${host_name}'://'`; do
+ VG_NAME[i]=${vg_name}
+ VG_PVNAMES[i]=$(get_vg_pvnames ${host_name} ${VG_NAME[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${VG_PVNAMES[i]}"
+ return 1
+ fi
+ let "i += 1"
+ done
+
+ return 0
+}
+
+# get_lv_configs hostname
+# Get all the LVM LV informations from the node @hostname
+get_lv_configs() {
+ declare -i i=0
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset LV_NAME
+ unset LV_SIZE
+ unset LV_VGNAME
+
+ # Execute remote command to get all the LV informations.
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ [ "${line}" != "${line#*volume group*}" ] && break
+
+ LV_NAME[i]=`echo "${line}" | awk -F: '{print $1}' | sed -e 's/.*\///g'`
+ LV_VGNAME[i]=`echo "${line}" | awk -F: '{print $2}'`
+ LV_SIZE[i]=`echo "${line}" | awk -F: '{print $7}' | sed -e 's/.*/&K/'`
+
+ let "i += 1"
+ done < <(${REMOTE} ${host_name} "${EXPORT_PATH} lvdisplay -c")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no LVs in the host ${host_name}"
+ fi
+
+ return 0
+}
+
+#*************************** Network module options ***************************#
+# last_is_backslash line
+# Check whether the last effective (non-space, non-tab) character of @line is a backslash
+last_is_backslash() {
+ local line="$*"
+ declare -i i
+ declare -i length
+ local letter last_letter
+
+ length=${#line}
+ for ((i = ${length}-1; i >= 0; i--)); do
+ letter=${line:${i}:1}
+ [ "x${letter}" != "x " -a "x${letter}" != "x	" -a -n "${letter}" ]\
+ && last_letter=${letter} && break
+ done
+
+ [ "x${last_letter}" = "x\\" ] && return 0
+
+ return 1
+}
+
+# get_module_opts hostname
+# Get the network module options from the node @hostname
+get_module_opts() {
+ local host_name=$1
+ local ret_str
+ local MODULE_CONF KERNEL_VER
+ local ret_line line find_options
+ local continue_flag
+
+ MODULE_OPTS=${DEFAULT_MOD_OPTS}
+
+ # Execute remote command to get the kernel version
+ ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: get_module_opts() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+ remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1
+
+ if is_pdsh; then
+ KERNEL_VER=`echo ${ret_str} | awk '{print $2}'`
+ else
+ KERNEL_VER=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ # Get the module configuration file name
+ if [ "${KERNEL_VER:0:3}" = "2.4" ]; then
+ MODULE_CONF=/etc/modules.conf
+ else
+ MODULE_CONF=/etc/modprobe.conf
+ fi
+
+ # Execute remote command to get the lustre network module options
+ continue_flag=false
+ find_options=false
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ if [ "${line}" != "${line#*options lnet*}" ]; then
+ if ! ${find_options}; then
+ find_options=true
+ MODULE_OPTS=${line}
+ else
+ MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
+ fi
+
+ last_is_backslash "${line}" && continue_flag=true \
+ || continue_flag=false
+ continue
+ fi
+
+ if ${continue_flag}; then
+ MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
+ ! last_is_backslash "${line}" && continue_flag=false
+
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${MODULE_CONF}")
+
+ if [ -z "${MODULE_OPTS}" ]; then
+ MODULE_OPTS=${DEFAULT_MOD_OPTS}
+ fi
+
+ return 0
+}
+
+#************************ HA software configurations ************************#
+# is_ha_target hostname target_devname
+# Check whether the target @target_devname was made to be high-available
+is_ha_target() {
+ local host_name=$1
+ local target_svname=$2
+ local res_file
+ local ret_str
+
+ case "${HATYPE_OPT}" in
+ "${HBVER_HBV1}") res_file=${HA_RES};;
+ "${HBVER_HBV2}") res_file=${HA_CIB};;
+ "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG};;
+ esac
+
+ # Execute remote command to check the resource file
+ ret_str=`${REMOTE} ${host_name} \
+ "grep ${target_svname} ${res_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: is_ha_target() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ [ "${ret_str}" = "${ret_str#*${target_svname}*}" ] && return 1
+
+ return 0
+}
+
+# get_hb_configs hostname
+# Get the Heartbeat configurations from the node @hostname
+get_hb_configs() {
+ local host_name=$1
+ local ret_line line
+ declare -i i
+
+ unset HA_CONFIGS
+ HB_CHANNELS=
+ SRV_IPADDRS=
+ HB_OPTIONS=
+
+ # Execute remote command to get the configs of Heartbeat channels, etc
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ if [ "${line}" != "${line#*serial*}" ] \
+ || [ "${line}" != "${line#*cast*}" ]; then
+ if [ -z "${HB_CHANNELS}" ]; then
+ HB_CHANNELS=${line}
+ else
+ HB_CHANNELS=${HB_CHANNELS}:${line}
+ fi
+ fi
+
+ if [ "${line}" != "${line#*stonith*}" ] \
+ || [ "${line}" != "${line#*ping*}" ] \
+ || [ "${line}" != "${line#*respawn*}" ] \
+ || [ "${line}" != "${line#*apiauth*}" ] \
+ || [ "${line}" != "${line#*compression*}" ]; then
+ if [ -z "${HB_OPTIONS}" ]; then
+ HB_OPTIONS=${line}
+ else
+ HB_OPTIONS=${HB_OPTIONS}:${line}
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${HA_CF}")
+
+ if [ -z "${HB_CHANNELS}" ]; then
+ echo >&2 "`basename $0`: get_hb_configs() error:" \
+ "There are no heartbeat channel configs in ${HA_CF}" \
+ "of host ${host_name} or ${HA_CF} does not exist!"
+ return 0
+ fi
+
+ # Execute remote command to get Heartbeat service address
+ if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ]; then
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ # Get rid of the empty line
+ [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\
+ && continue
+
+ # Get rid of the comment line
+ [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
+
+ SRV_IPADDRS=`echo ${line} | awk '{print $2}'`
+ [ -n "${SRV_IPADDRS}" ] \
+ && [ "`echo ${line} | awk '{print $1}'`" = "${host_name}" ] && break
+ done < <(${REMOTE} ${host_name} "cat ${HA_RES}")
+
+ if [ -z "${SRV_IPADDRS}" ]; then
+ echo >&2 "`basename $0`: get_hb_configs() error: There"\
+ "are no service address in ${HA_RES} of host"\
+ "${host_name} or ${HA_RES} does not exist!"
+ return 0
+ fi
+ fi
+
+ # Construct HA configuration items
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to check whether this target service
+ # was made to be high-available
+ if is_ha_target ${host_name} ${TARGET_DEVNAMES[i]}; then
+ HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
+ fi
+ done
+
+ return 0
+}
+
+# get_cluman_channel hostname
+# Get the Heartbeat channel of CluManager from the node @hostname
+get_cluman_channel() {
+ local host_name=$1
+ local ret_line line
+ local cluman_channel=
+ local mcast_ipaddr
+
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ "${line}" != "${line#*broadcast*}" ] \
+ && [ "`echo ${line}|awk '{print $3}'`" = "yes" ]; then
+ cluman_channel="broadcast"
+ break
+ fi
+
+ if [ "${line}" != "${line#*multicast_ipaddress*}" ]; then
+ mcast_ipaddr=`echo ${line}|awk '{print $3}'`
+ if [ "${mcast_ipaddr}" != "225.0.0.11" ]; then
+ cluman_channel="multicast ${mcast_ipaddr}"
+ break
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd")
+
+ echo ${cluman_channel}
+ return 0
+}
+
+# get_cluman_srvaddr hostname target_svname
+# Get the service IP addresses of @target_svname from the node @hostname
+get_cluman_srvaddr() {
+ local host_name=$1
+ local target_svname=$2
+ local ret_line line
+ local srvaddr cluman_srvaddr=
+
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ "${line}" != "${line#*ipaddress = *}" ]; then
+ srvaddr=`echo ${line}|awk '{print $3}'`
+ if [ -z "${cluman_srvaddr}" ]; then
+ cluman_srvaddr=${srvaddr}
+ else
+ cluman_srvaddr=${cluman_srvaddr}:${srvaddr}
+ fi
+ fi
+ done < <(${REMOTE} ${host_name} "${CONFIG_CMD} \
+ --service=${target_svname} --service_ipaddresses")
+
+ if [ -z "${cluman_srvaddr}" ]; then
+ echo "`basename $0`: get_cluman_srvaddr() error: Cannot" \
+ "get the service IP addresses of ${target_svname} in" \
+ "${host_name}! Check ${CONFIG_CMD} command!"
+ return 1
+ fi
+
+ echo ${cluman_srvaddr}
+ return 0
+}
+
+# get_cluman_configs hostname
+# Get the CluManager configurations from the node @hostname
+get_cluman_configs() {
+ local host_name=$1
+ local ret_str
+ declare -i i
+
+ unset HA_CONFIGS
+
+ # Execute remote command to get the configs of CluManager
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ HB_CHANNELS=
+ SRV_IPADDRS=
+ HB_OPTIONS=
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to check whether this target service
+ # was made to be high-available
+ ! is_ha_target ${host_name} ${TARGET_DEVNAMES[i]} && continue
+
+ # Execute remote command to get Heartbeat channel
+ HB_CHANNELS=$(get_cluman_channel ${host_name})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${HB_CHANNELS}"
+ fi
+
+ # Execute remote command to get service IP address
+ SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \
+ ${TARGET_SVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${SRV_IPADDRS}"
+ return 0
+ fi
+
+ HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
+ done
+
+ return 0
+}
+
+# get_ha_configs hostname
+# Get the HA software configurations from the node @hostname
+get_ha_configs() {
+ local host_name=$1
+
+ unset HA_CONFIGS
+
+ if [ -z "${HATYPE_OPT}" ]; then
+ return 0
+ fi
+
+ verbose_output "Collecting HA software configurations from host $1..."
+
+ case "${HATYPE_OPT}" in
+ "${HBVER_HBV1}" | "${HBVER_HBV2}") # Heartbeat
+ if ! get_hb_configs ${host_name}; then
+ return 1
+ fi
+ ;;
+ "${HATYPE_CLUMGR}") # CluManager
+ if ! get_cluman_configs ${host_name}; then
+ return 1
+ fi
+ ;;
+ esac
+
+ return 0
+}
+
+#*********************** Lustre targets configurations ***********************#
+
+# is_failover_service target_svname
+# Check whether a target service @target_svname is a failover service.
+is_failover_service() {
+ local target_svname=$1
+ declare -i i
+
+ for ((i = 0; i < ${#ALL_TARGET_SVNAMES[@]}; i++)); do
+ [ "${target_svname}" = "${ALL_TARGET_SVNAMES[i]}" ] && return 0
+ done
+
+ return 1
+}
+
+# get_svnames hostname
+# Get the lustre target server obd names from the node @hostname
+get_svnames(){
+ declare -i i
+ declare -i j
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the TARGET_SVNAMES array
+ unset TARGET_SVNAMES
+ unset FAILOVER_FMTOPTS
+
+ # Execute remote command to the node @hostname and figure out what
+ # lustre services are running.
+ i=0
+ j=${#ALL_TARGET_SVNAMES[@]}
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ -z "`echo ${line} | grep ${MGS_TYPE}`" ] \
+ && [ -z "`echo ${line} | grep ${MDT_TYPE}`" ] \
+ && [ -z "`echo ${line} | grep ${OST_TYPE}`" ]; then
+ continue
+ fi
+
+ # Get target server name
+ TARGET_SVNAMES[i]=`echo ${line} | awk '{print $4}'`
+ if [ -n "${TARGET_SVNAMES[i]}" ]; then
+ if is_failover_service ${TARGET_SVNAMES[i]}; then
+ FAILOVER_FMTOPTS[i]="--noformat"
+ fi
+ ALL_TARGET_SVNAMES[j]=${TARGET_SVNAMES[i]}
+ let "i += 1"
+ let "j += 1"
+ else
+ echo >&2 "`basename $0`: get_svnames() error: Invalid"\
+ "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\
+ "- \"${line}\"!"
+ return 1
+ fi
+ done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}")
+
+ if [ $i -eq 0 ]; then
+ verbose_output "There are no lustre services running" \
+ "on the node ${host_name}!"
+ fi
+
+ return 0
+}
+
+# is_loopdev devname
+# Check whether a device @devname is a loop device or not
+is_loopdev() {
+ local devname=$1
+
+ if [ -z "${devname}" ] || \
+ [ -z "`echo ${devname}|awk '/\/dev\/loop[[:digit:]]/ {print $0}'`" ]
+ then
+ return 1
+ fi
+
+ return 0
+}
+
+# get_devname hostname svname
+# Get the device name of lustre target @svname from node @hostname
+get_devname() {
+ local host_name=$1
+ local target_svname=$2
+ local target_devname=
+ local ret_str
+ local target_type target_obdtype mntdev_file
+
+ if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
+ # Execute remote command to get the device name of mgs target
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/findfs LABEL=${target_svname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]
+ then
+ echo "`basename $0`: get_devname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+ fi
+
+ if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]; then
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+ fi
+ else # Execute remote command to get the device name of mdt/ost target
+ target_type=`echo ${target_svname} | cut -d - -f 2`
+ target_obdtype=${target_type:0:3}_TYPE
+
+ mntdev_file=${LUSTRE_PROC}/${!target_obdtype}/${target_svname}/mntdev
+
+ ret_str=`${REMOTE} ${host_name} "cat ${mntdev_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_devname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*No such file*}" ]; then
+ echo "`basename $0`: get_devname() error:"\
+ "${mntdev_file} does not exist in ${host_name}!"
+ return 1
+ else
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $1}'`
+ fi
+ fi
+ fi
+
+ echo ${target_devname}
+ return 0
+}
+
+# get_devsize hostname target_devname
+# Get the device size (KB) of @target_devname from node @hostname
+get_devsize() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devsize=
+ local ret_str
+
+ # Execute remote command to get the device size
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/blockdev --getsize ${target_devname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_devsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ target_devsize=`echo ${ret_str} | awk '{print $2}'`
+ else
+ target_devsize=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_devsize() error: can't" \
+ "get device size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let " target_devsize /= 2"
+
+ echo ${target_devsize}
+ return 0
+}
+
+# get_realdevname hostname loop_dev
+# Get the real device name of loop device @loop_dev from node @hostname
+get_realdevname() {
+ local host_name=$1
+ local loop_dev=$2
+ local target_devname=
+ local ret_str
+
+ # Execute remote command to get the real device name
+ ret_str=`${REMOTE} ${host_name} \
+ "/sbin/losetup ${loop_dev}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_realdevname() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ target_devname=`echo ${ret_str} | awk '{print $4}' \
+ | sed 's/^(//' | sed 's/)$//'`
+ else
+ target_devname=`echo ${ret_str} | awk '{print $3}' \
+ | sed 's/^(//' | sed 's/)$//'`
+ fi
+
+ if [ "${ret_str}" != "${ret_str#*No such*}" ] \
+ || [ -z "${target_devname}" ]; then
+ echo "`basename $0`: get_realdevname() error: can't" \
+ "get info on device ${loop_dev} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${target_devname}
+ return 0
+}
+
+# get_mntpnt hostname target_devname
+# Get the lustre target mount point from the node @hostname
+get_mntpnt(){
+ local host_name=$1
+ local target_devname=$2
+ local mnt_point=
+ local ret_str
+
+ # Execute remote command to get the mount point
+ ret_str=`${REMOTE} ${host_name} \
+ "cat /etc/mtab | grep ${target_devname}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_mntpnt() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ mnt_point=`echo ${ret_str} | awk '{print $3}'`
+ else
+ mnt_point=`echo ${ret_str} | awk '{print $2}'`
+ fi
+
+ if [ -z "${mnt_point}" ]; then
+ echo "`basename $0`: get_mntpnt() error: can't" \
+ "get the mount point of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${mnt_point}
+ return 0
+}
+
+# get_devnames hostname
+# Get the lustre target device names, mount points
+# and loop device sizes from the node @hostname
+get_devnames(){
+ declare -i i
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset TARGET_DEVNAMES
+ unset TARGET_DEVSIZES
+ unset TARGET_MNTPNTS
+
+ for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do
+ TARGET_DEVNAMES[i]=$(get_devname ${host_name} \
+ ${TARGET_SVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVNAMES[i]}"
+ return 1
+ fi
+
+ if [ -z "${TARGET_DEVNAMES[i]}" ]; then
+ if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then
+ verbose_output "There exists combo mgs/mdt"\
+ "target in ${host_name}."
+ continue
+ else
+ echo >&2 "`basename $0`: get_devname() error:"\
+ "No device corresponding to target" \
+ "${TARGET_SVNAMES[i]} in ${host_name}!"
+ return 1
+ fi
+ fi
+
+ # Get the mount point of the target
+ TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_MNTPNTS[i]}"
+ return 1
+ fi
+
+ # The target device is a loop device?
+ if [ -n "${TARGET_DEVNAMES[i]}" ] \
+ && is_loopdev ${TARGET_DEVNAMES[i]}; then
+ # Get the device size
+ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVSIZES[i]}"
+ return 1
+ fi
+
+ # Get the real device name
+ TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVNAMES[i]}"
+ return 1
+ fi
+ fi
+ done
+
+ return 0
+}
+
+# is_target target_svtype ldd_flags
+# Check the service type of a lustre target
+is_target() {
+ case "$1" in
+ "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";;
+ "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";;
+ "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";;
+ *) # default arm must be unquoted; "*" would match only a literal '*'
+ echo >&2 "`basename $0`: is_target() error: Invalid" \
+ "target service type - \"$1\"!"
+ return 1
+ ;;
+ esac
+
+ if [ ${ret} -eq 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# get_devtype ldd_flags
+# Get the service type of a lustre target from @ldd_flags
+get_devtype() {
+ local target_devtype=
+
+ if [ -z "$1" ]; then # validate our own argument, not the caller's ${flags}
+ echo "`basename $0`: get_devtype() error: Invalid" \
+ "ldd_flags - it's value is null!"
+ return 1
+ fi
+
+ if is_target "mgs" $1; then
+ if is_target "mdt" $1; then
+ target_devtype="mgs|mdt"
+ else
+ target_devtype="mgs"
+ fi
+ elif is_target "mdt" $1; then
+ target_devtype="mdt"
+ elif is_target "ost" $1; then
+ target_devtype="ost"
+ else
+ echo "`basename $0`: get_devtype() error: Invalid" \
+ "ldd_flags - \"$1\"!"
+ return 1
+ fi
+
+ echo ${target_devtype}
+ return 0
+}
+
+# get_mntopts ldd_mount_opts
+# Get the user-specified lustre target mount options from @ldd_mount_opts
+get_mntopts() {
+ local mount_opts=
+ local ldd_mount_opts=$1
+
+ mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}"
+ mount_opts="${mount_opts#${OST_DEFAULT_MNTOPTS}}"
+ mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`"
+
+ [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \
+ || echo ${mount_opts}
+
+ return 0
+}
+
+# get_mgsnids ldd_params
+# Get the mgs nids of lustre target from @ldd_params
+get_mgsnids() {
+ local mgs_nids= # mgs nids in one mgs node
+ local all_mgs_nids= # mgs nids in all mgs failover nodes
+ local param=
+ local ldd_params="$*"
+
+ for param in ${ldd_params}; do
+ if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then
+ mgs_nids=`echo ${param#${PARAM_MGSNODE}}`
+
+ if [ -n "${all_mgs_nids}" ]; then
+ all_mgs_nids=${all_mgs_nids}:${mgs_nids}
+ else
+ all_mgs_nids=${mgs_nids}
+ fi
+ fi
+ done
+
+ [ "${all_mgs_nids}" != "${all_mgs_nids#*,*}" ] \
+ && echo "\""${all_mgs_nids}"\"" || echo ${all_mgs_nids}
+
+ return 0
+}
+
+# get_failnids ldd_params
+# Get the failover nids of lustre target from @ldd_params
+get_failnids() {
+ local fail_nids= # failover nids in one failover node
+ local all_fail_nids= # failover nids in all failover nodes
+ # of this target
+ local param=
+ local ldd_params="$*"
+
+ for param in ${ldd_params}; do
+ if [ -n "`echo ${param}|awk '/failover\.node=/ {print $0}'`" ]; then
+ fail_nids=`echo ${param#${PARAM_FAILNODE}}`
+
+ if [ -n "${all_fail_nids}" ]; then
+ all_fail_nids=${all_fail_nids}:${fail_nids}
+ else
+ all_fail_nids=${fail_nids}
+ fi
+ fi
+ done
+
+ [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \
+ && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids}
+
+ return 0
+}
+
+# get_fmtopts target_devname hostname ldd_params
+# Get other format options of the lustre target @target_devname from @ldd_params
+get_fmtopts() {
+ local target_devname=$1
+ local host_name=$2
+ shift
+ shift
+ local ldd_params="$*"
+ local param=
+ local fmt_opts=
+
+ for param in ${ldd_params}; do
+ [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue
+ [ -n "`echo ${param}|awk '/failover\.node=/ {print $0}'`" ] && continue
+
+ if [ -n "${param}" ]; then
+ if [ -n "${fmt_opts}" ]; then
+ fmt_opts=${fmt_opts}" --param=\""${param}"\""
+ else
+ fmt_opts="--param=\""${param}"\""
+ fi
+ fi
+ done
+
+ echo ${fmt_opts}
+ return 0
+}
+
+# get_stripecount host_name target_fsname
+# Get the stripe count for @target_fsname
+get_stripecount() {
+ local host_name=$1
+ local target_fsname=$2
+ local stripe_count=
+ local stripecount_file
+ local ret_str
+
+ # Get the stripe count
+ stripecount_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount
+ ret_str=`${REMOTE} ${host_name} "cat ${stripecount_file}" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_stripecount() error:" \
+ "remote command to ${host_name} error: ${ret_str}"
+ return 1
+ fi
+
+ if is_pdsh; then
+ stripe_count=`echo ${ret_str} | awk '{print $2}'`
+ else
+ stripe_count=`echo ${ret_str} | awk '{print $1}'`
+ fi
+
+ if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_stripecount() error: can't" \
+ "get stripe count of ${target_fsname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${stripe_count}
+ return 0
+}
+
+# get_stripecount_opt host_name target_fsname
+# Get the stripe count option for lustre mdt target
+get_stripecount_opt() {
+ local host_name=$1
+ local target_fsname=$2
+ local stripe_count=
+ local stripecount_opt=
+
+ # Get the stripe count
+ [ -z "${target_fsname}" ] && target_fsname="lustre"
+ stripe_count=$(get_stripecount ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${stripe_count}"
+ return 1
+ fi
+
+ if [ "${stripe_count}" != "1" ]; then
+ stripecount_opt=${OPTSTR_STRIPE_COUNT}${stripe_count}
+ fi
+
+ echo ${stripecount_opt}
+ return 0
+}
+
+# get_ldds hostname
+# Get the lustre target disk data from the node @hostname
+get_ldds(){
+ declare -i i
+ local host_name=$1
+ local ret_line line
+ local flags mnt_opts params
+ local stripecount_opt
+
+ # Initialize the arrays
+ unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
+ unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS
+
+ # Get lustre target device type, fsname, index, etc.
+ # from MOUNT_DATA_FILE. Using tunefs.lustre to read it.
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ flags=
+ mnt_opts=
+ params=
+ stripecount_opt=
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Execute remote command to read MOUNT_DATA_FILE
+ while read -r ret_line; do
+ if is_pdsh; then
+ set -- ${ret_line}
+ shift
+ line="$*"
+ else
+ line="${ret_line}"
+ fi
+
+ if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then
+ TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then
+ TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then
+ flags=`echo ${line}|awk '{print $2}'`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then
+ mnt_opts=`echo ${line}|awk '{print $0}'`
+ mnt_opts=`echo ${mnt_opts#Persistent mount opts: }`
+ continue
+ fi
+
+ if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then
+ params=`echo ${line}|awk '{print $0}'`
+ params=`echo ${params#Parameters:}`
+ break
+ fi
+ done < <(${REMOTE} ${host_name} "${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null")
+
+ if [ -z "${flags}" ]; then
+ echo >&2 "`basename $0`: get_ldds() error: Invalid" \
+ "ldd_flags of target ${TARGET_DEVNAMES[i]}" \
+ "in host ${host_name} - it's value is null!"\
+ "Check ${TUNEFS} command!"
+ return 1
+ fi
+
+ if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \
+ || is_target "mgs" ${flags}; then
+ TARGET_INDEXES[i]=
+ fi
+
+ [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]=
+
+ # Get the lustre target service type
+ TARGET_DEVTYPES[i]=$(get_devtype ${flags})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVTYPES[i]} From device" \
+ "${TARGET_DEVNAMES[i]} in host ${host_name}!"
+ return 1
+ fi
+
+ # Get the lustre target mount options
+ TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}")
+
+ # Get mgs nids of the lustre target
+ TARGET_MGSNIDS[i]=$(get_mgsnids "${params}")
+
+ # Get failover nids of the lustre target
+ TARGET_FAILNIDS[i]=$(get_failnids "${params}")
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_FAILNIDS[i]} From device" \
+ "${TARGET_DEVNAMES[i]} in host ${host_name}!"
+ return 1
+ fi
+
+ # Get other format options of the lustre target
+ TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_FMTOPTS[i]}"
+ return 1
+ fi
+
+ if [ -n "${TARGET_DEVSIZES[i]}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}"
+ else
+ TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}"
+ fi
+ fi
+
+ if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]}
+ else
+ TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]}
+ fi
+ fi
+
+ if is_target "mdt" ${flags}; then
+ # Get the stripe count option
+ stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${stripecount_opt}"
+ return 1
+ fi
+
+ if [ -n "${stripecount_opt}" ]; then
+ if [ -n "${TARGET_FMTOPTS[i]}" ]; then
+ TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt}
+ else
+ TARGET_FMTOPTS[i]=${stripecount_opt}
+ fi
+ fi
+ fi
+
+ if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then
+ TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\""
+ fi
+ done
+
+ return 0
+}
+
+# get_journalsize target_devname hostname
+# Get the journal size of lustre target @target_devname from @hostname
+# On success, prints the journal size in MB on stdout and returns 0.
+# On failure, prints an error message on stdout (callers capture it via
+# command substitution) and returns 1.
+get_journalsize() {
+ local target_devname=$1
+ local host_name=$2
+ local journal_inode=
+ local journal_size=
+ local ret_str
+
+ # Execute remote command to get the journal inode number
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Journal inode:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_journalsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Strip everything before the "Journal inode:" label, then take the
+ # third whitespace-separated field as the inode number.
+ ret_str=${ret_str#${ret_str%Journal inode:*}}
+ journal_inode=`echo ${ret_str} | awk '{print $3}'`
+ # Sanity check: the parsed value must begin with a digit.
+ if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_journalsize() error: can't" \
+ "get journal inode of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Execute remote command to get the journal size
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \
+ 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_journalsize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # The journal file size in bytes is the sixth field of the "User:"
+ # line printed by debugfs "stat".
+ ret_str=${ret_str#${ret_str%User:*}}
+ journal_size=`echo ${ret_str} | awk '{print $6}'`
+ if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_journalsize() error: can't" \
+ "get journal size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let "journal_size /= 1024*1024" # convert bytes to MB
+
+ echo ${journal_size}
+ return 0
+}
+
+# get_defaultjournalsize target_devsize
+# Calculate the default journal size from target device size @target_devsize
+# Prints the computed default journal size in MB (0 for small devices)
+# on stdout and always returns 0.
+# NOTE(review): @target_devsize is presumably in KB (base_size of
+# 1024*1024 is commented as 1GB below) — confirm against callers.
+get_defaultjournalsize() {
+ declare -i target_devsize=$1
+ declare -i journal_size=0
+ declare -i max_size base_size
+
+ # Devices up to 1GB keep journal_size = 0; callers fall back to
+ # figure_journal_size() in that case.
+ let "base_size = 1024*1024"
+ if [ ${target_devsize} -gt ${base_size} ]; then # 1GB
+ let "journal_size = target_devsize / 102400"
+ let "journal_size *= 4"
+ fi
+
+ # Cap the journal at 102400 filesystem blocks, expressed in MB.
+ let "max_size = 102400 * L_BLOCK_SIZE"
+ let "max_size >>= 20" # 400MB
+
+ if [ ${journal_size} -gt ${max_size} ]; then
+ let "journal_size = max_size"
+ fi
+
+ echo ${journal_size}
+ return 0
+}
+
+# figure_journal_size target_devname hostname
+# Find a reasonable journal file size given the number of blocks
+# in the filesystem. This algorithm is derived from figure_journal_size()
+# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm.
+# On success, prints the journal size in MB on stdout and returns 0;
+# on failure, prints an error message and returns 1.
+figure_journal_size() {
+ local target_devname=$1
+ local host_name=$2
+ local ret_str
+ declare -i block_count
+ declare -i journal_blocks
+ declare -i journal_size
+
+ # Execute remote command to get the block count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Block count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: figure_journal_size() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Block count:" line and verify that
+ # it is numeric.
+ ret_str=${ret_str#${ret_str%Block count:*}}
+ block_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: figure_journal_size() error: can't" \
+ "get block count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Step the journal size up with the filesystem size, capped at
+ # 32768 journal blocks (mirrors the e2fsprogs heuristic).
+ if ((block_count < 32768)); then
+ let "journal_blocks = 1024"
+ elif ((block_count < 256*1024)); then
+ let "journal_blocks = 4096"
+ elif ((block_count < 512*1024)); then
+ let "journal_blocks = 8192"
+ elif ((block_count < 1024*1024)); then
+ let "journal_blocks = 16384"
+ else
+ let "journal_blocks = 32768"
+ fi
+
+ # Convert journal blocks to MB.
+ let "journal_size = journal_blocks * L_BLOCK_SIZE / 1048576"
+
+ echo ${journal_size}
+ return 0
+}
+
+# get_J_opt hostname target_devname target_devsize
+# Get the mkfs -J option of lustre target @target_devname
+# from the node @hostname
+# Prints "-J size=<MB>" only when the on-disk journal size differs from
+# the computed default (so default-sized targets get no explicit option);
+# prints an error message and returns 1 on failure.
+get_J_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devsize=$3
+ local journal_size=
+ local default_journal_size=
+ local journal_opt=
+
+ # Get the real journal size of lustre target
+ journal_size=$(get_journalsize ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${journal_size}"
+ return 1
+ fi
+
+ # Get the default journal size of lustre target
+ default_journal_size=$(get_defaultjournalsize ${target_devsize})
+ # A default of 0 means the device is too small for the size-based
+ # heuristic; fall back to the block-count heuristic instead.
+ if [ "${default_journal_size}" = "0" ]; then
+ default_journal_size=$(figure_journal_size ${target_devname} \
+ ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${default_journal_size}"
+ return 1
+ fi
+ fi
+
+ if [ "${journal_size}" != "${default_journal_size}" ]; then
+ journal_opt="-J size=${journal_size}"
+ fi
+
+ echo ${journal_opt}
+ return 0
+}
+
+# get_ratio target_devname hostname
+# Get the bytes/inode ratio of lustre target @target_devname from @hostname
+# Computes block_count * block_size / inode_count from debugfs output;
+# prints the ratio on stdout, or an error message with return code 1.
+get_ratio() {
+ local target_devname=$1
+ local host_name=$2
+ local inode_count=
+ local block_count=
+ local ratio=
+ local ret_str
+
+ # Execute remote command to get the inode count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Inode count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_ratio() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Inode count:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Inode count:*}}
+ inode_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_ratio() error: can't" \
+ "get inode count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ # Execute remote command to get the block count
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Block count:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_ratio() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Block count:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Block count:*}}
+ block_count=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_ratio() error: can't" \
+ "get block count of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ let "ratio = block_count*L_BLOCK_SIZE/inode_count"
+
+ echo ${ratio}
+ return 0
+}
+
+# get_default_ratio target_devtype target_devsize
+# Calculate the default bytes/inode ratio from target type @target_devtype
+# MDTs default to 4096; large OSTs to 16384; everything else falls back
+# to the filesystem block size. Always returns 0.
+# NOTE(review): the 1000000 threshold suggests @target_devsize is in KB
+# (~1GB) — confirm against callers.
+get_default_ratio() {
+ local target_devtype=$1
+ declare -i target_devsize=$2
+ local ratio=
+
+ case "${target_devtype}" in
+ "mdt" | "mgs|mdt" | "mdt|mgs")
+ ratio=4096;;
+ "ost")
+ [ ${target_devsize} -gt 1000000 ] && ratio=16384;;
+ esac
+
+ # Fallback for small OSTs and unrecognized target types.
+ [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE}
+
+ echo ${ratio}
+ return 0
+}
+
+# get_i_opt hostname target_devname target_devtype target_devsize
+# Get the mkfs -i option of lustre target @target_devname
+# from the node @hostname
+# Prints "-i <ratio>" only when the target's real bytes/inode ratio
+# differs from the default for its type/size; prints an error message
+# and returns 1 on failure.
+get_i_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devtype=$3
+ local target_devsize=$4
+ local ratio=
+ local default_ratio=
+ local ratio_opt=
+
+ # Get the real bytes/inode ratio of lustre target
+ ratio=$(get_ratio ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${ratio}"
+ return 1
+ fi
+
+ # Get the default bytes/inode ratio of lustre target
+ default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize})
+
+ if [ "${ratio}" != "${default_ratio}" ]; then
+ ratio_opt="-i ${ratio}"
+ fi
+
+ echo ${ratio_opt}
+ return 0
+}
+
+# get_isize target_devname hostname
+# Get the inode size of lustre target @target_devname from @hostname
+# Prints the inode size (bytes) parsed from debugfs "stats -h" output,
+# or an error message with return code 1.
+get_isize() {
+ local target_devname=$1
+ local host_name=$2
+ local inode_size=
+ local ret_str
+
+ # Execute remote command to get the inode size
+ ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
+ ${target_devname} | grep 'Inode size:'" 2>&1`
+ if [ $? -ne 0 -a -n "${ret_str}" ]; then
+ echo "`basename $0`: get_isize() error:" \
+ "remote command error: ${ret_str}"
+ return 1
+ fi
+
+ # Parse the third field of the "Inode size:" line; must be numeric.
+ ret_str=${ret_str#${ret_str%Inode size:*}}
+ inode_size=`echo ${ret_str} | awk '{print $3}'`
+ if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
+ then
+ echo "`basename $0`: get_isize() error: can't" \
+ "get inode size of ${target_devname} in ${host_name}!"
+ return 1
+ fi
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_mdt_default_isize host_name target_fsname
+# Calculate the default inode size of lustre mdt target
+# Prints the default MDT inode size (bytes) derived from the
+# filesystem's stripe count, or an error message with return code 1.
+get_mdt_default_isize() {
+ local host_name=$1
+ local target_fsname=$2
+ declare -i stripe_count
+ local inode_size=
+
+ # Get the stripe count
+ stripe_count=$(get_stripecount ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${stripe_count}"
+ return 1
+ fi
+
+ # NOTE(review): the mapping is intentionally non-monotonic — very
+ # wide stripe counts (>77) drop back to 512, presumably because the
+ # striping EA no longer fits in the inode at that point and is
+ # stored externally regardless — confirm against mkfs.lustre logic.
+ if ((stripe_count > 77)); then
+ inode_size=512
+ elif ((stripe_count > 34)); then
+ inode_size=2048
+ elif ((stripe_count > 13)); then
+ inode_size=1024
+ else
+ inode_size=512
+ fi
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_default_isize host_name target_devtype target_fsname
+# Calculate the default inode size of lustre target type @target_devtype
+# MDT defaults come from the stripe count; OSTs default to 256 bytes;
+# anything else falls back to 128. Returns 1 only if the MDT lookup
+# fails (the error message is echoed through).
+get_default_isize() {
+ local host_name=$1
+ local target_devtype=$2
+ local target_fsname=$3
+ local inode_size=
+
+ case "${target_devtype}" in
+ "mdt" | "mgs|mdt" | "mdt|mgs")
+ inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${inode_size}"
+ return 1
+ fi
+ ;;
+ "ost")
+ inode_size=256;;
+ esac
+
+ # Fallback for unrecognized target types (e.g. a standalone mgs).
+ [ -z "${inode_size}" ] && inode_size=128
+
+ echo ${inode_size}
+ return 0
+}
+
+# get_I_opt hostname target_devname target_devtype target_fsname
+# Get the mkfs -I option of lustre target @target_devname
+# from the node @hostname
+# Prints "-I <size>" only when the target's real inode size differs
+# from the default for its type; prints an error message and returns 1
+# on failure.
+get_I_opt() {
+ local host_name=$1
+ local target_devname=$2
+ local target_devtype=$3
+ local target_fsname=$4
+ local isize=
+ local default_isize=
+ local isize_opt=
+
+ # Get the real inode size of lustre target
+ isize=$(get_isize ${target_devname} ${host_name})
+ if [ $? -ne 0 ]; then
+ echo "${isize}"
+ return 1
+ fi
+
+ # Get the default inode size of lustre target
+ [ -z "${target_fsname}" ] && target_fsname="lustre"
+ default_isize=$(get_default_isize ${host_name} ${target_devtype} \
+ ${target_fsname})
+ if [ $? -ne 0 ]; then
+ echo "${default_isize}"
+ return 1
+ fi
+
+ if [ "${isize}" != "${default_isize}" ]; then
+ isize_opt="-I ${isize}"
+ fi
+
+ echo ${isize_opt}
+ return 0
+}
+
+# get_mkfsopts hostname
+# Get the mkfs options of lustre targets from the node @hostname
+# Fills the global TARGET_MKFSOPTS array (one space-separated option
+# string per target) from the -J, -i and -I probes above; may also fill
+# in missing TARGET_DEVSIZES entries as a side effect. Errors from the
+# helpers are forwarded to stderr and the function returns 1.
+get_mkfsopts(){
+ declare -i i
+ local host_name=$1
+ local journal_opt
+ local ratio_opt
+ local inode_size_opt
+
+ # Initialize the arrays
+ unset TARGET_MKFSOPTS
+
+ # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ journal_opt=
+ ratio_opt=
+ inode_size_opt=
+
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ if [ -z "${TARGET_DEVSIZES[i]}" ]; then
+ # Get the device size
+ TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
+ ${TARGET_DEVNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${TARGET_DEVSIZES[i]}"
+ return 1
+ fi
+ fi
+
+ # Get the journal option
+ journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVSIZES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${journal_opt}"
+ return 1
+ fi
+
+ # Append the option with a space separator if options already exist.
+ if [ -n "${journal_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${journal_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}"
+ fi
+ fi
+
+ # Get the bytes-per-inode ratio option
+ ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${ratio_opt}"
+ return 1
+ fi
+
+ if [ -n "${ratio_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${ratio_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}"
+ fi
+ fi
+
+ # Get the inode size option
+ inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
+ ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]})
+ if [ $? -ne 0 ]; then
+ echo >&2 "${inode_size_opt}"
+ return 1
+ fi
+
+ if [ -n "${inode_size_opt}" ]; then
+ if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
+ TARGET_MKFSOPTS[i]="${inode_size_opt}"
+ else
+ TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}"
+ fi
+ fi
+
+ # Quote the field if it contains a comma, so it survives as a
+ # single cell in the generated csv file.
+ if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then
+ TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\""
+ fi
+ done
+ return 0
+}
+
+# get_target_configs hostname
+# Get the lustre target informations from the node @hostname
+# Runs the per-host collection helpers (get_svnames, get_devnames,
+# get_ldds, get_mkfsopts) and assembles one comma-separated config
+# record per target into the global TARGET_CONFIGS array.
+get_target_configs() {
+ declare -i i
+ local host_name=$1
+ local ret_line line
+
+ # Initialize the arrays
+ unset TARGET_CONFIGS
+
+ # Get lustre target server names
+ if ! get_svnames ${host_name}; then
+ return 1
+ fi
+
+ # Get lustre target device names, mount points and loop device sizes
+ if ! get_devnames ${host_name}; then
+ return 1
+ fi
+
+ # Get lustre target device type, fsname, index, etc.
+ if ! get_ldds ${host_name}; then
+ return 1
+ fi
+
+ # Get mkfs options of lustre targets
+ if ! get_mkfsopts ${host_name}; then
+ return 1
+ fi
+
+ # Construct lustre target configs; skip indices left empty by the
+ # collection helpers.
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+ TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]}
+ done
+
+ return 0
+}
+
+# get_configs hostname
+# Get all the informations needed to generate a csv file from
+# the node @hostname
+# Gathers network module options, lustre target configs and HA
+# software configs in sequence; returns 1 on the first failure.
+get_configs() {
+ # Check the hostname
+ if [ -z "$1" ]; then
+ echo >&2 "`basename $0`: get_configs() error:" \
+ "Missing hostname!"
+ return 1
+ fi
+
+ # Get network module options
+ verbose_output ""
+ verbose_output "Collecting network module options from host $1..."
+ if ! get_module_opts $1; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Get lustre target informations
+ verbose_output "Collecting Lustre targets informations from host $1..."
+ if ! get_target_configs $1; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Get HA software configurations
+ if ! get_ha_configs $1; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Collect linux MD/LVM device informations from the lustre cluster and
+# append them to the csv file
+# For each host in HOST_NAMES: collect MD, PV, VG and LV configs via
+# the get_*_configs helpers, and append one marker-tagged csv line per
+# device to LUSTRE_CSV_FILE. Returns 1 on the first collection failure.
+get_mdlvm_info() {
+ declare -i idx
+ declare -i i
+ local line
+
+ # Collect and append linux MD/LVM informations to the csv file
+ for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
+ [ -z "${HOST_NAMES[idx]}" ] && continue
+
+ # Collect MD device informations
+ ! get_md_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append MD device informations to the csv file
+ for ((i = 0; i < ${#MD_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${MD_MARKER},${MD_NAME[i]},,,${MD_LEVEL[i]},${MD_DEVS[i]}
+ verbose_output "Informations of MD device ${MD_NAME[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+
+ # Collect PV informations
+ ! get_pv_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append PV informations to the csv file (PV_NAMES is a single
+ # aggregated string, unlike the per-device arrays below)
+ if [ -n "${PV_NAMES}" ]; then
+ line=${HOST_NAMES[idx]},${PV_MARKER},${PV_NAMES}
+ verbose_output "Informations of PVs" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ fi
+
+ # Collect VG informations
+ ! get_vg_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append VG informations to the csv file
+ for ((i = 0; i < ${#VG_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${VG_MARKER},${VG_NAME[i]},,,${VG_PVNAMES[i]}
+ verbose_output "Informations of VG ${VG_NAME[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+
+ # Collect LV informations
+ ! get_lv_configs ${HOST_NAMES[idx]} && return 1
+
+ # Append LV informations to the csv file
+ for ((i = 0; i < ${#LV_NAME[@]}; i++)); do
+ line=${HOST_NAMES[idx]},${LV_MARKER},${LV_NAME[i]},,,${LV_SIZE[i]},${LV_VGNAME[i]}
+ verbose_output "Informations of LV /dev/${LV_VGNAME[i]}/${LV_NAME[i]}"\
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+ done
+ return 0
+}
+
+# Generate the csv file from the lustre cluster
+# Discovers the cluster hosts, truncates LUSTRE_CSV_FILE, optionally
+# records MD/LVM info (when GET_MDLVM_INFO is true), then appends one
+# csv line per lustre target per host. On a per-host collection
+# failure the partial csv file is removed and 1 is returned.
+gen_csvfile() {
+ declare -i idx
+ declare -i i
+ local line
+
+ # Get lustre cluster node names
+ verbose_output "Collecting Lustre cluster node names..."
+ if ! get_hostnames; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ # Truncate (or create) the output csv file.
+ : > ${LUSTRE_CSV_FILE}
+
+ ${GET_MDLVM_INFO} && get_mdlvm_info
+
+ # Collect and append lustre target informations to the csv file
+ for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
+ # Collect informations
+ if ! get_configs ${HOST_NAMES[idx]}; then
+ rm -f ${LUSTRE_CSV_FILE}
+ return 1
+ fi
+
+ # Append informations to the csv file
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
+ [ -z "${TARGET_DEVNAMES[i]}" ] && continue
+
+ # Only append the HA column when HA configs exist for
+ # this target.
+ if [ -z "${HA_CONFIGS[i]}" ]; then
+ line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]}
+ else
+ line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]}
+ fi
+ verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \
+ "in host ${HOST_NAMES[idx]} are as follows:"
+ verbose_output "${line}"
+ echo "" >> ${LUSTRE_CSV_FILE}
+ echo "${line}" >> ${LUSTRE_CSV_FILE}
+ done
+ done
+
+ return 0
+}
+
+# Main flow
+# Entry point: generate the csv file and exit non-zero on failure.
+echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} START ********"
+if ! gen_csvfile; then
+ exit 1
+fi
+echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} OK **********"
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# Reads old MDS config logs for transferring to a MGS
+#
+# Dumps each log file from the LOGS directory of an MDS device (via
+# debugfs, no mount required), classifies it as a client or MDT0000
+# log by string inspection, and interactively copies it to
+# $TMP/<newfsname>-<type> for later installation on the MGS.
+###############################################################################
+
+TMP=${TMP:-/tmp/logs}
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <mdsdev> <newfsname>
+
+ <mdsdev> the MDS disk device (e.g. /dev/sda1)
+ <newfsname> the name of the new filesystem (e.g. testfs)
+
+ This script will extract old config logs from an MDS device to a
+ temporary location ($TMP). During the upgrade procedure, mount the
+ MGS disk as type ldiskfs (e.g. mount -t ldiskfs /dev/sda
+ /mnt/temp), then copy these logs into the CONFIGS directory on the
+ MGS (e.g. /mnt/temp/CONFIGS). Logs from many MDS's can be added
+ in this way. When done, unmount the MGS, and then re-mount it as
+ type lustre to start the service.
+
+EOF
+ exit 1
+}
+
+if [ $# -lt 2 ]; then
+ usage
+fi
+
+DEV=$1
+FSNAME=$2
+# -c: catastrophic (read-only) mode; -R: run a single debugfs request.
+DEBUGFS="debugfs -c -R"
+mkdir -p $TMP
+
+# List the file names (9th column of "ls -l") in the LOGS directory,
+# keeping only entries that contain a lowercase letter.
+FILES=`$DEBUGFS "ls -l LOGS" $DEV | awk '{print $9}' | awk '/[a-z]/ {print $1}'`
+
+for FILE in ${FILES}; do
+ $DEBUGFS "dump LOGS/$FILE $TMP/temp" $DEV 2> /dev/null
+ # Classify the log: an MDC string means a client log; a lov string
+ # means an MDT log; anything else is skipped.
+ MDC=`strings $TMP/temp | grep MDC`
+ LOV=`strings $TMP/temp | grep lov`
+ if [ -n "$MDC" ]; then
+ TYPE=client
+ else
+ if [ -n "$LOV" ]; then
+ TYPE=MDT0000
+ else
+ echo "Can't determine type for log '$FILE', skipping"
+ continue
+ fi
+ fi
+ # Ask the operator before keeping each log under its new name.
+ echo -n "Copying log '$FILE' to '${FSNAME}-${TYPE}'. Okay [y/n]?"
+ read OK
+ if [ "$OK" = "y" ]; then
+ mv $TMP/temp $TMP/${FSNAME}-${TYPE}
+ else
+ rm $TMP/temp
+ fi
+done
+
+echo ls -l $TMP
+ls -l $TMP
+