Remove unneeded files; they will be generated from the corresponding *.in template files.
+++ /dev/null
-#!/bin/bash
-#
-# lc_cluman.sh - script for generating the Red Hat Cluster Manager
-# HA software's configuration files
-#
-################################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` <-n hostnames> [-s service addresses]
- [-c heartbeat channel] [-o heartbeat options] [-v]
- <-d target device> [-d target device...]
-
- -n hostnames the nodenames of the primary node and its fail-
- overs
- Multiple nodenames are separated by colon (:)
- delimeter. The first one is the nodename of the
- primary node, the others are failover nodenames.
- -s service addresses the IP addresses to failover
- Multiple addresses are separated by colon (:)
- delimeter.
- -c heartbeat channel the method to send/rcv heartbeats on
- The default method is multicast, and multicast_
- ipaddress is "225.0.0.11".
- -o heartbeat options a "catchall" for other heartbeat configuration
- options
- Multiple options are separated by colon (:)
- delimeter.
- -v verbose mode
- -d target device the target device name and mount point
- The device name and mount point are separated by
- colon (:) delimeter.
-
-EOF
- exit 1
-}
-
-#****************************** Global variables ******************************#
-# Scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Lustre utilities path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# CluManager tools
-CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"}
-CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
-
-# Configuration directory
-CLUMAN_DIR="/etc" # CluManager configuration directory
-FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files
-
-TMP_DIR="/tmp/clumanager" # temporary directory
-
-declare -a NODE_NAMES # node names in the failover group
-declare -a SRV_IPADDRS # service IP addresses
-
-# Lustre target device names, service names and mount points
-declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
-declare -i TARGET_NUM=0 # number of targets
-
-# Get and check the positional parameters
-VERBOSE_OUTPUT=false
-while getopts "n:s:c:o:vd:" OPTION; do
- case $OPTION in
- n)
- HOSTNAME_OPT=$OPTARG
- PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
- if [ -z "${PRIM_NODENAME}" ]; then
- echo >&2 $"`basename $0`: Missing primary nodename!"
- usage
- fi
- HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
- if [ ${HOSTNAME_NUM} -lt 2 ]; then
- echo >&2 $"`basename $0`: Missing failover nodenames!"
- usage
- fi
- ;;
- s)
- SRVADDR_OPT=$OPTARG
- ;;
- c)
- HBCHANNEL_OPT=$OPTARG
- HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
- | sed 's/"$//'`
- if [ -n "${HBCHANNEL_OPT}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
- && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
- "- ${HBCHANNEL_OPT}!"
- usage
- fi
- ;;
- o)
- HBOPT_OPT=$OPTARG
- HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
- ;;
- v)
- VERBOSE_OUTPUT=true
- ;;
- d)
- DEVICE_OPT=$OPTARG
- TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
- TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
- if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing target device name!"
- usage
- fi
- if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing mount point for target"\
- "${TARGET_DEVNAMES[TARGET_NUM]}!"
- usage
- fi
- TARGET_NUM=$(( TARGET_NUM + 1 ))
- ;;
-
- ?)
- usage
- esac
-done
-
-# Check the required parameters
-if [ -z "${HOSTNAME_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -n option!"
- usage
-fi
-
-if [ -z "${DEVICE_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -d option!"
- usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# get_nodenames
-#
-# Get all the node names in this failover group
-get_nodenames() {
- declare -i idx
- local nodename_str nodename
-
- nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for nodename in ${nodename_str}
- do
- NODE_NAMES[idx]=${nodename}
- idx=$idx+1
- done
-
- return 0
-}
-
-# get_check_srvIPaddrs
-#
-# Get and check all the service IP addresses in this failover group
-get_check_srvIPaddrs() {
- declare -i idx
- declare -i i
- local srvIPaddr_str srvIPaddr
-
- srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for srvIPaddr in ${srvIPaddr_str}
- do
- SRV_IPADDRS[idx]=${srvIPaddr}
- idx=$idx+1
- done
-
- for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
- for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
- # Check service IP address
- verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
- "real IP of host ${NODE_NAMES[i]} are in the" \
- "same subnet..."
- if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
- then
- return 1
- fi
- verbose_output "OK"
- done
- done
-
- return 0
-}
-
-# cluman_running host_name
-#
-# Run remote command to check whether clumanager service is running in @host_name
-cluman_running() {
- local host_name=$1
- local ret_str
-
- ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1`
- if [ $? -ne 0 ]; then
- if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then
- echo >&2 "`basename $0`: cluman_running() error:"\
- "remote command to ${host_name} error: ${ret_str}!"
- return 2
- else
- return 1
- fi
- fi
-
- return 0
-}
-
-# stop_cluman host_name
-#
-# Run remote command to stop clumanager service running in @host_name
-stop_cluman() {
- local host_name=$1
- local ret_str
-
- ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1`
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: stop_cluman() error:"\
- "remote command to ${host_name} error: ${ret_str}!"
- return 1
- fi
-
- echo "`basename $0`: Clumanager service is stopped on node ${host_name}."
- return 0
-}
-
-# check_cluman
-#
-# Run remote command to check each node's clumanager service
-check_cluman() {
- declare -i idx
- local OK
-
- # Get and check all the service IP addresses
- if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then
- return 1
- fi
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- # Check clumanager service status
- cluman_running ${NODE_NAMES[idx]}
- rc=$?
- if [ "$rc" -eq "2" ]; then
- return 1
- elif [ "$rc" -eq "1" ]; then
- verbose_output "Clumanager service is stopped on"\
- "node ${NODE_NAMES[idx]}."
- elif [ "$rc" -eq "0" ]; then
- OK=
- echo -n "`basename $0`: Clumanager service is running on"\
- "${NODE_NAMES[idx]}, go ahead to stop the service and"\
- "generate new configurations? [y/n]:"
- read OK
- if [ "${OK}" = "n" ]; then
- echo "`basename $0`: New Clumanager configurations"\
- "are not generated."
- return 2
- fi
-
- # Stop clumanager service
- stop_cluman ${NODE_NAMES[idx]}
- fi
- done
-
- return 0
-}
-
-# get_srvname hostname target_devname
-#
-# Get the lustre target server name from the node @hostname
-get_srvname() {
- local host_name=$1
- local target_devname=$2
- local target_srvname=
- local ret_str
-
- # Execute remote command to get the target server name
- ret_str=`${REMOTE} ${host_name} \
- "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
- if [ $? -ne 0 ]; then
- echo "`basename $0`: get_srvname() error:" \
- "from host ${host_name} - ${ret_str}"
- return 1
- fi
-
- if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
- ret_str=${ret_str#*Target: }
- target_srvname=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "${target_srvname}" ]; then
- echo "`basename $0`: get_srvname() error: Cannot get the"\
- "server name of target ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${target_srvname}
- return 0
-}
-
-# get_srvnames
-#
-# Get server names of all the Lustre targets in this failover group
-get_srvnames() {
- declare -i i
-
- # Initialize the TARGET_SRVNAMES array
- unset TARGET_SRVNAMES
-
- # Get Lustre target service names
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_SRVNAMES[i]}"
- return 1
- fi
- done
-
- return 0
-}
-
-# check_retval retval
-#
-# Check the return value of redhat-config-cluster-cmd
-check_retval() {
- if [ $1 -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
- return 1
- fi
-
- return 0
-}
-
-# add_services
-#
-# Add service tags into the cluster.xml file
-add_services() {
- declare -i idx
- declare -i i
-
- # Add service tag
- for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
- ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
- if ! check_retval $?; then
- return 1
- fi
-
- for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
- if ! check_retval $?; then
- return 1
- fi
- done
-
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --add_device \
- --name=${TARGET_DEVNAMES[i]}
- if ! check_retval $?; then
- return 1
- fi
-
- ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
- --device=${TARGET_DEVNAMES[i]} \
- --mount \
- --mountpoint=${TARGET_MNTPNTS[i]} \
- --fstype=lustre
- if ! check_retval $?; then
- return 1
- fi
- done
-
- return 0
-}
-
-# gen_cluster_xml
-#
-# Run redhat-config-cluster-cmd to create the cluster.xml file
-gen_cluster_xml() {
- declare -i idx
- declare -i i
- local mcast_IPaddr
- local node_names
- local hbopt
-
- [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
- /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
-
- # Run redhat-config-cluster-cmd to generate cluster.xml
- # Add clumembd tag
- if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
- ${CONFIG_CMD} --clumembd --broadcast=yes
- ${CONFIG_CMD} --clumembd --multicast=no
- if ! check_retval $?; then
- return 1
- fi
- elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
- mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
- if [ -n "${mcast_IPaddr}" ]; then
- ${CONFIG_CMD} --clumembd --multicast=yes\
- --multicast_ipaddress=${mcast_IPaddr}
- if ! check_retval $?; then
- return 1
- fi
- fi
- fi
-
- # Add cluster tag
- node_names=
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- node_names=${node_names}"${NODE_NAMES[idx]} "
- done
-
- ${CONFIG_CMD} --cluster --name="${node_names}failover group"
- if ! check_retval $?; then
- return 1
- fi
-
- # Add member tag
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
- if ! check_retval $?; then
- return 1
- fi
- done
-
- # Add service tag
- if ! add_services; then
- return 1
- fi
-
- # Add other tags
- if [ -n "${HBOPT_OPT}" ]; then
- while read -r hbopt
- do
- ${CONFIG_CMD} ${hbopt}
- if ! check_retval $?; then
- return 1
- fi
- done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
- END {for (i in a) print a[i]}')
- fi
-
- return 0
-}
-
-# create_config
-#
-# Create the cluster.xml file and scp it to the each node's /etc/
-create_config() {
- declare -i idx
-
- /bin/mkdir -p ${TMP_DIR}
- CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME}
- CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX}
-
- # Get server names of Lustre targets
- if ! get_srvnames; then
- return 1
- fi
-
- if [ -s ${CONFIG_PRIMNODE} ]; then
- if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
- then
- verbose_output "${CONFIG_PRIMNODE} already exists."
- return 0
- else
- [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
- /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old
-
- /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml
-
- # Add services into the cluster.xml file
- if ! add_services; then
- return 1
- fi
- fi
- else
- # Run redhat-config-cluster-cmd to generate cluster.xml
- verbose_output "Creating cluster.xml file for" \
- "${PRIM_NODENAME} failover group hosts..."
- if ! gen_cluster_xml; then
- return 1
- fi
- verbose_output "OK"
- fi
-
- /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE}
- [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \
- /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml
-
- # scp the cluster.xml file to all the nodes
- verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \
- "${PRIM_NODENAME} failover group hosts..."
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]}
-
- scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
- verbose_output "OK"
-
- return 0
-}
-
-# Main flow
-# Get all the node names
-if ! get_nodenames; then
- exit 1
-fi
-
-# Check clumanager services
-verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\
- "failover group hosts..."
-check_cluman
-rc=$?
-if [ "$rc" -eq "2" ]; then
- verbose_output "OK"
- exit 0
-elif [ "$rc" -eq "1" ]; then
- exit 1
-fi
-verbose_output "OK"
-
-# Generate configuration files
-if ! create_config; then
- exit 1
-fi
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# lc_hb.sh - script for generating the Heartbeat HA software's
-# configuration files
-#
-###############################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` <-r HBver> <-n hostnames> [-v]
- <-d target device> [-d target device...]
-
- -r HBver the version of Heartbeat software
- The Heartbeat software versions which are curr-
- ently supported are: hbv1 (Heartbeat version 1)
- and hbv2 (Heartbeat version 2).
- -n hostnames the nodenames of the primary node and its fail-
- overs
- Multiple nodenames are separated by colon (:)
- delimeter. The first one is the nodename of the
- primary node, the others are failover nodenames.
- -v verbose mode
- -d target device the target device name and mount point
- The device name and mount point are separated by
- colon (:) delimeter.
-
-EOF
- exit 1
-}
-
-#****************************** Global variables ******************************#
-# Scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
-SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh
-SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon.sh # create mon.cf file
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Lustre utilities path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# Heartbeat tools
-HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path
-CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py
-CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"}
-
-# Configuration directories
-HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
-MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory
-CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
-
-# Service directories and names
-HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources
-LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat
-FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type
-
-FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files
-
-TMP_DIR="/tmp/heartbeat" # temporary directory
-HACF_TEMP=${TMP_DIR}/ha.cf.temp
-AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX}
-
-HBVER_HBV1="hbv1" # Heartbeat version 1
-HBVER_HBV2="hbv2" # Heartbeat version 2
-
-declare -a NODE_NAMES # node names in the failover group
-
-# Lustre target device names, service names and mount points
-declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
-declare -i TARGET_NUM=0 # number of targets
-
-
-# Get and check the positional parameters
-VERBOSE_OUTPUT=false
-while getopts "r:n:vd:" OPTION; do
- case $OPTION in
- r)
- HBVER_OPT=$OPTARG
- if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
- && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
- echo >&2 $"`basename $0`: Invalid Heartbeat software" \
- "version - ${HBVER_OPT}!"
- usage
- fi
- ;;
- n)
- HOSTNAME_OPT=$OPTARG
- PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
- if [ -z "${PRIM_NODENAME}" ]; then
- echo >&2 $"`basename $0`: Missing primary nodename!"
- usage
- fi
- HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
- if [ ${HOSTNAME_NUM} -lt 2 ]; then
- echo >&2 $"`basename $0`: Missing failover nodenames!"
- usage
- fi
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]
- then
- echo >&2 $"`basename $0`: Heartbeat version 1 can" \
- "only support 2 nodes!"
- usage
- fi
- ;;
- v)
- VERBOSE_OUTPUT=true
- ;;
- d)
- DEVICE_OPT=$OPTARG
- TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
- TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
- if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing target device name!"
- usage
- fi
- if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
- echo >&2 $"`basename $0`: Missing mount point for target"\
- "${TARGET_DEVNAMES[TARGET_NUM]}!"
- usage
- fi
- TARGET_NUM=$(( TARGET_NUM + 1 ))
- ;;
- ?)
- usage
- esac
-done
-
-# Check the required parameters
-if [ -z "${HBVER_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -r option!"
- usage
-fi
-
-if [ -z "${HOSTNAME_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -n option!"
- usage
-fi
-
-if [ -z "${DEVICE_OPT}" ]; then
- echo >&2 $"`basename $0`: Missing -d option!"
- usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# get_nodenames
-#
-# Get all the node names in this failover group
-get_nodenames() {
- declare -i idx
- local nodename_str nodename
-
- nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
- END {for (i in a) print a[i]}'`
- idx=0
- for nodename in ${nodename_str}
- do
- NODE_NAMES[idx]=${nodename}
- idx=$idx+1
- done
-
- return 0
-}
-
-# check_file host_name file
-#
-# Run remote command to check whether @file exists in @host_name
-check_file() {
- local host_name=$1
- local file_name=$2
-
- if [ -z "${host_name}" ]; then
- echo >&2 "`basename $0`: check_file() error:"\
- "Missing hostname!"
- return 1
- fi
-
- if [ -z "${file_name}" ]; then
- echo >&2 "`basename $0`: check_file() error:"\
- "Missing file name!"
- return 1
- fi
-
- # Execute remote command to check the file
- ${REMOTE} ${host_name} "[ -e ${file_name} ]"
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: check_file() error:"\
- "${file_name} does not exist in host ${host_name}!"
- return 1
- fi
-
- return 0
-}
-
-# hb_running host_name
-#
-# Run remote command to check whether heartbeat service is running in @host_name
-hb_running() {
- local host_name=$1
- local ret_str
-
- ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1`
- if [ $? -ne 0 ]; then
- if [ "${ret_str}" = "${ret_str#*stop*}" ]; then
- echo >&2 "`basename $0`: hb_running() error:"\
- "remote command to ${host_name} error: ${ret_str}!"
- return 2
- else
- return 1
- fi
- fi
-
- return 0
-}
-
-# stop_heartbeat host_name
-#
-# Run remote command to stop heartbeat service running in @host_name
-stop_heartbeat() {
- local host_name=$1
- local ret_str
-
- ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1`
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: stop_heartbeat() error:"\
- "remote command to ${host_name} error: ${ret_str}!"
- return 1
- fi
-
- echo "`basename $0`: Heartbeat service is stopped on node ${host_name}."
- return 0
-}
-
-# check_heartbeat
-#
-# Run remote command to check each node's heartbeat service
-check_heartbeat() {
- declare -i idx
- local OK
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- # Check Heartbeat configuration directory
- if ! check_file ${NODE_NAMES[idx]} ${HA_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat() error:"\
- "Is Heartbeat package installed?"
- return 1
- fi
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- # Check mon configuration directory
- if ! check_file ${NODE_NAMES[idx]} ${MON_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat()"\
- "error: Is mon package installed?"
- return 1
- fi
- fi
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- # Check crm directory
- if ! check_file ${NODE_NAMES[idx]} ${CIB_DIR}; then
- echo >&2 "`basename $0`: check_heartbeat()"\
- "error: Is Heartbeat v2 package installed?"
- return 1
- fi
- fi
-
- # Check heartbeat service status
- hb_running ${NODE_NAMES[idx]}
- rc=$?
- if [ "$rc" -eq "2" ]; then
- return 1
- elif [ "$rc" -eq "1" ]; then
- verbose_output "Heartbeat service is stopped on"\
- "node ${NODE_NAMES[idx]}."
- elif [ "$rc" -eq "0" ]; then
- OK=
- echo -n "`basename $0`: Heartbeat service is running on"\
- "${NODE_NAMES[idx]}, go ahead to stop the service and"\
- "generate new configurations? [y/n]:"
- read OK
- if [ "${OK}" = "n" ]; then
- echo "`basename $0`: New Heartbeat configurations"\
- "are not generated."
- return 2
- fi
-
- # Stop heartbeat service
- stop_heartbeat ${NODE_NAMES[idx]}
- fi
- done
-
- return 0
-}
-
-# get_srvname hostname target_devname
-#
-# Get the lustre target server name from the node @hostname
-get_srvname() {
- local host_name=$1
- local target_devname=$2
- local target_srvname=
- local ret_str
-
- # Execute remote command to get the target server name
- ret_str=`${REMOTE} ${host_name} \
- "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
- if [ $? -ne 0 ]; then
- echo "`basename $0`: get_srvname() error:" \
- "from host ${host_name} - ${ret_str}"
- return 1
- fi
-
- if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
- ret_str=${ret_str#*Target: }
- target_srvname=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "${target_srvname}" ]; then
- echo "`basename $0`: get_srvname() error: Cannot get the"\
- "server name of target ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${target_srvname}
- return 0
-}
-
-# get_srvnames
-#
-# Get server names of all the Lustre targets in this failover group
-get_srvnames() {
- declare -i i
-
- # Initialize the TARGET_SRVNAMES array
- unset TARGET_SRVNAMES
-
- # Get Lustre target service names
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_SRVNAMES[i]}"
- return 1
- fi
- done
-
- return 0
-}
-
-# create_template
-#
-# Create the templates for ha.cf and authkeys files
-create_template() {
- /bin/mkdir -p ${TMP_DIR}
-
- # Create the template for ha.cf
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- cat >${HACF_TEMP} <<EOF
-debugfile /var/log/ha-debug
-logfile /var/log/ha-log
-logfacility local0
-keepalive 2
-deadtime 30
-initdead 120
-
-auto_failback off
-
-EOF
- elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- cat >${HACF_TEMP} <<EOF
-use_logd yes
-keepalive 1
-deadtime 10
-initdead 60
-
-crm yes
-
-EOF
- fi
-
- # Create the template for authkeys
- if [ ! -s ${AUTHKEYS_TEMP} ]; then
- cat >${AUTHKEYS_TEMP} <<EOF
-auth 1
-1 sha1 HelloLustre!
-EOF
- fi
-
- return 0
-}
-
-# create_hacf
-#
-# Create the ha.cf file and scp it to each node's /etc/ha.d/
-create_hacf() {
- HACF_PRIMNODE=${TMP_DIR}$"/ha.cf."${PRIM_NODENAME}
- HACF_LUSTRE=${TMP_DIR}$"/ha.cf"${FILE_SUFFIX}
-
- declare -i idx
-
- if [ -e ${HACF_PRIMNODE} ]; then
- # The ha.cf file for the primary node has already existed.
- verbose_output "${HACF_PRIMNODE} already exists."
- return 0
- fi
-
- /bin/cp -f ${HACF_TEMP} ${HACF_LUSTRE}
-
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- echo "node ${NODE_NAMES[idx]}" >> ${HACF_LUSTRE}
- done
-
- # scp ha.cf file to all the nodes
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]}
- scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp ha.cf file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# create_haresources
-#
-# Create the haresources file and scp it to the each node's /etc/ha.d/
-create_haresources() {
- HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME}
- HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX}
- declare -i idx
- local res_line
-
- if [ -s ${HARES_PRIMNODE} ]; then
- # The haresources file for the primary node has already existed
- if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then
- verbose_output "${HARES_PRIMNODE} already exists."
- return 0
- fi
- fi
-
- # Add the resource group line into the haresources file
- res_line=${PRIM_NODENAME}
- for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
- res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon"
- fi
- done
- echo "${res_line}" >> ${HARES_LUSTRE}
-
- # Generate the cib.xml file
- if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- # Add group haclient and user hacluster
- [ -z "`grep haclient /etc/group`" ] && groupadd haclient
- [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster
-
- CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX}
- python ${CIB_GEN_SCRIPT} --stdout \
- ${HARES_LUSTRE} > ${CIB_LUSTRE}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate cib.xml file"\
- "for node ${PRIM_NODENAME}!"
- return 1
- fi
- fi
-
- # scp the haresources file or cib.xml file
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]}
- scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp haresources file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
- scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp cib.xml"\
- "file to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- fi
- done
-
- return 0
-}
-
-# create_authkeys
-#
-# Create the authkeys file and scp it to the each node's /etc/ha.d/
-create_authkeys() {
- AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME}
- declare -i idx
-
- if [ -e ${AUTHKEYS_PRIMNODE} ]; then
- verbose_output "${AUTHKEYS_PRIMNODE} already exists."
- return 0
- fi
-
- # scp the authkeys file to all the nodes
- chmod 600 ${AUTHKEYS_TEMP}
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]}
- scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp authkeys file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# create_moncf
-#
-# Create the mon.cf file and scp it to the each node's /etc/mon/
-create_moncf() {
- MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
- MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
- local srv_name params=
- declare -i idx
- declare -a OLD_TARGET_SRVNAMES # targets in other nodes
- # in this failover group
- # Initialize the OLD_TARGET_SRVNAMES array
- unset OLD_TARGET_SRVNAMES
-
- if [ -s ${MONCF_PRIMNODE} ]; then
- if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
- then
- verbose_output "${MONCF_PRIMNODE} already exists."
- return 0
- else
- # Get the Lustre target service names
- # from the previous mon.cf file
- idx=0
- for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
- |awk '$2 ~ /-mon/ {print $2}'|xargs`
- do
- OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
- |sed 's/-mon//g'`
- idx=$(( idx + 1 ))
- done
- fi
- fi
-
- # Construct the parameters to mon.cf generation script
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- params=${params}" -n "${NODE_NAMES[idx]}
- done
-
- for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
- params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
- done
-
- for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
- params=${params}" -o "${TARGET_SRVNAMES[idx]}
- done
-
- ${SCRIPT_GEN_MONCF} ${params}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to generate mon.cf file"\
- "by using ${SCRIPT_GEN_MONCF}!"
- return 1
- fi
-
- /bin/mv *-mon.cfg ${MONCF_LUSTRE}
-
- # scp the mon.cf file to all the nodes
- for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
- /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}
-
- scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: Failed to scp mon.cf file"\
- "to node ${NODE_NAMES[idx]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# generate_config
-#
-# Generate the configuration files for Heartbeat and scp them to all the nodes
-generate_config() {
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- # Get server names of Lustre targets
- if ! get_srvnames; then
- return 1
- fi
- fi
-
- if ! create_template; then
- return 1
- fi
-
- verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
- "${PRIM_NODENAME} failover group hosts..."
- if ! create_hacf; then
- return 1
- fi
- verbose_output "OK"
-
- verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
- "to ${PRIM_NODENAME} failover group hosts..."
- if ! create_haresources; then
- return 1
- fi
- verbose_output "OK"
-
- verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
- "${PRIM_NODENAME} failover group hosts..."
- if ! create_authkeys; then
- return 1
- fi
- verbose_output "OK"
-
- if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
- verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
- "${PRIM_NODENAME} failover group hosts..."
- if ! create_moncf; then
- return 1
- fi
- verbose_output "OK"
- fi
-
- return 0
-}
-
-# Main flow
-# Get all the node names
-if ! get_nodenames; then
- exit 1
-fi
-
-# Check heartbeat services
-verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
- "failover group hosts..."
-check_heartbeat
-rc=$?
-if [ "$rc" -eq "2" ]; then
- verbose_output "OK"
- exit 0
-elif [ "$rc" -eq "1" ]; then
- exit 1
-fi
-verbose_output "OK"
-
-# Generate configuration files
-if ! generate_config; then
- exit 1
-fi
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# lc_modprobe.sh - add lustre module options into modprobe.conf or
-# modules.conf
-#
-#################################################################################
-
-# Check the kernel version
-KERNEL_VERSION=`uname -r`
-KERNEL_VERSION=${KERNEL_VERSION:0:3}
-
-if [ "${KERNEL_VERSION}" = "2.4" ]; then
- MODULE_CONF=/etc/modules.conf
-else
- MODULE_CONF=/etc/modprobe.conf
-fi
-
-read -r NETWORKS
-MODLINES_FILE=/tmp/modlines$$.txt
-START_MARKER=$"# start lustre config"
-END_MARKER=$"# end lustre config"
-
-# Generate a temp file contains lnet options lines
-generate_lnet_lines() {
- local LNET_LINE TMP_LINE
-
- TMP_LINE="${NETWORKS}"
-
- echo ${START_MARKER} > ${MODLINES_FILE}
- echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE}
- while true; do
- LNET_LINE=${TMP_LINE%%\\n*}
- echo ${LNET_LINE} >> ${MODLINES_FILE}
-
- TMP_LINE=${TMP_LINE#*\\n}
-
- if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then
- break
- fi
- done
- echo ${END_MARKER} >> ${MODLINES_FILE}
-
- #echo "--------------${MODLINES_FILE}--------------"
- #cat ${MODLINES_FILE}
- #echo -e "------------------------------------------\n"
-
- return 0
-}
-
-if ! generate_lnet_lines; then
- exit 1
-fi
-
-# Add lnet options lines to the module configuration file
-if [ -e ${MODULE_CONF} ]; then
- # Delete the old options
- sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF}
-fi
-
-cat ${MODLINES_FILE} >> ${MODULE_CONF}
-rm -f ${MODLINES_FILE}
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# lc_net.sh - script for Lustre cluster network verification
-#
-###############################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-v] <csv file>
-
- -v verbose mode
- csv file a spreadsheet that contains configuration parameters
- (separated by commas) for each target in a Lustre cl-
- uster, the first field of each line is the host name
- of the cluster node
-
-EOF
- exit 1
-}
-
-# Get and check the positional parameters
-while getopts "v" OPTION; do
- case $OPTION in
- v)
- VERBOSE_OPT=$"yes"
- ;;
- ?)
- usage
- esac
-done
-
-# Toss out the parameters we've already processed
-shift `expr $OPTIND - 1`
-
-# Here we expect the csv file
-if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: Missing csv file!"
- usage
-fi
-
-# Global variables
-CSV_FILE=$1
-declare -a HOST_NAMES
-declare -a HOST_IPADDRS
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-
-# Check whether the reomte command is pdsh
-is_pdsh() {
- if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
- return 1
- fi
-
- return 0
-}
-
-# Output verbose informations
-verbose_output() {
- if [ "${VERBOSE_OPT}" = "yes" ]; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Check the csv file
-check_file() {
- if [ ! -s ${CSV_FILE} ]; then
- echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}" \
- "does not exist or is empty!"
- return 1
- fi
-
- return 0
-}
-
-# Get the host names from the csv file
-get_hostnames() {
- local NAME CHECK_STR
- declare -i i
- declare -i j
-
- # Initialize the HOST_NAMES array
- unset HOST_NAMES
-
- CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \
- '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'`
- if [ -n "${CHECK_STR}" ]; then
- echo >&2 $"`basename $0`: get_hostnames() error: Missing"\
- "hostname field in the line - ${CHECK_STR}"
- return 1
- fi
-
- i=0
- for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\
- | awk -F, '/[[:alnum:]]/{print $1}'`
- do
- for ((j = 0; j < ${#HOST_NAMES[@]}; j++)); do
- [ "${NAME}" = "${HOST_NAMES[j]}" ] && continue 2
- done
-
- HOST_NAMES[i]=${NAME}
- i=$i+1
- done
-
- return 0
-}
-
-# Check whether the host name matches the name in the local /etc/hosts table
-# and whether the IP address corresponding to the host name is correct
-local_check() {
- # Check argument
- if [ $# -ne 2 ]; then
- echo >&2 $"`basename $0`: local_check() error: Missing"\
- "argument for function local_check()!"
- return 1
- fi
-
- local RET_STR REAL_NAME
-
- # Get the corresponding IP address of the host name from /etc/hosts table
- # of the current host
- HOST_IPADDRS[$2]=`egrep "[[:space:]]$1([[:space:]]|$)" /etc/hosts \
- | awk '{print $1}'`
- if [ -z "${HOST_IPADDRS[$2]}" ]; then
- echo >&2 "`basename $0`: local_check() error: $1 does not" \
- "exist in the local /etc/hosts table!"
- return 1
- fi
-
- if [ ${#HOST_IPADDRS[$2]} -gt 15 ]; then
- echo >&2 "`basename $0`: local_check() error: More than one" \
- "IP address line corresponding to $1 in the local" \
- "/etc/hosts table!"
- return 1
- fi
-
- # Execute remote command to get the real host name
- RET_STR=`${REMOTE} ${HOST_IPADDRS[$2]} hostname 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: local_check() error: remote error:" \
- "${RET_STR}"
- return 1
- fi
-
- if [ -z "${RET_STR}" ]; then
- echo >&2 "`basename $0`: local_check() error: remote error: No"\
- "results from remote! Check the network connectivity"\
- "between the local host and ${HOST_IPADDRS[$2]}!"
- return 1
- fi
-
- if is_pdsh; then
- REAL_NAME=`echo ${RET_STR} | awk '{print $2}'`
- else
- REAL_NAME=`echo ${RET_STR} | awk '{print $1}'`
- fi
-
- if [ "$1" != "${REAL_NAME}" ]; then
- echo >&2 "`basename $0`: local_check() error: The real hostname"\
- "of ${HOST_IPADDRS[$2]} is \"${REAL_NAME}\","\
- "not \"$1\"! Check the local /etc/hosts table!"
- return 1
- fi
-
- return 0
-}
-
-# Check whether the correct host name and IP address pair matches
-# the one in the remote /etc/hosts tables
-remote_check() {
- # Check argument
- if [ $# -ne 2 ]; then
- echo >&2 $"`basename $0`: remote_check() error: Missing"\
- "argument for function remote_check()!"
- return 1
- fi
-
- declare -i i
- local RET_STR COMMAND IP_ADDR
-
- COMMAND=$"egrep \"[[:space:]]$1([[:space:]]|$)\" /etc/hosts"
-
- # Execute remote command to check remote /etc/hosts tables
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- RET_STR=`${REMOTE} ${HOST_NAMES[i]} ${COMMAND} 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: remote_check() error:"\
- "remote ${HOST_NAMES[i]} error: ${RET_STR}"
- return 1
- fi
-
- if is_pdsh; then
- IP_ADDR=`echo ${RET_STR} | awk '{print $2}'`
- else
- IP_ADDR=`echo ${RET_STR} | awk '{print $1}'`
- fi
- if [ -z "${IP_ADDR}" ]; then
- echo >&2 "`basename $0`: remote_check() error:" \
- "$1 does not exist in the ${HOST_NAMES[i]}'s"\
- "/etc/hosts table!"
- return 1
- fi
-
- if [ "${IP_ADDR}" != "${HOST_IPADDRS[$2]}" ]; then
- echo >&2 "`basename $0`: remote_check() error:" \
- "IP address ${IP_ADDR} of $1 in the" \
- "${HOST_NAMES[i]}'s /etc/hosts is incorrect!"
- return 1
- fi
- done
-
- return 0
-}
-
-# Verify forward and reverse network connectivity of the Lustre cluster
-network_check () {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: network_check() error: Missing"\
- "argument for function network_check()!"
- return 1
- fi
-
- declare -i i
- local RET_STR COMMAND REAL_NAME
-
- # Execute remote command to check network connectivity
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- COMMAND=$"${REMOTE} ${HOST_NAMES[i]} hostname"
- RET_STR=`${REMOTE} $1 ${COMMAND} 2>&1`
- if [ $? -ne 0 -a -n "${RET_STR}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "remote error: ${RET_STR}"
- return 1
- fi
-
- if [ -z "${RET_STR}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "No results from remote! Check the network" \
- "connectivity between \"$1\" and" \
- "\"${HOST_NAMES[i]}\"!"
- return 1
- fi
-
- if is_pdsh; then
- REAL_NAME=`echo ${RET_STR} | awk '{print $3}'`
- else
- REAL_NAME=`echo ${RET_STR} | awk '{print $1}'`
- fi
- if [ "${HOST_NAMES[i]}" != "${REAL_NAME}" ]; then
- echo >&2 "`basename $0`: network_check() error:" \
- "${RET_STR}"
- return 1
- fi
- done
-
- return 0
-}
-
-# Verify forward and reverse network connectivity of the Lustre cluster,
-# and that hostnames match the names in the /etc/hosts tables.
-network_verify() {
- declare -i i
-
- # Initialize the HOST_IPADDRS array
- unset HOST_IPADDRS
-
- # Get all the host names from the csv file
- if ! get_hostnames; then
- return 1
- fi
-
- # Check whether all the host names match the names in
- # all the /etc/hosts tables of the Lustre cluster
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- verbose_output "Verifying IP address of host" \
- "\"${HOST_NAMES[i]}\" in the local /etc/hosts..."
- if ! local_check ${HOST_NAMES[i]} $i; then
- return 1
- fi
- verbose_output "OK"
- done
-
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
- verbose_output "Verifying IP address of host" \
- "\"${HOST_NAMES[i]}\" in the remote /etc/hosts..."
- if ! remote_check ${HOST_NAMES[i]} $i; then
- return 1
- fi
- verbose_output "OK"
- done
-
- # Verify network connectivity of the Lustre cluster
- for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
- [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
- verbose_output "Verifying network connectivity of host" \
- "\"${HOST_NAMES[i]}\" to other hosts..."
- if ! network_check ${HOST_NAMES[i]}; then
- return 1
- fi
- verbose_output "OK"
- done
-
- return 0
-}
-
-# Main flow
-if ! check_file; then
- exit 1
-fi
-
-if ! network_verify; then
- exit 1
-fi
-
-exit 0
+++ /dev/null
-#!/bin/bash
-
-# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
-
-#
-# lustre_config.sh - format and set up multiple lustre servers from a csv file
-#
-# This script is used to parse each line of a spreadsheet (csv file) and
-# execute remote commands to format (mkfs.lustre) every Lustre target
-# that will be part of the Lustre cluster.
-#
-# In addition, it can also verify the network connectivity and hostnames in
-# the cluster and produce High-Availability software configurations for
-# Heartbeat or CluManager.
-#
-################################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-t HAtype] [-n] [-f] [-m] [-h] [-v] <csv file>
-
- This script is used to format and set up multiple lustre servers from a
- csv file.
-
- -h help and examples
- -t HAtype produce High-Availability software configurations
- The argument following -t is used to indicate the High-
- Availability software type. The HA software types which
- are currently supported are: hbv1 (Heartbeat version 1)
- and hbv2 (Heartbeat version 2).
- -n no net - don't verify network connectivity and hostnames
- in the cluster
- -f force-format the Lustre targets using --reformat option
- -m no fstab change - don't modify /etc/fstab to add the new
- Lustre targets
- If using this option, then the value of "mount options"
- item in the csv file will be passed to mkfs.lustre, else
- the value will be added into the /etc/fstab.
- -v verbose mode
- csv file a spreadsheet that contains configuration parameters
- (separated by commas) for each target in a Lustre cluster
-
-EOF
- exit 1
-}
-
-# Samples
-sample() {
- cat <<EOF
-
-This script is used to parse each line of a spreadsheet (csv file) and
-execute remote commands to format (mkfs.lustre) every Lustre target
-that will be part of the Lustre cluster.
-
-It can also optionally:
- * verify the network connectivity and hostnames in the cluster
- * modify /etc/modprobe.conf to add Lustre networking info
- * add the Lustre server info to /etc/fstab
- * produce configurations for Heartbeat or CluManager.
-
-Each line in the csv file represents one Lustre target. The format is:
-hostname,module_opts,device name,mount point,device type,fsname,mgs nids,index,
-format options,mkfs options,mount options,failover nids
-
-hostname hostname of the node in the cluster, must match "uname -n"
-module_opts Lustre networking module options
-device name Lustre target (block device or loopback file)
-mount point Lustre target mount point
-device type Lustre target type (mgs, mdt, ost, mgs|mdt, mdt|mgs)
-fsname Lustre filesystem name, should be limited to 8 characters
- Default is "lustre".
-mgs nids NID(s) of remote mgs node, required for mdt and ost targets
- If this item is not given for an mdt, it is assumed that
- the mdt will also be an mgs, according to mkfs.lustre.
-index Lustre target index
-format options a "catchall" contains options to be passed to mkfs.lustre
- "--device-size", "--param", etc. all goes into this item.
-mkfs options format options to be wrapped with --mkfsoptions="" and
- passed to mkfs.lustre
-mount options If this script is invoked with "-m" option, then the value of
- this item will be wrapped with --mountfsoptions="" and passed
- to mkfs.lustre, else the value will be added into /etc/fstab.
-failover nids NID(s) of failover partner node
-
-All the NIDs in one node are delimited by commas (','). When multiple nodes are
-specified, they are delimited by a colon (':').
-
-Items left blank will be set to defaults.
-
-Example 1 - Simple, with combo MGS/MDT:
--------------------------------------------------------------------------------
-# combo mdt/mgs
-lustre-mgs,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240
-
-# ost0
-lustre-ost,options lnet networks=tcp,/tmp/ost0,/mnt/ost0,ost,,lustre-mgs@tcp0,,--device-size=10240
-
-# ost1
-lustre-ost,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,lustre-mgs@tcp0,,--device-size=10240
--------------------------------------------------------------------------------
-
-Example 2 - Separate MGS/MDT, two networks interfaces:
--------------------------------------------------------------------------------
-# mgs
-lustre-mgs1,options lnet 'networks="tcp,elan"',/dev/sda,/mnt/mgs,mgs,,,,--quiet --param="sys.timeout=50",,"defaults,noauto","lustre-mgs2,2@elan"
-
-# mdt
-lustre-mdt1,options lnet 'networks="tcp,elan"',/dev/sdb,/mnt/mdt,mdt,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet --param="lov.stripe.size=4194304",-J size=16,"defaults,noauto",lustre-mdt2
-
-# ost
-lustre-ost1,options lnet 'networks="tcp,elan"',/dev/sdc,/mnt/ost,ost,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet,-I 512,"defaults,noauto",lustre-ost2
--------------------------------------------------------------------------------
-
-Example 3 - with combo MGS/MDT failover pair and OST failover pair:
--------------------------------------------------------------------------------
-# combo mgs/mdt
-lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240,,,lustre-mgs2@tcp0
-
-# combo mgs/mdt backup (--noformat)
-lustre-mgs2,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240 --noformat,,,lustre-mgs1@tcp0
-
-# ost
-lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240,,,lustre-ost2@tcp0
-
-# ost backup (--noformat) (note different device name)
-lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240 --noformat,,,lustre-ost1@tcp0
--------------------------------------------------------------------------------
-
-EOF
- exit 0
-}
-
-#***************************** Global variables *****************************#
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
-export REMOTE
-
-# Command path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"}
-LCTL=${LCTL:-"$CMD_PATH/lctl"}
-
-EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"}
-
-# Some scripts to be called
-SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"}
-MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe.sh
-VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net.sh
-GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb.sh
-GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman.sh
-
-# Variables of HA software
-HATYPE_HBV1="hbv1" # Heartbeat version 1
-HATYPE_HBV2="hbv2" # Heartbeat version 2
-HATYPE_CLUMGR="cluman" # Cluster Manager
-
-HB_TMP_DIR="/tmp/heartbeat" # Temporary directory
-CLUMGR_TMP_DIR="/tmp/clumanager"
-TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
-
-FS_TYPE=${FS_TYPE:-"lustre"} # filesystem type
-
-declare -a MGS_NODENAME # node names of the MGS servers
-declare -a MGS_IDX # indexes of MGSs in the global arrays
-declare -i MGS_NUM # number of MGS servers in the cluster
-declare -i INIT_IDX
-
-declare -a CONFIG_ITEM # items in each line of the csv file
-declare -a NODE_NAMES # node names in the failover group
-declare -a TARGET_OPTS # target services in one failover group
-
-# All the items in the csv file
-declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
-declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
-
-
-VERIFY_CONNECT=true
-MODIFY_FSTAB=true
-# Get and check the positional parameters
-while getopts "t:nfmhv" OPTION; do
- case $OPTION in
- t)
- HATYPE_OPT=$OPTARG
- if [ "${HATYPE_OPT}" != "${HATYPE_HBV1}" ] \
- && [ "${HATYPE_OPT}" != "${HATYPE_HBV2}" ] \
- && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
- echo >&2 $"`basename $0`: Invalid HA software type" \
- "- ${HATYPE_OPT}!"
- usage
- fi
- ;;
- n)
- VERIFY_CONNECT=false
- ;;
- f)
- REFORMAT_OPTION=$"--reformat "
- ;;
- m)
- MODIFY_FSTAB=false
- ;;
- h)
- sample
- ;;
- v)
- VERBOSE_OPT=$" -v"
- ;;
- ?)
- usage
- esac
-done
-
-# Toss out the parameters we've already processed
-shift `expr $OPTIND - 1`
-
-# Here we expect the csv file
-if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: Missing csv file!"
- usage
-fi
-
-# Output verbose informations
-verbose_output() {
- if [ -n "${VERBOSE_OPT}" ]; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Check the csv file
-check_file() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_file() error: Missing argument"\
- "for function check_file()!"
- return 1
- fi
-
- CSV_FILE=$1
- if [ ! -s ${CSV_FILE} ]; then
- echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}"\
- "does not exist or is empty!"
- return 1
- fi
-
- return 0
-}
-
-# Parse a line in the csv file
-parse_line() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: parse_line() error: Missing argument"\
- "for function parse_line()!"
- return 1
- fi
-
- declare -i i=0
- declare -i length=0
- declare -i idx=0
- declare -i s_quote_flag=0
- declare -i d_quote_flag=0
- local TMP_LETTER LINE
-
- LINE=$*
-
- # Initialize the CONFIG_ITEM array
- unset CONFIG_ITEM
-
- # Get the length of the line
- length=${#LINE}
-
- i=0
- while [ ${idx} -lt ${length} ]; do
- # Get a letter from the line
- TMP_LETTER=${LINE:${idx}:1}
-
- case "${TMP_LETTER}" in
- ",")
- if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ]
- then
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
- else
- i=$i+1
- fi
- idx=${idx}+1
- continue
- ;;
- "'")
- if [ ${s_quote_flag} -eq 0 ]; then
- s_quote_flag=1
- else
- s_quote_flag=0
- fi
- ;;
- "\"")
- if [ ${d_quote_flag} -eq 0 ]; then
- d_quote_flag=1
- else
- d_quote_flag=0
- fi
-
- if [ ${i} -eq 1 ]; then
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}$"\\"${TMP_LETTER}
- idx=${idx}+1
- continue
- fi
- ;;
- "\r")
- idx=${idx}+1
- continue
- ;;
- *)
- ;;
- esac
- CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
- idx=${idx}+1
- done
- return 0
-}
-
-# Check the items required for OSTs, MDTs and MGS
-#
-# When formatting an OST, the following items: hostname, module_opts,
-# device name, device type and mgs nids, cannot have null value.
-#
-# When formatting an MDT or MGS, the following items: hostname,
-# module_opts, device name and device type, cannot have null value.
-check_item() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_item() error: Missing argument"\
- "for function check_item()!"
- return 1
- fi
-
- declare -i i=$1
-
- # Check hostname, module_opts, device name and device type
- if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
- ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: Some required"\
- "item has null value! Check hostname, module_opts,"\
- "device name and device type!"
- return 1
- fi
-
- # Check mgs nids
- if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
- "item has null value!"
- return 1
- fi
-
- # Check mount point
- if [ -z "${MOUNT_POINT[i]}" ]; then
- echo >&2 $"`basename $0`: check_item() error: mount"\
- "point item of target ${DEVICE_NAME[i]} has null value!"
- return 1
- fi
-
- return 0
-}
-
-# Get the number of MGS nodes in the cluster
-get_mgs_num() {
- INIT_IDX=0
- MGS_NUM=${#MGS_NODENAME[@]}
- [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
- && let "MGS_NUM += 1"
-}
-
-# is_mgs_node hostname
-# Verify whether @hostname is a MGS node
-is_mgs_node() {
- local host_name=$1
- declare -i i
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
- done
-
- return 1
-}
-
-# Check whether the MGS nodes are in the same failover group
-check_mgs_group() {
- declare -i i
- declare -i j
- declare -i idx
- local mgs_node
-
- get_mgs_num
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- mgs_node=${MGS_NODENAME[i]}
- for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
- [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
-
- idx=${MGS_IDX[j]}
- if [ "${FAILOVERS[idx]#*$mgs_node*}" = "${FAILOVERS[idx]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs_group() error:"\
- "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
- "failover group!"
- return 1
- fi
- done
- done
-
- return 0
-}
-
-# Get and check MGS servers.
-# There should be no more than one MGS specified in the entire csv file.
-check_mgs() {
- declare -i i
- declare -i j
- declare -i exp_idx # Index of explicit MGS servers
- declare -i imp_idx # Index of implicit MGS servers
- local is_exp_mgs is_imp_mgs
- local mgs_node
-
- # Initialize the MGS_NODENAME and MGS_IDX arrays
- unset MGS_NODENAME
- unset MGS_IDX
-
- exp_idx=1
- imp_idx=1
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- is_exp_mgs=false
- is_imp_mgs=false
-
- # Check whether this node is an explicit MGS node
- # or an implicit one
- if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
- verbose_output "Explicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_exp_mgs=true
- fi
-
- if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
- verbose_output "Implicit MGS target" \
- "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
- is_imp_mgs=true
- fi
-
- # Get and check MGS servers
- if ${is_exp_mgs} || ${is_imp_mgs}; then
- # Check whether more than one MGS target in one MGS node
- if is_mgs_node ${HOST_NAME[i]}; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one MGS target in the same node -"\
- "\"${HOST_NAME[i]}\"!"
- return 1
- fi
-
- # Get and check primary MGS server and backup MGS server
- if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
- then
- # Primary MGS server
- if [ -z "${MGS_NODENAME[0]}" ]; then
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
- MGS_NODENAME[0]=${HOST_NAME[i]}
- MGS_IDX[0]=$i
- else
- mgs_node=${MGS_NODENAME[0]}
- if [ "${FAILOVERS[i]#*$mgs_node*}" = "${FAILOVERS[i]}" ]
- then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "More than one primary MGS nodes in the csv" \
- "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
- else
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
- "are failover pair, one of them should use"\
- "\"--noformat\" in the format options item!"
- fi
- return 1
- fi
- else # Backup MGS server
- if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
- || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
- echo >&2 $"`basename $0`: check_mgs() error:"\
- "There exist both explicit and implicit MGS"\
- "targets in the csv file!"
- return 1
- fi
-
- if ${is_exp_mgs}; then # Explicit MGS
- MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
- MGS_IDX[exp_idx]=$i
- exp_idx=$(( exp_idx + 1 ))
- else # Implicit MGS
- MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
- MGS_IDX[imp_idx]=$i
- imp_idx=$(( imp_idx + 1 ))
- fi
- fi
- fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
- done
-
- # Check whether the MGS nodes are in the same failover group
- if ! check_mgs_group; then
- return 1
- fi
-
- return 0
-}
-
-# Construct the command line of mkfs.lustre
-construct_mkfs_cmdline() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
- "Missing argument for function construct_mkfs_cmdline()!"
- return 1
- fi
-
- declare -i i=$1
- local mgsnids mgsnids_str
- local failnids failnids_str
-
- MKFS_CMD=${MKFS}$" "
- MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION}
-
- case "${DEVICE_TYPE[i]}" in
- "ost")
- MKFS_CMD=${MKFS_CMD}$"--ost "
- ;;
- "mdt")
- MKFS_CMD=${MKFS_CMD}$"--mdt "
- ;;
- "mgs")
- MKFS_CMD=${MKFS_CMD}$"--mgs "
- ;;
- "mdt|mgs" | "mgs|mdt")
- MKFS_CMD=${MKFS_CMD}$"--mdt --mgs "
- ;;
- *)
- echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
- "Invalid device type - \"${DEVICE_TYPE[i]}\"!"
- return 1
- ;;
- esac
-
- if [ -n "${FS_NAME[i]}" ]; then
- MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" "
- fi
-
- if [ -n "${MGS_NIDS[i]}" ]; then
- MGS_NIDS[i]=`echo "${MGS_NIDS[i]}" | sed 's/^"//' | sed 's/"$//'`
- mgsnids_str=${MGS_NIDS[i]}
- while read mgsnids; do
- MKFS_CMD=${MKFS_CMD}$"--mgsnode="${mgsnids}$" "
- done < <(echo ${mgsnids_str}|awk '{split($mgsnids_str, a, ":")}\
- END {for (j in a) print a[j]}')
- fi
-
- if [ -n "${INDEX[i]}" ]; then
- MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" "
- fi
-
- if [ -n "${FORMAT_OPTIONS[i]}" ]; then
- if [ "${FORMAT_OPTIONS[i]:0:1}" = "\"" ]; then
- FORMAT_OPTIONS[i]=`echo "${FORMAT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'`
- fi
- MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" "
- fi
-
- if [ -n "${MKFS_OPTIONS[i]}" ]; then
- MKFS_OPTIONS[i]=`echo "${MKFS_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'`
- MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" "
- fi
-
- if [ -n "${MOUNT_OPTIONS[i]}" ]; then
- MOUNT_OPTIONS[i]=`echo "${MOUNT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'`
- if ! ${MODIFY_FSTAB}; then
- MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" "
- fi
- fi
-
- if [ -n "${FAILOVERS[i]}" ]; then
- FAILOVERS[i]=`echo "${FAILOVERS[i]}" | sed 's/^"//' | sed 's/"$//'`
- failnids_str=${FAILOVERS[i]}
- while read failnids; do
- MKFS_CMD=${MKFS_CMD}$"--failnode="${failnids}$" "
- done < <(echo ${failnids_str}|awk '{split($failnids_str, a, ":")}\
- END {for (k in a) print a[k]}')
- fi
-
- MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]}
- return 0
-}
-
-# Get all the node names in this failover group
-get_nodenames() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
- "argument for function get_nodenames()!"
- return 1
- fi
-
- declare -i i=$1
- declare -i idx
- local nids_str failover_nids failover_nid first_nid
-
- # Initialize the NODE_NAMES array
- unset NODE_NAMES
-
- NODE_NAMES[0]=${HOST_NAME[i]}
-
- idx=0
- nids_str=${FAILOVERS[i]}
- failover_nids=`echo ${nids_str}|awk '{split($nids_str, a, ":")}\
- END {for (idx in a) print a[idx]}'`
-
- # FIXME: Suppose the first nid of one failover node contains node name
- idx=1
- for failover_nid in ${failover_nids}
- do
- first_nid=`echo ${failover_nid} | awk -F, '{print $1}'`
- NODE_NAMES[idx]=${first_nid%@*}
- idx=$idx+1
- done
-
- return 0
-}
-
-# Verify whether the format line has HA items
-is_ha_line() {
- declare -i i=$1
-
- [ -n "${FAILOVERS[i]}" ] && return 0
-
- return 1
-}
-
-# Produce HA software's configuration files
-gen_ha_config() {
- declare -i i=$1
- declare -i idx
- local cmd_line
-
- # Prepare parameters
- # Hostnames option
- HOSTNAME_OPT=${HOST_NAME[i]}
-
- if ! get_nodenames $i; then
- return 1
- fi
-
- for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
- HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
- done
-
- # Target devices option
- DEVICE_OPT=" -d "${TARGET_OPTS[0]}
- for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do
- DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]}
- done
-
- # Construct the generation script command line
- case "${HATYPE_OPT}" in
- "${HATYPE_HBV1}"|"${HATYPE_HBV2}") # Heartbeat
- cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
- cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
- ;;
- "${HATYPE_CLUMGR}") # CluManager
- cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
- cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
- ;;
- esac
-
- # Execute script to generate HA software's configuration files
- verbose_output "Generating HA software's configurations in"\
- "${HOST_NAME[i]} failover group..."
- verbose_output "${cmd_line}"
- eval $(echo "${cmd_line}")
- if [ $? -ne 0 ]; then
- return 1
- fi
- verbose_output "Generate HA software's configurations in"\
- "${HOST_NAME[i]} failover group OK"
-
- return 0
-}
-
-# Configure HA software
-config_ha() {
- if [ -z "${HATYPE_OPT}" ]; then
- return 0
- fi
-
- declare -i i j k
- declare -i prim_idx # Index for PRIM_HOSTNAMES array
- declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays
-
- declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover
- # groups in the lustre cluster
- declare -a HOST_INDEX # Indices for the same node in all the
- # format lines in the csv file
- local prim_host
-
- # Initialize the PRIM_HOSTNAMES array
- prim_idx=0
- unset PRIM_HOSTNAMES
-
- # Get failover groups and generate HA configuration files
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- prim_host=${HOST_NAME[i]}
-
- for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do
- [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2
- done
-
- target_idx=0
- unset HOST_INDEX
- unset TARGET_OPTS
- for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do
- if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}"
- then
- HOST_INDEX[target_idx]=$k
- TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]}
- target_idx=$(( target_idx + 1 ))
- fi
- done
-
- if [ ${#TARGET_OPTS[@]} -ne 0 ]; then
- PRIM_HOSTNAMES[prim_idx]=${prim_host}
- prim_idx=$(( prim_idx + 1 ))
-
- if ! gen_ha_config ${HOST_INDEX[0]}; then
- return 1
- fi
- fi
- done
-
- if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then
- verbose_output "There are no \"failover nids\" items in the"\
- "csv file. No HA configuration files are generated!"
- fi
-
- rm -rf ${TMP_DIRS}
- return 0
-}
-
-
-# Get all the items in the csv file and do some checks.
-get_items() {
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: get_items() error: Missing argument"\
- "for function get_items()!"
- return 1
- fi
-
- CSV_FILE=$1
- local LINE
- declare -i line_num=0
- declare -i idx=0
-
- while read -r LINE; do
- line_num=${line_num}+1
- # verbose_output "Parsing line ${line_num}: $LINE"
-
- # Get rid of the empty line
- if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then
- continue
- fi
-
- # Get rid of the comment line
- if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]
- then
- continue
- fi
-
- # Parse the config line into CONFIG_ITEM
- if ! parse_line $LINE; then
- echo >&2 $"`basename $0`: parse_line() error: Occurred"\
- "on line ${line_num} in ${CSV_FILE}: $LINE"
- return 1
- fi
-
- HOST_NAME[idx]=${CONFIG_ITEM[0]}
- MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
- DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
- MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
- DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
- FS_NAME[idx]=${CONFIG_ITEM[5]}
- MGS_NIDS[idx]=${CONFIG_ITEM[6]}
- INDEX[idx]=${CONFIG_ITEM[7]}
- FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
- MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
- MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
- FAILOVERS[idx]=${CONFIG_ITEM[11]}
-
- # Check some required items for formatting target
- if ! check_item $idx; then
- echo >&2 $"`basename $0`: check_item() error:"\
- "Occurred on line ${line_num} in ${CSV_FILE}."
- return 1
- fi
-
- idx=${idx}+1
- done < ${CSV_FILE}
-
- return 0
-}
-
-# check_lnet_connect hostname_index mgs_hostname
-# Check whether the target node can contact the MGS node @mgs_hostname
-# If @mgs_hostname is null, then it means the primary MGS node
-check_lnet_connect() {
- declare -i i=$1
- declare -i idx=0
- local mgs_node=$2
-
- local COMMAND RET_STR
- local mgs_prim_nids all_nids all_nids_str
- local nids
- local nids_str=
- local mgs_nids mgs_nid
- local ping_mgs
-
- # Execute remote command to check that
- # this node can contact the MGS node
- verbose_output "Checking lnet connectivity between" \
- "${HOST_NAME[i]} and the MGS node ${mgs_node}"
- all_nids=${MGS_NIDS[i]}
- mgs_prim_nids=`echo ${all_nids} | awk -F: '{print $1}'`
- all_nids_str=`echo ${all_nids} | awk '{split($all_nids, a, ":")}\
- END {for (idx in a) print a[idx]}'`
-
- if [ -z "${mgs_node}" ]; then
- nids_str=${mgs_prim_nids} # nids of primary MGS node
- else
- for nids in ${all_nids_str}; do
- # FIXME: Suppose the MGS nids contain the node name
- [ "${nids}" != "${nids#*$mgs_node*}" ] && nids_str=${nids}
- done
- fi
-
- if [ -z "${nids_str}" ]; then
- echo >&2 $"`basename $0`: check_lnet_connect() error:"\
- "Check the mgs nids item of host ${HOST_NAME[i]}!"\
- "Missing nids of the MGS node ${mgs_node}!"
- return 1
- fi
-
- idx=0
- mgs_nids=`echo ${nids_str} | awk '{split($nids_str, a, ",")}\
- END {for (idx in a) print a[idx]}'`
-
- ping_mgs=false
- for mgs_nid in ${mgs_nids}
- do
- COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1"
- RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
- if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
- then
- # This node can contact the MGS node
- verbose_output "${HOST_NAME[i]} can contact the MGS" \
- "node ${mgs_node} by using nid \"${mgs_nid}\"!"
- ping_mgs=true
- break
- fi
- done
-
- if ! ${ping_mgs}; then
- echo >&2 "`basename $0`: check_lnet_connect() error:" \
- "${HOST_NAME[i]} cannot contact the MGS node"\
- "${mgs_node} through lnet networks!"\
- "Check ${LCTL} command!"
- return 1
- fi
-
- return 0
-}
-
-# Start lnet network in the cluster node and check that
-# this node can contact the MGS node
-check_lnet() {
- if ! ${VERIFY_CONNECT}; then
- return 0
- fi
-
- # Check argument
- if [ $# -eq 0 ]; then
- echo >&2 $"`basename $0`: check_lnet() error: Missing"\
- "argument for function check_lnet()!"
- return 1
- fi
-
- declare -i i=$1
- declare -i j
- local COMMAND RET_STR
-
- # Execute remote command to start lnet network
- verbose_output "Starting lnet network in ${HOST_NAME[i]}"
- COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1"
- RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1`
- if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
- then
- echo >&2 "`basename $0`: check_lnet() error: remote" \
- "${HOST_NAME[i]} error: ${RET_STR}"
- return 1
- fi
-
- if is_mgs_node ${HOST_NAME[i]}; then
- return 0
- fi
-
- # Execute remote command to check that
- # this node can contact the MGS node
- for ((j = 0; j < ${MGS_NUM}; j++)); do
- if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
- return 1
- fi
- done
-
- return 0
-}
-
-# Start lnet network in the MGS node
-start_mgs_lnet() {
- declare -i i
- declare -i idx
- local COMMAND
-
- if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
- verbose_output "There is no MGS target in the ${CSV_FILE} file."
- return 0
- fi
-
- for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
- # Execute remote command to add lnet options lines to
- # the MGS node's modprobe.conf/modules.conf
- idx=${MGS_IDX[i]}
- COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}"
- verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}"
- ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: start_mgs_lnet() error:"\
- "Failed to execute remote command to" \
- "add module options to ${MGS_NODENAME[i]}!"\
- "Check ${MODULE_CONFIG}!"
- return 1
- fi
-
- # Start lnet network in the MGS node
- if ! check_lnet ${idx}; then
- return 1
- fi
- done
-
- return 0
-}
-
-# Execute remote command to add lnet options lines to remote nodes'
-# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
-mass_config() {
- local COMMAND
- declare -a REMOTE_PID
- declare -a REMOTE_CMD
- declare -i pid_num=0
- declare -i i=0
-
- # Start lnet network in the MGS node
- if ! start_mgs_lnet; then
- return 1
- fi
-
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- # Construct the command line of mkfs.lustre
- if ! construct_mkfs_cmdline $i; then
- return 1
- fi
-
- # create the mount point on the node
- COMMAND="mkdir -p ${MOUNT_POINT[i]}"
- verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \
- "${HOST_NAME[i]}"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error:"\
- "Failed to execute remote command to"\
- "create the mountpoint on ${HOST_NAME[i]}!"
- return 1
- fi
-
- if ! is_mgs_node ${HOST_NAME[i]}; then
- # Execute remote command to add lnet options lines to
- # modprobe.conf/modules.conf
- COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
- verbose_output "Adding lnet module options to" \
- "${HOST_NAME[i]}"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error:"\
- "Failed to execute remote command to"\
- "add module options to ${HOST_NAME[i]}!"
- return 1
- fi
-
- # Check lnet networks
- if ! check_lnet $i; then
- return 1
- fi
- fi
-
- # Execute remote command to format Lustre target
- verbose_output "Formatting Lustre target ${DEVICE_NAME[i]}"\
- "on ${HOST_NAME[i]}..."
- verbose_output "Format command line is: ${MKFS_CMD}"
- REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} ${MKFS_CMD}"
- ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 &
- REMOTE_PID[${pid_num}]=$!
- pid_num=${pid_num}+1
- sleep 1
- done
-
- # Wait for the exit status of the background remote command
- verbose_output "Waiting for the return of the remote command..."
- fail_exit_status=false
- for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
- wait ${REMOTE_PID[${pid_num}]}
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: mass_config() error: Failed"\
- "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
- fail_exit_status=true
- fi
- done
-
- if ${fail_exit_status}; then
- return 1
- fi
-
- verbose_output "All the Lustre targets are formatted successfully!"
- return 0
-}
-
-# get_mntopts hostname device_name failovers
-# Construct the mount options of Lustre target @device_name in host @hostname
-get_mntopts() {
- local host_name=$1
- local device_name=$2
- local failovers=$3
- local mnt_opts=
- local ret_str
-
- [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults
-
- # Execute remote command to check whether the device
- # is a block device or not
- ret_str=`${REMOTE} ${host_name} \
- "[ -b ${device_name} ] && echo block || echo loop" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_mntopts() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if [ -z "${ret_str}" ]; then
- echo "`basename $0`: get_mntopts() error: remote error:" \
- "No results from remote!" \
- "Check network connectivity between the local host"\
- "and ${host_name}!"
- return 1
- fi
-
- [ "${ret_str}" != "${ret_str#*loop}" ] && mnt_opts=${mnt_opts},loop
-
- echo ${mnt_opts}
- return 0
-}
-
-# Execute remote command to modify /etc/fstab to add the new Lustre targets
-modify_fstab() {
- declare -i i
- local mntent mntopts device_name
- local COMMAND
-
- if ! ${MODIFY_FSTAB}; then
- return 0
- fi
-
- for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
- verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
- "to add Lustre target ${DEVICE_NAME[i]}"
- mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE}
-
- # Get mount options
- if [ -n "${MOUNT_OPTIONS[i]}" ]; then
- # The mount options already specified in the csv file.
- mntopts=${MOUNT_OPTIONS[i]}
- else
- mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
- ${FAILOVERS[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${mntopts}"
- return 1
- fi
- fi
-
- mntent=${mntent}"\t"${mntopts}"\t"0" "0
-
- # Execute remote command to modify /etc/fstab
- device_name=${DEVICE_NAME[i]//\//\\/}
- COMMAND="(sed -i \"/^${device_name}\t/d\" /etc/fstab; \
- echo -e \"${mntent}\" >> /etc/fstab)"
- ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
- if [ $? -ne 0 ]; then
- echo >&2 "`basename $0`: modify_fstab() error:"\
- "Failed to execute remote command to"\
- "modify /etc/fstab of host ${HOST_NAME[i]}"\
- "to add Lustre target ${DEVICE_NAME[i]}!"
- return 1
- fi
- done
-
- return 0
-}
-
-# Main flow
-# Check the csv file
-if ! check_file $1; then
- exit 1
-fi
-
-if ${VERIFY_CONNECT}; then
-# Check the network connectivity and hostnames
- echo "`basename $0`: Checking the cluster network connectivity"\
- "and hostnames..."
- if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then
- exit 1
- fi
- echo "`basename $0`: Check the cluster network connectivity"\
- "and hostnames OK!"
- echo
-fi
-
-# Configure the Lustre cluster
-echo "`basename $0`: ******** Lustre cluster configuration START ********"
-if ! get_items ${CSV_FILE}; then
- exit 1
-fi
-
-if ! check_mgs; then
- exit 1
-fi
-
-if ! mass_config; then
- exit 1
-fi
-
-if ! modify_fstab; then
- exit 1
-fi
-
-# Produce HA software's configuration files
-if ! config_ha; then
- rm -rf ${TMP_DIRS}
- exit 1
-fi
-
-echo "`basename $0`: ******** Lustre cluster configuration END **********"
-
-exit 0
+++ /dev/null
-#!/bin/bash
-#
-# lustre_createcsv.sh - generate a csv file from a running lustre cluster
-#
-# This script is used to collect lustre target informations and HA software
-# configurations in a lustre cluster to generate a csv file. In reverse, the
-# csv file could be parsed by lustre_config.sh to configure multiple lustre
-# servers in parallel.
-#
-# This script should be run on the MGS node.
-#
-################################################################################
-
-# Usage
-usage() {
- cat >&2 <<EOF
-
-Usage: `basename $0` [-t HAtype] [-h] [-v] [-f csv_filename]
-
- This script is used to collect lustre target informations and HA software
- configurations from a running lustre cluster to generate a csv file. It
- should be run on the MGS node.
-
- -t HAtype collect High-Availability software configurations
- The argument following -t is used to indicate the High-
- Availability software type. The HA software types which
- are currently supported are: hbv1 (Heartbeat version 1)
- and hbv2 (Heartbeat version 2).
- -h help
- -v verbose mode
- -f csv_filename designate a name for the csv file
- Default is lustre_config.csv.
-
-EOF
- exit 1
-}
-
-#**************************** Global variables ****************************#
-# csv file
-CSV_FILE=${CSV_FILE:-"lustre_config.csv"}
-
-# Remote command
-REMOTE=${REMOTE:-"ssh -x -q"}
-#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}
-
-# Command path
-CMD_PATH=${CMD_PATH:-"/usr/sbin"}
-TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"}
-
-# Lustre proc files
-LUSTRE_PROC=${LUSTRE_PROC:-"/proc/fs/lustre"}
-LUSTRE_PROC_DEVICES=${LUSTRE_PROC}/devices
-
-LNET_PROC=${LNET_PROC:-"/proc/sys/lnet"}
-LNET_PROC_PEERS=${LNET_PROC}/peers
-
-# Default network module options
-DEFAULT_MOD_OPTS=${DEFAULT_MOD_OPTS:-"options lnet networks=tcp"}
-
-# Variables of HA software
-HATYPE_HBV1="hbv1" # Heartbeat version 1
-HATYPE_HBV2="hbv2" # Heartbeat version 2
-HATYPE_CLUMGR="cluman" # Cluster Manager
-
-HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory
-CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory
-HA_CF=${HA_DIR}/ha.cf # ha.cf file
-HA_RES=${HA_DIR}/haresources # haresources file
-HA_CIB=${CIB_DIR}/cib.xml
-
-CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"} # CluManager tools
-CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}
-
-CLUMAN_DIR=${CLUMAN_DIR:-"/etc"} # CluManager configuration directory
-CLUMAN_CONFIG=${CLUMAN_DIR}/cluster.xml
-
-# Lustre target obd device types
-MGS_TYPE=${MGS_TYPE:-"mgs"}
-MDT_TYPE=${MDT_TYPE:-"mds"}
-OST_TYPE=${OST_TYPE:-"obdfilter"}
-
-# The obd name of MGS target server
-MGS_SVNAME=${MGS_SVNAME:-"MGS"}
-
-# Hostnames of the lustre cluster nodes
-declare -a HOST_NAMES
-MGS_HOSTNAME=${MGS_HOSTNAME:-"`hostname`"} # Hostname of the MGS node
-
-# Configs of lustre targets in one cluster node
-declare -a TARGET_CONFIGS
-declare -a TARGET_SVNAMES TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS
-declare -a TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
-declare -a TARGET_FMTOPTS TARGET_MKFSOPTS TARGET_MNTOPTS TARGET_FAILNIDS
-declare -a HA_CONFIGS
-declare -a ALL_TARGET_SVNAMES # All the target services in the cluster
-declare -a FAILOVER_FMTOPTS # "--noformat"
-
-# Lustre target service types
-let "LDD_F_SV_TYPE_MDT = 0x0001"
-let "LDD_F_SV_TYPE_OST = 0x0002"
-let "LDD_F_SV_TYPE_MGS = 0x0004"
-
-# Permanent mount options for ext3 or ldiskfs
-ALWAYS_MNTOPTS=${ALWAYS_MNTOPTS:-"errors=remount-ro"}
-MDT_MGS_ALWAYS_MNTOPTS=${MDT_MGS_ALWAYS_MNTOPTS:-",iopen_nopriv,user_xattr"}
-OST_ALWAYS_MNTOPTS=${OST_ALWAYS_MNTOPTS:-",asyncdel"}
-OST_DEFAULT_MNTOPTS=${OST_DEFAULT_MNTOPTS:-",extents,mballoc"}
-
-# User-settable parameter keys
-PARAM_MGSNODE=${PARAM_MGSNODE:-"mgsnode="}
-PARAM_FAILNODE=${PARAM_FAILNODE:-"failover.node="}
-
-# Block size
-L_BLOCK_SIZE=4096
-
-# Option string of mkfs.lustre
-OPTSTR_STRIPE_COUNT=${OPTSTR_STRIPE_COUNT:-"--stripe-count-hint="}
-
-
-# Get and check the positional parameters
-VERBOSE_OUTPUT=false
-while getopts "t:hvf:" OPTION; do
- case $OPTION in
- t)
- HATYPE_OPT=$OPTARG
- if [ "${HATYPE_OPT}" != "${HATYPE_HBV1}" ] \
- && [ "${HATYPE_OPT}" != "${HATYPE_HBV2}" ] \
- && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
- echo >&2 "`basename $0`: Invalid HA software type" \
- "- ${HATYPE_OPT}!"
- usage
- fi
- ;;
- h) usage;;
- v) VERBOSE_OUTPUT=true;;
- f) CSV_FILE=$OPTARG;;
- ?) usage
- esac
-done
-
-# Output verbose informations
-verbose_output() {
- if ${VERBOSE_OUTPUT}; then
- echo "`basename $0`: $*"
- fi
- return 0
-}
-
-# Verify the local host is the MGS node
-mgs_node() {
- if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \
- "not exist. Lustre kernel modules may not be loaded!"
- return 1
- fi
-
- if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \
- "empty. Lustre services may not be started!"
- return 1
- fi
-
- if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then
- echo >&2 "`basename $0`: error: This node is not a MGS node." \
- "The script should be run on the MGS node!"
- return 1
- fi
-
- return 0
-}
-
-# Check whether the reomte command is pdsh
-is_pdsh() {
- if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then
- return 1
- fi
-
- return 0
-}
-
-# remote_error fn_name host_addr ret_str
-# Verify the return result from remote command
-remote_error() {
- local fn_name host_addr ret_str
-
- fn_name=$1
- shift
- host_addr=$1
- shift
- ret_str=$*
-
- if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then
- echo "`basename $0`: ${fn_name}() error: remote error:" \
- "${ret_str}"
- return 0
- fi
-
- if [ -z "${ret_str}" ]; then
- echo "`basename $0`: ${fn_name}() error: remote error:" \
- "No results from remote!" \
- "Check network connectivity between the local host"\
- "and ${host_addr}!"
- return 0
- fi
-
- return 1
-}
-
-# nid2hostname nid
-# Convert @nid to hostname of the lustre cluster node
-nid2hostname() {
- local nid=$1
- local host_name=
- local addr nettype ip_addr
- local ret_str
-
- addr=${nid%@*}
- nettype=${nid#*@}
- if [ -z "${addr}" ]; then
- echo "`basename $0`: nid2hostname() error:" \
- "Invalid nid - \"${nid}\"!"
- return 1
- fi
-
- case "${nettype}" in
- lo*) host_name=`hostname`;;
- elan*) # QsNet
- # FIXME: Parse the /etc/elanhosts configuration file to
- # convert ElanID to hostname
- ;;
- gm*) # Myrinet
- # FIXME: Use /usr/sbin/gmlndnid to find the hostname of
- # the specified GM Global node ID
- ;;
- ptl*) # Portals
- # FIXME: Convert portal ID to hostname
- ;;
- *) # tcp, o2ib, cib, openib, iib, vib, ra
- ip_addr=${addr}
-
- # Execute remote command to get the host name
- ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: nid2hostname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1
-
- if is_pdsh; then
- host_name=`echo ${ret_str} | awk '{print $2}'`
- else
- host_name=`echo ${ret_str} | awk '{print $1}'`
- fi
- ;;
- esac
-
- echo ${host_name}
- return 0
-}
-
-# get_hostnames
-# Get lustre cluster node names
-get_hostnames() {
- declare -a HOST_NIDS
- declare -i idx # Index of HOST_NIDS array
- declare -i i # Index of HOST_NAMES array
-
- if ! mgs_node; then
- return 1
- fi
-
- if [ ! -e ${LNET_PROC_PEERS} ]; then
- echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \
- "exist. LNET kernel modules may not be loaded" \
- "or LNET network may not be up!"
- return 1
- fi
-
- HOST_NAMES[0]=${MGS_HOSTNAME} # MGS node
- HOST_NIDS[0]=${HOST_NAMES[0]}
-
- # Get the nids of the nodes which have contacted MGS
- idx=1
- for nid in `cat ${LNET_PROC_PEERS} | awk '{print $1}'`; do
- if [ "${nid}" = "nid" ]; then
- continue
- fi
-
- HOST_NIDS[idx]=${nid}
- let "idx += 1"
- done
-
- if [ ${idx} -eq 1 ]; then
- verbose_output "Only one node running in the lustre cluster." \
- "It's ${HOST_NAMES[0]}."
- return 0
- fi
-
- # Get the hostnames of the nodes
- for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do
- if [ -z "${HOST_NIDS[idx]}" ]; then
- echo >&2 "`basename $0`: get_hostnames() error:" \
- "Invalid nid - \"${HOST_NIDS[idx]}\"!"
- return 1
- fi
-
- HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]})
- if [ $? -ne 0 ]; then
- echo >&2 "${HOST_NAMES[i]}"
- return 1
- fi
-
- if [ "${HOST_NAMES[i]}" = "${HOST_NAMES[0]}" ]; then
- let "i -= 1"
- fi
- done
-
- return 0
-}
-
-#*************************** Network module options ***************************#
-# last_is_backslash line
-# Check whether the last effective letter of @line is a backslash
-last_is_backslash() {
- local line="$*"
- declare -i i
- declare -i length
- local letter last_letter
-
- length=${#line}
- for ((i = ${length}-1; i >= 0; i--)); do
- letter=${line:${i}:1}
- [ "x${letter}" != "x " -a "x${letter}" != "x " -a -n "${letter}" ]\
- && last_letter=${letter} && break
- done
-
- [ "x${last_letter}" = "x\\" ] && return 0
-
- return 1
-}
-
-# get_module_opts hostname
-# Get the network module options from the node @hostname
-get_module_opts() {
- local host_name=$1
- local ret_str
- local MODULE_CONF KERNEL_VER
- local ret_line line find_options
- local continue_flag
-
- MODULE_OPTS=${DEFAULT_MOD_OPTS}
-
- # Execute remote command to get the kernel version
- ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: get_module_opts() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1
-
- if is_pdsh; then
- KERNEL_VER=`echo ${ret_str} | awk '{print $2}'`
- else
- KERNEL_VER=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- # Get the module configuration file name
- if [ "${KERNEL_VER:0:3}" = "2.4" ]; then
- MODULE_CONF=/etc/modules.conf
- else
- MODULE_CONF=/etc/modprobe.conf
- fi
-
- # Execute remote command to get the lustre network module options
- continue_flag=false
- find_options=false
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # Get rid of the comment line
- [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
-
- if [ "${line}" != "${line#*options lnet*}" ]; then
- if ! ${find_options}; then
- find_options=true
- MODULE_OPTS=${line}
- else
- MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
- fi
-
- last_is_backslash "${line}" && continue_flag=true \
- || continue_flag=false
- continue
- fi
-
- if ${continue_flag}; then
- MODULE_OPTS=${MODULE_OPTS}$" \n "${line}
- ! last_is_backslash "${line}" && continue_flag=false
-
- fi
- done < <(${REMOTE} ${host_name} "cat ${MODULE_CONF}")
-
- if [ -z "${MODULE_OPTS}" ]; then
- MODULE_OPTS=${DEFAULT_MOD_OPTS}
- fi
-
- return 0
-}
-
-#************************ HA software configurations ************************#
-# is_ha_target hostname target_devname
-# Check whether the target @target_devname was made to be high-available
-is_ha_target() {
- local host_name=$1
- local target_svname=$2
- local res_file
- local ret_str
-
- case "${HATYPE_OPT}" in
- "${HATYPE_HBV1}") res_file=${HA_RES};;
- "${HATYPE_HBV2}") res_file=${HA_CIB};;
- "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG};;
- esac
-
- # Execute remote command to check the resource file
- ret_str=`${REMOTE} ${host_name} \
- "grep ${target_svname} ${res_file}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo >&2 "`basename $0`: is_ha_target() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- [ "${ret_str}" = "${ret_str#*${target_svname}*}" ] && return 1
-
- return 0
-}
-
-# get_hb_configs hostname
-# Get the Heartbeat configurations from the node @hostname
-get_hb_configs() {
- local host_name=$1
- local ret_line line
- declare -i i
-
- unset HA_CONFIGS
- HB_CHANNELS=
- SRV_IPADDRS=
- HB_OPTIONS=
-
- # Execute remote command to get the configs of Heartbeat channels, etc
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # Get rid of the comment line
- [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
-
- if [ "${line}" != "${line#*serial*}" ] \
- || [ "${line}" != "${line#*cast*}" ]; then
- if [ -z "${HB_CHANNELS}" ]; then
- HB_CHANNELS=${line}
- else
- HB_CHANNELS=${HB_CHANNELS}:${line}
- fi
- fi
-
- if [ "${line}" != "${line#*stonith*}" ] \
- || [ "${line}" != "${line#*ping*}" ] \
- || [ "${line}" != "${line#*respawn*}" ] \
- || [ "${line}" != "${line#*apiauth*}" ] \
- || [ "${line}" != "${line#*compression*}" ]; then
- if [ -z "${HB_OPTIONS}" ]; then
- HB_OPTIONS=${line}
- else
- HB_OPTIONS=${HB_OPTIONS}:${line}
- fi
- fi
- done < <(${REMOTE} ${host_name} "cat ${HA_CF}")
-
- if [ -z "${HB_CHANNELS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error:" \
- "There are no heartbeat channel configs in ${HA_CF}" \
- "of host ${host_name} or ${HA_CF} does not exist!"
- return 0
- fi
-
- # Execute remote command to get Heartbeat service address
- if [ "${HATYPE_OPT}" = "${HATYPE_HBV1}" ]; then
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # Get rid of the empty line
- [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\
- && continue
-
- # Get rid of the comment line
- [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue
-
- SRV_IPADDRS=`echo ${line} | awk '{print $2}'`
- [ -n "${SRV_IPADDRS}" ] \
- && [ "`echo ${line} | awk '{print $1}'`" = "${host_name}" ] && break
- done < <(${REMOTE} ${host_name} "cat ${HA_RES}")
-
- if [ -z "${SRV_IPADDRS}" ]; then
- echo >&2 "`basename $0`: get_hb_configs() error: There"\
- "are no service address in ${HA_RES} of host"\
- "${host_name} or ${HA_RES} does not exist!"
- return 0
- fi
- fi
-
- # Construct HA configuration items
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to check whether this target service
- # was made to be high-available
- if is_ha_target ${host_name} ${TARGET_DEVNAMES[i]}; then
- HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
- fi
- done
-
- return 0
-}
-
-# get_cluman_channel hostname
-# Get the Heartbeat channel of CluManager from the node @hostname
-get_cluman_channel() {
- local host_name=$1
- local ret_line line
- local cluman_channel=
- local mcast_ipaddr
-
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ "${line}" != "${line#*broadcast*}" ] \
- && [ "`echo ${line}|awk '{print $3}'`" = "yes" ]; then
- cluman_channel="broadcast"
- break
- fi
-
- if [ "${line}" != "${line#*multicast_ipaddress*}" ]; then
- mcast_ipaddr=`echo ${line}|awk '{print $3}'`
- if [ "${mcast_ipaddr}" != "225.0.0.11" ]; then
- cluman_channel="multicast ${mcast_ipaddr}"
- break
- fi
- fi
- done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd")
-
- echo ${cluman_channel}
- return 0
-}
-
-# get_cluman_srvaddr hostname target_svname
-# Get the service IP addresses of @target_svname from the node @hostname
-get_cluman_srvaddr() {
- local host_name=$1
- local target_svname=$2
- local ret_line line
- local srvaddr cluman_srvaddr=
-
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ "${line}" != "${line#*ipaddress = *}" ]; then
- srvaddr=`echo ${line}|awk '{print $3}'`
- if [ -z "${cluman_srvaddr}" ]; then
- cluman_srvaddr=${srvaddr}
- else
- cluman_srvaddr=${cluman_srvaddr}:${srvaddr}
- fi
- fi
- done < <(${REMOTE} ${host_name} "${CONFIG_CMD} \
- --service=${target_svname} --service_ipaddresses")
-
- if [ -z "${cluman_srvaddr}" ]; then
- echo "`basename $0`: get_cluman_srvaddr() error: Cannot" \
- "get the service IP addresses of ${target_svname} in" \
- "${host_name}! Check ${CONFIG_CMD} command!"
- return 1
- fi
-
- echo ${cluman_srvaddr}
- return 0
-}
-
-# get_cluman_configs hostname
-# Get the CluManager configurations from the node @hostname
-get_cluman_configs() {
- local host_name=$1
- local ret_str
- declare -i i
-
- unset HA_CONFIGS
-
- # Execute remote command to get the configs of CluManager
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- HB_CHANNELS=
- SRV_IPADDRS=
- HB_OPTIONS=
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to check whether this target service
- # was made to be high-available
- ! is_ha_target ${host_name} ${TARGET_DEVNAMES[i]} && continue
-
- # Execute remote command to get Heartbeat channel
- HB_CHANNELS=$(get_cluman_channel ${host_name})
- if [ $? -ne 0 ]; then
- echo >&2 "${HB_CHANNELS}"
- fi
-
- # Execute remote command to get service IP address
- SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \
- ${TARGET_SVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${SRV_IPADDRS}"
- return 0
- fi
-
- HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
- done
-
- return 0
-}
-
-# get_ha_configs hostname
-# Get the HA software configurations from the node @hostname
-get_ha_configs() {
- local host_name=$1
-
- unset HA_CONFIGS
-
- if [ -z "${HATYPE_OPT}" ]; then
- return 0
- fi
-
- verbose_output "Collecting HA software configurations from host $1..."
-
- case "${HATYPE_OPT}" in
- "${HATYPE_HBV1}" | "${HATYPE_HBV2}") # Heartbeat
- if ! get_hb_configs ${host_name}; then
- return 1
- fi
- ;;
- "${HATYPE_CLUMGR}") # CluManager
- if ! get_cluman_configs ${host_name}; then
- return 1
- fi
- ;;
- esac
-
- return 0
-}
-
-#*********************** Lustre targets configurations ***********************#
-
-# is_failover_service target_svname
-# Check whether a target service @target_svname is a failover service.
-is_failover_service() {
- local target_svname=$1
- declare -i i
-
- for ((i = 0; i < ${#ALL_TARGET_SVNAMES[@]}; i++)); do
- [ "${target_svname}" = "${ALL_TARGET_SVNAMES[i]}" ] && return 0
- done
-
- return 1
-}
-
-# get_svnames hostname
-# Get the lustre target server obd names from the node @hostname
-get_svnames(){
- declare -i i
- declare -i j
- local host_name=$1
- local ret_line line
-
- # Initialize the TARGET_SVNAMES array
- unset TARGET_SVNAMES
- unset FAILOVER_FMTOPTS
-
- # Execute remote command to the node @hostname and figure out what
- # lustre services are running.
- i=0
- j=${#ALL_TARGET_SVNAMES[@]}
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ -z "`echo ${line} | grep ${MGS_TYPE}`" ] \
- && [ -z "`echo ${line} | grep ${MDT_TYPE}`" ] \
- && [ -z "`echo ${line} | grep ${OST_TYPE}`" ]; then
- continue
- fi
-
- # Get target server name
- TARGET_SVNAMES[i]=`echo ${line} | awk '{print $4}'`
- if [ -n "${TARGET_SVNAMES[i]}" ]; then
- if is_failover_service ${TARGET_SVNAMES[i]}; then
- FAILOVER_FMTOPTS[i]="--noformat"
- fi
- ALL_TARGET_SVNAMES[j]=${TARGET_SVNAMES[i]}
- let "i += 1"
- let "j += 1"
- else
- echo >&2 "`basename $0`: get_svnames() error: Invalid"\
- "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\
- "- \"${line}\"!"
- return 1
- fi
- done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}")
-
- if [ $i -eq 0 ]; then
- verbose_output "There are no lustre services running" \
- "on the node ${host_name}!"
- fi
-
- return 0
-}
-
-# is_loopdev devname
-# Check whether a device @devname is a loop device or not
-is_loopdev() {
- local devname=$1
-
- if [ -z "${devname}" ] || \
- [ -z "`echo ${devname}|awk '/\/dev\/loop[[:digit:]]/ {print $0}'`" ]
- then
- return 1
- fi
-
- return 0
-}
-
-# get_devname hostname svname
-# Get the device name of lustre target @svname from node @hostname
-get_devname() {
- local host_name=$1
- local target_svname=$2
- local target_devname=
- local ret_str
- local target_type target_obdtype mntdev_file
-
- if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
- # Execute remote command to get the device name of mgs target
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/findfs LABEL=${target_svname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]
- then
- echo "`basename $0`: get_devname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
- fi
-
- if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]; then
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devname=`echo ${ret_str} | awk '{print $1}'`
- fi
- fi
- else # Execute remote command to get the device name of mdt/ost target
- target_type=`echo ${target_svname} | cut -d - -f 2`
- target_obdtype=${target_type:0:3}_TYPE
-
- mntdev_file=${LUSTRE_PROC}/${!target_obdtype}/${target_svname}/mntdev
-
- ret_str=`${REMOTE} ${host_name} "cat ${mntdev_file}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_devname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if [ "${ret_str}" != "${ret_str#*No such file*}" ]; then
- echo "`basename $0`: get_devname() error:"\
- "${mntdev_file} does not exist in ${host_name}!"
- return 1
- else
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devname=`echo ${ret_str} | awk '{print $1}'`
- fi
- fi
- fi
-
- echo ${target_devname}
- return 0
-}
-
-# get_devsize hostname target_devname
-# Get the device size (KB) of @target_devname from node @hostname
-get_devsize() {
- local host_name=$1
- local target_devname=$2
- local target_devsize=
- local ret_str
-
- # Execute remote command to get the device size
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/blockdev --getsize ${target_devname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_devsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- target_devsize=`echo ${ret_str} | awk '{print $2}'`
- else
- target_devsize=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_devsize() error: can't" \
- "get device size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let " target_devsize /= 2"
-
- echo ${target_devsize}
- return 0
-}
-
-# get_realdevname hostname loop_dev
-# Get the real device name of loop device @loop_dev from node @hostname
-get_realdevname() {
- local host_name=$1
- local loop_dev=$2
- local target_devname=
- local ret_str
-
- # Execute remote command to get the real device name
- ret_str=`${REMOTE} ${host_name} \
- "/sbin/losetup ${loop_dev}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_realdevname() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- target_devname=`echo ${ret_str} | awk '{print $4}' \
- | sed 's/^(//' | sed 's/)$//'`
- else
- target_devname=`echo ${ret_str} | awk '{print $3}' \
- | sed 's/^(//' | sed 's/)$//'`
- fi
-
- if [ "${ret_str}" != "${ret_str#*No such*}" ] \
- || [ -z "${target_devname}" ]; then
- echo "`basename $0`: get_realdevname() error: can't" \
- "get info on device ${loop_dev} in ${host_name}!"
- return 1
- fi
-
- echo ${target_devname}
- return 0
-}
-
-# get_mntpnt hostname target_devname
-# Get the lustre target mount point from the node @hostname
-get_mntpnt(){
- local host_name=$1
- local target_devname=$2
- local mnt_point=
- local ret_str
-
- # Execute remote command to get the mount point
- ret_str=`${REMOTE} ${host_name} \
- "cat /etc/mtab | grep ${target_devname}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_mntpnt() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- mnt_point=`echo ${ret_str} | awk '{print $3}'`
- else
- mnt_point=`echo ${ret_str} | awk '{print $2}'`
- fi
-
- if [ -z "${mnt_point}" ]; then
- echo "`basename $0`: get_mntpnt() error: can't" \
- "get the mount point of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${mnt_point}
- return 0
-}
-
-# get_devnames hostname
-# Get the lustre target device names, mount points
-# and loop device sizes from the node @hostname
-get_devnames(){
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the arrays
- unset TARGET_DEVNAMES
- unset TARGET_DEVSIZES
- unset TARGET_MNTPNTS
-
- for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do
- TARGET_DEVNAMES[i]=$(get_devname ${host_name} \
- ${TARGET_SVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
- return 1
- fi
-
- if [ -z "${TARGET_DEVNAMES[i]}" ]; then
- if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then
- verbose_output "There exists combo mgs/mdt"\
- "target in ${host_name}."
- continue
- else
- echo >&2 "`basename $0`: get_devname() error:"\
- "No device corresponding to target" \
- "${TARGET_SVNAMES[i]} in ${host_name}!"
- return 1
- fi
- fi
-
- # Get the mount point of the target
- TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_MNTPNTS[i]}"
- return 1
- fi
-
- # The target device is a loop device?
- if [ -n "${TARGET_DEVNAMES[i]}" ] \
- && is_loopdev ${TARGET_DEVNAMES[i]}; then
- # Get the device size
- TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
- return 1
- fi
-
- # Get the real device name
- TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVNAMES[i]}"
- return 1
- fi
- fi
- done
-
- return 0
-}
-
-# is_target target_svtype ldd_flags
-# Check the service type of a lustre target
-is_target() {
- case "$1" in
- "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";;
- "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";;
- "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";;
- "*")
- echo >&2 "`basename $0`: is_target() error: Invalid" \
- "target service type - \"$1\"!"
- return 1
- ;;
- esac
-
- if [ ${ret} -eq 0 ]; then
- return 1
- fi
-
- return 0
-}
-
-# get_devtype ldd_flags
-# Get the service type of a lustre target from @ldd_flags
-get_devtype() {
- local target_devtype=
-
- if [ -z "${flags}" ]; then
- echo "`basename $0`: get_devtype() error: Invalid" \
- "ldd_flags - it's value is null!"
- return 1
- fi
-
- if is_target "mgs" $1; then
- if is_target "mdt" $1; then
- target_devtype="mgs|mdt"
- else
- target_devtype="mgs"
- fi
- elif is_target "mdt" $1; then
- target_devtype="mdt"
- elif is_target "ost" $1; then
- target_devtype="ost"
- else
- echo "`basename $0`: get_devtype() error: Invalid" \
- "ldd_flags - \"$1\"!"
- return 1
- fi
-
- echo ${target_devtype}
- return 0
-}
-
-# get_mntopts ldd_mount_opts
-# Get the user-specified lustre target mount options from @ldd_mount_opts
-get_mntopts() {
- local mount_opts=
- local ldd_mount_opts=$1
-
- mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}"
- mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}"
- mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}"
- mount_opts="${mount_opts#${OST_DEFAULT_MNTOPTS}}"
- mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`"
-
- [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \
- || echo ${mount_opts}
-
- return 0
-}
-
-# ip2hostname nids
-# Convert IP addresses in @nids into hostnames
-ip2hostname() {
- local orig_nids=$1
- local nids=
- local nid nids_str
- local nettype
-
- nids_str=`echo ${orig_nids}|awk '{split($orig_nids, a, ",")}\
- END {for (i in a) print a[i]}'`
- for nid in ${nids_str}; do
- nettype=${nid#*@}
-
- case "${nettype}" in
- lo* | elan* | gm* | ptl*) ;;
- *)
- nid=$(nid2hostname ${nid})
- if [ $? -ne 0 ]; then
- echo "${nid}"
- return 1
- fi
-
- nid=${nid}@${nettype}
- ;;
- esac
-
- if [ -z "${nids}" ]; then
- nids=${nid}
- else
- nids=${nids},${nid}
- fi
- done
-
- echo ${nids}
- return 0
-}
-
-# get_mgsnids ldd_params
-# Get the mgs nids of lustre target from @ldd_params
-get_mgsnids() {
- local mgs_nids= # mgs nids in one mgs node
- local all_mgs_nids= # mgs nids in all mgs failover nodes
- local param=
- local ldd_params="$*"
-
- for param in ${ldd_params}; do
- if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then
- mgs_nids=`echo ${param#${PARAM_MGSNODE}}`
- mgs_nids=$(ip2hostname ${mgs_nids})
- if [ $? -ne 0 ]; then
- echo >&2 "${mgs_nids}"
- return 1
- fi
-
- if [ -n "${all_mgs_nids}" ]; then
- all_mgs_nids=${all_mgs_nids}:${mgs_nids}
- else
- all_mgs_nids=${mgs_nids}
- fi
- fi
- done
-
- [ "${all_mgs_nids}" != "${all_mgs_nids#*,*}" ] \
- && echo "\""${all_mgs_nids}"\"" || echo ${all_mgs_nids}
-
- return 0
-}
-
-# get_failnids ldd_params
-# Get the failover nids of lustre target from @ldd_params
-get_failnids() {
- local fail_nids= # failover nids in one failover node
- local all_fail_nids= # failover nids in all failover nodes
- # of this target
- local param=
- local ldd_params="$*"
-
- for param in ${ldd_params}; do
- if [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ]; then
- fail_nids=`echo ${param#${PARAM_FAILNODE}}`
- fail_nids=$(ip2hostname ${fail_nids})
- if [ $? -ne 0 ]; then
- echo >&2 "${fail_nids}"
- return 1
- fi
-
- if [ -n "${all_fail_nids}" ]; then
- all_fail_nids=${all_fail_nids}:${fail_nids}
- else
- all_fail_nids=${fail_nids}
- fi
- fi
- done
-
- [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \
- && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids}
-
- return 0
-}
-
-# get_fmtopts target_devname hostname ldd_params
-# Get other format options of the lustre target @target_devname from @ldd_params
-get_fmtopts() {
- local target_devname=$1
- local host_name=$2
- shift
- shift
- local ldd_params="$*"
- local param=
- local fmt_opts=
-
- for param in ${ldd_params}; do
- [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue
- [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ] && continue
-
- if [ -n "${param}" ]; then
- if [ -n "${fmt_opts}" ]; then
- fmt_opts=${fmt_opts}" --param=\""${param}"\""
- else
- fmt_opts="--param=\""${param}"\""
- fi
- fi
- done
-
- echo ${fmt_opts}
- return 0
-}
-
-# get_stripecount host_name target_fsname
-# Get the stripe count for @target_fsname
-get_stripecount() {
- local host_name=$1
- local target_fsname=$2
- local stripe_count=
- local stripecount_file
- local ret_str
-
- # Get the stripe count
- stripecount_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount
- ret_str=`${REMOTE} ${host_name} "cat ${stripecount_file}" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_stripecount() error:" \
- "remote command to ${host_name} error: ${ret_str}"
- return 1
- fi
-
- if is_pdsh; then
- stripe_count=`echo ${ret_str} | awk '{print $2}'`
- else
- stripe_count=`echo ${ret_str} | awk '{print $1}'`
- fi
-
- if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_stripecount() error: can't" \
- "get stripe count of ${target_fsname} in ${host_name}!"
- return 1
- fi
-
- echo ${stripe_count}
- return 0
-}
-
-# get_stripecount_opt host_name target_fsname
-# Get the stripe count option for lustre mdt target
-get_stripecount_opt() {
- local host_name=$1
- local target_fsname=$2
- local stripe_count=
- local stripecount_opt=
-
- # Get the stripe count
- [ -z "${target_fsname}" ] && target_fsname="lustre"
- stripe_count=$(get_stripecount ${host_name} ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${stripe_count}"
- return 1
- fi
-
- if [ "${stripe_count}" != "1" ]; then
- stripecount_opt=${OPTSTR_STRIPE_COUNT}${stripe_count}
- fi
-
- echo ${stripecount_opt}
- return 0
-}
-
-# get_ldds hostname
-# Get the lustre target disk data from the node @hostname
-get_ldds(){
- declare -i i
- local host_name=$1
- local ret_line line
- local flags mnt_opts params
- local stripecount_opt
-
- # Initialize the arrays
- unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
- unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS
-
- # Get lustre target device type, fsname, index, etc.
- # from MOUNT_DATA_FILE. Using tunefs.lustre to read it.
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- flags=
- mnt_opts=
- params=
- stripecount_opt=
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to read MOUNT_DATA_FILE
- while read -r ret_line; do
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then
- TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then
- TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then
- flags=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then
- mnt_opts=`echo ${line}|awk '{print $0}'`
- mnt_opts=`echo ${mnt_opts#Persistent mount opts: }`
- continue
- fi
-
- if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then
- params=`echo ${line}|awk '{print $0}'`
- params=`echo ${params#Parameters:}`
- break
- fi
- done < <(${REMOTE} ${host_name} "${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null")
-
- if [ -z "${flags}" ]; then
- echo >&2 "`basename $0`: get_ldds() error: Invalid" \
- "ldd_flags of target ${TARGET_DEVNAMES[i]}" \
- "in host ${host_name} - it's value is null!"\
- "Check ${TUNEFS} command!"
- return 1
- fi
-
- if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \
- || is_target "mgs" ${flags}; then
- TARGET_INDEXES[i]=
- fi
-
- [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]=
-
- # Get the lustre target service type
- TARGET_DEVTYPES[i]=$(get_devtype ${flags})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVTYPES[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get the lustre target mount options
- TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}")
-
- # Get mgs nids of the lustre target
- TARGET_MGSNIDS[i]=$(get_mgsnids "${params}")
-
- # Get failover nids of the lustre target
- TARGET_FAILNIDS[i]=$(get_failnids "${params}")
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FAILNIDS[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get other format options of the lustre target
- TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_FMTOPTS[i]}"
- return 1
- fi
-
- if [ -n "${TARGET_DEVSIZES[i]}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}"
- else
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}"
- fi
- fi
-
- if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]}
- else
- TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]}
- fi
- fi
-
- if is_target "mdt" ${flags}; then
- # Get the stripe count option
- stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${stripecount_opt}"
- return 1
- fi
-
- if [ -n "${stripecount_opt}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt}
- else
- TARGET_FMTOPTS[i]=${stripecount_opt}
- fi
- fi
- fi
-
- if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then
- TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\""
- fi
- done
-
- return 0
-}
-
-# get_journalsize target_devname hostname
-# Get the journal size of lustre target @target_devname from @hostname
-get_journalsize() {
- local target_devname=$1
- local host_name=$2
- local journal_inode=
- local journal_size=
- local ret_str
-
- # Execute remote command to get the journal inode number
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Journal inode:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Journal inode:*}}
- journal_inode=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal inode of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the journal size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \
- 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%User:*}}
- journal_size=`echo ${ret_str} | awk '{print $6}'`
- if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "journal_size /= 1024*1024" # MB
-
- echo ${journal_size}
- return 0
-}
-
-# get_defaultjournalsize target_devsize
-# Calculate the default journal size from target device size @target_devsize
-get_defaultjournalsize() {
- declare -i target_devsize=$1
- declare -i journal_size=0
- declare -i max_size base_size
-
- let "base_size = 1024*1024"
- if [ ${target_devsize} -gt ${base_size} ]; then # 1GB
- let "journal_size = target_devsize / 102400"
- let "journal_size *= 4"
- fi
-
- let "max_size = 102400 * L_BLOCK_SIZE"
- let "max_size >>= 20" # 400MB
-
- if [ ${journal_size} -gt ${max_size} ]; then
- let "journal_size = max_size"
- fi
-
- echo ${journal_size}
- return 0
-}
-
-# figure_journal_size target_devname hostname
-# Find a reasonable journal file size given the number of blocks
-# in the filesystem. This algorithm is derived from figure_journal_size()
-# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm.
-figure_journal_size() {
- local target_devname=$1
- local host_name=$2
- local ret_str
- declare -i block_count
- declare -i journal_blocks
- declare -i journal_size
-
- # Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Block count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: figure_journal_size() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Block count:*}}
- block_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: figure_journal_size() error: can't" \
- "get block count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- if ((block_count < 32768)); then
- let "journal_blocks = 1024"
- elif ((block_count < 256*1024)); then
- let "journal_blocks = 4096"
- elif ((block_count < 512*1024)); then
- let "journal_blocks = 8192"
- elif ((block_count < 1024*1024)); then
- let "journal_blocks = 16384"
- else
- let "journal_blocks = 32768"
- fi
-
- let "journal_size = journal_blocks * L_BLOCK_SIZE / 1048576"
-
- echo ${journal_size}
- return 0
-}
-
-# get_J_opt hostname target_devname target_devsize
-# Get the mkfs -J option of lustre target @target_devname
-# from the node @hostname
-get_J_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devsize=$3
- local journal_size=
- local default_journal_size=
- local journal_opt=
-
- # Get the real journal size of lustre target
- journal_size=$(get_journalsize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${journal_size}"
- return 1
- fi
-
- # Get the default journal size of lustre target
- default_journal_size=$(get_defaultjournalsize ${target_devsize})
- if [ "${default_journal_size}" = "0" ]; then
- default_journal_size=$(figure_journal_size ${target_devname} \
- ${host_name})
- if [ $? -ne 0 ]; then
- echo "${default_journal_size}"
- return 1
- fi
- fi
-
- if [ "${journal_size}" != "${default_journal_size}" ]; then
- journal_opt="-J size=${journal_size}"
- fi
-
- echo ${journal_opt}
- return 0
-}
-
-# get_ratio target_devname hostname
-# Get the bytes/inode ratio of lustre target @target_devname from @hostname
-get_ratio() {
- local target_devname=$1
- local host_name=$2
- local inode_count=
- local block_count=
- local ratio=
- local ret_str
-
- # Execute remote command to get the inode count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode count:*}}
- inode_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get inode count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Block count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Block count:*}}
- block_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get block count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "ratio = block_count*L_BLOCK_SIZE/inode_count"
-
- echo ${ratio}
- return 0
-}
-
-# get_default_ratio target_devtype target_devsize
-# Calculate the default bytes/inode ratio from target type @target_devtype
-get_default_ratio() {
- local target_devtype=$1
- declare -i target_devsize=$2
- local ratio=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- ratio=4096;;
- "ost")
- [ ${target_devsize} -gt 1000000 ] && ratio=16384;;
- esac
-
- [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE}
-
- echo ${ratio}
- return 0
-}
-
-# get_i_opt hostname target_devname target_devtype target_devsize
-# Get the mkfs -i option of lustre target @target_devname
-# from the node @hostname
-get_i_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devtype=$3
- local target_devsize=$4
- local ratio=
- local default_ratio=
- local ratio_opt=
-
- # Get the real bytes/inode ratio of lustre target
- ratio=$(get_ratio ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${ratio}"
- return 1
- fi
-
- # Get the default bytes/inode ratio of lustre target
- default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize})
-
- if [ "${ratio}" != "${default_ratio}" ]; then
- ratio_opt="-i ${ratio}"
- fi
-
- echo ${ratio_opt}
- return 0
-}
-
-# get_isize target_devname hostname
-# Get the inode size of lustre target @target_devname from @hostname
-get_isize() {
- local target_devname=$1
- local host_name=$2
- local inode_size=
- local ret_str
-
- # Execute remote command to get the inode size
- ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \
- ${target_devname} | grep 'Inode size:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_isize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode size:*}}
- inode_size=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_isize() error: can't" \
- "get inode size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${inode_size}
- return 0
-}
-
-# get_mdt_default_isize host_name target_fsname
-# Calculate the default inode size of lustre mdt target
-get_mdt_default_isize() {
- local host_name=$1
- local target_fsname=$2
- declare -i stripe_count
- local inode_size=
-
- # Get the stripe count
- stripe_count=$(get_stripecount ${host_name} ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${stripe_count}"
- return 1
- fi
-
- if ((stripe_count > 77)); then
- inode_size=512
- elif ((stripe_count > 34)); then
- inode_size=2048
- elif ((stripe_count > 13)); then
- inode_size=1024
- else
- inode_size=512
- fi
-
- echo ${inode_size}
- return 0
-}
-
-# get_default_isize host_name target_devtype target_fsname
-# Calculate the default inode size of lustre target type @target_devtype
-get_default_isize() {
- local host_name=$1
- local target_devtype=$2
- local target_fsname=$3
- local inode_size=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${inode_size}"
- return 1
- fi
- ;;
- "ost")
- inode_size=256;;
- esac
-
- [ -z "${inode_size}" ] && inode_size=128
-
- echo ${inode_size}
- return 0
-}
-
-# get_I_opt hostname target_devname target_devtype target_fsname
-# Get the mkfs -I option of lustre target @target_devname
-# from the node @hostname
-get_I_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devtype=$3
- local target_fsname=$4
- local isize=
- local default_isize=
- local isize_opt=
-
- # Get the real inode size of lustre target
- isize=$(get_isize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${isize}"
- return 1
- fi
-
- # Get the default inode size of lustre target
- [ -z "${target_fsname}" ] && target_fsname="lustre"
- default_isize=$(get_default_isize ${host_name} ${target_devtype} \
- ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${default_isize}"
- return 1
- fi
-
- if [ "${isize}" != "${default_isize}" ]; then
- isize_opt="-I ${isize}"
- fi
-
- echo ${isize_opt}
- return 0
-}
-
-# get_mkfsopts hostname
-# Get the mkfs options of lustre targets from the node @hostname
-get_mkfsopts(){
- declare -i i
- local host_name=$1
- local journal_opt
- local ratio_opt
- local inode_size_opt
-
- # Initialize the arrays
- unset TARGET_MKFSOPTS
-
- # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- journal_opt=
- ratio_opt=
- inode_size_opt=
-
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- if [ -z "${TARGET_DEVSIZES[i]}" ]; then
- # Get the device size
- TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
- ${TARGET_DEVNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${TARGET_DEVSIZES[i]}"
- return 1
- fi
- fi
-
- # Get the journal option
- journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVSIZES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${journal_opt}"
- return 1
- fi
-
- if [ -n "${journal_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${journal_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}"
- fi
- fi
-
- # Get the bytes-per-inode ratio option
- ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${ratio_opt}"
- return 1
- fi
-
- if [ -n "${ratio_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${ratio_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}"
- fi
- fi
-
- # Get the inode size option
- inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
- ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]})
- if [ $? -ne 0 ]; then
- echo >&2 "${inode_size_opt}"
- return 1
- fi
-
- if [ -n "${inode_size_opt}" ]; then
- if [ -z "${TARGET_MKFSOPTS[i]}" ]; then
- TARGET_MKFSOPTS[i]="${inode_size_opt}"
- else
- TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}"
- fi
- fi
-
- if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then
- TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\""
- fi
- done
- return 0
-}
-
-# get_target_configs hostname
-# Get the lustre target informations from the node @hostname
-get_target_configs() {
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the arrays
- unset TARGET_CONFIGS
-
- # Get lustre target server names
- if ! get_svnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device names, mount points and loop device sizes
- if ! get_devnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device type, fsname, index, etc.
- if ! get_ldds ${host_name}; then
- return 1
- fi
-
- # Get mkfs options of lustre targets
- if ! get_mkfsopts ${host_name}; then
- return 1
- fi
-
- # Construct lustre target configs
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
- TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]}
- done
-
- return 0
-}
-
-# get_configs hostname
-# Get all the informations needed to generate a csv file from
-# the node @hostname
-get_configs() {
- # Check the hostname
- if [ -z "$1" ]; then
- echo >&2 "`basename $0`: get_configs() error:" \
- "Missing hostname!"
- return 1
- fi
-
- # Get network module options
- verbose_output ""
- verbose_output "Collecting network module options from host $1..."
- if ! get_module_opts $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get lustre target informations
- verbose_output "Collecting Lustre targets informations from host $1..."
- if ! get_target_configs $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get HA software configurations
- if ! get_ha_configs $1; then
- return 1
- fi
-
- return 0
-}
-
-
-# Generate the csv file from the lustre cluster
-gen_csvfile() {
- declare -i idx
- declare -i i
- local line
-
- # Get lustre cluster node names
- verbose_output "Collecting Lustre cluster node names..."
- if ! get_hostnames; then
- return 1
- fi
- verbose_output "OK"
-
- : > ${CSV_FILE}
-
- for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
- # Collect informations
- if ! get_configs ${HOST_NAMES[idx]}; then
- rm -f ${CSV_FILE}
- return 1
- fi
-
- # Append informations to the csv file
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- if [ -z "${HA_CONFIGS[i]}" ]; then
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]}
- else
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]}
- fi
- verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \
- "in host ${HOST_NAMES[idx]} are as follows:"
- verbose_output "${line}"
- echo "" >> ${CSV_FILE}
- echo "${line}" >> ${CSV_FILE}
- done
- done
-
- return 0
-}
-
-# Main flow
-echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} START ********"
-if ! gen_csvfile; then
- exit 1
-fi
-echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} OK **********"
-
-exit 0