+++ /dev/null
-#!/bin/bash
-
-# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
-
-#
-# lustre_createcsv - generate a csv file from a running lustre cluster
-#
# This script is used to collect lustre target information, linux MD/LVM device
# information and HA software configurations in a lustre cluster to generate a
# csv file. Conversely, the csv file can be parsed by lustre_config to
# configure multiple lustre servers in parallel.
-#
-# This script should be run on the MGS node.
-#
-################################################################################
-
# usage
# Print the command synopsis and the option summary on stdout.
usage() {
    local prog_name

    prog_name=$(basename $0)

    cat <<EOF

Usage: ${prog_name} [-t HAtype] [-d] [-h] [-v] [-f csv_filename]

 This script is used to collect lustre target informations, linux MD/LVM
 device informations and HA software configurations from a running lustre
 cluster to generate a csv file. It should be run on the MGS node.

 -t HAtype collect High-Availability software configurations
 The argument following -t is used to indicate the High-
 Availability software type. The HA software types which
 are currently supported are: hbv1 (Heartbeat version 1)
 and hbv2 (Heartbeat version 2).
 -d collect linux MD/LVM device informations
 -h help
 -v verbose mode
 -f csv_filename designate a name for the csv file
 Default is lustre_config.csv.

EOF
}
-
-# Get the library of functions
-. @scriptlibdir@/lc_common
-
#**************************** Global variables ****************************#
# Every setting written as ": ${NAME:=value}" keeps a non-empty value that is
# already present in the environment and otherwise falls back to the default.

# Name of the csv file to generate
: "${LUSTRE_CSV_FILE:=lustre_config.csv}"

# Lustre proc files
: "${LUSTRE_PROC:=/proc/fs/lustre}"
LUSTRE_PROC_DEVICES=${LUSTRE_PROC}/devices

# LNET proc files
: "${LNET_PROC:=/proc/sys/lnet}"
LNET_PROC_PEERS=${LNET_PROC}/peers

# Network module options used when none are found on a node
: "${DEFAULT_MOD_OPTS:=options lnet networks=tcp}"

# obd device types of the lustre targets
: "${MGS_TYPE:=mgs}"
: "${MDT_TYPE:=mds}"
: "${OST_TYPE:=obdfilter}"

# obd name of the MGS target server
: "${MGS_SVNAME:=MGS}"

# Hostnames of the lustre cluster nodes; MGS_HOSTNAME is the MGS node itself
declare -a HOST_NAMES
: "${MGS_HOSTNAME:=$(hostname)}"

# Per-node lustre target configuration, one array slot per target
declare -a TARGET_CONFIGS
declare -a TARGET_SVNAMES TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS
declare -a TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
declare -a TARGET_FMTOPTS TARGET_MKFSOPTS TARGET_MNTOPTS TARGET_FAILNIDS
declare -a HA_CONFIGS
declare -a ALL_TARGET_SVNAMES   # every target service seen in the cluster
declare -a FAILOVER_FMTOPTS     # holds "--noformat" for failover services

# Per-node linux MD/LVM device information
declare -a MD_NAME MD_LEVEL MD_DEVS     # MD
declare -a VG_NAME VG_PVNAMES           # VG
declare -a LV_NAME LV_SIZE LV_VGNAME    # LV

# Bit masks identifying the lustre target service types
LDD_F_SV_TYPE_MDT=$((0x0001))
LDD_F_SV_TYPE_OST=$((0x0002))
LDD_F_SV_TYPE_MGS=$((0x0004))

# Permanent mount options for ext3 or ldiskfs
: "${ALWAYS_MNTOPTS:=errors=remount-ro}"
: "${MDT_MGS_ALWAYS_MNTOPTS:=user_xattr}"
: "${OST_ALWAYS_MNTOPTS:=,asyncdel}"
: "${OST_DEFAULT_MNTOPTS:=,extents,mballoc}"

# Keys of the user-settable parameters
: "${PARAM_MGSNODE:=mgsnode=}"
: "${PARAM_FAILNODE:=failover.node=}"

# Block size
L_BLOCK_SIZE=4096

# mkfs.lustre option string for the stripe count
: "${OPTSTR_STRIPE_COUNT:=--stripe-count-hint=}"
-
-
# Parse the command line. -t is validated against the HA software types
# named by HBVER_HBV1, HBVER_HBV2 and HATYPE_CLUMGR; an unknown option or an
# invalid HA type prints the usage on stderr and exits with status 1.
VERBOSE_OUTPUT=false
GET_MDLVM_INFO=false
while getopts "t:dhvf:" OPTION; do
    case "${OPTION}" in
    t)
        HATYPE_OPT=${OPTARG}
        case "${HATYPE_OPT}" in
        "${HBVER_HBV1}" | "${HBVER_HBV2}" | "${HATYPE_CLUMGR}")
            ;;
        *)
            error_output "Invalid HA software type" \
                "- ${HATYPE_OPT}!"
            usage 1>&2
            exit 1
            ;;
        esac
        ;;
    d)
        GET_MDLVM_INFO=true
        ;;
    h)
        if usage; then
            exit 0
        fi
        ;;
    v)
        VERBOSE_OUTPUT=true
        ;;
    f)
        LUSTRE_CSV_FILE=${OPTARG}
        ;;
    ?)
        if usage 1>&2; then
            exit 1
        fi
        ;;
    esac
done
-
# mgs_node
# Succeed only when the local node looks like the MGS node: the lustre
# devices file must exist, must not be empty and must mention ${MGS_TYPE}.
# Each failed check is reported through error_output and returns 1.
mgs_node() {
    local proc_devices=${LUSTRE_PROC_DEVICES}
    local listing

    if ! [ -e "${proc_devices}" ]; then
        error_output "${proc_devices} does" \
            "not exist. Lustre kernel modules may not be loaded!"
        return 1
    fi

    # Read the content instead of testing the file size
    listing=$(cat "${proc_devices}")
    if [ -z "${listing}" ]; then
        error_output "${proc_devices} is" \
            "empty. Lustre services may not be started!"
        return 1
    fi

    listing=$(grep ${MGS_TYPE} "${proc_devices}")
    if [ -z "${listing}" ]; then
        error_output "This node is not a MGS node." \
            "The script should be run on the MGS node!"
        return 1
    fi

    return 0
}
-
# get_hostnames
# Fill the global HOST_NAMES array with the cluster node names.
# Slot 0 is the MGS node (${MGS_HOSTNAME}); the other names are resolved
# with nid2hostname from the nids listed in ${LNET_PROC_PEERS}. A peer that
# resolves to the MGS hostname is not stored a second time.
# Returns 1 if this is not the MGS node, the peers file is missing or a nid
# cannot be resolved; 0 otherwise.
get_hostnames() {
    local -a peer_nids
    local -i nid_idx        # index into peer_nids
    local -i name_idx       # next free slot of HOST_NAMES
    local peer_nid

    mgs_node || return 1

    if [ ! -e "${LNET_PROC_PEERS}" ]; then
        error_output "${LNET_PROC_PEERS} does not" \
            "exist. LNET kernel modules may not be loaded" \
            "or LNET network may not be up!"
        return 1
    fi

    HOST_NAMES[0]=${MGS_HOSTNAME}   # MGS node
    peer_nids[0]=${HOST_NAMES[0]}

    # First column of the peers file, minus its "nid" heading
    nid_idx=1
    for peer_nid in $(awk '{print $1}' ${LNET_PROC_PEERS}); do
        [ "${peer_nid}" = "nid" ] && continue

        peer_nids[nid_idx]=${peer_nid}
        nid_idx=$((nid_idx + 1))
    done

    if [ ${nid_idx} -eq 1 ]; then
        verbose_output "Only one node running in the lustre cluster." \
            "It's ${HOST_NAMES[0]}."
        return 0
    fi

    # Translate every collected nid into a hostname
    name_idx=1
    for ((nid_idx = 1; nid_idx < ${#peer_nids[@]}; nid_idx++)); do
        if [ -z "${peer_nids[nid_idx]}" ]; then
            error_output "get_hostnames():" \
                "Invalid nid - \"${peer_nids[nid_idx]}\"!"
            return 1
        fi

        HOST_NAMES[name_idx]=$(nid2hostname ${peer_nids[nid_idx]})
        if [ $? -ne 0 ]; then
            # on failure the captured text is the error message
            error_output "${HOST_NAMES[name_idx]}"
            return 1
        fi

        if [ "${HOST_NAMES[name_idx]}" = "${HOST_NAMES[0]}" ]; then
            # the MGS node itself: free the slot for the next peer
            unset "HOST_NAMES[name_idx]"
        else
            name_idx=$((name_idx + 1))
        fi
    done

    return 0
}
-
-#********************** Linux MD/LVM device informations **********************#
# get_md_configs hostname
# Run "${MDADM} --detail --scan --verbose" on node @hostname and record each
# reported array in the globals MD_NAME, MD_LEVEL and MD_DEVS (same index).
# Always returns 0.
get_md_configs() {
    local host_name=$1
    local -i md_count=0     # number of ARRAY lines seen so far
    local -i cur_md=0       # slot of the most recent ARRAY line
    local raw_line md_line keyword level_field
    local remote_cmd

    # Start from empty arrays
    unset MD_NAME MD_LEVEL MD_DEVS

    remote_cmd="PATH=\$PATH:/sbin:/usr/sbin
 ${MDADM} --detail --scan --verbose"

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            md_line="$*"
        else
            md_line="${raw_line}"
        fi

        keyword=$(echo "${md_line}" | awk '{print $1}')

        case "${keyword}" in
        ARRAY)
            # "ARRAY <name> level=<level> ..."
            MD_NAME[md_count]=$(echo "${md_line}" | awk '{print $2}')
            level_field=$(echo "${md_line}" | awk '{print $3}')
            MD_LEVEL[md_count]=${level_field/level=/}
            cur_md=${md_count}
            md_count=$((md_count + 1))
            ;;
        devices=*)
            # component devices of the array announced last
            md_line=${md_line/devices=/}
            MD_DEVS[cur_md]=${md_line//,/ }
            ;;
        esac
    done < <(${REMOTE} ${host_name} "${remote_cmd}")

    if [ ${md_count} -eq 0 ]; then
        verbose_output "There are no active MD devices" \
            "in the host ${host_name}!"
    fi

    return 0
}
-
# get_pv_configs hostname
# Store the names of the LVM PVs reported by "pvdisplay -c" on node
# @hostname in the global PV_NAMES. Returns 1 if the remote command fails,
# 0 otherwise (also when the node has no PVs).
get_pv_configs() {
    local host_name=$1
    local remote_cmd remote_out
    local -i rc

    PV_NAMES=

    remote_cmd="PATH=\$PATH:/sbin:/usr/sbin pvdisplay -c | awk -F: '{print \$1}' | xargs"
    remote_out=$(${REMOTE} ${host_name} "${remote_cmd}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ]; then
        if [ -z "${remote_out}" ]; then
            remote_error "get_pv_configs" ${host_name}
        else
            error_output "get_pv_configs():" \
                "remote command to ${host_name} error: ${remote_out}"
        fi
        return 1
    fi

    # Strip a leading "<hostname>:" tag from the output, if there is one
    PV_NAMES=$(echo "${remote_out}" | sed -e "s/^${host_name}:[[:space:]]//")
    [ -n "${PV_NAMES}" ] \
        || verbose_output "There are no PVs in the host ${host_name}!"

    return 0
}
-
# get_vg_pvnames hostname vgname
# Print the PVs that "vgdisplay -v" lists for @vgname on node @hostname.
# On failure the error message is printed on stdout instead and 1 is
# returned, so the caller has to check the status before using the output.
get_vg_pvnames() {
    local host_name=$1
    local vg_name=$2
    local remote_cmd remote_out
    local vg_pvs=
    local -i rc

    remote_cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay -v ${vg_name} 2>/dev/null | grep \"PV Name\" | awk '{print \$3}' | xargs"
    remote_out=$(${REMOTE} ${host_name} "${remote_cmd}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ]; then
        if [ -z "${remote_out}" ]; then
            remote_error "get_vg_pvnames" ${host_name}
        else
            echo "$(basename $0): get_vg_pvnames() error:" \
                "remote command to ${host_name} error: ${remote_out}"
        fi
        return 1
    fi

    # Strip a leading "<hostname>:" tag from the output, if there is one
    vg_pvs=$(echo "${remote_out}" | sed -e "s/^${host_name}:[[:space:]]//")
    if [ -z "${vg_pvs}" ]; then
        echo "$(basename $0): get_vg_pvnames() error:" \
            "There are no PVs in VG ${vg_name} in the host ${host_name}!" \
            "Or VG ${vg_name} does not exist."
        return 1
    fi

    echo "${vg_pvs}"
    return 0
}
-
# get_vg_configs hostname
# Record every LVM VG of node @hostname in the global VG_NAME array and its
# PVs (from get_vg_pvnames) in the matching slot of VG_PVNAMES.
# Returns 1 if a remote command fails, 0 otherwise (also without any VG).
get_vg_configs() {
    local host_name=$1
    local remote_cmd remote_out
    local vg
    local -i vg_count=0
    local -i rc

    # Start from empty arrays
    unset VG_NAME VG_PVNAMES

    remote_cmd="PATH=\$PATH:/sbin:/usr/sbin vgdisplay  | grep \"VG Name\" | awk '{print \$3}' | xargs"
    remote_out=$(${REMOTE} ${host_name} "${remote_cmd}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ]; then
        if [ -z "${remote_out}" ]; then
            remote_error "get_vg_configs" ${host_name}
        else
            error_output "get_vg_configs():" \
                "remote command to ${host_name} error: ${remote_out}"
        fi
        return 1
    fi

    case "${remote_out}" in
    "" | *"No volume groups found"*)
        verbose_output "There are no VGs in the host ${host_name}!"
        return 0
        ;;
    esac

    # One slot per VG name; a leading "<hostname>:" tag is stripped first
    for vg in $(echo "${remote_out}" | sed -e "s/^${host_name}://"); do
        VG_NAME[vg_count]=${vg}
        VG_PVNAMES[vg_count]=$(get_vg_pvnames ${host_name} ${VG_NAME[vg_count]})
        if [ $? -ne 0 ]; then
            # on failure the captured text is the error message
            error_output "${VG_PVNAMES[vg_count]}"
            return 1
        fi
        vg_count=$((vg_count + 1))
    done

    return 0
}
-
# get_lv_configs hostname
# Parse the colon-separated "lvdisplay -c" output of node @hostname into
# the globals LV_NAME (last path component of field 1), LV_VGNAME (field 2)
# and LV_SIZE (field 7 with a "K" suffix). Always returns 0.
get_lv_configs() {
    local host_name=$1
    local raw_line lv_line lv_path
    local -i lv_count=0

    # Start from empty arrays
    unset LV_NAME LV_SIZE LV_VGNAME

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            lv_line="$*"
        else
            lv_line="${raw_line}"
        fi

        # A line mentioning "volume group" is a message, not an LV record
        case "${lv_line}" in
        *"volume group"*) break ;;
        esac

        lv_path=$(echo "${lv_line}" | awk -F: '{print $1}')
        LV_NAME[lv_count]=${lv_path##*/}
        LV_VGNAME[lv_count]=$(echo "${lv_line}" | awk -F: '{print $2}')
        LV_SIZE[lv_count]=$(echo "${lv_line}" | awk -F: '{print $7}')K

        lv_count=$((lv_count + 1))
    done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin lvdisplay -c")

    if [ ${lv_count} -eq 0 ]; then
        verbose_output "There are no LVs in the host ${host_name}"
    fi

    return 0
}
-
-#*************************** Network module options ***************************#
# last_is_backslash line
# Check whether the last effective letter of @line is a backslash, i.e.
# whether the line ends in a continuation mark once trailing blanks are
# ignored. Both spaces and tabs count as blanks.
# Returns 0 if it does, 1 otherwise (including an empty or all-blank line).
last_is_backslash() {
    local line="$*"
    local trailing_blanks trimmed

    # Everything after the last non-blank character. When the line holds no
    # non-blank character at all the pattern does not match, so this is the
    # whole line and the trimmed text below ends up empty.
    trailing_blanks=${line##*[![:blank:]]}
    trimmed=${line%"${trailing_blanks}"}

    case "${trimmed}" in
    *\\) return 0 ;;
    esac

    return 1
}
-
# get_module_opts hostname
# Read the "options lnet" lines, together with their backslash-continued
# follow-up lines, from the module configuration file of node @hostname and
# store them in the global MODULE_OPTS. ${DEFAULT_MOD_OPTS} is used when the
# file has no such line. The file is /etc/modules.conf when "uname -r"
# starts with 2.4 and /etc/modprobe.conf otherwise.
# Returns 1 if the kernel version cannot be obtained, 0 otherwise.
get_module_opts() {
    local host_name=$1
    local uname_out kernel_ver modconf_file
    local raw_line conf_line
    local found_lnet=false          # an "options lnet" line was seen
    local in_continuation=false     # the previous stored line ended in "\"
    local -i rc

    MODULE_OPTS=${DEFAULT_MOD_OPTS}

    # Ask the node for its kernel version
    uname_out=$(${REMOTE} ${host_name} "uname -r" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${uname_out}" ]; then
        error_output "get_module_opts():" \
            "remote command error: ${uname_out}"
        return 1
    fi
    if remote_error "get_module_opts" ${host_name} "${uname_out}"; then
        return 1
    fi

    if is_pdsh; then
        kernel_ver=$(echo ${uname_out} | awk '{print $2}')
    else
        kernel_ver=$(echo ${uname_out} | awk '{print $1}')
    fi

    # Pick the module configuration file for that kernel series
    case "${kernel_ver:0:3}" in
    2.4) modconf_file=/etc/modules.conf ;;
    *) modconf_file=/etc/modprobe.conf ;;
    esac

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            conf_line="$*"
        else
            conf_line="${raw_line}"
        fi

        # Ignore empty lines and comment lines
        case "${conf_line}" in
        "" | "#"*) continue ;;
        esac

        case "${conf_line}" in
        *"options lnet"*)
            if ${found_lnet}; then
                MODULE_OPTS=${MODULE_OPTS}$" \n "${conf_line}
            else
                found_lnet=true
                MODULE_OPTS=${conf_line}
            fi

            if last_is_backslash "${conf_line}"; then
                in_continuation=true
            else
                in_continuation=false
            fi
            continue
            ;;
        esac

        # Follow-up line of a backslash-continued "options lnet" line
        if ${in_continuation}; then
            MODULE_OPTS=${MODULE_OPTS}$" \n "${conf_line}
            last_is_backslash "${conf_line}" || in_continuation=false
        fi
    done < <(${REMOTE} ${host_name} "cat ${modconf_file}")

    if [ -z "${MODULE_OPTS}" ]; then
        MODULE_OPTS=${DEFAULT_MOD_OPTS}
    fi

    return 0
}
-
-#************************ HA software configurations ************************#
# is_ha_target hostname target_svname
# Check whether the target @target_svname was made to be high-available,
# i.e. whether it is mentioned in the HA resource file of node @hostname.
# The resource file depends on ${HATYPE_OPT}: ${HA_RES} for ${HBVER_HBV1},
# ${HA_CIB} for ${HBVER_HBV2} and ${CLUMAN_CONFIG} for ${HATYPE_CLUMGR}.
# Returns 0 if the target is found. Returns 1 if it is not found, if the
# target name is empty, if no resource file can be determined or if the
# remote command fails with a message.
is_ha_target() {
    local host_name=$1
    local target_svname=$2
    local res_file=
    local ret_str
    local -i rc

    # Nothing to look for
    [ -n "${target_svname}" ] || return 1

    case "${HATYPE_OPT}" in
    "${HBVER_HBV1}") res_file=${HA_RES} ;;
    "${HBVER_HBV2}") res_file=${HA_CIB} ;;
    "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG} ;;
    esac

    # Without a file name the remote grep would read its standard input
    # instead of a resource file, so refuse to run it.
    if [ -z "${res_file}" ]; then
        error_output "is_ha_target():" \
            "Cannot determine the resource file for" \
            "HA software type - ${HATYPE_OPT}!"
        return 1
    fi

    # Execute remote command to check the resource file
    ret_str=$(${REMOTE} ${host_name} \
        "grep ${target_svname} ${res_file}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${ret_str}" ]; then
        error_output "is_ha_target():" \
            "remote command error: ${ret_str}"
        return 1
    fi

    case "${ret_str}" in
    *"${target_svname}"*) return 0 ;;
    esac

    return 1
}
-
# get_hb_configs hostname
# Collect the Heartbeat settings of node @hostname.
# From ${HA_CF}: lines containing "serial" or "cast" are joined with ":"
# into the global HB_CHANNELS; lines containing "stonith", "ping",
# "respawn", "apiauth" or "compression" are joined into HB_OPTIONS.
# For ${HBVER_HBV1} only, the second field of the ${HA_RES} line whose first
# field is @hostname becomes SRV_IPADDRS.
# Every target in TARGET_DEVNAMES that is_ha_target accepts then gets
# "channels,address,options" in the same slot of HA_CONFIGS.
# Always returns 0; missing configuration is only reported.
get_hb_configs() {
    local host_name=$1
    local raw_line cfg_line res_owner
    local -i idx

    unset HA_CONFIGS
    HB_CHANNELS=
    SRV_IPADDRS=
    HB_OPTIONS=

    # Heartbeat channels and options
    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            cfg_line="$*"
        else
            cfg_line="${raw_line}"
        fi

        # Ignore empty lines and comment lines
        case "${cfg_line}" in
        "" | "#"*) continue ;;
        esac

        case "${cfg_line}" in
        *serial* | *cast*)
            HB_CHANNELS=${HB_CHANNELS:+${HB_CHANNELS}:}${cfg_line}
            ;;
        esac

        case "${cfg_line}" in
        *stonith* | *ping* | *respawn* | *apiauth* | *compression*)
            HB_OPTIONS=${HB_OPTIONS:+${HB_OPTIONS}:}${cfg_line}
            ;;
        esac
    done < <(${REMOTE} ${host_name} "cat ${HA_CF}")

    if [ -z "${HB_CHANNELS}" ]; then
        error_output "get_hb_configs():" \
            "There are no heartbeat channel configs in ${HA_CF}" \
            "of host ${host_name} or ${HA_CF} does not exist!"
        return 0
    fi

    # Heartbeat service address
    if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ]; then
        while read -r raw_line; do
            if is_pdsh; then
                set -- ${raw_line}
                shift
                cfg_line="$*"
            else
                cfg_line="${raw_line}"
            fi

            # Ignore lines without any letter or digit
            case "${cfg_line}" in
            *[[:alnum:]]*) ;;
            *) continue ;;
            esac

            # Ignore comment lines
            case "${cfg_line}" in
            "#"*) continue ;;
            esac

            SRV_IPADDRS=$(echo ${cfg_line} | awk '{print $2}')
            res_owner=$(echo ${cfg_line} | awk '{print $1}')
            if [ -n "${SRV_IPADDRS}" ] \
                && [ "${res_owner}" = "${host_name}" ]; then
                break
            fi
        done < <(${REMOTE} ${host_name} "cat ${HA_RES}")

        if [ -z "${SRV_IPADDRS}" ]; then
            error_output "get_hb_configs(): There" \
                "are no service address in ${HA_RES} of host" \
                "${host_name} or ${HA_RES} does not exist!"
            return 0
        fi
    fi

    # Build the HA configuration item of every high-available target
    for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
        [ -n "${TARGET_DEVNAMES[idx]}" ] || continue

        if is_ha_target ${host_name} ${TARGET_DEVNAMES[idx]}; then
            HA_CONFIGS[idx]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
        fi
    done

    return 0
}
-
# get_cluman_channel hostname
# Print the heartbeat channel that "${CONFIG_CMD} --clumembd" reports on
# node @hostname: "broadcast" when a broadcast line has "yes" as its third
# field, or "multicast <address>" when the multicast address differs from
# 225.0.0.11. Prints an empty line when neither applies. Always returns 0.
get_cluman_channel() {
    local host_name=$1
    local raw_line cfg_line
    local channel=
    local mcast_addr

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            cfg_line="$*"
        else
            cfg_line="${raw_line}"
        fi

        case "${cfg_line}" in
        *broadcast*)
            if [ "$(echo ${cfg_line} | awk '{print $3}')" = "yes" ]; then
                channel="broadcast"
                break
            fi
            ;;
        esac

        case "${cfg_line}" in
        *multicast_ipaddress*)
            mcast_addr=$(echo ${cfg_line} | awk '{print $3}')
            if [ "${mcast_addr}" != "225.0.0.11" ]; then
                channel="multicast ${mcast_addr}"
                break
            fi
            ;;
        esac
    done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd")

    echo ${channel}
    return 0
}
-
# get_cluman_srvaddr hostname target_svname
# Print the service IP addresses of @target_svname on node @hostname,
# joined with ":". They are the third field of every "ipaddress = " line
# printed by ${CONFIG_CMD}. When none is found an error message is printed
# on stdout instead and 1 is returned.
get_cluman_srvaddr() {
    local host_name=$1
    local target_svname=$2
    local raw_line cfg_line
    local addr
    local addr_list=

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            cfg_line="$*"
        else
            cfg_line="${raw_line}"
        fi

        case "${cfg_line}" in
        *"ipaddress = "*)
            addr=$(echo ${cfg_line} | awk '{print $3}')
            addr_list=${addr_list:+${addr_list}:}${addr}
            ;;
        esac
    done < <(${REMOTE} ${host_name} "${CONFIG_CMD}  --service=${target_svname} --service_ipaddresses")

    if [ -z "${addr_list}" ]; then
        echo "$(basename $0): get_cluman_srvaddr() error: Cannot" \
            "get the service IP addresses of ${target_svname} in" \
            "${host_name}! Check ${CONFIG_CMD} command!"
        return 1
    fi

    echo ${addr_list}
    return 0
}
-
# get_cluman_configs hostname
# Build the CluManager entries of HA_CONFIGS for node @hostname: every
# target in TARGET_DEVNAMES accepted by is_ha_target gets
# "channel,service addresses,options" in its slot. HB_CHANNELS, SRV_IPADDRS
# and HB_OPTIONS are reset for each target. Always returns 0; processing
# stops at the first target whose service address cannot be obtained.
get_cluman_configs() {
    local host_name=$1
    local -i idx

    unset HA_CONFIGS

    for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
        HB_CHANNELS=
        SRV_IPADDRS=
        HB_OPTIONS=

        # Skip empty slots and targets that are not high-available
        [ -n "${TARGET_DEVNAMES[idx]}" ] || continue
        is_ha_target ${host_name} ${TARGET_DEVNAMES[idx]} || continue

        # Heartbeat channel; on failure the captured text is the message
        if ! HB_CHANNELS=$(get_cluman_channel ${host_name}); then
            error_output "${HB_CHANNELS}"
        fi

        # Service IP address; on failure the captured text is the message
        if ! SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \
            ${TARGET_SVNAMES[idx]}); then
            error_output "${SRV_IPADDRS}"
            return 0
        fi

        HA_CONFIGS[idx]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS}
    done

    return 0
}
-
# get_ha_configs hostname
# Reset HA_CONFIGS and, when an HA software type was selected with -t,
# fill it from node @hostname using the collector for that type.
# Returns 1 if the collector fails, 0 otherwise.
get_ha_configs() {
    local host_name=$1

    unset HA_CONFIGS

    # No HA software type requested: nothing to collect
    [ -n "${HATYPE_OPT}" ] || return 0

    verbose_output "Collecting HA software configurations from host $1..."

    if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ] \
        || [ "${HATYPE_OPT}" = "${HBVER_HBV2}" ]; then
        # Heartbeat
        get_hb_configs ${host_name} || return 1
    elif [ "${HATYPE_OPT}" = "${HATYPE_CLUMGR}" ]; then
        # CluManager
        get_cluman_configs ${host_name} || return 1
    fi

    return 0
}
-
-#*********************** Lustre targets configurations ***********************#
-
# is_failover_service target_svname
# Succeed if @target_svname is already recorded in ALL_TARGET_SVNAMES,
# i.e. the same service name has been collected before.
is_failover_service() {
    local target_svname=$1
    local known_svname

    for known_svname in "${ALL_TARGET_SVNAMES[@]}"; do
        if [ "${target_svname}" = "${known_svname}" ]; then
            return 0
        fi
    done

    return 1
}
-
# get_svnames hostname
# Read ${LUSTRE_PROC_DEVICES} on node @hostname and store the obd name
# (fourth field) of every line mentioning an MGS, MDT or OST device type in
# the global TARGET_SVNAMES. Each name is also appended to
# ALL_TARGET_SVNAMES; a name that was already recorded there gets
# "--noformat" in the same slot of FAILOVER_FMTOPTS.
# Returns 1 on a matching line without a fourth field, 0 otherwise.
get_svnames() {
    local host_name=$1
    local raw_line dev_line
    local -i sv_idx=0                           # slot in TARGET_SVNAMES
    local -i all_idx=${#ALL_TARGET_SVNAMES[@]}  # slot in ALL_TARGET_SVNAMES

    # Start from empty arrays
    unset TARGET_SVNAMES
    unset FAILOVER_FMTOPTS

    while read -r raw_line; do
        if is_pdsh; then
            # drop the first word of the line before parsing it
            set -- ${raw_line}
            shift
            dev_line="$*"
        else
            dev_line="${raw_line}"
        fi

        # Only lines of lustre target devices are of interest
        case "${dev_line}" in
        *${MGS_TYPE}* | *${MDT_TYPE}* | *${OST_TYPE}*) ;;
        *) continue ;;
        esac

        TARGET_SVNAMES[sv_idx]=$(echo ${dev_line} | awk '{print $4}')
        if [ -z "${TARGET_SVNAMES[sv_idx]}" ]; then
            error_output "get_svnames(): Invalid" \
                "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}" \
                "- \"${dev_line}\"!"
            return 1
        fi

        if is_failover_service ${TARGET_SVNAMES[sv_idx]}; then
            FAILOVER_FMTOPTS[sv_idx]="--noformat"
        fi
        ALL_TARGET_SVNAMES[all_idx]=${TARGET_SVNAMES[sv_idx]}
        sv_idx=$((sv_idx + 1))
        all_idx=$((all_idx + 1))
    done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}")

    if [ ${sv_idx} -eq 0 ]; then
        verbose_output "There are no lustre services running" \
            "on the node ${host_name}!"
    fi

    return 0
}
-
# is_loopdev devname
# Succeed if @devname contains "/dev/loop" followed by a digit, i.e. it
# names a loop device. An empty name is not a loop device.
is_loopdev() {
    local devname=$1

    case "${devname}" in
    */dev/loop[[:digit:]]*) return 0 ;;
    esac

    return 1
}
-
# get_devname hostname svname
# Print the device of lustre target @svname on node @hostname.
# For the MGS target the device is looked up with "findfs LABEL=<svname>";
# nothing is printed when the label cannot be resolved. For the other
# targets it is read from the target's mntdev file under ${LUSTRE_PROC}.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_devname() {
    local host_name=$1
    local target_svname=$2
    local devname=
    local remote_out
    local sv_type type_var mntdev_path
    local -i rc
    local -i field=1    # output column holding the value

    if [ "${target_svname}" = "${MGS_SVNAME}" ]; then
        remote_out=$(${REMOTE} ${host_name} \
            "PATH=\$PATH:/sbin:/usr/sbin findfs LABEL=${target_svname}" 2>&1)
        rc=$?

        case "${remote_out}" in
        *"Unable to resolve"*)
            # no device carries this label: report an empty name
            ;;
        *)
            if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
                echo "$(basename $0): get_devname() error:" \
                    "remote command error: ${remote_out}"
                return 1
            fi

            is_pdsh && field=2
            devname=$(echo ${remote_out} \
                | awk -v col=${field} '{print $col}')
            ;;
        esac
    else
        # "<fsname>-OSTxxxx" -> OST_TYPE, "<fsname>-MDTxxxx" -> MDT_TYPE
        sv_type=$(echo ${target_svname} | cut -d - -f 2)
        type_var=${sv_type:0:3}_TYPE

        mntdev_path=${LUSTRE_PROC}/${!type_var}/${target_svname}/mntdev

        remote_out=$(${REMOTE} ${host_name} "cat ${mntdev_path}" 2>&1)
        rc=$?
        if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
            echo "$(basename $0): get_devname() error:" \
                "remote command error: ${remote_out}"
            return 1
        fi

        case "${remote_out}" in
        *"No such file"*)
            echo "$(basename $0): get_devname() error:" \
                "${mntdev_path} does not exist in ${host_name}!"
            return 1
            ;;
        esac

        is_pdsh && field=2
        devname=$(echo ${remote_out} | awk -v col=${field} '{print $col}')
    fi

    echo ${devname}
    return 0
}
-
# get_devsize hostname target_devname
# Print the size (KB) of @target_devname on node @hostname: the number
# reported by "blockdev --getsize", divided by two.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_devsize() {
    local host_name=$1
    local target_devname=$2
    local devsize=
    local remote_out
    local -i rc
    local -i field=1    # output column holding the value

    remote_out=$(${REMOTE} ${host_name} \
        "PATH=\$PATH:/sbin:/usr/sbin blockdev --getsize ${target_devname}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
        echo "$(basename $0): get_devsize() error:" \
            "remote command error: ${remote_out}"
        return 1
    fi

    is_pdsh && field=2
    devsize=$(echo ${remote_out} | awk -v col=${field} '{print $col}')

    # The value has to start with a digit to be accepted as a size
    case "${devsize}" in
    [[:digit:]]*) ;;
    *)
        echo "$(basename $0): get_devsize() error: can't" \
            "get device size of ${target_devname} in ${host_name}!"
        return 1
        ;;
    esac

    ((devsize /= 2))

    echo ${devsize}
    return 0
}
-
# get_realdevname hostname loop_dev
# Print the name found in parentheses in the "losetup <loop_dev>" output of
# node @hostname, i.e. what the loop device @loop_dev is attached to.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_realdevname() {
    local host_name=$1
    local loop_dev=$2
    local backing=
    local remote_out
    local -i rc
    local -i field=3    # output column holding "(<name>)"

    remote_out=$(${REMOTE} ${host_name} \
        "PATH=\$PATH:/sbin:/usr/sbin losetup ${loop_dev}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
        echo "$(basename $0): get_realdevname() error:" \
            "remote command error: ${remote_out}"
        return 1
    fi

    is_pdsh && field=4
    backing=$(echo ${remote_out} | awk -v col=${field} '{print $col}')
    # Remove the enclosing parentheses
    backing=${backing#"("}
    backing=${backing%")"}

    case "${remote_out}" in
    *"No such"*) backing= ;;
    esac
    if [ -z "${backing}" ]; then
        echo "$(basename $0): get_realdevname() error: can't" \
            "get info on device ${loop_dev} in ${host_name}!"
        return 1
    fi

    echo ${backing}
    return 0
}
-
# get_mntpnt hostname target_devname
# Print the mount point of @target_devname on node @hostname, taken from
# the /etc/mtab lines that mention the device.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_mntpnt() {
    local host_name=$1
    local target_devname=$2
    local mount_dir=
    local remote_out
    local -i rc
    local -i field=2    # output column holding the mount point

    remote_out=$(${REMOTE} ${host_name} \
        "cat /etc/mtab | grep ${target_devname}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
        echo "$(basename $0): get_mntpnt() error:" \
            "remote command error: ${remote_out}"
        return 1
    fi

    is_pdsh && field=3
    mount_dir=$(echo ${remote_out} | awk -v col=${field} '{print $col}')

    if [ -z "${mount_dir}" ]; then
        echo "$(basename $0): get_mntpnt() error: can't" \
            "get the mount point of ${target_devname} in ${host_name}!"
        return 1
    fi

    echo ${mount_dir}
    return 0
}
-
# get_devnames hostname
# For every service in TARGET_SVNAMES collect, from node @hostname, the
# target device (TARGET_DEVNAMES) and its mount point (TARGET_MNTPNTS).
# For a loop device the size is stored in TARGET_DEVSIZES and the device
# name is replaced by the name get_realdevname reports.
# An MGS service without a device of its own is taken to be a combo
# mgs/mdt target and keeps an empty slot.
# Returns 1 as soon as one lookup fails, 0 otherwise.
get_devnames() {
    local host_name=$1
    local -i idx

    # Start from empty arrays
    unset TARGET_DEVNAMES TARGET_DEVSIZES TARGET_MNTPNTS

    for ((idx = 0; idx < ${#TARGET_SVNAMES[@]}; idx++)); do
        # The helpers print their error message on stdout when they fail,
        # so after a failure the captured text is the message to report.
        if ! TARGET_DEVNAMES[idx]=$(get_devname ${host_name} \
            ${TARGET_SVNAMES[idx]}); then
            error_output "${TARGET_DEVNAMES[idx]}"
            return 1
        fi

        if [ -z "${TARGET_DEVNAMES[idx]}" ]; then
            if [ "${TARGET_SVNAMES[idx]}" != "${MGS_SVNAME}" ]; then
                error_output "get_devname():" \
                    "No device corresponding to target" \
                    "${TARGET_SVNAMES[idx]} in ${host_name}!"
                return 1
            fi

            verbose_output "There exists combo mgs/mdt" \
                "target in ${host_name}."
            continue
        fi

        # Mount point of the target
        if ! TARGET_MNTPNTS[idx]=$(get_mntpnt ${host_name} \
            ${TARGET_DEVNAMES[idx]}); then
            error_output "${TARGET_MNTPNTS[idx]}"
            return 1
        fi

        # Nothing more to do unless the target sits on a loop device
        is_loopdev ${TARGET_DEVNAMES[idx]} || continue

        if ! TARGET_DEVSIZES[idx]=$(get_devsize ${host_name} \
            ${TARGET_DEVNAMES[idx]}); then
            error_output "${TARGET_DEVSIZES[idx]}"
            return 1
        fi

        if ! TARGET_DEVNAMES[idx]=$(get_realdevname ${host_name} \
            ${TARGET_DEVNAMES[idx]}); then
            error_output "${TARGET_DEVNAMES[idx]}"
            return 1
        fi
    done

    return 0
}
-
# is_target target_svtype ldd_flags
# Check the service type of a lustre target: succeed if the bit of
# @target_svtype ("mdt", "ost" or "mgs") is set in @ldd_flags.
# Returns 0 if the bit is set. Returns 1 if it is not set, if @ldd_flags
# cannot be evaluated or if @target_svtype is not one of the three types
# (reported through error_output).
is_target() {
    local -i type_mask

    case "$1" in
    mdt) type_mask=${LDD_F_SV_TYPE_MDT} ;;
    ost) type_mask=${LDD_F_SV_TYPE_OST} ;;
    mgs) type_mask=${LDD_F_SV_TYPE_MGS} ;;
    *)
        # The pattern must stay unquoted: a quoted "*" would only match
        # a literal asterisk and let every invalid type fall through.
        error_output "is_target(): Invalid" \
            "target service type - \"$1\"!"
        return 1
        ;;
    esac

    if (($2 & type_mask)); then
        return 0
    fi

    return 1
}
-
# get_devtype ldd_flags
# Print the service type of a lustre target from @ldd_flags: "mgs|mdt",
# "mgs", "mdt" or "ost".
# If @ldd_flags is empty or carries none of the type bits, an error message
# is printed on stdout instead and 1 is returned.
get_devtype() {
    local ldd_flags=$1
    local target_devtype=

    # Validate the argument itself rather than a variable that merely
    # happens to be visible from the caller's scope.
    if [ -z "${ldd_flags}" ]; then
        echo "$(basename $0): get_devtype() error: Invalid" \
            "ldd_flags - it's value is null!"
        return 1
    fi

    if is_target "mgs" "${ldd_flags}"; then
        if is_target "mdt" "${ldd_flags}"; then
            target_devtype="mgs|mdt"
        else
            target_devtype="mgs"
        fi
    elif is_target "mdt" "${ldd_flags}"; then
        target_devtype="mdt"
    elif is_target "ost" "${ldd_flags}"; then
        target_devtype="ost"
    else
        echo "$(basename $0): get_devtype() error: Invalid" \
            "ldd_flags - \"${ldd_flags}\"!"
        return 1
    fi

    echo "${target_devtype}"
    return 0
}
-
# get_mntopts ldd_mount_opts
# Print the user-specified mount options contained in @ldd_mount_opts:
# the permanent option strings are stripped from the front one after the
# other, then one leading comma. A result that still contains a comma is
# printed inside double quotes.
get_mntopts() {
    local ldd_mount_opts=$1
    local user_opts=${ldd_mount_opts}
    local builtin_opts

    # Each string is removed only if the remainder starts with it
    for builtin_opts in "${ALWAYS_MNTOPTS}" "${MDT_MGS_ALWAYS_MNTOPTS}" \
        "${OST_ALWAYS_MNTOPTS}" "${OST_DEFAULT_MNTOPTS}"; do
        user_opts=${user_opts#${builtin_opts}}
    done
    user_opts=${user_opts#,}

    case "${user_opts}" in
    *,*) echo \"${user_opts}\" ;;
    *) echo ${user_opts} ;;
    esac

    return 0
}
-
# get_mgsnids ldd_params
# Print the mgs nids found in @ldd_params: the value of every "mgsnode="
# parameter, joined with ":" (one value per mgs node). A result that
# contains a comma is printed inside double quotes.
get_mgsnids() {
    local ldd_params="$*"
    local param
    local node_nids     # nids of one mgs node
    local nid_list=     # nids of all mgs nodes

    for param in ${ldd_params}; do
        case "${param}" in
        *mgsnode=*) ;;
        *) continue ;;
        esac

        node_nids=${param#${PARAM_MGSNODE}}
        nid_list=${nid_list:+${nid_list}:}${node_nids}
    done

    case "${nid_list}" in
    *,*) echo \"${nid_list}\" ;;
    *) echo ${nid_list} ;;
    esac

    return 0
}
-
# get_failnids ldd_params
# Print the failover nids found in @ldd_params: the value of every
# "failover.node=" parameter, joined with ":" (one value per failover
# node). A result that contains a comma is printed inside double quotes.
get_failnids() {
    local ldd_params="$*"
    local param
    local node_nids     # nids of one failover node
    local nid_list=     # nids of all failover nodes of this target

    for param in ${ldd_params}; do
        # "?" stands for any single character between "failover" and "node="
        case "${param}" in
        *failover?node=*) ;;
        *) continue ;;
        esac

        node_nids=${param#${PARAM_FAILNODE}}
        nid_list=${nid_list:+${nid_list}:}${node_nids}
    done

    case "${nid_list}" in
    *,*) echo \"${nid_list}\" ;;
    *) echo ${nid_list} ;;
    esac

    return 0
}
-
# get_fmtopts target_devname hostname ldd_params
# Print the remaining format options of a lustre target: every parameter
# in @ldd_params other than the mgsnode and failover.node ones, each
# rendered as --param="<parameter>" and separated by blanks.
# @target_devname and @hostname are accepted but not used.
get_fmtopts() {
    local target_devname=$1
    local host_name=$2
    shift
    shift
    local ldd_params="$*"
    local param
    local opt_list=

    for param in ${ldd_params}; do
        # mgs and failover nids are reported by their own functions
        case "${param}" in
        *mgsnode=* | *failover?node=*) continue ;;
        esac

        [ -n "${param}" ] || continue
        opt_list=${opt_list:+${opt_list} }--param=\"${param}\"
    done

    echo ${opt_list}
    return 0
}
-
# get_stripecount host_name target_fsname
# Print the stripe count of @target_fsname, read on node @host_name from
# ${LUSTRE_PROC}/lov/<target_fsname>-mdtlov/stripecount. The value has to
# be "-1" or start with a digit.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_stripecount() {
    local host_name=$1
    local target_fsname=$2
    local count=
    local count_file
    local remote_out
    local -i rc
    local -i field=1    # output column holding the value

    count_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount
    remote_out=$(${REMOTE} ${host_name} "cat ${count_file}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ] && [ -n "${remote_out}" ]; then
        echo "$(basename $0): get_stripecount() error:" \
            "remote command to ${host_name} error: ${remote_out}"
        return 1
    fi

    is_pdsh && field=2
    count=$(echo ${remote_out} | awk -v col=${field} '{print $col}')

    case "${count}" in
    -1 | [[:digit:]]*) ;;
    *)
        echo "$(basename $0): get_stripecount() error: can't" \
            "get stripe count of ${target_fsname} in ${host_name}!"
        return 1
        ;;
    esac

    echo ${count}
    return 0
}
-
-# get_stripecount_opt host_name target_fsname
-# Build the stripe count format option for a lustre mdt target.
-#
-# An empty @target_fsname is replaced by "lustre". Prints
-# ${OPTSTR_STRIPE_COUNT}<count> on stdout unless the count is 1, in which
-# case an empty line is printed. Returns 0 on success; when get_stripecount
-# fails its output is printed and 1 is returned.
-get_stripecount_opt() {
-    local host_name=$1
-    local target_fsname=$2
-    local count=
-    local opt=
-
-    if [ -z "${target_fsname}" ]; then
-        target_fsname="lustre"
-    fi
-
-    # Query the stripe count; on failure ${count} holds the error text
-    if ! count=$(get_stripecount ${host_name} ${target_fsname}); then
-        echo "${count}"
-        return 1
-    fi
-
-    # A stripe count of 1 needs no explicit option
-    [ "${count}" = "1" ] || opt=${OPTSTR_STRIPE_COUNT}${count}
-
-    echo ${opt}
-    return 0
-}
-
-# get_ldds hostname
-# Get the lustre target disk data from the node @hostname
-#
-# For every non-empty entry of TARGET_DEVNAMES this runs
-# "${TUNEFS} --print --verbose <device>" on @hostname through ${REMOTE} and
-# fills the same index of TARGET_DEVTYPES, TARGET_FSNAMES, TARGET_MGSNIDS,
-# TARGET_INDEXES, TARGET_FMTOPTS, TARGET_MNTOPTS and TARGET_FAILNIDS.
-# TARGET_DEVSIZES and FAILOVER_FMTOPTS are read to extend the format options.
-# Returns 0 on success, 1 after reporting a problem through error_output.
-get_ldds(){
- declare -i i
- local host_name=$1
- local ret_line line
- local flags mnt_opts params
- local stripecount_opt
-
- # Initialize the arrays
- unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES
- unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS
-
- # Get lustre target device type, fsname, index, etc.
- # from MOUNT_DATA_FILE. Using tunefs.lustre to read it.
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- # Reset the per-target scratch values
- flags=
- mnt_opts=
- params=
- stripecount_opt=
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- # Execute remote command to read MOUNT_DATA_FILE
- while read -r ret_line; do
- # When is_pdsh succeeds the first word of each output line is
- # dropped (presumably the host-name prefix - confirm).
- # Note: "set --" replaces the positional parameters of get_ldds;
- # $1 has already been saved in host_name.
- if is_pdsh; then
- set -- ${ret_line}
- shift
- line="$*"
- else
- line="${ret_line}"
- fi
-
- # "Index:" line: the second word is the target index
- if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then
- TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- # "Lustre FS:" line: the third word is the filesystem name
- if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then
- TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'`
- continue
- fi
-
- # "Flags:" line: the second word is the ldd flags value
- if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then
- flags=`echo ${line}|awk '{print $2}'`
- continue
- fi
-
- # "Persistent mount opts:" line: keep everything after the label
- if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then
- mnt_opts=`echo ${line}|awk '{print $0}'`
- mnt_opts=`echo ${mnt_opts#Persistent mount opts: }`
- continue
- fi
-
- # "Parameters:" line: keep everything after the label and stop
- # reading, no later line is looked at
- if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then
- params=`echo ${line}|awk '{print $0}'`
- params=`echo ${params#Parameters:}`
- break
- fi
- done < <(${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin
- ${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null")
-
- # No "Flags:" line was seen: the remote ${TUNEFS} output is unusable
- if [ -z "${flags}" ]; then
- error_output "get_ldds(): Invalid" \
- "ldd_flags of target ${TARGET_DEVNAMES[i]}" \
- "in host ${host_name} - it's value is null!"\
- "Check ${TUNEFS} command!"
- return 1
- fi
-
- # The index field is left empty for "unassigned" indexes and for
- # targets that is_target reports as "mgs"
- if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \
- || is_target "mgs" ${flags}; then
- TARGET_INDEXES[i]=
- fi
-
- # The fsname "lustre" is stored as an empty field; get_stripecount_opt
- # and get_I_opt substitute "lustre" again when the fsname is empty
- [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]=
-
- # Get the lustre target service type
- TARGET_DEVTYPES[i]=$(get_devtype ${flags})
- if [ $? -ne 0 ]; then
- error_output "${TARGET_DEVTYPES[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get the lustre target mount options
- TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}")
-
- # Get mgs nids of the lustre target
- TARGET_MGSNIDS[i]=$(get_mgsnids "${params}")
-
- # Get failover nids of the lustre target
- TARGET_FAILNIDS[i]=$(get_failnids "${params}")
- if [ $? -ne 0 ]; then
- error_output "${TARGET_FAILNIDS[i]} From device" \
- "${TARGET_DEVNAMES[i]} in host ${host_name}!"
- return 1
- fi
-
- # Get other format options of the lustre target
- TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}")
- if [ $? -ne 0 ]; then
- error_output "${TARGET_FMTOPTS[i]}"
- return 1
- fi
-
- # A known device size is put in front of the other format options
- if [ -n "${TARGET_DEVSIZES[i]}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}"
- else
- TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}"
- fi
- fi
-
- # Failover format options, when present, are appended
- if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]}
- else
- TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]}
- fi
- fi
-
- # Only targets that is_target reports as "mdt" get a stripe count option
- if is_target "mdt" ${flags}; then
- # Get the stripe count option
- stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]})
- if [ $? -ne 0 ]; then
- error_output "${stripecount_opt}"
- return 1
- fi
-
- if [ -n "${stripecount_opt}" ]; then
- if [ -n "${TARGET_FMTOPTS[i]}" ]; then
- TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt}
- else
- TARGET_FMTOPTS[i]=${stripecount_opt}
- fi
- fi
- fi
-
- # Format options containing a comma are wrapped in double quotes
- # (presumably so they stay a single csv field)
- if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then
- TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\""
- fi
- done
-
- return 0
-}
-
-# get_journalsize target_devname hostname
-# Get the journal size of lustre target @target_devname from @hostname
-get_journalsize() {
- local target_devname=$1
- local host_name=$2
- local journal_inode=
- local journal_size=
- local ret_str
-
- # Execute remote command to get the journal inode number
- ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
-debugfs -R 'stats -h' ${target_devname} | grep 'Journal inode:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Journal inode:*}}
- journal_inode=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal inode of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the journal size
- ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
-debugfs -R 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_journalsize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%User:*}}
- journal_size=`echo ${ret_str} | awk '{print $6}'`
- if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_journalsize() error: can't" \
- "get journal size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "journal_size /= 1024*1024" # MB
-
- echo ${journal_size}
- return 0
-}
-
-# get_defaultjournalsize target_devsize
-# Calculate the default journal size from target device size @target_devsize
-get_defaultjournalsize() {
- declare -i target_devsize=$1
- declare -i journal_size=0
- declare -i max_size base_size
-
- let "base_size = 1024*1024"
- if [ ${target_devsize} -gt ${base_size} ]; then # 1GB
- let "journal_size = target_devsize / 102400"
- let "journal_size *= 4"
- fi
-
- let "max_size = 102400 * L_BLOCK_SIZE"
- let "max_size >>= 20" # 400MB
-
- if [ ${journal_size} -gt ${max_size} ]; then
- let "journal_size = max_size"
- fi
-
- echo ${journal_size}
- return 0
-}
-
-# figure_journal_size target_devname hostname
-# Find a reasonable journal file size given the number of blocks
-# in the filesystem. This algorithm is derived from figure_journal_size()
-# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm.
-#
-# The block count is read with debugfs on @hostname through ${REMOTE}.
-# On success prints journal_blocks * L_BLOCK_SIZE / 1048576 on stdout and
-# returns 0. On failure prints an error message on stdout (callers capture
-# it and hand it to error_output) and returns 1.
-figure_journal_size() {
-    local target_devname=$1
-    local host_name=$2
-    local remote_cmd
-    local ret_str
-    local rc
-    # block_count is deliberately NOT declared with -i: an integer variable
-    # silently turns empty or malformed output into 0, which would defeat
-    # the validation below.
-    local block_count=
-    local journal_blocks
-    local journal_size
-
-    # Execute remote command to get the block count.
-    # $( ) is used instead of backticks on purpose: inside backticks the
-    # backslash of \$PATH is removed, so the local PATH would be expanded
-    # here instead of the PATH of the remote node.
-    remote_cmd="PATH=\$PATH:/sbin:/usr/sbin debugfs -R 'stats -h'"
-    remote_cmd="${remote_cmd} ${target_devname} | grep 'Block count:'"
-    ret_str=$(${REMOTE} ${host_name} "${remote_cmd}" 2>&1)
-    rc=$?
-    if [ ${rc} -ne 0 ] && [ -n "${ret_str}" ]; then
-        echo "$(basename "$0"): figure_journal_size() error:" \
-            "remote command error: ${ret_str}"
-        return 1
-    fi
-
-    # Drop everything in front of the last "Block count:" label; the
-    # number is then the third word
-    ret_str=${ret_str#"${ret_str%Block count:*}"}
-    block_count=$(echo ${ret_str} | awk '{print $3}')
-    case "${block_count}" in
-    '' | *[!0-9]*)
-        echo "$(basename "$0"): figure_journal_size() error: can't" \
-            "get block count of ${target_devname} in ${host_name}!"
-        return 1
-        ;;
-    esac
-
-    # Journal size in blocks, stepped by filesystem size in blocks
-    if ((block_count < 32768)); then
-        journal_blocks=1024
-    elif ((block_count < 256 * 1024)); then
-        journal_blocks=4096
-    elif ((block_count < 512 * 1024)); then
-        journal_blocks=8192
-    elif ((block_count < 1024 * 1024)); then
-        journal_blocks=16384
-    else
-        journal_blocks=32768
-    fi
-
-    journal_size=$((journal_blocks * L_BLOCK_SIZE / 1048576))
-
-    echo ${journal_size}
-    return 0
-}
-
-# get_J_opt hostname target_devname target_devsize
-# Get the mkfs -J option of lustre target @target_devname
-# from the node @hostname
-get_J_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devsize=$3
- local journal_size=
- local default_journal_size=
- local journal_opt=
-
- # Get the real journal size of lustre target
- journal_size=$(get_journalsize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${journal_size}"
- return 1
- fi
-
- # Get the default journal size of lustre target
- default_journal_size=$(get_defaultjournalsize ${target_devsize})
- if [ "${default_journal_size}" = "0" ]; then
- default_journal_size=$(figure_journal_size ${target_devname} \
- ${host_name})
- if [ $? -ne 0 ]; then
- echo "${default_journal_size}"
- return 1
- fi
- fi
-
- if [ "${journal_size}" != "${default_journal_size}" ]; then
- journal_opt="-J size=${journal_size}"
- fi
-
- echo ${journal_opt}
- return 0
-}
-
-# get_ratio target_devname hostname
-# Get the bytes/inode ratio of lustre target @target_devname from @hostname
-get_ratio() {
- local target_devname=$1
- local host_name=$2
- local inode_count=
- local block_count=
- local ratio=
- local ret_str
-
- # Execute remote command to get the inode count
- ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
-debugfs -R 'stats -h' ${target_devname} | grep 'Inode count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode count:*}}
- inode_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get inode count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- # Execute remote command to get the block count
- ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
-debugfs -R 'stats -h' ${target_devname} | grep 'Block count:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_ratio() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Block count:*}}
- block_count=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_ratio() error: can't" \
- "get block count of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- let "ratio = block_count*L_BLOCK_SIZE/inode_count"
-
- echo ${ratio}
- return 0
-}
-
-# get_default_ratio target_devtype target_devsize
-# Calculate the default bytes/inode ratio from target type @target_devtype
-get_default_ratio() {
- local target_devtype=$1
- declare -i target_devsize=$2
- local ratio=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- ratio=4096;;
- "ost")
- [ ${target_devsize} -gt 1000000 ] && ratio=16384;;
- esac
-
- [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE}
-
- echo ${ratio}
- return 0
-}
-
-# get_i_opt hostname target_devname target_devtype target_devsize
-# Build the mkfs -i option of lustre target @target_devname on the node
-# @hostname.
-#
-# Compares the ratio reported by get_ratio with the one from
-# get_default_ratio. Prints "-i <ratio>" on stdout when they differ, an
-# empty line otherwise, and returns 0. When get_ratio fails, its output is
-# printed and 1 is returned.
-get_i_opt() {
-    local host_name=$1
-    local target_devname=$2
-    local target_devtype=$3
-    local target_devsize=$4
-    local actual=
-    local expected=
-    local opt=
-
-    # Real bytes/inode ratio of the target
-    if ! actual=$(get_ratio ${target_devname} ${host_name}); then
-        echo "${actual}"
-        return 1
-    fi
-
-    # Default bytes/inode ratio of the target
-    expected=$(get_default_ratio ${target_devtype} ${target_devsize})
-
-    # Only a non-default ratio needs to be spelled out
-    [ "${actual}" = "${expected}" ] || opt="-i ${actual}"
-
-    echo ${opt}
-    return 0
-}
-
-# get_isize target_devname hostname
-# Get the inode size of lustre target @target_devname from @hostname
-get_isize() {
- local target_devname=$1
- local host_name=$2
- local inode_size=
- local ret_str
-
- # Execute remote command to get the inode size
- ret_str=`${REMOTE} ${host_name} "PATH=\$PATH:/sbin:/usr/sbin \
-debugfs -R 'stats -h' ${target_devname} | grep 'Inode size:'" 2>&1`
- if [ $? -ne 0 -a -n "${ret_str}" ]; then
- echo "`basename $0`: get_isize() error:" \
- "remote command error: ${ret_str}"
- return 1
- fi
-
- ret_str=${ret_str#${ret_str%Inode size:*}}
- inode_size=`echo ${ret_str} | awk '{print $3}'`
- if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ]
- then
- echo "`basename $0`: get_isize() error: can't" \
- "get inode size of ${target_devname} in ${host_name}!"
- return 1
- fi
-
- echo ${inode_size}
- return 0
-}
-
-# get_mdt_default_isize host_name target_fsname
-# Calculate the default inode size of lustre mdt target
-#
-# The size is chosen from the stripe count reported by get_stripecount:
-# 512 up to 13 stripes, 1024 up to 34, 2048 up to 77, and 512 above that.
-# Prints the inode size on stdout and returns 0. When get_stripecount
-# fails, its error message is printed on stdout and 1 is returned.
-get_mdt_default_isize() {
-    local host_name=$1
-    local target_fsname=$2
-    # stripe_count is deliberately NOT declared with -i: on failure it
-    # receives the error text of get_stripecount, and assigning text to an
-    # integer variable fails arithmetically and loses the message.
-    local stripe_count=
-    local inode_size=
-
-    # Get the stripe count
-    if ! stripe_count=$(get_stripecount ${host_name} ${target_fsname}); then
-        echo "${stripe_count}"
-        return 1
-    fi
-
-    # NOTE(review): counts above 77 fall back to 512, the same value as the
-    # smallest tier; this looks intentional (presumably it mirrors
-    # mkfs.lustre) - confirm.
-    if ((stripe_count > 77)); then
-        inode_size=512
-    elif ((stripe_count > 34)); then
-        inode_size=2048
-    elif ((stripe_count > 13)); then
-        inode_size=1024
-    else
-        inode_size=512
-    fi
-
-    echo ${inode_size}
-    return 0
-}
-
-# get_default_isize host_name target_devtype target_fsname
-# Calculate the default inode size of lustre target type @target_devtype
-get_default_isize() {
- local host_name=$1
- local target_devtype=$2
- local target_fsname=$3
- local inode_size=
-
- case "${target_devtype}" in
- "mdt" | "mgs|mdt" | "mdt|mgs")
- inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${inode_size}"
- return 1
- fi
- ;;
- "ost")
- inode_size=256;;
- esac
-
- [ -z "${inode_size}" ] && inode_size=128
-
- echo ${inode_size}
- return 0
-}
-
-# get_I_opt hostname target_devname target_devtype target_fsname
-# Get the mkfs -I option of lustre target @target_devname
-# from the node @hostname
-get_I_opt() {
- local host_name=$1
- local target_devname=$2
- local target_devtype=$3
- local target_fsname=$4
- local isize=
- local default_isize=
- local isize_opt=
-
- # Get the real inode size of lustre target
- isize=$(get_isize ${target_devname} ${host_name})
- if [ $? -ne 0 ]; then
- echo "${isize}"
- return 1
- fi
-
- # Get the default inode size of lustre target
- [ -z "${target_fsname}" ] && target_fsname="lustre"
- default_isize=$(get_default_isize ${host_name} ${target_devtype} \
- ${target_fsname})
- if [ $? -ne 0 ]; then
- echo "${default_isize}"
- return 1
- fi
-
- if [ "${isize}" != "${default_isize}" ]; then
- isize_opt="-I ${isize}"
- fi
-
- echo ${isize_opt}
- return 0
-}
-
-# get_mkfsopts hostname
-# Collect the mkfs options of the lustre targets on the node @hostname.
-#
-# For every non-empty entry of TARGET_DEVNAMES the -J, -i and -I options
-# are gathered (in that order) and stored, separated by single spaces, in
-# the same index of TARGET_MKFSOPTS. A missing TARGET_DEVSIZES entry is
-# filled in through get_devsize. A value containing a comma is wrapped in
-# double quotes. Returns 0 on success, 1 after reporting a helper failure
-# through error_output.
-get_mkfsopts(){
-    declare -i i
-    local host_name=$1
-    local j_opt
-    local i_opt
-    local isize_opt
-
-    # Start with an empty result array
-    unset TARGET_MKFSOPTS
-
-    # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I
-    for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
-        j_opt=
-        i_opt=
-        isize_opt=
-
-        if [ -z "${TARGET_DEVNAMES[i]}" ]; then
-            continue
-        fi
-
-        # Device size, looked up only when not known yet
-        if [ -z "${TARGET_DEVSIZES[i]}" ]; then
-            if ! TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \
-                ${TARGET_DEVNAMES[i]}); then
-                error_output "${TARGET_DEVSIZES[i]}"
-                return 1
-            fi
-        fi
-
-        # Journal option
-        if ! j_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \
-            ${TARGET_DEVSIZES[i]}); then
-            error_output "${j_opt}"
-            return 1
-        fi
-        if [ -n "${j_opt}" ]; then
-            TARGET_MKFSOPTS[i]="${TARGET_MKFSOPTS[i]:+${TARGET_MKFSOPTS[i]} }${j_opt}"
-        fi
-
-        # Bytes-per-inode ratio option
-        if ! i_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \
-            ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]}); then
-            error_output "${i_opt}"
-            return 1
-        fi
-        if [ -n "${i_opt}" ]; then
-            TARGET_MKFSOPTS[i]="${TARGET_MKFSOPTS[i]:+${TARGET_MKFSOPTS[i]} }${i_opt}"
-        fi
-
-        # Inode size option
-        if ! isize_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \
-            ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]}); then
-            error_output "${isize_opt}"
-            return 1
-        fi
-        if [ -n "${isize_opt}" ]; then
-            TARGET_MKFSOPTS[i]="${TARGET_MKFSOPTS[i]:+${TARGET_MKFSOPTS[i]} }${isize_opt}"
-        fi
-
-        # Options containing a comma are wrapped in double quotes
-        if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then
-            TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\""
-        fi
-    done
-    return 0
-}
-
-# get_target_configs hostname
-# Get the lustre target informations from the node @hostname
-get_target_configs() {
- declare -i i
- local host_name=$1
- local ret_line line
-
- # Initialize the arrays
- unset TARGET_CONFIGS
-
- # Get lustre target server names
- if ! get_svnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device names, mount points and loop device sizes
- if ! get_devnames ${host_name}; then
- return 1
- fi
-
- # Get lustre target device type, fsname, index, etc.
- if ! get_ldds ${host_name}; then
- return 1
- fi
-
- # Get mkfs options of lustre targets
- if ! get_mkfsopts ${host_name}; then
- return 1
- fi
-
- # Construct lustre target configs
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
- TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]}
- done
-
- return 0
-}
-
-# get_configs hostname
-# Get all the informations needed to generate a csv file from
-# the node @hostname
-get_configs() {
- # Check the hostname
- if [ -z "$1" ]; then
- error_output "get_configs():" \
- "Missing hostname!"
- return 1
- fi
-
- # Get network module options
- verbose_output ""
- verbose_output "Collecting network module options from host $1..."
- if ! get_module_opts $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get lustre target informations
- verbose_output "Collecting Lustre targets informations from host $1..."
- if ! get_target_configs $1; then
- return 1
- fi
- verbose_output "OK"
-
- # Get HA software configurations
- if ! get_ha_configs $1; then
- return 1
- fi
-
- return 0
-}
-
-# Collect the linux MD/LVM device information of every node listed in
-# HOST_NAMES and append it to ${LUSTRE_CSV_FILE}: MD devices first, then
-# PVs, VGs and LVs. Returns 1 as soon as one of the get_*_configs helpers
-# fails, 0 otherwise.
-get_mdlvm_info() {
-    declare -i idx
-    declare -i i
-    local line
-
-    for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
-        if [ -z "${HOST_NAMES[idx]}" ]; then
-            continue
-        fi
-
-        # MD devices: collect, then write one line per device
-        get_md_configs ${HOST_NAMES[idx]} || return 1
-
-        for ((i = 0; i < ${#MD_NAME[@]}; i++)); do
-            line=${HOST_NAMES[idx]},${MD_MARKER},${MD_NAME[i]}
-            line=${line},,,${MD_LEVEL[i]},${MD_DEVS[i]}
-            verbose_output "Informations of MD device ${MD_NAME[i]}" \
-                "in host ${HOST_NAMES[idx]} are as follows:"
-            verbose_output "${line}"
-            echo "${line}" >> ${LUSTRE_CSV_FILE}
-        done
-
-        # PVs: collect, then write a single line when there are any
-        get_pv_configs ${HOST_NAMES[idx]} || return 1
-
-        if [ -n "${PV_NAMES}" ]; then
-            line=${HOST_NAMES[idx]},${PV_MARKER},${PV_NAMES}
-            verbose_output "Informations of PVs" \
-                "in host ${HOST_NAMES[idx]} are as follows:"
-            verbose_output "${line}"
-            echo "${line}" >> ${LUSTRE_CSV_FILE}
-        fi
-
-        # VGs: collect, then write one line per volume group
-        get_vg_configs ${HOST_NAMES[idx]} || return 1
-
-        for ((i = 0; i < ${#VG_NAME[@]}; i++)); do
-            line=${HOST_NAMES[idx]},${VG_MARKER},${VG_NAME[i]}
-            line=${line},,,${VG_PVNAMES[i]}
-            verbose_output "Informations of VG ${VG_NAME[i]}" \
-                "in host ${HOST_NAMES[idx]} are as follows:"
-            verbose_output "${line}"
-            echo "${line}" >> ${LUSTRE_CSV_FILE}
-        done
-
-        # LVs: collect, then write one line per logical volume
-        get_lv_configs ${HOST_NAMES[idx]} || return 1
-
-        for ((i = 0; i < ${#LV_NAME[@]}; i++)); do
-            line=${HOST_NAMES[idx]},${LV_MARKER},${LV_NAME[i]}
-            line=${line},,,${LV_SIZE[i]},${LV_VGNAME[i]}
-            verbose_output "Informations of LV /dev/${LV_VGNAME[i]}/${LV_NAME[i]}" \
-                "in host ${HOST_NAMES[idx]} are as follows:"
-            verbose_output "${line}"
-            echo "${line}" >> ${LUSTRE_CSV_FILE}
-        done
-    done
-    return 0
-}
-
-# Generate the csv file from the lustre cluster
-gen_csvfile() {
- declare -i idx
- declare -i i
- local line
-
- # Get lustre cluster node names
- verbose_output "Collecting Lustre cluster node names..."
- if ! get_hostnames; then
- return 1
- fi
- verbose_output "OK"
-
- : > ${LUSTRE_CSV_FILE}
-
- ${GET_MDLVM_INFO} && get_mdlvm_info
-
- # Collect and append lustre target informations to the csv file
- for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do
- # Collect informations
- if ! get_configs ${HOST_NAMES[idx]}; then
- rm -f ${LUSTRE_CSV_FILE}
- return 1
- fi
-
- # Append informations to the csv file
- for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
- [ -z "${TARGET_DEVNAMES[i]}" ] && continue
-
- if [ -z "${HA_CONFIGS[i]}" ]; then
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]}
- else
- line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]}
- fi
- verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \
- "in host ${HOST_NAMES[idx]} are as follows:"
- verbose_output "${line}"
- echo "" >> ${LUSTRE_CSV_FILE}
- echo "${line}" >> ${LUSTRE_CSV_FILE}
- done
- done
-
- return 0
-}
-
-# Main flow: announce the run, generate the csv file and leave with
-# status 1 when the generation fails, 0 otherwise.
-echo "$(basename $0): ******** Generate csv file -- ${LUSTRE_CSV_FILE} START ********"
-gen_csvfile || exit 1
-echo "$(basename $0): ******** Generate csv file -- ${LUSTRE_CSV_FILE} OK **********"
-
-exit 0