From a3593994ff503df50ba80781497aa805b543e204 Mon Sep 17 00:00:00 2001 From: brian Date: Sat, 19 Aug 2006 03:08:14 +0000 Subject: [PATCH] (Hopefully) merge lustre/scripts from b1_5 as stuff in here is needed to successfully test a mountconf branch with ltest. --- lustre/scripts/.cvsignore | 8 + lustre/scripts/Makefile.am | 21 +- lustre/scripts/lc_cluman.sh.in | 524 ++++++++ lustre/scripts/lc_common.sh | 393 ++++++ lustre/scripts/lc_hb.sh.in | 644 ++++++++++ lustre/scripts/lc_lvm.sh.in | 561 +++++++++ lustre/scripts/lc_md.sh.in | 479 ++++++++ lustre/scripts/lc_modprobe.sh.in | 66 ++ lustre/scripts/lc_mon.sh | 139 +++ lustre/scripts/lc_net.sh.in | 216 ++++ lustre/scripts/lc_servip.sh | 250 ++++ lustre/scripts/lmc2csv.pl | 214 ++++ lustre/scripts/lustre | 68 +- lustre/scripts/lustre_config.sh.in | 1183 +++++++++++++++++++ lustre/scripts/lustre_createcsv.sh.in | 2100 +++++++++++++++++++++++++++++++++ lustre/scripts/lustre_up14.sh | 66 ++ 16 files changed, 6918 insertions(+), 14 deletions(-) create mode 100644 lustre/scripts/lc_cluman.sh.in create mode 100644 lustre/scripts/lc_common.sh create mode 100644 lustre/scripts/lc_hb.sh.in create mode 100644 lustre/scripts/lc_lvm.sh.in create mode 100644 lustre/scripts/lc_md.sh.in create mode 100644 lustre/scripts/lc_modprobe.sh.in create mode 100755 lustre/scripts/lc_mon.sh create mode 100644 lustre/scripts/lc_net.sh.in create mode 100755 lustre/scripts/lc_servip.sh create mode 100644 lustre/scripts/lmc2csv.pl create mode 100644 lustre/scripts/lustre_config.sh.in create mode 100644 lustre/scripts/lustre_createcsv.sh.in create mode 100755 lustre/scripts/lustre_up14.sh diff --git a/lustre/scripts/.cvsignore b/lustre/scripts/.cvsignore index 9db437f..9dae161 100644 --- a/lustre/scripts/.cvsignore +++ b/lustre/scripts/.cvsignore @@ -8,3 +8,11 @@ Makefile.in .deps TAGS version_tag.pl +lustre_createcsv.sh +lustre_config.sh +lc_net.sh +lc_modprobe.sh +lc_hb.sh +lc_cluman.sh +lc_md.sh +lc_lvm.sh diff --git a/lustre/scripts/Makefile.am 
b/lustre/scripts/Makefile.am index 1369ea1..287691a 100644 --- a/lustre/scripts/Makefile.am +++ b/lustre/scripts/Makefile.am @@ -3,11 +3,20 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -EXTRA_DIST = license-status maketags.sh lustre lustrefs \ - version_tag.pl.in +# These are scripts that are generated from .in files +genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh -initddir = $(sysconfdir)/init.d -if UTILS -initd_SCRIPTS = lustre lustrefs -endif +sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh +EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \ + $(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \ + lustre_up14.sh + +scriptlibdir = $(libdir)/@PACKAGE@ +scriptlib_DATA = lc_common.sh + +CLEANFILES = $(genscripts) + +$(genscripts): %.sh: %.sh.in + sed -e 's#@scriptlibdir@#$(scriptlibdir)#' < $< > $@ + chmod +x $@ diff --git a/lustre/scripts/lc_cluman.sh.in b/lustre/scripts/lc_cluman.sh.in new file mode 100644 index 0000000..c122c42 --- /dev/null +++ b/lustre/scripts/lc_cluman.sh.in @@ -0,0 +1,524 @@ +#!/bin/bash +# +# lc_cluman.sh - script for generating the Red Hat Cluster Manager +# HA software's configuration files +# +################################################################################ + +# Usage +usage() { + cat >&2 < [-s service addresses] + [-c heartbeat channel] [-o heartbeat options] [-v] + <-d target device> [-d target device...] + + -n hostnames the nodenames of the primary node and its fail- + overs + Multiple nodenames are separated by colon (:) + delimeter. The first one is the nodename of the + primary node, the others are failover nodenames. + -s service addresses the IP addresses to failover + Multiple addresses are separated by colon (:) + delimeter. 
+ -c heartbeat channel the method to send/rcv heartbeats on + The default method is multicast, and multicast_ + ipaddress is "225.0.0.11". + -o heartbeat options a "catchall" for other heartbeat configuration + options + Multiple options are separated by colon (:) + delimeter. + -v verbose mode + -d target device the target device name and mount point + The device name and mount point are separated by + colon (:) delimeter. + +EOF + exit 1 +} + +# Get the library of functions +. @scriptlibdir@/lc_common.sh + +#****************************** Global variables ******************************# +TMP_DIR=${CLUMGR_TMP_DIR} # Temporary directory + +declare -a NODE_NAMES # Node names in the failover group +declare -a SRV_IPADDRS # Service IP addresses + +# Lustre target device names, service names and mount points +declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS +declare -i TARGET_NUM=0 # Number of targets + +# Get and check the positional parameters +VERBOSE_OUTPUT=false +while getopts "n:s:c:o:vd:" OPTION; do + case $OPTION in + n) + HOSTNAME_OPT=$OPTARG + PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` + if [ -z "${PRIM_NODENAME}" ]; then + echo >&2 $"`basename $0`: Missing primary nodename!" + usage + fi + HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` + if [ ${HOSTNAME_NUM} -lt 2 ]; then + echo >&2 $"`basename $0`: Missing failover nodenames!" + usage + fi + ;; + s) + SRVADDR_OPT=$OPTARG + ;; + c) + HBCHANNEL_OPT=$OPTARG + HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \ + | sed 's/"$//'` + if [ -n "${HBCHANNEL_OPT}" ] \ + && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \ + && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then + echo >&2 $"`basename $0`: Invalid Heartbeat channel" \ + "- ${HBCHANNEL_OPT}!" 
+ usage + fi + ;; + o) + HBOPT_OPT=$OPTARG + HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'` + ;; + v) + VERBOSE_OUTPUT=true + ;; + d) + DEVICE_OPT=$OPTARG + TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` + TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` + if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then + echo >&2 $"`basename $0`: Missing target device name!" + usage + fi + if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then + echo >&2 $"`basename $0`: Missing mount point for target"\ + "${TARGET_DEVNAMES[TARGET_NUM]}!" + usage + fi + TARGET_NUM=$(( TARGET_NUM + 1 )) + ;; + + ?) + usage + esac +done + +# Check the required parameters +if [ -z "${HOSTNAME_OPT}" ]; then + echo >&2 $"`basename $0`: Missing -n option!" + usage +fi + +if [ -z "${DEVICE_OPT}" ]; then + echo >&2 $"`basename $0`: Missing -d option!" + usage +fi + +# get_nodenames +# +# Get all the node names in this failover group +get_nodenames() { + declare -i idx + local nodename_str nodename + + nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ + END {for (i in a) print a[i]}'` + idx=0 + for nodename in ${nodename_str} + do + NODE_NAMES[idx]=${nodename} + idx=$idx+1 + done + + return 0 +} + +# get_check_srvIPaddrs +# +# Get and check all the service IP addresses in this failover group +get_check_srvIPaddrs() { + declare -i idx + declare -i i + local srvIPaddr_str srvIPaddr + + srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\ + END {for (i in a) print a[i]}'` + idx=0 + for srvIPaddr in ${srvIPaddr_str} + do + SRV_IPADDRS[idx]=${srvIPaddr} + idx=$idx+1 + done + + for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do + for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do + # Check service IP address + verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \ + "real IP of host ${NODE_NAMES[i]} are in the" \ + "same subnet..." + if ! 
${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]} + then + return 1 + fi + verbose_output "OK" + done + done + + return 0 +} + +# cluman_running host_name +# +# Run remote command to check whether clumanager service is running in @host_name +cluman_running() { + local host_name=$1 + local ret_str + + ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1` + if [ $? -ne 0 ]; then + if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then + echo >&2 "`basename $0`: cluman_running() error:"\ + "remote command to ${host_name} error: ${ret_str}!" + return 2 + else + return 1 + fi + fi + + return 0 +} + +# stop_cluman host_name +# +# Run remote command to stop clumanager service running in @host_name +stop_cluman() { + local host_name=$1 + local ret_str + + ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1` + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: stop_cluman() error:"\ + "remote command to ${host_name} error: ${ret_str}!" + return 1 + fi + + echo "`basename $0`: Clumanager service is stopped on node ${host_name}." + return 0 +} + +# check_cluman +# +# Run remote command to check each node's clumanager service +check_cluman() { + declare -i idx + local OK + + # Get and check all the service IP addresses + if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then + return 1 + fi + + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + # Check clumanager service status + cluman_running ${NODE_NAMES[idx]} + rc=$? + if [ "$rc" -eq "2" ]; then + return 1 + elif [ "$rc" -eq "1" ]; then + verbose_output "Clumanager service is stopped on"\ + "node ${NODE_NAMES[idx]}." + elif [ "$rc" -eq "0" ]; then + OK= + echo -n "`basename $0`: Clumanager service is running on"\ + "${NODE_NAMES[idx]}, go ahead to stop the service and"\ + "generate new configurations? [y/n]:" + read OK + if [ "${OK}" = "n" ]; then + echo "`basename $0`: New Clumanager configurations"\ + "are not generated." 
+ return 2 + fi + + # Stop clumanager service + stop_cluman ${NODE_NAMES[idx]} + fi + done + + return 0 +} + +# get_srvname hostname target_devname +# +# Get the lustre target server name from the node @hostname +get_srvname() { + local host_name=$1 + local target_devname=$2 + local target_srvname= + local ret_str + + # Execute remote command to get the target server name + ret_str=`${REMOTE} ${host_name} \ + "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` + if [ $? -ne 0 ]; then + echo "`basename $0`: get_srvname() error:" \ + "from host ${host_name} - ${ret_str}" + return 1 + fi + + if [ "${ret_str}" != "${ret_str#*Target: }" ]; then + ret_str=${ret_str#*Target: } + target_srvname=`echo ${ret_str} | awk '{print $1}'` + fi + + if [ -z "${target_srvname}" ]; then + echo "`basename $0`: get_srvname() error: Cannot get the"\ + "server name of target ${target_devname} in ${host_name}!" + return 1 + fi + + echo ${target_srvname} + return 0 +} + +# get_srvnames +# +# Get server names of all the Lustre targets in this failover group +get_srvnames() { + declare -i i + + # Initialize the TARGET_SRVNAMES array + unset TARGET_SRVNAMES + + # Get Lustre target service names + for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ + ${TARGET_DEVNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_SRVNAMES[i]}" + return 1 + fi + done + + return 0 +} + +# check_retval retval +# +# Check the return value of redhat-config-cluster-cmd +check_retval() { + if [ $1 -ne 0 ]; then + echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!" + return 1 + fi + + return 0 +} + +# add_services +# +# Add service tags into the cluster.xml file +add_services() { + declare -i idx + declare -i i + + # Add service tag + for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do + ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]} + if ! 
check_retval $?; then + return 1 + fi + + for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do + ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ + --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]} + if ! check_retval $?; then + return 1 + fi + done + + ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ + --add_device \ + --name=${TARGET_DEVNAMES[i]} + if ! check_retval $?; then + return 1 + fi + + ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ + --device=${TARGET_DEVNAMES[i]} \ + --mount \ + --mountpoint=${TARGET_MNTPNTS[i]} \ + --fstype=lustre + if ! check_retval $?; then + return 1 + fi + done + + return 0 +} + +# gen_cluster_xml +# +# Run redhat-config-cluster-cmd to create the cluster.xml file +gen_cluster_xml() { + declare -i idx + declare -i i + local mcast_IPaddr + local node_names + local hbopt + + [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ + /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old + + # Run redhat-config-cluster-cmd to generate cluster.xml + # Add clumembd tag + if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then + ${CONFIG_CMD} --clumembd --broadcast=yes + ${CONFIG_CMD} --clumembd --multicast=no + if ! check_retval $?; then + return 1 + fi + elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then + mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'` + if [ -n "${mcast_IPaddr}" ]; then + ${CONFIG_CMD} --clumembd --multicast=yes\ + --multicast_ipaddress=${mcast_IPaddr} + if ! check_retval $?; then + return 1 + fi + fi + fi + + # Add cluster tag + node_names= + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + node_names=${node_names}"${NODE_NAMES[idx]} " + done + + ${CONFIG_CMD} --cluster --name="${node_names}failover group" + if ! check_retval $?; then + return 1 + fi + + # Add member tag + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]} + if ! check_retval $?; then + return 1 + fi + done + + # Add service tag + if ! 
add_services; then + return 1 + fi + + # Add other tags + if [ -n "${HBOPT_OPT}" ]; then + while read -r hbopt + do + ${CONFIG_CMD} ${hbopt} + if ! check_retval $?; then + return 1 + fi + done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\ + END {for (i in a) print a[i]}') + fi + + return 0 +} + +# create_config +# +# Create the cluster.xml file and scp it to the each node's /etc/ +create_config() { + declare -i idx + + /bin/mkdir -p ${TMP_DIR} + CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME} + CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX} + + # Get server names of Lustre targets + if ! get_srvnames; then + return 1 + fi + + if [ -s ${CONFIG_PRIMNODE} ]; then + if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ] + then + verbose_output "${CONFIG_PRIMNODE} already exists." + return 0 + else + [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ + /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old + + /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml + + # Add services into the cluster.xml file + if ! add_services; then + return 1 + fi + fi + else + # Run redhat-config-cluster-cmd to generate cluster.xml + verbose_output "Creating cluster.xml file for" \ + "${PRIM_NODENAME} failover group hosts..." + if ! gen_cluster_xml; then + return 1 + fi + verbose_output "OK" + fi + + /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE} + [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \ + /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml + + # scp the cluster.xml file to all the nodes + verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \ + "${PRIM_NODENAME} failover group hosts..." + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]} + + scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/ + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: Failed to scp cluster.xml file"\ + "to node ${NODE_NAMES[idx]}!" 
+ return 1 + fi + done + verbose_output "OK" + + return 0 +} + +# Main flow +# Get all the node names +if ! get_nodenames; then + exit 1 +fi + +# Check clumanager services +verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\ + "failover group hosts..." +check_cluman +rc=$? +if [ "$rc" -eq "2" ]; then + verbose_output "OK" + exit 0 +elif [ "$rc" -eq "1" ]; then + exit 1 +fi +verbose_output "OK" + +# Generate configuration files +if ! create_config; then + exit 1 +fi + +exit 0 diff --git a/lustre/scripts/lc_common.sh b/lustre/scripts/lc_common.sh new file mode 100644 index 0000000..ef62b9a --- /dev/null +++ b/lustre/scripts/lc_common.sh @@ -0,0 +1,393 @@ +# +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: +# +# lc_common.sh - This file contains functions to be used by most or all +# Lustre cluster config scripts. +# +################################################################################ + +# Remote command +REMOTE=${REMOTE:-"ssh -x -q"} +#REMOTE=${REMOTE:-"pdsh -S -R ssh -w"} +export REMOTE + +# Lustre utilities +CMD_PATH=${CMD_PATH:-"/usr/sbin"} +MKFS=${MKFS:-"$CMD_PATH/mkfs.lustre"} +TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"} +LCTL=${LCTL:-"$CMD_PATH/lctl"} + +EXPORT_PATH=${EXPORT_PATH:-"PATH=\$PATH:/sbin:/usr/sbin;"} + +# Raid command path +RAID_CMD_PATH=${RAID_CMD_PATH:-"/sbin"} +MDADM=${MDADM:-"$RAID_CMD_PATH/mdadm"} + +# Some scripts to be called +SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"} +MODULE_CONFIG=${SCRIPTS_PATH}/lc_modprobe.sh +VERIFY_CLUSTER_NET=${SCRIPTS_PATH}/lc_net.sh +GEN_HB_CONFIG=${SCRIPTS_PATH}/lc_hb.sh +GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}/lc_cluman.sh +SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh +SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon.sh +SCRIPT_CONFIG_MD=${SCRIPTS_PATH}/lc_md.sh +SCRIPT_CONFIG_LVM=${SCRIPTS_PATH}/lc_lvm.sh + +# Variables of HA software +HBVER_HBV1="hbv1" # Heartbeat version 1 +HBVER_HBV2="hbv2" # Heartbeat version 2 +HATYPE_CLUMGR="cluman" # Cluster Manager + +# 
Configuration directories and files +HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory +MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory +CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory + +HA_CF=${HA_DIR}/ha.cf # ha.cf file +HA_RES=${HA_DIR}/haresources # haresources file +HA_CIB=${CIB_DIR}/cib.xml + +CLUMAN_DIR="/etc" # CluManager configuration directory +CLUMAN_CONFIG=${CLUMAN_DIR}/cluster.xml + +CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"} # CluManager tools +CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"} + +HB_TMP_DIR="/tmp/heartbeat" # Temporary directory +CLUMGR_TMP_DIR="/tmp/clumanager" +TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}" + +FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type +FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files + +# Marker of the MD device line +MD_MARKER=${MD_MARKER:-"MD"} + +# Marker of the LVM device line +PV_MARKER=${PV_MARKER:-"PV"} +VG_MARKER=${VG_MARKER:-"VG"} +LV_MARKER=${LV_MARKER:-"LV"} + +declare -a CONFIG_ITEM # Items in each line of the csv file +declare -a NODE_NAME # Hostnames of nodes have been configured + + +# verbose_output string +# Output verbose information $string +verbose_output() { + if ${VERBOSE_OUTPUT}; then + echo "`basename $0`: $*" + fi + return 0 +} + +# Check whether the reomte command is pdsh +is_pdsh() { + if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then + return 1 + fi + + return 0 +} + +# check_file csv_file +# Check the file $csv_file +check_file() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 "`basename $0`: check_file() error: Missing csv file!" + return 1 + fi + + CSV_FILE=$1 + if [ ! -s ${CSV_FILE} ]; then + echo >&2 "`basename $0`: check_file() error: ${CSV_FILE}"\ + "does not exist or is empty!" 
+ return 1 + fi + + return 0 +} + +# parse_line line +# Parse a line in the csv file +parse_line() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 "`basename $0`: parse_line() error: Missing argument!" + return 1 + fi + + declare -i i=0 # Index of the CONFIG_ITEM array + declare -i length=0 + declare -i idx=0 + declare -i s_quote_flag=0 # Flag of the single quote character + declare -i d_quote_flag=0 # Flag of the double quotes character + local TMP_LETTER LINE + + LINE="$*" + + # Initialize the CONFIG_ITEM array + unset CONFIG_ITEM + + # Get the length of the line + length=${#LINE} + + i=0 + while [ ${idx} -lt ${length} ]; do + # Get a letter from the line + TMP_LETTER=${LINE:${idx}:1} + + case "${TMP_LETTER}" in + ",") + if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ] + then + CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER} + else + i=$i+1 + fi + idx=${idx}+1 + continue + ;; + "'") + if [ ${s_quote_flag} -eq 0 ]; then + s_quote_flag=1 + else + s_quote_flag=0 + fi + ;; + "\"") + if [ ${d_quote_flag} -eq 0 ]; then + d_quote_flag=1 + else + d_quote_flag=0 + fi + ;; + " ") + idx=${idx}+1 + continue + ;; + *) + ;; + esac + CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER} + idx=${idx}+1 + done + + # Extract the real value of each field + # Remove surrounded double-quotes, etc. 
+ for ((idx = 0; idx <= $i; idx++)); do + # Strip the leading and trailing space-characters + CONFIG_ITEM[idx]=`expr "${CONFIG_ITEM[idx]}" : '[[:space:]]*\(.*\)[[:space:]]*$'` + + [ -z "${CONFIG_ITEM[idx]}" ] && continue + + # Remove the surrounded double-quotes + while [ -z "`echo "${CONFIG_ITEM[idx]}"|sed -e 's/^".*"$//'`" ]; do + CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/^"//' -e 's/"$//'` + done + + CONFIG_ITEM[idx]=`echo "${CONFIG_ITEM[idx]}" | sed -e 's/""/"/g'` + done + + return 0 +} + +# fcanon name +# If $name is a symbolic link, then display it's value +fcanon() { + local NAME=$1 + + if [ -h "$NAME" ]; then + readlink -f "$NAME" + else + echo "$NAME" + fi +} + +# configured_host host_name +# +# Check whether the devices in $host_name has been configured or not +configured_host() { + local host_name=$1 + declare -i i + + for ((i = 0; i < ${#NODE_NAME[@]}; i++)); do + [ "${host_name}" = "${NODE_NAME[i]}" ] && return 0 + done + + return 1 +} + +# remote_error fn_name host_addr ret_str +# Verify the return result from remote command +remote_error() { + local fn_name host_addr ret_str + + fn_name=$1 + shift + host_addr=$1 + shift + ret_str=$* + + if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then + echo >&2 "`basename $0`: ${fn_name}() error: ${ret_str}" + return 0 + fi + + if [ -z "${ret_str}" ]; then + echo >&2 "`basename $0`: ${fn_name}() error:" \ + "No results from remote!" \ + "Check network connectivity between the local host and ${host_addr}!" + return 0 + fi + + return 1 +} + +# nid2hostname nid +# Convert $nid to hostname of the lustre cluster node +nid2hostname() { + local nid=$1 + local host_name= + local addr nettype ip_addr + local ret_str + + addr=${nid%@*} + [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp + if [ -z "${addr}" ]; then + echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!" 
+ return 1 + fi + + case "${nettype}" in + lo*) host_name=`hostname`;; + elan*) # QsNet + # FIXME: Parse the /etc/elanhosts configuration file to + # convert ElanID to hostname + ;; + gm*) # Myrinet + # FIXME: Use /usr/sbin/gmlndnid to find the hostname of + # the specified GM Global node ID + ;; + ptl*) # Portals + # FIXME: Convert portal ID to hostname + ;; + *) # tcp, o2ib, cib, openib, iib, vib, ra + ip_addr=${addr} + # Is it IP address or hostname? + if [ -n "`echo ${ip_addr} | sed -e 's/\([0-9]\{1,3\}\.\)\{3,3\}[0-9]\{1,3\}//'`" ] + then + host_name=${ip_addr} + echo ${host_name} + return 0 + fi + + # Execute remote command to get the host name + ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: nid2hostname() error:" \ + "remote command to ${ip_addr} error: ${ret_str}" + return 1 + fi + remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1 + + if is_pdsh; then + host_name=`echo ${ret_str} | awk '{print $2}'` + else + host_name=`echo ${ret_str} | awk '{print $1}'` + fi + ;; + esac + + echo ${host_name} + return 0 +} + +# nids2hostname nids +# Get the hostname of the lustre cluster node which has the nids - $nids +nids2hostname() { + local nids=$1 + local host_name= + local nid + local nettype + + for nid in ${nids//,/ }; do + [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp + + case "${nettype}" in + lo* | elan* | gm* | ptl*) ;; + *) # tcp, o2ib, cib, openib, iib, vib, ra + host_name=$(nid2hostname ${nid}) + if [ $? -ne 0 ]; then + echo "${host_name}" + return 1 + fi + ;; + esac + done + + if [ -z "${host_name}" ]; then + echo "`basename $0`: nids2hostname() error:" \ + "Can not get the hostname from nids - \"${nids}\"!" 
+ return 1 + fi + + echo ${host_name} + return 0 +} + +# ip2hostname_single_node nids +# Convert IP addresses in $nids into hostnames +# NID in $nids are delimited by commas, ie all the $nids belong to one node +ip2hostname_single_node() { + local orig_nids=$1 + local nids= + local nid host_name + local nettype + + for nid in ${orig_nids//,/ }; do + [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp + + case "${nettype}" in + lo* | elan* | gm* | ptl*) ;; + *) # tcp, o2ib, cib, openib, iib, vib, ra + host_name=$(nid2hostname ${nid}) + if [ $? -ne 0 ]; then + echo "${host_name}" + return 1 + fi + + nid=${host_name}@${nettype} + ;; + esac + + [ -z "${nids}" ] && nids=${nid} || nids=${nids},${nid} + done + + echo ${nids} + return 0 +} + +# ip2hostname_multi_node nids +# Convert IP addresses in $nids into hostnames +# NIDs belong to multiple nodes are delimited by colons in $nids +ip2hostname_multi_node() { + local orig_nids=$1 + local nids= + local nid + + for nid in ${orig_nids//:/ }; do + nid=$(ip2hostname_single_node ${nid}) + if [ $? -ne 0 ]; then + echo "${nid}" + return 1 + fi + + [ -z "${nids}" ] && nids=${nid} || nids=${nids}:${nid} + done + + echo ${nids} + return 0 +} diff --git a/lustre/scripts/lc_hb.sh.in b/lustre/scripts/lc_hb.sh.in new file mode 100644 index 0000000..f5afb4e --- /dev/null +++ b/lustre/scripts/lc_hb.sh.in @@ -0,0 +1,644 @@ +#!/bin/bash +# +# lc_hb.sh - script for generating the Heartbeat HA software's +# configuration files +# +############################################################################### + +# Usage +usage() { + cat >&2 < <-n hostnames> [-v] + <-d target device> [-d target device...] + + -r HBver the version of Heartbeat software + The Heartbeat software versions which are curr- + ently supported are: hbv1 (Heartbeat version 1) + and hbv2 (Heartbeat version 2). + -n hostnames the nodenames of the primary node and its fail- + overs + Multiple nodenames are separated by colon (:) + delimeter. 
The first one is the nodename of the + primary node, the others are failover nodenames. + -v verbose mode + -d target device the target device name and mount point + The device name and mount point are separated by + colon (:) delimeter. + +EOF + exit 1 +} + +# Get the library of functions +. @scriptlibdir@/lc_common.sh + +#****************************** Global variables ******************************# +# Heartbeat tools +HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path +CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py +CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"} + +# Service directories and names +HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources +LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat + +TMP_DIR=${HB_TMP_DIR} # Temporary directory +HACF_TEMP=${TMP_DIR}/ha.cf.temp +AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX} + +declare -a NODE_NAMES # Node names in the failover group + +# Lustre target device names, service names and mount points +declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS +declare -i TARGET_NUM=0 # Number of targets + + +# Get and check the positional parameters +VERBOSE_OUTPUT=false +while getopts "r:n:vd:" OPTION; do + case $OPTION in + r) + HBVER_OPT=$OPTARG + if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \ + && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then + echo >&2 $"`basename $0`: Invalid Heartbeat software" \ + "version - ${HBVER_OPT}!" + usage + fi + ;; + n) + HOSTNAME_OPT=$OPTARG + PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` + if [ -z "${PRIM_NODENAME}" ]; then + echo >&2 $"`basename $0`: Missing primary nodename!" + usage + fi + HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` + if [ ${HOSTNAME_NUM} -lt 2 ]; then + echo >&2 $"`basename $0`: Missing failover nodenames!" 
+ usage + fi + if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ] + then + echo >&2 $"`basename $0`: Heartbeat version 1 can" \ + "only support 2 nodes!" + usage + fi + ;; + v) + VERBOSE_OUTPUT=true + ;; + d) + DEVICE_OPT=$OPTARG + TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` + TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` + if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then + echo >&2 $"`basename $0`: Missing target device name!" + usage + fi + if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then + echo >&2 $"`basename $0`: Missing mount point for target"\ + "${TARGET_DEVNAMES[TARGET_NUM]}!" + usage + fi + TARGET_NUM=$(( TARGET_NUM + 1 )) + ;; + ?) + usage + esac +done + +# Check the required parameters +if [ -z "${HBVER_OPT}" ]; then + echo >&2 $"`basename $0`: Missing -r option!" + usage +fi + +if [ -z "${HOSTNAME_OPT}" ]; then + echo >&2 $"`basename $0`: Missing -n option!" + usage +fi + +if [ -z "${DEVICE_OPT}" ]; then + echo >&2 $"`basename $0`: Missing -d option!" + usage +fi + +# get_nodenames +# +# Get all the node names in this failover group +get_nodenames() { + declare -i idx + local nodename_str nodename + + nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ + END {for (i in a) print a[i]}'` + idx=0 + for nodename in ${nodename_str} + do + NODE_NAMES[idx]=${nodename} + idx=$idx+1 + done + + return 0 +} + +# check_remote_file host_name file +# +# Run remote command to check whether @file exists in @host_name +check_remote_file() { + local host_name=$1 + local file_name=$2 + + if [ -z "${host_name}" ]; then + echo >&2 "`basename $0`: check_remote_file() error:"\ + "Missing hostname!" + return 1 + fi + + if [ -z "${file_name}" ]; then + echo >&2 "`basename $0`: check_remote_file() error:"\ + "Missing file name!" + return 1 + fi + + # Execute remote command to check the file + ${REMOTE} ${host_name} "[ -e ${file_name} ]" + if [ $? 
-ne 0 ]; then + echo >&2 "`basename $0`: check_remote_file() error:"\ + "${file_name} does not exist in host ${host_name}!" + return 1 + fi + + return 0 +} + +# hb_running host_name +# +# Run remote command to check whether heartbeat service is running in @host_name +hb_running() { + local host_name=$1 + local ret_str + + ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1` + if [ $? -ne 0 ]; then + if [ "${ret_str}" = "${ret_str#*stop*}" ]; then + echo >&2 "`basename $0`: hb_running() error:"\ + "remote command to ${host_name} error: ${ret_str}!" + return 2 + else + return 1 + fi + fi + + return 0 +} + +# stop_heartbeat host_name +# +# Run remote command to stop heartbeat service running in @host_name +stop_heartbeat() { + local host_name=$1 + local ret_str + + ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1` + if [ $? -ne 0 ]; then + echo >&2 "`basename $0`: stop_heartbeat() error:"\ + "remote command to ${host_name} error: ${ret_str}!" + return 1 + fi + + echo "`basename $0`: Heartbeat service is stopped on node ${host_name}." + return 0 +} + +# check_heartbeat +# +# Run remote command to check each node's heartbeat service +check_heartbeat() { + declare -i idx + local OK + + for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do + # Check Heartbeat configuration directory + if ! check_remote_file ${NODE_NAMES[idx]} ${HA_DIR}; then + echo >&2 "`basename $0`: check_heartbeat() error:"\ + "Is Heartbeat package installed?" + return 1 + fi + + if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then + # Check mon configuration directory + if ! check_remote_file ${NODE_NAMES[idx]} ${MON_DIR}; then + echo >&2 "`basename $0`: check_heartbeat()"\ + "error: Is mon package installed?" + return 1 + fi + fi + + if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then + # Check crm directory + if ! check_remote_file ${NODE_NAMES[idx]} ${CIB_DIR}; then + echo >&2 "`basename $0`: check_heartbeat()"\ + "error: Is Heartbeat v2 package installed?" 
+ return 1 + fi + fi + + # Check heartbeat service status + hb_running ${NODE_NAMES[idx]} + rc=$? + if [ "$rc" -eq "2" ]; then + return 1 + elif [ "$rc" -eq "1" ]; then + verbose_output "Heartbeat service is stopped on"\ + "node ${NODE_NAMES[idx]}." + elif [ "$rc" -eq "0" ]; then + OK= + echo -n "`basename $0`: Heartbeat service is running on"\ + "${NODE_NAMES[idx]}, go ahead to stop the service and"\ + "generate new configurations? [y/n]:" + read OK + if [ "${OK}" = "n" ]; then + echo "`basename $0`: New Heartbeat configurations"\ + "are not generated." + return 2 + fi + + # Stop heartbeat service + stop_heartbeat ${NODE_NAMES[idx]} + fi + done + + return 0 +} + +# get_srvname hostname target_devname +# +# Get the lustre target server name from the node @hostname +get_srvname() { + local host_name=$1 + local target_devname=$2 + local target_srvname= + local ret_str + + # Execute remote command to get the target server name + ret_str=`${REMOTE} ${host_name} \ + "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` + if [ $? -ne 0 ]; then + echo "`basename $0`: get_srvname() error:" \ + "from host ${host_name} - ${ret_str}" + return 1 + fi + + if [ "${ret_str}" != "${ret_str#*Target: }" ]; then + ret_str=${ret_str#*Target: } + target_srvname=`echo ${ret_str} | awk '{print $1}'` + fi + + if [ -z "${target_srvname}" ]; then + echo "`basename $0`: get_srvname() error: Cannot get the"\ + "server name of target ${target_devname} in ${host_name}!" + return 1 + fi + + echo ${target_srvname} + return 0 +} + +# get_srvnames +# +# Get server names of all the Lustre targets in this failover group +get_srvnames() { + declare -i i + + # Initialize the TARGET_SRVNAMES array + unset TARGET_SRVNAMES + + # Get Lustre target service names + for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ + ${TARGET_DEVNAMES[i]}) + if [ $? 
# create_haresources
#
# Create the haresources file (and, for Heartbeat v2, the cib.xml file
# generated from it) and scp it to each node's configuration directory.
#
# Globals read:    TMP_DIR PRIM_NODENAME FILE_SUFFIX TARGET_DEVNAMES
#                  TARGET_MNTPNTS TARGET_SRVNAMES LUSTRE_SRV FS_TYPE HBVER_OPT
#                  HBVER_HBV1 HBVER_HBV2 CIB_GEN_SCRIPT NODE_NAMES HA_DIR
#                  CIB_DIR
# Globals written: HARES_PRIMNODE HARES_LUSTRE CIB_LUSTRE
# Returns: 0 on success or when the first target is already configured,
#          1 when generating cib.xml or copying a file fails.
create_haresources() {
    HARES_PRIMNODE="${TMP_DIR}/haresources.${PRIM_NODENAME}"
    HARES_LUSTRE="${TMP_DIR}/haresources${FILE_SUFFIX}"
    local -i idx
    local res_line

    # Nothing to do when the primary node's copy already holds a resource for
    # the first target device.  The device is matched as the complete, literal
    # "::device::" field written below, so /dev/sda1 is not mistaken for
    # /dev/sda10 and regex metacharacters in the name are not interpreted.
    if [ -s "${HARES_PRIMNODE}" ] \
        && grep -qF -- "::${TARGET_DEVNAMES[0]}::" "${HARES_PRIMNODE}"; then
        verbose_output "${HARES_PRIMNODE} already exists."
        return 0
    fi

    # Add the resource group line into the haresources file
    res_line=${PRIM_NODENAME}
    for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do
        res_line="${res_line} ${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE}"

        if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
            res_line="${res_line} ${TARGET_SRVNAMES[idx]}-mon"
        fi
    done
    echo "${res_line}" >> "${HARES_LUSTRE}"

    # Generate the cib.xml file
    if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
        # Add group haclient and user hacluster unless an entry with exactly
        # that name exists (a substring match could be satisfied by another
        # account, e.g. "haclient2").
        grep -q '^haclient:' /etc/group || groupadd haclient
        grep -q '^hacluster:' /etc/passwd || useradd -g haclient hacluster

        CIB_LUSTRE="${TMP_DIR}/cib.xml${FILE_SUFFIX}"
        if ! python "${CIB_GEN_SCRIPT}" --stdout \
            "${HARES_LUSTRE}" > "${CIB_LUSTRE}"; then
            echo >&2 "$(basename "$0"): Failed to generate cib.xml file"\
                "for node ${PRIM_NODENAME}!"
            return 1
        fi
    fi

    # scp the haresources file or cib.xml file
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        # Keep a per-node copy in TMP_DIR; the primary node's copy is what
        # the "already exists" check above inspects on the next run.
        /bin/cp -f "${HARES_LUSTRE}" "${TMP_DIR}/haresources.${NODE_NAMES[idx]}"
        if ! scp "${HARES_LUSTRE}" "${NODE_NAMES[idx]}:${HA_DIR}/"; then
            echo >&2 "$(basename "$0"): Failed to scp haresources file"\
                "to node ${NODE_NAMES[idx]}!"
            return 1
        fi

        if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
            if ! scp "${CIB_LUSTRE}" "${NODE_NAMES[idx]}:${CIB_DIR}/"; then
                echo >&2 "$(basename "$0"): Failed to scp cib.xml"\
                    "file to node ${NODE_NAMES[idx]}!"
                return 1
            fi
        fi
    done

    return 0
}

# create_authkeys
#
# scp the authkeys template to each node's Heartbeat configuration directory
# and leave an (empty) marker file per node in TMP_DIR.
#
# Globals read:    TMP_DIR PRIM_NODENAME AUTHKEYS_TEMP NODE_NAMES HA_DIR
# Globals written: AUTHKEYS_PRIMNODE
# Returns: 0 on success or when the primary node's marker already exists,
#          1 when a copy fails.
create_authkeys() {
    AUTHKEYS_PRIMNODE="${TMP_DIR}/authkeys.${PRIM_NODENAME}"
    local -i idx

    if [ -e "${AUTHKEYS_PRIMNODE}" ]; then
        verbose_output "${AUTHKEYS_PRIMNODE} already exists."
        return 0
    fi

    # scp the authkeys file to all the nodes; the mode is set first and
    # carried over by "scp -p".
    chmod 600 "${AUTHKEYS_TEMP}"
    for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
        touch "${TMP_DIR}/authkeys.${NODE_NAMES[idx]}"
        if ! scp -p "${AUTHKEYS_TEMP}" "${NODE_NAMES[idx]}:${HA_DIR}/"; then
            echo >&2 "$(basename "$0"): Failed to scp authkeys file"\
                "to node ${NODE_NAMES[idx]}!"
            return 1
        fi
    done

    return 0
}
# generate_config
#
# Generate the configuration files for Heartbeat and scp them to all the
# nodes.  The steps run in a fixed order (server names for Heartbeat v1,
# templates, ha.cf, haresources, authkeys, and mon.cf for Heartbeat v1);
# the first step that fails stops the sequence.
#
# Globals: HBVER_OPT, HBVER_HBV1, FILE_SUFFIX, PRIM_NODENAME (read).
# Returns: 0 when every step succeeded, 1 as soon as one step fails.
generate_config() {
    # Server names of the Lustre targets are only needed for Heartbeat v1
    if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
        get_srvnames || return 1
    fi

    create_template || return 1

    verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
        "${PRIM_NODENAME} failover group hosts..."
    create_hacf || return 1
    verbose_output "OK"

    verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
        "to ${PRIM_NODENAME} failover group hosts..."
    create_haresources || return 1
    verbose_output "OK"

    verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
        "${PRIM_NODENAME} failover group hosts..."
    create_authkeys || return 1
    verbose_output "OK"

    # mon is only configured together with Heartbeat v1
    [ "${HBVER_OPT}" = "${HBVER_HBV1}" ] || return 0

    verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
        "${PRIM_NODENAME} failover group hosts..."
    create_moncf || return 1
    verbose_output "OK"

    return 0
}
# check_lvm_item index
#
# Check the items required for managing LVM device ${LVM_NAME[index]}.
#
# Globals read: HOST_NAME LINE_MARKER LVM_NAME OP_MODE SIXTH_ITEM SEVENTH_ITEM
#               (parallel arrays indexed by @index), VG_MARKER, LV_MARKER.
# Outputs: one diagnostic line on stderr for the first failed check.
# Returns: 0 when the entry is acceptable, 1 otherwise.
#
# [[ ... ]] is used instead of "[ ... -a/-o ... ]" so that a field whose text
# looks like a test operator (for instance "!" or "(") is compared as plain
# data instead of confusing the expression parser.
check_lvm_item() {
    # Check argument
    if [ $# -eq 0 ]; then
        echo >&2 "$(basename "$0"): check_lvm_item() error:"\
            "Missing argument!"
        return 1
    fi

    local -i i=$1
    local marker=${LINE_MARKER[i]}
    local mode=${OP_MODE[i]}

    # Check hostname
    if [[ -z "${HOST_NAME[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_lvm_item() error:"\
            "hostname item has null value!"
        return 1
    fi

    # Check LVM device name: it may only be empty on LV lines or in
    # "remove" mode.
    if [[ -z "${LVM_NAME[i]}" && "${marker}" != "${LV_MARKER}" \
        && "${mode}" != "remove" ]]; then
        echo >&2 "$(basename "$0"): check_lvm_item() error:"\
            "LVM component name item has null value!"
        return 1
    fi

    # Check the operation mode: empty, "create" or "remove"
    if [[ -n "${mode}" && "${mode}" != "create" && "${mode}" != "remove" ]]
    then
        echo >&2 "$(basename "$0"): check_lvm_item() error:"\
            "Invalid operation mode item - \"${mode}\"!"
        return 1
    fi

    # The remaining checks only apply to create mode (an empty mode is
    # treated as "create").
    if [[ -n "${mode}" && "${mode}" != "create" ]]; then
        return 0
    fi

    if [[ "${marker}" == "${VG_MARKER}" && -z "${SIXTH_ITEM[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_lvm_item() error:"\
            "pv paths item of vg ${LVM_NAME[i]} has null value!"
        return 1
    fi

    if [[ "${marker}" == "${LV_MARKER}" ]]; then
        if [[ -z "${SIXTH_ITEM[i]}" ]]; then
            echo >&2 "$(basename "$0"): check_lvm_item() error:"\
                "lv size item has null value!"
            return 1
        fi

        if [[ -z "${SEVENTH_ITEM[i]}" ]]; then
            echo >&2 "$(basename "$0"): check_lvm_item() error:"\
                "vg name item has null value!"
            return 1
        fi
    fi

    return 0
}
# construct_lvm_create_cmdline index
#
# Construct the creation command line for ${LVM_NAME[index]} and print it on
# stdout as one line with single spaces between words.
#
# Globals read: LINE_MARKER LVM_NAME OP_OPTS SIXTH_ITEM SEVENTH_ITEM,
#               PV_MARKER VG_MARKER LV_MARKER.
# An entry whose marker matches none of the three prints an empty line.
#
# The words are split with "read -a" instead of an unquoted echo: the line is
# still normalised to single spaces, but device patterns such as /dev/sd?1
# are no longer expanded against the LOCAL filesystem before the command is
# sent to the remote host.
construct_lvm_create_cmdline() {
    local -i i=$1
    local lvm_cmd
    local -a words

    case "${LINE_MARKER[i]}" in
    "${PV_MARKER}")
        lvm_cmd="pvcreate -ff -y ${OP_OPTS[i]} ${LVM_NAME[i]}"
        ;;
    "${VG_MARKER}")
        lvm_cmd="vgcreate ${OP_OPTS[i]} ${LVM_NAME[i]} ${SIXTH_ITEM[i]}"
        ;;
    "${LV_MARKER}")
        if [ -z "${LVM_NAME[i]}" ]; then
            # No LV name given: no -n option is passed to lvcreate
            lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
        else
            lvm_cmd="lvcreate -L ${SIXTH_ITEM[i]} -n ${LVM_NAME[i]} ${OP_OPTS[i]} ${SEVENTH_ITEM[i]}"
        fi
        ;;
    esac

    # read returns non-zero at end of input; the array is filled regardless.
    read -r -d '' -a words <<<"${lvm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}

# cmdline_rm_LVs vg_name
#
# Construct command line to remove all the LVs on $vg_name.
# If $vg_name is null, then remove all the LVs in the host.
# The command line is printed on stdout; nothing is executed here.
cmdline_rm_LVs() {
    local vg_name=$1
    local lvm_rm_cmd
    local -a words

    lvm_rm_cmd="vgchange -a n ${vg_name} &&"
    lvm_rm_cmd=${lvm_rm_cmd}" vgdisplay -v ${vg_name} | grep \"LV Name\" | awk '{print \$3}' |"
    lvm_rm_cmd=${lvm_rm_cmd}" while read lv; do lvremove -f \$lv; done"

    # Normalise whitespace without local pathname expansion
    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}

# cmdline_rm_LV lv_path
#
# Construct command line to remove LV $lv_path (deactivate, then remove).
# The command line is printed on stdout; nothing is executed here.
cmdline_rm_LV() {
    local lv_path=$1
    local lvm_rm_cmd
    local -a words

    lvm_rm_cmd="lvchange -a n ${lv_path} && lvremove -f ${lv_path}"

    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}


# cmdline_rm_VG vg_name
#
# Construct command line to remove VG $vg_name: first all the LVs on it
# (via cmdline_rm_LVs), then the VG itself.
# The command line is printed on stdout; nothing is executed here.
cmdline_rm_VG() {
    local vg_name=$1
    local lvm_rm_cmd
    local -a words

    # Remove all the LVs on this VG
    lvm_rm_cmd=$(cmdline_rm_LVs "${vg_name}")

    # Remove this VG
    lvm_rm_cmd=${lvm_rm_cmd}" && vgremove ${vg_name}"

    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}
# cmdline_rm_PVs
#
# Construct command line to remove all the PVs in the host: the command line
# from cmdline_rm_VGs (LVs and VGs) followed by a pvremove of every PV that
# pvdisplay lists.  The result is printed on stdout; nothing is executed.
cmdline_rm_PVs() {
    local lvm_rm_cmd
    local -a words

    # Remove all the LVs and VGs in the host
    lvm_rm_cmd=$(cmdline_rm_VGs)

    # Remove all the PVs in the host
    lvm_rm_cmd=${lvm_rm_cmd}" && pvdisplay | grep \"PV Name\" | awk '{print \$3}' |"
    lvm_rm_cmd=${lvm_rm_cmd}" while read pv; do pvremove -ff -y \$pv; done"

    # Print with single spaces between words.  "read -a" splits like an
    # unquoted echo would, but without expanding glob characters against the
    # local filesystem.  read returns non-zero at end of input by design.
    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}

# construct_lvm_teardown_cmdline index
#
# Construct the teardown command line for LVM devices in ${HOST_NAME[index]}.
# The scope depends on the line marker of entry @index:
#   LV - all LVs of the VG named in SEVENTH_ITEM (all LVs if it is empty)
#   VG - all VGs in the host
#   PV - all PVs in the host
# Any other marker prints an empty line.
construct_lvm_teardown_cmdline() {
    local -i i=$1
    local lvm_rm_cmd
    local -a words

    case "${LINE_MARKER[i]}" in
    "${LV_MARKER}")
        lvm_rm_cmd=$(cmdline_rm_LVs "${SEVENTH_ITEM[i]}")
        ;;
    "${VG_MARKER}")
        # Remove all the VGs in the host
        lvm_rm_cmd=$(cmdline_rm_VGs)
        ;;
    "${PV_MARKER}")
        # Remove all the PVs in the host
        lvm_rm_cmd=$(cmdline_rm_PVs)
        ;;
    esac

    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}

# construct_lvm_rm_cmdline index
#
# Construct the remove command line for LVM device ${LVM_NAME[index]}.
# LV and VG entries are delegated to cmdline_rm_LV / cmdline_rm_VG; a PV
# entry becomes a forced pvremove.  Any other marker prints an empty line.
construct_lvm_rm_cmdline() {
    local -i i=$1
    local lvm_rm_cmd
    local -a words

    case "${LINE_MARKER[i]}" in
    "${LV_MARKER}")
        lvm_rm_cmd=$(cmdline_rm_LV "${LVM_NAME[i]}")
        ;;
    "${VG_MARKER}")
        lvm_rm_cmd=$(cmdline_rm_VG "${LVM_NAME[i]}")
        ;;
    "${PV_MARKER}")
        lvm_rm_cmd="pvremove -ff -y ${LVM_NAME[i]}"
        ;;
    esac

    read -r -d '' -a words <<<"${lvm_rm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}
# config_lvm_devs host_name
#
# Run remote command to configure LVM devices in $host_name.  The command is
# started in the background; its PID and a printable form of the command are
# appended to REMOTE_PID / REMOTE_CMD at index pid_num.
#
# Globals read:    REMOTE EXPORT_PATH LVM_CMDLINE (set by
#                  construct_lvm_cmdline)
# Globals written: REMOTE_CMD REMOTE_PID pid_num
# Returns: 0 when the command was started or there is nothing to do,
#          1 when the command line could not be constructed.
config_lvm_devs() {
    local host_name=$1

    # Construct the LVM utilities command line
    if ! construct_lvm_cmdline "${host_name}"; then
        return 1
    fi

    if [ -z "${LVM_CMDLINE}" ]; then
        verbose_output "There are no LVM devices on host ${host_name}"\
            "needed to be configured."
        return 0
    fi

    # Run remote command to configure LVM devices in $host_name
    verbose_output "Configuring LVM devices in host ${host_name}..."
    verbose_output "Configure command line is: \"${LVM_CMDLINE}\""
    REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${LVM_CMDLINE}\""
    # ${REMOTE} is deliberately unquoted so that a multi-word value is split.
    ${REMOTE} "${host_name}" "(${EXPORT_PATH} ${LVM_CMDLINE})" >&2 &
    REMOTE_PID[pid_num]=$!
    pid_num=$((pid_num + 1))

    return 0
}

# Run remote command to configure all the LVM devices specified
# in the csv file.
#
# Every distinct host in HOST_NAME is handled once (configured_host decides
# whether a host has been seen), then all background commands recorded in
# REMOTE_PID are waited for.
#
# Globals read:    HOST_NAME REMOTE_PID REMOTE_CMD
# Globals written: NODE_NAME (reset and refilled), pid_num (shared with
#                  config_lvm_devs, reused here as the wait-loop index)
# Returns: 0 when nothing had to be done or every remote command succeeded,
#          1 when a command could not be started or any of them failed.
config_lvm() {
    local -i i=0
    local -i idx=0        # Index of NODE_NAME array
    local host_name
    local failed_status=false

    # Initialize the NODE_NAME array
    unset NODE_NAME

    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
        host_name=${HOST_NAME[i]}
        configured_host "${host_name}" && continue

        NODE_NAME[idx]=${host_name}
        idx=$((idx + 1))

        # Run remote command to configure LVM devices in $host_name
        if ! config_lvm_devs "${host_name}"; then
            return 1
        fi
    done

    if [[ ${#HOST_NAME[@]} -eq 0 || ${#REMOTE_PID[@]} -eq 0 ]]; then
        verbose_output "There are no LVM devices to be configured."
        return 0
    fi

    # Wait for the exit status of the background remote command
    verbose_output "Waiting for the return of the remote command..."
    for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
        if ! wait "${REMOTE_PID[pid_num]}"; then
            echo >&2 "$(basename "$0"): config_lvm() error: Failed"\
                "to execute \"${REMOTE_CMD[pid_num]}\"!"
            failed_status=true
        fi
    done

    if ${failed_status}; then
        return 1
    fi

    verbose_output "All the LVM devices are configured successfully!"
    return 0
}
# check_md_item index
#
# Check the items required for managing MD device ${MD_NAME[index]}.
#
# Globals read: HOST_NAME OP_MODE MD_NAME RAID_LEVEL MD_DEVS (parallel arrays
#               indexed by @index).
# Outputs: one diagnostic line on stderr for the first failed check.
# Returns: 0 when the entry is acceptable, 1 otherwise.
check_md_item() {
    # Check argument
    if [ $# -eq 0 ]; then
        echo >&2 "$(basename "$0"): check_md_item() error:"\
            "Missing argument!"
        return 1
    fi

    local -i i=$1

    # Check hostname
    if [[ -z "${HOST_NAME[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_md_item() error:"\
            "hostname item has null value!"
        return 1
    fi

    # Only create mode (an empty mode counts as "create") has further
    # required items.  [[ ]] replaces the obsolescent "[ ... -o ... ]" so a
    # mode string that looks like a test operator is compared as plain text.
    if [[ -n "${OP_MODE[i]}" && "${OP_MODE[i]}" != "create" ]]; then
        return 0
    fi

    # Check MD device name
    if [[ -z "${MD_NAME[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_md_item() error:"\
            "md name item has null value!"
        return 1
    fi

    if [[ -z "${RAID_LEVEL[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_md_item() error:"\
            "raid level item of MD device ${MD_NAME[i]} has null value!"
        return 1
    fi

    if [[ -z "${MD_DEVS[i]}" ]]; then
        echo >&2 "$(basename "$0"): check_md_item() error:"\
            "component devices item of ${MD_NAME[i]} has null value!"
        return 1
    fi

    return 0
}
# md_is_active host_name md_name
#
# Run remote command to check whether $md_name is listed in /proc/mdstat
# of @host_name.
#
# Only the last path component of $md_name is looked up, and it must be the
# complete first word of a line.  A plain substring search would report
# "md1" as active when only "md10" exists, or when md1 merely appears as a
# component ("md1[0]") of another array.
#
# Globals: REMOTE (remote command prefix, read).
# Returns: 0 - the array is listed
#          1 - the array is not listed (remote grep failed without output)
#          2 - error: empty device name, or the remote command failed and
#              printed something; a message goes to stderr
md_is_active() {
    local host_name=$1
    local md_name=$2
    local md_dev=${md_name##*/}
    local cmd ret_str

    # Without a name the remote grep would have no pattern argument.
    if [ -z "${md_dev}" ]; then
        echo >&2 "$(basename "$0"): md_is_active() error:"\
            "Missing MD device name!"
        return 2
    fi

    cmd="grep -q '^${md_dev} ' /proc/mdstat 2>&1"
    if ret_str=$(${REMOTE} "${host_name}" "${cmd}" 2>&1); then
        return 0            # active
    fi

    if [ -n "${ret_str}" ]; then
        echo >&2 "$(basename "$0"): md_is_active() error:"\
            "remote command to ${host_name} error: ${ret_str}!"
        return 2            # Error occurred
    fi

    return 1                # inactive
}
# construct_mdadm_rm_cmdline index
#
# Construct the remove operation command line of mdadm for ${MD_NAME[index]}
# and print it on stdout: stop the array, then zero the superblock of its
# component devices.
#
# Globals read: MDADM MD_NAME MD_DEVS.
#
# Differences from a plain "echo ${cmd}":
#  - words are split with "read -a", so device patterns in MD_DEVS are kept
#    as written instead of being expanded against the local filesystem;
#  - the --zero-superblock step is left out when nothing but "missing"
#    placeholders remain, since it would be given no device at all.
construct_mdadm_rm_cmdline() {
    local -i i=$1
    local mdadm_cmd
    local -a real_devs words

    # Deactivate the MD array, releasing all resources
    mdadm_cmd="${MDADM} -S ${MD_NAME[i]}"

    # Remove the "missing" devices from the component devices.
    # read returns non-zero at end of input; the array is filled regardless.
    read -r -d '' -a real_devs <<<"${MD_DEVS[i]//missing/}"

    if [ ${#real_devs[@]} -gt 0 ]; then
        # Overwrite the superblock with zeros
        mdadm_cmd=${mdadm_cmd}" && ${MDADM} --zero-superblock ${real_devs[*]}"
    fi

    read -r -d '' -a words <<<"${mdadm_cmd}"
    printf '%s\n' "${words[*]}"
    return 0
}
mdadm_cmd= + if [ "${host_name}" = "${HOST_NAME[i]}" ]; then + case "${OP_MODE[i]}" in + "" | create) + # Check the status of the MD array + md_is_active ${host_name} ${MD_NAME[i]} + rc=$? + if [ "$rc" -eq "2" ]; then + return 1 + elif [ "$rc" -eq "0" ]; then + OK= + echo -n "`basename $0`: ${MD_NAME[i]} is active on"\ + "${host_name}, go ahead to deactivate it and create"\ + "the new array? [y/n]:" + read OK + if [ "${OK}" = "n" ]; then + echo "`basename $0`: ${MD_NAME[i]} on host"\ + "${host_name} remains as it is." + continue + fi + + # Construct the remove command line + mdadm_stop_cmd=$(construct_mdadm_rm_cmdline ${i}) + fi + + # Construct the create command line + mdadm_cmd=$(construct_mdadm_create_cmdline ${i}) + if [ $? -ne 0 ]; then + echo >&2 "${mdadm_cmd}" + return 1 + fi + + [ -n "${mdadm_stop_cmd}" ] && mdadm_cmd=${mdadm_stop_cmd}" && "${mdadm_cmd} + ;; + remove) + if [ -z "${MD_NAME[i]}" ]; then + OK= + echo -n "`basename $0`: Do you really want to remove"\ + "all the MD devices in the host ${HOST_NAME[i]}? [y/n]:" + read OK + if [ "${OK}" = "n" ]; then + echo "`basename $0`: MD devices on host"\ + "${HOST_NAME[i]} remain as they are." + continue + fi + + # Construct the teardown command line + mdadm_cmd="(cat /proc/mdstat | egrep \"^md[[:digit:]]\" |" + mdadm_cmd=${mdadm_cmd}" while read md rest; do ${MDADM} -S /dev/\$md; done)" + else + # Construct the remove command line + mdadm_cmd=$(construct_mdadm_rm_cmdline ${i}) + fi + ;; + *) + # Other operations + mdadm_cmd="${MDADM} ${OP_MODE[i]} ${MD_NAME[i]} ${OP_OPTS[i]} ${MD_DEVS[i]}" + ;; + esac + + if [ -z "${MDADM_CMDLINE}" ]; then + MDADM_CMDLINE=${mdadm_cmd} + else + MDADM_CMDLINE=${MDADM_CMDLINE}" && "${mdadm_cmd} + fi + fi + done + + return 0 +} + +# config_md_devs host_name +# +# Run remote command to configure MD devices in $host_name +config_md_devs() { + local host_name=$1 + + # Construct mdadm command line + if ! 
# Run remote command to configure all the MD devices specified in the csv
# file.
#
# Every distinct host in HOST_NAME is handled once (configured_host decides
# whether a host has been seen), then all background commands recorded in
# REMOTE_PID are waited for.
#
# Globals read:    HOST_NAME REMOTE_PID REMOTE_CMD
# Globals written: NODE_NAME (reset and refilled), pid_num (shared with
#                  config_md_devs, reused here as the wait-loop index)
# Returns: 0 when nothing had to be done or every remote command succeeded,
#          1 when config_md_devs fails or any remote command failed.
config_md() {
    local -i i=0
    local -i idx=0        # Index of NODE_NAME array
    local host_name
    local failed_status=false

    # Initialize the NODE_NAME array
    unset NODE_NAME

    for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
        host_name=${HOST_NAME[i]}
        configured_host "${host_name}" && continue

        NODE_NAME[idx]=${host_name}
        idx=$((idx + 1))

        # Run remote command to configure MD devices in $host_name
        if ! config_md_devs "${host_name}"; then
            return 1
        fi
    done

    if [[ ${#HOST_NAME[@]} -eq 0 || ${#REMOTE_PID[@]} -eq 0 ]]; then
        verbose_output "There are no MD devices to be configured."
        return 0
    fi

    # Wait for the exit status of the background remote command
    verbose_output "Waiting for the return of the remote command..."
    for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
        if ! wait "${REMOTE_PID[pid_num]}"; then
            echo >&2 "$(basename "$0"): config_md() error: Failed"\
                "to execute \"${REMOTE_CMD[pid_num]}\"!"
            failed_status=true
        fi
    done

    if ${failed_status}; then
        return 1
    fi

    verbose_output "All the MD devices are configured successfully!"
    return 0
}
# Generate a temp file contains lnet options lines
#
# ${NETWORKS} holds one or more option lines separated by the two-character
# sequence backslash + "n".  Each segment is written on its own line of
# ${MODLINES_FILE}, between ${START_MARKER} and ${END_MARKER}.
#
# Globals read: NETWORKS START_MARKER END_MARKER MODLINES_FILE.
# Returns: 0.
#
# The loop stops when no separator is left.  Stopping when the remainder
# equals the segment just written would drop a final line that happens to be
# identical to the one before it.  Segments are written verbatim (quoted), so
# characters such as "*" or "[...]" in module options are not expanded
# against files in the current directory.
generate_lnet_lines() {
    local rest=${NETWORKS}

    {
        printf '%s\n' "${START_MARKER}"
        printf '%s\n' "# Lustre module options added automatically by $(basename "$0")"
        while [[ "${rest}" == *'\n'* ]]; do
            printf '%s\n' "${rest%%\\n*}"
            rest=${rest#*\\n}
        done
        printf '%s\n' "${rest}"
        printf '%s\n' "${END_MARKER}"
    } > "${MODLINES_FILE}"

    #echo "--------------${MODLINES_FILE}--------------"
    #cat ${MODLINES_FILE}
    #echo -e "------------------------------------------\n"

    return 0
}
# print_header
#
# Append the global settings section of the mon configuration to the file
# named by $TMPMONCFG.  $MONCFGDIR and $MONBASEDIR are expanded inside the
# here-document when the function runs.
#
# NOTE(review): the leading whitespace of the here-document lines is not
# recoverable from this copy of the file.  "<<-" strips leading TABs only,
# so the lines are assumed to be tab-indented - confirm against the
# original before applying.
print_header() {
	cat >> $TMPMONCFG <<-EOF
	cfbasedir     = $MONCFGDIR
	alertdir      = $MONBASEDIR/alert.d
	mondir        = $MONBASEDIR/mon.d
	statedir      = $MONBASEDIR/state.d
	logdir        = $MONBASEDIR/log.d
	dtlogfile     = $MONBASEDIR/log.d/downtime.log
	maxprocs      = 20
	histlength    = 100
	randstart     = 60s
	authtype      = getpwnam
EOF
}
# Start of script
#
# Collect the host names (-n) and Lustre object names (-o) given on the
# command line, then write the mon configuration to $TMPMONCFG: the header,
# two hostgroups per object, and two watches per object.

if [ $# -eq 0 ]; then
    usage
fi

# This script should work for any number of hosts
#
HOSTCNT=0
OBJCNT=0

declare -a HOSTS
declare -a OBJS

while getopts "n:o:" opt; do
    case $opt in
    n)
        HOSTS[HOSTCNT]=$OPTARG
        HOSTCNT=$(( HOSTCNT + 1 ))
        ;;
    o)
        OBJS[OBJCNT]=$OPTARG
        OBJCNT=$(( OBJCNT + 1 ))
        ;;
    *)
        usage
        ;;
    esac
done

echo "Found $HOSTCNT hosts"
echo "Found $OBJCNT Lustre objects"

# First create the host groups
# we assume
# each object will have two watches defined
# each object hostgroup will have all objects

# Create the file with the declared goop
print_header

# The arrays are expanded quoted so that an object name is never word-split
# or glob-expanded; ${HOSTS[*]} joins the hosts with single spaces.
for obj in "${OBJS[@]}"
do
    echo "hostgroup ${obj}-obj ${HOSTS[*]}" >> "$TMPMONCFG"
    echo "hostgroup ${obj}-mon ${HOSTS[*]}" >> "$TMPMONCFG"
    echo "#" >> "$TMPMONCFG"
done

# create the monitors

for obj in "${OBJS[@]}"
do
    print_trap_send "$obj"
    print_trap_rec "$obj"
done

echo "Mon config completed - new mon config is $TMPMONCFG"
exit 0
+ usage + esac +done + +# Toss out the parameters we've already processed +shift `expr $OPTIND - 1` + +# Here we expect the csv file +if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: Missing csv file!" + usage +fi + +# Global variables +CSV_FILE=$1 +declare -a HOST_NAMES +declare -a HOST_IPADDRS + +# Get the host names from the csv file +get_hostnames() { + local NAME CHECK_STR + declare -i i + declare -i j + + # Initialize the HOST_NAMES array + unset HOST_NAMES + + CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \ + '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'` + if [ -n "${CHECK_STR}" ]; then + echo >&2 $"`basename $0`: get_hostnames() error: Missing"\ + "hostname field in the line - ${CHECK_STR}" + return 1 + fi + + i=0 + for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\ + | awk -F, '/[[:alnum:]]/{print $1}'` + do + for ((j = 0; j < ${#HOST_NAMES[@]}; j++)); do + [ "${NAME}" = "${HOST_NAMES[j]}" ] && continue 2 + done + + HOST_NAMES[i]=${NAME} + i=$i+1 + done + + return 0 +} + +# ping_host host_name +# Check whether host $host_name is reachable. +# If it is, then return the IP address of this host. +ping_host() { + local host_name=$1 + local ip_addr= + local ret_str + + if [ -z "${host_name}" ]; then + echo "`basename $0`: ping_host() error: Missing hostname!" + return 1 + fi + + # Run ping command + ret_str=`ping -c1 ${host_name} 2>&1` + if [ $? -ne 0 ]; then + if [ -n "${ret_str}" ]; then + echo "`basename $0`: ping_host() error: ${ret_str}!" + else + echo "`basename $0`: ping_host() error:"\ + "Host ${host_name} does not respond to ping!" + fi + return 1 + fi + + # Get the IP address + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \ + sed -e 's/^(//' -e 's/)$//'` + + echo "${ip_addr}" + return 0 +} + +# local_check index +# Check the network connectivity between local host and ${HOST_NAMES[index]}. 
+local_check() { + declare -i i=$1 + + # Check whether ${HOST_NAMES[i]} is reachable + # and get the IP address of this host from ping + HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${HOST_IPADDRS[i]}" + return 1 + fi + + return 0 +} + +# remote_check index +# Check whether ${HOST_NAMES[index]} can resolve its own name and whether +# this host agrees with the local host about what its name is resolved to. +remote_check() { + declare -i i=$1 + local cmd ret_str + local ip_addr= # the IP address got from remote ping + + # Execute remote command to check whether ${HOST_NAMES[i]} + # can resolve its own name + cmd="ping -c1 ${HOST_NAMES[i]} 2>&1" + ret_str=`${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "remote to ${HOST_NAMES[i]} error: ${ret_str}!" + return 1 + fi + + if [ -z "${ret_str}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "No results from ${HOST_NAMES[i]}! Check the network"\ + "connectivity between local host and ${HOST_NAMES[i]}!" + return 1 + fi + + # Get the IP address of ${HOST_NAMES[i]} from its own ping + if is_pdsh; then + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'` + else + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'` + fi + ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'` + + # Compare IP addresses + # Check whether ${HOST_NAMES[i]} agrees with the local host + # about what its name is resolved to. + if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "Local host resolves ${HOST_NAMES[i]} to IP address"\ + "\"${HOST_IPADDRS[i]}\", while its own resolution is"\ + "\"${ip_addr}\". They are not the same!" 
+ return 1 + fi + + return 0 +} + +# network_verify +# Verify name resolution and network connectivity of the Lustre cluster +network_verify() { + declare -i i + + # Initialize the HOST_IPADDRS array + unset HOST_IPADDRS + + # Get all the host names from the csv file + ! get_hostnames && return 1 + + # Check the network connectivity between local host + # and other cluster nodes + for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do + [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue + + verbose_output "Verifying network connectivity between"\ + "\"`hostname`\" and \"${HOST_NAMES[i]}\"..." + ! local_check $i && return 1 + ! remote_check $i && return 1 + verbose_output "OK" + done + + return 0 +} + +# Main flow +if ! check_file ${CSV_FILE}; then + exit 1 +fi + +# Cluster network verification +if ! network_verify; then + exit 1 +fi + +exit 0 diff --git a/lustre/scripts/lc_servip.sh b/lustre/scripts/lc_servip.sh new file mode 100755 index 0000000..779aa34 --- /dev/null +++ b/lustre/scripts/lc_servip.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# +# lc_servip.sh - script for verifying the service IP and the real +# interface IP in a remote host are in the same subnet +# +############################################################################### + +# Usage +usage() { + cat >&2 < + + service IPaddr the IP address to failover + hostname the hostname of the remote node + +EOF + exit 1 +} + +# Check arguments +if [ $# -lt 2 ]; then + usage +fi + +# Remote command +REMOTE=${REMOTE:-"ssh -x -q"} + +# Check whether the reomte command is pdsh +is_pdsh() { + if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then + return 1 + fi + + return 0 +} + +# +# inSameIPsubnet serviceIPaddr interfaceIPaddr mask +# +# Given two IP addresses and a subnet mask determine if these IP +# addresses are in the same subnet. If they are, return 0, else return 1. 
#
# Arguments: $1 - service IP address      (dotted decimal, e.g. 192.168.1.163)
#            $2 - real interface address  (dotted decimal)
#            $3 - subnet mask             (dotted decimal)
# Outputs:   a diagnostic on stderr when the arguments are malformed or
#            when the two addresses are in different subnets
# Returns:   0 if both addresses are in the same subnet, 1 otherwise
#
inSameIPsubnet() {
	local -a ip1 ip2 mask quads	# addresses split into their quads
	local -i n quad1 quad2		# loop index, calculated quad words
	local addr quad
	local valid

	#
	# Refuse anything that is not exactly four decimal quads in the
	# range 0-255.  Without this check an empty or truncated argument
	# (for example an empty netmask) would reach the arithmetic below
	# and produce a shell arithmetic error instead of a clean failure.
	#
	for addr in "$1" "$2" "$3"; do
		valid=true
		case "${addr}" in
		"" | *[!0-9.]* | .* | *. | *..*)
			valid=false
			;;
		esac

		if ${valid}; then
			IFS=. read -r -a quads <<< "${addr}"
			if [ ${#quads[@]} -ne 4 ]; then
				valid=false
			fi
			for quad in "${quads[@]}"; do
				# 10# forces base ten so that "08" is not
				# rejected as an invalid octal number
				if [ ${#quad} -gt 3 ] || [ $((10#${quad})) -gt 255 ]; then
					valid=false
				fi
			done
		fi

		if ! ${valid}; then
			echo >&2 "$(basename "$0"): inSameIPsubnet() error:"\
			"Invalid dotted decimal address \"${addr}\"!"
			return 1
		fi
	done

	#
	# Remove '.' characters from dotted decimal notation and save
	# in arrays. i.e.
	#
	#	192.168.1.163 -> array[0] = 192
	#	                 array[1] = 168
	#	                 array[2] = 1
	#	                 array[3] = 163
	#
	IFS=. read -r -a ip1 <<< "$1"
	IFS=. read -r -a ip2 <<< "$2"
	IFS=. read -r -a mask <<< "$3"

	#
	# For each quad word, logically AND the IP address with the subnet
	# mask to get the network/subnet quad word.  If the resulting
	# quad words for both IP addresses are the same they are in the
	# same IP subnet.
	#
	for n in 0 1 2 3; do
		quad1=$(( 10#${ip1[n]} & 10#${mask[n]} ))
		quad2=$(( 10#${ip2[n]} & 10#${mask[n]} ))

		if [ "${quad1}" -ne "${quad2}" ]; then
			echo >&2 "$(basename "$0"): Service IP address $1 and"\
			"real interface IP address $2 are in"\
			"different subnets!"
			return 1	# in different subnets
		fi
	done

	return 0	# in the same subnet, all quad words matched
}

#
# findInterface IPaddr hostname
#
# Given a target IP address and a hostname, find the interface in which
# this address is configured.  If found return 0, if not return 1.  The
# interface name is returned to stdout.
#
# Arguments: $1 - IP address to look for
#            $2 - host on which ${REMOTE} runs /sbin/ifconfig
# Globals:   REMOTE (read) - remote command, split into words on purpose
# Calls:     is_pdsh - when it succeeds, the first word of every output
#            line is treated as a host prefix and dropped
#
findInterface() {
	local target=$1
	local hostname=$2
	local intf=		# interface owning the stanza being scanned
	local addr
	local field
	local -a fields
	local strip_prefix=false

	is_pdsh && strip_prefix=true

	# The ifconfig output is a sequence of stanzas separated by blank
	# lines; the first word of a stanza is the interface name.
	while read -r -a fields
	do
		# Drop the host prefix on every line, so that a separator
		# line which carries nothing but the prefix is still
		# recognized as blank.
		${strip_prefix} && fields=("${fields[@]:1}")

		if [ ${#fields[@]} -eq 0 ]; then	# go to next interface
			intf=
			continue
		fi

		if [ -z "${intf}" ]; then
			intf=${fields[0]}
		fi

		for field in "${fields[@]}"; do
			case "${field}" in
			addr:*)
				addr=${field##addr:}
				if [ -n "${addr}" ] && [ "${addr}" = "${target}" ]
				then
					echo "${intf}"
					return 0
				fi
				;;
			esac
		done
	done < <(${REMOTE} "${hostname}" /sbin/ifconfig)

	echo >&2 "$(basename "$0"): Cannot find the interface in which" \
		  "$target is configured in the host $hostname!"
	return 1
}

#
# findNetmask interface hostname
#
# Given an interface find the netmask addresses associated with it.
# Return 0 when found, else return 1. The netmask is returned to stdout.
#
# Arguments: $1 - interface name passed to /sbin/ifconfig
#            $2 - host on which ${REMOTE} runs /sbin/ifconfig
# Globals:   REMOTE (read) - remote command, split into words on purpose
# Calls:     is_pdsh - when it succeeds, the first word of every output
#            line is treated as a host prefix and dropped
#
findNetmask() {
	local target=$1
	local hostname=$2
	local field
	local -a fields
	local strip_prefix=false

	is_pdsh && strip_prefix=true

	while read -r -a fields
	do
		${strip_prefix} && fields=("${fields[@]:1}")

		# The first "Mask:<netmask>" word found is the answer
		for field in "${fields[@]}"; do
			case "${field}" in
			Mask:*)
				echo "${field##*:}"	# return netmask addr
				return 0
				;;
			esac
		done
	done < <(${REMOTE} "${hostname}" /sbin/ifconfig "${target}")

	echo >&2 "$(basename "$0"): Cannot find the netmask associated with" \
		  "the interface $target in the host $hostname!"
	return 1
}

#
# check_srvIPaddr serviceIPaddr hostname
#
# Given a service IP address and hostname, check whether the service IP address
# and the real interface IP address of hostname are in the same subnet.
# If they are, return 0, else return 1.
+# +check_srvIPaddr() { + declare real_IPaddr + declare real_intf + declare netmask + declare srv_IPaddr=$1 + declare hostname=$2 + + # Get the corresponding IP address of the hostname from /etc/hosts table + real_IPaddr=`egrep "[[:space:]]$hostname([[:space:]]|$)" /etc/hosts \ + | awk '{print $1}'` + if [ -z "$real_IPaddr" ]; then + echo >&2 "`basename $0`: Hostname $hostname does not exist in" \ + "the local /etc/hosts table!" + return 1 + fi + + if [ ${#real_IPaddr} -gt 15 ]; then + echo >&2 "`basename $0`: More than one IP address line" \ + "corresponding to $hostname in the local" \ + "/etc/hosts table!" + return 1 + fi + + # Get the interface in which the real IP address is configured + real_intf=$(findInterface $real_IPaddr $hostname) + if [ $? -ne 0 ]; then + return 1 + fi + real_intf=${real_intf%%:*} + + # Get the netmask address associated with the real interface + netmask=$(findNetmask $real_intf $hostname) + if [ $? -ne 0 ]; then + return 1 + fi + + # Determine if the service IP address and the real IP address + # are in the same subnet + inSameIPsubnet $srv_IPaddr $real_IPaddr $netmask + if [ $? -ne 0 ]; then + return 1 + fi + + return 0 +} + +# Check service IP address +if ! check_srvIPaddr $1 $2; then + exit 1 +fi +exit 0 diff --git a/lustre/scripts/lmc2csv.pl b/lustre/scripts/lmc2csv.pl new file mode 100644 index 0000000..cabd2ce --- /dev/null +++ b/lustre/scripts/lmc2csv.pl @@ -0,0 +1,214 @@ +#!/usr/bin/perl + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + +# +# convert an lmc batch file to a csv file for lustre_config.sh +# +use strict; use warnings; + +use Data::Dumper; + +sub get_arg_val { + my $arg = shift; + my ($aref) = @_; + for (my $i = 0; $i <= $#$aref; $i++) { + if ($$aref[$i] eq "--" . $arg) { + my @foo = splice(@$aref, $i, 2); + return $foo[1]; + } + } +} + +sub get_arg { + my $arg = shift; + my ($aref) = @_; + for (my $i = 0; $i <= $#$aref; $i++) { + if ($$aref[$i] eq "--" . 
$arg) { + splice(@$aref, $i, 1); + return 1; + } + } + + return 0; +} + +sub add_net { + my $net = {}; + $net->{"node"} = get_arg_val("node", \@_); + $net->{"nid"} = get_arg_val("nid", \@_); + $net->{"nettype"} = get_arg_val("nettype", \@_); + $net->{"port"} = get_arg_val("port", \@_); + if ($#_ > 0) { + print STDERR "Unknown arguments to \"--add net\": @_\n"; + exit(1); + } + return $net; +} + +sub add_mds { + my $mds = {}; + $mds->{"node"} = get_arg_val("node", \@_); + $mds->{"mds"} = get_arg_val("mds", \@_); + $mds->{"fstype"} = get_arg_val("fstype", \@_); + $mds->{"dev"} = get_arg_val("dev", \@_); + $mds->{"size"} = get_arg_val("size", \@_); + if ($#_ > 0) { + print STDERR "Unknown arguments to \"--add mds\": @_\n"; + exit(1); + } + return $mds; +} + +sub add_lov { + my $lov = {}; + $lov->{"lov"} = get_arg_val("lov", \@_); + $lov->{"mds"} = get_arg_val("mds", \@_); + $lov->{"stripe_sz"} = get_arg_val("stripe_sz", \@_); + $lov->{"stripe_cnt"} = get_arg_val("stripe_cnt", \@_); + $lov->{"stripe_pattern"} = get_arg_val("stripe_pattern", \@_); + if ($#_ > 0) { + print STDERR "Unknown arguments to \"--add lov\": @_\n"; + exit(1); + } + return $lov; +} + +sub add_ost { + my $ost = {}; + $ost->{"node"} = get_arg_val("node", \@_); + $ost->{"ost"} = get_arg_val("ost", \@_); + $ost->{"fstype"} = get_arg_val("fstype", \@_); + $ost->{"dev"} = get_arg_val("dev", \@_); + $ost->{"size"} = get_arg_val("size", \@_); + $ost->{"lov"} = get_arg_val("lov", \@_); + $ost->{"mountfsoptions"} = get_arg_val("mountfsoptions", \@_); + $ost->{"failover"} = get_arg("failover", \@_); + if ($#_ > 0) { + print STDERR "Unknown arguments to \"--add ost\": @_\n"; + exit(1); + } + return $ost; +} + +sub add_mtpt { + my $mtpt = {}; + $mtpt->{"node"} = get_arg_val("node", \@_); + $mtpt->{"path"} = get_arg_val("path", \@_); + $mtpt->{"mds"} = get_arg_val("mds", \@_); + $mtpt->{"lov"} = get_arg_val("lov", \@_); + if ($#_ > 0) { + print STDERR "Unknown arguments to \"--add mtpt\": @_\n"; + exit(1); + } 
+ return $mtpt; +} + +no strict 'refs'; + +sub find_obj { + my $type = shift; + my $key = shift; + my $value = shift; + my @objs = @_; + + foreach my $obj (@objs) { + if ($obj->{$key} eq $value) { + return $obj; + } + } +} + +sub lnet_options { + my $net = shift; + + my $options_str = "options lnet networks=" . $net->{"nettype"} . + " accept=all"; + if (defined($net->{"port"})) { + $options_str .= " accept_port=" . $net->{"port"}; + } + return $options_str; + +} + +# main + +my %objs; +my @mgses; + +my $MOUNTPT = "/mnt"; +if (defined($ENV{"MOUNTPT"})) { + $MOUNTPT = $ENV{"MOUNTPT"}; +} + +while(<>) { + my @args = split; + + for (my $i = 0; $i <= $#args; $i++) { + if ($args[$i] eq "--add") { + my $type = "$args[$i + 1]"; + my $subref = "add_$type"; + splice(@args, $i, 2); + push(@{$objs{$type}}, &$subref(@args)); + last; + } + if ($i == $#args) { + print STDERR "I don't know how to handle @args\n"; + exit(1); + } + } +} + +# link lovs to mdses +foreach my $lov (@{$objs{"lov"}}) { + my $mds = find_obj("mds", "mds", $lov->{"mds"}, @{$objs{"mds"}}); + $mds->{"lov"} = $lov; +} +# XXX could find failover pairs of osts and mdts here and link them to +# one another and then fill in their details in the csv generators below +foreach my $mds (@{$objs{"mds"}}) { + # find the net for this node + my $net = find_obj("net", "node", $mds->{"node"}, @{$objs{"net"}}); + my $lov = $mds->{"lov"}; + my $mkfs_options=""; + if (defined($lov->{"stripe_sz"})) { + $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . ","; + } + if (defined($lov->{"stripe_cnt"})) { + $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . ","; + } + if (defined($lov->{"stripe_pattern"})) { + $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . 
","; + } + chop($mkfs_options); + if ($mkfs_options ne "") { + $mkfs_options = " --param=\"$mkfs_options\""; + } + + printf "%s,%s,%s,$MOUNTPT/%s,mgs|mdt,,,,--device-size=%s --noformat%s,,noauto\n", + $mds->{"node"}, + lnet_options($net), + $mds->{"dev"}, + $mds->{"mds"}, + $mds->{"size"}, + $mkfs_options; + + push(@mgses, $net->{"nid"}); +} + +foreach my $ost (@{$objs{"ost"}}) { + # find the net for this node + my $mount_opts="noauto"; + if (defined($ost->{"mountfsoptions"})) { + $mount_opts .= "," . $ost->{"mountfsoptions"}; + } + my $net = find_obj("net", "node", $ost->{"node"}, @{$objs{"net"}}); + printf "%s,%s,%s,$MOUNTPT/%s,ost,,\"%s\",,--device-size=%s --noformat,,\"%s\"\n", + $ost->{"node"}, + lnet_options($net), + $ost->{"dev"}, + $ost->{"ost"}, + join(",", @mgses), + $ost->{"size"}, + $mount_opts; +} diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index 3b6b640a..bb59d1e 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -30,6 +30,27 @@ SERVICE=${0##*/} : ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml} : ${LCONF:=/usr/sbin/lconf} : ${LCTL:=/usr/sbin/lctl} +# Some distros use modprobe.conf.local +if [ -f /etc/modprobe.conf.local ]; then + : ${MODPROBE_CONF:=/etc/modprobe.conf.local} +else + : ${MODPROBE_CONF:=/etc/modprobe.conf} +fi +# Be sure the proper directories are in PATH. 
+export PATH="/sbin:$PATH" + +case "$SERVICE" in + [SK][[:digit:]][[:digit:]]lustre | lustre) + SERVICE="lustre" + : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} + : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} + ;; + *) + : ${LCONF_START_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} ${LUSTRE_CONFIG_XML}"} + : ${LCONF_STOP_ARGS:="--group ${SERVICE} --select ${SERVICE}=${HOSTNAME} --failover --cleanup ${LUSTRE_CONFIG_XML}"} + ;; +esac +LOCK=/var/lock/subsys/$SERVICE case "$SERVICE" in [SK][[:digit:]][[:digit:]]lustre | lustre) @@ -101,7 +122,14 @@ EOF RETVAL=4 # insufficent privileges return fi - ${LCONF} ${LCONF_START_ARGS} + # Cat the modprobe file and place all lines that follow a trailing backslash on the same line + ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"` + if [[ ! -z ${ROUTER} ]]; then + modprobe lnet + ${LCTL} network configure + else + ${LCONF} ${LCONF_START_ARGS} + fi RETVAL=$? echo $SERVICE if [ $RETVAL -eq 0 ]; then @@ -120,7 +148,20 @@ stop() { RETVAL=4 # insufficent privileges return fi - ${LCONF} ${LCONF_STOP_ARGS} + # Cat the modprobe file and place all lines that follow a trailing backslash on the same line + ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s/\\[:space:]\*\n//g' | grep lnet | grep forwarding=\"enabled\"` + if [[ ! -z ${ROUTER} ]]; then + MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet` + if [[ ! -z ${MODULE_LOADED} ]]; then + ${LCTL} network unconfigure + fi + ${LCTL} modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 + # do it again, in case we tried to unload ksocklnd too early + ${LCTL} modules | awk '{ print $2 }' | xargs rmmod + + else + ${LCONF} ${LCONF_STOP_ARGS} + fi RETVAL=$? 
echo $SERVICE rm -f $LOCK @@ -150,13 +191,24 @@ status() { [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0 # check for any configured devices (may indicate partial startup) - [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150 + if [ -d /proc/fs/lustre ]; then + [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150 - # check for either a server or a client filesystem - MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`" - OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`" - LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`" - [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0 + # check for either a server or a client filesystem + MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`" + OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`" + LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`" + [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0 + else + # check if this is a router + if [ -d /proc/sys/lnet ]; then + ROUTER="`cat /proc/sys/lnet/routes | head -1 | grep -i -c \"Routing enabled\"`" + if [[ ! -z ${ROUTER} && ${ROUTER} -ge 1 ]]; then + STATE="running" + RETVAL=0 + fi + fi + fi # check for server disconnections DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`" diff --git a/lustre/scripts/lustre_config.sh.in b/lustre/scripts/lustre_config.sh.in new file mode 100644 index 0000000..d703029 --- /dev/null +++ b/lustre/scripts/lustre_config.sh.in @@ -0,0 +1,1183 @@ +#!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + +# +# lustre_config.sh - format and set up multiple lustre servers from a csv file +# +# This script is used to parse each line of a spreadsheet (csv file) and +# execute remote commands to format (mkfs.lustre) every Lustre target +# that will be part of the Lustre cluster. 
+# +# In addition, it can also verify the network connectivity and hostnames in +# the cluster, configure Linux MD/LVM devices and produce High-Availability +# software configurations for Heartbeat or CluManager. +# +################################################################################ + +# Usage +usage() { + cat >&2 < + + This script is used to format and set up multiple lustre servers from a + csv file. + + -h help and examples + -t HAtype produce High-Availability software configurations + The argument following -t is used to indicate the High- + Availability software type. The HA software types which + are currently supported are: hbv1 (Heartbeat version 1) + and hbv2 (Heartbeat version 2). + -n no net - don't verify network connectivity and hostnames + in the cluster + -d configure Linux MD/LVM devices before formatting the + Lustre targets + -f force-format the Lustre targets using --reformat option + -m no fstab change - don't modify /etc/fstab to add the new + Lustre targets + If using this option, then the value of "mount options" + item in the csv file will be passed to mkfs.lustre, else + the value will be added into the /etc/fstab. + -v verbose mode + csv file a spreadsheet that contains configuration parameters + (separated by commas) for each target in a Lustre cluster + +EOF + exit 1 +} + +# Samples +sample() { + cat <&2 $"`basename $0`: Invalid HA software type" \ + "- ${HATYPE_OPT}!" + usage + fi + ;; + n) + VERIFY_CONNECT=false + ;; + d) + CONFIG_MD_LVM=true + ;; + f) + REFORMAT_OPTION=$"--reformat " + ;; + m) + MODIFY_FSTAB=false + ;; + h) + sample + ;; + v) + VERBOSE_OPT=$" -v" + VERBOSE_OUTPUT=true + ;; + ?) + usage + esac +done + +# Toss out the parameters we've already processed +shift `expr $OPTIND - 1` + +# Here we expect the csv file +if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: Missing csv file!" 
+ usage +fi + +# Check the items required for OSTs, MDTs and MGS +# +# When formatting an OST, the following items: hostname, module_opts, +# device name, device type and mgs nids, cannot have null value. +# +# When formatting an MDT or MGS, the following items: hostname, +# module_opts, device name and device type, cannot have null value. +check_item() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: check_item() error: Missing argument"\ + "for function check_item()!" + return 1 + fi + + declare -i i=$1 + + # Check hostname, module_opts, device name and device type + if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\ + ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: Some required"\ + "item has null value! Check hostname, module_opts,"\ + "device name and device type!" + return 1 + fi + + # Check mgs nids + if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\ + "item has null value!" + return 1 + fi + + # Check mount point + if [ -z "${MOUNT_POINT[i]}" ]; then + echo >&2 $"`basename $0`: check_item() error: mount"\ + "point item of target ${DEVICE_NAME[i]} has null value!" 
+ return 1 + fi + + return 0 +} + +# Get the number of MGS nodes in the cluster +get_mgs_num() { + INIT_IDX=0 + MGS_NUM=${#MGS_NODENAME[@]} + [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \ + && let "MGS_NUM += 1" +} + +# is_mgs_node hostname +# Verify whether @hostname is a MGS node +is_mgs_node() { + local host_name=$1 + declare -i i + + get_mgs_num + for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do + [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0 + done + + return 1 +} + +# Check whether the MGS nodes are in the same failover group +check_mgs_group() { + declare -i i + declare -i j + declare -i idx + local mgs_node + + get_mgs_num + for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do + mgs_node=${MGS_NODENAME[i]} + for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do + [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1 + + idx=${MGS_IDX[j]} + if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ] + then + echo >&2 $"`basename $0`: check_mgs_group() error:"\ + "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\ + "failover group!" + return 1 + fi + done + done + + return 0 +} + +# Get and check MGS servers. +# There should be no more than one MGS specified in the entire csv file. +check_mgs() { + declare -i i + declare -i j + declare -i exp_idx # Index of explicit MGS servers + declare -i imp_idx # Index of implicit MGS servers + local is_exp_mgs is_imp_mgs + local mgs_node + + # Initialize the MGS_NODENAME and MGS_IDX arrays + unset MGS_NODENAME + unset MGS_IDX + + exp_idx=1 + imp_idx=1 + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + is_exp_mgs=false + is_imp_mgs=false + + # Check whether this node is an explicit MGS node + # or an implicit one + if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then + verbose_output "Explicit MGS target" \ + "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." 
+ is_exp_mgs=true + fi + + if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then + verbose_output "Implicit MGS target" \ + "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." + is_imp_mgs=true + fi + + # Get and check MGS servers + if ${is_exp_mgs} || ${is_imp_mgs}; then + # Check whether more than one MGS target in one MGS node + if is_mgs_node ${HOST_NAME[i]}; then + echo >&2 $"`basename $0`: check_mgs() error:"\ + "More than one MGS target in the same node -"\ + "\"${HOST_NAME[i]}\"!" + return 1 + fi + + # Get and check primary MGS server and backup MGS server + if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ] + then + # Primary MGS server + if [ -z "${MGS_NODENAME[0]}" ]; then + if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ + || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then + echo >&2 $"`basename $0`: check_mgs() error:"\ + "There exist both explicit and implicit MGS"\ + "targets in the csv file!" + return 1 + fi + MGS_NODENAME[0]=${HOST_NAME[i]} + MGS_IDX[0]=$i + else + mgs_node=${MGS_NODENAME[0]} + if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ] + then + echo >&2 $"`basename $0`: check_mgs() error:"\ + "More than one primary MGS nodes in the csv" \ + "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!" + else + echo >&2 $"`basename $0`: check_mgs() error:"\ + "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\ + "are failover pair, one of them should use"\ + "\"--noformat\" in the format options item!" + fi + return 1 + fi + else # Backup MGS server + if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ + || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then + echo >&2 $"`basename $0`: check_mgs() error:"\ + "There exist both explicit and implicit MGS"\ + "targets in the csv file!" 
+ return 1 + fi + + if ${is_exp_mgs}; then # Explicit MGS + MGS_NODENAME[exp_idx]=${HOST_NAME[i]} + MGS_IDX[exp_idx]=$i + exp_idx=$(( exp_idx + 1 )) + else # Implicit MGS + MGS_NODENAME[imp_idx]=${HOST_NAME[i]} + MGS_IDX[imp_idx]=$i + imp_idx=$(( imp_idx + 1 )) + fi + fi + fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}" + done + + # Check whether the MGS nodes are in the same failover group + if ! check_mgs_group; then + return 1 + fi + + return 0 +} + +# Construct the command line of mkfs.lustre +construct_mkfs_cmdline() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + "Missing argument for function construct_mkfs_cmdline()!" + return 1 + fi + + declare -i i=$1 + local mgsnids mgsnids_str + local failnids failnids_str + + MKFS_CMD=${MKFS}$" " + MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION} + + case "${DEVICE_TYPE[i]}" in + "ost") + MKFS_CMD=${MKFS_CMD}$"--ost " + ;; + "mdt") + MKFS_CMD=${MKFS_CMD}$"--mdt " + ;; + "mgs") + MKFS_CMD=${MKFS_CMD}$"--mgs " + ;; + "mdt|mgs" | "mgs|mdt") + MKFS_CMD=${MKFS_CMD}$"--mdt --mgs " + ;; + *) + echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ + "Invalid device type - \"${DEVICE_TYPE[i]}\"!" + return 1 + ;; + esac + + if [ -n "${FS_NAME[i]}" ]; then + MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" " + fi + + if [ -n "${MGS_NIDS[i]}" ]; then + mgsnids_str=${MGS_NIDS[i]} + for mgsnids in ${mgsnids_str//:/ }; do + MKFS_CMD=${MKFS_CMD}$"--mgsnode="${mgsnids}$" " + done + fi + + if [ -n "${INDEX[i]}" ]; then + MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" " + fi + + if [ -n "${FORMAT_OPTIONS[i]}" ]; then + MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" " + fi + + if [ -n "${MKFS_OPTIONS[i]}" ]; then + MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" " + fi + + if [ -n "${MOUNT_OPTIONS[i]}" ]; then + if ! 
${MODIFY_FSTAB}; then + MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" " + fi + fi + + if [ -n "${FAILOVERS[i]}" ]; then + failnids_str=${FAILOVERS[i]} + for failnids in ${failnids_str//:/ }; do + MKFS_CMD=${MKFS_CMD}$"--failnode="${failnids}$" " + done + fi + + MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]} + return 0 +} + +# Get all the node names in this failover group +get_nodenames() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: get_nodenames() error: Missing"\ + "argument for function get_nodenames()!" + return 1 + fi + + declare -i i=$1 + declare -i idx + local nids + + # Initialize the NODE_NAMES array + unset NODE_NAMES + + NODE_NAMES[0]=${HOST_NAME[i]} + + idx=1 + for nids in ${FAILOVERS_NAMES[i]//:/ } + do + NODE_NAMES[idx]=$(nids2hostname ${nids}) + if [ $? -ne 0 ]; then + echo >&2 "${NODE_NAMES[idx]}" + return 1 + fi + + idx=$idx+1 + done + + return 0 +} + +# Verify whether the format line has HA items +is_ha_line() { + declare -i i=$1 + + [ -n "${FAILOVERS[i]}" ] && return 0 + + return 1 +} + +# Produce HA software's configuration files +gen_ha_config() { + declare -i i=$1 + declare -i idx + local cmd_line + + # Prepare parameters + # Hostnames option + HOSTNAME_OPT=${HOST_NAME[i]} + + if ! get_nodenames $i; then + echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\ + "failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\ + "the \"${HOST_NAME[i]}\" failover group!" 
+ return 1 + fi + + for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do + HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]} + done + + # Target devices option + DEVICE_OPT=" -d "${TARGET_OPTS[0]} + for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do + DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]} + done + + # Construct the generation script command line + case "${HATYPE_OPT}" in + "${HBVER_HBV1}"|"${HBVER_HBV2}") # Heartbeat + cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}" + cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT} + ;; + "${HATYPE_CLUMGR}") # CluManager + cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}" + cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT} + ;; + esac + + # Execute script to generate HA software's configuration files + verbose_output "Generating HA software's configurations in"\ + "${HOST_NAME[i]} failover group..." + verbose_output "${cmd_line}" + eval $(echo "${cmd_line}") + if [ $? -ne 0 ]; then + return 1 + fi + verbose_output "Generate HA software's configurations in"\ + "${HOST_NAME[i]} failover group OK" + + return 0 +} + +# Configure HA software +config_ha() { + if [ -z "${HATYPE_OPT}" ]; then + return 0 + fi + + declare -i i j k + declare -i prim_idx # Index for PRIM_HOSTNAMES array + declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays + + declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover + # groups in the lustre cluster + declare -a HOST_INDEX # Indices for the same node in all the + # format lines in the csv file + local prim_host + + # Initialize the PRIM_HOSTNAMES array + prim_idx=0 + unset PRIM_HOSTNAMES + + # Get failover groups and generate HA configuration files + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + prim_host=${HOST_NAME[i]} + + for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do + [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2 + done + + target_idx=0 + unset HOST_INDEX + unset TARGET_OPTS + for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do + if [ 
"${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}" + then + HOST_INDEX[target_idx]=$k + TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]} + target_idx=$(( target_idx + 1 )) + fi + done + + if [ ${#TARGET_OPTS[@]} -ne 0 ]; then + PRIM_HOSTNAMES[prim_idx]=${prim_host} + prim_idx=$(( prim_idx + 1 )) + + if ! gen_ha_config ${HOST_INDEX[0]}; then + return 1 + fi + fi + done + + if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then + verbose_output "There are no \"failover nids\" items in the"\ + "csv file. No HA configuration files are generated!" + fi + + rm -rf ${TMP_DIRS} + return 0 +} + + +# Get all the items in the csv file and do some checks. +get_items() { + # Check argument + if [ $# -eq 0 ]; then + echo >&2 $"`basename $0`: get_items() error: Missing argument"\ + "for function get_items()!" + return 1 + fi + + CSV_FILE=$1 + local LINE + local marker + declare -i line_num=0 + declare -i idx=0 + + while read -r LINE; do + line_num=${line_num}+1 + # verbose_output "Parsing line ${line_num}: $LINE" + + # Get rid of the empty line + if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then + continue + fi + + # Get rid of the comment line + if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] + then + continue + fi + + # Skip the Linux MD/LVM line + marker=`echo ${LINE} | awk -F, '{print $2}'` + if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \ + || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then + continue + fi + + # Parse the config line into CONFIG_ITEM + if ! 
parse_line "$LINE"; then + echo >&2 $"`basename $0`: parse_line() error: Occurred"\ + "on line ${line_num} in ${CSV_FILE}: $LINE" + return 1 + fi + + HOST_NAME[idx]=${CONFIG_ITEM[0]} + MODULE_OPTS[idx]=${CONFIG_ITEM[1]} + DEVICE_NAME[idx]=${CONFIG_ITEM[2]} + MOUNT_POINT[idx]=${CONFIG_ITEM[3]} + DEVICE_TYPE[idx]=${CONFIG_ITEM[4]} + FS_NAME[idx]=${CONFIG_ITEM[5]} + MGS_NIDS[idx]=${CONFIG_ITEM[6]} + INDEX[idx]=${CONFIG_ITEM[7]} + FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]} + MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]} + MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]} + FAILOVERS[idx]=${CONFIG_ITEM[11]} + + MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'` + + # Convert IP addresses in NIDs to hostnames + MGS_NIDS_NAMES[idx]=$(ip2hostname_multi_node ${MGS_NIDS[idx]}) + if [ $? -ne 0 ]; then + echo >&2 "${MGS_NIDS_NAMES[idx]}" + return 1 + fi + + FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]}) + if [ $? -ne 0 ]; then + echo >&2 "${FAILOVERS_NAMES[idx]}" + return 1 + fi + + # Check some required items for formatting target + if ! check_item $idx; then + echo >&2 $"`basename $0`: check_item() error:"\ + "Occurred on line ${line_num} in ${CSV_FILE}." 
+ return 1 + fi + + idx=${idx}+1 + done < ${CSV_FILE} + + return 0 +} + +# check_lnet_connect hostname_index mgs_hostname +# Check whether the target node can contact the MGS node @mgs_hostname +# If @mgs_hostname is null, then it means the primary MGS node +check_lnet_connect() { + declare -i i=$1 + local mgs_node=$2 + + local COMMAND RET_STR + local mgs_prim_nids + local nids nids_names + local nids_str= + local mgs_nid + local ping_mgs + + # Execute remote command to check that + # this node can contact the MGS node + verbose_output "Checking lnet connectivity between" \ + "${HOST_NAME[i]} and the MGS node ${mgs_node}" + mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'` + + if [ -z "${mgs_node}" ]; then + nids_str=${mgs_prim_nids} # nids of primary MGS node + if [ -z "${nids_str}" ]; then + echo >&2 $"`basename $0`: check_lnet_connect() error:"\ + "Check the mgs nids item of host ${HOST_NAME[i]}!"\ + "Missing nids of the primary MGS node!" + return 1 + fi + else + for nids in ${MGS_NIDS[i]//:/ }; do + nids_names=$(ip2hostname_single_node ${nids}) + if [ $? -ne 0 ]; then + echo >&2 "${nids_names}" + return 1 + fi + + [ "${nids_names}" != "${nids_names#*$mgs_node*}" ]\ + && nids_str=${nids} # nids of backup MGS node + done + if [ -z "${nids_str}" ]; then + echo >&2 $"`basename $0`: check_lnet_connect() error:"\ + "Check the mgs nids item of host ${HOST_NAME[i]}!"\ + "Can not figure out which nids corresponding to the MGS"\ + "node ${mgs_node} from \"${MGS_NIDS[i]}\"!" + return 1 + fi + fi + + ping_mgs=false + for mgs_nid in ${nids_str//,/ } + do + COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1" + RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1` + if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ] + then + # This node can contact the MGS node + verbose_output "${HOST_NAME[i]} can contact the MGS" \ + "node ${mgs_node} by using nid \"${mgs_nid}\"!" + ping_mgs=true + break + fi + done + + if ! 
# check_lnet hostname_index
# Start lnet network in the cluster node HOST_NAME[hostname_index] and check
# that this node can contact every MGS node listed in MGS_NODENAME.
#
# Does nothing (returns 0) when VERIFY_CONNECT is false.
# Returns 1 when the argument is missing, when the remote "network up" command
# fails or does not report "LNET configured", or when check_lnet_connect fails
# for one of the MGS nodes; 0 otherwise.
check_lnet() {
	if ! ${VERIFY_CONNECT}; then
		return 0
	fi

	# Check argument
	if [ $# -eq 0 ]; then
		echo >&2 $"$(basename "$0"): check_lnet() error: Missing"\
		"argument for function check_lnet()!"
		return 1
	fi

	declare -i i=$1
	declare -i j
	declare -i rc=0
	local COMMAND RET_STR

	# Execute remote command to start lnet network
	verbose_output "Starting lnet network in ${HOST_NAME[i]}"
	COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1"
	# ${REMOTE} stays unquoted: it may hold a command plus its options
	RET_STR=$(${REMOTE} "${HOST_NAME[i]}" "${COMMAND}" 2>&1) || rc=$?

	# Success requires both a zero exit status and the "LNET configured"
	# marker somewhere in the output
	if [ ${rc} -ne 0 ] || [ "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
	then
		echo >&2 "$(basename "$0"): check_lnet() error: remote" \
		"${HOST_NAME[i]} error: ${RET_STR}"
		return 1
	fi

	# The MGS node itself does not need to contact the MGS
	if is_mgs_node "${HOST_NAME[i]}"; then
		return 0
	fi

	# Execute remote command to check that
	# this node can contact the MGS node
	for ((j = 0; j < ${MGS_NUM}; j++)); do
		if ! check_lnet_connect $i "${MGS_NODENAME[j]}"; then
			return 1
		fi
	done

	return 0
}
# Execute remote command to add lnet options lines to remote nodes'
# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
#
# For every entry of HOST_NAME: build the mkfs command line, create the mount
# point remotely, push the lnet module options and verify lnet (skipped on MGS
# nodes, which start_mgs_lnet handles), then launch the format command in the
# background.  All background jobs are waited for at the end.
#
# Returns 0 when there is nothing to format or every format command succeeded,
# 1 on the first preparation failure or if any format command failed.
mass_config() {
	local COMMAND
	local fail_exit_status=false	# kept local so it cannot leak to callers
	declare -a REMOTE_PID
	declare -a REMOTE_CMD
	declare -i pid_num=0
	declare -i i=0

	if [ ${#HOST_NAME[@]} -eq 0 ]; then
		verbose_output "There are no Lustre targets to be formatted."
		return 0
	fi

	# Start lnet network in the MGS node
	if ! start_mgs_lnet; then
		return 1
	fi

	for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
		# Construct the command line of mkfs.lustre
		if ! construct_mkfs_cmdline $i; then
			return 1
		fi

		# create the mount point on the node
		COMMAND="mkdir -p ${MOUNT_POINT[i]}"
		verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \
			       "${HOST_NAME[i]}"
		# ${REMOTE} stays unquoted: it may hold a command plus options
		if ! ${REMOTE} "${HOST_NAME[i]}" "${COMMAND}" >&2; then
			echo >&2 "$(basename "$0"): mass_config() error:"\
			"Failed to execute remote command to"\
			"create the mountpoint on ${HOST_NAME[i]}!"
			return 1
		fi

		if ! is_mgs_node "${HOST_NAME[i]}"; then
			# Execute remote command to add lnet options lines to
			# modprobe.conf/modules.conf
			COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
			verbose_output "Adding lnet module options to" \
			"${HOST_NAME[i]}"
			if ! ${REMOTE} "${HOST_NAME[i]}" "${COMMAND}" >&2; then
				echo >&2 "$(basename "$0"): mass_config() error:"\
				"Failed to execute remote command to"\
				"add module options to ${HOST_NAME[i]}!"
				return 1
			fi

			# Check lnet networks
			if ! check_lnet $i; then
				return 1
			fi
		fi

		# Execute remote command to format Lustre target
		verbose_output "Formatting Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..."
		REMOTE_CMD[pid_num]="${REMOTE} ${HOST_NAME[i]} \"(${EXPORT_PATH} ${MKFS_CMD})\""
		verbose_output "Format command line is: ${REMOTE_CMD[pid_num]}"
		${REMOTE} "${HOST_NAME[i]}" "(${EXPORT_PATH} ${MKFS_CMD})" >&2 &
		REMOTE_PID[pid_num]=$!
		pid_num=$(( pid_num + 1 ))
		sleep 1
	done

	# Wait for the exit status of the background remote command
	verbose_output "Waiting for the return of the remote command..."
	for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
		if ! wait "${REMOTE_PID[pid_num]}"; then
			echo >&2 "$(basename "$0"): mass_config() error: Failed"\
			"to execute \"${REMOTE_CMD[pid_num]}\"!"
			fail_exit_status=true
		fi
	done

	if ${fail_exit_status}; then
		return 1
	fi

	verbose_output "All the Lustre targets are formatted successfully!"
	return 0
}
# Execute remote command to modify /etc/fstab to add the new Lustre targets
#
# For every HOST_NAME entry an fstab line "device mountpoint FS_TYPE options
# 0 0" is built.  The options come from MOUNT_OPTIONS when given in the csv
# file, otherwise from get_mntopts.  The remote command first deletes any
# existing line starting with the device name, then appends the new line.
#
# Returns 0 when MODIFY_FSTAB is false or all hosts were updated, 1 on the
# first get_mntopts or remote failure.
modify_fstab() {
	declare -i i
	local fstab_line opts escaped_dev
	local COMMAND

	${MODIFY_FSTAB} || return 0

	for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
		verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
		"to add Lustre target ${DEVICE_NAME[i]}"

		# Get mount options
		opts=${MOUNT_OPTIONS[i]}
		if [ -z "${opts}" ]; then
			# Not specified in the csv file - ask the node
			if ! opts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
					${FAILOVERS[i]}); then
				# On failure get_mntopts printed its error
				# message on stdout
				echo >&2 "${opts}"
				return 1
			fi
		fi

		# "\t" stays a literal backslash-t here; the remote
		# "echo -e" turns it into a tab
		fstab_line="${DEVICE_NAME[i]}\t\t${MOUNT_POINT[i]}\t\t${FS_TYPE}"
		fstab_line="${fstab_line}\t${opts}\t0 0"
		verbose_output "$(echo -e ${fstab_line})"

		# Execute remote command to modify /etc/fstab
		# (slashes of the device name are escaped for the sed address)
		escaped_dev=${DEVICE_NAME[i]//\//\\/}
		COMMAND=". @scriptlibdir@/lc_common.sh; \
		sed -i \"/^${escaped_dev}\t/d\" \$(fcanon /etc/fstab); \
		echo -e \"${fstab_line}\" >> \$(fcanon /etc/fstab)"
		${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
		if [ $? -ne 0 ]; then
			echo >&2 "`basename $0`: modify_fstab() error:"\
			"Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\
			"to add Lustre target ${DEVICE_NAME[i]}!"
			return 1
		fi
	done

	return 0
}
# Verify the local host is the MGS node
#
# Checks, in order, that ${LUSTRE_PROC_DEVICES} exists, that it has content,
# and that it contains ${MGS_TYPE}.  Each failure prints its own message on
# stderr.
# Returns 0 when all three checks pass, 1 otherwise.
mgs_node() {
	# The path is quoted so that an empty/unset LUSTRE_PROC_DEVICES is
	# reported as "does not exist" instead of slipping through the test
	# and leaving cat/grep reading from stdin.
	if [ ! -e "${LUSTRE_PROC_DEVICES}" ]; then
		echo >&2 "$(basename "$0"): error: ${LUSTRE_PROC_DEVICES} does" \
		"not exist. Lustre kernel modules may not be loaded!"
		return 1
	fi

	# The content is read rather than testing the file size
	if [ -z "$(cat "${LUSTRE_PROC_DEVICES}")" ]; then
		echo >&2 "$(basename "$0"): error: ${LUSTRE_PROC_DEVICES} is" \
		"empty. Lustre services may not be started!"
		return 1
	fi

	if ! grep -q -- "${MGS_TYPE}" "${LUSTRE_PROC_DEVICES}"; then
		echo >&2 "$(basename "$0"): error: This node is not a MGS node." \
		"The script should be run on the MGS node!"
		return 1
	fi

	return 0
}
# get_md_configs hostname
# Get all the active MD device informations from the node @hostname
#
# Parses the remote output of "${MDADM} --detail --scan --verbose": every
# "ARRAY" line starts a new entry in MD_NAME/MD_LEVEL, and a following
# "devices=" line fills MD_DEVS for that entry (comma list turned into a
# space-separated list).  Always returns 0.
get_md_configs() {
	declare -i i=0
	declare -i j=0
	local host_name=$1
	local ret_line line first_item second_item third_item rest
	local -a words

	# Initialize the arrays
	unset MD_NAME
	unset MD_LEVEL
	unset MD_DEVS

	# Execute remote command to the node ${host_name} and get all the
	# active MD device informations.
	while read -r ret_line; do
		line="${ret_line}"
		if is_pdsh; then
			# Drop the first word of each output line
			words=(${ret_line})
			line="${words[*]:1}"
		fi

		first_item=
		second_item=
		third_item=
		read -r first_item second_item third_item rest <<<"${line}"

		# Get the MD device name and raid level
		if [ "${first_item}" = "ARRAY" ]; then
			MD_NAME[i]=${second_item}
			MD_LEVEL[i]=${third_item/level=/}
			j=$i
			i=$(( i + 1 ))
		fi

		# Get the MD component devices
		case "${first_item}" in
		devices=*)
			MD_DEVS[j]=${line/devices=/}
			MD_DEVS[j]=${MD_DEVS[j]//,/ }
			;;
		esac
	done < <(${REMOTE} ${host_name} "${MDADM} --detail --scan --verbose")

	if [ $i -eq 0 ]; then
		verbose_output "There are no active MD devices" \
		"in the host ${host_name}!"
	fi

	return 0
}
# get_vg_pvnames hostname vgname
# Get the PVs contained in @vgname from the node @hostname
#
# Prints the space-separated PV names on stdout and returns 0.
# On failure an error message is printed on stdout (callers capture the
# output and forward it to stderr) and 1 is returned: either the remote
# command failed, or no PV name came back.
get_vg_pvnames() {
	local host_name=$1
	local vg_name=$2
	local pv_names=
	local cmd ret_str

	# Execute remote command to get the PV names.
	cmd="${EXPORT_PATH} vgdisplay -v ${vg_name} 2>/dev/null\
	| grep \"PV Name\" | awk '{print \$3}' | xargs"
	# ${REMOTE} stays unquoted: it may hold a command plus its options
	if ! ret_str=$(${REMOTE} "${host_name}" "${cmd}" 2>&1); then
		if [ -n "${ret_str}" ]; then
			echo "$(basename "$0"): get_vg_pvnames() error:" \
			"remote command to ${host_name} error: ${ret_str}"
		else
			remote_error "get_vg_pvnames" "${host_name}"
		fi
		return 1
	fi

	# Strip a leading "<hostname>:<whitespace>" prefix.  The hostname is
	# matched literally, so dots in it are not treated as regex wildcards
	# and a hostname can never break the command into several words.
	pv_names=${ret_str#"${host_name}":[[:space:]]}
	if [ -z "${pv_names}" ]; then
		echo "$(basename "$0"): get_vg_pvnames() error:" \
		"There are no PVs in VG ${vg_name} in the host ${host_name}!"\
		"Or VG ${vg_name} does not exist."
		return 1
	fi

	echo "${pv_names}"
	return 0
}
-ne 0 ]; then + if [ -n "${ret_str}" ]; then + echo >&2 "`basename $0`: get_vg_configs() error:" \ + "remote command to ${host_name} error: ${ret_str}" + else + remote_error "get_vg_configs" ${host_name} + fi + return 1 + fi + + if [ -z "${ret_str}" ] \ + || [ "${ret_str}" != "${ret_str#*No volume groups found*}" ]; then + verbose_output "There are no VGs in the host ${host_name}!" + return 0 + fi + + # Get all the VG informations + for vg_name in `echo "${ret_str}" | sed -e 's/^'${host_name}'://'`; do + VG_NAME[i]=${vg_name} + VG_PVNAMES[i]=$(get_vg_pvnames ${host_name} ${VG_NAME[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${VG_PVNAMES[i]}" + return 1 + fi + let "i += 1" + done + + return 0 +} + +# get_lv_configs hostname +# Get all the LVM LV informations from the node @hostname +get_lv_configs() { + declare -i i=0 + local host_name=$1 + local ret_line line + + # Initialize the arrays + unset LV_NAME + unset LV_SIZE + unset LV_VGNAME + + # Execute remote command to get all the LV informations. 
# last_is_backslash line
# Check whether the last effective letter of @line is a backslash
#
# "Effective" means the last character that is neither a space nor a tab.
# All arguments are joined into one line first.
# Returns 0 if that character is a backslash, 1 otherwise (including for an
# empty or all-blank line).
last_is_backslash() {
	local line="$*"

	# Remove the trailing run of blanks: the inner expansion yields
	# everything after the last non-blank character, which is then
	# stripped from the end of the line.
	line=${line%"${line##*[![:blank:]]}"}

	case "${line}" in
	*\\) return 0 ;;
	esac

	return 1
}
-ne 0 -a -n "${ret_str}" ]; then + echo >&2 "`basename $0`: get_module_opts() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1 + + if is_pdsh; then + KERNEL_VER=`echo ${ret_str} | awk '{print $2}'` + else + KERNEL_VER=`echo ${ret_str} | awk '{print $1}'` + fi + + # Get the module configuration file name + if [ "${KERNEL_VER:0:3}" = "2.4" ]; then + MODULE_CONF=/etc/modules.conf + else + MODULE_CONF=/etc/modprobe.conf + fi + + # Execute remote command to get the lustre network module options + continue_flag=false + find_options=false + while read -r ret_line; do + if is_pdsh; then + set -- ${ret_line} + shift + line="$*" + else + line="${ret_line}" + fi + + # Get rid of the comment line + [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue + + if [ "${line}" != "${line#*options lnet*}" ]; then + if ! ${find_options}; then + find_options=true + MODULE_OPTS=${line} + else + MODULE_OPTS=${MODULE_OPTS}$" \n "${line} + fi + + last_is_backslash "${line}" && continue_flag=true \ + || continue_flag=false + continue + fi + + if ${continue_flag}; then + MODULE_OPTS=${MODULE_OPTS}$" \n "${line} + ! 
# is_ha_target hostname target_devname
# Check whether the target @target_devname was made to be high-available
#
# Greps the HA resource file selected by HATYPE_OPT on the node @hostname for
# the given name.
# Returns 0 if the name is found, 1 if it is not found, if the remote command
# failed with output (the output is reported on stderr), or if HATYPE_OPT
# selects no known resource file.
is_ha_target() {
	local host_name=$1
	local target_svname=$2
	local res_file=
	local ret_str
	declare -i rc=0

	case "${HATYPE_OPT}" in
	"${HBVER_HBV1}")	res_file=${HA_RES};;
	"${HBVER_HBV2}")	res_file=${HA_CIB};;
	"${HATYPE_CLUMGR}")	res_file=${CLUMAN_CONFIG};;
	esac

	# Without a resource file the remote grep would get no file operand
	# and read its standard input instead, so give up early.
	if [ -z "${res_file}" ]; then
		echo >&2 "$(basename "$0"): is_ha_target() error:" \
		"No HA resource file for HA software type" \
		"\"${HATYPE_OPT}\"!"
		return 1
	fi

	# Execute remote command to check the resource file
	# (${REMOTE} stays unquoted: it may hold a command plus its options)
	ret_str=$(${REMOTE} "${host_name}" \
		"grep ${target_svname} ${res_file}" 2>&1) || rc=$?
	if [ ${rc} -ne 0 ] && [ -n "${ret_str}" ]; then
		echo >&2 "$(basename "$0"): is_ha_target() error:" \
		"remote command error: ${ret_str}"
		return 1
	fi

	# The name is quoted inside the pattern so it is matched literally
	[ "${ret_str}" = "${ret_str#*"${target_svname}"*}" ] && return 1

	return 0
}
"${HB_OPTIONS}" ]; then + HB_OPTIONS=${line} + else + HB_OPTIONS=${HB_OPTIONS}:${line} + fi + fi + done < <(${REMOTE} ${host_name} "cat ${HA_CF}") + + if [ -z "${HB_CHANNELS}" ]; then + echo >&2 "`basename $0`: get_hb_configs() error:" \ + "There are no heartbeat channel configs in ${HA_CF}" \ + "of host ${host_name} or ${HA_CF} does not exist!" + return 0 + fi + + # Execute remote command to get Heartbeat service address + if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ]; then + while read -r ret_line; do + if is_pdsh; then + set -- ${ret_line} + shift + line="$*" + else + line="${ret_line}" + fi + + # Get rid of the empty line + [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\ + && continue + + # Get rid of the comment line + [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue + + SRV_IPADDRS=`echo ${line} | awk '{print $2}'` + [ -n "${SRV_IPADDRS}" ] \ + && [ "`echo ${line} | awk '{print $1}'`" = "${host_name}" ] && break + done < <(${REMOTE} ${host_name} "cat ${HA_RES}") + + if [ -z "${SRV_IPADDRS}" ]; then + echo >&2 "`basename $0`: get_hb_configs() error: There"\ + "are no service address in ${HA_RES} of host"\ + "${host_name} or ${HA_RES} does not exist!" 
# get_cluman_channel hostname
# Get the Heartbeat channel of CluManager from the node @hostname
#
# Scans the remote output of "${CONFIG_CMD} --clumembd" and prints:
#   "broadcast"           if a line mentions broadcast with third word "yes"
#   "multicast <address>" if a multicast_ipaddress line carries an address
#                         other than 225.0.0.11
#   an empty line         otherwise
# Always returns 0.
get_cluman_channel() {
	local host_name=$1
	local ret_line line
	local cluman_channel=
	local mcast_ipaddr
	local -a words

	while read -r ret_line; do
		line="${ret_line}"
		if is_pdsh; then
			# Drop the first word of each output line
			words=(${ret_line})
			line="${words[*]:1}"
		fi

		case "${line}" in
		*broadcast*)
			if [ "$(echo ${line}|awk '{print $3}')" = "yes" ]; then
				cluman_channel="broadcast"
				break
			fi
			;;
		esac

		case "${line}" in
		*multicast_ipaddress*)
			mcast_ipaddr=$(echo ${line}|awk '{print $3}')
			if [ "${mcast_ipaddr}" != "225.0.0.11" ]; then
				cluman_channel="multicast ${mcast_ipaddr}"
				break
			fi
			;;
		esac
	done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd")

	echo ${cluman_channel}
	return 0
}
# get_ha_configs hostname
# Get the HA software configurations from the node @hostname
#
# HA_CONFIGS is always reset.  Depending on HATYPE_OPT the work is delegated
# to get_hb_configs (both Heartbeat versions) or get_cluman_configs
# (CluManager); any other value collects nothing.
# Returns 1 if the delegated function fails, 0 otherwise.
get_ha_configs() {
	local host_name=$1

	unset HA_CONFIGS

	# Nothing to collect when no HA software type was given
	[ -n "${HATYPE_OPT}" ] || return 0

	verbose_output "Collecting HA software configurations from host $1..."

	if [ "${HATYPE_OPT}" = "${HBVER_HBV1}" ] \
	|| [ "${HATYPE_OPT}" = "${HBVER_HBV2}" ]; then
		# Heartbeat
		get_hb_configs ${host_name} || return 1
	elif [ "${HATYPE_OPT}" = "${HATYPE_CLUMGR}" ]; then
		# CluManager
		get_cluman_configs ${host_name} || return 1
	fi

	return 0
}
# is_failover_service target_svname
# Check whether a target service @target_svname is a failover service.
#
# The name is looked up in the ALL_TARGET_SVNAMES array.
# Returns 0 if an element equals @target_svname, 1 otherwise.
is_failover_service() {
	local target_svname=$1
	declare -i pos=0
	declare -i total=${#ALL_TARGET_SVNAMES[@]}

	while [ ${pos} -lt ${total} ]; do
		if [ "${target_svname}" = "${ALL_TARGET_SVNAMES[pos]}" ]; then
			return 0
		fi
		pos=$(( pos + 1 ))
	done

	return 1
}
# is_loopdev devname
# Check whether a device @devname is a loop device or not
#
# A name counts as a loop device when it contains "/dev/loop" immediately
# followed by a digit.
# Returns 0 for a loop device, 1 otherwise (including an empty name).
is_loopdev() {
	local devname=$1

	# Pattern match in the shell itself: no word splitting or pathname
	# expansion of the name, and no external process needed.
	case "${devname}" in
	*/dev/loop[0-9]*) return 0 ;;
	esac

	return 1
}
+ return 1 + else + if is_pdsh; then + target_devname=`echo ${ret_str} | awk '{print $2}'` + else + target_devname=`echo ${ret_str} | awk '{print $1}'` + fi + fi + fi + + echo ${target_devname} + return 0 +} + +# get_devsize hostname target_devname +# Get the device size (KB) of @target_devname from node @hostname +get_devsize() { + local host_name=$1 + local target_devname=$2 + local target_devsize= + local ret_str + + # Execute remote command to get the device size + ret_str=`${REMOTE} ${host_name} \ + "/sbin/blockdev --getsize ${target_devname}" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_devsize() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + if is_pdsh; then + target_devsize=`echo ${ret_str} | awk '{print $2}'` + else + target_devsize=`echo ${ret_str} | awk '{print $1}'` + fi + + if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_devsize() error: can't" \ + "get device size of ${target_devname} in ${host_name}!" + return 1 + fi + + let " target_devsize /= 2" + + echo ${target_devsize} + return 0 +} + +# get_realdevname hostname loop_dev +# Get the real device name of loop device @loop_dev from node @hostname +get_realdevname() { + local host_name=$1 + local loop_dev=$2 + local target_devname= + local ret_str + + # Execute remote command to get the real device name + ret_str=`${REMOTE} ${host_name} \ + "/sbin/losetup ${loop_dev}" 2>&1` + if [ $? 
-ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_realdevname() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + if is_pdsh; then + target_devname=`echo ${ret_str} | awk '{print $4}' \ + | sed 's/^(//' | sed 's/)$//'` + else + target_devname=`echo ${ret_str} | awk '{print $3}' \ + | sed 's/^(//' | sed 's/)$//'` + fi + + if [ "${ret_str}" != "${ret_str#*No such*}" ] \ + || [ -z "${target_devname}" ]; then + echo "`basename $0`: get_realdevname() error: can't" \ + "get info on device ${loop_dev} in ${host_name}!" + return 1 + fi + + echo ${target_devname} + return 0 +} + +# get_mntpnt hostname target_devname +# Get the lustre target mount point from the node @hostname +get_mntpnt(){ + local host_name=$1 + local target_devname=$2 + local mnt_point= + local ret_str + + # Execute remote command to get the mount point + ret_str=`${REMOTE} ${host_name} \ + "cat /etc/mtab | grep ${target_devname}" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_mntpnt() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + if is_pdsh; then + mnt_point=`echo ${ret_str} | awk '{print $3}'` + else + mnt_point=`echo ${ret_str} | awk '{print $2}'` + fi + + if [ -z "${mnt_point}" ]; then + echo "`basename $0`: get_mntpnt() error: can't" \ + "get the mount point of ${target_devname} in ${host_name}!" + return 1 + fi + + echo ${mnt_point} + return 0 +} + +# get_devnames hostname +# Get the lustre target device names, mount points +# and loop device sizes from the node @hostname +get_devnames(){ + declare -i i + local host_name=$1 + local ret_line line + + # Initialize the arrays + unset TARGET_DEVNAMES + unset TARGET_DEVSIZES + unset TARGET_MNTPNTS + + for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do + TARGET_DEVNAMES[i]=$(get_devname ${host_name} \ + ${TARGET_SVNAMES[i]}) + if [ $? 
-ne 0 ]; then + echo >&2 "${TARGET_DEVNAMES[i]}" + return 1 + fi + + if [ -z "${TARGET_DEVNAMES[i]}" ]; then + if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then + verbose_output "There exists combo mgs/mdt"\ + "target in ${host_name}." + continue + else + echo >&2 "`basename $0`: get_devname() error:"\ + "No device corresponding to target" \ + "${TARGET_SVNAMES[i]} in ${host_name}!" + return 1 + fi + fi + + # Get the mount point of the target + TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \ + ${TARGET_DEVNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_MNTPNTS[i]}" + return 1 + fi + + # The target device is a loop device? + if [ -n "${TARGET_DEVNAMES[i]}" ] \ + && is_loopdev ${TARGET_DEVNAMES[i]}; then + # Get the device size + TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ + ${TARGET_DEVNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_DEVSIZES[i]}" + return 1 + fi + + # Get the real device name + TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \ + ${TARGET_DEVNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_DEVNAMES[i]}" + return 1 + fi + fi + done + + return 0 +} + +# is_target target_svtype ldd_flags +# Check the service type of a lustre target +is_target() { + case "$1" in + "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";; + "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";; + "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";; + "*") + echo >&2 "`basename $0`: is_target() error: Invalid" \ + "target service type - \"$1\"!" + return 1 + ;; + esac + + if [ ${ret} -eq 0 ]; then + return 1 + fi + + return 0 +} + +# get_devtype ldd_flags +# Get the service type of a lustre target from @ldd_flags +get_devtype() { + local target_devtype= + + if [ -z "${flags}" ]; then + echo "`basename $0`: get_devtype() error: Invalid" \ + "ldd_flags - it's value is null!" 
+ return 1 + fi + + if is_target "mgs" $1; then + if is_target "mdt" $1; then + target_devtype="mgs|mdt" + else + target_devtype="mgs" + fi + elif is_target "mdt" $1; then + target_devtype="mdt" + elif is_target "ost" $1; then + target_devtype="ost" + else + echo "`basename $0`: get_devtype() error: Invalid" \ + "ldd_flags - \"$1\"!" + return 1 + fi + + echo ${target_devtype} + return 0 +} + +# get_mntopts ldd_mount_opts +# Get the user-specified lustre target mount options from @ldd_mount_opts +get_mntopts() { + local mount_opts= + local ldd_mount_opts=$1 + + mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}" + mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}" + mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}" + mount_opts="${mount_opts#${OST_DEFAULT_MNTOPTS}}" + mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`" + + [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \ + || echo ${mount_opts} + + return 0 +} + +# get_mgsnids ldd_params +# Get the mgs nids of lustre target from @ldd_params +get_mgsnids() { + local mgs_nids= # mgs nids in one mgs node + local all_mgs_nids= # mgs nids in all mgs failover nodes + local param= + local ldd_params="$*" + + for param in ${ldd_params}; do + if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then + mgs_nids=`echo ${param#${PARAM_MGSNODE}}` + + if [ -n "${all_mgs_nids}" ]; then + all_mgs_nids=${all_mgs_nids}:${mgs_nids} + else + all_mgs_nids=${mgs_nids} + fi + fi + done + + [ "${all_mgs_nids}" != "${all_mgs_nids#*,*}" ] \ + && echo "\""${all_mgs_nids}"\"" || echo ${all_mgs_nids} + + return 0 +} + +# get_failnids ldd_params +# Get the failover nids of lustre target from @ldd_params +get_failnids() { + local fail_nids= # failover nids in one failover node + local all_fail_nids= # failover nids in all failover nodes + # of this target + local param= + local ldd_params="$*" + + for param in ${ldd_params}; do + if [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ]; then + fail_nids=`echo 
${param#${PARAM_FAILNODE}}` + + if [ -n "${all_fail_nids}" ]; then + all_fail_nids=${all_fail_nids}:${fail_nids} + else + all_fail_nids=${fail_nids} + fi + fi + done + + [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \ + && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids} + + return 0 +} + +# get_fmtopts target_devname hostname ldd_params +# Get other format options of the lustre target @target_devname from @ldd_params +get_fmtopts() { + local target_devname=$1 + local host_name=$2 + shift + shift + local ldd_params="$*" + local param= + local fmt_opts= + + for param in ${ldd_params}; do + [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue + [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ] && continue + + if [ -n "${param}" ]; then + if [ -n "${fmt_opts}" ]; then + fmt_opts=${fmt_opts}" --param=\""${param}"\"" + else + fmt_opts="--param=\""${param}"\"" + fi + fi + done + + echo ${fmt_opts} + return 0 +} + +# get_stripecount host_name target_fsname +# Get the stripe count for @target_fsname +get_stripecount() { + local host_name=$1 + local target_fsname=$2 + local stripe_count= + local stripecount_file + local ret_str + + # Get the stripe count + stripecount_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount + ret_str=`${REMOTE} ${host_name} "cat ${stripecount_file}" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_stripecount() error:" \ + "remote command to ${host_name} error: ${ret_str}" + return 1 + fi + + if is_pdsh; then + stripe_count=`echo ${ret_str} | awk '{print $2}'` + else + stripe_count=`echo ${ret_str} | awk '{print $1}'` + fi + + if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_stripecount() error: can't" \ + "get stripe count of ${target_fsname} in ${host_name}!" 
+ return 1 + fi + + echo ${stripe_count} + return 0 +} + +# get_stripecount_opt host_name target_fsname +# Get the stripe count option for lustre mdt target +get_stripecount_opt() { + local host_name=$1 + local target_fsname=$2 + local stripe_count= + local stripecount_opt= + + # Get the stripe count + [ -z "${target_fsname}" ] && target_fsname="lustre" + stripe_count=$(get_stripecount ${host_name} ${target_fsname}) + if [ $? -ne 0 ]; then + echo "${stripe_count}" + return 1 + fi + + if [ "${stripe_count}" != "1" ]; then + stripecount_opt=${OPTSTR_STRIPE_COUNT}${stripe_count} + fi + + echo ${stripecount_opt} + return 0 +} + +# get_ldds hostname +# Get the lustre target disk data from the node @hostname +get_ldds(){ + declare -i i + local host_name=$1 + local ret_line line + local flags mnt_opts params + local stripecount_opt + + # Initialize the arrays + unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES + unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS + + # Get lustre target device type, fsname, index, etc. + # from MOUNT_DATA_FILE. Using tunefs.lustre to read it. 
+ for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + flags= + mnt_opts= + params= + stripecount_opt= + [ -z "${TARGET_DEVNAMES[i]}" ] && continue + + # Execute remote command to read MOUNT_DATA_FILE + while read -r ret_line; do + if is_pdsh; then + set -- ${ret_line} + shift + line="$*" + else + line="${ret_line}" + fi + + if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then + TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'` + continue + fi + + if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then + TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'` + continue + fi + + if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then + flags=`echo ${line}|awk '{print $2}'` + continue + fi + + if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then + mnt_opts=`echo ${line}|awk '{print $0}'` + mnt_opts=`echo ${mnt_opts#Persistent mount opts: }` + continue + fi + + if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then + params=`echo ${line}|awk '{print $0}'` + params=`echo ${params#Parameters:}` + break + fi + done < <(${REMOTE} ${host_name} "${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null") + + if [ -z "${flags}" ]; then + echo >&2 "`basename $0`: get_ldds() error: Invalid" \ + "ldd_flags of target ${TARGET_DEVNAMES[i]}" \ + "in host ${host_name} - it's value is null!"\ + "Check ${TUNEFS} command!" + return 1 + fi + + if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \ + || is_target "mgs" ${flags}; then + TARGET_INDEXES[i]= + fi + + [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]= + + # Get the lustre target service type + TARGET_DEVTYPES[i]=$(get_devtype ${flags}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_DEVTYPES[i]} From device" \ + "${TARGET_DEVNAMES[i]} in host ${host_name}!" 
+ return 1 + fi + + # Get the lustre target mount options + TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}") + + # Get mgs nids of the lustre target + TARGET_MGSNIDS[i]=$(get_mgsnids "${params}") + + # Get failover nids of the lustre target + TARGET_FAILNIDS[i]=$(get_failnids "${params}") + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_FAILNIDS[i]} From device" \ + "${TARGET_DEVNAMES[i]} in host ${host_name}!" + return 1 + fi + + # Get other format options of the lustre target + TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}") + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_FMTOPTS[i]}" + return 1 + fi + + if [ -n "${TARGET_DEVSIZES[i]}" ]; then + if [ -n "${TARGET_FMTOPTS[i]}" ]; then + TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}" + else + TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}" + fi + fi + + if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then + if [ -n "${TARGET_FMTOPTS[i]}" ]; then + TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]} + else + TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]} + fi + fi + + if is_target "mdt" ${flags}; then + # Get the stripe count option + stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]}) + if [ $? 
-ne 0 ]; then + echo >&2 "${stripecount_opt}" + return 1 + fi + + if [ -n "${stripecount_opt}" ]; then + if [ -n "${TARGET_FMTOPTS[i]}" ]; then + TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt} + else + TARGET_FMTOPTS[i]=${stripecount_opt} + fi + fi + fi + + if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then + TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\"" + fi + done + + return 0 +} + +# get_journalsize target_devname hostname +# Get the journal size of lustre target @target_devname from @hostname +get_journalsize() { + local target_devname=$1 + local host_name=$2 + local journal_inode= + local journal_size= + local ret_str + + # Execute remote command to get the journal inode number + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ + ${target_devname} | grep 'Journal inode:'" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_journalsize() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%Journal inode:*}} + journal_inode=`echo ${ret_str} | awk '{print $3}'` + if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_journalsize() error: can't" \ + "get journal inode of ${target_devname} in ${host_name}!" + return 1 + fi + + # Execute remote command to get the journal size + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \ + 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_journalsize() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%User:*}} + journal_size=`echo ${ret_str} | awk '{print $6}'` + if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_journalsize() error: can't" \ + "get journal size of ${target_devname} in ${host_name}!" 
+ return 1 + fi + + let "journal_size /= 1024*1024" # MB + + echo ${journal_size} + return 0 +} + +# get_defaultjournalsize target_devsize +# Calculate the default journal size from target device size @target_devsize +get_defaultjournalsize() { + declare -i target_devsize=$1 + declare -i journal_size=0 + declare -i max_size base_size + + let "base_size = 1024*1024" + if [ ${target_devsize} -gt ${base_size} ]; then # 1GB + let "journal_size = target_devsize / 102400" + let "journal_size *= 4" + fi + + let "max_size = 102400 * L_BLOCK_SIZE" + let "max_size >>= 20" # 400MB + + if [ ${journal_size} -gt ${max_size} ]; then + let "journal_size = max_size" + fi + + echo ${journal_size} + return 0 +} + +# figure_journal_size target_devname hostname +# Find a reasonable journal file size given the number of blocks +# in the filesystem. This algorithm is derived from figure_journal_size() +# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm. +figure_journal_size() { + local target_devname=$1 + local host_name=$2 + local ret_str + declare -i block_count + declare -i journal_blocks + declare -i journal_size + + # Execute remote command to get the block count + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ + ${target_devname} | grep 'Block count:'" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: figure_journal_size() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%Block count:*}} + block_count=`echo ${ret_str} | awk '{print $3}'` + if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: figure_journal_size() error: can't" \ + "get block count of ${target_devname} in ${host_name}!" 
+ return 1 + fi + + if ((block_count < 32768)); then + let "journal_blocks = 1024" + elif ((block_count < 256*1024)); then + let "journal_blocks = 4096" + elif ((block_count < 512*1024)); then + let "journal_blocks = 8192" + elif ((block_count < 1024*1024)); then + let "journal_blocks = 16384" + else + let "journal_blocks = 32768" + fi + + let "journal_size = journal_blocks * L_BLOCK_SIZE / 1048576" + + echo ${journal_size} + return 0 +} + +# get_J_opt hostname target_devname target_devsize +# Get the mkfs -J option of lustre target @target_devname +# from the node @hostname +get_J_opt() { + local host_name=$1 + local target_devname=$2 + local target_devsize=$3 + local journal_size= + local default_journal_size= + local journal_opt= + + # Get the real journal size of lustre target + journal_size=$(get_journalsize ${target_devname} ${host_name}) + if [ $? -ne 0 ]; then + echo "${journal_size}" + return 1 + fi + + # Get the default journal size of lustre target + default_journal_size=$(get_defaultjournalsize ${target_devsize}) + if [ "${default_journal_size}" = "0" ]; then + default_journal_size=$(figure_journal_size ${target_devname} \ + ${host_name}) + if [ $? -ne 0 ]; then + echo "${default_journal_size}" + return 1 + fi + fi + + if [ "${journal_size}" != "${default_journal_size}" ]; then + journal_opt="-J size=${journal_size}" + fi + + echo ${journal_opt} + return 0 +} + +# get_ratio target_devname hostname +# Get the bytes/inode ratio of lustre target @target_devname from @hostname +get_ratio() { + local target_devname=$1 + local host_name=$2 + local inode_count= + local block_count= + local ratio= + local ret_str + + # Execute remote command to get the inode count + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ + ${target_devname} | grep 'Inode count:'" 2>&1` + if [ $? 
-ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_ratio() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%Inode count:*}} + inode_count=`echo ${ret_str} | awk '{print $3}'` + if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_ratio() error: can't" \ + "get inode count of ${target_devname} in ${host_name}!" + return 1 + fi + + # Execute remote command to get the block count + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ + ${target_devname} | grep 'Block count:'" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_ratio() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%Block count:*}} + block_count=`echo ${ret_str} | awk '{print $3}'` + if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_ratio() error: can't" \ + "get block count of ${target_devname} in ${host_name}!" 
+ return 1 + fi + + let "ratio = block_count*L_BLOCK_SIZE/inode_count" + + echo ${ratio} + return 0 +} + +# get_default_ratio target_devtype target_devsize +# Calculate the default bytes/inode ratio from target type @target_devtype +get_default_ratio() { + local target_devtype=$1 + declare -i target_devsize=$2 + local ratio= + + case "${target_devtype}" in + "mdt" | "mgs|mdt" | "mdt|mgs") + ratio=4096;; + "ost") + [ ${target_devsize} -gt 1000000 ] && ratio=16384;; + esac + + [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE} + + echo ${ratio} + return 0 +} + +# get_i_opt hostname target_devname target_devtype target_devsize +# Get the mkfs -i option of lustre target @target_devname +# from the node @hostname +get_i_opt() { + local host_name=$1 + local target_devname=$2 + local target_devtype=$3 + local target_devsize=$4 + local ratio= + local default_ratio= + local ratio_opt= + + # Get the real bytes/inode ratio of lustre target + ratio=$(get_ratio ${target_devname} ${host_name}) + if [ $? -ne 0 ]; then + echo "${ratio}" + return 1 + fi + + # Get the default bytes/inode ratio of lustre target + default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize}) + + if [ "${ratio}" != "${default_ratio}" ]; then + ratio_opt="-i ${ratio}" + fi + + echo ${ratio_opt} + return 0 +} + +# get_isize target_devname hostname +# Get the inode size of lustre target @target_devname from @hostname +get_isize() { + local target_devname=$1 + local host_name=$2 + local inode_size= + local ret_str + + # Execute remote command to get the inode size + ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ + ${target_devname} | grep 'Inode size:'" 2>&1` + if [ $? 
-ne 0 -a -n "${ret_str}" ]; then + echo "`basename $0`: get_isize() error:" \ + "remote command error: ${ret_str}" + return 1 + fi + + ret_str=${ret_str#${ret_str%Inode size:*}} + inode_size=`echo ${ret_str} | awk '{print $3}'` + if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ] + then + echo "`basename $0`: get_isize() error: can't" \ + "get inode size of ${target_devname} in ${host_name}!" + return 1 + fi + + echo ${inode_size} + return 0 +} + +# get_mdt_default_isize host_name target_fsname +# Calculate the default inode size of lustre mdt target +get_mdt_default_isize() { + local host_name=$1 + local target_fsname=$2 + declare -i stripe_count + local inode_size= + + # Get the stripe count + stripe_count=$(get_stripecount ${host_name} ${target_fsname}) + if [ $? -ne 0 ]; then + echo "${stripe_count}" + return 1 + fi + + if ((stripe_count > 77)); then + inode_size=512 + elif ((stripe_count > 34)); then + inode_size=2048 + elif ((stripe_count > 13)); then + inode_size=1024 + else + inode_size=512 + fi + + echo ${inode_size} + return 0 +} + +# get_default_isize host_name target_devtype target_fsname +# Calculate the default inode size of lustre target type @target_devtype +get_default_isize() { + local host_name=$1 + local target_devtype=$2 + local target_fsname=$3 + local inode_size= + + case "${target_devtype}" in + "mdt" | "mgs|mdt" | "mdt|mgs") + inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname}) + if [ $? 
-ne 0 ]; then + echo "${inode_size}" + return 1 + fi + ;; + "ost") + inode_size=256;; + esac + + [ -z "${inode_size}" ] && inode_size=128 + + echo ${inode_size} + return 0 +} + +# get_I_opt hostname target_devname target_devtype target_fsname +# Get the mkfs -I option of lustre target @target_devname +# from the node @hostname +get_I_opt() { + local host_name=$1 + local target_devname=$2 + local target_devtype=$3 + local target_fsname=$4 + local isize= + local default_isize= + local isize_opt= + + # Get the real inode size of lustre target + isize=$(get_isize ${target_devname} ${host_name}) + if [ $? -ne 0 ]; then + echo "${isize}" + return 1 + fi + + # Get the default inode size of lustre target + [ -z "${target_fsname}" ] && target_fsname="lustre" + default_isize=$(get_default_isize ${host_name} ${target_devtype} \ + ${target_fsname}) + if [ $? -ne 0 ]; then + echo "${default_isize}" + return 1 + fi + + if [ "${isize}" != "${default_isize}" ]; then + isize_opt="-I ${isize}" + fi + + echo ${isize_opt} + return 0 +} + +# get_mkfsopts hostname +# Get the mkfs options of lustre targets from the node @hostname +get_mkfsopts(){ + declare -i i + local host_name=$1 + local journal_opt + local ratio_opt + local inode_size_opt + + # Initialize the arrays + unset TARGET_MKFSOPTS + + # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I + for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + journal_opt= + ratio_opt= + inode_size_opt= + + [ -z "${TARGET_DEVNAMES[i]}" ] && continue + + if [ -z "${TARGET_DEVSIZES[i]}" ]; then + # Get the device size + TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ + ${TARGET_DEVNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${TARGET_DEVSIZES[i]}" + return 1 + fi + fi + + # Get the journal option + journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \ + ${TARGET_DEVSIZES[i]}) + if [ $? 
-ne 0 ]; then + echo >&2 "${journal_opt}" + return 1 + fi + + if [ -n "${journal_opt}" ]; then + if [ -z "${TARGET_MKFSOPTS[i]}" ]; then + TARGET_MKFSOPTS[i]="${journal_opt}" + else + TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}" + fi + fi + + # Get the bytes-per-inode ratio option + ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \ + ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${ratio_opt}" + return 1 + fi + + if [ -n "${ratio_opt}" ]; then + if [ -z "${TARGET_MKFSOPTS[i]}" ]; then + TARGET_MKFSOPTS[i]="${ratio_opt}" + else + TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}" + fi + fi + + # Get the inode size option + inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \ + ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${inode_size_opt}" + return 1 + fi + + if [ -n "${inode_size_opt}" ]; then + if [ -z "${TARGET_MKFSOPTS[i]}" ]; then + TARGET_MKFSOPTS[i]="${inode_size_opt}" + else + TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}" + fi + fi + + if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then + TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\"" + fi + done + return 0 +} + +# get_target_configs hostname +# Get the lustre target informations from the node @hostname +get_target_configs() { + declare -i i + local host_name=$1 + local ret_line line + + # Initialize the arrays + unset TARGET_CONFIGS + + # Get lustre target server names + if ! get_svnames ${host_name}; then + return 1 + fi + + # Get lustre target device names, mount points and loop device sizes + if ! get_devnames ${host_name}; then + return 1 + fi + + # Get lustre target device type, fsname, index, etc. + if ! get_ldds ${host_name}; then + return 1 + fi + + # Get mkfs options of lustre targets + if ! 
get_mkfsopts ${host_name}; then + return 1 + fi + + # Construct lustre target configs + for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + [ -z "${TARGET_DEVNAMES[i]}" ] && continue + TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]} + done + + return 0 +} + +# get_configs hostname +# Get all the informations needed to generate a csv file from +# the node @hostname +get_configs() { + # Check the hostname + if [ -z "$1" ]; then + echo >&2 "`basename $0`: get_configs() error:" \ + "Missing hostname!" + return 1 + fi + + # Get network module options + verbose_output "" + verbose_output "Collecting network module options from host $1..." + if ! get_module_opts $1; then + return 1 + fi + verbose_output "OK" + + # Get lustre target informations + verbose_output "Collecting Lustre targets informations from host $1..." + if ! get_target_configs $1; then + return 1 + fi + verbose_output "OK" + + # Get HA software configurations + if ! get_ha_configs $1; then + return 1 + fi + + return 0 +} + +# Collect linux MD/LVM device informations from the lustre cluster and +# append them to the csv file +get_mdlvm_info() { + declare -i idx + declare -i i + local line + + # Collect and append linux MD/LVM informations to the csv file + for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do + [ -z "${HOST_NAMES[idx]}" ] && continue + + # Collect MD device informations + ! get_md_configs ${HOST_NAMES[idx]} && return 1 + + # Append MD device informations to the csv file + for ((i = 0; i < ${#MD_NAME[@]}; i++)); do + line=${HOST_NAMES[idx]},${MD_MARKER},${MD_NAME[i]},,,${MD_LEVEL[i]},${MD_DEVS[i]} + verbose_output "Informations of MD device ${MD_NAME[i]}" \ + "in host ${HOST_NAMES[idx]} are as follows:" + verbose_output "${line}" + echo "${line}" >> ${LUSTRE_CSV_FILE} + done + + # Collect PV informations + ! 
get_pv_configs ${HOST_NAMES[idx]} && return 1 + + # Append PV informations to the csv file + if [ -n "${PV_NAMES}" ]; then + line=${HOST_NAMES[idx]},${PV_MARKER},${PV_NAMES} + verbose_output "Informations of PVs" \ + "in host ${HOST_NAMES[idx]} are as follows:" + verbose_output "${line}" + echo "${line}" >> ${LUSTRE_CSV_FILE} + fi + + # Collect VG informations + ! get_vg_configs ${HOST_NAMES[idx]} && return 1 + + # Append VG informations to the csv file + for ((i = 0; i < ${#VG_NAME[@]}; i++)); do + line=${HOST_NAMES[idx]},${VG_MARKER},${VG_NAME[i]},,,${VG_PVNAMES[i]} + verbose_output "Informations of VG ${VG_NAME[i]}" \ + "in host ${HOST_NAMES[idx]} are as follows:" + verbose_output "${line}" + echo "${line}" >> ${LUSTRE_CSV_FILE} + done + + # Collect LV informations + ! get_lv_configs ${HOST_NAMES[idx]} && return 1 + + # Append LV informations to the csv file + for ((i = 0; i < ${#LV_NAME[@]}; i++)); do + line=${HOST_NAMES[idx]},${LV_MARKER},${LV_NAME[i]},,,${LV_SIZE[i]},${LV_VGNAME[i]} + verbose_output "Informations of LV /dev/${LV_VGNAME[i]}/${LV_NAME[i]}"\ + "in host ${HOST_NAMES[idx]} are as follows:" + verbose_output "${line}" + echo "${line}" >> ${LUSTRE_CSV_FILE} + done + done + return 0 +} + +# Generate the csv file from the lustre cluster +gen_csvfile() { + declare -i idx + declare -i i + local line + + # Get lustre cluster node names + verbose_output "Collecting Lustre cluster node names..." + if ! get_hostnames; then + return 1 + fi + verbose_output "OK" + + : > ${LUSTRE_CSV_FILE} + + ${GET_MDLVM_INFO} && get_mdlvm_info + + # Collect and append lustre target informations to the csv file + for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do + # Collect informations + if ! 
get_configs ${HOST_NAMES[idx]}; then + rm -f ${LUSTRE_CSV_FILE} + return 1 + fi + + # Append informations to the csv file + for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do + [ -z "${TARGET_DEVNAMES[i]}" ] && continue + + if [ -z "${HA_CONFIGS[i]}" ]; then + line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]} + else + line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]} + fi + verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \ + "in host ${HOST_NAMES[idx]} are as follows:" + verbose_output "${line}" + echo "" >> ${LUSTRE_CSV_FILE} + echo "${line}" >> ${LUSTRE_CSV_FILE} + done + done + + return 0 +} + +# Main flow +echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} START ********" +if ! gen_csvfile; then + exit 1 +fi +echo "`basename $0`: ******** Generate csv file -- ${LUSTRE_CSV_FILE} OK **********" + +exit 0 diff --git a/lustre/scripts/lustre_up14.sh b/lustre/scripts/lustre_up14.sh new file mode 100755 index 0000000..9027237 --- /dev/null +++ b/lustre/scripts/lustre_up14.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# +# Reads old MDS config logs for transferring to a MGS +# +############################################################################### + +TMP=${TMP:-/tmp/logs} + +# Usage +usage() { + cat >&2 < + + the MDS disk device (e.g. /dev/sda1) + the name of the new filesystem (e.g. testfs) + + This script will extract old config logs from an MDS device to a + temporary location ($TMP). During the upgrade procedure, mount the + MGS disk as type ldiskfs (e.g. mount -t ldiskfs /dev/sda + /mnt/temp), then copy these logs into the CONFIGS directory on the + MGS (e.g. /mnt/temp/CONFIGS). Logs from many MDS's can be added + in this way. When done, unmount the MGS, and then re-mount it as + type lustre to start the service. 
+ +EOF + exit 1 +} + +if [ $# -lt 2 ]; then + usage +fi + +DEV=$1 +FSNAME=$2 +DEBUGFS="debugfs -c -R" +mkdir -p $TMP + +FILES=`$DEBUGFS "ls -l LOGS" $DEV | awk '{print $9}' | awk '/[a-z]/ {print $1}'` + +for FILE in ${FILES}; do + $DEBUGFS "dump LOGS/$FILE $TMP/temp" $DEV 2> /dev/null + MDC=`strings $TMP/temp | grep MDC` + LOV=`strings $TMP/temp | grep lov` + if [ -n "$MDC" ]; then + TYPE=client + else + if [ -n "$LOV" ]; then + TYPE=MDT0000 + else + echo "Can't determine type for log '$FILE', skipping" + continue + fi + fi + echo -n "Copying log '$FILE' to '${FSNAME}-${TYPE}'. Okay [y/n]?" + read OK + if [ "$OK" = "y" ]; then + mv $TMP/temp $TMP/${FSNAME}-${TYPE} + else + rm $TMP/temp + fi +done + +echo ls -l $TMP +ls -l $TMP + -- 1.8.3.1