From 8a27772feed2f9852b71dc14de372bf5e489fac3 Mon Sep 17 00:00:00 2001 From: yujian Date: Mon, 3 Jul 2006 02:53:21 +0000 Subject: [PATCH] b=9853 Remove unneeded files, they will be generated from *.in files. --- lustre/scripts/lc_cluman.sh | 548 ---------- lustre/scripts/lc_hb.sh | 672 ------------ lustre/scripts/lc_modprobe.sh | 62 -- lustre/scripts/lc_net.sh | 320 ------ lustre/scripts/lustre_config.sh | 1178 --------------------- lustre/scripts/lustre_createcsv.sh | 1998 ------------------------------------ 6 files changed, 4778 deletions(-) delete mode 100755 lustre/scripts/lc_cluman.sh delete mode 100755 lustre/scripts/lc_hb.sh delete mode 100755 lustre/scripts/lc_modprobe.sh delete mode 100755 lustre/scripts/lc_net.sh delete mode 100755 lustre/scripts/lustre_config.sh delete mode 100755 lustre/scripts/lustre_createcsv.sh diff --git a/lustre/scripts/lc_cluman.sh b/lustre/scripts/lc_cluman.sh deleted file mode 100755 index 8fb463c..0000000 --- a/lustre/scripts/lc_cluman.sh +++ /dev/null @@ -1,548 +0,0 @@ -#!/bin/bash -# -# lc_cluman.sh - script for generating the Red Hat Cluster Manager -# HA software's configuration files -# -################################################################################ - -# Usage -usage() { - cat >&2 < [-s service addresses] - [-c heartbeat channel] [-o heartbeat options] [-v] - <-d target device> [-d target device...] - - -n hostnames the nodenames of the primary node and its fail- - overs - Multiple nodenames are separated by colon (:) - delimeter. The first one is the nodename of the - primary node, the others are failover nodenames. - -s service addresses the IP addresses to failover - Multiple addresses are separated by colon (:) - delimeter. - -c heartbeat channel the method to send/rcv heartbeats on - The default method is multicast, and multicast_ - ipaddress is "225.0.0.11". - -o heartbeat options a "catchall" for other heartbeat configuration - options - Multiple options are separated by colon (:) - delimeter. - -v verbose mode - -d target device the target device name and mount point - The device name and mount point are separated by - colon (:) delimeter. - -EOF - exit 1 -} - -#****************************** Global variables ******************************# -# Scripts to be called -SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"} -SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh - -# Remote command -REMOTE=${REMOTE:-"ssh -x -q"} - -# Lustre utilities path -CMD_PATH=${CMD_PATH:-"/usr/sbin"} -TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"} - -# CluManager tools -CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"} -CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"} - -# Configuration directory -CLUMAN_DIR="/etc" # CluManager configuration directory -FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files - -TMP_DIR="/tmp/clumanager" # temporary directory - -declare -a NODE_NAMES # node names in the failover group -declare -a SRV_IPADDRS # service IP addresses - -# Lustre target device names, service names and mount points -declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS -declare -i TARGET_NUM=0 # number of targets - -# Get and check the positional parameters -VERBOSE_OUTPUT=false -while getopts "n:s:c:o:vd:" OPTION; do - case $OPTION in - n) - HOSTNAME_OPT=$OPTARG - PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` - if [ -z "${PRIM_NODENAME}" ]; then - echo >&2 $"`basename $0`: Missing primary nodename!" - usage - fi - HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` - if [ ${HOSTNAME_NUM} -lt 2 ]; then - echo >&2 $"`basename $0`: Missing failover nodenames!" - usage - fi - ;; - s) - SRVADDR_OPT=$OPTARG - ;; - c) - HBCHANNEL_OPT=$OPTARG - HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \ - | sed 's/"$//'` - if [ -n "${HBCHANNEL_OPT}" ] \ - && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \ - && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then - echo >&2 $"`basename $0`: Invalid Heartbeat channel" \ - "- ${HBCHANNEL_OPT}!" - usage - fi - ;; - o) - HBOPT_OPT=$OPTARG - HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'` - ;; - v) - VERBOSE_OUTPUT=true - ;; - d) - DEVICE_OPT=$OPTARG - TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` - TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` - if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing target device name!" - usage - fi - if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing mount point for target"\ - "${TARGET_DEVNAMES[TARGET_NUM]}!" - usage - fi - TARGET_NUM=$(( TARGET_NUM + 1 )) - ;; - - ?) - usage - esac -done - -# Check the required parameters -if [ -z "${HOSTNAME_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -n option!" - usage -fi - -if [ -z "${DEVICE_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -d option!" - usage -fi - -# Output verbose informations -verbose_output() { - if ${VERBOSE_OUTPUT}; then - echo "`basename $0`: $*" - fi - return 0 -} - -# get_nodenames -# -# Get all the node names in this failover group -get_nodenames() { - declare -i idx - local nodename_str nodename - - nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ - END {for (i in a) print a[i]}'` - idx=0 - for nodename in ${nodename_str} - do - NODE_NAMES[idx]=${nodename} - idx=$idx+1 - done - - return 0 -} - -# get_check_srvIPaddrs -# -# Get and check all the service IP addresses in this failover group -get_check_srvIPaddrs() { - declare -i idx - declare -i i - local srvIPaddr_str srvIPaddr - - srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\ - END {for (i in a) print a[i]}'` - idx=0 - for srvIPaddr in ${srvIPaddr_str} - do - SRV_IPADDRS[idx]=${srvIPaddr} - idx=$idx+1 - done - - for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do - for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do - # Check service IP address - verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \ - "real IP of host ${NODE_NAMES[i]} are in the" \ - "same subnet..." - if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]} - then - return 1 - fi - verbose_output "OK" - done - done - - return 0 -} - -# cluman_running host_name -# -# Run remote command to check whether clumanager service is running in @host_name -cluman_running() { - local host_name=$1 - local ret_str - - ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1` - if [ $? -ne 0 ]; then - if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then - echo >&2 "`basename $0`: cluman_running() error:"\ - "remote command to ${host_name} error: ${ret_str}!" - return 2 - else - return 1 - fi - fi - - return 0 -} - -# stop_cluman host_name -# -# Run remote command to stop clumanager service running in @host_name -stop_cluman() { - local host_name=$1 - local ret_str - - ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1` - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: stop_cluman() error:"\ - "remote command to ${host_name} error: ${ret_str}!" - return 1 - fi - - echo "`basename $0`: Clumanager service is stopped on node ${host_name}." - return 0 -} - -# check_cluman -# -# Run remote command to check each node's clumanager service -check_cluman() { - declare -i idx - local OK - - # Get and check all the service IP addresses - if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then - return 1 - fi - - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - # Check clumanager service status - cluman_running ${NODE_NAMES[idx]} - rc=$? - if [ "$rc" -eq "2" ]; then - return 1 - elif [ "$rc" -eq "1" ]; then - verbose_output "Clumanager service is stopped on"\ - "node ${NODE_NAMES[idx]}." - elif [ "$rc" -eq "0" ]; then - OK= - echo -n "`basename $0`: Clumanager service is running on"\ - "${NODE_NAMES[idx]}, go ahead to stop the service and"\ - "generate new configurations? [y/n]:" - read OK - if [ "${OK}" = "n" ]; then - echo "`basename $0`: New Clumanager configurations"\ - "are not generated." - return 2 - fi - - # Stop clumanager service - stop_cluman ${NODE_NAMES[idx]} - fi - done - - return 0 -} - -# get_srvname hostname target_devname -# -# Get the lustre target server name from the node @hostname -get_srvname() { - local host_name=$1 - local target_devname=$2 - local target_srvname= - local ret_str - - # Execute remote command to get the target server name - ret_str=`${REMOTE} ${host_name} \ - "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` - if [ $? -ne 0 ]; then - echo "`basename $0`: get_srvname() error:" \ - "from host ${host_name} - ${ret_str}" - return 1 - fi - - if [ "${ret_str}" != "${ret_str#*Target: }" ]; then - ret_str=${ret_str#*Target: } - target_srvname=`echo ${ret_str} | awk '{print $1}'` - fi - - if [ -z "${target_srvname}" ]; then - echo "`basename $0`: get_srvname() error: Cannot get the"\ - "server name of target ${target_devname} in ${host_name}!" - return 1 - fi - - echo ${target_srvname} - return 0 -} - -# get_srvnames -# -# Get server names of all the Lustre targets in this failover group -get_srvnames() { - declare -i i - - # Initialize the TARGET_SRVNAMES array - unset TARGET_SRVNAMES - - # Get Lustre target service names - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_SRVNAMES[i]}" - return 1 - fi - done - - return 0 -} - -# check_retval retval -# -# Check the return value of redhat-config-cluster-cmd -check_retval() { - if [ $1 -ne 0 ]; then - echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!" - return 1 - fi - - return 0 -} - -# add_services -# -# Add service tags into the cluster.xml file -add_services() { - declare -i idx - declare -i i - - # Add service tag - for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do - ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]} - if ! check_retval $?; then - return 1 - fi - - for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do - ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ - --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]} - if ! check_retval $?; then - return 1 - fi - done - - ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ - --add_device \ - --name=${TARGET_DEVNAMES[i]} - if ! check_retval $?; then - return 1 - fi - - ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \ - --device=${TARGET_DEVNAMES[i]} \ - --mount \ - --mountpoint=${TARGET_MNTPNTS[i]} \ - --fstype=lustre - if ! check_retval $?; then - return 1 - fi - done - - return 0 -} - -# gen_cluster_xml -# -# Run redhat-config-cluster-cmd to create the cluster.xml file -gen_cluster_xml() { - declare -i idx - declare -i i - local mcast_IPaddr - local node_names - local hbopt - - [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ - /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old - - # Run redhat-config-cluster-cmd to generate cluster.xml - # Add clumembd tag - if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then - ${CONFIG_CMD} --clumembd --broadcast=yes - ${CONFIG_CMD} --clumembd --multicast=no - if ! check_retval $?; then - return 1 - fi - elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then - mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'` - if [ -n "${mcast_IPaddr}" ]; then - ${CONFIG_CMD} --clumembd --multicast=yes\ - --multicast_ipaddress=${mcast_IPaddr} - if ! check_retval $?; then - return 1 - fi - fi - fi - - # Add cluster tag - node_names= - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - node_names=${node_names}"${NODE_NAMES[idx]} " - done - - ${CONFIG_CMD} --cluster --name="${node_names}failover group" - if ! check_retval $?; then - return 1 - fi - - # Add member tag - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]} - if ! check_retval $?; then - return 1 - fi - done - - # Add service tag - if ! add_services; then - return 1 - fi - - # Add other tags - if [ -n "${HBOPT_OPT}" ]; then - while read -r hbopt - do - ${CONFIG_CMD} ${hbopt} - if ! check_retval $?; then - return 1 - fi - done < <(echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\ - END {for (i in a) print a[i]}') - fi - - return 0 -} - -# create_config -# -# Create the cluster.xml file and scp it to the each node's /etc/ -create_config() { - declare -i idx - - /bin/mkdir -p ${TMP_DIR} - CONFIG_PRIMNODE=${TMP_DIR}$"/cluster.xml."${PRIM_NODENAME} - CONFIG_LUSTRE=${TMP_DIR}$"/cluster.xml"${FILE_SUFFIX} - - # Get server names of Lustre targets - if ! get_srvnames; then - return 1 - fi - - if [ -s ${CONFIG_PRIMNODE} ]; then - if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ] - then - verbose_output "${CONFIG_PRIMNODE} already exists." - return 0 - else - [ -e "${CLUMAN_DIR}/cluster.xml" ] && \ - /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old - - /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml - - # Add services into the cluster.xml file - if ! add_services; then - return 1 - fi - fi - else - # Run redhat-config-cluster-cmd to generate cluster.xml - verbose_output "Creating cluster.xml file for" \ - "${PRIM_NODENAME} failover group hosts..." - if ! gen_cluster_xml; then - return 1 - fi - verbose_output "OK" - fi - - /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE} - [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \ - /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml - - # scp the cluster.xml file to all the nodes - verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \ - "${PRIM_NODENAME} failover group hosts..." - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}$"/cluster.xml."${NODE_NAMES[idx]} - - scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp cluster.xml file"\ - "to node ${NODE_NAMES[idx]}!" - return 1 - fi - done - verbose_output "OK" - - return 0 -} - -# Main flow -# Get all the node names -if ! get_nodenames; then - exit 1 -fi - -# Check clumanager services -verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\ - "failover group hosts..." -check_cluman -rc=$? -if [ "$rc" -eq "2" ]; then - verbose_output "OK" - exit 0 -elif [ "$rc" -eq "1" ]; then - exit 1 -fi -verbose_output "OK" - -# Generate configuration files -if ! create_config; then - exit 1 -fi - -exit 0 diff --git a/lustre/scripts/lc_hb.sh b/lustre/scripts/lc_hb.sh deleted file mode 100755 index e4c606d..0000000 --- a/lustre/scripts/lc_hb.sh +++ /dev/null @@ -1,672 +0,0 @@ -#!/bin/bash -# -# lc_hb.sh - script for generating the Heartbeat HA software's -# configuration files -# -############################################################################### - -# Usage -usage() { - cat >&2 < <-n hostnames> [-v] - <-d target device> [-d target device...] - - -r HBver the version of Heartbeat software - The Heartbeat software versions which are curr- - ently supported are: hbv1 (Heartbeat version 1) - and hbv2 (Heartbeat version 2). - -n hostnames the nodenames of the primary node and its fail- - overs - Multiple nodenames are separated by colon (:) - delimeter. The first one is the nodename of the - primary node, the others are failover nodenames. - -v verbose mode - -d target device the target device name and mount point - The device name and mount point are separated by - colon (:) delimeter. - -EOF - exit 1 -} - -#****************************** Global variables ******************************# -# Scripts to be called -SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"$(cd `dirname $0`; echo $PWD)"} -SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}/lc_servip.sh -SCRIPT_GEN_MONCF=${SCRIPTS_PATH}/lc_mon.sh # create mon.cf file - -# Remote command -REMOTE=${REMOTE:-"ssh -x -q"} - -# Lustre utilities path -CMD_PATH=${CMD_PATH:-"/usr/sbin"} -TUNEFS=${TUNEFS:-"$CMD_PATH/tunefs.lustre"} - -# Heartbeat tools -HB_TOOLS_PATH=${HB_TOOLS_PATH:-"/usr/lib64/heartbeat"} # Heartbeat tools path -CIB_GEN_SCRIPT=${HB_TOOLS_PATH}/haresources2cib.py -CL_STATUS=${CL_STATUS:-"/usr/bin/cl_status"} - -# Configuration directories -HA_DIR=${HA_DIR:-"/etc/ha.d"} # Heartbeat configuration directory -MON_DIR=${MON_DIR:-"/etc/mon"} # mon configuration directory -CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"} # cib.xml directory - -# Service directories and names -HARES_DIR=${HARES_DIR:-"${HA_DIR}/resource.d"} # Heartbeat resources -LUSTRE_SRV=${LUSTRE_SRV:-"Filesystem"} # Service script provided by Heartbeat -FS_TYPE=${FS_TYPE:-"lustre"} # Lustre filesystem type - -FILE_SUFFIX=${FILE_SUFFIX:-".lustre"} # Suffix of the generated config files - -TMP_DIR="/tmp/heartbeat" # temporary directory -HACF_TEMP=${TMP_DIR}/ha.cf.temp -AUTHKEYS_TEMP=${TMP_DIR}/authkeys${FILE_SUFFIX} - -HBVER_HBV1="hbv1" # Heartbeat version 1 -HBVER_HBV2="hbv2" # Heartbeat version 2 - -declare -a NODE_NAMES # node names in the failover group - -# Lustre target device names, service names and mount points -declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS -declare -i TARGET_NUM=0 # number of targets - - -# Get and check the positional parameters -VERBOSE_OUTPUT=false -while getopts "r:n:vd:" OPTION; do - case $OPTION in - r) - HBVER_OPT=$OPTARG - if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \ - && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then - echo >&2 $"`basename $0`: Invalid Heartbeat software" \ - "version - ${HBVER_OPT}!" - usage - fi - ;; - n) - HOSTNAME_OPT=$OPTARG - PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'` - if [ -z "${PRIM_NODENAME}" ]; then - echo >&2 $"`basename $0`: Missing primary nodename!" - usage - fi - HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'` - if [ ${HOSTNAME_NUM} -lt 2 ]; then - echo >&2 $"`basename $0`: Missing failover nodenames!" - usage - fi - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ] - then - echo >&2 $"`basename $0`: Heartbeat version 1 can" \ - "only support 2 nodes!" - usage - fi - ;; - v) - VERBOSE_OUTPUT=true - ;; - d) - DEVICE_OPT=$OPTARG - TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'` - TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'` - if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing target device name!" - usage - fi - if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then - echo >&2 $"`basename $0`: Missing mount point for target"\ - "${TARGET_DEVNAMES[TARGET_NUM]}!" - usage - fi - TARGET_NUM=$(( TARGET_NUM + 1 )) - ;; - ?) - usage - esac -done - -# Check the required parameters -if [ -z "${HBVER_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -r option!" - usage -fi - -if [ -z "${HOSTNAME_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -n option!" - usage -fi - -if [ -z "${DEVICE_OPT}" ]; then - echo >&2 $"`basename $0`: Missing -d option!" - usage -fi - -# Output verbose informations -verbose_output() { - if ${VERBOSE_OUTPUT}; then - echo "`basename $0`: $*" - fi - return 0 -} - -# get_nodenames -# -# Get all the node names in this failover group -get_nodenames() { - declare -i idx - local nodename_str nodename - - nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\ - END {for (i in a) print a[i]}'` - idx=0 - for nodename in ${nodename_str} - do - NODE_NAMES[idx]=${nodename} - idx=$idx+1 - done - - return 0 -} - -# check_file host_name file -# -# Run remote command to check whether @file exists in @host_name -check_file() { - local host_name=$1 - local file_name=$2 - - if [ -z "${host_name}" ]; then - echo >&2 "`basename $0`: check_file() error:"\ - "Missing hostname!" - return 1 - fi - - if [ -z "${file_name}" ]; then - echo >&2 "`basename $0`: check_file() error:"\ - "Missing file name!" - return 1 - fi - - # Execute remote command to check the file - ${REMOTE} ${host_name} "[ -e ${file_name} ]" - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: check_file() error:"\ - "${file_name} does not exist in host ${host_name}!" - return 1 - fi - - return 0 -} - -# hb_running host_name -# -# Run remote command to check whether heartbeat service is running in @host_name -hb_running() { - local host_name=$1 - local ret_str - - ret_str=`${REMOTE} ${host_name} "${CL_STATUS} hbstatus" 2>&1` - if [ $? -ne 0 ]; then - if [ "${ret_str}" = "${ret_str#*stop*}" ]; then - echo >&2 "`basename $0`: hb_running() error:"\ - "remote command to ${host_name} error: ${ret_str}!" - return 2 - else - return 1 - fi - fi - - return 0 -} - -# stop_heartbeat host_name -# -# Run remote command to stop heartbeat service running in @host_name -stop_heartbeat() { - local host_name=$1 - local ret_str - - ret_str=`${REMOTE} ${host_name} "/sbin/service heartbeat stop" 2>&1` - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: stop_heartbeat() error:"\ - "remote command to ${host_name} error: ${ret_str}!" - return 1 - fi - - echo "`basename $0`: Heartbeat service is stopped on node ${host_name}." - return 0 -} - -# check_heartbeat -# -# Run remote command to check each node's heartbeat service -check_heartbeat() { - declare -i idx - local OK - - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - # Check Heartbeat configuration directory - if ! check_file ${NODE_NAMES[idx]} ${HA_DIR}; then - echo >&2 "`basename $0`: check_heartbeat() error:"\ - "Is Heartbeat package installed?" - return 1 - fi - - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then - # Check mon configuration directory - if ! check_file ${NODE_NAMES[idx]} ${MON_DIR}; then - echo >&2 "`basename $0`: check_heartbeat()"\ - "error: Is mon package installed?" - return 1 - fi - fi - - if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then - # Check crm directory - if ! check_file ${NODE_NAMES[idx]} ${CIB_DIR}; then - echo >&2 "`basename $0`: check_heartbeat()"\ - "error: Is Heartbeat v2 package installed?" - return 1 - fi - fi - - # Check heartbeat service status - hb_running ${NODE_NAMES[idx]} - rc=$? - if [ "$rc" -eq "2" ]; then - return 1 - elif [ "$rc" -eq "1" ]; then - verbose_output "Heartbeat service is stopped on"\ - "node ${NODE_NAMES[idx]}." - elif [ "$rc" -eq "0" ]; then - OK= - echo -n "`basename $0`: Heartbeat service is running on"\ - "${NODE_NAMES[idx]}, go ahead to stop the service and"\ - "generate new configurations? [y/n]:" - read OK - if [ "${OK}" = "n" ]; then - echo "`basename $0`: New Heartbeat configurations"\ - "are not generated." - return 2 - fi - - # Stop heartbeat service - stop_heartbeat ${NODE_NAMES[idx]} - fi - done - - return 0 -} - -# get_srvname hostname target_devname -# -# Get the lustre target server name from the node @hostname -get_srvname() { - local host_name=$1 - local target_devname=$2 - local target_srvname= - local ret_str - - # Execute remote command to get the target server name - ret_str=`${REMOTE} ${host_name} \ - "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1` - if [ $? -ne 0 ]; then - echo "`basename $0`: get_srvname() error:" \ - "from host ${host_name} - ${ret_str}" - return 1 - fi - - if [ "${ret_str}" != "${ret_str#*Target: }" ]; then - ret_str=${ret_str#*Target: } - target_srvname=`echo ${ret_str} | awk '{print $1}'` - fi - - if [ -z "${target_srvname}" ]; then - echo "`basename $0`: get_srvname() error: Cannot get the"\ - "server name of target ${target_devname} in ${host_name}!" - return 1 - fi - - echo ${target_srvname} - return 0 -} - -# get_srvnames -# -# Get server names of all the Lustre targets in this failover group -get_srvnames() { - declare -i i - - # Initialize the TARGET_SRVNAMES array - unset TARGET_SRVNAMES - - # Get Lustre target service names - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_SRVNAMES[i]}" - return 1 - fi - done - - return 0 -} - -# create_template -# -# Create the templates for ha.cf and authkeys files -create_template() { - /bin/mkdir -p ${TMP_DIR} - - # Create the template for ha.cf - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then - cat >${HACF_TEMP} <${HACF_TEMP} <${AUTHKEYS_TEMP} <> ${HACF_LUSTRE} - done - - # scp ha.cf file to all the nodes - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - touch ${TMP_DIR}$"/ha.cf."${NODE_NAMES[idx]} - scp ${HACF_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp ha.cf file"\ - "to node ${NODE_NAMES[idx]}!" - return 1 - fi - done - - return 0 -} - -# create_haresources -# -# Create the haresources file and scp it to the each node's /etc/ha.d/ -create_haresources() { - HARES_PRIMNODE=${TMP_DIR}$"/haresources."${PRIM_NODENAME} - HARES_LUSTRE=${TMP_DIR}$"/haresources"${FILE_SUFFIX} - declare -i idx - local res_line - - if [ -s ${HARES_PRIMNODE} ]; then - # The haresources file for the primary node has already existed - if [ -n "`/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE}`" ]; then - verbose_output "${HARES_PRIMNODE} already exists." - return 0 - fi - fi - - # Add the resource group line into the haresources file - res_line=${PRIM_NODENAME} - for ((idx = 0; idx < ${#TARGET_DEVNAMES[@]}; idx++)); do - res_line=${res_line}" "${LUSTRE_SRV}::${TARGET_DEVNAMES[idx]}::${TARGET_MNTPNTS[idx]}::${FS_TYPE} - - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then - res_line=${res_line}" "${TARGET_SRVNAMES[idx]}"-mon" - fi - done - echo "${res_line}" >> ${HARES_LUSTRE} - - # Generate the cib.xml file - if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then - # Add group haclient and user hacluster - [ -z "`grep haclient /etc/group`" ] && groupadd haclient - [ -z "`grep hacluster /etc/passwd`" ] && useradd -g haclient hacluster - - CIB_LUSTRE=${TMP_DIR}$"/cib.xml"${FILE_SUFFIX} - python ${CIB_GEN_SCRIPT} --stdout \ - ${HARES_LUSTRE} > ${CIB_LUSTRE} - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to generate cib.xml file"\ - "for node ${PRIM_NODENAME}!" - return 1 - fi - fi - - # scp the haresources file or cib.xml file - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - /bin/cp -f ${HARES_LUSTRE} ${TMP_DIR}$"/haresources."${NODE_NAMES[idx]} - scp ${HARES_LUSTRE} ${NODE_NAMES[idx]}:${HA_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp haresources file"\ - "to node ${NODE_NAMES[idx]}!" - return 1 - fi - - if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then - scp ${CIB_LUSTRE} ${NODE_NAMES[idx]}:${CIB_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp cib.xml"\ - "file to node ${NODE_NAMES[idx]}!" - return 1 - fi - fi - done - - return 0 -} - -# create_authkeys -# -# Create the authkeys file and scp it to the each node's /etc/ha.d/ -create_authkeys() { - AUTHKEYS_PRIMNODE=${TMP_DIR}$"/authkeys."${PRIM_NODENAME} - declare -i idx - - if [ -e ${AUTHKEYS_PRIMNODE} ]; then - verbose_output "${AUTHKEYS_PRIMNODE} already exists." - return 0 - fi - - # scp the authkeys file to all the nodes - chmod 600 ${AUTHKEYS_TEMP} - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - touch ${TMP_DIR}$"/authkeys."${NODE_NAMES[idx]} - scp -p ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp authkeys file"\ - "to node ${NODE_NAMES[idx]}!" - return 1 - fi - done - - return 0 -} - -# create_moncf -# -# Create the mon.cf file and scp it to the each node's /etc/mon/ -create_moncf() { - MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME} - MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX} - local srv_name params= - declare -i idx - declare -a OLD_TARGET_SRVNAMES # targets in other nodes - # in this failover group - # Initialize the OLD_TARGET_SRVNAMES array - unset OLD_TARGET_SRVNAMES - - if [ -s ${MONCF_PRIMNODE} ]; then - if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ] - then - verbose_output "${MONCF_PRIMNODE} already exists." - return 0 - else - # Get the Lustre target service names - # from the previous mon.cf file - idx=0 - for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\ - |awk '$2 ~ /-mon/ {print $2}'|xargs` - do - OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\ - |sed 's/-mon//g'` - idx=$(( idx + 1 )) - done - fi - fi - - # Construct the parameters to mon.cf generation script - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - params=${params}" -n "${NODE_NAMES[idx]} - done - - for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do - params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]} - done - - for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do - params=${params}" -o "${TARGET_SRVNAMES[idx]} - done - - ${SCRIPT_GEN_MONCF} ${params} - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to generate mon.cf file"\ - "by using ${SCRIPT_GEN_MONCF}!" - return 1 - fi - - /bin/mv *-mon.cfg ${MONCF_LUSTRE} - - # scp the mon.cf file to all the nodes - for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do - /bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]} - - scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/ - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: Failed to scp mon.cf file"\ - "to node ${NODE_NAMES[idx]}!" - return 1 - fi - done - - return 0 -} - -# generate_config -# -# Generate the configuration files for Heartbeat and scp them to all the nodes -generate_config() { - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then - # Get server names of Lustre targets - if ! get_srvnames; then - return 1 - fi - fi - - if ! create_template; then - return 1 - fi - - verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\ - "${PRIM_NODENAME} failover group hosts..." - if ! create_hacf; then - return 1 - fi - verbose_output "OK" - - verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\ - "to ${PRIM_NODENAME} failover group hosts..." - if ! create_haresources; then - return 1 - fi - verbose_output "OK" - - verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \ - "${PRIM_NODENAME} failover group hosts..." - if ! create_authkeys; then - return 1 - fi - verbose_output "OK" - - if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then - verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \ - "${PRIM_NODENAME} failover group hosts..." - if ! create_moncf; then - return 1 - fi - verbose_output "OK" - fi - - return 0 -} - -# Main flow -# Get all the node names -if ! get_nodenames; then - exit 1 -fi - -# Check heartbeat services -verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\ - "failover group hosts..." -check_heartbeat -rc=$? -if [ "$rc" -eq "2" ]; then - verbose_output "OK" - exit 0 -elif [ "$rc" -eq "1" ]; then - exit 1 -fi -verbose_output "OK" - -# Generate configuration files -if ! generate_config; then - exit 1 -fi - -exit 0 diff --git a/lustre/scripts/lc_modprobe.sh b/lustre/scripts/lc_modprobe.sh deleted file mode 100755 index 486bf35..0000000 --- a/lustre/scripts/lc_modprobe.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -# -# lc_modprobe.sh - add lustre module options into modprobe.conf or -# modules.conf -# -################################################################################# - -# Check the kernel version -KERNEL_VERSION=`uname -r` -KERNEL_VERSION=${KERNEL_VERSION:0:3} - -if [ "${KERNEL_VERSION}" = "2.4" ]; then - MODULE_CONF=/etc/modules.conf -else - MODULE_CONF=/etc/modprobe.conf -fi - -read -r NETWORKS -MODLINES_FILE=/tmp/modlines$$.txt -START_MARKER=$"# start lustre config" -END_MARKER=$"# end lustre config" - -# Generate a temp file contains lnet options lines -generate_lnet_lines() { - local LNET_LINE TMP_LINE - - TMP_LINE="${NETWORKS}" - - echo ${START_MARKER} > ${MODLINES_FILE} - echo "# Lustre module options added automatically by `basename $0`" >> ${MODLINES_FILE} - while true; do - LNET_LINE=${TMP_LINE%%\\n*} - echo ${LNET_LINE} >> ${MODLINES_FILE} - - TMP_LINE=${TMP_LINE#*\\n} - - if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then - break - fi - done - echo ${END_MARKER} >> ${MODLINES_FILE} - - #echo "--------------${MODLINES_FILE}--------------" - #cat ${MODLINES_FILE} - #echo -e "------------------------------------------\n" - - return 0 -} - -if ! generate_lnet_lines; then - exit 1 -fi - -# Add lnet options lines to the module configuration file -if [ -e ${MODULE_CONF} ]; then - # Delete the old options - sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF} -fi - -cat ${MODLINES_FILE} >> ${MODULE_CONF} -rm -f ${MODLINES_FILE} -exit 0 diff --git a/lustre/scripts/lc_net.sh b/lustre/scripts/lc_net.sh deleted file mode 100755 index dc0a96b..0000000 --- a/lustre/scripts/lc_net.sh +++ /dev/null @@ -1,320 +0,0 @@ -#!/bin/bash -# -# lc_net.sh - script for Lustre cluster network verification -# -############################################################################### - -# Usage -usage() { - cat >&2 < - - -v verbose mode - csv file a spreadsheet that contains configuration parameters - (separated by commas) for each target in a Lustre cl- - uster, the first field of each line is the host name - of the cluster node - -EOF - exit 1 -} - -# Get and check the positional parameters -while getopts "v" OPTION; do - case $OPTION in - v) - VERBOSE_OPT=$"yes" - ;; - ?) - usage - esac -done - -# Toss out the parameters we've already processed -shift `expr $OPTIND - 1` - -# Here we expect the csv file -if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: Missing csv file!" - usage -fi - -# Global variables -CSV_FILE=$1 -declare -a HOST_NAMES -declare -a HOST_IPADDRS - -# Remote command -REMOTE=${REMOTE:-"ssh -x -q"} - -# Check whether the reomte command is pdsh -is_pdsh() { - if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then - return 1 - fi - - return 0 -} - -# Output verbose informations -verbose_output() { - if [ "${VERBOSE_OPT}" = "yes" ]; then - echo "`basename $0`: $*" - fi - return 0 -} - -# Check the csv file -check_file() { - if [ ! -s ${CSV_FILE} ]; then - echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}" \ - "does not exist or is empty!" - return 1 - fi - - return 0 -} - -# Get the host names from the csv file -get_hostnames() { - local NAME CHECK_STR - declare -i i - declare -i j - - # Initialize the HOST_NAMES array - unset HOST_NAMES - - CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \ - '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'` - if [ -n "${CHECK_STR}" ]; then - echo >&2 $"`basename $0`: get_hostnames() error: Missing"\ - "hostname field in the line - ${CHECK_STR}" - return 1 - fi - - i=0 - for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\ - | awk -F, '/[[:alnum:]]/{print $1}'` - do - for ((j = 0; j < ${#HOST_NAMES[@]}; j++)); do - [ "${NAME}" = "${HOST_NAMES[j]}" ] && continue 2 - done - - HOST_NAMES[i]=${NAME} - i=$i+1 - done - - return 0 -} - -# Check whether the host name matches the name in the local /etc/hosts table -# and whether the IP address corresponding to the host name is correct -local_check() { - # Check argument - if [ $# -ne 2 ]; then - echo >&2 $"`basename $0`: local_check() error: Missing"\ - "argument for function local_check()!" - return 1 - fi - - local RET_STR REAL_NAME - - # Get the corresponding IP address of the host name from /etc/hosts table - # of the current host - HOST_IPADDRS[$2]=`egrep "[[:space:]]$1([[:space:]]|$)" /etc/hosts \ - | awk '{print $1}'` - if [ -z "${HOST_IPADDRS[$2]}" ]; then - echo >&2 "`basename $0`: local_check() error: $1 does not" \ - "exist in the local /etc/hosts table!" - return 1 - fi - - if [ ${#HOST_IPADDRS[$2]} -gt 15 ]; then - echo >&2 "`basename $0`: local_check() error: More than one" \ - "IP address line corresponding to $1 in the local" \ - "/etc/hosts table!" - return 1 - fi - - # Execute remote command to get the real host name - RET_STR=`${REMOTE} ${HOST_IPADDRS[$2]} hostname 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: local_check() error: remote error:" \ - "${RET_STR}" - return 1 - fi - - if [ -z "${RET_STR}" ]; then - echo >&2 "`basename $0`: local_check() error: remote error: No"\ - "results from remote! Check the network connectivity"\ - "between the local host and ${HOST_IPADDRS[$2]}!" - return 1 - fi - - if is_pdsh; then - REAL_NAME=`echo ${RET_STR} | awk '{print $2}'` - else - REAL_NAME=`echo ${RET_STR} | awk '{print $1}'` - fi - - if [ "$1" != "${REAL_NAME}" ]; then - echo >&2 "`basename $0`: local_check() error: The real hostname"\ - "of ${HOST_IPADDRS[$2]} is \"${REAL_NAME}\","\ - "not \"$1\"! Check the local /etc/hosts table!" - return 1 - fi - - return 0 -} - -# Check whether the correct host name and IP address pair matches -# the one in the remote /etc/hosts tables -remote_check() { - # Check argument - if [ $# -ne 2 ]; then - echo >&2 $"`basename $0`: remote_check() error: Missing"\ - "argument for function remote_check()!" - return 1 - fi - - declare -i i - local RET_STR COMMAND IP_ADDR - - COMMAND=$"egrep \"[[:space:]]$1([[:space:]]|$)\" /etc/hosts" - - # Execute remote command to check remote /etc/hosts tables - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - RET_STR=`${REMOTE} ${HOST_NAMES[i]} ${COMMAND} 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: remote_check() error:"\ - "remote ${HOST_NAMES[i]} error: ${RET_STR}" - return 1 - fi - - if is_pdsh; then - IP_ADDR=`echo ${RET_STR} | awk '{print $2}'` - else - IP_ADDR=`echo ${RET_STR} | awk '{print $1}'` - fi - if [ -z "${IP_ADDR}" ]; then - echo >&2 "`basename $0`: remote_check() error:" \ - "$1 does not exist in the ${HOST_NAMES[i]}'s"\ - "/etc/hosts table!" - return 1 - fi - - if [ "${IP_ADDR}" != "${HOST_IPADDRS[$2]}" ]; then - echo >&2 "`basename $0`: remote_check() error:" \ - "IP address ${IP_ADDR} of $1 in the" \ - "${HOST_NAMES[i]}'s /etc/hosts is incorrect!" - return 1 - fi - done - - return 0 -} - -# Verify forward and reverse network connectivity of the Lustre cluster -network_check () { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: network_check() error: Missing"\ - "argument for function network_check()!" - return 1 - fi - - declare -i i - local RET_STR COMMAND REAL_NAME - - # Execute remote command to check network connectivity - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - COMMAND=$"${REMOTE} ${HOST_NAMES[i]} hostname" - RET_STR=`${REMOTE} $1 ${COMMAND} 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "remote error: ${RET_STR}" - return 1 - fi - - if [ -z "${RET_STR}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "No results from remote! Check the network" \ - "connectivity between \"$1\" and" \ - "\"${HOST_NAMES[i]}\"!" - return 1 - fi - - if is_pdsh; then - REAL_NAME=`echo ${RET_STR} | awk '{print $3}'` - else - REAL_NAME=`echo ${RET_STR} | awk '{print $1}'` - fi - if [ "${HOST_NAMES[i]}" != "${REAL_NAME}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "${RET_STR}" - return 1 - fi - done - - return 0 -} - -# Verify forward and reverse network connectivity of the Lustre cluster, -# and that hostnames match the names in the /etc/hosts tables. -network_verify() { - declare -i i - - # Initialize the HOST_IPADDRS array - unset HOST_IPADDRS - - # Get all the host names from the csv file - if ! get_hostnames; then - return 1 - fi - - # Check whether all the host names match the names in - # all the /etc/hosts tables of the Lustre cluster - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - verbose_output "Verifying IP address of host" \ - "\"${HOST_NAMES[i]}\" in the local /etc/hosts..." - if ! local_check ${HOST_NAMES[i]} $i; then - return 1 - fi - verbose_output "OK" - done - - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue - verbose_output "Verifying IP address of host" \ - "\"${HOST_NAMES[i]}\" in the remote /etc/hosts..." - if ! remote_check ${HOST_NAMES[i]} $i; then - return 1 - fi - verbose_output "OK" - done - - # Verify network connectivity of the Lustre cluster - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue - verbose_output "Verifying network connectivity of host" \ - "\"${HOST_NAMES[i]}\" to other hosts..." - if ! network_check ${HOST_NAMES[i]}; then - return 1 - fi - verbose_output "OK" - done - - return 0 -} - -# Main flow -if ! check_file; then - exit 1 -fi - -if ! network_verify; then - exit 1 -fi - -exit 0 diff --git a/lustre/scripts/lustre_config.sh b/lustre/scripts/lustre_config.sh deleted file mode 100755 index 3e5e651..0000000 --- a/lustre/scripts/lustre_config.sh +++ /dev/null @@ -1,1178 +0,0 @@ -#!/bin/bash - -# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: - -# -# lustre_config.sh - format and set up multiple lustre servers from a csv file -# -# This script is used to parse each line of a spreadsheet (csv file) and -# execute remote commands to format (mkfs.lustre) every Lustre target -# that will be part of the Lustre cluster. -# -# In addition, it can also verify the network connectivity and hostnames in -# the cluster and produce High-Availability software configurations for -# Heartbeat or CluManager. -# -################################################################################ - -# Usage -usage() { - cat >&2 < - - This script is used to format and set up multiple lustre servers from a - csv file. - - -h help and examples - -t HAtype produce High-Availability software configurations - The argument following -t is used to indicate the High- - Availability software type. The HA software types which - are currently supported are: hbv1 (Heartbeat version 1) - and hbv2 (Heartbeat version 2). - -n no net - don't verify network connectivity and hostnames - in the cluster - -f force-format the Lustre targets using --reformat option - -m no fstab change - don't modify /etc/fstab to add the new - Lustre targets - If using this option, then the value of "mount options" - item in the csv file will be passed to mkfs.lustre, else - the value will be added into the /etc/fstab. - -v verbose mode - csv file a spreadsheet that contains configuration parameters - (separated by commas) for each target in a Lustre cluster - -EOF - exit 1 -} - -# Samples -sample() { - cat <&2 $"`basename $0`: Invalid HA software type" \ - "- ${HATYPE_OPT}!" - usage - fi - ;; - n) - VERIFY_CONNECT=false - ;; - f) - REFORMAT_OPTION=$"--reformat " - ;; - m) - MODIFY_FSTAB=false - ;; - h) - sample - ;; - v) - VERBOSE_OPT=$" -v" - ;; - ?) - usage - esac -done - -# Toss out the parameters we've already processed -shift `expr $OPTIND - 1` - -# Here we expect the csv file -if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: Missing csv file!" - usage -fi - -# Output verbose informations -verbose_output() { - if [ -n "${VERBOSE_OPT}" ]; then - echo "`basename $0`: $*" - fi - return 0 -} - -# Check the csv file -check_file() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: check_file() error: Missing argument"\ - "for function check_file()!" - return 1 - fi - - CSV_FILE=$1 - if [ ! -s ${CSV_FILE} ]; then - echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}"\ - "does not exist or is empty!" - return 1 - fi - - return 0 -} - -# Parse a line in the csv file -parse_line() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: parse_line() error: Missing argument"\ - "for function parse_line()!" - return 1 - fi - - declare -i i=0 - declare -i length=0 - declare -i idx=0 - declare -i s_quote_flag=0 - declare -i d_quote_flag=0 - local TMP_LETTER LINE - - LINE=$* - - # Initialize the CONFIG_ITEM array - unset CONFIG_ITEM - - # Get the length of the line - length=${#LINE} - - i=0 - while [ ${idx} -lt ${length} ]; do - # Get a letter from the line - TMP_LETTER=${LINE:${idx}:1} - - case "${TMP_LETTER}" in - ",") - if [ ${s_quote_flag} -eq 1 -o ${d_quote_flag} -eq 1 ] - then - CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER} - else - i=$i+1 - fi - idx=${idx}+1 - continue - ;; - "'") - if [ ${s_quote_flag} -eq 0 ]; then - s_quote_flag=1 - else - s_quote_flag=0 - fi - ;; - "\"") - if [ ${d_quote_flag} -eq 0 ]; then - d_quote_flag=1 - else - d_quote_flag=0 - fi - - if [ ${i} -eq 1 ]; then - CONFIG_ITEM[i]=${CONFIG_ITEM[i]}$"\\"${TMP_LETTER} - idx=${idx}+1 - continue - fi - ;; - " ") - idx=${idx}+1 - continue - ;; - *) - ;; - esac - CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER} - idx=${idx}+1 - done - return 0 -} - -# Check the items required for OSTs, MDTs and MGS -# -# When formatting an OST, the following items: hostname, module_opts, -# device name, device type and mgs nids, cannot have null value. -# -# When formatting an MDT or MGS, the following items: hostname, -# module_opts, device name and device type, cannot have null value. -check_item() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: check_item() error: Missing argument"\ - "for function check_item()!" - return 1 - fi - - declare -i i=$1 - - # Check hostname, module_opts, device name and device type - if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\ - ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: Some required"\ - "item has null value! Check hostname, module_opts,"\ - "device name and device type!" - return 1 - fi - - # Check mgs nids - if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\ - "item has null value!" - return 1 - fi - - # Check mount point - if [ -z "${MOUNT_POINT[i]}" ]; then - echo >&2 $"`basename $0`: check_item() error: mount"\ - "point item of target ${DEVICE_NAME[i]} has null value!" - return 1 - fi - - return 0 -} - -# Get the number of MGS nodes in the cluster -get_mgs_num() { - INIT_IDX=0 - MGS_NUM=${#MGS_NODENAME[@]} - [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \ - && let "MGS_NUM += 1" -} - -# is_mgs_node hostname -# Verify whether @hostname is a MGS node -is_mgs_node() { - local host_name=$1 - declare -i i - - get_mgs_num - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0 - done - - return 1 -} - -# Check whether the MGS nodes are in the same failover group -check_mgs_group() { - declare -i i - declare -i j - declare -i idx - local mgs_node - - get_mgs_num - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - mgs_node=${MGS_NODENAME[i]} - for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do - [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1 - - idx=${MGS_IDX[j]} - if [ "${FAILOVERS[idx]#*$mgs_node*}" = "${FAILOVERS[idx]}" ] - then - echo >&2 $"`basename $0`: check_mgs_group() error:"\ - "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\ - "failover group!" - return 1 - fi - done - done - - return 0 -} - -# Get and check MGS servers. -# There should be no more than one MGS specified in the entire csv file. -check_mgs() { - declare -i i - declare -i j - declare -i exp_idx # Index of explicit MGS servers - declare -i imp_idx # Index of implicit MGS servers - local is_exp_mgs is_imp_mgs - local mgs_node - - # Initialize the MGS_NODENAME and MGS_IDX arrays - unset MGS_NODENAME - unset MGS_IDX - - exp_idx=1 - imp_idx=1 - for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do - is_exp_mgs=false - is_imp_mgs=false - - # Check whether this node is an explicit MGS node - # or an implicit one - if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then - verbose_output "Explicit MGS target" \ - "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." - is_exp_mgs=true - fi - - if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then - verbose_output "Implicit MGS target" \ - "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}." - is_imp_mgs=true - fi - - # Get and check MGS servers - if ${is_exp_mgs} || ${is_imp_mgs}; then - # Check whether more than one MGS target in one MGS node - if is_mgs_node ${HOST_NAME[i]}; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "More than one MGS target in the same node -"\ - "\"${HOST_NAME[i]}\"!" - return 1 - fi - - # Get and check primary MGS server and backup MGS server - if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ] - then - # Primary MGS server - if [ -z "${MGS_NODENAME[0]}" ]; then - if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ - || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "There exist both explicit and implicit MGS"\ - "targets in the csv file!" - return 1 - fi - MGS_NODENAME[0]=${HOST_NAME[i]} - MGS_IDX[0]=$i - else - mgs_node=${MGS_NODENAME[0]} - if [ "${FAILOVERS[i]#*$mgs_node*}" = "${FAILOVERS[i]}" ] - then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "More than one primary MGS nodes in the csv" \ - "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!" - else - echo >&2 $"`basename $0`: check_mgs() error:"\ - "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\ - "are failover pair, one of them should use"\ - "\"--noformat\" in the format options item!" - fi - return 1 - fi - else # Backup MGS server - if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \ - || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then - echo >&2 $"`basename $0`: check_mgs() error:"\ - "There exist both explicit and implicit MGS"\ - "targets in the csv file!" - return 1 - fi - - if ${is_exp_mgs}; then # Explicit MGS - MGS_NODENAME[exp_idx]=${HOST_NAME[i]} - MGS_IDX[exp_idx]=$i - exp_idx=$(( exp_idx + 1 )) - else # Implicit MGS - MGS_NODENAME[imp_idx]=${HOST_NAME[i]} - MGS_IDX[imp_idx]=$i - imp_idx=$(( imp_idx + 1 )) - fi - fi - fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}" - done - - # Check whether the MGS nodes are in the same failover group - if ! check_mgs_group; then - return 1 - fi - - return 0 -} - -# Construct the command line of mkfs.lustre -construct_mkfs_cmdline() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ - "Missing argument for function construct_mkfs_cmdline()!" - return 1 - fi - - declare -i i=$1 - local mgsnids mgsnids_str - local failnids failnids_str - - MKFS_CMD=${MKFS}$" " - MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION} - - case "${DEVICE_TYPE[i]}" in - "ost") - MKFS_CMD=${MKFS_CMD}$"--ost " - ;; - "mdt") - MKFS_CMD=${MKFS_CMD}$"--mdt " - ;; - "mgs") - MKFS_CMD=${MKFS_CMD}$"--mgs " - ;; - "mdt|mgs" | "mgs|mdt") - MKFS_CMD=${MKFS_CMD}$"--mdt --mgs " - ;; - *) - echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\ - "Invalid device type - \"${DEVICE_TYPE[i]}\"!" - return 1 - ;; - esac - - if [ -n "${FS_NAME[i]}" ]; then - MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME[i]}$" " - fi - - if [ -n "${MGS_NIDS[i]}" ]; then - MGS_NIDS[i]=`echo "${MGS_NIDS[i]}" | sed 's/^"//' | sed 's/"$//'` - mgsnids_str=${MGS_NIDS[i]} - while read mgsnids; do - MKFS_CMD=${MKFS_CMD}$"--mgsnode="${mgsnids}$" " - done < <(echo ${mgsnids_str}|awk '{split($mgsnids_str, a, ":")}\ - END {for (j in a) print a[j]}') - fi - - if [ -n "${INDEX[i]}" ]; then - MKFS_CMD=${MKFS_CMD}$"--index="${INDEX[i]}$" " - fi - - if [ -n "${FORMAT_OPTIONS[i]}" ]; then - if [ "${FORMAT_OPTIONS[i]:0:1}" = "\"" ]; then - FORMAT_OPTIONS[i]=`echo "${FORMAT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` - fi - MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS[i]}$" " - fi - - if [ -n "${MKFS_OPTIONS[i]}" ]; then - MKFS_OPTIONS[i]=`echo "${MKFS_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` - MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS[i]}$"\""$" " - fi - - if [ -n "${MOUNT_OPTIONS[i]}" ]; then - MOUNT_OPTIONS[i]=`echo "${MOUNT_OPTIONS[i]}" | sed 's/^"//' | sed 's/"$//'` - if ! ${MODIFY_FSTAB}; then - MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS[i]}$"\""$" " - fi - fi - - if [ -n "${FAILOVERS[i]}" ]; then - FAILOVERS[i]=`echo "${FAILOVERS[i]}" | sed 's/^"//' | sed 's/"$//'` - failnids_str=${FAILOVERS[i]} - while read failnids; do - MKFS_CMD=${MKFS_CMD}$"--failnode="${failnids}$" " - done < <(echo ${failnids_str}|awk '{split($failnids_str, a, ":")}\ - END {for (k in a) print a[k]}') - fi - - MKFS_CMD=${MKFS_CMD}${DEVICE_NAME[i]} - return 0 -} - -# Get all the node names in this failover group -get_nodenames() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: get_nodenames() error: Missing"\ - "argument for function get_nodenames()!" - return 1 - fi - - declare -i i=$1 - declare -i idx - local nids_str failover_nids failover_nid first_nid - - # Initialize the NODE_NAMES array - unset NODE_NAMES - - NODE_NAMES[0]=${HOST_NAME[i]} - - idx=0 - nids_str=${FAILOVERS[i]} - failover_nids=`echo ${nids_str}|awk '{split($nids_str, a, ":")}\ - END {for (idx in a) print a[idx]}'` - - # FIXME: Suppose the first nid of one failover node contains node name - idx=1 - for failover_nid in ${failover_nids} - do - first_nid=`echo ${failover_nid} | awk -F, '{print $1}'` - NODE_NAMES[idx]=${first_nid%@*} - idx=$idx+1 - done - - return 0 -} - -# Verify whether the format line has HA items -is_ha_line() { - declare -i i=$1 - - [ -n "${FAILOVERS[i]}" ] && return 0 - - return 1 -} - -# Produce HA software's configuration files -gen_ha_config() { - declare -i i=$1 - declare -i idx - local cmd_line - - # Prepare parameters - # Hostnames option - HOSTNAME_OPT=${HOST_NAME[i]} - - if ! get_nodenames $i; then - return 1 - fi - - for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do - HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]} - done - - # Target devices option - DEVICE_OPT=" -d "${TARGET_OPTS[0]} - for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do - DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]} - done - - # Construct the generation script command line - case "${HATYPE_OPT}" in - "${HATYPE_HBV1}"|"${HATYPE_HBV2}") # Heartbeat - cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}" - cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT} - ;; - "${HATYPE_CLUMGR}") # CluManager - cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}" - cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT} - ;; - esac - - # Execute script to generate HA software's configuration files - verbose_output "Generating HA software's configurations in"\ - "${HOST_NAME[i]} failover group..." - verbose_output "${cmd_line}" - eval $(echo "${cmd_line}") - if [ $? -ne 0 ]; then - return 1 - fi - verbose_output "Generate HA software's configurations in"\ - "${HOST_NAME[i]} failover group OK" - - return 0 -} - -# Configure HA software -config_ha() { - if [ -z "${HATYPE_OPT}" ]; then - return 0 - fi - - declare -i i j k - declare -i prim_idx # Index for PRIM_HOSTNAMES array - declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays - - declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover - # groups in the lustre cluster - declare -a HOST_INDEX # Indices for the same node in all the - # format lines in the csv file - local prim_host - - # Initialize the PRIM_HOSTNAMES array - prim_idx=0 - unset PRIM_HOSTNAMES - - # Get failover groups and generate HA configuration files - for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do - prim_host=${HOST_NAME[i]} - - for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do - [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2 - done - - target_idx=0 - unset HOST_INDEX - unset TARGET_OPTS - for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do - if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}" - then - HOST_INDEX[target_idx]=$k - TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]} - target_idx=$(( target_idx + 1 )) - fi - done - - if [ ${#TARGET_OPTS[@]} -ne 0 ]; then - PRIM_HOSTNAMES[prim_idx]=${prim_host} - prim_idx=$(( prim_idx + 1 )) - - if ! gen_ha_config ${HOST_INDEX[0]}; then - return 1 - fi - fi - done - - if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then - verbose_output "There are no \"failover nids\" items in the"\ - "csv file. No HA configuration files are generated!" - fi - - rm -rf ${TMP_DIRS} - return 0 -} - - -# Get all the items in the csv file and do some checks. -get_items() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: get_items() error: Missing argument"\ - "for function get_items()!" - return 1 - fi - - CSV_FILE=$1 - local LINE - declare -i line_num=0 - declare -i idx=0 - - while read -r LINE; do - line_num=${line_num}+1 - # verbose_output "Parsing line ${line_num}: $LINE" - - # Get rid of the empty line - if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then - continue - fi - - # Get rid of the comment line - if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ] - then - continue - fi - - # Parse the config line into CONFIG_ITEM - if ! parse_line $LINE; then - echo >&2 $"`basename $0`: parse_line() error: Occurred"\ - "on line ${line_num} in ${CSV_FILE}: $LINE" - return 1 - fi - - HOST_NAME[idx]=${CONFIG_ITEM[0]} - MODULE_OPTS[idx]=${CONFIG_ITEM[1]} - DEVICE_NAME[idx]=${CONFIG_ITEM[2]} - MOUNT_POINT[idx]=${CONFIG_ITEM[3]} - DEVICE_TYPE[idx]=${CONFIG_ITEM[4]} - FS_NAME[idx]=${CONFIG_ITEM[5]} - MGS_NIDS[idx]=${CONFIG_ITEM[6]} - INDEX[idx]=${CONFIG_ITEM[7]} - FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]} - MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]} - MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]} - FAILOVERS[idx]=${CONFIG_ITEM[11]} - - # Check some required items for formatting target - if ! check_item $idx; then - echo >&2 $"`basename $0`: check_item() error:"\ - "Occurred on line ${line_num} in ${CSV_FILE}." - return 1 - fi - - idx=${idx}+1 - done < ${CSV_FILE} - - return 0 -} - -# check_lnet_connect hostname_index mgs_hostname -# Check whether the target node can contact the MGS node @mgs_hostname -# If @mgs_hostname is null, then it means the primary MGS node -check_lnet_connect() { - declare -i i=$1 - declare -i idx=0 - local mgs_node=$2 - - local COMMAND RET_STR - local mgs_prim_nids all_nids all_nids_str - local nids - local nids_str= - local mgs_nids mgs_nid - local ping_mgs - - # Execute remote command to check that - # this node can contact the MGS node - verbose_output "Checking lnet connectivity between" \ - "${HOST_NAME[i]} and the MGS node ${mgs_node}" - all_nids=${MGS_NIDS[i]} - mgs_prim_nids=`echo ${all_nids} | awk -F: '{print $1}'` - all_nids_str=`echo ${all_nids} | awk '{split($all_nids, a, ":")}\ - END {for (idx in a) print a[idx]}'` - - if [ -z "${mgs_node}" ]; then - nids_str=${mgs_prim_nids} # nids of primary MGS node - else - for nids in ${all_nids_str}; do - # FIXME: Suppose the MGS nids contain the node name - [ "${nids}" != "${nids#*$mgs_node*}" ] && nids_str=${nids} - done - fi - - if [ -z "${nids_str}" ]; then - echo >&2 $"`basename $0`: check_lnet_connect() error:"\ - "Check the mgs nids item of host ${HOST_NAME[i]}!"\ - "Missing nids of the MGS node ${mgs_node}!" - return 1 - fi - - idx=0 - mgs_nids=`echo ${nids_str} | awk '{split($nids_str, a, ",")}\ - END {for (idx in a) print a[idx]}'` - - ping_mgs=false - for mgs_nid in ${mgs_nids} - do - COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1" - RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1` - if [ $? -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ] - then - # This node can contact the MGS node - verbose_output "${HOST_NAME[i]} can contact the MGS" \ - "node ${mgs_node} by using nid \"${mgs_nid}\"!" - ping_mgs=true - break - fi - done - - if ! ${ping_mgs}; then - echo >&2 "`basename $0`: check_lnet_connect() error:" \ - "${HOST_NAME[i]} cannot contact the MGS node"\ - "${mgs_node} through lnet networks!"\ - "Check ${LCTL} command!" - return 1 - fi - - return 0 -} - -# Start lnet network in the cluster node and check that -# this node can contact the MGS node -check_lnet() { - if ! ${VERIFY_CONNECT}; then - return 0 - fi - - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: check_lnet() error: Missing"\ - "argument for function check_lnet()!" - return 1 - fi - - declare -i i=$1 - declare -i j - local COMMAND RET_STR - - # Execute remote command to start lnet network - verbose_output "Starting lnet network in ${HOST_NAME[i]}" - COMMAND=$"modprobe lnet; ${LCTL} network up 2>&1" - RET_STR=`${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1` - if [ $? -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ] - then - echo >&2 "`basename $0`: check_lnet() error: remote" \ - "${HOST_NAME[i]} error: ${RET_STR}" - return 1 - fi - - if is_mgs_node ${HOST_NAME[i]}; then - return 0 - fi - - # Execute remote command to check that - # this node can contact the MGS node - for ((j = 0; j < ${MGS_NUM}; j++)); do - if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then - return 1 - fi - done - - return 0 -} - -# Start lnet network in the MGS node -start_mgs_lnet() { - declare -i i - declare -i idx - local COMMAND - - if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then - verbose_output "There is no MGS target in the ${CSV_FILE} file." - return 0 - fi - - for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do - # Execute remote command to add lnet options lines to - # the MGS node's modprobe.conf/modules.conf - idx=${MGS_IDX[i]} - COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}" - verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}" - ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2 - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: start_mgs_lnet() error:"\ - "Failed to execute remote command to" \ - "add module options to ${MGS_NODENAME[i]}!"\ - "Check ${MODULE_CONFIG}!" - return 1 - fi - - # Start lnet network in the MGS node - if ! check_lnet ${idx}; then - return 1 - fi - done - - return 0 -} - -# Execute remote command to add lnet options lines to remote nodes' -# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets -mass_config() { - local COMMAND - declare -a REMOTE_PID - declare -a REMOTE_CMD - declare -i pid_num=0 - declare -i i=0 - - # Start lnet network in the MGS node - if ! start_mgs_lnet; then - return 1 - fi - - for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do - # Construct the command line of mkfs.lustre - if ! construct_mkfs_cmdline $i; then - return 1 - fi - - # create the mount point on the node - COMMAND="mkdir -p ${MOUNT_POINT[i]}" - verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \ - "${HOST_NAME[i]}" - ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error:"\ - "Failed to execute remote command to"\ - "create the mountpoint on ${HOST_NAME[i]}!" - return 1 - fi - - if ! is_mgs_node ${HOST_NAME[i]}; then - # Execute remote command to add lnet options lines to - # modprobe.conf/modules.conf - COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}" - verbose_output "Adding lnet module options to" \ - "${HOST_NAME[i]}" - ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error:"\ - "Failed to execute remote command to"\ - "add module options to ${HOST_NAME[i]}!" - return 1 - fi - - # Check lnet networks - if ! check_lnet $i; then - return 1 - fi - fi - - # Execute remote command to format Lustre target - verbose_output "Formatting Lustre target ${DEVICE_NAME[i]}"\ - "on ${HOST_NAME[i]}..." - verbose_output "Format command line is: ${MKFS_CMD}" - REMOTE_CMD[${pid_num}]="${REMOTE} ${HOST_NAME[i]} ${MKFS_CMD}" - ${REMOTE} ${HOST_NAME[i]} "(${EXPORT_PATH} ${MKFS_CMD})" >&2 & - REMOTE_PID[${pid_num}]=$! - pid_num=${pid_num}+1 - sleep 1 - done - - # Wait for the exit status of the background remote command - verbose_output "Waiting for the return of the remote command..." - fail_exit_status=false - for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do - wait ${REMOTE_PID[${pid_num}]} - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: mass_config() error: Failed"\ - "to execute \"${REMOTE_CMD[${pid_num}]}\"!" - fail_exit_status=true - fi - done - - if ${fail_exit_status}; then - return 1 - fi - - verbose_output "All the Lustre targets are formatted successfully!" - return 0 -} - -# get_mntopts hostname device_name failovers -# Construct the mount options of Lustre target @device_name in host @hostname -get_mntopts() { - local host_name=$1 - local device_name=$2 - local failovers=$3 - local mnt_opts= - local ret_str - - [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults - - # Execute remote command to check whether the device - # is a block device or not - ret_str=`${REMOTE} ${host_name} \ - "[ -b ${device_name} ] && echo block || echo loop" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_mntopts() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - if [ -z "${ret_str}" ]; then - echo "`basename $0`: get_mntopts() error: remote error:" \ - "No results from remote!" \ - "Check network connectivity between the local host"\ - "and ${host_name}!" - return 1 - fi - - [ "${ret_str}" != "${ret_str#*loop}" ] && mnt_opts=${mnt_opts},loop - - echo ${mnt_opts} - return 0 -} - -# Execute remote command to modify /etc/fstab to add the new Lustre targets -modify_fstab() { - declare -i i - local mntent mntopts device_name - local COMMAND - - if ! ${MODIFY_FSTAB}; then - return 0 - fi - - for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do - verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\ - "to add Lustre target ${DEVICE_NAME[i]}" - mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE} - - # Get mount options - if [ -n "${MOUNT_OPTIONS[i]}" ]; then - # The mount options already specified in the csv file. - mntopts=${MOUNT_OPTIONS[i]} - else - mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\ - ${FAILOVERS[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${mntopts}" - return 1 - fi - fi - - mntent=${mntent}"\t"${mntopts}"\t"0" "0 - - # Execute remote command to modify /etc/fstab - device_name=${DEVICE_NAME[i]//\//\\/} - COMMAND="(sed -i \"/^${device_name}\t/d\" /etc/fstab; \ - echo -e \"${mntent}\" >> /etc/fstab)" - ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2 - if [ $? -ne 0 ]; then - echo >&2 "`basename $0`: modify_fstab() error:"\ - "Failed to execute remote command to"\ - "modify /etc/fstab of host ${HOST_NAME[i]}"\ - "to add Lustre target ${DEVICE_NAME[i]}!" - return 1 - fi - done - - return 0 -} - -# Main flow -# Check the csv file -if ! check_file $1; then - exit 1 -fi - -if ${VERIFY_CONNECT}; then -# Check the network connectivity and hostnames - echo "`basename $0`: Checking the cluster network connectivity"\ - "and hostnames..." - if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then - exit 1 - fi - echo "`basename $0`: Check the cluster network connectivity"\ - "and hostnames OK!" - echo -fi - -# Configure the Lustre cluster -echo "`basename $0`: ******** Lustre cluster configuration START ********" -if ! get_items ${CSV_FILE}; then - exit 1 -fi - -if ! check_mgs; then - exit 1 -fi - -if ! mass_config; then - exit 1 -fi - -if ! modify_fstab; then - exit 1 -fi - -# Produce HA software's configuration files -if ! config_ha; then - rm -rf ${TMP_DIRS} - exit 1 -fi - -echo "`basename $0`: ******** Lustre cluster configuration END **********" - -exit 0 diff --git a/lustre/scripts/lustre_createcsv.sh b/lustre/scripts/lustre_createcsv.sh deleted file mode 100755 index da95c87..0000000 --- a/lustre/scripts/lustre_createcsv.sh +++ /dev/null @@ -1,1998 +0,0 @@ -#!/bin/bash -# -# lustre_createcsv.sh - generate a csv file from a running lustre cluster -# -# This script is used to collect lustre target informations and HA software -# configurations in a lustre cluster to generate a csv file. In reverse, the -# csv file could be parsed by lustre_config.sh to configure multiple lustre -# servers in parallel. -# -# This script should be run on the MGS node. -# -################################################################################ - -# Usage -usage() { - cat >&2 <&2 "`basename $0`: Invalid HA software type" \ - "- ${HATYPE_OPT}!" - usage - fi - ;; - h) usage;; - v) VERBOSE_OUTPUT=true;; - f) CSV_FILE=$OPTARG;; - ?) usage - esac -done - -# Output verbose informations -verbose_output() { - if ${VERBOSE_OUTPUT}; then - echo "`basename $0`: $*" - fi - return 0 -} - -# Verify the local host is the MGS node -mgs_node() { - if [ ! -e ${LUSTRE_PROC_DEVICES} ]; then - echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} does" \ - "not exist. Lustre kernel modules may not be loaded!" - return 1 - fi - - if [ -z "`cat ${LUSTRE_PROC_DEVICES}`" ]; then - echo >&2 "`basename $0`: error: ${LUSTRE_PROC_DEVICES} is" \ - "empty. Lustre services may not be started!" - return 1 - fi - - if [ -z "`grep ${MGS_TYPE} ${LUSTRE_PROC_DEVICES}`" ]; then - echo >&2 "`basename $0`: error: This node is not a MGS node." \ - "The script should be run on the MGS node!" - return 1 - fi - - return 0 -} - -# Check whether the reomte command is pdsh -is_pdsh() { - if [ "${REMOTE}" = "${REMOTE#*pdsh}" ]; then - return 1 - fi - - return 0 -} - -# remote_error fn_name host_addr ret_str -# Verify the return result from remote command -remote_error() { - local fn_name host_addr ret_str - - fn_name=$1 - shift - host_addr=$1 - shift - ret_str=$* - - if [ "${ret_str}" != "${ret_str#*connect:*}" ]; then - echo "`basename $0`: ${fn_name}() error: remote error:" \ - "${ret_str}" - return 0 - fi - - if [ -z "${ret_str}" ]; then - echo "`basename $0`: ${fn_name}() error: remote error:" \ - "No results from remote!" \ - "Check network connectivity between the local host"\ - "and ${host_addr}!" - return 0 - fi - - return 1 -} - -# nid2hostname nid -# Convert @nid to hostname of the lustre cluster node -nid2hostname() { - local nid=$1 - local host_name= - local addr nettype ip_addr - local ret_str - - addr=${nid%@*} - nettype=${nid#*@} - if [ -z "${addr}" ]; then - echo "`basename $0`: nid2hostname() error:" \ - "Invalid nid - \"${nid}\"!" - return 1 - fi - - case "${nettype}" in - lo*) host_name=`hostname`;; - elan*) # QsNet - # FIXME: Parse the /etc/elanhosts configuration file to - # convert ElanID to hostname - ;; - gm*) # Myrinet - # FIXME: Use /usr/sbin/gmlndnid to find the hostname of - # the specified GM Global node ID - ;; - ptl*) # Portals - # FIXME: Convert portal ID to hostname - ;; - *) # tcp, o2ib, cib, openib, iib, vib, ra - ip_addr=${addr} - - # Execute remote command to get the host name - ret_str=`${REMOTE} ${ip_addr} "hostname" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: nid2hostname() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - remote_error "nid2hostname" ${ip_addr} "${ret_str}" && return 1 - - if is_pdsh; then - host_name=`echo ${ret_str} | awk '{print $2}'` - else - host_name=`echo ${ret_str} | awk '{print $1}'` - fi - ;; - esac - - echo ${host_name} - return 0 -} - -# get_hostnames -# Get lustre cluster node names -get_hostnames() { - declare -a HOST_NIDS - declare -i idx # Index of HOST_NIDS array - declare -i i # Index of HOST_NAMES array - - if ! mgs_node; then - return 1 - fi - - if [ ! -e ${LNET_PROC_PEERS} ]; then - echo >&2 "`basename $0`: error: ${LNET_PROC_PEERS} does not" \ - "exist. LNET kernel modules may not be loaded" \ - "or LNET network may not be up!" - return 1 - fi - - HOST_NAMES[0]=${MGS_HOSTNAME} # MGS node - HOST_NIDS[0]=${HOST_NAMES[0]} - - # Get the nids of the nodes which have contacted MGS - idx=1 - for nid in `cat ${LNET_PROC_PEERS} | awk '{print $1}'`; do - if [ "${nid}" = "nid" ]; then - continue - fi - - HOST_NIDS[idx]=${nid} - let "idx += 1" - done - - if [ ${idx} -eq 1 ]; then - verbose_output "Only one node running in the lustre cluster." \ - "It's ${HOST_NAMES[0]}." - return 0 - fi - - # Get the hostnames of the nodes - for ((idx = 1, i = 1; idx < ${#HOST_NIDS[@]}; idx++, i++)); do - if [ -z "${HOST_NIDS[idx]}" ]; then - echo >&2 "`basename $0`: get_hostnames() error:" \ - "Invalid nid - \"${HOST_NIDS[idx]}\"!" - return 1 - fi - - HOST_NAMES[i]=$(nid2hostname ${HOST_NIDS[idx]}) - if [ $? -ne 0 ]; then - echo >&2 "${HOST_NAMES[i]}" - return 1 - fi - - if [ "${HOST_NAMES[i]}" = "${HOST_NAMES[0]}" ]; then - let "i -= 1" - fi - done - - return 0 -} - -#*************************** Network module options ***************************# -# last_is_backslash line -# Check whether the last effective letter of @line is a backslash -last_is_backslash() { - local line="$*" - declare -i i - declare -i length - local letter last_letter - - length=${#line} - for ((i = ${length}-1; i >= 0; i--)); do - letter=${line:${i}:1} - [ "x${letter}" != "x " -a "x${letter}" != "x " -a -n "${letter}" ]\ - && last_letter=${letter} && break - done - - [ "x${last_letter}" = "x\\" ] && return 0 - - return 1 -} - -# get_module_opts hostname -# Get the network module options from the node @hostname -get_module_opts() { - local host_name=$1 - local ret_str - local MODULE_CONF KERNEL_VER - local ret_line line find_options - local continue_flag - - MODULE_OPTS=${DEFAULT_MOD_OPTS} - - # Execute remote command to get the kernel version - ret_str=`${REMOTE} ${host_name} "uname -r" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo >&2 "`basename $0`: get_module_opts() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - remote_error "get_module_opts" ${host_name} "${ret_str}" && return 1 - - if is_pdsh; then - KERNEL_VER=`echo ${ret_str} | awk '{print $2}'` - else - KERNEL_VER=`echo ${ret_str} | awk '{print $1}'` - fi - - # Get the module configuration file name - if [ "${KERNEL_VER:0:3}" = "2.4" ]; then - MODULE_CONF=/etc/modules.conf - else - MODULE_CONF=/etc/modprobe.conf - fi - - # Execute remote command to get the lustre network module options - continue_flag=false - find_options=false - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - # Get rid of the comment line - [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue - - if [ "${line}" != "${line#*options lnet*}" ]; then - if ! ${find_options}; then - find_options=true - MODULE_OPTS=${line} - else - MODULE_OPTS=${MODULE_OPTS}$" \n "${line} - fi - - last_is_backslash "${line}" && continue_flag=true \ - || continue_flag=false - continue - fi - - if ${continue_flag}; then - MODULE_OPTS=${MODULE_OPTS}$" \n "${line} - ! last_is_backslash "${line}" && continue_flag=false - - fi - done < <(${REMOTE} ${host_name} "cat ${MODULE_CONF}") - - if [ -z "${MODULE_OPTS}" ]; then - MODULE_OPTS=${DEFAULT_MOD_OPTS} - fi - - return 0 -} - -#************************ HA software configurations ************************# -# is_ha_target hostname target_devname -# Check whether the target @target_devname was made to be high-available -is_ha_target() { - local host_name=$1 - local target_svname=$2 - local res_file - local ret_str - - case "${HATYPE_OPT}" in - "${HATYPE_HBV1}") res_file=${HA_RES};; - "${HATYPE_HBV2}") res_file=${HA_CIB};; - "${HATYPE_CLUMGR}") res_file=${CLUMAN_CONFIG};; - esac - - # Execute remote command to check the resource file - ret_str=`${REMOTE} ${host_name} \ - "grep ${target_svname} ${res_file}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo >&2 "`basename $0`: is_ha_target() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - [ "${ret_str}" = "${ret_str#*${target_svname}*}" ] && return 1 - - return 0 -} - -# get_hb_configs hostname -# Get the Heartbeat configurations from the node @hostname -get_hb_configs() { - local host_name=$1 - local ret_line line - declare -i i - - unset HA_CONFIGS - HB_CHANNELS= - SRV_IPADDRS= - HB_OPTIONS= - - # Execute remote command to get the configs of Heartbeat channels, etc - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - # Get rid of the comment line - [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue - - if [ "${line}" != "${line#*serial*}" ] \ - || [ "${line}" != "${line#*cast*}" ]; then - if [ -z "${HB_CHANNELS}" ]; then - HB_CHANNELS=${line} - else - HB_CHANNELS=${HB_CHANNELS}:${line} - fi - fi - - if [ "${line}" != "${line#*stonith*}" ] \ - || [ "${line}" != "${line#*ping*}" ] \ - || [ "${line}" != "${line#*respawn*}" ] \ - || [ "${line}" != "${line#*apiauth*}" ] \ - || [ "${line}" != "${line#*compression*}" ]; then - if [ -z "${HB_OPTIONS}" ]; then - HB_OPTIONS=${line} - else - HB_OPTIONS=${HB_OPTIONS}:${line} - fi - fi - done < <(${REMOTE} ${host_name} "cat ${HA_CF}") - - if [ -z "${HB_CHANNELS}" ]; then - echo >&2 "`basename $0`: get_hb_configs() error:" \ - "There are no heartbeat channel configs in ${HA_CF}" \ - "of host ${host_name} or ${HA_CF} does not exist!" - return 0 - fi - - # Execute remote command to get Heartbeat service address - if [ "${HATYPE_OPT}" = "${HATYPE_HBV1}" ]; then - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - # Get rid of the empty line - [ -z "`echo ${line}|awk '/[[:alnum:]]/ {print $0}'`" ]\ - && continue - - # Get rid of the comment line - [ -z "`echo \"${line}\"|egrep -v \"^#\"`" ] && continue - - SRV_IPADDRS=`echo ${line} | awk '{print $2}'` - [ -n "${SRV_IPADDRS}" ] \ - && [ "`echo ${line} | awk '{print $1}'`" = "${host_name}" ] && break - done < <(${REMOTE} ${host_name} "cat ${HA_RES}") - - if [ -z "${SRV_IPADDRS}" ]; then - echo >&2 "`basename $0`: get_hb_configs() error: There"\ - "are no service address in ${HA_RES} of host"\ - "${host_name} or ${HA_RES} does not exist!" - return 0 - fi - fi - - # Construct HA configuration items - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - - # Execute remote command to check whether this target service - # was made to be high-available - if is_ha_target ${host_name} ${TARGET_DEVNAMES[i]}; then - HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS} - fi - done - - return 0 -} - -# get_cluman_channel hostname -# Get the Heartbeat channel of CluManager from the node @hostname -get_cluman_channel() { - local host_name=$1 - local ret_line line - local cluman_channel= - local mcast_ipaddr - - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - if [ "${line}" != "${line#*broadcast*}" ] \ - && [ "`echo ${line}|awk '{print $3}'`" = "yes" ]; then - cluman_channel="broadcast" - break - fi - - if [ "${line}" != "${line#*multicast_ipaddress*}" ]; then - mcast_ipaddr=`echo ${line}|awk '{print $3}'` - if [ "${mcast_ipaddr}" != "225.0.0.11" ]; then - cluman_channel="multicast ${mcast_ipaddr}" - break - fi - fi - done < <(${REMOTE} ${host_name} "${CONFIG_CMD} --clumembd") - - echo ${cluman_channel} - return 0 -} - -# get_cluman_srvaddr hostname target_svname -# Get the service IP addresses of @target_svname from the node @hostname -get_cluman_srvaddr() { - local host_name=$1 - local target_svname=$2 - local ret_line line - local srvaddr cluman_srvaddr= - - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - if [ "${line}" != "${line#*ipaddress = *}" ]; then - srvaddr=`echo ${line}|awk '{print $3}'` - if [ -z "${cluman_srvaddr}" ]; then - cluman_srvaddr=${srvaddr} - else - cluman_srvaddr=${cluman_srvaddr}:${srvaddr} - fi - fi - done < <(${REMOTE} ${host_name} "${CONFIG_CMD} \ - --service=${target_svname} --service_ipaddresses") - - if [ -z "${cluman_srvaddr}" ]; then - echo "`basename $0`: get_cluman_srvaddr() error: Cannot" \ - "get the service IP addresses of ${target_svname} in" \ - "${host_name}! Check ${CONFIG_CMD} command!" - return 1 - fi - - echo ${cluman_srvaddr} - return 0 -} - -# get_cluman_configs hostname -# Get the CluManager configurations from the node @hostname -get_cluman_configs() { - local host_name=$1 - local ret_str - declare -i i - - unset HA_CONFIGS - - # Execute remote command to get the configs of CluManager - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - HB_CHANNELS= - SRV_IPADDRS= - HB_OPTIONS= - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - - # Execute remote command to check whether this target service - # was made to be high-available - ! is_ha_target ${host_name} ${TARGET_DEVNAMES[i]} && continue - - # Execute remote command to get Heartbeat channel - HB_CHANNELS=$(get_cluman_channel ${host_name}) - if [ $? -ne 0 ]; then - echo >&2 "${HB_CHANNELS}" - fi - - # Execute remote command to get service IP address - SRV_IPADDRS=$(get_cluman_srvaddr ${host_name} \ - ${TARGET_SVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${SRV_IPADDRS}" - return 0 - fi - - HA_CONFIGS[i]=${HB_CHANNELS},${SRV_IPADDRS},${HB_OPTIONS} - done - - return 0 -} - -# get_ha_configs hostname -# Get the HA software configurations from the node @hostname -get_ha_configs() { - local host_name=$1 - - unset HA_CONFIGS - - if [ -z "${HATYPE_OPT}" ]; then - return 0 - fi - - verbose_output "Collecting HA software configurations from host $1..." - - case "${HATYPE_OPT}" in - "${HATYPE_HBV1}" | "${HATYPE_HBV2}") # Heartbeat - if ! get_hb_configs ${host_name}; then - return 1 - fi - ;; - "${HATYPE_CLUMGR}") # CluManager - if ! get_cluman_configs ${host_name}; then - return 1 - fi - ;; - esac - - return 0 -} - -#*********************** Lustre targets configurations ***********************# - -# is_failover_service target_svname -# Check whether a target service @target_svname is a failover service. -is_failover_service() { - local target_svname=$1 - declare -i i - - for ((i = 0; i < ${#ALL_TARGET_SVNAMES[@]}; i++)); do - [ "${target_svname}" = "${ALL_TARGET_SVNAMES[i]}" ] && return 0 - done - - return 1 -} - -# get_svnames hostname -# Get the lustre target server obd names from the node @hostname -get_svnames(){ - declare -i i - declare -i j - local host_name=$1 - local ret_line line - - # Initialize the TARGET_SVNAMES array - unset TARGET_SVNAMES - unset FAILOVER_FMTOPTS - - # Execute remote command to the node @hostname and figure out what - # lustre services are running. - i=0 - j=${#ALL_TARGET_SVNAMES[@]} - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - if [ -z "`echo ${line} | grep ${MGS_TYPE}`" ] \ - && [ -z "`echo ${line} | grep ${MDT_TYPE}`" ] \ - && [ -z "`echo ${line} | grep ${OST_TYPE}`" ]; then - continue - fi - - # Get target server name - TARGET_SVNAMES[i]=`echo ${line} | awk '{print $4}'` - if [ -n "${TARGET_SVNAMES[i]}" ]; then - if is_failover_service ${TARGET_SVNAMES[i]}; then - FAILOVER_FMTOPTS[i]="--noformat" - fi - ALL_TARGET_SVNAMES[j]=${TARGET_SVNAMES[i]} - let "i += 1" - let "j += 1" - else - echo >&2 "`basename $0`: get_svnames() error: Invalid"\ - "line in ${host_name}'s ${LUSTRE_PROC_DEVICES}"\ - "- \"${line}\"!" - return 1 - fi - done < <(${REMOTE} ${host_name} "cat ${LUSTRE_PROC_DEVICES}") - - if [ $i -eq 0 ]; then - verbose_output "There are no lustre services running" \ - "on the node ${host_name}!" - fi - - return 0 -} - -# is_loopdev devname -# Check whether a device @devname is a loop device or not -is_loopdev() { - local devname=$1 - - if [ -z "${devname}" ] || \ - [ -z "`echo ${devname}|awk '/\/dev\/loop[[:digit:]]/ {print $0}'`" ] - then - return 1 - fi - - return 0 -} - -# get_devname hostname svname -# Get the device name of lustre target @svname from node @hostname -get_devname() { - local host_name=$1 - local target_svname=$2 - local target_devname= - local ret_str - local target_type target_obdtype mntdev_file - - if [ "${target_svname}" = "${MGS_SVNAME}" ]; then - # Execute remote command to get the device name of mgs target - ret_str=`${REMOTE} ${host_name} \ - "/sbin/findfs LABEL=${target_svname}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ] - then - echo "`basename $0`: get_devname() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - fi - - if [ "${ret_str}" = "${ret_str#*Unable to resolve*}" ]; then - if is_pdsh; then - target_devname=`echo ${ret_str} | awk '{print $2}'` - else - target_devname=`echo ${ret_str} | awk '{print $1}'` - fi - fi - else # Execute remote command to get the device name of mdt/ost target - target_type=`echo ${target_svname} | cut -d - -f 2` - target_obdtype=${target_type:0:3}_TYPE - - mntdev_file=${LUSTRE_PROC}/${!target_obdtype}/${target_svname}/mntdev - - ret_str=`${REMOTE} ${host_name} "cat ${mntdev_file}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_devname() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - if [ "${ret_str}" != "${ret_str#*No such file*}" ]; then - echo "`basename $0`: get_devname() error:"\ - "${mntdev_file} does not exist in ${host_name}!" - return 1 - else - if is_pdsh; then - target_devname=`echo ${ret_str} | awk '{print $2}'` - else - target_devname=`echo ${ret_str} | awk '{print $1}'` - fi - fi - fi - - echo ${target_devname} - return 0 -} - -# get_devsize hostname target_devname -# Get the device size (KB) of @target_devname from node @hostname -get_devsize() { - local host_name=$1 - local target_devname=$2 - local target_devsize= - local ret_str - - # Execute remote command to get the device size - ret_str=`${REMOTE} ${host_name} \ - "/sbin/blockdev --getsize ${target_devname}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_devsize() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - if is_pdsh; then - target_devsize=`echo ${ret_str} | awk '{print $2}'` - else - target_devsize=`echo ${ret_str} | awk '{print $1}'` - fi - - if [ -z "`echo ${target_devsize}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_devsize() error: can't" \ - "get device size of ${target_devname} in ${host_name}!" - return 1 - fi - - let " target_devsize /= 2" - - echo ${target_devsize} - return 0 -} - -# get_realdevname hostname loop_dev -# Get the real device name of loop device @loop_dev from node @hostname -get_realdevname() { - local host_name=$1 - local loop_dev=$2 - local target_devname= - local ret_str - - # Execute remote command to get the real device name - ret_str=`${REMOTE} ${host_name} \ - "/sbin/losetup ${loop_dev}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_realdevname() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - if is_pdsh; then - target_devname=`echo ${ret_str} | awk '{print $4}' \ - | sed 's/^(//' | sed 's/)$//'` - else - target_devname=`echo ${ret_str} | awk '{print $3}' \ - | sed 's/^(//' | sed 's/)$//'` - fi - - if [ "${ret_str}" != "${ret_str#*No such*}" ] \ - || [ -z "${target_devname}" ]; then - echo "`basename $0`: get_realdevname() error: can't" \ - "get info on device ${loop_dev} in ${host_name}!" - return 1 - fi - - echo ${target_devname} - return 0 -} - -# get_mntpnt hostname target_devname -# Get the lustre target mount point from the node @hostname -get_mntpnt(){ - local host_name=$1 - local target_devname=$2 - local mnt_point= - local ret_str - - # Execute remote command to get the mount point - ret_str=`${REMOTE} ${host_name} \ - "cat /etc/mtab | grep ${target_devname}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_mntpnt() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - if is_pdsh; then - mnt_point=`echo ${ret_str} | awk '{print $3}'` - else - mnt_point=`echo ${ret_str} | awk '{print $2}'` - fi - - if [ -z "${mnt_point}" ]; then - echo "`basename $0`: get_mntpnt() error: can't" \ - "get the mount point of ${target_devname} in ${host_name}!" - return 1 - fi - - echo ${mnt_point} - return 0 -} - -# get_devnames hostname -# Get the lustre target device names, mount points -# and loop device sizes from the node @hostname -get_devnames(){ - declare -i i - local host_name=$1 - local ret_line line - - # Initialize the arrays - unset TARGET_DEVNAMES - unset TARGET_DEVSIZES - unset TARGET_MNTPNTS - - for ((i = 0; i < ${#TARGET_SVNAMES[@]}; i++)); do - TARGET_DEVNAMES[i]=$(get_devname ${host_name} \ - ${TARGET_SVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVNAMES[i]}" - return 1 - fi - - if [ -z "${TARGET_DEVNAMES[i]}" ]; then - if [ "${TARGET_SVNAMES[i]}" = "${MGS_SVNAME}" ]; then - verbose_output "There exists combo mgs/mdt"\ - "target in ${host_name}." - continue - else - echo >&2 "`basename $0`: get_devname() error:"\ - "No device corresponding to target" \ - "${TARGET_SVNAMES[i]} in ${host_name}!" - return 1 - fi - fi - - # Get the mount point of the target - TARGET_MNTPNTS[i]=$(get_mntpnt ${host_name} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_MNTPNTS[i]}" - return 1 - fi - - # The target device is a loop device? - if [ -n "${TARGET_DEVNAMES[i]}" ] \ - && is_loopdev ${TARGET_DEVNAMES[i]}; then - # Get the device size - TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVSIZES[i]}" - return 1 - fi - - # Get the real device name - TARGET_DEVNAMES[i]=$(get_realdevname ${host_name} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVNAMES[i]}" - return 1 - fi - fi - done - - return 0 -} - -# is_target target_svtype ldd_flags -# Check the service type of a lustre target -is_target() { - case "$1" in - "mdt") let "ret = $2 & LDD_F_SV_TYPE_MDT";; - "ost") let "ret = $2 & LDD_F_SV_TYPE_OST";; - "mgs") let "ret = $2 & LDD_F_SV_TYPE_MGS";; - "*") - echo >&2 "`basename $0`: is_target() error: Invalid" \ - "target service type - \"$1\"!" - return 1 - ;; - esac - - if [ ${ret} -eq 0 ]; then - return 1 - fi - - return 0 -} - -# get_devtype ldd_flags -# Get the service type of a lustre target from @ldd_flags -get_devtype() { - local target_devtype= - - if [ -z "${flags}" ]; then - echo "`basename $0`: get_devtype() error: Invalid" \ - "ldd_flags - it's value is null!" - return 1 - fi - - if is_target "mgs" $1; then - if is_target "mdt" $1; then - target_devtype="mgs|mdt" - else - target_devtype="mgs" - fi - elif is_target "mdt" $1; then - target_devtype="mdt" - elif is_target "ost" $1; then - target_devtype="ost" - else - echo "`basename $0`: get_devtype() error: Invalid" \ - "ldd_flags - \"$1\"!" - return 1 - fi - - echo ${target_devtype} - return 0 -} - -# get_mntopts ldd_mount_opts -# Get the user-specified lustre target mount options from @ldd_mount_opts -get_mntopts() { - local mount_opts= - local ldd_mount_opts=$1 - - mount_opts="${ldd_mount_opts#${ALWAYS_MNTOPTS}}" - mount_opts="${mount_opts#${MDT_MGS_ALWAYS_MNTOPTS}}" - mount_opts="${mount_opts#${OST_ALWAYS_MNTOPTS}}" - mount_opts="${mount_opts#${OST_DEFAULT_MNTOPTS}}" - mount_opts="`echo \"${mount_opts}\" | sed 's/^,//'`" - - [ "${mount_opts}" != "${mount_opts#*,*}" ] && echo "\""${mount_opts}"\"" \ - || echo ${mount_opts} - - return 0 -} - -# ip2hostname nids -# Convert IP addresses in @nids into hostnames -ip2hostname() { - local orig_nids=$1 - local nids= - local nid nids_str - local nettype - - nids_str=`echo ${orig_nids}|awk '{split($orig_nids, a, ",")}\ - END {for (i in a) print a[i]}'` - for nid in ${nids_str}; do - nettype=${nid#*@} - - case "${nettype}" in - lo* | elan* | gm* | ptl*) ;; - *) - nid=$(nid2hostname ${nid}) - if [ $? -ne 0 ]; then - echo "${nid}" - return 1 - fi - - nid=${nid}@${nettype} - ;; - esac - - if [ -z "${nids}" ]; then - nids=${nid} - else - nids=${nids},${nid} - fi - done - - echo ${nids} - return 0 -} - -# get_mgsnids ldd_params -# Get the mgs nids of lustre target from @ldd_params -get_mgsnids() { - local mgs_nids= # mgs nids in one mgs node - local all_mgs_nids= # mgs nids in all mgs failover nodes - local param= - local ldd_params="$*" - - for param in ${ldd_params}; do - if [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ]; then - mgs_nids=`echo ${param#${PARAM_MGSNODE}}` - mgs_nids=$(ip2hostname ${mgs_nids}) - if [ $? -ne 0 ]; then - echo >&2 "${mgs_nids}" - return 1 - fi - - if [ -n "${all_mgs_nids}" ]; then - all_mgs_nids=${all_mgs_nids}:${mgs_nids} - else - all_mgs_nids=${mgs_nids} - fi - fi - done - - [ "${all_mgs_nids}" != "${all_mgs_nids#*,*}" ] \ - && echo "\""${all_mgs_nids}"\"" || echo ${all_mgs_nids} - - return 0 -} - -# get_failnids ldd_params -# Get the failover nids of lustre target from @ldd_params -get_failnids() { - local fail_nids= # failover nids in one failover node - local all_fail_nids= # failover nids in all failover nodes - # of this target - local param= - local ldd_params="$*" - - for param in ${ldd_params}; do - if [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ]; then - fail_nids=`echo ${param#${PARAM_FAILNODE}}` - fail_nids=$(ip2hostname ${fail_nids}) - if [ $? -ne 0 ]; then - echo >&2 "${fail_nids}" - return 1 - fi - - if [ -n "${all_fail_nids}" ]; then - all_fail_nids=${all_fail_nids}:${fail_nids} - else - all_fail_nids=${fail_nids} - fi - fi - done - - [ "${all_fail_nids}" != "${all_fail_nids#*,*}" ] \ - && echo "\""${all_fail_nids}"\"" || echo ${all_fail_nids} - - return 0 -} - -# get_fmtopts target_devname hostname ldd_params -# Get other format options of the lustre target @target_devname from @ldd_params -get_fmtopts() { - local target_devname=$1 - local host_name=$2 - shift - shift - local ldd_params="$*" - local param= - local fmt_opts= - - for param in ${ldd_params}; do - [ -n "`echo ${param}|awk '/mgsnode=/ {print $0}'`" ] && continue - [ -n "`echo ${param}|awk '/failover.node=/ {print $0}'`" ] && continue - - if [ -n "${param}" ]; then - if [ -n "${fmt_opts}" ]; then - fmt_opts=${fmt_opts}" --param=\""${param}"\"" - else - fmt_opts="--param=\""${param}"\"" - fi - fi - done - - echo ${fmt_opts} - return 0 -} - -# get_stripecount host_name target_fsname -# Get the stripe count for @target_fsname -get_stripecount() { - local host_name=$1 - local target_fsname=$2 - local stripe_count= - local stripecount_file - local ret_str - - # Get the stripe count - stripecount_file=${LUSTRE_PROC}/lov/${target_fsname}-mdtlov/stripecount - ret_str=`${REMOTE} ${host_name} "cat ${stripecount_file}" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_stripecount() error:" \ - "remote command to ${host_name} error: ${ret_str}" - return 1 - fi - - if is_pdsh; then - stripe_count=`echo ${ret_str} | awk '{print $2}'` - else - stripe_count=`echo ${ret_str} | awk '{print $1}'` - fi - - if [ -z "`echo ${stripe_count}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_stripecount() error: can't" \ - "get stripe count of ${target_fsname} in ${host_name}!" - return 1 - fi - - echo ${stripe_count} - return 0 -} - -# get_stripecount_opt host_name target_fsname -# Get the stripe count option for lustre mdt target -get_stripecount_opt() { - local host_name=$1 - local target_fsname=$2 - local stripe_count= - local stripecount_opt= - - # Get the stripe count - [ -z "${target_fsname}" ] && target_fsname="lustre" - stripe_count=$(get_stripecount ${host_name} ${target_fsname}) - if [ $? -ne 0 ]; then - echo "${stripe_count}" - return 1 - fi - - if [ "${stripe_count}" != "1" ]; then - stripecount_opt=${OPTSTR_STRIPE_COUNT}${stripe_count} - fi - - echo ${stripecount_opt} - return 0 -} - -# get_ldds hostname -# Get the lustre target disk data from the node @hostname -get_ldds(){ - declare -i i - local host_name=$1 - local ret_line line - local flags mnt_opts params - local stripecount_opt - - # Initialize the arrays - unset TARGET_DEVTYPES TARGET_FSNAMES TARGET_MGSNIDS TARGET_INDEXES - unset TARGET_FMTOPTS TARGET_MNTOPTS TARGET_FAILNIDS - - # Get lustre target device type, fsname, index, etc. - # from MOUNT_DATA_FILE. Using tunefs.lustre to read it. - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - flags= - mnt_opts= - params= - stripecount_opt= - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - - # Execute remote command to read MOUNT_DATA_FILE - while read -r ret_line; do - if is_pdsh; then - set -- ${ret_line} - shift - line="$*" - else - line="${ret_line}" - fi - - if [ -n "`echo ${line}|awk '/Index:/ {print $0}'`" ]; then - TARGET_INDEXES[i]=`echo ${line}|awk '{print $2}'` - continue - fi - - if [ -n "`echo ${line}|awk '/Lustre FS:/ {print $0}'`" ]; then - TARGET_FSNAMES[i]=`echo ${line}|awk '{print $3}'` - continue - fi - - if [ -n "`echo ${line}|awk '/Flags:/ {print $0}'`" ]; then - flags=`echo ${line}|awk '{print $2}'` - continue - fi - - if [ -n "`echo ${line}|awk '/Persistent mount opts:/ {print $0}'`" ]; then - mnt_opts=`echo ${line}|awk '{print $0}'` - mnt_opts=`echo ${mnt_opts#Persistent mount opts: }` - continue - fi - - if [ -n "`echo ${line}|awk '/Parameters:/ {print $0}'`" ]; then - params=`echo ${line}|awk '{print $0}'` - params=`echo ${params#Parameters:}` - break - fi - done < <(${REMOTE} ${host_name} "${TUNEFS} --print --verbose ${TARGET_DEVNAMES[i]} 2>/dev/null") - - if [ -z "${flags}" ]; then - echo >&2 "`basename $0`: get_ldds() error: Invalid" \ - "ldd_flags of target ${TARGET_DEVNAMES[i]}" \ - "in host ${host_name} - it's value is null!"\ - "Check ${TUNEFS} command!" - return 1 - fi - - if [ "${TARGET_INDEXES[i]}" = "unassigned" ] \ - || is_target "mgs" ${flags}; then - TARGET_INDEXES[i]= - fi - - [ "${TARGET_FSNAMES[i]}" = "lustre" ] && TARGET_FSNAMES[i]= - - # Get the lustre target service type - TARGET_DEVTYPES[i]=$(get_devtype ${flags}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVTYPES[i]} From device" \ - "${TARGET_DEVNAMES[i]} in host ${host_name}!" - return 1 - fi - - # Get the lustre target mount options - TARGET_MNTOPTS[i]=$(get_mntopts "${mnt_opts}") - - # Get mgs nids of the lustre target - TARGET_MGSNIDS[i]=$(get_mgsnids "${params}") - - # Get failover nids of the lustre target - TARGET_FAILNIDS[i]=$(get_failnids "${params}") - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_FAILNIDS[i]} From device" \ - "${TARGET_DEVNAMES[i]} in host ${host_name}!" - return 1 - fi - - # Get other format options of the lustre target - TARGET_FMTOPTS[i]=$(get_fmtopts ${TARGET_DEVNAMES[i]} ${host_name} "${params}") - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_FMTOPTS[i]}" - return 1 - fi - - if [ -n "${TARGET_DEVSIZES[i]}" ]; then - if [ -n "${TARGET_FMTOPTS[i]}" ]; then - TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]} ""${TARGET_FMTOPTS[i]}" - else - TARGET_FMTOPTS[i]="--device-size=${TARGET_DEVSIZES[i]}" - fi - fi - - if [ -n "${FAILOVER_FMTOPTS[i]}" ]; then - if [ -n "${TARGET_FMTOPTS[i]}" ]; then - TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${FAILOVER_FMTOPTS[i]} - else - TARGET_FMTOPTS[i]=${FAILOVER_FMTOPTS[i]} - fi - fi - - if is_target "mdt" ${flags}; then - # Get the stripe count option - stripecount_opt=$(get_stripecount_opt ${host_name} ${TARGET_FSNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${stripecount_opt}" - return 1 - fi - - if [ -n "${stripecount_opt}" ]; then - if [ -n "${TARGET_FMTOPTS[i]}" ]; then - TARGET_FMTOPTS[i]=${TARGET_FMTOPTS[i]}" "${stripecount_opt} - else - TARGET_FMTOPTS[i]=${stripecount_opt} - fi - fi - fi - - if [ "${TARGET_FMTOPTS[i]}" != "${TARGET_FMTOPTS[i]#*,*}" ]; then - TARGET_FMTOPTS[i]="\""${TARGET_FMTOPTS[i]}"\"" - fi - done - - return 0 -} - -# get_journalsize target_devname hostname -# Get the journal size of lustre target @target_devname from @hostname -get_journalsize() { - local target_devname=$1 - local host_name=$2 - local journal_inode= - local journal_size= - local ret_str - - # Execute remote command to get the journal inode number - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Journal inode:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_journalsize() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%Journal inode:*}} - journal_inode=`echo ${ret_str} | awk '{print $3}'` - if [ -z "`echo ${journal_inode}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_journalsize() error: can't" \ - "get journal inode of ${target_devname} in ${host_name}!" - return 1 - fi - - # Execute remote command to get the journal size - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R \ - 'stat <${journal_inode}>' ${target_devname}|grep '^User:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_journalsize() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%User:*}} - journal_size=`echo ${ret_str} | awk '{print $6}'` - if [ -z "`echo ${journal_size}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_journalsize() error: can't" \ - "get journal size of ${target_devname} in ${host_name}!" - return 1 - fi - - let "journal_size /= 1024*1024" # MB - - echo ${journal_size} - return 0 -} - -# get_defaultjournalsize target_devsize -# Calculate the default journal size from target device size @target_devsize -get_defaultjournalsize() { - declare -i target_devsize=$1 - declare -i journal_size=0 - declare -i max_size base_size - - let "base_size = 1024*1024" - if [ ${target_devsize} -gt ${base_size} ]; then # 1GB - let "journal_size = target_devsize / 102400" - let "journal_size *= 4" - fi - - let "max_size = 102400 * L_BLOCK_SIZE" - let "max_size >>= 20" # 400MB - - if [ ${journal_size} -gt ${max_size} ]; then - let "journal_size = max_size" - fi - - echo ${journal_size} - return 0 -} - -# figure_journal_size target_devname hostname -# Find a reasonable journal file size given the number of blocks -# in the filesystem. This algorithm is derived from figure_journal_size() -# function in util.c of e2fsprogs-1.38.cfs2-1.src.rpm. -figure_journal_size() { - local target_devname=$1 - local host_name=$2 - local ret_str - declare -i block_count - declare -i journal_blocks - declare -i journal_size - - # Execute remote command to get the block count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Block count:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: figure_journal_size() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%Block count:*}} - block_count=`echo ${ret_str} | awk '{print $3}'` - if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: figure_journal_size() error: can't" \ - "get block count of ${target_devname} in ${host_name}!" - return 1 - fi - - if ((block_count < 32768)); then - let "journal_blocks = 1024" - elif ((block_count < 256*1024)); then - let "journal_blocks = 4096" - elif ((block_count < 512*1024)); then - let "journal_blocks = 8192" - elif ((block_count < 1024*1024)); then - let "journal_blocks = 16384" - else - let "journal_blocks = 32768" - fi - - let "journal_size = journal_blocks * L_BLOCK_SIZE / 1048576" - - echo ${journal_size} - return 0 -} - -# get_J_opt hostname target_devname target_devsize -# Get the mkfs -J option of lustre target @target_devname -# from the node @hostname -get_J_opt() { - local host_name=$1 - local target_devname=$2 - local target_devsize=$3 - local journal_size= - local default_journal_size= - local journal_opt= - - # Get the real journal size of lustre target - journal_size=$(get_journalsize ${target_devname} ${host_name}) - if [ $? -ne 0 ]; then - echo "${journal_size}" - return 1 - fi - - # Get the default journal size of lustre target - default_journal_size=$(get_defaultjournalsize ${target_devsize}) - if [ "${default_journal_size}" = "0" ]; then - default_journal_size=$(figure_journal_size ${target_devname} \ - ${host_name}) - if [ $? -ne 0 ]; then - echo "${default_journal_size}" - return 1 - fi - fi - - if [ "${journal_size}" != "${default_journal_size}" ]; then - journal_opt="-J size=${journal_size}" - fi - - echo ${journal_opt} - return 0 -} - -# get_ratio target_devname hostname -# Get the bytes/inode ratio of lustre target @target_devname from @hostname -get_ratio() { - local target_devname=$1 - local host_name=$2 - local inode_count= - local block_count= - local ratio= - local ret_str - - # Execute remote command to get the inode count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Inode count:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_ratio() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%Inode count:*}} - inode_count=`echo ${ret_str} | awk '{print $3}'` - if [ -z "`echo ${inode_count}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_ratio() error: can't" \ - "get inode count of ${target_devname} in ${host_name}!" - return 1 - fi - - # Execute remote command to get the block count - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Block count:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_ratio() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%Block count:*}} - block_count=`echo ${ret_str} | awk '{print $3}'` - if [ -z "`echo ${block_count}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_ratio() error: can't" \ - "get block count of ${target_devname} in ${host_name}!" - return 1 - fi - - let "ratio = block_count*L_BLOCK_SIZE/inode_count" - - echo ${ratio} - return 0 -} - -# get_default_ratio target_devtype target_devsize -# Calculate the default bytes/inode ratio from target type @target_devtype -get_default_ratio() { - local target_devtype=$1 - declare -i target_devsize=$2 - local ratio= - - case "${target_devtype}" in - "mdt" | "mgs|mdt" | "mdt|mgs") - ratio=4096;; - "ost") - [ ${target_devsize} -gt 1000000 ] && ratio=16384;; - esac - - [ -z "${ratio}" ] && ratio=${L_BLOCK_SIZE} - - echo ${ratio} - return 0 -} - -# get_i_opt hostname target_devname target_devtype target_devsize -# Get the mkfs -i option of lustre target @target_devname -# from the node @hostname -get_i_opt() { - local host_name=$1 - local target_devname=$2 - local target_devtype=$3 - local target_devsize=$4 - local ratio= - local default_ratio= - local ratio_opt= - - # Get the real bytes/inode ratio of lustre target - ratio=$(get_ratio ${target_devname} ${host_name}) - if [ $? -ne 0 ]; then - echo "${ratio}" - return 1 - fi - - # Get the default bytes/inode ratio of lustre target - default_ratio=$(get_default_ratio ${target_devtype} ${target_devsize}) - - if [ "${ratio}" != "${default_ratio}" ]; then - ratio_opt="-i ${ratio}" - fi - - echo ${ratio_opt} - return 0 -} - -# get_isize target_devname hostname -# Get the inode size of lustre target @target_devname from @hostname -get_isize() { - local target_devname=$1 - local host_name=$2 - local inode_size= - local ret_str - - # Execute remote command to get the inode size - ret_str=`${REMOTE} ${host_name} "/sbin/debugfs -R 'stats -h' \ - ${target_devname} | grep 'Inode size:'" 2>&1` - if [ $? -ne 0 -a -n "${ret_str}" ]; then - echo "`basename $0`: get_isize() error:" \ - "remote command error: ${ret_str}" - return 1 - fi - - ret_str=${ret_str#${ret_str%Inode size:*}} - inode_size=`echo ${ret_str} | awk '{print $3}'` - if [ -z "`echo ${inode_size}|awk '/^[[:digit:]]/ {print $0}'`" ] - then - echo "`basename $0`: get_isize() error: can't" \ - "get inode size of ${target_devname} in ${host_name}!" - return 1 - fi - - echo ${inode_size} - return 0 -} - -# get_mdt_default_isize host_name target_fsname -# Calculate the default inode size of lustre mdt target -get_mdt_default_isize() { - local host_name=$1 - local target_fsname=$2 - declare -i stripe_count - local inode_size= - - # Get the stripe count - stripe_count=$(get_stripecount ${host_name} ${target_fsname}) - if [ $? -ne 0 ]; then - echo "${stripe_count}" - return 1 - fi - - if ((stripe_count > 77)); then - inode_size=512 - elif ((stripe_count > 34)); then - inode_size=2048 - elif ((stripe_count > 13)); then - inode_size=1024 - else - inode_size=512 - fi - - echo ${inode_size} - return 0 -} - -# get_default_isize host_name target_devtype target_fsname -# Calculate the default inode size of lustre target type @target_devtype -get_default_isize() { - local host_name=$1 - local target_devtype=$2 - local target_fsname=$3 - local inode_size= - - case "${target_devtype}" in - "mdt" | "mgs|mdt" | "mdt|mgs") - inode_size=$(get_mdt_default_isize ${host_name} ${target_fsname}) - if [ $? -ne 0 ]; then - echo "${inode_size}" - return 1 - fi - ;; - "ost") - inode_size=256;; - esac - - [ -z "${inode_size}" ] && inode_size=128 - - echo ${inode_size} - return 0 -} - -# get_I_opt hostname target_devname target_devtype target_fsname -# Get the mkfs -I option of lustre target @target_devname -# from the node @hostname -get_I_opt() { - local host_name=$1 - local target_devname=$2 - local target_devtype=$3 - local target_fsname=$4 - local isize= - local default_isize= - local isize_opt= - - # Get the real inode size of lustre target - isize=$(get_isize ${target_devname} ${host_name}) - if [ $? -ne 0 ]; then - echo "${isize}" - return 1 - fi - - # Get the default inode size of lustre target - [ -z "${target_fsname}" ] && target_fsname="lustre" - default_isize=$(get_default_isize ${host_name} ${target_devtype} \ - ${target_fsname}) - if [ $? -ne 0 ]; then - echo "${default_isize}" - return 1 - fi - - if [ "${isize}" != "${default_isize}" ]; then - isize_opt="-I ${isize}" - fi - - echo ${isize_opt} - return 0 -} - -# get_mkfsopts hostname -# Get the mkfs options of lustre targets from the node @hostname -get_mkfsopts(){ - declare -i i - local host_name=$1 - local journal_opt - local ratio_opt - local inode_size_opt - - # Initialize the arrays - unset TARGET_MKFSOPTS - - # FIXME: Get other mkfs options of ext3/ldiskfs besides -J, -i and -I - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - journal_opt= - ratio_opt= - inode_size_opt= - - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - - if [ -z "${TARGET_DEVSIZES[i]}" ]; then - # Get the device size - TARGET_DEVSIZES[i]=$(get_devsize ${host_name} \ - ${TARGET_DEVNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${TARGET_DEVSIZES[i]}" - return 1 - fi - fi - - # Get the journal option - journal_opt=$(get_J_opt ${host_name} ${TARGET_DEVNAMES[i]} \ - ${TARGET_DEVSIZES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${journal_opt}" - return 1 - fi - - if [ -n "${journal_opt}" ]; then - if [ -z "${TARGET_MKFSOPTS[i]}" ]; then - TARGET_MKFSOPTS[i]="${journal_opt}" - else - TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${journal_opt}" - fi - fi - - # Get the bytes-per-inode ratio option - ratio_opt=$(get_i_opt ${host_name} ${TARGET_DEVNAMES[i]} \ - ${TARGET_DEVTYPES[i]} ${TARGET_DEVSIZES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${ratio_opt}" - return 1 - fi - - if [ -n "${ratio_opt}" ]; then - if [ -z "${TARGET_MKFSOPTS[i]}" ]; then - TARGET_MKFSOPTS[i]="${ratio_opt}" - else - TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${ratio_opt}" - fi - fi - - # Get the inode size option - inode_size_opt=$(get_I_opt ${host_name} ${TARGET_DEVNAMES[i]} \ - ${TARGET_DEVTYPES[i]} ${TARGET_FSNAMES[i]}) - if [ $? -ne 0 ]; then - echo >&2 "${inode_size_opt}" - return 1 - fi - - if [ -n "${inode_size_opt}" ]; then - if [ -z "${TARGET_MKFSOPTS[i]}" ]; then - TARGET_MKFSOPTS[i]="${inode_size_opt}" - else - TARGET_MKFSOPTS[i]=${TARGET_MKFSOPTS[i]}" ${inode_size_opt}" - fi - fi - - if [ "${TARGET_MKFSOPTS[i]}" != "${TARGET_MKFSOPTS[i]#*,*}" ]; then - TARGET_MKFSOPTS[i]="\""${TARGET_MKFSOPTS[i]}"\"" - fi - done - return 0 -} - -# get_target_configs hostname -# Get the lustre target informations from the node @hostname -get_target_configs() { - declare -i i - local host_name=$1 - local ret_line line - - # Initialize the arrays - unset TARGET_CONFIGS - - # Get lustre target server names - if ! get_svnames ${host_name}; then - return 1 - fi - - # Get lustre target device names, mount points and loop device sizes - if ! get_devnames ${host_name}; then - return 1 - fi - - # Get lustre target device type, fsname, index, etc. - if ! get_ldds ${host_name}; then - return 1 - fi - - # Get mkfs options of lustre targets - if ! get_mkfsopts ${host_name}; then - return 1 - fi - - # Construct lustre target configs - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - TARGET_CONFIGS[i]=${TARGET_DEVNAMES[i]},${TARGET_MNTPNTS[i]},${TARGET_DEVTYPES[i]},${TARGET_FSNAMES[i]},${TARGET_MGSNIDS[i]},${TARGET_INDEXES[i]},${TARGET_FMTOPTS[i]},${TARGET_MKFSOPTS[i]},${TARGET_MNTOPTS[i]},${TARGET_FAILNIDS[i]} - done - - return 0 -} - -# get_configs hostname -# Get all the informations needed to generate a csv file from -# the node @hostname -get_configs() { - # Check the hostname - if [ -z "$1" ]; then - echo >&2 "`basename $0`: get_configs() error:" \ - "Missing hostname!" - return 1 - fi - - # Get network module options - verbose_output "" - verbose_output "Collecting network module options from host $1..." - if ! get_module_opts $1; then - return 1 - fi - verbose_output "OK" - - # Get lustre target informations - verbose_output "Collecting Lustre targets informations from host $1..." - if ! get_target_configs $1; then - return 1 - fi - verbose_output "OK" - - # Get HA software configurations - if ! get_ha_configs $1; then - return 1 - fi - - return 0 -} - - -# Generate the csv file from the lustre cluster -gen_csvfile() { - declare -i idx - declare -i i - local line - - # Get lustre cluster node names - verbose_output "Collecting Lustre cluster node names..." - if ! get_hostnames; then - return 1 - fi - verbose_output "OK" - - : > ${CSV_FILE} - - for ((idx = 0; idx < ${#HOST_NAMES[@]}; idx++)); do - # Collect informations - if ! get_configs ${HOST_NAMES[idx]}; then - rm -f ${CSV_FILE} - return 1 - fi - - # Append informations to the csv file - for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do - [ -z "${TARGET_DEVNAMES[i]}" ] && continue - - if [ -z "${HA_CONFIGS[i]}" ]; then - line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]} - else - line=${HOST_NAMES[idx]},${MODULE_OPTS},${TARGET_CONFIGS[i]},${HA_CONFIGS[i]} - fi - verbose_output "Informations of target ${TARGET_DEVNAMES[i]}" \ - "in host ${HOST_NAMES[idx]} are as follows:" - verbose_output "${line}" - echo "" >> ${CSV_FILE} - echo "${line}" >> ${CSV_FILE} - done - done - - return 0 -} - -# Main flow -echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} START ********" -if ! gen_csvfile; then - exit 1 -fi -echo "`basename $0`: ******** Generate csv file -- ${CSV_FILE} OK **********" - -exit 0 -- 1.8.3.1