#!/bin/bash
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
#
# lc_net - script for Lustre cluster network verification
#
# For every selected node (except the local host) this script:
#   1. pings the node from the local host and records the IP address that
#      the local ping reports for it;
#   2. runs ping on the node itself (through ${REMOTE}) against its own host
#      name and checks that the IP address found in that output is the same
#      as the one recorded in step 1.
#
# The helpers error_output, verbose_output, get_nodelist and check_file, as
# well as ${REMOTE}, are not defined in this file; they are expected to come
# from the lc_common library sourced below.
#
###############################################################################

# usage
# Print the usage message to stderr and exit with status 1.
usage() {
    cat >&2 <<EOF

Usage: $(basename "$0") [options] <csv file>

    Options:
    -a          select all the nodes from the csv file to operate on
    -w hostname,hostname,...
                select the specified list of nodes (separated by commas)
    -x hostname,hostname,...
                exclude the specified list of nodes (separated by commas)
    -v          verbose mode
    csv file    a spreadsheet that contains configuration parameters
                (separated by commas) for each target in a Lustre cl-
                uster, the first field of each line is the host name
                of the cluster node

EOF
    exit 1
}

# Get the library of functions
# (@scriptlibdir@ is a placeholder; it has to be substituted with a real
# directory before this script can run.)
. @scriptlibdir@/lc_common

VERBOSE_OUTPUT=false

# Get and check the positional parameters.
# NOTE(review): USE_ALLNODES, SPECIFIED_NODELIST and EXCLUDED_NODELIST are
# only written here, never read in this file - presumably get_nodelist in
# lc_common consumes them; confirm against that library.
while getopts "aw:x:v" OPTION; do
    case "${OPTION}" in
    a)
        # -a only takes effect if neither -w nor -x has been seen so far
        [ -z "${SPECIFIED_NODELIST}" ] && [ -z "${EXCLUDED_NODELIST}" ] \
            && USE_ALLNODES=true
        ;;
    w)
        USE_ALLNODES=false
        SPECIFIED_NODELIST=$OPTARG
        ;;
    x)
        USE_ALLNODES=false
        EXCLUDED_NODELIST=$OPTARG
        ;;
    v)
        VERBOSE_OUTPUT=true
        ;;
    *)
        # getopts sets OPTION to "?" for unknown options/missing arguments
        usage
        ;;
    esac
done

# Toss out the parameters we've already processed
shift $((OPTIND - 1))

# Here we expect the csv file
if [ $# -eq 0 ]; then
    error_output "Missing csv file!"
    usage
fi

# Global variables
CSV_FILE=$1
declare -a HOST_NAMES       # host names to be verified
declare -a HOST_IPADDRS     # HOST_IPADDRS[i] is the IP of HOST_NAMES[i]
                            # as reported by the local ping

# get_hostnames
# Fill the global HOST_NAMES array with the hosts to be operated on.
# Returns 0 on success; 1 if get_nodelist fails or yields an empty list.
get_hostnames() {
    local NODES_TO_USE

    # Initialize the HOST_NAMES array
    HOST_NAMES=()

    # Get the list of nodes to be operated on; on failure the captured
    # output is reported as the error message.
    if ! NODES_TO_USE=$(get_nodelist); then
        error_output "${NODES_TO_USE}"
        return 1
    fi

    # Check the node list
    if [ -z "${NODES_TO_USE}" ]; then
        echo "$(basename "$0"): There are no hosts to be operated on."\
"Check the node selection options (-a, -w or -x)." >&2
        return 1
    fi

    # Load the hostnames in the nodelist into the array.
    # Word splitting is intentional here: commas are turned into blanks and
    # every resulting word becomes one array element.
    # shellcheck disable=SC2206
    HOST_NAMES=( ${NODES_TO_USE//,/ } )

    return 0
}

# ping_host host_name
# Check whether host $host_name is reachable.
# If it is, print the IP address of this host (third field of the first
# line of the ping output, with the surrounding parentheses removed) on
# stdout and return 0.
# Otherwise print an error message on stdout (the caller captures it) and
# return 1.
ping_host() {
    local host_name=$1
    local ip_addr=
    local ret_str

    if [ -z "${host_name}" ]; then
        echo "$(basename "$0"): ping_host() error: Missing hostname!"
        return 1
    fi

    # Run ping command
    if ! ret_str=$(ping -c1 "${host_name}" 2>&1); then
        if [ -n "${ret_str}" ]; then
            echo "$(basename "$0"): ping_host() error: ${ret_str}!"
        else
            echo "$(basename "$0"): ping_host() error:"\
"Host ${host_name} does not respond to ping!"
        fi
        return 1
    fi

    # Get the IP address
    ip_addr=$(printf '%s\n' "${ret_str}" | head -n 1 | awk '{print $3}' \
        | sed -e 's/^(//' -e 's/)$//')

    echo "${ip_addr}"
    return 0
}

# local_check index
# Check the network connectivity between local host and ${HOST_NAMES[index]}.
# On success ${HOST_IPADDRS[index]} holds the IP address reported by ping;
# on failure the message produced by ping_host is passed to error_output
# and 1 is returned.
local_check() {
    local -i i=$1

    # Check whether ${HOST_NAMES[i]} is reachable
    # and get the IP address of this host from ping
    if ! HOST_IPADDRS[i]=$(ping_host "${HOST_NAMES[i]}"); then
        error_output "${HOST_IPADDRS[i]}"
        return 1
    fi

    return 0
}

# remote_check index
# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
# this host agrees with the local host about what its name is resolved to.
# Expects ${HOST_IPADDRS[index]} to have been set by local_check.
# Returns 0 if both IP addresses are equal, 1 otherwise.
remote_check() {
    local -i i=$1
    local -i rc=0
    local cmd ret_str
    local ip_addr=      # the IP address got from remote ping
    local ip_pattern='[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}'

    # Execute remote command to check whether ${HOST_NAMES[i]}
    # can resolve its own name.
    # ${REMOTE} is left unquoted on purpose so that a command with options
    # is split into separate words.
    cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
    # shellcheck disable=SC2086
    ret_str=$(${REMOTE} "${HOST_NAMES[i]}" "${cmd}" 2>&1) || rc=$?

    if [ "${rc}" -ne 0 ] && [ -n "${ret_str}" ]; then
        error_output "remote_check():"\
"remote to ${HOST_NAMES[i]} error: ${ret_str}!"
        return 1
    fi

    if [ -z "${ret_str}" ]; then
        error_output "remote_check():"\
"No results from ${HOST_NAMES[i]}! Check the network"\
"connectivity between local host and ${HOST_NAMES[i]}!"
        return 1
    fi

    # Get the IP address of ${HOST_NAMES[i]} from its own ping:
    # the first dotted-quad found in the output.  "head -n 1" guards
    # against several matches on the first matching line.
    ip_addr=$(printf '%s\n' "${ret_str}" \
        | grep -E -m 1 -o "${ip_pattern}" | head -n 1)

    # Compare IP addresses
    # Check whether ${HOST_NAMES[i]} agrees with the local host
    # about what its name is resolved to.
    if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
        error_output "remote_check():"\
"Local host resolves ${HOST_NAMES[i]} to IP address"\
"\"${HOST_IPADDRS[i]}\", while its own resolution is"\
"\"${ip_addr}\". They are not the same!"
        return 1
    fi

    return 0
}

# network_verify
# Verify name resolution and network connectivity of the Lustre cluster.
# Stops at the first host that fails either check and returns 1;
# returns 0 if every host passes.
network_verify() {
    local -i i
    local this_host

    # Initialize the HOST_IPADDRS array
    HOST_IPADDRS=()

    # Get all the host names to be operated on
    get_hostnames || return 1

    # The local host name does not change while we run: query it once
    this_host=$(hostname)

    # Check the network connectivity between local host
    # and other cluster nodes
    for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
        # Nothing to verify against ourselves
        [ "${HOST_NAMES[i]}" = "${this_host}" ] && continue

        verbose_output "Verifying network connectivity between"\
"\"${this_host}\" and \"${HOST_NAMES[i]}\"..."
        local_check "$i" || return 1
        remote_check "$i" || return 1
        verbose_output "OK"
    done

    return 0
}

# Main flow
if ! check_file "${CSV_FILE}"; then
    exit 1
fi

# Cluster network verification
if ! network_verify; then
    exit 1
fi

exit 0