--- /dev/null
+#!/bin/bash
+#
+# lc_net - script for Lustre cluster network verification
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [options] <csv file>
+
+ Options:
+ -a select all the nodes from the csv file to operate on
+ -w hostname,hostname,...
+ select the specified list of nodes (separated by commas)
+ -x hostname,hostname,...
+ exclude the specified list of nodes (separated by commas)
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cl-
+ uster, the first field of each line is the host name
+ of the cluster node
+
+EOF
+ exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "aw:x:v" OPTION; do
+ case $OPTION in
+ a)
+ [ -z "${SPECIFIED_NODELIST}" ] && [ -z "${EXCLUDED_NODELIST}" ]\
+ && USE_ALLNODES=true
+ ;;
+ w)
+ USE_ALLNODES=false
+ SPECIFIED_NODELIST=$OPTARG
+ ;;
+ x)
+ USE_ALLNODES=false
+ EXCLUDED_NODELIST=$OPTARG
+ ;;
+ v)
+ VERBOSE_OUTPUT=true
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: Missing csv file!"
+ usage
+fi
+
+# Global variables
+CSV_FILE=$1
+declare -a HOST_NAMES
+declare -a HOST_IPADDRS
+
+# Get the hosts to be operated on
+get_hostnames() {
+ local NODES_TO_USE
+
+ # Initialize the HOST_NAMES array
+ unset HOST_NAMES
+
+ # Get the list of nodes to be operated on
+ NODES_TO_USE=$(get_nodelist)
+ [ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && return 1
+
+ # Check the node list
+ if [ -z "${NODES_TO_USE}" ]; then
+ echo "`basename $0`: There are no hosts to be operated on."\
+ "Check the node selection options (-a, -w or -x)."
+ return 1
+ fi
+
+ # Load the hostnames in the nodelist into the array
+ HOST_NAMES=( $(echo ${NODES_TO_USE//,/ }) )
+
+ return 0
+}
+
+# ping_host host_name
+# Check whether host $host_name is reachable.
+# If it is, then return the IP address of this host.
+ping_host() {
+ local host_name=$1
+ local ip_addr=
+ local ret_str
+
+ if [ -z "${host_name}" ]; then
+ echo "`basename $0`: ping_host() error: Missing hostname!"
+ return 1
+ fi
+
+ # Run ping command
+ ret_str=$(ping -c1 ${host_name} 2>&1)
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ if [ -n "${ret_str}" ]; then
+ echo "`basename $0`: ping_host() error: ${ret_str}!"
+ else
+ echo "`basename $0`: ping_host() error:"\
+ "Host ${host_name} does not respond to ping!"
+ fi
+ return 1
+ fi
+
+ # Get the IP address
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \
+ sed -e 's/^(//' -e 's/)$//'`
+
+ echo "${ip_addr}"
+ return 0
+}
+
+# local_check index
+# Check the network connectivity between local host and ${HOST_NAMES[index]}.
+local_check() {
+ declare -i i=$1
+
+ # Check whether ${HOST_NAMES[i]} is reachable
+ # and get the IP address of this host from ping
+ HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]})
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ echo >&2 "${HOST_IPADDRS[i]}"
+ return 1
+ fi
+
+ return 0
+}
+
+# remote_check index
+# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
+# this host agrees with the local host about what its name is resolved to.
+remote_check() {
+ declare -i i=$1
+ local cmd ret_str
+ local ip_addr= # the IP address got from remote ping
+
+ # Execute remote command to check whether ${HOST_NAMES[i]}
+ # can resolve its own name
+ cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
+ ret_str=$(${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1)
+ if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "remote to ${HOST_NAMES[i]} error: ${ret_str}!"
+ return 1
+ fi
+
+ if [ -z "${ret_str}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "No results from ${HOST_NAMES[i]}! Check the network"\
+ "connectivity between local host and ${HOST_NAMES[i]}!"
+ return 1
+ fi
+
+ # Get the IP address of ${HOST_NAMES[i]} from its own ping
+ if is_pdsh; then
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'`
+ else
+ ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'`
+ fi
+ ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'`
+
+ # Compare IP addresses
+ # Check whether ${HOST_NAMES[i]} agrees with the local host
+ # about what its name is resolved to.
+ if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:"\
+ "Local host resolves ${HOST_NAMES[i]} to IP address"\
+ "\"${HOST_IPADDRS[i]}\", while its own resolution is"\
+ "\"${ip_addr}\". They are not the same!"
+ return 1
+ fi
+
+ return 0
+}
+
+# network_verify
+# Verify name resolution and network connectivity of the Lustre cluster
+network_verify() {
+ declare -i i
+
+ # Initialize the HOST_IPADDRS array
+ unset HOST_IPADDRS
+
+ # Get all the host names to be operated on
+ ! get_hostnames && return 1
+
+ # Check the network connectivity between local host
+ # and other cluster nodes
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
+
+ verbose_output "Verifying network connectivity between"\
+ "\"`hostname`\" and \"${HOST_NAMES[i]}\"..."
+ ! local_check $i && return 1
+ ! remote_check $i && return 1
+ verbose_output "OK"
+ done
+
+ return 0
+}
+
+# Main flow
+if ! check_file ${CSV_FILE}; then
+ exit 1
+fi
+
+# Cluster network verification
+if ! network_verify; then
+ exit 1
+fi
+
+exit 0