Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / scripts / lc_net.in
diff --git a/lustre/scripts/lc_net.in b/lustre/scripts/lc_net.in
new file mode 100644 (file)
index 0000000..d618c69
--- /dev/null
@@ -0,0 +1,226 @@
+#!/bin/bash
+#
+# lc_net - script for Lustre cluster network verification
+#
+###############################################################################
+
+# Usage
+usage() {
+       cat >&2 <<EOF
+
+Usage: `basename $0` [options] <csv file>
+
+       Options:
+       -a              select all the nodes from the csv file to operate on
+       -w hostname,hostname,...
+                       select the specified list of nodes (separated by commas)
+       -x hostname,hostname,...
+                       exclude the specified list of nodes (separated by commas)
+       -v              verbose mode
+       csv file        a spreadsheet that contains configuration parameters 
+                       (separated by commas) for each target in a Lustre cl-
+                       uster, the first field of each line is the host name 
+                       of the cluster node
+
+EOF
+       exit 1
+}
+
+# Get the library of functions
+. @scriptlibdir@/lc_common
+
+VERBOSE_OUTPUT=false
+# Get and check the positional parameters
+while getopts "aw:x:v" OPTION; do
+       case $OPTION in
+       a)
+               [ -z "${SPECIFIED_NODELIST}" ] && [ -z "${EXCLUDED_NODELIST}" ]\
+               && USE_ALLNODES=true
+               ;;
+       w)
+               USE_ALLNODES=false
+               SPECIFIED_NODELIST=$OPTARG
+               ;;
+       x)
+               USE_ALLNODES=false
+               EXCLUDED_NODELIST=$OPTARG
+               ;;
+       v) 
+               VERBOSE_OUTPUT=true
+               ;;
+        ?) 
+               usage 
+       esac
+done
+
+# Toss out the parameters we've already processed
+shift  `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+       echo >&2 $"`basename $0`: Missing csv file!"
+       usage
+fi
+
+# Global variables
+CSV_FILE=$1
+declare -a HOST_NAMES
+declare -a HOST_IPADDRS
+
+# Get the hosts to be operated on
+get_hostnames() {
+       local NODES_TO_USE
+
+       # Initialize the HOST_NAMES array
+       unset HOST_NAMES
+
+       # Get the list of nodes to be operated on
+       NODES_TO_USE=$(get_nodelist)
+       [ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && return 1
+
+       # Check the node list
+       if [ -z "${NODES_TO_USE}" ]; then
+               echo "`basename $0`: There are no hosts to be operated on."\
+               "Check the node selection options (-a, -w or -x)."
+               return 1
+       fi
+
+       # Load the hostnames in the nodelist into the array
+       HOST_NAMES=( $(echo ${NODES_TO_USE//,/ }) )
+
+       return 0
+}
+
+# ping_host host_name
+# Check whether host $host_name is reachable. 
+# If it is, then return the IP address of this host.
+ping_host() {
+       local host_name=$1
+       local ip_addr=
+       local ret_str
+
+       if [ -z "${host_name}" ]; then
+               echo "`basename $0`: ping_host() error: Missing hostname!"
+               return 1
+       fi
+
+       # Run ping command
+       ret_str=$(ping -c1 ${host_name} 2>&1)
+       if [ ${PIPESTATUS[0]} -ne 0 ]; then
+               if [ -n "${ret_str}" ]; then
+                       echo "`basename $0`: ping_host() error: ${ret_str}!"
+               else
+                       echo "`basename $0`: ping_host() error:"\
+                       "Host ${host_name} does not respond to ping!"
+               fi
+               return 1
+       fi
+
+       # Get the IP address
+       ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \
+               sed -e 's/^(//' -e 's/)$//'`
+
+       echo "${ip_addr}"
+       return 0
+}
+
+# local_check index
+# Check the network connectivity between local host and ${HOST_NAMES[index]}.
+local_check() {
+       declare -i i=$1
+
+       # Check whether ${HOST_NAMES[i]} is reachable
+       # and get the IP address of this host from ping
+       HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]})
+       if [ ${PIPESTATUS[0]} -ne 0 ]; then
+               echo >&2 "${HOST_IPADDRS[i]}"
+               return 1
+       fi
+
+       return 0
+}
+
+# remote_check index
+# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
+# this host agrees with the local host about what its name is resolved to.
+remote_check() {
+       declare -i i=$1
+       local cmd ret_str
+       local ip_addr=          # the IP address got from remote ping
+
+       # Execute remote command to check whether ${HOST_NAMES[i]}
+       # can resolve its own name
+       cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
+       ret_str=$(${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1)
+       if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then
+               echo >&2 "`basename $0`: remote_check() error:"\
+               "remote to ${HOST_NAMES[i]} error: ${ret_str}!"
+               return 1
+       fi
+
+       if [ -z "${ret_str}" ]; then
+               echo >&2 "`basename $0`: remote_check() error:"\
+               "No results from ${HOST_NAMES[i]}! Check the network"\
+               "connectivity between local host and ${HOST_NAMES[i]}!"
+               return 1
+       fi
+
+       # Get the IP address of ${HOST_NAMES[i]} from its own ping
+       if is_pdsh; then
+               ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'`
+       else
+               ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'`
+       fi
+       ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'`
+
+       # Compare IP addresses
+       # Check whether ${HOST_NAMES[i]} agrees with the local host
+       # about what its name is resolved to.
+       if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
+               echo >&2 "`basename $0`: remote_check() error:"\
+               "Local host resolves ${HOST_NAMES[i]} to IP address"\
+               "\"${HOST_IPADDRS[i]}\", while its own resolution is"\
+               "\"${ip_addr}\". They are not the same!"
+               return 1
+       fi
+       
+       return 0
+}
+
+# network_verify
+# Verify name resolution and network connectivity of the Lustre cluster
+network_verify() {
+       declare -i i
+
+       # Initialize the HOST_IPADDRS array
+       unset HOST_IPADDRS
+
+       # Get all the host names to be operated on 
+       ! get_hostnames && return 1
+
+       # Check the network connectivity between local host 
+       # and other cluster nodes
+       for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+               [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue
+
+               verbose_output "Verifying network connectivity between"\
+                              "\"`hostname`\" and \"${HOST_NAMES[i]}\"..."
+               ! local_check $i && return 1
+               ! remote_check $i && return 1
+               verbose_output "OK"
+       done
+
+       return 0
+}
+
+# Main flow
+if ! check_file ${CSV_FILE}; then
+       exit 1  
+fi
+
+# Cluster network verification
+if ! network_verify; then
+       exit 1  
+fi
+
+exit 0