From df306f80ac2a449bbb56d19ecab744e6aea39e5b Mon Sep 17 00:00:00 2001 From: yujian Date: Thu, 20 Jul 2006 06:24:42 +0000 Subject: [PATCH] b=10647 not require the use of /etc/hosts on nodes --- lustre/scripts/lc_net.sh.in | 245 +++++++++++++++----------------------------- 1 file changed, 84 insertions(+), 161 deletions(-) diff --git a/lustre/scripts/lc_net.sh.in b/lustre/scripts/lc_net.sh.in index e6608b4..e4f150c 100644 --- a/lustre/scripts/lc_net.sh.in +++ b/lustre/scripts/lc_net.sh.in @@ -81,160 +81,104 @@ get_hostnames() { return 0 } -# Check whether the host name matches the name in the local /etc/hosts table -# and whether the IP address corresponding to the host name is correct -local_check() { - # Check argument - if [ $# -ne 2 ]; then - echo >&2 $"`basename $0`: local_check() error: Missing"\ - "argument for function local_check()!" - return 1 - fi - - local RET_STR REAL_NAME - - # Get the corresponding IP address of the host name from /etc/hosts table - # of the current host - HOST_IPADDRS[$2]=`egrep "[[:space:]]$1([[:space:]]|$)" /etc/hosts \ - | awk '{print $1}'` - if [ -z "${HOST_IPADDRS[$2]}" ]; then - echo >&2 "`basename $0`: local_check() error: $1 does not" \ - "exist in the local /etc/hosts table!" +# ping_host host_name +# Check whether host $host_name is reachable. +# If it is, then return the IP address of this host. +ping_host() { + local host_name=$1 + local ip_addr= + local ret_str + + if [ -z "${host_name}" ]; then + echo "`basename $0`: ping_host() error: Missing hostname!" return 1 fi - if [ ${#HOST_IPADDRS[$2]} -gt 15 ]; then - echo >&2 "`basename $0`: local_check() error: More than one" \ - "IP address line corresponding to $1 in the local" \ - "/etc/hosts table!" + # Run ping command + ret_str=`ping -c1 ${host_name} 2>&1` + if [ $? -ne 0 ]; then + if [ -n "${ret_str}" ]; then + echo "`basename $0`: ping_host() error: ${ret_str}!" + else + echo "`basename $0`: ping_host() error:"\ + "Host ${host_name} does not respond to ping!" + fi return 1 fi - # Execute remote command to get the real host name - RET_STR=`${REMOTE} ${HOST_IPADDRS[$2]} hostname 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: local_check() error: remote error:" \ - "${RET_STR}" - return 1 - fi + # Get the IP address + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}' | \ + sed -e 's/^(//' -e 's/)$//'` - if [ -z "${RET_STR}" ]; then - echo >&2 "`basename $0`: local_check() error: remote error: No"\ - "results from remote! Check the network connectivity"\ - "between the local host and ${HOST_IPADDRS[$2]}!" - return 1 - fi + echo "${ip_addr}" + return 0 +} - if is_pdsh; then - REAL_NAME=`echo ${RET_STR} | awk '{print $2}'` - else - REAL_NAME=`echo ${RET_STR} | awk '{print $1}'` - fi +# local_check index +# Check the network connectivity between local host and ${HOST_NAMES[index]}. +local_check() { + declare -i i=$1 - if [ "$1" != "${REAL_NAME}" ]; then - echo >&2 "`basename $0`: local_check() error: The real hostname"\ - "of ${HOST_IPADDRS[$2]} is \"${REAL_NAME}\","\ - "not \"$1\"! Check the local /etc/hosts table!" + # Check whether ${HOST_NAMES[i]} is reachable + # and get the IP address of this host from ping + HOST_IPADDRS[i]=$(ping_host ${HOST_NAMES[i]}) + if [ $? -ne 0 ]; then + echo >&2 "${HOST_IPADDRS[i]}" return 1 fi return 0 } -# Check whether the correct host name and IP address pair matches -# the one in the remote /etc/hosts tables +# remote_check index +# Check whether ${HOST_NAMES[index]} can resolve its own name and whether +# this host agrees with the local host about what its name is resolved to. remote_check() { - # Check argument - if [ $# -ne 2 ]; then - echo >&2 $"`basename $0`: remote_check() error: Missing"\ - "argument for function remote_check()!" - return 1 - fi - - declare -i i - local RET_STR COMMAND IP_ADDR - - COMMAND=$"egrep \"[[:space:]]$1([[:space:]]|$)\" /etc/hosts" + declare -i i=$1 + local cmd ret_str + local ip_addr= # the IP address got from remote ping + + # Execute remote command to check whether ${HOST_NAMES[i]} + # can resolve its own name + cmd="ping -c1 ${HOST_NAMES[i]} 2>&1" + ret_str=`${REMOTE} ${HOST_NAMES[i]} "${cmd}" 2>&1` + if [ $? -ne 0 -a -n "${ret_str}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "remote to ${HOST_NAMES[i]} error: ${ret_str}!" + return 1 + fi - # Execute remote command to check remote /etc/hosts tables - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - RET_STR=`${REMOTE} ${HOST_NAMES[i]} ${COMMAND} 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: remote_check() error:"\ - "remote ${HOST_NAMES[i]} error: ${RET_STR}" - return 1 - fi + if [ -z "${ret_str}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "No results from ${HOST_NAMES[i]}! Check the network"\ + "connectivity between local host and ${HOST_NAMES[i]}!" + return 1 + fi - if is_pdsh; then - IP_ADDR=`echo ${RET_STR} | awk '{print $2}'` - else - IP_ADDR=`echo ${RET_STR} | awk '{print $1}'` - fi - if [ -z "${IP_ADDR}" ]; then - echo >&2 "`basename $0`: remote_check() error:" \ - "$1 does not exist in the ${HOST_NAMES[i]}'s"\ - "/etc/hosts table!" - return 1 - fi + # Get the IP address of ${HOST_NAMES[i]} from its own ping + if is_pdsh; then + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $4}'` + else + ip_addr=`echo "${ret_str}" | head -1 | awk '{print $3}'` + fi + ip_addr=`echo "${ip_addr}" | sed -e 's/^(//' -e 's/)$//'` + + # Compare IP addresses + # Check whether ${HOST_NAMES[i]} agrees with the local host + # about what its name is resolved to. + if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then + echo >&2 "`basename $0`: remote_check() error:"\ + "Local host resolves ${HOST_NAMES[i]} to IP address"\ + "\"${HOST_IPADDRS[i]}\", while its own resolution is"\ + "\"${ip_addr}\". They are not the same!" + return 1 + fi - if [ "${IP_ADDR}" != "${HOST_IPADDRS[$2]}" ]; then - echo >&2 "`basename $0`: remote_check() error:" \ - "IP address ${IP_ADDR} of $1 in the" \ - "${HOST_NAMES[i]}'s /etc/hosts is incorrect!" - return 1 - fi - done - - return 0 -} - -# Verify forward and reverse network connectivity of the Lustre cluster -network_check() { - # Check argument - if [ $# -eq 0 ]; then - echo >&2 $"`basename $0`: network_check() error: Missing"\ - "argument for function network_check()!" - return 1 - fi - - declare -i i - local RET_STR COMMAND REAL_NAME - - # Execute remote command to check network connectivity - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - COMMAND=$"${REMOTE} ${HOST_NAMES[i]} hostname" - RET_STR=`${REMOTE} $1 ${COMMAND} 2>&1` - if [ $? -ne 0 -a -n "${RET_STR}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "remote error: ${RET_STR}" - return 1 - fi - - if [ -z "${RET_STR}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "No results from remote! Check the network" \ - "connectivity between \"$1\" and" \ - "\"${HOST_NAMES[i]}\"!" - return 1 - fi - - if is_pdsh; then - REAL_NAME=`echo ${RET_STR} | awk '{print $3}'` - else - REAL_NAME=`echo ${RET_STR} | awk '{print $1}'` - fi - if [ "${HOST_NAMES[i]}" != "${REAL_NAME}" ]; then - echo >&2 "`basename $0`: network_check() error:" \ - "${RET_STR}" - return 1 - fi - done - return 0 } -# Verify forward and reverse network connectivity of the Lustre cluster, -# and that hostnames match the names in the /etc/hosts tables. +# network_verify +# Verify name resolution and network connectivity of the Lustre cluster network_verify() { declare -i i @@ -242,39 +186,17 @@ network_verify() { unset HOST_IPADDRS # Get all the host names from the csv file - if ! get_hostnames; then - return 1 - fi - - # Check whether all the host names match the names in - # all the /etc/hosts tables of the Lustre cluster - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - verbose_output "Verifying IP address of host" \ - "\"${HOST_NAMES[i]}\" in the local /etc/hosts..." - if ! local_check ${HOST_NAMES[i]} $i; then - return 1 - fi - verbose_output "OK" - done + ! get_hostnames && return 1 + # Check the network connectivity between local host + # and other cluster nodes for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue - verbose_output "Verifying IP address of host" \ - "\"${HOST_NAMES[i]}\" in the remote /etc/hosts..." - if ! remote_check ${HOST_NAMES[i]} $i; then - return 1 - fi - verbose_output "OK" - done - # Verify network connectivity of the Lustre cluster - for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - [ "${HOST_NAMES[i]}" = "`hostname`" ] && continue - verbose_output "Verifying network connectivity of host" \ - "\"${HOST_NAMES[i]}\" to other hosts..." - if ! network_check ${HOST_NAMES[i]}; then - return 1 - fi + verbose_output "Verifying network connectivity between"\ + "\"`hostname`\" and \"${HOST_NAMES[i]}\"..." + ! local_check $i && return 1 + ! remote_check $i && return 1 verbose_output "OK" done @@ -286,6 +208,7 @@ if ! check_file ${CSV_FILE}; then exit 1 fi +# Cluster network verification if ! network_verify; then exit 1 fi -- 1.8.3.1