From f7d30d102f85b0923fc4c7f363f83d8291d93585 Mon Sep 17 00:00:00 2001 From: nathan Date: Thu, 23 Feb 2006 00:44:31 +0000 Subject: [PATCH] Branch b1_4_mountconf b=9858 -sanity test fixes -brief nap before updating to new configs --- lustre/mgc/mgc_request.c | 19 +++++++++++++------ lustre/tests/conf-sanity.sh | 1 + lustre/tests/llmount.sh | 19 +++++++++++-------- lustre/tests/oos.sh | 4 ++-- lustre/tests/sanity-quota.sh | 2 +- lustre/tests/sanity.sh | 4 ++-- lustre/tests/test-framework.sh | 22 ++-------------------- lustre/utils/cluster_scripts/1uml.csv | 5 +++++ lustre/utils/cluster_scripts/cluster_config.sh | 15 +++++++++------ .../utils/cluster_scripts/gen_clumanager_config.sh | 2 +- lustre/utils/cluster_scripts/gen_hb_config.sh | 2 +- lustre/utils/cluster_scripts/verify_cluster_net.sh | 8 ++++---- lustre/utils/cluster_scripts/verify_serviceIP.sh | 4 ++-- 13 files changed, 54 insertions(+), 53 deletions(-) create mode 100644 lustre/utils/cluster_scripts/1uml.csv diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 8cd9bab..9cdc305 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -346,6 +346,8 @@ static int mgc_process_log(struct obd_device *mgc, /* reenqueue the lock, reparse the log */ static int mgc_async_requeue(void *data) { + wait_queue_head_t waitq; + struct l_wait_info lwi; struct config_llog_data *cld = (struct config_llog_data *)data; unsigned long flags; int rc; @@ -368,21 +370,26 @@ static int mgc_async_requeue(void *data) cld->cld_resid.name[0], cld->cld_logname, cld->cld_cfg.cfg_instance); + /* Sleep a few seconds to allow the server that caused the lock revocation to finish its setup, plus a small random delay so that not everyone tries to reconnect at once. 
*/ + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT(3 * HZ + (ll_rand() & 0x7f), NULL, NULL); + l_wait_event(waitq, 0, &lwi); + LASSERT(the_mgc); + class_export_get(the_mgc->obd_self_export); - /* FIXME sleep a few seconds here to allow the server who caused - the lock revocation to finish its setup */ - #if 0 /* Re-send server info every time, in case MGS needs to regen its logs (for write_conf). Do we need this? It's extra RPCs for - every server at every update. */ + every server at every update. Turning it off until I'm sure + it's needed. */ server_register_target(cld->cld_cfg.cfg_sb); #endif - rc = mgc_process_log(the_mgc, cld); - class_export_put(the_mgc->obd_self_export); + RETURN(rc); } diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 3815714..fdd3afb 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -20,6 +20,7 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} +MKFSLUSTRE=${MKFSLUSTRE:-/usr/sbin/mkfs.lustre} HOSTNAME=`hostname` . $LUSTRE/tests/test-framework.sh diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index ca26b2a..8a47ea6 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -29,13 +29,16 @@ fi [ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG" [ "$PTLDEBUG" ] && debug_opt="--ptldebug=$PTLDEBUG" -${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ \ - $conf_opt || { +echo FIXME use the utils/cluster_scripts/cluster_config.sh to parse config csv files. 
+ +exit 1 + +#${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ $conf_opt || { # maybe acceptor error, dump tcp port usage - netstat -tpn - exit 2 -} +# netstat -tpn +# exit 2 +#} -if [ "$MOUNT2" ]; then - $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3 -fi +#if [ "$MOUNT2" ]; then +# $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3 +#fi diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 0d12568..65dd8be 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -52,7 +52,7 @@ fi # flush cache to OST(s) so avail numbers are correct sync; sleep 1 ; sync -for OSC in /proc/fs/lustre/osc/OSC*MNT*; do +for OSC in /proc/fs/lustre/osc/*-osc-*; do AVAIL=`cat $OSC/kbytesavail` GRANT=`cat $OSC/cur_grant_bytes` [ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full @@ -60,7 +60,7 @@ done if [ -z "$OSCFULL" ]; then echo "no OSTs are close to full" - grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} + grep "[0-9]" /proc/fs/lustre/osc/*-osc-*/{kbytesavail,cur*} SUCCESS=0 fi diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 8c1e164..01f2869 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -128,7 +128,7 @@ pass() { } mounted_lustre_filesystems() { - awk '($3 ~ "lustre") { print $2 }' /proc/mounts + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts } MOUNT="`mounted_lustre_filesystems`" if [ -z "$MOUNT" ]; then diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 812fcfc..b413655 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -188,7 +188,7 @@ pass() { } mounted_lustre_filesystems() { - awk '($3 ~ "lustre") { print $2 }' /proc/mounts + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts } MOUNT="`mounted_lustre_filesystems`" if [ -z "$MOUNT" ]; then @@ -2357,7 +2357,7 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { df $DIR - 
grep "[0-9]" $LPROC/osc/*-osc*/cur* + grep "[0-9]" $LPROC/osc/*-osc-*/cur* } run_test 64a "verify filter grant calculations (in kernel) =====" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 1f520d1..dd63d65 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -36,9 +36,6 @@ init_test_env() { export TMP=${TMP:-$ROOT/tmp} export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests - export LLMOUNT=${LLMOUNT:-"llmount"} - export LCONF=${LCONF:-"lconf"} - export LMC=${LMC:-"lmc"} export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} export CHECKSTAT="${CHECKSTAT:-checkstat} " export FSYTPE=${FSTYPE:-"ext3"} @@ -109,13 +106,9 @@ zconf_mount() { if [ -x /sbin/mount.lustre ] ; then do_node $client mount -t lustre $OPTIONS \ - `facet_nid mds`:/mds_svc/client_facet $mnt || return 1 + `facet_nid mgs`:/lustre-client $mnt || return 1 else - # this is so cheating - do_node $client $LCONF --nosetup --node client_facet $XMLCONFIG > \ - /dev/null || return 2 - do_node $client $LLMOUNT $OPTIONS \ - `facet_nid mds`:/mds_svc/client_facet $mnt || return 4 + return 4 fi [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname` @@ -348,22 +341,11 @@ do_facet() { do_node $HOST $@ } -add_facet() { - local facet=$1 - shift - echo "add facet $facet: `facet_host $facet`" - do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \ - --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM - do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \ - --nettype lnet $PORT_OPT -} - add_mds() { local MOUNT_OPTS local facet=$1 shift rm -f ${facet}active - add_facet $facet [ "x$MDSOPT" != "x" ] && MOUNT_OPTS="--mountfsoptions $MDSOPT" do_lmc --add mds --node ${facet}_facet --mds ${facet}_svc \ --fstype $FSTYPE $* $MOUNT_OPTS diff --git a/lustre/utils/cluster_scripts/1uml.csv b/lustre/utils/cluster_scripts/1uml.csv new file mode 100644 index 0000000..d6f23a4 --- /dev/null +++ b/lustre/utils/cluster_scripts/1uml.csv @@ -0,0 +1,5 @@ +# combo 
mdt/mgs +uml1,options lnet networks=tcp,/r/tmp/mdt,mdt|mgs,,,,--device-size=10240 +# ost0 +uml1,options lnet networks=tcp,/r/tmp/ost0,ost,,"uml1@tcp0",,--device-size=10240 + diff --git a/lustre/utils/cluster_scripts/cluster_config.sh b/lustre/utils/cluster_scripts/cluster_config.sh index 8c09030..cebb95d 100755 --- a/lustre/utils/cluster_scripts/cluster_config.sh +++ b/lustre/utils/cluster_scripts/cluster_config.sh @@ -132,6 +132,8 @@ EOF } # Global variables +PDSH=${PDSH:-"pdsh -R ssh"} +export PDSH # Some scripts to be called SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"} MODULE_CONFIG=${SCRIPTS_PATH}$"module_config.sh" @@ -309,7 +311,7 @@ check_element() { # Check mgmtnid if [ "${DEVICE_TYPE}" = "ost" ]&&[ -z "${MGMT_NID}" ]; then - echo >&2 $"`basename $0`: check_element() error: OST's mgmtnid"\ + echo >&2 $"`basename $0`: check_element() error: OST's mgsnid"\ "element has null value!" return 1 fi @@ -632,19 +634,20 @@ mass_config() { fi # Execute pdsh command to add lnet options lines to modprobe.conf/modules.conf - verbose_output "Adding module options to ${HOST_NAME}..." COMMAND=$"echo \"${NETWORKS}\"|${MODULE_CONFIG}" - pdsh -w ${HOST_NAME} ${COMMAND} >&2 & + verbose_output "Adding module options to ${HOST_NAME}" + verbose_output ${COMMAND} + ${PDSH} -w ${HOST_NAME} ${COMMAND} >&2 & PDSH_PID[${pid_num}]=$! - PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${COMMAND}" + PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${COMMAND}" pid_num=${pid_num}+1 # Execute pdsh command to format Lustre target verbose_output "Formatting Lustre target on ${HOST_NAME}..." verbose_output "Format command line is: ${MKFS_CMD}" - pdsh -w ${HOST_NAME} ${MKFS_CMD} >&2 & + ${PDSH} -w ${HOST_NAME} ${MKFS_CMD} >&2 & PDSH_PID[${pid_num}]=$! 
- PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${MKFS_CMD}" + PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${MKFS_CMD}" pid_num=${pid_num}+1 line_num=${line_num}+1 diff --git a/lustre/utils/cluster_scripts/gen_clumanager_config.sh b/lustre/utils/cluster_scripts/gen_clumanager_config.sh index 3733afe..9a6938b 100755 --- a/lustre/utils/cluster_scripts/gen_clumanager_config.sh +++ b/lustre/utils/cluster_scripts/gen_clumanager_config.sh @@ -192,7 +192,7 @@ stop_clumanager() { nodename_str=${nodename_str}$","${NODE_NAMES[idx]} done - pdsh -w ${nodename_str} /sbin/service clumanager stop + ${PDSH} -w ${nodename_str} /sbin/service clumanager stop if [ $? -ne 0 ]; then echo >&2 "`basename $0`: stop_clumanager() error:"\ "Fail to execute pdsh command!" diff --git a/lustre/utils/cluster_scripts/gen_hb_config.sh b/lustre/utils/cluster_scripts/gen_hb_config.sh index 0177e14..de78ef4 100755 --- a/lustre/utils/cluster_scripts/gen_hb_config.sh +++ b/lustre/utils/cluster_scripts/gen_hb_config.sh @@ -203,7 +203,7 @@ stop_heartbeat() { nodename_str=${nodename_str}$","${NODE_NAMES[idx]} done - pdsh -w ${nodename_str} /sbin/service heartbeat stop + ${PDSH} -w ${nodename_str} /sbin/service heartbeat stop if [ $? -ne 0 ]; then echo >&2 "`basename $0`: stop_heartbeat() error:"\ "Fail to execute pdsh command!" diff --git a/lustre/utils/cluster_scripts/verify_cluster_net.sh b/lustre/utils/cluster_scripts/verify_cluster_net.sh index aa440c8..f5f59c4 100755 --- a/lustre/utils/cluster_scripts/verify_cluster_net.sh +++ b/lustre/utils/cluster_scripts/verify_cluster_net.sh @@ -123,7 +123,7 @@ local_check() { fi # Execute pdsh command to get the real host name - RET_STR=`pdsh -w ${HOST_IPADDRS[$2]} hostname 2>&1` + RET_STR=`${PDSH} -w ${HOST_IPADDRS[$2]} hostname 2>&1` if [ $? 
-ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]; then echo >&2 "`basename $0`: local_check() error: pdsh error:" \ "${RET_STR}" @@ -166,7 +166,7 @@ remote_check() { # Execute pdsh command to check remote /etc/hosts tables for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - RET_STR=`pdsh -w ${HOST_NAMES[i]} ${COMMAND} 2>&1` + RET_STR=`${PDSH} -w ${HOST_NAMES[i]} ${COMMAND} 2>&1` if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ] then echo >&2 "`basename $0`: remote_check() error:" \ @@ -208,8 +208,8 @@ network_check () { # Execute pdsh command to check network connectivity for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do - COMMAND=$"pdsh -w ${HOST_NAMES[i]} hostname" - RET_STR=`pdsh -w $1 ${COMMAND} 2>&1` + COMMAND=$"${PDSH} -w ${HOST_NAMES[i]} hostname" + RET_STR=`${PDSH} -w $1 ${COMMAND} 2>&1` if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ] then echo >&2 "`basename $0`: network_check() error:" \ diff --git a/lustre/utils/cluster_scripts/verify_serviceIP.sh b/lustre/utils/cluster_scripts/verify_serviceIP.sh index 794f153..cdc749d 100755 --- a/lustre/utils/cluster_scripts/verify_serviceIP.sh +++ b/lustre/utils/cluster_scripts/verify_serviceIP.sh @@ -130,7 +130,7 @@ findInterface() { done done done - } < <(pdsh -w $hostname /sbin/ifconfig) + } < <(${PDSH} -w $hostname /sbin/ifconfig) echo >&2 "`basename $0`: Cannot find the interface in which" \ "$target is configured in the host $hostname!" @@ -162,7 +162,7 @@ findNetmask() { esac shift done - done < <(pdsh -w $hostname /sbin/ifconfig $target) + done < <(${PDSH} -w $hostname /sbin/ifconfig $target) echo >&2 "`basename $0`: Cannot find the netmask associated with" \ "the interface $target in the host $hostname!" -- 1.8.3.1