Whamcloud - gitweb
Branch b1_4_mountconf
author: nathan <nathan>
Thu, 23 Feb 2006 00:44:31 +0000 (00:44 +0000)
committer: nathan <nathan>
Thu, 23 Feb 2006 00:44:31 +0000 (00:44 +0000)
b=9858
-sanity test fixes
-brief nap before updating to new configs

13 files changed:
lustre/mgc/mgc_request.c
lustre/tests/conf-sanity.sh
lustre/tests/llmount.sh
lustre/tests/oos.sh
lustre/tests/sanity-quota.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/cluster_scripts/1uml.csv [new file with mode: 0644]
lustre/utils/cluster_scripts/cluster_config.sh
lustre/utils/cluster_scripts/gen_clumanager_config.sh
lustre/utils/cluster_scripts/gen_hb_config.sh
lustre/utils/cluster_scripts/verify_cluster_net.sh
lustre/utils/cluster_scripts/verify_serviceIP.sh

index 8cd9bab..9cdc305 100644 (file)
@@ -346,6 +346,8 @@ static int mgc_process_log(struct obd_device *mgc,
 /* reenqueue the lock, reparse the log */
 static int mgc_async_requeue(void *data)
 {
+        wait_queue_head_t   waitq;
+        struct l_wait_info  lwi;
         struct config_llog_data *cld = (struct config_llog_data *)data;
         unsigned long flags;
         int rc;
@@ -368,21 +370,26 @@ static int mgc_async_requeue(void *data)
                cld->cld_resid.name[0], cld->cld_logname, 
                cld->cld_cfg.cfg_instance);
         
+        /* Sleep a few seconds to allow the server that caused
+           the lock revocation to finish its setup, plus some random
+           delay so everyone doesn't try to reconnect at once. */
+        init_waitqueue_head(&waitq);
+        lwi = LWI_TIMEOUT(3 * HZ + (ll_rand() & 0x7f), NULL, NULL);
+        l_wait_event(waitq, 0, &lwi);
+
         LASSERT(the_mgc);
+
         class_export_get(the_mgc->obd_self_export);
-        /* FIXME sleep a few seconds here to allow the server who caused
-           the lock revocation to finish its setup */
-        
 #if 0
         /* Re-send server info every time, in case MGS needs to regen its
            logs (for write_conf).  Do we need this?  It's extra RPCs for
-           every server at every update. */
+           every server at every update.  Turning it off until I'm sure
+           it's needed. */
         server_register_target(cld->cld_cfg.cfg_sb);
 #endif 
-       
         rc = mgc_process_log(the_mgc, cld);
-
         class_export_put(the_mgc->obd_self_export);
+
         RETURN(rc);
 }
 
index 3815714..fdd3afb 100644 (file)
@@ -20,6 +20,7 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 RLUSTRE=${RLUSTRE:-$LUSTRE}
 MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre}
+MKFSLUSTRE=${MKFSLUSTRE:-/usr/sbin/mkfs.lustre}
 HOSTNAME=`hostname`
 
 . $LUSTRE/tests/test-framework.sh
index ca26b2a..8a47ea6 100755 (executable)
@@ -29,13 +29,16 @@ fi
 [ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG"
 [ "$PTLDEBUG" ] && debug_opt="--ptldebug=$PTLDEBUG"
 
-${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ \
-       $conf_opt  || {
+echo FIXME use the utils/cluster_scripts/cluster_config.sh to parse config csv files.
+
+exit 1
+
+#${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ $conf_opt  || {
     # maybe acceptor error, dump tcp port usage
-    netstat -tpn
-    exit 2
-}
+#    netstat -tpn
+#    exit 2
+#}
 
-if [ "$MOUNT2" ]; then
-       $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
-fi
+#if [ "$MOUNT2" ]; then
+#      $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
+#fi
index 0d12568..65dd8be 100755 (executable)
@@ -52,7 +52,7 @@ fi
 # flush cache to OST(s) so avail numbers are correct
 sync; sleep 1 ; sync
 
-for OSC in /proc/fs/lustre/osc/OSC*MNT*; do
+for OSC in /proc/fs/lustre/osc/*-osc-*; do
        AVAIL=`cat $OSC/kbytesavail`
        GRANT=`cat $OSC/cur_grant_bytes`
        [ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full
@@ -60,7 +60,7 @@ done
 
 if [ -z "$OSCFULL" ]; then
        echo "no OSTs are close to full"
-       grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*}
+       grep "[0-9]" /proc/fs/lustre/osc/*-osc-*/{kbytesavail,cur*}
        SUCCESS=0
 fi
 
index 8c1e164..01f2869 100644 (file)
@@ -128,7 +128,7 @@ pass() {
 }
 
 mounted_lustre_filesystems() {
-       awk '($3 ~ "lustre") { print $2 }' /proc/mounts
+       awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
 }
 MOUNT="`mounted_lustre_filesystems`"
 if [ -z "$MOUNT" ]; then
index 812fcfc..b413655 100644 (file)
@@ -188,7 +188,7 @@ pass() {
 }
 
 mounted_lustre_filesystems() {
-       awk '($3 ~ "lustre") { print $2 }' /proc/mounts
+       awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
 }
 MOUNT="`mounted_lustre_filesystems`"
 if [ -z "$MOUNT" ]; then
@@ -2357,7 +2357,7 @@ run_test 63b "async write errors should be returned to fsync ==="
 
 test_64a () {
        df $DIR
-       grep "[0-9]" $LPROC/osc/*-osc*/cur*
+       grep "[0-9]" $LPROC/osc/*-osc-*/cur*
 }
 run_test 64a "verify filter grant calculations (in kernel) ====="
 
index 1f520d1..dd63d65 100644 (file)
@@ -36,9 +36,6 @@ init_test_env() {
     export TMP=${TMP:-$ROOT/tmp}
 
     export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests
-    export LLMOUNT=${LLMOUNT:-"llmount"}
-    export LCONF=${LCONF:-"lconf"}
-    export LMC=${LMC:-"lmc"}
     export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
     export CHECKSTAT="${CHECKSTAT:-checkstat} "
     export FSYTPE=${FSTYPE:-"ext3"}
@@ -109,13 +106,9 @@ zconf_mount() {
 
     if [ -x /sbin/mount.lustre ] ; then
        do_node $client mount -t lustre $OPTIONS \
-               `facet_nid mds`:/mds_svc/client_facet $mnt || return 1
+               `facet_nid mgs`:/lustre-client $mnt || return 1
     else
-       # this is so cheating
-       do_node $client $LCONF --nosetup --node client_facet $XMLCONFIG > \
-               /dev/null || return 2
-       do_node $client $LLMOUNT $OPTIONS \
-               `facet_nid mds`:/mds_svc/client_facet $mnt || return 4
+       return 4
     fi
 
     [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
@@ -348,22 +341,11 @@ do_facet() {
     do_node $HOST $@
 }
 
-add_facet() {
-    local facet=$1
-    shift
-    echo "add facet $facet: `facet_host $facet`"
-    do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \
-        --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM
-    do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \
-        --nettype lnet $PORT_OPT
-}
-
 add_mds() {
     local MOUNT_OPTS
     local facet=$1
     shift
     rm -f ${facet}active
-    add_facet $facet
     [ "x$MDSOPT" != "x" ] && MOUNT_OPTS="--mountfsoptions $MDSOPT"
     do_lmc --add mds --node ${facet}_facet --mds ${facet}_svc \
        --fstype $FSTYPE $* $MOUNT_OPTS
diff --git a/lustre/utils/cluster_scripts/1uml.csv b/lustre/utils/cluster_scripts/1uml.csv
new file mode 100644 (file)
index 0000000..d6f23a4
--- /dev/null
@@ -0,0 +1,5 @@
+# combo mdt/mgs
+uml1,options lnet networks=tcp,/r/tmp/mdt,mdt|mgs,,,,--device-size=10240
+# ost0
+uml1,options lnet networks=tcp,/r/tmp/ost0,ost,,"uml1@tcp0",,--device-size=10240
+
index 8c09030..cebb95d 100755 (executable)
@@ -132,6 +132,8 @@ EOF
 }
 
 # Global variables
+PDSH=${PDSH:-"pdsh -R ssh"}
+export PDSH
 # Some scripts to be called
 SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
 MODULE_CONFIG=${SCRIPTS_PATH}$"module_config.sh"
@@ -309,7 +311,7 @@ check_element() {
 
         # Check mgmtnid
         if [ "${DEVICE_TYPE}" = "ost" ]&&[ -z "${MGMT_NID}" ]; then
-                echo >&2 $"`basename $0`: check_element() error: OST's mgmtnid"\
+                echo >&2 $"`basename $0`: check_element() error: OST's mgsnid"\
                          "element has null value!"
                 return 1
         fi
@@ -632,19 +634,20 @@ mass_config() {
                fi
 
                # Execute pdsh command to add lnet options lines to modprobe.conf/modules.conf
-               verbose_output "Adding module options to ${HOST_NAME}..."
                COMMAND=$"echo \"${NETWORKS}\"|${MODULE_CONFIG}"
-               pdsh -w ${HOST_NAME} ${COMMAND} >&2 &
+               verbose_output "Adding module options to ${HOST_NAME}"
+               verbose_output ${COMMAND}
+               ${PDSH} -w ${HOST_NAME} ${COMMAND} >&2 &
                PDSH_PID[${pid_num}]=$!
-               PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${COMMAND}"
+               PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${COMMAND}"
                pid_num=${pid_num}+1
 
                # Execute pdsh command to format Lustre target
                verbose_output "Formatting Lustre target on ${HOST_NAME}..."
                verbose_output "Format command line is: ${MKFS_CMD}"
-               pdsh -w ${HOST_NAME} ${MKFS_CMD} >&2 &  
+               ${PDSH} -w ${HOST_NAME} ${MKFS_CMD} >&2 &  
                PDSH_PID[${pid_num}]=$!
-               PDSH_CMD[${pid_num}]="pdsh -w ${HOST_NAME} ${MKFS_CMD}"
+               PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${MKFS_CMD}"
                pid_num=${pid_num}+1
 
                line_num=${line_num}+1
index 3733afe..9a6938b 100755 (executable)
@@ -192,7 +192,7 @@ stop_clumanager() {
                nodename_str=${nodename_str}$","${NODE_NAMES[idx]}
        done
 
-       pdsh -w ${nodename_str} /sbin/service clumanager stop
+       ${PDSH} -w ${nodename_str} /sbin/service clumanager stop
        if [ $? -ne 0 ]; then
                echo >&2 "`basename $0`: stop_clumanager() error:"\
                         "Fail to execute pdsh command!"
index 0177e14..de78ef4 100755 (executable)
@@ -203,7 +203,7 @@ stop_heartbeat() {
                nodename_str=${nodename_str}$","${NODE_NAMES[idx]}
        done
 
-       pdsh -w ${nodename_str} /sbin/service heartbeat stop
+       ${PDSH} -w ${nodename_str} /sbin/service heartbeat stop
        if [ $? -ne 0 ]; then
                echo >&2 "`basename $0`: stop_heartbeat() error:"\
                         "Fail to execute pdsh command!"
index aa440c8..f5f59c4 100755 (executable)
@@ -123,7 +123,7 @@ local_check() {
        fi
 
        # Execute pdsh command to get the real host name
-       RET_STR=`pdsh -w ${HOST_IPADDRS[$2]} hostname 2>&1`
+       RET_STR=`${PDSH} -w ${HOST_IPADDRS[$2]} hostname 2>&1`
        if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]; then
                echo >&2 "`basename $0`: local_check() error: pdsh error:" \
                         "${RET_STR}"
@@ -166,7 +166,7 @@ remote_check() {
 
        # Execute pdsh command to check remote /etc/hosts tables
        for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
-               RET_STR=`pdsh -w ${HOST_NAMES[i]} ${COMMAND} 2>&1`
+               RET_STR=`${PDSH} -w ${HOST_NAMES[i]} ${COMMAND} 2>&1`
                if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]
                then
                        echo >&2 "`basename $0`: remote_check() error:" \
@@ -208,8 +208,8 @@ network_check () {
 
        # Execute pdsh command to check network connectivity
        for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
-               COMMAND=$"pdsh -w ${HOST_NAMES[i]} hostname"
-               RET_STR=`pdsh -w $1 ${COMMAND} 2>&1`
+               COMMAND=$"${PDSH} -w ${HOST_NAMES[i]} hostname"
+               RET_STR=`${PDSH} -w $1 ${COMMAND} 2>&1`
                if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]
                then
                        echo >&2 "`basename $0`: network_check() error:" \
index 794f153..cdc749d 100755 (executable)
@@ -130,7 +130,7 @@ findInterface() {
                        done
                done
        done
-       } < <(pdsh -w $hostname /sbin/ifconfig)
+       } < <(${PDSH} -w $hostname /sbin/ifconfig)
 
        echo >&2 "`basename $0`: Cannot find the interface in which" \
                  "$target is configured in the host $hostname!"
@@ -162,7 +162,7 @@ findNetmask() {
                        esac
                        shift
                done
-       done < <(pdsh -w $hostname /sbin/ifconfig $target)
+       done < <(${PDSH} -w $hostname /sbin/ifconfig $target)
 
        echo >&2 "`basename $0`: Cannot find the netmask associated with" \
                  "the interface $target in the host $hostname!"