From 53c6e13fb7112aada26e9644b0e39ad9742342b3 Mon Sep 17 00:00:00 2001 From: yujian Date: Tue, 16 Sep 2008 08:08:41 +0000 Subject: [PATCH] Branch b1_8_gate b=14095 i=nathan.rutman i=brian 1) add lustre_start utility to start or stop multiple Lustre servers from a CSV file 2) fix the utility path issue in bug 14094 3) use 'error_output()' to format error messages 4) move common variables and functions to lc_common 5) fix the "lustre_config -h" issue in bug 14133 6) fix the module_opts issue in bug 14092 7) remove the duplicates from checking lnet connectivity to MGS node --- lustre/scripts/lustre_start.in | 372 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 lustre/scripts/lustre_start.in diff --git a/lustre/scripts/lustre_start.in b/lustre/scripts/lustre_start.in new file mode 100644 index 0000000..7493bb8 --- /dev/null +++ b/lustre/scripts/lustre_start.in @@ -0,0 +1,372 @@ +#!/bin/bash + +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + +# +# lustre_start - start or stop multiple Lustre servers from a CSV file +# +# This script is used to parse each line of a CSV (Comma-Separated Value) file +# and execute remote command to start/stop the service on every Lustre server +# target that will be part of the Lustre cluster. +# +################################################################################ + +# Usage +usage() { + cat < + + This script is used to start or stop multiple Lustre servers from a + CSV file. + + Options: + -a select all the nodes from the CSV file to operate on + -w hostname,hostname,... + select the specified list of nodes (separated by commas) to + operate on rather than all the nodes in the CSV file + -x hostname,hostname,... + exclude the specified list of nodes (separated by commas) + -n no net - don't verify network connectivity and hostnames + in the cluster + -m pass "mount options" item in the CSV file to mount command line + -k stop the services on Lustre server targets + -v verbose mode + -h help + CSV file a comma-separated value file that contains configuration + parameters for each target in a Lustre cluster + + Please refer to "lustre_config -h" for the description of CSV file formats. + +EOF +} + +# Get the library of functions +. @scriptlibdir@/lc_common + +SPECIFY_MNTOPTS=false +STOP_SERVICE=false +# Get and check the positional parameters +while getopts "aw:x:nmkhv" OPTION; do + case $OPTION in + a) + [ -z "$SPECIFIED_NODELIST" ] && [ -z "$EXCLUDED_NODELIST" ] \ + && USE_ALLNODES=true + NODELIST_OPT="$NODELIST_OPT -a" + ;; + w) + USE_ALLNODES=false + SPECIFIED_NODELIST=$OPTARG + NODELIST_OPT="$NODELIST_OPT -w $SPECIFIED_NODELIST" + ;; + x) + USE_ALLNODES=false + EXCLUDED_NODELIST=$OPTARG + NODELIST_OPT="$NODELIST_OPT -x $EXCLUDED_NODELIST" + ;; + n) + VERIFY_CONNECT=false + ;; + m) + SPECIFY_MNTOPTS=true + ;; + k) + STOP_SERVICE=true + ;; + h) + usage + exit 0 + ;; + v) + VERBOSE_OPT="-v" + VERBOSE_OUTPUT=true + ;; + ?) + usage 1>&2 + exit 1 + ;; + esac +done + +# Toss out the parameters we've already processed +shift $((OPTIND - 1)) + +# Here we expect the CSV file +if [ $# -eq 0 ]; then + error_output "Missing CSV file!" + usage 1>&2 + exit 1 +fi + +CSV_FILE=$1 + +# get_fstab_mntopts host_name device_name mount_point +# Get the mount options from the /etc/fstab file +get_fstab_mntopts() { + local host_name=$1 + local dev_name=$2 + local mnt_pnt=$3 + + local mnt_opts="" + + if [ -z "$host_name" -o -z "$dev_name" -o -z "$mnt_pnt" ]; then + echo "get_fstab_mntopts(): Missing argument!" + return 1 + fi + + # Execute remote command to get the mount options from /etc/fstab file + mnt_opts=$($REMOTE $host_name "grep -w ^$dev_name /etc/fstab | \ +grep -w $mnt_pnt | awk '{print \$4}'" 2>/dev/null) + + mnt_opts=${mnt_opts//$host_name: /} + + echo $mnt_opts + return 0 +} + +# Start the service on one Lustre server target +start_service() { + declare -i i=$1 + shift + local extra_mntopts="$*" + local mntopts="" + + # Get mount options + if $SPECIFY_MNTOPTS; then + # Use the "mount options" item from the CSV file + [ -n "${MOUNT_OPTIONS[i]}" ] && mntopts=${MOUNT_OPTIONS[i]} + else + # Do not use the "mount options" item from the CSV file + mntopts=$(get_fstab_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]} \ + ${MOUNT_POINT[i]}) + [ ${PIPESTATUS[0]} -ne 0 ] && error_output "$mntopts" && return 1 + fi + + [ -n "$mntopts" ] && mntopts="-o $mntopts" + [ -n "$extra_mntopts" ] && mntopts="$mntopts $extra_mntopts" + # Strip of any leading space + mntopts=${mntopts# } + + # Execute remote command to start the service + verbose_output "Mounting Lustre ${DEVICE_TYPE[i]} target"\ + "${DEVICE_NAME[i]} (opts: $mntopts) on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..." + $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin +error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; } +mkdir -p ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \"failed to mkdir ${MOUNT_POINT[i]}\" +mount -t $FS_TYPE $mntopts ${DEVICE_NAME[i]} ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \\ + \"failed to mount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\"" + return ${PIPESTATUS[0]} +} + +# Stop the service on one Lustre server target +stop_service() { + declare -i i=$1 + + # Execute remote command to stop the service + verbose_output "Unmounting Lustre ${DEVICE_TYPE[i]} target"\ + "${DEVICE_NAME[i]} on ${HOST_NAME[i]}:${MOUNT_POINT[i]}..." + $REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin +error() { set +x; echo \"ERROR: \$2: \$1\"; echo \"XXRETCODE:\$1\"; exit \$1; } +if grep -q \" ${MOUNT_POINT[i]} \" /proc/mounts; then + umount -d -f ${MOUNT_POINT[i]} || \\ + error \${PIPESTATUS[0]} \\ + \"failed to unmount ${DEVICE_NAME[i]} on host ${HOST_NAME[i]}\" +else + echo \"${DEVICE_NAME[i]} was not mounted on\"\\ + \"${HOST_NAME[i]}:${MOUNT_POINT[i]}\" +fi" + return ${PIPESTATUS[0]} +} + +# mass_op op_type target_type +# Start/stop the services on Lustre server targets in parallel +mass_op() { + local op_type=$1 + local target_type=$2 + + local op_func + declare -i i + declare -i pid_num=0 + declare -a REMOTE_PID + local RC=0 + + if [ -z "$op_type" -o -z "$target_type" ]; then + error_output "mass_op(): Missing argument!" + return 1 + fi + + case "$op_type" in + "start") op_func=start_service;; + "stop") op_func=stop_service;; + *) error_output "mass_op(): Invalid op type \"$op_type\"!" && return 1;; + esac + + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + if [ "${DEVICE_TYPE[i]}" = "$target_type" ] \ + && [[ "${FORMAT_OPTIONS[i]}" != *noformat* ]]; then + eval "$op_func $i &" + REMOTE_PID[$pid_num]=$! + let pid_num=$pid_num+1 + fi + done + + for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do + wait ${REMOTE_PID[${pid_num}]} + local RC1=${PIPESTATUS[0]} + [ $RC1 -ne 0 ] && RC=$RC1 + done + + [ $RC -ne 0 ] && return $RC + + return 0 +} + +# Unload the modules to make cleanup +unload_modules() { + local command + local host + local host_list + + host_list=$(comma_list "${HOST_NAME[@]}") + [ -z "$host_list" ] && return 0 + + command="PATH=\$PATH:/sbin:/usr/sbin +if grep -q libcfs /proc/modules; then + lctl net down 1>/dev/null 2>&1 + lustre_rmmod +fi" + + if is_pdsh; then + $REMOTE $host_list "$command" + else + for host in ${host_list//,/ }; do + $REMOTE $host "$command" + done + fi +} + +# Start the services on Lustre server targets +mass_start() { + declare -i i + local combo_mgs_mdt=false + + if [ ${#HOST_NAME[@]} -eq 0 ]; then + verbose_output "There are no Lustre targets specified." + return 0 + fi + + # Start lnet network on the MGS node + start_mgs_lnet || return ${PIPESTATUS[0]} + + local checked_hosts="" + for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do + host_in_hostlist ${HOST_NAME[i]} $checked_hosts && continue + if ! is_mgs_node ${HOST_NAME[i]}; then + # Add module options to the module configuration file + add_module_options $i ${HOST_NAME[i]} || return ${PIPESTATUS[0]} + + # Check lnet networks + check_lnet $i || return ${PIPESTATUS[0]} + + checked_hosts="$checked_hosts,${HOST_NAME[i]}" + fi + done + + # Start MGS or the MGS service on combo MGS/MDT (with "-o nosvc -n" options) + if [ -n "${MGS_NODENAME[0]}" ]; then + local idx=${MGS_IDX[0]} + if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then + # Combo MGS/MDT + combo_mgs_mdt=true + start_service ${MGS_IDX[0]} "-o nosvc -n" || return ${PIPESTATUS[0]} + else + start_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + fi + + # Start OST(s) + mass_op "start" "ost" || return ${PIPESTATUS[0]} + + # Start the MDT service on combo MGS/MDT (with "-o nomgs" option) + if $combo_mgs_mdt; then + start_service ${MGS_IDX[0]} "-o nomgs" || return ${PIPESTATUS[0]} + fi + + # Start MDT(s) + mass_op "start" "mdt" || return ${PIPESTATUS[0]} + + verbose_output "Success on all Lustre targets!" + return 0 +} + +# Stop the services on Lustre server targets +mass_stop() { + declare -i i + + if [ ${#HOST_NAME[@]} -eq 0 ]; then + verbose_output "There are no Lustre targets specified." + return 0 + fi + + # Stop MDT(s) + mass_op "stop" "mdt" || return ${PIPESTATUS[0]} + + # Stop the MDT service on combo MGS/MDT + if [ -n "${MGS_NODENAME[0]}" ]; then + local idx=${MGS_IDX[0]} + if [ "${DEVICE_TYPE[idx]#*mdt*}" != "${DEVICE_TYPE[idx]}" ]; then + # Combo MGS/MDT + stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + fi + + # Stop OST(s) + mass_op "stop" "ost" || return ${PIPESTATUS[0]} + + # Stop MGS or the MGS service on combo MGS/MDT + if [ -n "${MGS_NODENAME[0]}" ]; then + stop_service ${MGS_IDX[0]} || return ${PIPESTATUS[0]} + fi + + unload_modules + + return 0 +} + +#********************************* Main Flow **********************************# + +# Check the CSV file +check_file $CSV_FILE || exit ${PIPESTATUS[0]} + +# Get the list of nodes to be operated on +NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE" + +# Check the node list +check_nodelist $NODES_TO_USE || exit ${PIPESTATUS[0]} + +# Check the network connectivity and hostnames +if $VERIFY_CONNECT; then + verbose_output "Checking the cluster network connectivity and hostnames..." + $VERIFY_CLUSTER_NET $NODELIST_OPT $VERBOSE_OPT $CSV_FILE || \ + exit ${PIPESTATUS[0]} + verbose_output "Check the cluster network connectivity and hostnames OK!" +fi + +# Configure the Lustre cluster +echo "$(basename $0): ******** Lustre cluster configuration BEGIN ********" + +get_lustre_items $CSV_FILE || exit ${PIPESTATUS[0]} + +check_mgs || exit ${PIPESTATUS[0]} + +if ! $STOP_SERVICE; then + mass_start || exit ${PIPESTATUS[0]} +else + mass_stop || exit ${PIPESTATUS[0]} +fi + +echo "$(basename $0): ******** Lustre cluster configuration END **********" + +exit 0 -- 1.8.3.1