#!/bin/bash
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
#
# lc_common - This file contains common variables and functions to be used by
#             Lustre cluster config scripts.
#
################################################################################

#****************************** Common Variables ******************************#
# Every setting written as ${NAME:-default} may be overridden from the
# caller's environment; the others are fixed here.
export PATH=$PATH:/sbin:/usr/sbin

# Remote command
export REMOTE=${REMOTE:-"ssh -x -q"}
#export REMOTE=${REMOTE:-"pdsh -S -R ssh -w"}

# Lustre utilities
export MKFS=${MKFS:-"mkfs.lustre"}
export TUNEFS=${TUNEFS:-"tunefs.lustre"}
export LCTL=${LCTL:-"lctl"}

# Software RAID command
export MDADM=${MDADM:-"mdadm"}

# Some scripts to be called
export MODULE_CONFIG=${MODULE_CONFIG:-"lc_modprobe"}
export VERIFY_CLUSTER_NET=${VERIFY_CLUSTER_NET:-"lc_net"}
export GEN_HB_CONFIG=${GEN_HB_CONFIG:-"lc_hb"}
export GEN_CLUMGR_CONFIG=${GEN_CLUMGR_CONFIG:-"lc_cluman"}
export SCRIPT_VERIFY_SRVIP=${SCRIPT_VERIFY_SRVIP:-"lc_servip"}
export SCRIPT_GEN_MONCF=${SCRIPT_GEN_MONCF:-"lc_mon"}
export SCRIPT_CONFIG_MD=${SCRIPT_CONFIG_MD:-"lc_md"}
export SCRIPT_CONFIG_LVM=${SCRIPT_CONFIG_LVM:-"lc_lvm"}

# Variables of HA software
HBVER_HBV1="hbv1"                   # Heartbeat version 1
HBVER_HBV2="hbv2"                   # Heartbeat version 2
HATYPE_CLUMGR="cluman"              # Cluster Manager

# Configuration directories and files
HA_DIR=${HA_DIR:-"/etc/ha.d"}       # Heartbeat configuration directory
MON_DIR=${MON_DIR:-"/etc/mon"}      # mon configuration directory
CIB_DIR=${CIB_DIR:-"/var/lib/heartbeat/crm"}    # cib.xml directory

HA_CF="${HA_DIR}/ha.cf"             # ha.cf file
HA_RES="${HA_DIR}/haresources"      # haresources file
HA_CIB="${CIB_DIR}/cib.xml"

CLUMAN_DIR="/etc"                   # CluManager configuration directory
CLUMAN_CONFIG="${CLUMAN_DIR}/cluster.xml"

CLUMAN_TOOLS_PATH=${CLUMAN_TOOLS_PATH:-"/usr/sbin"}     # CluManager tools
CONFIG_CMD=${CONFIG_CMD:-"${CLUMAN_TOOLS_PATH}/redhat-config-cluster-cmd"}

HB_TMP_DIR="/tmp/heartbeat"         # Temporary directory
CLUMGR_TMP_DIR="/tmp/clumanager"
TMP_DIRS="${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
FS_TYPE=${FS_TYPE:-"lustre"}        # Lustre filesystem type
FILE_SUFFIX=${FILE_SUFFIX:-".lustre"}   # Suffix of the generated config files

# Marker of the MD device line
export MD_MARKER=${MD_MARKER:-"MD"}

# Marker of the LVM device line
export PV_MARKER=${PV_MARKER:-"PV"}
export VG_MARKER=${VG_MARKER:-"VG"}
export LV_MARKER=${LV_MARKER:-"LV"}

declare -a CONFIG_ITEM              # Items in each line of the CSV file
declare -a NODE_NAME                # Hostnames of nodes have been configured

declare -a MGS_NODENAME             # Node names of the MGS servers
declare -a MGS_IDX                  # Indexes of MGSs in the global arrays
declare -i MGS_NUM                  # Number of MGS servers in the cluster
declare -i INIT_IDX

# All of the Lustre target items in the CSV file
declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS

# Heartbeat software requires that node names in the configuration directive
# must (normally) match the "uname -n" of that machine. Since the value of the
# "failover nids" field in the CSV file is the NID(s) of failover partner node,
# we have to figure out the corresponding hostname of that node.
declare -a FAILOVERS_NAMES

export VERIFY_CONNECT=true          # Verify network connectivity by default
export USE_ALLNODES=false           # Not operating on all the nodes by default
export SPECIFIED_NODELIST=""        # Specified list of nodes to be operated on
export EXCLUDED_NODELIST=""         # Specified list of nodes to be excluded
export NODES_TO_USE=""              # Defacto list of nodes to be operated on
export NODELIST_OPT=""
export VERBOSE_OUTPUT=false
export VERBOSE_OPT=""

#****************************** Common Functions ******************************#

# verbose_output string
# Output verbose information $string
#
# Prints "<script name>: <string>" on stdout when the global VERBOSE_OUTPUT
# flag is "true"; prints nothing when it is "false". Always returns 0.
verbose_output() {
    # VERBOSE_OUTPUT holds the name of the true/false command, which is
    # run here as the condition (the flag convention used for the other
    # boolean globals above).
    if ${VERBOSE_OUTPUT}; then
        # "--" and the quotes keep basename working when the script path
        # contains blanks or starts with a dash.
        echo "$(basename -- "$0"): $*"
    fi
    return 0
}

# error_output string
# Output error string to stderr, prefixing with ERROR
# for easy error parsing from the rest of the output.
error_output() {
    # Writes "<script name>: ERROR: <string>" to stderr; always returns 0.
    echo >&2 "$(basename -- "$0"): ERROR: $*"
    return 0
}

# error_exit rc string
# Output error to stderr via error_output and exit with rc.
error_exit() {
    local rc=$1
    shift
    # "$@" keeps the message exactly as passed; an unquoted $* would
    # re-split it and expand any glob characters it contains.
    error_output "$@"
    # An error exit must never report success when rc was omitted.
    exit "${rc:-1}"
}

# Check whether the remote command is pdsh
# Returns 0 when ${REMOTE} contains the string "pdsh", 1 otherwise.
is_pdsh() {
    [[ "${REMOTE}" == *pdsh* ]]
}

# check_file csv_file
# Check the file $csv_file
# Returns 0 when $csv_file exists and is not empty, 1 otherwise (an error
# message is written to stderr).
check_file() {
    # Check argument
    if [ $# -eq 0 ]; then
        error_output "check_file(): Missing CSV file!"
        return 1
    fi

    local CSV_FILE=$1
    # Quoted so that an empty name or a name with blanks is tested as one
    # operand; unquoted, an empty name turned the test into "[ ! -s ]",
    # which is false, and the check passed.
    if [ ! -s "${CSV_FILE}" ]; then
        error_output "check_file(): ${CSV_FILE}" \
                 "does not exist or is empty!"
        return 1
    fi

    return 0
}

# parse_line line
# Parse a line in the CSV file
#
# Splits $line on the commas that are outside single or double quotes and
# stores the fields in the global array CONFIG_ITEM. For each field, leading
# and trailing whitespace is removed, enclosing double-quotes are stripped
# and every doubled double-quote ("") becomes a single one.
# Returns 1 when called without an argument, 0 otherwise.
parse_line() {
    # Check argument
    if [ $# -eq 0 ]; then
        error_output "parse_line(): Missing argument!"
        return 1
    fi

    local line="$*"
    local ch item
    local -i field=0                # Index of the CONFIG_ITEM array
    local -i pos
    local -i in_squote=0            # Inside a single-quoted section
    local -i in_dquote=0            # Inside a double-quoted section

    # Initialize the CONFIG_ITEM array
    unset CONFIG_ITEM

    # Pass 1: split into raw fields, one character at a time.
    for ((pos = 0; pos < ${#line}; pos++)); do
        ch=${line:pos:1}
        case "${ch}" in
        ",")
            if ((in_squote || in_dquote)); then
                # A quoted comma belongs to the value
                CONFIG_ITEM[field]+=${ch}
            else
                field=field+1
            fi
            continue
            ;;
        "'")
            in_squote=$((1 - in_squote))
            ;;
        "\"")
            in_dquote=$((1 - in_dquote))
            ;;
        $'\n')
            # Line breaks are dropped. Blanks are kept: they are part of
            # multi-word values and the outer ones are trimmed in pass 2.
            continue
            ;;
        esac
        # Quote characters are kept for now; pass 2 removes them.
        CONFIG_ITEM[field]+=${ch}
    done

    # Pass 2: extract the real value of each field.
    # Only shell expansions are used: piping a field through echo loses
    # values such as "-n", and the sed test for enclosing quotes never
    # terminated once a field had been reduced to the empty string.
    for ((pos = 0; pos <= field; pos++)); do
        item=${CONFIG_ITEM[pos]}

        # Strip the leading and trailing space-characters
        item=${item#"${item%%[![:space:]]*}"}
        item=${item%"${item##*[![:space:]]}"}

        # Remove the surrounding double-quotes (needs at least the pair)
        while [[ ${#item} -ge 2 && "${item}" == \"*\" ]]; do
            item=${item#\"}
            item=${item%\"}
        done

        # A doubled double-quote stands for one literal double-quote
        CONFIG_ITEM[pos]=${item//\"\"/\"}
    done

    return 0
}

# fcanon name
# If $name is a symbolic link, then display its value
# (as resolved by "readlink -f"); otherwise display $name itself.
fcanon() {
    local NAME=$1

    if [ -h "$NAME" ]; then
        readlink -f -- "$NAME"
    else
        printf '%s\n' "$NAME"
    fi
}

# configured_host host_name
#
# Check whether the devices in $host_name have been configured or not
# Returns 0 when $host_name is an element of the global NODE_NAME array,
# 1 otherwise.
configured_host() {
    local host_name=$1
    local known_host

    for known_host in "${NODE_NAME[@]}"; do
        [ "${host_name}" = "${known_host}" ] && return 0
    done

    return 1
}

# remote_error fn_name host_addr ret_str
# Verify the return result from remote command
#
# Returns 0 (an error was found and reported on stderr) when $ret_str
# contains "connect:" or is empty; returns 1 when $ret_str looks usable.
remote_error() {
    local fn_name host_addr ret_str

    fn_name=$1
    shift
    host_addr=$1
    shift
    ret_str=$*

    if [[ "${ret_str}" == *connect:* ]]; then
        error_output "${fn_name}(): ${ret_str}"
        return 0
    fi

    if [ -z "${ret_str}" ]; then
        error_output "${fn_name}():" \
        "No results from remote!" \
        "Check network connectivity between the local host and ${host_addr}!"
        return 0
    fi

    return 1
}

# NOTE(review): the next line is carried over unchanged. The nid2hostname definition it starts is not complete in this file and has to be restored from version control.
# nid2hostname nid # Convert $nid to hostname of the lustre cluster node nid2hostname() { local nid=$1 local host_name= local addr nettype ip_addr local ret_str addr=${nid%@*} [ "${nid}" != "${nid#*@*}" ] && nettype=${nid#*@} || nettype=tcp if [ -z "${addr}" ]; then echo "`basename $0`: nid2hostname() error: Invalid nid - \"${nid}\"!" return 1 fi case "${nettype}" in lo*) host_name=`hostname`;; elan*) # QsNet # FIXME: Parse the /etc/elanhosts configuration file to # convert ElanID to hostname ;; ptl*) # Portals # FIXME: Convert portal ID to hostname ;; *) # tcp, o2ib, ra ip_addr=${addr} # Is it IP address or hostname?
if [ -n "`echo ${ip_addr} | sed -e 's/\([0-9]\{1,3\}\.\)\{3,3\}[0-9]\{1,3\}//'`" ] then host_name=${ip_addr} echo ${host_name} return 0 fi # Execute remote command to get the host name ret_str=$(${REMOTE} ${ip_addr} "hostname" 2>&1 &1 && return 1 all_nodelist=$(egrep -v "([[:space:]]|^)#" ${csv_file} | cut -d, -f 1) all_nodelist=$(comma_list ${all_nodelist}) echo ${all_nodelist} return 0 } # get_nodelist # Get the comma-separated list of nodes to be operated on # Note: CSV_FILE, USE_ALLNODES, SPECIFIED_NODELIST and EXCLUDED_NODELIST # are global variables get_nodelist() { local ALL_NODELIST # Get the list of all the nodes in the CSV file ALL_NODELIST=$(get_csv_nodelist ${CSV_FILE}) [ ${PIPESTATUS[0]} -ne 0 ] && echo "${ALL_NODELIST}" && return 1 if [ -z "${ALL_NODELIST}" ]; then echo "`basename $0`: get_nodelist() error:"\ "There are no hosts in the ${CSV_FILE} file!" return 1 fi if ${USE_ALLNODES}; then echo ${ALL_NODELIST} && return 0 fi if [ -n "${SPECIFIED_NODELIST}" ]; then echo $(exclude_items_from_list ${SPECIFIED_NODELIST} ${EXCLUDED_NODELIST}) return 0 fi if [ -n "${EXCLUDED_NODELIST}" ]; then echo $(exclude_items_from_list ${ALL_NODELIST} ${EXCLUDED_NODELIST}) return 0 fi # No hosts to be operated on echo "" return 0 } # check_nodelist nodelist # Given a list of nodes to be operated on, check whether the nodelist is # empty or not and output prompt message. check_nodelist() { local nodes_to_use=$1 if [ -z "${nodes_to_use}" ]; then error_output "There are no nodes to be operated on."\ "Check the node selection options (-a, -w or -x)." usage 1>&2 return 1 else verbose_output "Operating on the following nodes: ${nodes_to_use}" fi return 0 } # nid_in_nidlist nid nidlist # Given a nid, and a list of nids in one node (delimited by comma ','), # return true if the nid appears in the list of nids, or false otherwise. 
nid_in_nidlist() {
    # Returns 0 when $1 (a NID, or a bare address) is found in $2 (a
    # comma-delimited list of NIDs), 1 otherwise. An empty NID or an
    # empty list never matches.
    local nid="$1"
    local nidlist="$2"
    local my_nid
    local -a nids

    # Bail out explicitly; "false && return" never reached the return,
    # so an empty NID went on to match list entries such as "@tcp".
    [[ -n "${nid}" && -n "${nidlist}" ]] || return 1

    if [[ "${nid}" != *@* || "${nid#*@}" == tcp* ]]; then
        # network type is tcp
        # Only the address part in front of "@" is compared. The list is
        # split on commas and whitespace into an array, so its entries
        # are never subject to pathname expansion.
        IFS=$', \t\n' read -r -d '' -a nids <<< "${nidlist}"
        for my_nid in "${nids[@]}"; do
            [[ -n "${my_nid}" ]] || continue
            [[ "${nid%@*}" == "${my_nid%@*}" ]] && return 0
        done
    else
        # network type is not tcp
        # The whole NID has to appear as one list element; the quotes
        # make it a literal string instead of a glob pattern.
        [[ ",${nidlist}," == *",${nid},"* ]] && return 0
    fi

    return 1
}

# NOTE(review): the next line is carried over unchanged. The definitions it contains (get_mgs_nids through the start of check_lnet) are not complete in this file and have to be restored from version control.
# get_mgs_nids mgs_hostname mgs_nids # Get the corresponding NID(s) of the MGS node ${mgs_hostname} from the # "mgs nids" field of one lustre target in the CSV file get_mgs_nids() { local mgs_node="$1" local all_mgs_nids="$2" local mgs_nids local ret_str # Check whether the hostname of the mgs node is in # the mgs nids string for mgs_nids in ${all_mgs_nids//:/ }; do if nid_in_nidlist ${mgs_node} ${mgs_nids}; then echo ${mgs_nids} return 0 fi done # Let's use lctl to get the real nids from the mgs node ret_str=$($REMOTE $mgs_node "PATH=\$PATH:/sbin:/usr/sbin $LCTL list_nids" 2>&1 /dev/null" if [ ${PIPESTATUS[0]} -eq 0 ]; then # This node can contact the MGS node verbose_output "${HOST_NAME[i]} can contact the MGS" \ "node $mgs_node by using nid \"$mgs_nid\"!" ping_mgs=true break fi done done if ! ${ping_mgs}; then error_output "check_lnet_connect():" \ "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\ "with nids - \"${nids_str}\"! Check ${LCTL} command!" return 1 fi return 0 } # Start lnet network in the cluster node and check that # this node can contact the MGS node check_lnet() { if ! $VERIFY_CONNECT; then return 0 fi # Check argument if [ $# -eq 0 ]; then error_output "check_lnet(): Missing argument!"
# NOTE(review): the next line is carried over unchanged. It is the remainder of check_lnet, whose beginning is not complete in this file and has to be restored from version control.
return 1 fi declare -i i=$1 declare -i j local ret_str # Execute remote command to start lnet network verbose_output "Starting lnet network on ${HOST_NAME[i]}" ret_str=$($REMOTE ${HOST_NAME[i]} "PATH=\$PATH:/sbin:/usr/sbin modprobe lnet && $LCTL network up" 2>&1) if [ ${PIPESTATUS[0]} -ne 0 ]; then error_output "check_lnet(): start lnet network on" \ "${HOST_NAME[i]} error: $ret_str" return 1 fi if is_mgs_node ${HOST_NAME[i]}; then return 0 fi # Execute remote command to check that # this node can contact the MGS node for ((j = 0; j < ${MGS_NUM}; j++)); do if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then return 1 fi done return 0 }

# Start lnet network in the MGS node
#
# For every MGS entry from index INIT_IDX up to (not including) MGS_NUM,
# calls add_module_options and then check_lnet with the entry's index in
# the global target arrays (MGS_IDX). Returns 0 when there is no MGS or when
# every call succeeds; otherwise returns the status of the call that failed.
start_mgs_lnet() {
    local -i i
    local -i idx

    if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
        if ${USE_ALLNODES}; then
            verbose_output "There is no MGS target in the ${CSV_FILE} file."
        else
            verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"."
        fi
        return 0
    fi

    # Bare names inside (( )) evaluate to 0 when the variable is unset;
    # "${INIT_IDX}" expanded to nothing there and was a syntax error.
    for ((i = INIT_IDX; i < MGS_NUM; i++)); do
        # Execute remote command to add lnet options lines to
        # the MGS node's modprobe.conf/modules.conf
        idx=${MGS_IDX[i]}
        add_module_options "${idx}" "${MGS_NODENAME[i]}" || return $?

        # Start lnet network in the MGS node
        check_lnet "${idx}" || return $?
    done

    return 0
}

# Get all the Lustre target items in the CSV file and do some checks.
#
# get_lustre_items csv_file
# Reads $csv_file line by line. Lines without any alphanumeric character,
# lines holding a "#" at the start or after whitespace, MD/PV/VG/LV marker
# lines and, unless USE_ALLNODES is true, lines whose host is not in
# NODES_TO_USE are skipped. Every other line is split with parse_line and
# stored at the next index of the global target arrays (HOST_NAME,
# MODULE_OPTS, ..., FAILOVERS, FAILOVERS_NAMES).
# Returns 0 on success, 1 on the first error (reported on stderr).
get_lustre_items() {
    # Check argument
    if [ $# -eq 0 ]; then
        error_output "get_lustre_items(): Missing argument" \
                  "for function get_lustre_items()!"
        return 1
    fi

    local CSV_FILE=$1
    local LINE normalized marker hostname
    local -a words
    local comment_re='(^|[[:space:]])#'
    local -i line_num=0
    local -i idx=0

    if [ ! -r "${CSV_FILE}" ]; then
        error_output "get_lustre_items(): Cannot read ${CSV_FILE}!"
        return 1
    fi

    # The file is read through descriptor 9 so that commands run inside
    # the loop keep their own stdin. The redirection is attached to the
    # loop, so the descriptor is closed again on every way out of it.
    while read -u 9 -r LINE; do
        line_num=${line_num}+1
        # verbose_output "Parsing line ${line_num}: $LINE"

        # Get rid of the empty line
        [[ "${LINE}" == *[[:alnum:]]* ]] || continue

        # Get rid of the comment line
        [[ "${LINE}" =~ ${comment_re} ]] && continue

        # Collapse runs of whitespace as an unquoted echo would, but
        # without exposing the line to pathname expansion.
        read -r -a words <<< "${LINE}"
        normalized="${words[*]}"

        # Skip the Linux MD/LVM line
        marker=$(printf '%s\n' "${normalized}" | cut -d, -f 2)
        case "${marker}" in
        "${MD_MARKER}" | "${PV_MARKER}" | "${VG_MARKER}" | "${LV_MARKER}")
            continue
            ;;
        esac

        # Skip the host which is not specified in the host list
        if ! ${USE_ALLNODES}; then
            hostname=$(printf '%s\n' "${normalized}" | cut -d, -f 1)
            # Arguments are left unquoted on purpose, exactly as before:
            # host_in_hostlist is defined elsewhere and an empty value
            # must keep expanding to no argument at all.
            host_in_hostlist ${hostname} ${NODES_TO_USE} || continue
        fi

        # Parse the config line into CONFIG_ITEM
        if ! parse_line "$LINE"; then
            error_output "parse_line(): Occurred" \
                  "on line ${line_num} in ${CSV_FILE}: $LINE"
            return 1
        fi

        HOST_NAME[idx]=${CONFIG_ITEM[0]}
        MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
        DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
        MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
        DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
        FS_NAME[idx]=${CONFIG_ITEM[5]}
        MGS_NIDS[idx]=${CONFIG_ITEM[6]}
        INDEX[idx]=${CONFIG_ITEM[7]}
        FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
        MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
        MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
        FAILOVERS[idx]=${CONFIG_ITEM[11]}

        # Put a backslash in front of every double-quote
        MODULE_OPTS[idx]=$(printf '%s\n' "${MODULE_OPTS[idx]}" | sed 's/"/\\"/g')

        # Convert IP addresses in NIDs to hostnames
        # (argument left unquoted on purpose: an empty failover field is
        # passed as no argument, as before). On failure the text printed
        # by the helper is reported as the error message.
        if ! FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]}); then
            error_output "${FAILOVERS_NAMES[idx]}"
            return 1
        fi

        # Check some required items for formatting target
        if ! check_lustre_item $idx; then
            error_output "check_lustre_item():" \
                  "Occurred on line ${line_num} in ${CSV_FILE}."
            return 1
        fi

        idx=${idx}+1
    done 9< "${CSV_FILE}"

    return 0
}