#!/bin/bash
#
# lc_cluman - script for generating the Red Hat Cluster Manager
#             HA software's configuration files
#
################################################################################

# Usage
usage() {
        cat >&2 <<EOF

Usage:  `basename $0` <-n hostnames> [-s service addresses]
                      [-c heartbeat channel] [-o heartbeat options] [-v]
                      <-d target device> [-d target device...]

        -n hostnames            the nodenames of the primary node and its
                                failover nodes
                                Multiple nodenames are separated by colon (:)
                                delimiter. The first one is the nodename of the
                                primary node, the others are failover nodenames.
        -s service addresses    the IP addresses to failover
                                Multiple addresses are separated by colon (:)
                                delimiter.
        -c heartbeat channel    the method to send/receive heartbeats on
                                The default method is multicast, and the
                                default multicast_ipaddress is "225.0.0.11".
        -o heartbeat options    a "catchall" for other heartbeat configuration
                                options
                                Multiple options are separated by colon (:)
                                delimiter.
        -v                      verbose mode
        -d target device        the target device name and mount point
                                The device name and mount point are separated by
                                colon (:) delimiter.

EOF
        exit 1
}
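
# Example invocation (the hostnames, service address, device and mount point
# below are illustrative placeholders, not defaults):
#   lc_cluman -n nodeA:nodeB -s 192.0.2.10 -c "multicast 225.0.0.11" \
#             -d /dev/sdb1:/mnt/mdt -v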

# Get the library of functions
. @scriptlibdir@/lc_common

#****************************** Global variables ******************************#
TMP_DIR=${CLUMGR_TMP_DIR}               # Temporary directory

declare -a NODE_NAMES                   # Node names in the failover group
declare -a SRV_IPADDRS                  # Service IP addresses

# Lustre target device names, service names and mount points
declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
declare -i TARGET_NUM=0                 # Number of targets

# Get and check the positional parameters
VERBOSE_OUTPUT=false
while getopts "n:s:c:o:vd:" OPTION; do
        case $OPTION in
        n)
                HOSTNAME_OPT=$OPTARG
                PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
                if [ -z "${PRIM_NODENAME}" ]; then
                        echo >&2 $"`basename $0`: Missing primary nodename!"
                        usage
                fi
                HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
                if [ ${HOSTNAME_NUM} -lt 2 ]; then
                        echo >&2 $"`basename $0`: Missing failover nodenames!"
                        usage
                fi
                ;;
        s)
                SRVADDR_OPT=$OPTARG
                ;;
        c)
                HBCHANNEL_OPT=$OPTARG
                HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
                               | sed 's/"$//'`
                if [ -n "${HBCHANNEL_OPT}" ] \
                && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
                && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
                        echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
                                  "- ${HBCHANNEL_OPT}!"
                        usage
                fi
                ;;
        o)
                HBOPT_OPT=$OPTARG
                HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
                ;;
        v)
                VERBOSE_OUTPUT=true
                ;;
        d)
                DEVICE_OPT=$OPTARG
                TARGET_DEVNAMES[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $1}'`
                TARGET_MNTPNTS[TARGET_NUM]=`echo ${DEVICE_OPT}|awk -F: '{print $2}'`
                if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
                        echo >&2 $"`basename $0`: Missing target device name!"
                        usage
                fi
                if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
                        echo >&2 $"`basename $0`: Missing mount point for target"\
                                  "${TARGET_DEVNAMES[TARGET_NUM]}!"
                        usage
                fi
                TARGET_NUM=$(( TARGET_NUM + 1 ))
                ;;
        ?)
                usage
                ;;
        esac
done

# Check the required parameters
if [ -z "${HOSTNAME_OPT}" ]; then
        echo >&2 $"`basename $0`: Missing -n option!"
        usage
fi

if [ -z "${DEVICE_OPT}" ]; then
        echo >&2 $"`basename $0`: Missing -d option!"
        usage
fi

# get_nodenames
#
# Get all the node names in this failover group
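# For illustration, a hypothetical HOSTNAME_OPT of "nodeA:nodeB:nodeC" yields
# NODE_NAMES=(nodeA nodeB nodeC), with nodeA as the primary node and the rest
# as its failover nodes.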
get_nodenames() {
        declare -i idx
        local nodename_str nodename

        # Split the colon-separated hostname list in field order, so that
        # the primary node always comes first
        nodename_str=`echo ${HOSTNAME_OPT} | awk -F":" \
                      '{for (i = 1; i <= NF; i++) print $i}'`
        idx=0
        for nodename in ${nodename_str}
        do
                NODE_NAMES[idx]=${nodename}
                idx=$((idx + 1))
        done

        return 0
}

# get_check_srvIPaddrs
#
# Get and check all the service IP addresses in this failover group
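# For illustration, a hypothetical SRVADDR_OPT of "192.0.2.10:192.0.2.11"
# yields SRV_IPADDRS=(192.0.2.10 192.0.2.11); each address is then verified
# to share a subnet with the real IP of every node in the failover group.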
get_check_srvIPaddrs() {
        declare -i idx
        declare -i i
        local srvIPaddr_str srvIPaddr

        # Split the colon-separated address list in field order
        srvIPaddr_str=`echo ${SRVADDR_OPT} | awk -F":" \
                      '{for (i = 1; i <= NF; i++) print $i}'`
        idx=0
        for srvIPaddr in ${srvIPaddr_str}
        do
                SRV_IPADDRS[idx]=${srvIPaddr}
                idx=$((idx + 1))
        done

        for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
          for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
            # Check service IP address
            verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
                           "real IP of host ${NODE_NAMES[i]} are in the" \
                           "same subnet..."
            if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
            then
              return 1
            fi
            verbose_output "OK"
          done
        done

        return 0
}

# cluman_running host_name
#
# Run remote command to check whether the clumanager service is running on
# @host_name. Returns 0 if the service is running, 1 if it is stopped, and
# 2 if the remote command itself fails.
cluman_running() {
        local host_name=$1
        local ret_str

        ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager status" 2>&1`
        if [ $? -ne 0 ]; then
                if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then
                        echo >&2 "`basename $0`: cluman_running() error:"\
                        "remote command to ${host_name} error: ${ret_str}!"
                        return 2
                else
                        return 1
                fi
        fi

        return 0
}

# stop_cluman host_name
#
# Run remote command to stop the clumanager service running on @host_name
stop_cluman() {
        local host_name=$1
        local ret_str

        ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager stop" 2>&1`
        if [ $? -ne 0 ]; then
                echo >&2 "`basename $0`: stop_cluman() error:"\
                "remote command to ${host_name} error: ${ret_str}!"
                return 1
        fi

        echo "`basename $0`: Clumanager service is stopped on node ${host_name}."
        return 0
}

# check_cluman
#
# Run remote command to check each node's clumanager service
check_cluman() {
        declare -i idx
        local OK

        # Get and check all the service IP addresses
        if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then
                return 1
        fi

        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
                # Check clumanager service status
                cluman_running ${NODE_NAMES[idx]}
                rc=$?
                if [ "$rc" -eq "2" ]; then
                        return 1
                elif [ "$rc" -eq "1" ]; then
                        verbose_output "Clumanager service is stopped on"\
                        "node ${NODE_NAMES[idx]}."
                elif [ "$rc" -eq "0" ]; then
                        OK=
                        echo -n "`basename $0`: Clumanager service is running on"\
                        "${NODE_NAMES[idx]}. Stop the service and"\
                        "generate new configurations? [y/n]:"
                        read OK
                        if [ "${OK}" = "n" ]; then
                                echo "`basename $0`: New Clumanager configurations"\
                                "are not generated."
                                return 2
                        fi

                        # Stop clumanager service
                        stop_cluman ${NODE_NAMES[idx]}
                fi
        done

        return 0
}

# get_srvname hostname target_devname
#
# Get the Lustre target server name from the node @hostname
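# For illustration, if the tunefs output from the remote node contained a
# line such as "Target: testfs-OST0000" (a hypothetical target name), this
# function would echo "testfs-OST0000".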
get_srvname() {
        local host_name=$1
        local target_devname=$2
        local target_srvname=
        local ret_str

        # Execute remote command to get the target server name
        ret_str=`${REMOTE} ${host_name} \
                "${TUNEFS} --print --verbose ${target_devname} | grep Target:" 2>&1`
        if [ $? -ne 0 ]; then
                echo "`basename $0`: get_srvname() error:" \
                     "from host ${host_name} - ${ret_str}"
                return 1
        fi

        if [ "${ret_str}" != "${ret_str#*Target: }" ]; then
                ret_str=${ret_str#*Target: }
                target_srvname=`echo ${ret_str} | awk '{print $1}'`
        fi

        if [ -z "${target_srvname}" ]; then
                echo "`basename $0`: get_srvname() error: Cannot get the"\
                     "server name of target ${target_devname} in ${host_name}!"
                return 1
        fi

        echo ${target_srvname}
        return 0
}

# get_srvnames
#
# Get server names of all the Lustre targets in this failover group
get_srvnames() {
        declare -i i

        # Initialize the TARGET_SRVNAMES array
        unset TARGET_SRVNAMES

        # Get Lustre target service names
        for ((i = 0; i < ${#TARGET_DEVNAMES[@]}; i++)); do
                TARGET_SRVNAMES[i]=$(get_srvname ${PRIM_NODENAME} \
                                     ${TARGET_DEVNAMES[i]})
                if [ $? -ne 0 ]; then
                        echo >&2 "${TARGET_SRVNAMES[i]}"
                        return 1
                fi
        done

        return 0
}

# check_retval retval
#
# Check the return value of redhat-config-cluster-cmd
check_retval() {
        if [ $1 -ne 0 ]; then
                echo >&2 "`basename $0`: Failed to run ${CONFIG_CMD}!"
                return 1
        fi

        return 0
}

# add_services
#
# Add service tags into the cluster.xml file
add_services() {
        declare -i idx
        declare -i i

        # Add service tag
        for ((i = 0; i < ${#TARGET_SRVNAMES[@]}; i++)); do
                ${CONFIG_CMD} --add_service --name=${TARGET_SRVNAMES[i]}
                if ! check_retval $?; then
                        return 1
                fi

                for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
                        ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
                        --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
                        if ! check_retval $?; then
                                return 1
                        fi
                done

                ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
                              --add_device \
                              --name=${TARGET_DEVNAMES[i]}
                if ! check_retval $?; then
                        return 1
                fi

                ${CONFIG_CMD} --service=${TARGET_SRVNAMES[i]} \
                              --device=${TARGET_DEVNAMES[i]} \
                              --mount \
                              --mountpoint=${TARGET_MNTPNTS[i]} \
                              --fstype=lustre
                if ! check_retval $?; then
                        return 1
                fi
        done

        return 0
}

# gen_cluster_xml
#
# Run redhat-config-cluster-cmd to create the cluster.xml file
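# Note: HBCHANNEL_OPT is expected to name the channel and, for multicast,
# the address, e.g. "broadcast" or "multicast 225.0.0.11" (the default
# multicast address from the usage text).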
gen_cluster_xml() {
        declare -i idx
        declare -i i
        local mcast_IPaddr
        local node_names
        local hbopt

        [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
        /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old

        # Run redhat-config-cluster-cmd to generate cluster.xml
        # Add clumembd tag
        if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
                ${CONFIG_CMD} --clumembd --broadcast=yes
                if ! check_retval $?; then
                        return 1
                fi
                ${CONFIG_CMD} --clumembd --multicast=no
                if ! check_retval $?; then
                        return 1
                fi
        elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
                mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
                if [ -n "${mcast_IPaddr}" ]; then
                        ${CONFIG_CMD} --clumembd --multicast=yes \
                                      --multicast_ipaddress=${mcast_IPaddr}
                        if ! check_retval $?; then
                                return 1
                        fi
                fi
        fi

        # Add cluster tag
        node_names=
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
                node_names=${node_names}"${NODE_NAMES[idx]} "
        done

        ${CONFIG_CMD} --cluster --name="${node_names}failover group"
        if ! check_retval $?; then
                return 1
        fi

        # Add member tag
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
                ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
                if ! check_retval $?; then
                        return 1
                fi
        done

        # Add service tag
        if ! add_services; then
                return 1
        fi

        # Add other tags
        if [ -n "${HBOPT_OPT}" ]; then
                while read -r hbopt
                do
                        ${CONFIG_CMD} ${hbopt}
                        if ! check_retval $?; then
                                return 1
                        fi
                done < <(echo ${HBOPT_OPT} | awk -F":" \
                         '{for (i = 1; i <= NF; i++) print $i}')
        fi

        return 0
}

# create_config
#
# Create the cluster.xml file and scp it to each node's /etc/
create_config() {
        declare -i idx

        /bin/mkdir -p ${TMP_DIR}
        CONFIG_PRIMNODE=${TMP_DIR}/cluster.xml.${PRIM_NODENAME}
        CONFIG_LUSTRE=${TMP_DIR}/cluster.xml${FILE_SUFFIX}

        # Get server names of Lustre targets
        if ! get_srvnames; then
                return 1
        fi

        if [ -s ${CONFIG_PRIMNODE} ]; then
                if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${CONFIG_PRIMNODE}`" ]
                then
                        verbose_output "${CONFIG_PRIMNODE} already exists."
                        return 0
                else
                        [ -e "${CLUMAN_DIR}/cluster.xml" ] && \
                        /bin/mv ${CLUMAN_DIR}/cluster.xml ${CLUMAN_DIR}/cluster.xml.old

                        /bin/cp -f ${CONFIG_PRIMNODE} ${CLUMAN_DIR}/cluster.xml

                        # Add services into the cluster.xml file
                        if ! add_services; then
                                return 1
                        fi
                fi
        else
                # Run redhat-config-cluster-cmd to generate cluster.xml
                verbose_output "Creating cluster.xml file for" \
                               "${PRIM_NODENAME} failover group hosts..."
                if ! gen_cluster_xml; then
                        return 1
                fi
                verbose_output "OK"
        fi

        /bin/mv ${CLUMAN_DIR}/cluster.xml ${CONFIG_LUSTRE}
        [ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \
        /bin/mv ${CLUMAN_DIR}/cluster.xml.old ${CLUMAN_DIR}/cluster.xml

        # scp the cluster.xml file to all the nodes
        verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \
                       "${PRIM_NODENAME} failover group hosts..."
        for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
                /bin/cp -f ${CONFIG_LUSTRE} ${TMP_DIR}/cluster.xml.${NODE_NAMES[idx]}

                scp ${CONFIG_LUSTRE} ${NODE_NAMES[idx]}:${CLUMAN_DIR}/
                if [ $? -ne 0 ]; then
                        echo >&2 "`basename $0`: Failed to scp cluster.xml file"\
                                 "to node ${NODE_NAMES[idx]}!"
                        return 1
                fi
        done
        verbose_output "OK"

        return 0
}

# Main flow
# Get all the node names
if ! get_nodenames; then
        exit 1
fi

# Check clumanager services
verbose_output "Checking clumanager service on the ${PRIM_NODENAME}"\
               "failover group hosts..."
check_cluman
rc=$?
if [ "$rc" -eq "2" ]; then
        verbose_output "OK"
        exit 0
elif [ "$rc" -eq "1" ]; then
        exit 1
fi
verbose_output "OK"

# Generate configuration files
if ! create_config; then
        exit 1
fi

exit 0