Whamcloud - gitweb
8469f7d7038f000235a7698cf33d7e6ecae956c2
[fs/lustre-release.git] / lustre / utils / cluster_scripts / gen_clumanager_config.sh
1 #!/bin/bash
2 #
# gen_clumanager_config.sh - script for generating the configuration files of
#                            Red Hat's Cluster Manager HA software
5 #
6 ################################################################################
7
# usage
#
# Print the command-line synopsis to stderr and terminate the script with
# exit status 1.
usage() {
        # "$0" is quoted so that a script path containing whitespace is still
        # reduced to its base name correctly.
        cat >&2 <<EOF

Usage:  $(basename "$0") <-n hostnames> <-d target device> <-s service addresses> 
                      [-c heartbeat channels] [-o heartbeat options] [-v]

        -n hostnames            the nodenames of the primary node and its fail-
                                overs
                                Multiple nodenames are separated by colon (:)
                                delimeter. The first one is the nodename of the 
                                primary node, the others are failover nodenames.
        -d target device        the target device name and type
                                The name and type are separated by colon (:)
                                delimeter. The type values are: mgs, mdt, ost or
                                mgs_mdt.
        -s service addresses    the IP addresses to failover
                                Multiple addresses are separated by colon (:)
                                delimeter.
        -c heartbeat channels   the methods to send/rcv heartbeats on
                                The default method is multicast, and multicast_
                                ipaddress is "225.0.0.11".
        -o heartbeat options    a "catchall" for other heartbeat configuration 
                                options
        -v                      verbose mode

EOF
        exit 1
}
37
# Global variables
#
# Plain double quotes are used for all values: the $"..." form is bash's
# locale-translation quoting and must not be applied to paths or command names.

# Directory holding the helper scripts, overridable through the
# CLUSTER_SCRIPTS_PATH environment variable. It is concatenated directly with
# the script name below, so it has to end with "/".
SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
SCRIPT_VERIFY_SRVIP="${SCRIPTS_PATH}verify_serviceIP.sh"

LUSTRE_SRV_SCRIPT="/etc/rc.d/init.d/lustre"     # service script for lustre

TMP_DIR="/tmp/clumanager/"              # temporary directory
CLUMGR_DIR="/etc/"                      # CluManager configuration directory

CONFIG_CMD="redhat-config-cluster-cmd"

declare -a NODE_NAMES                   # node names in the failover group
declare -a SRV_IPADDRS                  # service IP addresses
51
# Parse and validate the command-line options.
#
# Sets HOSTNAME_OPT/HOSTNAME_NUM (-n), DEVICE_OPT/TARGET_DEV/TARGET_TYPE (-d),
# SRVADDR_OPT (-s), HBCHANNEL_OPT (-c), HBOPT_OPT (-o) and VERBOSE_OPT (-v).
# Any invalid value or unknown option ends in usage(), which exits.
while getopts "n:d:s:c:o:v" OPTION; do
        case "${OPTION}" in
        n)
                HOSTNAME_OPT=${OPTARG}
                # Count only the non-empty colon-separated names, so that an
                # argument such as "node1:" is not taken for a two-node group.
                HOSTNAME_NUM=$(printf '%s\n' "${HOSTNAME_OPT}" | awk -F":" \
                        '{ n = 0; for (i = 1; i <= NF; i++) if ($i != "") n++; print n }')
                if [ "${HOSTNAME_NUM}" -lt 2 ]; then
                        echo >&2 "$(basename "$0"): Lack failover nodenames!"
                        usage
                fi
                ;;
        d)
                DEVICE_OPT=${OPTARG}
                # The value is quoted on its way to awk so that device names
                # containing glob characters are not expanded by the shell.
                TARGET_DEV=$(printf '%s\n' "${DEVICE_OPT}" | awk -F":" '{print $1}')
                TARGET_TYPE=$(printf '%s\n' "${DEVICE_OPT}" | awk -F":" '{print $2}')
                case "${TARGET_TYPE}" in
                mgs|mdt|ost|mgs_mdt)
                        ;;
                "")
                        echo >&2 "$(basename "$0"): Lack target device type!"
                        usage
                        ;;
                *)
                        echo >&2 "$(basename "$0"): Invalid target device type" \
                                  "- ${TARGET_TYPE}!"
                        usage
                        ;;
                esac
                ;;
        s)
                SRVADDR_OPT=${OPTARG}
                ;;
        c)
                # Strip one literal double quote from each end of the value.
                HBCHANNEL_OPT=${OPTARG#\"}
                HBCHANNEL_OPT=${HBCHANNEL_OPT%\"}
                # An empty value is accepted; anything else has to mention
                # either "broadcast" or "multicast".
                case "${HBCHANNEL_OPT}" in
                ""|*broadcast*|*multicast*)
                        ;;
                *)
                        echo >&2 "$(basename "$0"): Invalid Heartbeat channel" \
                                  "- ${HBCHANNEL_OPT}!"
                        usage
                        ;;
                esac
                ;;
        o)
                # Strip one literal double quote from each end of the value.
                HBOPT_OPT=${OPTARG#\"}
                HBOPT_OPT=${HBOPT_OPT%\"}
                ;;
        v)
                VERBOSE_OPT="yes"
                ;;
        \?)
                # getopts reports unknown options and missing arguments as "?"
                usage
                ;;
        esac
done
105
# Check the required parameters: -n, -d and -s must all have been given with a
# non-empty value, otherwise report the missing one and show the usage text
# (usage exits the script).
if [ -z "${HOSTNAME_OPT}" ]; then
        echo >&2 "$(basename "$0"): Lack -n option!"
        usage
fi

if [ -z "${DEVICE_OPT}" ]; then
        echo >&2 "$(basename "$0"): Lack -d option!"
        usage
fi

if [ -z "${SRVADDR_OPT}" ]; then
        echo >&2 "$(basename "$0"): Lack -s option!"
        usage
fi
121
# verbose_output words...
#
# Write the given words, prefixed with the script name, to stdout when
# VERBOSE_OPT is "yes" (set by the -v option); stay silent otherwise.
# Always returns 0.
verbose_output() {
        [ "${VERBOSE_OPT}" != "yes" ] && return 0

        echo "$(basename $0): $*"
        return 0
}
129
# get_nodenames
#
# Split the colon-separated node list in HOSTNAME_OPT.
# Sets PRIM_NODENAME to the first field and fills the NODE_NAMES array with
# every non-empty field, in the order given on the command line.
# Always returns 0.
get_nodenames() {
        local -a fields
        local nodename

        PRIM_NODENAME=${HOSTNAME_OPT%%:*}

        # The list is split by the shell rather than by awk's "for (i in a)":
        # that loop visits array elements in an unspecified order, so it could
        # place a failover node in front of the primary one.
        IFS=':' read -r -a fields <<< "${HOSTNAME_OPT}"

        NODE_NAMES=()
        for nodename in "${fields[@]}"; do
                if [ -n "${nodename}" ]; then
                        NODE_NAMES[${#NODE_NAMES[@]}]=${nodename}
                fi
        done

        return 0
}
150
# get_check_srvIPaddrs
#
# Split the colon-separated address list in SRVADDR_OPT into the SRV_IPADDRS
# array (non-empty fields, command-line order), then run the
# SCRIPT_VERIFY_SRVIP helper once for every (service address, node name) pair.
# Returns 0 when every helper run succeeds, 1 as soon as one of them fails.
get_check_srvIPaddrs() {
        local -a fields
        local srvIPaddr nodename

        # Split with the shell rather than awk's "for (i in a)", whose
        # iteration order is unspecified.
        IFS=':' read -r -a fields <<< "${SRVADDR_OPT}"

        SRV_IPADDRS=()
        for srvIPaddr in "${fields[@]}"; do
                if [ -n "${srvIPaddr}" ]; then
                        SRV_IPADDRS[${#SRV_IPADDRS[@]}]=${srvIPaddr}
                fi
        done

        for srvIPaddr in "${SRV_IPADDRS[@]}"; do
                for nodename in "${NODE_NAMES[@]}"; do
                        # Check service IP address
                        verbose_output "Verifying service IP ${srvIPaddr} and" \
                                       "real IP of host ${nodename} are in the" \
                                       "same subnet..."
                        if ! "${SCRIPT_VERIFY_SRVIP}" "${srvIPaddr}" "${nodename}"
                        then
                                return 1
                        fi
                        verbose_output "OK"
                done
        done

        return 0
}
184
# stop_clumanager
#
# Run "/sbin/service clumanager stop" on PRIM_NODENAME and on every entry of
# NODE_NAMES after the first one, through a single pdsh invocation.
# The remote-shell command comes from the PDSH variable and falls back to
# plain "pdsh" when PDSH is unset or empty.
# Returns 0 on success, 1 when the pdsh command fails.
stop_clumanager() {
        local pdsh_cmd="${PDSH:-pdsh}"
        local nodename_str=${PRIM_NODENAME}
        local -i idx

        # Build the comma-separated host list expected by "pdsh -w"
        for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
                nodename_str="${nodename_str},${NODE_NAMES[idx]}"
        done

        # pdsh_cmd is deliberately left unquoted so that a PDSH value which
        # carries its own options still splits into separate words.
        if ! ${pdsh_cmd} -w "${nodename_str}" /sbin/service clumanager stop
        then
                echo >&2 "$(basename "$0"): stop_clumanager() error:"\
                         "Fail to execute pdsh command!"
                return 1
        fi

        return 0
}
205
# check_retval retval
#
# Evaluate the exit status of a ${CONFIG_CMD} run passed in as retval.
# Returns 0 when retval is 0; otherwise prints an error message to stderr
# and returns 1.
check_retval() {
        [ $1 -ne 0 ] || return 0

        echo >&2 "$(basename $0): Fail to run ${CONFIG_CMD}!"
        return 1
}
217
# _run_config_cmd args...
#
# Run ${CONFIG_CMD} with the given arguments and hand its exit status to
# check_retval. Returns what check_retval returns.
_run_config_cmd() {
        ${CONFIG_CMD} "$@"
        check_retval $?
}

# gen_cluster_xml
#
# Run ${CONFIG_CMD} repeatedly to build the cluster configuration:
#   - the clumembd heartbeat channel, depending on HBCHANNEL_OPT
#   - the cluster, named after TARGET_TYPE
#   - one member and one failover-domain node per NODE_NAMES entry
#   - the ${TARGET_TYPE}-service with LUSTRE_SRV_SCRIPT as its user script,
#     its failover domain and one service IP address per SRV_IPADDRS entry
#   - every colon-separated option group found in HBOPT_OPT
# Returns 0 on success, 1 as soon as one command fails.
gen_cluster_xml() {
        local nodename srvIPaddr hbopt
        local mcast_IPaddr
        local -a hbopts hbopt_args

        # Add clumembd tag ("broadcast" takes precedence over "multicast")
        case "${HBCHANNEL_OPT}" in
        *broadcast*)
                _run_config_cmd --clumembd --broadcast=yes || return 1
                ;;
        *multicast*)
                # The multicast address is the second word of the option;
                # nothing is configured when it is missing.
                mcast_IPaddr=$(printf '%s\n' "${HBCHANNEL_OPT}" \
                               | awk '{print $2}')
                if [ -n "${mcast_IPaddr}" ]; then
                        _run_config_cmd --clumembd --multicast=yes \
                                --multicast_ipaddress="${mcast_IPaddr}" \
                                || return 1
                fi
                ;;
        esac

        # Add cluster tag
        # Double quotes are required here so that ${TARGET_TYPE} is expanded
        # instead of ending up literally in the cluster name.
        _run_config_cmd --cluster --name="${TARGET_TYPE} failover group" \
                || return 1

        # Add member tag
        for nodename in "${NODE_NAMES[@]}"; do
                _run_config_cmd --add_member --name="${nodename}" || return 1
        done

        # Add failoverdomain tag
        _run_config_cmd --add_failoverdomain --name="${TARGET_TYPE}-domain" \
                || return 1

        for nodename in "${NODE_NAMES[@]}"; do
                _run_config_cmd --failoverdomain="${TARGET_TYPE}-domain" \
                        --add_failoverdomainnode --name="${nodename}" \
                        || return 1
        done

        # Add service tag
        _run_config_cmd --add_service --name="${TARGET_TYPE}-service" \
                || return 1

        _run_config_cmd --service="${TARGET_TYPE}-service" \
                --userscript="${LUSTRE_SRV_SCRIPT}" || return 1

        _run_config_cmd --service="${TARGET_TYPE}-service" \
                --failoverdomain="${TARGET_TYPE}-domain" || return 1

        # The addresses belong to the service created above, whatever the
        # target type is (not only to an "mgs" service).
        for srvIPaddr in "${SRV_IPADDRS[@]}"; do
                _run_config_cmd --service="${TARGET_TYPE}-service" \
                        --add_service_ipaddress --ipaddress="${srvIPaddr}" \
                        || return 1
        done

        # Add other tags: each colon-separated group of HBOPT_OPT becomes the
        # argument list of one ${CONFIG_CMD} invocation, in the given order.
        if [ -n "${HBOPT_OPT}" ]; then
                IFS=':' read -r -a hbopts <<< "${HBOPT_OPT}"
                for hbopt in "${hbopts[@]}"; do
                        # Split the group on whitespace without glob expansion
                        read -r -a hbopt_args <<< "${hbopt}"
                        if [ ${#hbopt_args[@]} -eq 0 ]; then
                                continue
                        fi
                        _run_config_cmd "${hbopt_args[@]}" || return 1
                done
        fi

        return 0
}
315
# create_config
#
# Create the cluster.xml file and scp it to each node's ${CLUMGR_DIR}.
#
# A copy is kept as ${TMP_DIR}cluster.xml.<primary node> (its path is stored in
# the global CONFIG_PRIMNODE); when that file already exists the function does
# nothing. A ${TMP_DIR}cluster.xml.<node> file is also touched for every entry
# of NODE_NAMES before the remote copy to that node.
# Returns 0 on success (or when nothing had to be done), 1 when generating,
# copying locally or copying remotely fails.
create_config() {
        local nodename

        CONFIG_PRIMNODE="${TMP_DIR}cluster.xml.${PRIM_NODENAME}"

        if [ -e "${CONFIG_PRIMNODE}" ]; then
                verbose_output "${CONFIG_PRIMNODE} already exists."
                return 0
        fi

        # Run redhat-config-cluster-cmd to generate cluster.xml
        verbose_output "Creating cluster.xml file for" \
                       "${PRIM_NODENAME} failover group hosts..."
        if ! gen_cluster_xml; then
                return 1
        fi
        verbose_output "OK"

        # Make sure the temporary directory exists before copying into it
        if [ -n "${TMP_DIR}" ] && ! mkdir -p "${TMP_DIR}"; then
                echo >&2 "$(basename "$0"): Fail to create directory"\
                         "${TMP_DIR}!"
                return 1
        fi

        # Stop here when the local copy fails: there would be nothing valid
        # to send to the nodes.
        if ! /bin/cp -f "${CLUMGR_DIR}cluster.xml" "${CONFIG_PRIMNODE}"; then
                echo >&2 "$(basename "$0"): Fail to copy"\
                         "${CLUMGR_DIR}cluster.xml to ${CONFIG_PRIMNODE}!"
                return 1
        fi

        # scp the cluster.xml file to all the nodes
        verbose_output "Remote copying cluster.xml file to" \
                       "${PRIM_NODENAME} failover group hosts..."
        for nodename in "${NODE_NAMES[@]}"; do
                touch "${TMP_DIR}cluster.xml.${nodename}"
                if ! scp "${CONFIG_PRIMNODE}" \
                         "${nodename}:${CLUMGR_DIR}cluster.xml"
                then
                        echo >&2 "$(basename "$0"): Fail to scp cluster.xml file"\
                                 "to node ${nodename}!"
                        return 1
                fi
        done
        verbose_output "OK"

        return 0
}
354
# Main flow
# Every step below aborts the script with exit status 1 when it fails.

# Get all the node names
get_nodenames || exit 1

# Get and check all the service IP addresses
get_check_srvIPaddrs || exit 1

# Stop clumanager services
verbose_output "Stopping clumanager service in the ${PRIM_NODENAME}"\
               "failover group hosts..."
stop_clumanager || exit 1
verbose_output "OK"

# Generate configuration files
create_config || exit 1

exit 0