3 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
6 # lustre_config - format and set up multiple lustre servers from a csv file
8 # This script is used to parse each line of a spreadsheet (csv file) and
9 # execute remote commands to format (mkfs.lustre) every Lustre target
10 # that will be part of the Lustre cluster.
12 # In addition, it can also verify the network connectivity and hostnames in
13 # the cluster, configure Linux MD/LVM devices and produce High-Availability
14 # software configurations for Heartbeat or CluManager.
16 ################################################################################
22 Usage: `basename $0` [options] <csv file>
24 This script is used to format and set up multiple lustre servers from a
29 -a select all the nodes from the csv file to operate on
30 -w hostname,hostname,...
31 select the specified list of nodes (separated by commas) to
32 operate on rather than all the nodes in the csv file
33 -x hostname,hostname,...
34 exclude the specified list of nodes (separated by commas)
35 -t HAtype produce High-Availability software configurations
36 The argument following -t is used to indicate the High-
37 Availability software type. The HA software types which
38 are currently supported are: hbv1 (Heartbeat version 1)
39 and hbv2 (Heartbeat version 2).
40 -n no net - don't verify network connectivity and hostnames
42 -d configure Linux MD/LVM devices before formatting the
44 -f force-format the Lustre targets using --reformat option
45 -m no fstab change - don't modify /etc/fstab to add the new
47 If using this option, then the value of "mount options"
48 item in the csv file will be passed to mkfs.lustre, else
49 the value will be added into the /etc/fstab.
50 -u upgrade Lustre targets from 1.4 to 1.6
52 csv file a spreadsheet that contains configuration parameters
53 (separated by commas) for each target in a Lustre cluster
63 This script is used to parse each line of a spreadsheet (csv file) and
64 execute remote commands to format (mkfs.lustre) every Lustre target
65 that will be part of the Lustre cluster.
67 It can also optionally:
68 * upgrade Lustre targets from 1.4 to 1.6
69 * verify the network connectivity and hostnames in the cluster
70 * configure Linux MD/LVM devices
71 * modify /etc/modprobe.conf to add Lustre networking info
72 * add the Lustre server info to /etc/fstab
73 * produce configurations for Heartbeat or CluManager.
75 There are 5 kinds of line formats in the csv file. They represent the following
79 hostname,MD,md name,operation mode,options,raid level,component devices
81 hostname hostname of the node in the cluster
82 MD marker of MD device line
83 md name MD device name, e.g. /dev/md0
84 operation mode create or remove, default is create
85 options a "catchall" for other mdadm options, e.g. "-c 128"
86 raid level raid level: 0,1,4,5,6,10,linear and multipath
87 component devices block devices to be combined into the MD device
88 Multiple devices are separated by space or by using
89 shell expansions, e.g. "/dev/sd{a,b,c}"
91 2) Linux LVM PV (Physical Volume)
93 hostname,PV,pv names,operation mode,options
95 hostname hostname of the node in the cluster
97 pv names devices or loopback files to be initialized for later
98 use by LVM or to have their labels wiped, e.g. /dev/sda
99 Multiple devices or files are separated by space or by
100 using shell expansions, e.g. "/dev/sd{a,b,c}"
101 operation mode create or remove, default is create
102 options a "catchall" for other pvcreate/pvremove options
105 3) Linux LVM VG (Volume Group)
107 hostname,VG,vg name,operation mode,options,pv paths
109 hostname hostname of the node in the cluster
111 vg name name of the volume group, e.g. ost_vg
112 operation mode create or remove, default is create
113 options a "catchall" for other vgcreate/vgremove options
115 pv paths physical volumes to construct this VG, required by
117 Multiple PVs are separated by space or by using
118 shell expansions, e.g. "/dev/sd[k-m]1"
120 4) Linux LVM LV (Logical Volume)
122 hostname,LV,lv name,operation mode,options,lv size,vg name
124 hostname hostname of the node in the cluster
126 lv name name of the logical volume to be created (optional)
127 or path of the logical volume to be removed (required
129 operation mode create or remove, default is create
130 options a "catchall" for other lvcreate/lvremove options
132 lv size size [kKmMgGtT] to be allocated for the new LV
133 Default unit is megabytes.
134 vg name name of the VG in which the new LV will be created
138 hostname,module_opts,device name,mount point,device type,fsname,mgs nids,index,
139 format options,mkfs options,mount options,failover nids
141 hostname hostname of the node in the cluster, must match "uname -n"
142 module_opts Lustre networking module options
143 device name Lustre target (block device or loopback file)
144 mount point Lustre target mount point
145 device type Lustre target type (mgs, mdt, ost, mgs|mdt, mdt|mgs)
146 fsname Lustre filesystem name, should be limited to 8 characters
148 mgs nids NID(s) of remote mgs node, required for mdt and ost targets
149 If this item is not given for an mdt, it is assumed that
150 the mdt will also be an mgs, according to mkfs.lustre.
151 index Lustre target index
152 format options a "catchall" that contains options to be passed to mkfs.lustre
153 "--device-size", "--param", etc. all go into this item.
154 mkfs options format options to be wrapped with --mkfsoptions="" and
155 passed to mkfs.lustre
156 mount options If this script is invoked with "-m" option, then the value of
157 this item will be wrapped with --mountfsoptions="" and passed
158 to mkfs.lustre, else the value will be added into /etc/fstab.
159 failover nids NID(s) of failover partner node
161 All the NIDs in one node are delimited by commas (','). When multiple nodes are
162 specified, they are delimited by a colon (':').
164 Items left blank will be set to defaults.
166 Example 1 - Simple, with combo MGS/MDT:
167 -------------------------------------------------------------------------------
169 lustre-mgs,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--device-size=10240
172 lustre-ost,options lnet networks=tcp,/tmp/ost0,/mnt/ost0,ost,,lustre-mgs@tcp0,,--device-size=10240
175 lustre-ost,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,lustre-mgs@tcp0,,--device-size=10240
176 -------------------------------------------------------------------------------
178 Example 2 - Separate MGS/MDT, two network interfaces:
179 -------------------------------------------------------------------------------
181 lustre-mgs1,options lnet 'networks="tcp,elan"',/dev/sda,/mnt/mgs,mgs,,,,--quiet --param="sys.timeout=50",,"defaults,noauto","lustre-mgs2,2@elan"
184 lustre-mdt1,options lnet 'networks="tcp,elan"',/dev/sdb,/mnt/mdt,mdt,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet --param="lov.stripesize=4194304",-J size=16,"defaults,noauto",lustre-mdt2
187 lustre-ost1,options lnet 'networks="tcp,elan"',/dev/sdc,/mnt/ost,ost,lustre2,"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--quiet,-I 512,"defaults,noauto",lustre-ost2
188 -------------------------------------------------------------------------------
190 Example 3 - with combo MGS/MDT failover pair and OST failover pair:
191 -------------------------------------------------------------------------------
193 lustre-mgs1,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240,,,lustre-mgs2@tcp0
195 # combo mgs/mdt backup (--noformat)
196 lustre-mgs2,options lnet networks=tcp,/tmp/mgs,/mnt/mgs,mgs|mdt,,,,--quiet --device-size=10240 --noformat,,,lustre-mgs1@tcp0
199 lustre-ost1,options lnet networks=tcp,/tmp/ost1,/mnt/ost1,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240,,,lustre-ost2@tcp0
201 # ost backup (--noformat) (note different device name)
202 lustre-ost2,options lnet networks=tcp,/tmp/ost2,/mnt/ost2,ost,,"lustre-mgs1@tcp0:lustre-mgs2@tcp0",,--quiet --device-size=10240 --noformat,,,lustre-ost1@tcp0
203 -------------------------------------------------------------------------------
205 Example 4 - Configure Linux MD/LVM devices before formatting Lustre targets:
206 -------------------------------------------------------------------------------
207 # MD device on mgsnode
208 mgsnode,MD,/dev/md0,,-q,1,/dev/sda1 /dev/sdb1
210 # MD/LVM devices on ostnode
211 ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c}"
212 ostnode,MD,/dev/md1,,-q -c 128,5,"/dev/sd{d,e,f}"
213 ostnode,PV,/dev/md0 /dev/md1
214 ostnode,VG,ost_vg,,-s 32M,/dev/md0 /dev/md1
215 ostnode,LV,ost0,,-i 2 -I 128,300G,ost_vg
216 ostnode,LV,ost1,,-i 2 -I 128,300G,ost_vg
219 mgsnode,options lnet networks=tcp,/dev/md0,/mnt/mgs,mgs|mdt,,,,--quiet
222 ostnode,options lnet networks=tcp,/dev/ost_vg/ost0,/mnt/ost0,ost,,mgsnode,,--quiet
225 ostnode,options lnet networks=tcp,/dev/ost_vg/ost1,/mnt/ost1,ost,,mgsnode,,--quiet
226 -------------------------------------------------------------------------------
232 # Get the library of functions
# NOTE(review): @scriptlibdir@ looks like a build-time substitution token, so
# this include presumably only resolves in the installed script -- confirm.
233 . @scriptlibdir@/lc_common
235 #***************************** Global variables *****************************#
236 declare -a MGS_NODENAME # node names of the MGS servers
237 declare -a MGS_IDX # indexes of MGSs in the global arrays
238 declare -i MGS_NUM # number of MGS servers in the cluster
241 declare -a NODE_NAMES # node names in the failover group
242 declare -a TARGET_OPTS # target services in one failover group
244 # All the items in the csv file
# These are parallel arrays: one index selects all fields of one target line.
# They are filled side by side from CONFIG_ITEM[0] .. CONFIG_ITEM[11] where the
# csv file is read.
245 declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME
246 declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS
248 # Heartbeat software requires that node names in the configuration directive
249 # must (normally) match the "uname -n" of that machine. Since the value of the
250 # "failover nids" field in the csv file is the NID(s) of failover partner node,
251 # we have to figure out the corresponding hostname of that node.
# FAILOVERS_NAMES[idx] is derived from FAILOVERS[idx] through
# ip2hostname_multi_node when the csv lines are read.
252 declare -a FAILOVERS_NAMES
259 # Get and check the positional parameters
# In the option string, w, x and t are followed by ':' and therefore take an
# argument (delivered in OPTARG); the remaining letters are plain switches.
260 while getopts "aw:x:t:ndfmuhv" OPTION; do
# NODELIST_OPT accumulates the node-selection switches (" -a", " -w <list>",
# " -x <list>"); it is forwarded unchanged to the helper scripts that are
# invoked near the end of this file.
263 [ -z "${SPECIFIED_NODELIST}" ] && [ -z "${EXCLUDED_NODELIST}" ] \
265 NODELIST_OPT="${NODELIST_OPT} -a"
269 SPECIFIED_NODELIST=$OPTARG
270 NODELIST_OPT="${NODELIST_OPT} -w ${SPECIFIED_NODELIST}"
274 EXCLUDED_NODELIST=$OPTARG
275 NODELIST_OPT="${NODELIST_OPT} -x ${EXCLUDED_NODELIST}"
# The HA type is rejected unless it equals one of the three known constants
# (HBVER_HBV1, HBVER_HBV2, HATYPE_CLUMGR; their values are not defined in the
# visible part of this file).
279 if [ "${HATYPE_OPT}" != "${HBVER_HBV1}" ] \
280 && [ "${HATYPE_OPT}" != "${HBVER_HBV2}" ] \
281 && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
282 echo >&2 $"`basename $0`: Invalid HA software type" \
# REFORMAT_OPTION is later spliced directly after $MKFS when the mkfs command
# line is built ("$MKFS $REFORMAT_OPTION").
294 REFORMAT_OPTION=$"--reformat "
314 # Toss out the parameters we've already processed
315 shift `expr $OPTIND - 1`
317 # Here we expect the csv file
318 if [ $# -eq 0 ]; then
319 echo >&2 $"`basename $0`: Missing csv file!"
323 # Check the items required for OSTs, MDTs and MGS
325 # When formatting an OST, the following items: hostname, module_opts,
326 # device name, device type and mgs nids, cannot have null value.
328 # When formatting an MDT or MGS, the following items: hostname,
329 # module_opts, device name and device type, cannot have null value.
# In addition to the items listed above, the visible code also rejects a
# target whose mount point item is empty.
# Argument: presumably the index of the target in the global item arrays (it
# is used as [i] below; the assignment to i is not visible here).
# All diagnostics are written to stderr.
332 if [ $# -eq 0 ]; then
333 echo >&2 $"`basename $0`: check_item() error: Missing argument"\
334 "for function check_item()!"
340 # Check hostname, module_opts, device name and device type
341 if [ -z "${HOST_NAME[i]}" ]||[ -z "${MODULE_OPTS[i]}" ]\
342 ||[ -z "${DEVICE_NAME[i]}" ]||[ -z "${DEVICE_TYPE[i]}" ]; then
343 echo >&2 $"`basename $0`: check_item() error: Some required"\
344 "item has null value! Check hostname, module_opts,"\
345 "device name and device type!"
# Only a device type of exactly "ost" requires the mgs nids item.
350 if [ "${DEVICE_TYPE[i]}" = "ost" ]&&[ -z "${MGS_NIDS[i]}" ]; then
351 echo >&2 $"`basename $0`: check_item() error: OST's mgs nids"\
352 "item has null value!"
# The mount point is required for every target type.
357 if [ -z "${MOUNT_POINT[i]}" ]; then
358 echo >&2 $"`basename $0`: check_item() error: mount"\
359 "point item of target ${DEVICE_NAME[i]} has null value!"
366 # Get the number of MGS nodes in the cluster
# MGS_NUM starts as the number of elements in MGS_NODENAME. When slot 0 (the
# slot that receives the primary MGS elsewhere in this file) is empty, both
# INIT_IDX and MGS_NUM are advanced by one; INIT_IDX and MGS_NUM are the
# bounds of the "for ((i = INIT_IDX; i < MGS_NUM; i++))" loops in this file.
# NOTE(review): the initial value of INIT_IDX is not visible here -- confirm
# it is reset before this runs.
369 MGS_NUM=${#MGS_NODENAME[@]}
370 [ -z "${MGS_NODENAME[0]}" ] && let "INIT_IDX += 1" \
371 && let "MGS_NUM += 1"
374 # is_mgs_node hostname
375 # Verify whether @hostname is a MGS node
# Returns 0 as soon as an entry of MGS_NODENAME with an index in
# [INIT_IDX, MGS_NUM) equals host_name (presumably a copy of the first
# argument; the assignment is not visible here).
381 for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
382 [ "${MGS_NODENAME[i]}" = "${host_name}" ] && return 0
388 # Check whether the MGS nodes are in the same failover group
# Every MGS node name is compared against the failover host names recorded for
# the other MGS nodes; a missing name is reported on stderr.
396 for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
397 mgs_node=${MGS_NODENAME[i]}
398 for ((j = ${INIT_IDX}; j < ${MGS_NUM}; j++)); do
# A node is never compared with itself.
399 [ "${MGS_NODENAME[j]}" = "${mgs_node}" ] && continue 1
# ${var#*name*} equals ${var} only when "name" does not occur in the value, so
# this is true when mgs_node is absent from FAILOVERS_NAMES[idx].
# NOTE(review): idx is presumably the csv line index of MGS node j (its
# assignment is not visible here) -- confirm.
402 if [ "${FAILOVERS_NAMES[idx]#*$mgs_node*}" = "${FAILOVERS_NAMES[idx]}" ]
404 echo >&2 $"`basename $0`: check_mgs_group() error:"\
405 "MGS node ${mgs_node} is not in the ${HOST_NAME[idx]}"\
415 # Get and check MGS servers.
416 # There should be no more than one MGS specified in the entire csv file.
# NOTE(review): the code below does accept a backup MGS next to the primary
# one (a line whose format options contain "noformat"), so the sentence above
# presumably refers to primary MGS servers -- confirm.
420 declare -i exp_idx # Index of explicit MGS servers
421 declare -i imp_idx # Index of implicit MGS servers
422 local is_exp_mgs is_imp_mgs
425 # Initialize the MGS_NODENAME and MGS_IDX arrays
431 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
435 # Check whether this node is an explicit MGS node
# ${var#*mgs*} differs from ${var} only when the value contains "mgs", so this
# substring test also matches the combined "mgs|mdt" and "mdt|mgs" types.
437 if [ "${DEVICE_TYPE[i]#*mgs*}" != "${DEVICE_TYPE[i]}" ]; then
438 verbose_output "Explicit MGS target" \
439 "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
# An mdt line without an "mgs nids" item is treated as an implicit MGS.
443 if [ "${DEVICE_TYPE[i]}" = "mdt" -a -z "${MGS_NIDS[i]}" ]; then
444 verbose_output "Implicit MGS target" \
445 "${DEVICE_NAME[i]} in host ${HOST_NAME[i]}."
449 # Get and check MGS servers
# is_exp_mgs / is_imp_mgs are executed as commands here, so they are expected
# to hold the words "true" or "false" (their assignments are not visible).
450 if ${is_exp_mgs} || ${is_imp_mgs}; then
451 # Check whether more than one MGS target in one MGS node
452 if is_mgs_node ${HOST_NAME[i]}; then
453 echo >&2 $"`basename $0`: check_mgs() error:"\
454 "More than one MGS target in the same node -"\
455 "\"${HOST_NAME[i]}\"!"
459 # Get and check primary MGS server and backup MGS server
# No "noformat" in the format options: this line describes the primary MGS.
460 if [ "${FORMAT_OPTIONS[i]}" = "${FORMAT_OPTIONS[i]#*noformat*}" ]
# Slot 0 of MGS_NODENAME is reserved for the primary MGS.
463 if [ -z "${MGS_NODENAME[0]}" ]; then
# Mixing explicit and implicit MGS targets in one csv file is an error.
464 if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
465 || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
466 echo >&2 $"`basename $0`: check_mgs() error:"\
467 "There exist both explicit and implicit MGS"\
468 "targets in the csv file!"
471 MGS_NODENAME[0]=${HOST_NAME[i]}
# A primary is already recorded: the two messages below distinguish between
# an unrelated second primary and a failover partner that lacks --noformat.
474 mgs_node=${MGS_NODENAME[0]}
475 if [ "${FAILOVERS_NAMES[i]#*$mgs_node*}" = "${FAILOVERS_NAMES[i]}" ]
477 echo >&2 $"`basename $0`: check_mgs() error:"\
478 "More than one primary MGS nodes in the csv" \
479 "file - ${MGS_NODENAME[0]} and ${HOST_NAME[i]}!"
481 echo >&2 $"`basename $0`: check_mgs() error:"\
482 "MGS nodes ${MGS_NODENAME[0]} and ${HOST_NAME[i]}"\
483 "are failover pair, one of them should use"\
484 "\"--noformat\" in the format options item!"
488 else # Backup MGS server
489 if [ "${is_exp_mgs}" = "true" -a ${imp_idx} -gt 1 ] \
490 || [ "${is_imp_mgs}" = "true" -a ${exp_idx} -gt 1 ]; then
491 echo >&2 $"`basename $0`: check_mgs() error:"\
492 "There exist both explicit and implicit MGS"\
493 "targets in the csv file!"
# Backup MGS nodes are stored at exp_idx / imp_idx, which is then advanced.
497 if ${is_exp_mgs}; then # Explicit MGS
498 MGS_NODENAME[exp_idx]=${HOST_NAME[i]}
500 exp_idx=$(( exp_idx + 1 ))
502 MGS_NODENAME[imp_idx]=${HOST_NAME[i]}
504 imp_idx=$(( imp_idx + 1 ))
507 fi #End of "if ${is_exp_mgs} || ${is_imp_mgs}"
510 # Check whether the MGS nodes are in the same failover group
511 if ! check_mgs_group; then
518 # Construct the command line of mkfs.lustre
# The result is left in the global MKFS_CMD; the visible code prints nothing
# except error messages on stderr.
# Argument: presumably the index of the target in the global item arrays (it
# is used as [i] below; the assignment to i is not visible here).
519 construct_mkfs_cmdline() {
521 if [ $# -eq 0 ]; then
522 echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
523 "Missing argument for function construct_mkfs_cmdline()!"
528 local mgsnids mgsnids_str
529 local failnids failnids_str
# When upgrading, "$TUNEFS --writeconf" is used instead of "$MKFS".
531 if $UPGRADE_TARGET; then
532 MKFS_CMD="$TUNEFS --writeconf"
534 MKFS_CMD="$MKFS $REFORMAT_OPTION"
# Translate the csv device type into the matching target-type switches.
537 case "${DEVICE_TYPE[i]}" in
539 MKFS_CMD="$MKFS_CMD --ost"
542 MKFS_CMD="$MKFS_CMD --mdt"
545 MKFS_CMD="$MKFS_CMD --mgs"
547 "mdt|mgs" | "mgs|mdt")
548 MKFS_CMD="$MKFS_CMD --mgs --mdt"
551 echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
552 "Invalid device type - \"${DEVICE_TYPE[i]}\"!"
557 if [ -n "${FS_NAME[i]}" ]; then
558 MKFS_CMD="$MKFS_CMD --fsname=${FS_NAME[i]}"
# The "mgs nids" item holds one NID list per MGS node, separated by ':';
# each list becomes its own --mgsnode option.
561 if [ -n "${MGS_NIDS[i]}" ]; then
562 mgsnids_str=${MGS_NIDS[i]}
563 for mgsnids in ${mgsnids_str//:/ }; do
564 MKFS_CMD="$MKFS_CMD --mgsnode=$mgsnids"
568 if [ -n "${INDEX[i]}" ]; then
569 MKFS_CMD="$MKFS_CMD --index=${INDEX[i]}"
# The "format options" item is appended verbatim.
572 if [ -n "${FORMAT_OPTIONS[i]}" ]; then
573 MKFS_CMD="$MKFS_CMD ${FORMAT_OPTIONS[i]}"
# --mkfsoptions is not added when upgrading.
576 if ! $UPGRADE_TARGET && [ -n "${MKFS_OPTIONS[i]}" ]; then
577 MKFS_CMD="$MKFS_CMD --mkfsoptions=\"${MKFS_OPTIONS[i]}\""
# Mount options are passed on the command line only when /etc/fstab is not
# going to be modified.
580 if [ -n "${MOUNT_OPTIONS[i]}" ] && ! $MODIFY_FSTAB; then
581 MKFS_CMD="$MKFS_CMD --mountfsoptions=\"${MOUNT_OPTIONS[i]}\""
# Same ':'-separated layout as the mgs nids: one --failnode per partner node.
584 if [ -n "${FAILOVERS[i]}" ]; then
585 failnids_str=${FAILOVERS[i]}
586 for failnids in ${failnids_str//:/ }; do
587 MKFS_CMD="$MKFS_CMD --failnode=$failnids"
# The target device is the last word of the command line.
591 MKFS_CMD="$MKFS_CMD ${DEVICE_NAME[i]}"
595 # Get all the node names in this failover group
# Fills the global NODE_NAMES array: slot 0 is the host of target i, further
# slots hold the result of nids2hostname for each ':'-separated entry of
# FAILOVERS_NAMES[i] (how idx advances is not visible here).
598 if [ $# -eq 0 ]; then
599 echo >&2 $"`basename $0`: get_nodenames() error: Missing"\
600 "argument for function get_nodenames()!"
608 # Initialize the NODE_NAMES array
611 NODE_NAMES[0]=${HOST_NAME[i]}
614 for nids in ${FAILOVERS_NAMES[i]//:/ }
616 NODE_NAMES[idx]=$(nids2hostname ${nids})
# On failure the text captured from nids2hostname is printed on stderr as the
# error message.
617 if [ ${PIPESTATUS[0]} -ne 0 ]; then
618 echo >&2 "${NODE_NAMES[idx]}"
628 # Verify whether the format line has HA items
# Returns 0 when the "failover nids" item of target i is not empty.
632 [ -n "${FAILOVERS[i]}" ] && return 0
637 # Produce HA software's configuration files
# Builds one generator command for the failover group of target i and runs it:
# "-n host[:partner...]" plus one " -d <entry>" per element of TARGET_OPTS.
645 HOSTNAME_OPT=${HOST_NAME[i]}
647 if ! get_nodenames $i; then
648 echo >&2 $"`basename $0`: gen_ha_config() error: Can not get the"\
649 "failover nodenames from failover nids - \"${FAILOVERS[i]}\" in"\
650 "the \"${HOST_NAME[i]}\" failover group!"
# NODE_NAMES[0] is the host already stored in HOSTNAME_OPT, so the loop starts
# at 1 and appends only the partner nodes.
654 for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
655 HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
658 # Target devices option
659 DEVICE_OPT=" -d "${TARGET_OPTS[0]}
660 for ((idx = 1; idx < ${#TARGET_OPTS[@]}; idx++)); do
661 DEVICE_OPT=${DEVICE_OPT}" -d "${TARGET_OPTS[idx]}
664 # Construct the generation script command line
# Both Heartbeat versions use GEN_HB_CONFIG with "-r <type>"; CluManager uses
# GEN_CLUMGR_CONFIG without it.
665 case "${HATYPE_OPT}" in
666 "${HBVER_HBV1}"|"${HBVER_HBV2}") # Heartbeat
667 cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
668 cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
670 "${HATYPE_CLUMGR}") # CluManager
671 cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
672 cmd_line=${cmd_line}${DEVICE_OPT}${VERBOSE_OPT}
676 # Execute script to generate HA software's configuration files
677 verbose_output "Generating HA software's configurations in"\
678 "${HOST_NAME[i]} failover group..."
679 verbose_output "${cmd_line}"
# NOTE(review): the command is a plain string run through eval, so device
# names and mount points taken from the csv file undergo shell expansion and
# word splitting here.
680 eval $(echo "${cmd_line}")
681 if [ ${PIPESTATUS[0]} -ne 0 ]; then
684 verbose_output "Generate HA software's configurations in"\
685 "${HOST_NAME[i]} failover group OK"
690 # Configure HA software
# Nothing is generated when upgrading or when no HA type was given.
692 if $UPGRADE_TARGET || [ -z "${HATYPE_OPT}" ]; then
697 declare -i prim_idx # Index for PRIM_HOSTNAMES array
698 declare -i target_idx # Index for TARGET_OPTS and HOST_INDEX arrays
700 declare -a PRIM_HOSTNAMES # Primary hostnames in all the failover
701 # groups in the lustre cluster
702 declare -a HOST_INDEX # Indices for the same node in all the
703 # format lines in the csv file
706 # Initialize the PRIM_HOSTNAMES array
710 # Get failover groups and generate HA configuration files
711 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
712 prim_host=${HOST_NAME[i]}
# Skip hosts that were already recorded in PRIM_HOSTNAMES by an earlier
# iteration ("continue 2" resumes the outer loop over i).
714 for ((j = 0; j < ${#PRIM_HOSTNAMES[@]}; j++)); do
715 [ "${prim_host}" = "${PRIM_HOSTNAMES[j]}" ] && continue 2
# Collect every target of this host that has failover nids, remembering its
# csv index and a "device:mountpoint" entry.
721 for ((k = 0; k < ${#HOST_NAME[@]}; k++)); do
722 if [ "${prim_host}" = "${HOST_NAME[k]}" ] && is_ha_line "${k}"
724 HOST_INDEX[target_idx]=$k
725 TARGET_OPTS[target_idx]=${DEVICE_NAME[k]}:${MOUNT_POINT[k]}
726 target_idx=$(( target_idx + 1 ))
# Only hosts with at least one HA target form a failover group.
730 if [ ${#TARGET_OPTS[@]} -ne 0 ]; then
731 PRIM_HOSTNAMES[prim_idx]=${prim_host}
732 prim_idx=$(( prim_idx + 1 ))
# HOST_INDEX[0] is the first collected target of this host; gen_ha_config uses
# that line's failover nids to work out the node names of the group.
734 if ! gen_ha_config ${HOST_INDEX[0]}; then
740 if [ ${#PRIM_HOSTNAMES[@]} -eq 0 ]; then
741 verbose_output "There are no \"failover nids\" items in the"\
742 "csv file. No HA configuration files are generated!"
749 # Get all the items in the csv file and do some checks.
# Argument: presumably the csv file name (the statement that opens it on file
# descriptor 9 is not visible here).
# Each accepted line fills one index of the global item arrays.
752 if [ $# -eq 0 ]; then
753 echo >&2 $"`basename $0`: get_items() error: Missing argument"\
754 "for function get_items()!"
762 declare -i line_num=0
# Lines are read from file descriptor 9; -r keeps backslashes literal.
766 while read -u 9 -r LINE; do
# line_num has the integer attribute, so this assignment is evaluated
# arithmetically.
767 line_num=${line_num}+1
768 # verbose_output "Parsing line ${line_num}: $LINE"
770 # Get rid of the empty line
# A line counts as empty when it contains no alphanumeric character.
771 if [ -z "`echo ${LINE}|awk '/[[:alnum:]]/ {print $0}'`" ]; then
775 # Get rid of the comment line
# A line counts as a comment when '#' appears at its start or after whitespace.
776 if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]
781 # Skip the Linux MD/LVM line
# MD/PV/VG/LV lines carry their marker in the second csv field.
782 marker=$(echo ${LINE} | cut -d, -f 2)
783 if [ "${marker}" = "${MD_MARKER}" -o "${marker}" = "${PV_MARKER}" ] \
784 || [ "${marker}" = "${VG_MARKER}" -o "${marker}" = "${LV_MARKER}" ]; then
788 # Skip the host which is not specified in the host list
789 if ! ${USE_ALLNODES}; then
790 hostname=$(echo ${LINE} | cut -d, -f 1)
791 ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue
794 # Parse the config line into CONFIG_ITEM
795 if ! parse_line "$LINE"; then
796 echo >&2 $"`basename $0`: parse_line() error: Occurred"\
797 "on line ${line_num} in ${CSV_FILE}: $LINE"
# CONFIG_ITEM[0..11] follow the column order of a target line as described in
# the usage text (hostname ... failover nids).
801 HOST_NAME[idx]=${CONFIG_ITEM[0]}
802 MODULE_OPTS[idx]=${CONFIG_ITEM[1]}
803 DEVICE_NAME[idx]=${CONFIG_ITEM[2]}
804 MOUNT_POINT[idx]=${CONFIG_ITEM[3]}
805 DEVICE_TYPE[idx]=${CONFIG_ITEM[4]}
806 FS_NAME[idx]=${CONFIG_ITEM[5]}
807 MGS_NIDS[idx]=${CONFIG_ITEM[6]}
808 INDEX[idx]=${CONFIG_ITEM[7]}
809 FORMAT_OPTIONS[idx]=${CONFIG_ITEM[8]}
810 MKFS_OPTIONS[idx]=${CONFIG_ITEM[9]}
811 MOUNT_OPTIONS[idx]=${CONFIG_ITEM[10]}
812 FAILOVERS[idx]=${CONFIG_ITEM[11]}
# Escape the double quotes in the module options: the value is later placed
# inside a double-quoted echo in a remote command.
814 MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'`
816 # Convert IP addresses in NIDs to hostnames
817 FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]})
# On failure the captured helper output is printed on stderr as the message.
818 if [ ${PIPESTATUS[0]} -ne 0 ]; then
819 echo >&2 "${FAILOVERS_NAMES[idx]}"
823 # Check some required items for formatting target
824 if ! check_item $idx; then
825 echo >&2 $"`basename $0`: check_item() error:"\
826 "Occurred on line ${line_num} in ${CSV_FILE}."
836 # check_lnet_connect hostname_index mgs_hostname
837 # Check whether the target node can contact the MGS node @mgs_hostname
838 # If @mgs_hostname is null, then it means the primary MGS node
# The check runs "${LCTL} ping" on the target host through ${REMOTE}, once
# for each comma-separated NID of the selected MGS node.
839 check_lnet_connect() {
843 local COMMAND RET_STR
849 # Execute remote command to check that
850 # this node can contact the MGS node
851 verbose_output "Checking lnet connectivity between" \
852 "${HOST_NAME[i]} and the MGS node ${mgs_node}"
# The first ':'-separated entry of the "mgs nids" item is taken as the NID
# list of the primary MGS node.
853 mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'`
# With no MGS host name given, or with only one MGS, use the primary's NIDs.
855 if [ -z "${mgs_node}" -o $MGS_NUM -eq 1 ]; then
856 nids_str=${mgs_prim_nids} # nids of primary MGS node
857 if [ -z "${nids_str}" ]; then
858 echo >&2 $"`basename $0`: check_lnet_connect() error:"\
859 "Check the mgs nids item of host ${HOST_NAME[i]}!"\
860 "Missing nids of the primary MGS node!"
864 # Get the corresponding NID(s) of the MGS node ${mgs_node}
865 # from the "mgs nids" field
866 nids_str=$(get_mgs_nids ${mgs_node} ${MGS_NIDS[i]})
867 if [ ${PIPESTATUS[0]} -ne 0 ]; then
868 echo >&2 "${nids_str}"
874 for mgs_nid in ${nids_str//,/ }
# "|| echo failed" turns a failing ping into a marker word in the captured
# output; the test below looks for that word in addition to the exit status.
876 COMMAND=$"${LCTL} ping ${mgs_nid} 5 || echo failed 2>&1"
877 RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1)
878 if [ ${PIPESTATUS[0]} -eq 0 -a "${RET_STR}" = "${RET_STR#*failed*}" ]
880 # This node can contact the MGS node
881 verbose_output "${HOST_NAME[i]} can contact the MGS" \
882 "node ${mgs_node} by using nid \"${mgs_nid}\"!"
# ping_mgs is executed as a command, so it is expected to hold "true" or
# "false"; presumably it is set to true once a NID answered (assignment not
# visible here).
888 if ! ${ping_mgs}; then
889 echo >&2 "`basename $0`: check_lnet_connect() error:" \
890 "${HOST_NAME[i]} cannot contact the MGS node ${mgs_node}"\
891 "with nids - \"${nids_str}\"! Check ${LCTL} command!"
898 # Start lnet network in the cluster node and check that
899 # this node can contact the MGS node
# Argument: presumably the index of the target in the global item arrays (it
# is used as [i] / $i below; the assignment is not visible here).
# The whole check depends on VERIFY_CONNECT.
901 if ! ${VERIFY_CONNECT}; then
906 if [ $# -eq 0 ]; then
907 echo >&2 $"`basename $0`: check_lnet() error: Missing"\
908 "argument for function check_lnet()!"
914 local COMMAND RET_STR
916 # Execute remote command to start lnet network
917 verbose_output "Starting lnet network in ${HOST_NAME[i]}"
# \$PATH is escaped so that it is expanded on the remote host.
918 COMMAND="PATH=\$PATH:/sbin:/usr/sbin modprobe lnet; ${LCTL} network up 2>&1"
919 RET_STR=$(${REMOTE} ${HOST_NAME[i]} "${COMMAND}" 2>&1)
# Success needs both a zero status and the text "LNET configured" in the
# remote output.
920 if [ ${PIPESTATUS[0]} -ne 0 -o "${RET_STR}" = "${RET_STR#*LNET configured*}" ]
922 echo >&2 "`basename $0`: check_lnet() error: remote" \
923 "${HOST_NAME[i]} error: ${RET_STR}"
# NOTE(review): the body of this branch is not visible; presumably an MGS node
# skips the connectivity test against itself -- confirm.
927 if is_mgs_node ${HOST_NAME[i]}; then
931 # Execute remote command to check that
932 # this node can contact the MGS node
# The loop starts at slot 0 rather than INIT_IDX; an empty MGS_NODENAME entry
# makes check_lnet_connect use the NIDs of the primary MGS node.
933 for ((j = 0; j < ${MGS_NUM}; j++)); do
934 if ! check_lnet_connect $i ${MGS_NODENAME[j]}; then
942 # Start lnet network in the MGS node
# For every recorded MGS node: push its lnet module options through
# ${MODULE_CONFIG} on that node, then start and verify lnet with check_lnet.
# Empty slots 0 and 1 of MGS_NODENAME mean that no MGS target was recorded.
948 if [ -z "${MGS_NODENAME[0]}" -a -z "${MGS_NODENAME[1]}" ]; then
949 if ${USE_ALLNODES}; then
950 verbose_output "There is no MGS target in the ${CSV_FILE} file."
952 verbose_output "There is no MGS target in the node list \"${NODES_TO_USE}\"."
957 for ((i = ${INIT_IDX}; i < ${MGS_NUM}; i++)); do
958 # Execute remote command to add lnet options lines to
959 # the MGS node's modprobe.conf/modules.conf
# NOTE(review): idx is presumably the csv line index of MGS node i (e.g. taken
# from MGS_IDX); its assignment is not visible here -- confirm.
961 COMMAND=$"echo \"${MODULE_OPTS[${idx}]}\"|${MODULE_CONFIG}"
962 verbose_output "Adding lnet module options to ${MGS_NODENAME[i]}"
963 ${REMOTE} ${MGS_NODENAME[i]} "${COMMAND}" >&2
964 if [ ${PIPESTATUS[0]} -ne 0 ]; then
965 echo >&2 "`basename $0`: start_mgs_lnet() error:"\
966 "Failed to execute remote command to" \
967 "add module options to ${MGS_NODENAME[i]}!"\
968 "Check ${MODULE_CONFIG}!"
972 # Start lnet network in the MGS node
973 if ! check_lnet ${idx}; then
981 # Execute remote command to add lnet options lines to remote nodes'
982 # modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
# Per target: build the mkfs/tunefs command, create the mount point, configure
# and check lnet (not when upgrading, not on MGS nodes), then start the
# format/upgrade command in the background. All background commands are
# waited for at the end.
985 declare -a REMOTE_PID
986 declare -a REMOTE_CMD
990 if [ ${#HOST_NAME[@]} -eq 0 ]; then
991 verbose_output "There are no lustre targets specified."
995 if ! $UPGRADE_TARGET; then
996 # Start lnet network in the MGS node
997 start_mgs_lnet || return 1
1000 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
1001 # Construct the command line of mkfs.lustre
1002 if ! construct_mkfs_cmdline $i; then
1006 # create the mount point on the node
1007 COMMAND="mkdir -p ${MOUNT_POINT[i]}"
1008 verbose_output "Creating the mount point ${MOUNT_POINT[i]} on" \
1010 ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
1011 if [ ${PIPESTATUS[0]} -ne 0 ]; then
1012 echo >&2 "`basename $0`: mass_config() error:"\
1013 "Failed to execute remote command to"\
1014 "create the mountpoint on ${HOST_NAME[i]}!"
# MGS nodes are excluded here because start_mgs_lnet has already configured
# and checked them above.
1018 if ! $UPGRADE_TARGET && ! is_mgs_node ${HOST_NAME[i]}; then
1019 # Execute remote command to add lnet options lines to
1020 # modprobe.conf/modules.conf
1021 COMMAND=$"echo \"${MODULE_OPTS[i]}\"|${MODULE_CONFIG}"
1022 verbose_output "Adding lnet module options to" \
1024 ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
1025 if [ ${PIPESTATUS[0]} -ne 0 ]; then
1026 echo >&2 "`basename $0`: mass_config() error:"\
1027 "Failed to execute remote command to"\
1028 "add module options to ${HOST_NAME[i]}!"
1032 # Check lnet networks
1033 if ! check_lnet $i; then
1038 # Execute remote command to format or upgrade Lustre target
# OP is only used in the progress messages below.
1040 $UPGRADE_TARGET && OP="Upgrading" || OP="Formatting"
1041 verbose_output "$OP Lustre target ${DEVICE_NAME[i]} on ${HOST_NAME[i]}..."
# MKFS_CMD was left behind by construct_mkfs_cmdline for this target.
1043 COMMAND="$EXPORT_PATH $MKFS_CMD"
# REMOTE_CMD keeps a printable copy of the command for messages only.
1044 REMOTE_CMD[$pid_num]="$REMOTE ${HOST_NAME[i]} \"$COMMAND\""
1045 verbose_output "$OP command line is: ${REMOTE_CMD[$pid_num]}"
# The command runs in the background; its PID is recorded for the wait loop.
1047 $REMOTE ${HOST_NAME[i]} "$COMMAND" &
1048 REMOTE_PID[$pid_num]=$!
1049 let pid_num=$pid_num+1
1053 # Wait for the exit status of the background remote command
1054 verbose_output "Waiting for the return of the remote command..."
1055 fail_exit_status=false
# Every job is waited for, so one failure does not hide later ones; a failure
# is only remembered in fail_exit_status.
1056 for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
1057 wait ${REMOTE_PID[${pid_num}]}
1058 if [ ${PIPESTATUS[0]} -ne 0 ]; then
1059 echo >&2 "`basename $0`: mass_config() error: Failed"\
1060 "to execute \"${REMOTE_CMD[${pid_num}]}\"!"
1061 fail_exit_status=true
1065 if ${fail_exit_status}; then
1069 verbose_output "Success on all Lustre targets!"
1073 # get_mntopts hostname device_name failovers
1074 # Construct the mount options of Lustre target @device_name in host @hostname
# The visible error messages are written to stdout, not stderr: the caller in
# this file captures the output of get_mntopts and prints it on stderr itself
# when the call fails.
1077 local device_name=$2
# Targets with failover nids additionally get "noauto".
1082 [ -n "${failovers}" ] && mnt_opts=defaults,noauto || mnt_opts=defaults
1084 # Execute remote command to check whether the device
1085 # is a block device or not
1086 ret_str=$(${REMOTE} ${host_name} \
1087 "[ -b ${device_name} ] && echo block || echo loop" 2>&1)
1088 if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then
1089 echo "`basename $0`: get_mntopts() error:" \
1090 "remote command to ${host_name} error: ${ret_str}"
# The remote command always echoes "block" or "loop", so empty output means
# that nothing came back from the remote host.
1094 if [ -z "${ret_str}" ]; then
1095 echo "`basename $0`: get_mntopts() error: remote error:" \
1096 "No results from remote!" \
1097 "Check network connectivity between the local host and ${host_name}!"
# Output ending in "loop" (device is not a block device) adds the "loop"
# mount option.
1101 [ "${ret_str}" != "${ret_str#*loop}" ] && mnt_opts=${mnt_opts},loop
1107 # Execute remote command to modify /etc/fstab to add the new Lustre targets
# For every target an fstab entry "device  mountpoint  fstype  options  0 0"
# is built locally and then written on the target's host.
1110 local mntent mntopts device_name
# Controlled by MODIFY_FSTAB (the body of this branch is not visible here).
1113 if ! ${MODIFY_FSTAB}; then
1117 for ((i = 0; i < ${#HOST_NAME[@]}; i++)); do
1118 verbose_output "Modify /etc/fstab of host ${HOST_NAME[i]}"\
1119 "to add Lustre target ${DEVICE_NAME[i]}"
# The entry is assembled with literal "\t" sequences; they become tabs when
# the string is printed with "echo -e".
1120 mntent=${DEVICE_NAME[i]}"\t\t"${MOUNT_POINT[i]}"\t\t"${FS_TYPE}
1123 if [ -n "${MOUNT_OPTIONS[i]}" ]; then
1124 # The mount options already specified in the csv file.
1125 mntopts=${MOUNT_OPTIONS[i]}
1127 mntopts=$(get_mntopts ${HOST_NAME[i]} ${DEVICE_NAME[i]}\
1129 if [ ${PIPESTATUS[0]} -ne 0 ]; then
1130 echo >&2 "${mntopts}"
1135 mntent=${mntent}"\t"${mntopts}"\t"0" "0
1136 verbose_output "`echo -e ${mntent}`"
1138 # Execute remote command to modify /etc/fstab
# Escape every '/' in the device name so that it can be used inside the sed
# address /^.../ below.
1139 device_name=${DEVICE_NAME[i]//\//\\/}
# Remote side: source lc_common, delete any existing fstab line that starts
# with this device name followed by a tab, then append the new entry.
# fcanon is presumably provided by lc_common -- confirm.
1140 COMMAND=". @scriptlibdir@/lc_common; \
1141 sed -i \"/^${device_name}\t/d\" \$(fcanon /etc/fstab); \
1142 echo -e \"${mntent}\" >> \$(fcanon /etc/fstab)"
1143 ${REMOTE} ${HOST_NAME[i]} "${COMMAND}" >&2
1144 if [ ${PIPESTATUS[0]} -ne 0 ]; then
1145 echo >&2 "`basename $0`: modify_fstab() error:"\
1146 "Failed to modify /etc/fstab of host ${HOST_NAME[i]}"\
1147 "to add Lustre target ${DEVICE_NAME[i]}!"
1156 # Check the csv file
# $1 is the csv file left over after option parsing.
1157 if ! check_file $1; then
1161 # Get the list of nodes to be operated on
1162 NODES_TO_USE=$(get_nodelist)
# On failure the captured output of get_nodelist is the error text.
1163 [ ${PIPESTATUS[0]} -ne 0 ] && echo >&2 "${NODES_TO_USE}" && exit 1
1165 # Check the node list
1166 check_nodelist ${NODES_TO_USE} || exit 1
1168 if ${VERIFY_CONNECT}; then
1169 # Check the network connectivity and hostnames
1170 echo "`basename $0`: Checking the cluster network connectivity"\
1172 if ! ${VERIFY_CLUSTER_NET} ${NODELIST_OPT} ${VERBOSE_OPT} ${CSV_FILE}; then
1175 echo "`basename $0`: Check the cluster network connectivity"\
# MD/LVM configuration is skipped when upgrading.
1180 if $CONFIG_MD_LVM && ! $UPGRADE_TARGET; then
1181 # Configure Linux MD/LVM devices
1182 echo "`basename $0`: Configuring Linux MD/LVM devices..."
# The MD script runs before the LVM script.
1183 if ! ${SCRIPT_CONFIG_MD} ${NODELIST_OPT} ${VERBOSE_OPT} ${CSV_FILE}; then
1187 if ! ${SCRIPT_CONFIG_LVM} ${NODELIST_OPT} ${VERBOSE_OPT} ${CSV_FILE}; then
1190 echo "`basename $0`: Configure Linux MD/LVM devices OK!"
1194 # Configure the Lustre cluster
1195 echo "`basename $0`: ******** Lustre cluster configuration START ********"
# The steps run in this order: parse the csv file, validate the MGS layout,
# configure lnet and format/upgrade the targets, update /etc/fstab, and
# finally generate the HA configuration.
1196 if ! get_items ${CSV_FILE}; then
1200 if ! check_mgs; then
1204 if ! mass_config; then
1208 if ! modify_fstab; then
1212 # Produce HA software's configuration files
1213 if ! config_ha; then
1218 echo "`basename $0`: ******** Lustre cluster configuration END **********"