3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
# Default TCP buffer size, in bytes (8 MB).
# NOTE(review): presumably applied when configuring the TCP NAL — confirm
# against the socknal setup code elsewhere in this file.
DEFAULT_TCPBUF = 8388608

# Maximum number of devices to search for.
# (the /dev/loop* nodes need to be created beforehand)
MAX_LOOP_DEVICES = 256
# Name of the portals subtree; joined with config paths when locating
# external utilities (see find_prog).
PORTALS_DIR = 'portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
107 "socknal" : (1 << 11),
108 "qswnal" : (1 << 12),
109 "pinger" : (1 << 13),
110 "filter" : (1 << 14),
116 "ptlrouter" : (1 << 20),
# Holds the first non-zero cleanup failure code; later failures must not
# clobber the original exit status.
first_cleanup_error = 0

def cleanup_error(rc):
    """Record *rc* as the overall cleanup status unless one is already set."""
    global first_cleanup_error
    if first_cleanup_error:
        return
    first_cleanup_error = rc
132 # ============================================================
133 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with a LconfError flagging an unimplemented code path."""
    # Call-form raise: identical behavior to the legacy `raise Cls, arg`
    # comma syntax, but accepted by both Python 2 and Python 3 parsers.
    raise Lustre.LconfError(msg + ' not implemented yet.')
139 msg = string.join(map(str,args))
140 if not config.noexec:
141 raise Lustre.LconfError(msg)
146 msg = string.join(map(str,args))
151 print string.strip(s)
155 msg = string.join(map(str,args))
158 # ack, python's builtin int() does not support '0x123' syntax.
159 # eval can do it, although what a hack!
163 return eval(s, {}, {})
166 except SyntaxError, e:
167 raise ValueError("not a number")
169 raise ValueError("not a number")
171 # ============================================================
172 # locally defined exceptions
173 class CommandError (exceptions.Exception):
174 def __init__(self, cmd_name, cmd_err, rc=None):
175 self.cmd_name = cmd_name
176 self.cmd_err = cmd_err
181 if type(self.cmd_err) == types.StringType:
183 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
185 print "! %s: %s" % (self.cmd_name, self.cmd_err)
186 elif type(self.cmd_err) == types.ListType:
188 print "! %s (error %d):" % (self.cmd_name, self.rc)
190 print "! %s:" % (self.cmd_name)
191 for s in self.cmd_err:
192 print "> %s" %(string.strip(s))
197 # ============================================================
198 # handle daemons, like the acceptor
200 """ Manage starting and stopping a daemon. Assumes daemon manages
201 it's own pid file. """
203 def __init__(self, cmd):
209 log(self.command, "already running.")
211 self.path = find_prog(self.command)
213 panic(self.command, "not found.")
214 ret, out = runcmd(self.path +' '+ self.command_line())
216 raise CommandError(self.path, out, ret)
220 pid = self.read_pidfile()
223 log ("killing process", pid)
226 log("was unable to find pid of " + self.command)
227 #time.sleep(1) # let daemon die
229 log("unable to kill", self.command, e)
231 log("unable to kill", self.command)
234 pid = self.read_pidfile()
240 log("was unable to find pid of " + self.command)
247 def read_pidfile(self):
249 fp = open(self.pidfile(), 'r')
259 def clean_pidfile(self):
260 """ Remove a stale pidfile """
261 log("removing stale pidfile:", self.pidfile())
263 os.unlink(self.pidfile())
265 log(self.pidfile(), e)
267 class AcceptorHandler(DaemonHandler):
268 def __init__(self, port, net_type):
269 DaemonHandler.__init__(self, "acceptor")
274 return "/var/run/%s-%d.pid" % (self.command, self.port)
def command_line(self):
    """Build the acceptor's command-line arguments: flags followed by port.

    Returns a single space-separated string.
    """
    # str.join with a space is the portable spelling of the deprecated
    # string.join(), which joined with a single space by default.
    return ' '.join(map(str, (self.flags, self.port)))
281 # start the acceptors
283 if config.lctl_dump or config.record:
285 for port in acceptors.keys():
286 daemon = acceptors[port]
287 if not daemon.running():
290 def run_one_acceptor(port):
291 if config.lctl_dump or config.record:
293 if acceptors.has_key(port):
294 daemon = acceptors[port]
295 if not daemon.running():
298 panic("run_one_acceptor: No acceptor defined for port:", port)
300 def stop_acceptor(port):
301 if acceptors.has_key(port):
302 daemon = acceptors[port]
307 # ============================================================
308 # handle lctl interface
311 Manage communication with lctl
314 def __init__(self, cmd):
316 Initialize close by finding the lctl binary.
318 self.lctl = find_prog(cmd)
320 self.record_device = ''
323 debug('! lctl not found')
326 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    # Once set, lctl commands are dumped to this file instead of being
    # executed (see the save_file branch in the run path).
    self.save_file = file
def record(self, dev_name, logname):
    """Begin recording subsequent lctl commands into config log *logname*
    stored on device *dev_name*."""
    log("Recording log", logname, "on", dev_name)
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Stop recording lctl commands and forget the target device/log."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Switch *fd* to non-blocking mode, preserving its other status flags."""
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
348 the cmds are written to stdin of lctl
349 lctl doesn't return errors when run in script mode, so
351 should modify command line to accept multiple commands, or
352 create complex command line options
356 cmds = '\n dump ' + self.save_file + '\n' + cmds
357 elif self.record_device:
361 %s""" % (self.record_device, self.record_log, cmds)
363 debug("+", cmd_line, cmds)
364 if config.noexec: return (0, [])
366 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
367 child.tochild.write(cmds + "\n")
368 child.tochild.close()
369 # print "LCTL:", cmds
371 # From "Python Cookbook" from O'Reilly
372 outfile = child.fromchild
373 outfd = outfile.fileno()
374 self.set_nonblock(outfd)
375 errfile = child.childerr
376 errfd = errfile.fileno()
377 self.set_nonblock(errfd)
379 outdata = errdata = ''
382 ready = select.select([outfd,errfd],[],[]) # Wait for input
383 if outfd in ready[0]:
384 outchunk = outfile.read()
385 if outchunk == '': outeof = 1
386 outdata = outdata + outchunk
387 if errfd in ready[0]:
388 errchunk = errfile.read()
389 if errchunk == '': erreof = 1
390 errdata = errdata + errchunk
391 if outeof and erreof: break
392 # end of "borrowed" code
395 if os.WIFEXITED(ret):
396 rc = os.WEXITSTATUS(ret)
399 if rc or len(errdata):
400 raise CommandError(self.lctl, errdata, rc)
403 def runcmd(self, *args):
405 run lctl using the command line
407 cmd = string.join(map(str,args))
408 debug("+", self.lctl, cmd)
409 rc, out = run(self.lctl, cmd)
411 raise CommandError(self.lctl, out, rc)
414 def clear_log(self, dev, log):
415 """ clear an existing log """
420 quit """ % (dev, log)
423 def root_squash(self, name, uid, nid):
427 quit""" % (name, uid, nid)
430 def network(self, net, nid):
435 quit """ % (net, nid)
439 def add_interface(self, net, ip, netmask = ""):
440 """ add an interface """
444 quit """ % (net, ip, netmask)
447 # delete an interface
448 def del_interface(self, net, ip):
449 """ delete an interface """
456 # create a new connection
457 def add_uuid(self, net_type, uuid, nid):
458 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
461 def add_peer(self, net_type, nid, hostaddr, port):
462 if net_type in ('tcp',) and not config.lctl_dump:
467 nid, hostaddr, port )
469 elif net_type in ('openib','iib',) and not config.lctl_dump:
477 def connect(self, srv):
478 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
479 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
481 hostaddr = string.split(srv.hostaddr[0], '/')[0]
482 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
485 def recover(self, dev_name, new_conn):
488 recover %s""" %(dev_name, new_conn)
491 # add a route to a range
492 def add_route(self, net, gw, lo, hi):
500 except CommandError, e:
504 def del_route(self, net, gw, lo, hi):
509 quit """ % (net, gw, lo, hi)
512 # add a route to a host
513 def add_route_host(self, net, uuid, gw, tgt):
514 self.add_uuid(net, uuid, tgt)
522 except CommandError, e:
526 # add a route to a range
527 def del_route_host(self, net, uuid, gw, tgt):
533 quit """ % (net, gw, tgt)
537 def del_peer(self, net_type, nid, hostaddr):
538 if net_type in ('tcp',) and not config.lctl_dump:
542 del_peer %s %s single_share
546 elif net_type in ('openib','iib',) and not config.lctl_dump:
550 del_peer %s single_share
555 # disconnect one connection
556 def disconnect(self, srv):
557 self.del_uuid(srv.nid_uuid)
558 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
560 hostaddr = string.split(srv.hostaddr[0], '/')[0]
561 self.del_peer(srv.net_type, srv.nid, hostaddr)
563 def del_uuid(self, uuid):
571 def disconnectAll(self, net):
579 def attach(self, type, name, uuid):
582 quit""" % (type, name, uuid)
585 def setup(self, name, setup = ""):
589 quit""" % (name, setup)
592 def add_conn(self, name, conn_uuid):
596 quit""" % (name, conn_uuid)
600 # create a new device with lctl
601 def newdev(self, type, name, uuid, setup = ""):
602 self.attach(type, name, uuid);
604 self.setup(name, setup)
605 except CommandError, e:
606 self.cleanup(name, uuid, 0)
611 def cleanup(self, name, uuid, force, failover = 0):
612 if failover: force = 1
618 quit""" % (name, ('', 'force')[force],
619 ('', 'failover')[failover])
623 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
624 stripe_sz, stripe_off, pattern, devlist = None):
627 lov_setup %s %d %d %d %s %s
628 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
632 # add an OBD to a LOV
633 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
635 lov_modify_tgts add %s %s %s %s
636 quit""" % (name, obd_uuid, index, gen)
640 def lmv_setup(self, name, uuid, desc_uuid, devlist):
644 quit""" % (name, uuid, desc_uuid, devlist)
647 # delete an OBD from a LOV
648 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
650 lov_modify_tgts del %s %s %s %s
651 quit""" % (name, obd_uuid, index, gen)
655 def deactivate(self, name):
663 def dump(self, dump_file):
666 quit""" % (dump_file)
669 # get list of devices
670 def device_list(self):
671 devices = '/proc/fs/lustre/devices'
673 if os.access(devices, os.R_OK):
675 fp = open(devices, 'r')
683 def lustre_version(self):
684 rc, out = self.runcmd('version')
688 def mount_option(self, profile, osc, mdc):
690 mount_option %s %s %s
691 quit""" % (profile, osc, mdc)
694 # delete mount options
695 def del_mount_option(self, profile):
701 def set_timeout(self, timeout):
707 def set_lustre_upcall(self, upcall):
712 # ============================================================
713 # Various system-level functions
714 # (ideally moved to their own module)
716 # Run a command and return the output and status.
717 # stderr is sent to /dev/null, could use popen3 to
718 # save it if necessary
721 if config.noexec: return (0, [])
722 f = os.popen(cmd + ' 2>&1')
732 cmd = string.join(map(str,args))
735 # Run a command in the background.
736 def run_daemon(*args):
737 cmd = string.join(map(str,args))
739 if config.noexec: return 0
740 f = os.popen(cmd + ' 2>&1')
748 # Determine full path to use for an external command
749 # searches dirname(argv[0]) first, then PATH
751 syspath = string.split(os.environ['PATH'], ':')
752 cmdpath = os.path.dirname(sys.argv[0])
753 syspath.insert(0, cmdpath);
755 syspath.insert(0, os.path.join(config.portals, 'utils/'))
757 prog = os.path.join(d,cmd)
758 if os.access(prog, os.X_OK):
762 # Recursively look for file starting at base dir
763 def do_find_file(base, mod):
764 fullname = os.path.join(base, mod)
765 if os.access(fullname, os.R_OK):
767 for d in os.listdir(base):
768 dir = os.path.join(base,d)
769 if os.path.isdir(dir):
770 module = do_find_file(dir, mod)
774 # is the path a block device?
781 return stat.S_ISBLK(s[stat.ST_MODE])
783 # find the journal device from mkfs options
789 while i < len(x) - 1:
790 if x[i] == '-J' and x[i+1].startswith('device='):
796 # build fs according to type
798 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
804 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
806 # devsize is in 1k, and fs block count is in 4k
807 block_cnt = devsize/4
809 if fstype in ('ext3', 'extN', 'ldiskfs'):
810 # ext3 journal size is in megabytes
811 # but don't set jsize if mkfsoptions indicates a separate journal device
812 if jsize == 0 and jdev(mkfsoptions) == '':
814 if not is_block(dev):
815 ret, out = runcmd("ls -l %s" %dev)
816 devsize = int(string.split(out[0])[4]) / 1024
818 # sfdisk works for symlink, hardlink, and realdev
819 ret, out = runcmd("sfdisk -s %s" %dev)
821 devsize = int(out[0])
823 # sfdisk -s will fail for too large block device,
824 # then, read the size of partition from /proc/partitions
826 # get the realpath of the device
827 # it may be the real device, such as /dev/hda7
828 # or the hardlink created via mknod for a device
829 if 'realpath' in dir(os.path):
830 real_dev = os.path.realpath(dev)
834 while os.path.islink(real_dev) and (link_count < 20):
835 link_count = link_count + 1
836 dev_link = os.readlink(real_dev)
837 if os.path.isabs(dev_link):
840 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
842 panic("Entountered too many symbolic links resolving block device:", dev)
844 # get the major and minor number of the realpath via ls
845 # it seems python(os.stat) does not return
846 # the st_rdev member of the stat structure
847 ret, out = runcmd("ls -l %s" %real_dev)
848 major = string.split(string.split(out[0])[4], ",")[0]
849 minor = string.split(out[0])[5]
851 # get the devsize from /proc/partitions with the major and minor number
852 ret, out = runcmd("cat /proc/partitions")
855 if string.split(line)[0] == major and string.split(line)[1] == minor:
856 devsize = int(string.split(line)[2])
859 if devsize > 1024 * 1024:
860 jsize = ((devsize / 102400) * 4)
863 if jsize: jopt = "-J size=%d" %(jsize,)
864 if isize: iopt = "-I %d" %(isize,)
865 mkfs = 'mkfs.ext2 -j -b 4096 '
866 if not isblock or config.force:
868 if jdev(mkfsoptions) != '':
869 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
871 jmkfs = jmkfs + '-F '
872 jmkfs = jmkfs + jdev(mkfsoptions)
873 (ret, out) = run (jmkfs)
875 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
876 elif fstype == 'reiserfs':
877 # reiserfs journal size is in blocks
878 if jsize: jopt = "--journal_size %d" %(jsize,)
879 mkfs = 'mkreiserfs -ff'
881 panic('unsupported fs type: ', fstype)
883 if config.mkfsoptions != None:
884 mkfs = mkfs + ' ' + config.mkfsoptions
885 if mkfsoptions != None:
886 mkfs = mkfs + ' ' + mkfsoptions
887 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
889 panic("Unable to build fs:", dev, string.join(out))
890 # enable hash tree indexing on fsswe
891 if fstype in ('ext3', 'extN', 'ldiskfs'):
892 htree = 'echo "feature FEATURE_C5" | debugfs -w'
893 (ret, out) = run (htree, dev)
895 panic("Unable to enable htree:", dev)
897 # some systems use /dev/loopN, some /dev/loop/N
901 if not os.access(loop + str(0), os.R_OK):
903 if not os.access(loop + str(0), os.R_OK):
904 panic ("can't access loop devices")
907 # find loop device assigned to the file
908 def find_assigned_loop(file):
910 for n in xrange(0, MAX_LOOP_DEVICES):
912 if os.access(dev, os.R_OK):
913 (stat, out) = run('losetup', dev)
914 if out and stat == 0:
915 m = re.search(r'\((.*)\)', out[0])
916 if m and file == m.group(1):
922 # create file if necessary and assign the first free loop device
923 def init_loop(file, size, fstype, journal_size, inode_size,
924 mkfsoptions, reformat, autoformat, backfstype, backfile):
927 realfstype = backfstype
928 if is_block(backfile):
929 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
930 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
936 dev = find_assigned_loop(realfile)
938 print 'WARNING: file ', realfile, 'already mapped to', dev
941 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
943 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
944 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
946 panic("Unable to create backing store:", realfile)
948 mkfs(realfile, size, realfstype, journal_size, inode_size,
949 mkfsoptions, isblock=0)
952 # find next free loop
953 for n in xrange(0, MAX_LOOP_DEVICES):
955 if os.access(dev, os.R_OK):
956 (stat, out) = run('losetup', dev)
958 print "attach " + realfile + " <-> " + dev
959 run('losetup', dev, realfile)
962 print "out of loop devices"
964 print "out of loop devices"
967 # undo loop assignment
968 def clean_loop(dev, fstype, backfstype, backdev):
973 if not is_block(realfile):
974 dev = find_assigned_loop(realfile)
976 print "detach " + dev + " <-> " + realfile
977 ret, out = run('losetup -d', dev)
979 log('unable to clean loop device:', dev, 'for file:', realfile)
982 # finilizes passed device
def clean_dev(dev, fstype, backfstype, backdev):
    """Finalize *dev*: tear down its loop mapping when one may exist."""
    # Only smfs devices and non-block paths can be loopback-backed.
    loop_backed = (fstype == 'smfs') or not is_block(dev)
    if loop_backed:
        clean_loop(dev, fstype, backfstype, backdev)
987 # determine if dev is formatted as a <fstype> filesystem
988 def need_format(fstype, dev):
989 # FIXME don't know how to implement this
992 # initialize a block device if needed
993 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
994 inode_size, mkfsoptions, backfstype, backdev):
998 if fstype == 'smfs' or not is_block(dev):
999 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1000 mkfsoptions, reformat, autoformat, backfstype, backdev)
1001 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1002 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1005 # panic("device:", dev,
1006 # "not prepared, and autoformat is not set.\n",
1007 # "Rerun with --reformat option to format ALL filesystems")
1012 """lookup IP address for an interface"""
1013 rc, out = run("/sbin/ifconfig", iface)
1016 addr = string.split(out[1])[1]
1017 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for the passed fstype and target
    ('mds' or 'ost').

    Returns None for filesystem types that need no special options.
    """
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        # asyncdel is only available for OSTs on 2.4-series kernels.
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        return mountfsoptions
    # Explicit fall-through (was an implicit None before).
    return None
1029 def sys_get_elan_position_file():
1030 procfiles = ["/proc/elan/device0/position",
1031 "/proc/qsnet/elan4/device0/position",
1032 "/proc/qsnet/elan3/device0/position"]
1034 if os.access(p, os.R_OK):
1038 def sys_get_local_nid(net_type, wildcard, cluster_id):
1039 """Return the local nid."""
1041 if sys_get_elan_position_file():
1042 local = sys_get_local_address('elan', '*', cluster_id)
1044 local = sys_get_local_address(net_type, wildcard, cluster_id)
1047 def sys_get_local_address(net_type, wildcard, cluster_id):
1048 """Return the local address for the network type."""
1050 if net_type in ('tcp','openib','iib',):
1052 iface, star = string.split(wildcard, ':')
1053 local = if2addr(iface)
1055 panic ("unable to determine ip for:", wildcard)
1057 host = socket.gethostname()
1058 local = socket.gethostbyname(host)
1059 elif net_type == 'elan':
1060 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1061 f = sys_get_elan_position_file()
1063 panic ("unable to determine local Elan ID")
1066 lines = fp.readlines()
1070 if a[0] == 'NodeId':
1074 nid = my_int(cluster_id) + my_int(elan_id)
1075 local = "%d" % (nid)
1076 except ValueError, e:
1080 elif net_type == 'gm':
1081 fixme("automatic local address for GM")
1085 def sys_get_branch():
1086 """Returns kernel release"""
1088 fp = open('/proc/sys/kernel/osrelease')
1089 lines = fp.readlines()
1093 version = string.split(l)
1094 a = string.split(version[0], '.')
1095 return a[0] + '.' + a[1]
1100 # XXX: instead of device_list, ask for $name and see what we get
1101 def is_prepared(name):
1102 """Return true if a device exists for the name"""
1103 if config.lctl_dump:
1105 if (config.noexec or config.record) and config.cleanup:
1108 # expect this format:
1109 # 1 UP ldlm ldlm ldlm_UUID 2
1110 out = lctl.device_list()
1112 if name == string.split(s)[3]:
1114 except CommandError, e:
1118 def net_is_prepared():
1119 """If the any device exists, then assume that all networking
1120 has been configured"""
1121 out = lctl.device_list()
1124 def fs_is_mounted(path):
1125 """Return true if path is a mounted lustre filesystem"""
1127 fp = open('/proc/mounts')
1128 lines = fp.readlines()
1132 if a[1] == path and a[2] == 'lustre_lite':
1138 def kmod_find(src_dir, dev_dir, modname):
1139 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1140 for modext in '.ko', '.o':
1141 module = modbase + modext
1143 if os.access(module, os.R_OK):
1149 def kmod_info(modname):
1150 """Returns reference count for passed module name."""
1152 fp = open('/proc/modules')
1153 lines = fp.readlines()
1156 # please forgive my tired fingers for this one
1157 ret = filter(lambda word, mod = modname: word[0] == mod,
1158 map(lambda line: string.split(line), lines))
1162 except Exception, e:
1166 """Presents kernel module"""
1167 def __init__(self, src_dir, dev_dir, name):
1168 self.src_dir = src_dir
1169 self.dev_dir = dev_dir
1174 log ('loading module:', self.name, 'srcdir',
1175 self.src_dir, 'devdir', self.dev_dir)
1177 module = kmod_find(self.src_dir, self.dev_dir,
1180 panic('module not found:', self.name)
1181 (rc, out) = run('/sbin/insmod', module)
1183 raise CommandError('insmod', out, rc)
1185 (rc, out) = run('/sbin/modprobe', self.name)
1187 raise CommandError('modprobe', out, rc)
1191 log('unloading module:', self.name)
1192 (rc, out) = run('/sbin/rmmod', self.name)
1194 log('unable to unload module:', self.name +
1195 "(" + self.refcount() + ")")
1199 """Returns module info if any."""
1200 return kmod_info(self.name)
1203 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1210 """Returns module refcount."""
1217 """Returns 1 if module is used, otherwise 0 is returned."""
1223 if users and users != '(unused)' and users != '-':
1231 """Returns 1 if module is busy, otherwise 0 is returned."""
1232 if self.loaded() and (self.used() or self.refcount() != '0'):
1238 """Manage kernel modules"""
1239 def __init__(self, lustre_dir, portals_dir):
1240 self.lustre_dir = lustre_dir
1241 self.portals_dir = portals_dir
1242 self.kmodule_list = []
1244 def find_module(self, modname):
1245 """Find module by module name"""
1246 for mod in self.kmodule_list:
1247 if mod.name == modname:
1251 def add_portals_module(self, dev_dir, modname):
1252 """Append a module to list of modules to load."""
1254 mod = self.find_module(modname)
1256 mod = kmod(self.portals_dir, dev_dir, modname)
1257 self.kmodule_list.append(mod)
1259 def add_lustre_module(self, dev_dir, modname):
1260 """Append a module to list of modules to load."""
1262 mod = self.find_module(modname)
1264 mod = kmod(self.lustre_dir, dev_dir, modname)
1265 self.kmodule_list.append(mod)
1267 def load_modules(self):
1268 """Load all the modules in the list in the order they appear."""
1269 for mod in self.kmodule_list:
1270 if mod.loaded() and not config.noexec:
1274 def cleanup_modules(self):
1275 """Unload the modules in the list in reverse order."""
1276 rev = self.kmodule_list
1279 if (not mod.loaded() or mod.busy()) and not config.noexec:
1282 if mod.name == 'portals' and config.dump:
1283 lctl.dump(config.dump)
1286 # ============================================================
1287 # Classes to prepare and cleanup the various objects
1290 """ Base class for the rest of the modules. The default cleanup method is
1291 defined here, as well as some utilitiy funcs.
1293 def __init__(self, module_name, db):
1295 self.module_name = module_name
1296 self.name = self.db.getName()
1297 self.uuid = self.db.getUUID()
def info(self, *args):
    """Print a one-line status message tagged with this module's
    name/uuid identity."""
    # str.join replaces the deprecated string.join (same space separator).
    msg = ' '.join(map(str, args))
    # Single parenthesized argument: behaves identically as the Python 2
    # print statement and the Python 3 print function.
    print("%s: %s %s %s" % (self.module_name, self.name, self.uuid, msg))
1306 """ default cleanup, used for most modules """
1309 lctl.cleanup(self.name, self.uuid, config.force)
1310 except CommandError, e:
1311 log(self.module_name, "cleanup failed: ", self.name)
1315 def add_module(self, manager):
1316 """Adds all needed modules in the order they appear."""
1319 def safe_to_clean(self):
def safe_to_clean_modules(self):
    """By default, unloading modules is safe exactly when device cleanup is."""
    return self.safe_to_clean()
1325 class Network(Module):
1326 def __init__(self,db):
1327 Module.__init__(self, 'NETWORK', db)
1328 self.net_type = self.db.get_val('nettype')
1329 self.nid = self.db.get_val('nid', '*')
1330 self.cluster_id = self.db.get_val('clusterid', "0")
1331 self.port = self.db.get_val_int('port', 0)
1334 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1336 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1337 self.generic_nid = 1
1338 debug("nid:", self.nid)
1340 self.generic_nid = 0
1342 self.nid_uuid = self.nid_to_uuid(self.nid)
1343 self.hostaddr = self.db.get_hostaddr()
1344 if len(self.hostaddr) == 0:
1345 self.hostaddr.append(self.nid)
1346 if '*' in self.hostaddr[0]:
1347 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1348 if not self.hostaddr[0]:
1349 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1350 debug("hostaddr:", self.hostaddr[0])
def add_module(self, manager):
    """Queue the portals stack, optional router, and the NAL kernel
    module matching this network's type."""
    manager.add_portals_module("libcfs", 'libcfs')
    manager.add_portals_module("portals", 'portals')
    if node_needs_router():
        manager.add_portals_module("router", 'kptlrouter')
    # At most one NAL entry matches a given net_type.
    for net, dev_dir, mod in (('tcp',    "knals/socknal",   'ksocknal'),
                              ('elan',   "knals/qswnal",    'kqswnal'),
                              ('gm',     "knals/gmnal",     'kgmnal'),
                              ('openib', "knals/openibnal", 'kopenibnal'),
                              ('iib',    "knals/iibnal",    'kiibnal')):
        if self.net_type == net:
            manager.add_portals_module(dev_dir, mod)
def nid_to_uuid(self, nid):
    """Derive the canonical UUID string naming this nid's connection."""
    return "NID_" + str(nid) + "_UUID"
1372 if not config.record and net_is_prepared():
1374 self.info(self.net_type, self.nid, self.port)
1375 if not (config.record and self.generic_nid):
1376 lctl.network(self.net_type, self.nid)
1377 if self.net_type == 'tcp':
1379 for hostaddr in self.db.get_hostaddr():
1380 ip = string.split(hostaddr, '/')[0]
1381 if len(string.split(hostaddr, '/')) == 2:
1382 netmask = string.split(hostaddr, '/')[1]
1385 lctl.add_interface(self.net_type, ip, netmask)
1386 if self.net_type == 'elan':
1388 if self.port and node_is_router():
1389 run_one_acceptor(self.port)
1390 self.connect_peer_gateways()
1392 def connect_peer_gateways(self):
1393 for router in self.db.lookup_class('node'):
1394 if router.get_val_int('router', 0):
1395 for netuuid in router.get_networks():
1396 net = self.db.lookup(netuuid)
1398 if (gw.cluster_id == self.cluster_id and
1399 gw.net_type == self.net_type):
1400 if gw.nid != self.nid:
1403 def disconnect_peer_gateways(self):
1404 for router in self.db.lookup_class('node'):
1405 if router.get_val_int('router', 0):
1406 for netuuid in router.get_networks():
1407 net = self.db.lookup(netuuid)
1409 if (gw.cluster_id == self.cluster_id and
1410 gw.net_type == self.net_type):
1411 if gw.nid != self.nid:
1414 except CommandError, e:
1415 print "disconnect failed: ", self.name
def safe_to_clean(self):
    # Tearing down the network is only safe once no Lustre devices remain.
    return not net_is_prepared()
1423 self.info(self.net_type, self.nid, self.port)
1425 stop_acceptor(self.port)
1426 if node_is_router():
1427 self.disconnect_peer_gateways()
1428 if self.net_type == 'tcp':
1429 for hostaddr in self.db.get_hostaddr():
1430 ip = string.split(hostaddr, '/')[0]
1431 lctl.del_interface(self.net_type, ip)
1433 def correct_level(self, level, op=None):
1436 class RouteTable(Module):
1437 def __init__(self,db):
1438 Module.__init__(self, 'ROUTES', db)
1440 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1442 # only setup connections for tcp, openib, and iib NALs
1444 if not net_type in ('tcp','openib','iib',):
1447 # connect to target if route is to single node and this node is the gw
1448 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1449 if not local_cluster(net_type, tgt_cluster_id):
1450 panic("target", lo, " not on the local cluster")
1451 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1452 # connect to gateway if this node is not the gw
1453 elif (local_cluster(net_type, gw_cluster_id)
1454 and not local_interface(net_type, gw_cluster_id, gw)):
1455 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1460 panic("no server for nid", lo)
1463 return Network(srvdb)
1466 if not config.record and net_is_prepared():
1469 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1470 lctl.add_route(net_type, gw, lo, hi)
1471 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
def safe_to_clean(self):
    # Routes may only be removed once no Lustre devices are configured.
    return not net_is_prepared()
1479 if net_is_prepared():
1480 # the network is still being used, don't clean it up
1482 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1483 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1486 lctl.disconnect(srv)
1487 except CommandError, e:
1488 print "disconnect failed: ", self.name
1493 lctl.del_route(net_type, gw, lo, hi)
1494 except CommandError, e:
1495 print "del_route failed: ", self.name
1499 class Management(Module):
1500 def __init__(self, db):
1501 Module.__init__(self, 'MGMT', db)
def add_module(self, manager):
    """Register the kernel modules the management service depends on,
    in load order."""
    for subdir, modname in (('lvfs', 'lvfs'),
                            ('obdclass', 'obdclass'),
                            ('ptlrpc', 'ptlrpc'),
                            ('mgmt', 'mgmt_svc')):
        manager.add_lustre_module(subdir, modname)
1510 if not config.record and is_prepared(self.name):
1513 lctl.newdev("mgmt", self.name, self.uuid)
1515 def safe_to_clean(self):
1519 if is_prepared(self.name):
1520 Module.cleanup(self)
1522 def correct_level(self, level, op=None):
1525 # This is only needed to load the modules; the LDLM device
1526 # is now created automatically.
1528 def __init__(self,db):
1529 Module.__init__(self, 'LDLM', db)
def add_module(self, manager):
    """Register the base Lustre kernel modules (LDLM lives in ptlrpc),
    in load order."""
    for subdir, modname in (('lvfs', 'lvfs'),
                            ('obdclass', 'obdclass'),
                            ('ptlrpc', 'ptlrpc')):
        manager.add_lustre_module(subdir, modname)
1542 def correct_level(self, level, op=None):
# LOV (logical object volume) module: aggregates a set of OSC targets into one
# striped device.  (Elided listing; `class LOV` header and several body lines
# are missing from this view.)
1546 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1547 Module.__init__(self, 'LOV', db)
1548 if name_override != None:
1549 self.name = "lov_%s" % name_override
# Striping parameters with their defaults (1MB stripes, offset 0, pattern 0).
1550 self.mds_uuid = self.db.get_first_ref('mds')
1551 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1552 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1553 self.pattern = self.db.get_val_int('stripepattern', 0)
1554 self.devlist = self.db.get_lov_tgts('lov_tgt')
1555 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
# The original config uuid becomes the descriptor uuid; a fresh client uuid
# is generated for the LOV device itself.
1558 self.desc_uuid = self.uuid
1559 self.uuid = generate_client_uuid(self.name)
1560 self.fs_name = fs_name
1562 self.config_only = 1
1564 self.config_only = None
1565 mds = self.db.lookup(self.mds_uuid)
1566 self.mds_name = mds.getName()
# Build an OSC per target; devlist entries are (obd_uuid, index, gen, active).
1567 for (obd_uuid, index, gen, active) in self.devlist:
1570 self.obdlist.append(obd_uuid)
1571 obd = self.db.lookup(obd_uuid)
1572 osc = get_osc(obd, self.uuid, fs_name)
1574 self.osclist.append((osc, index, gen, active))
1576 panic('osc not found:', obd_uuid)
# prepare: set up the LOV device then attach each OSC to it.
1582 if not config.record and is_prepared(self.name):
1584 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1585 self.stripe_off, self.pattern, self.devlist,
1587 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1588 self.stripe_sz, self.stripe_off, self.pattern,
1589 string.join(self.obdlist))
1590 for (osc, index, gen, active) in self.osclist:
1591 target_uuid = osc.target_uuid
1593 # Only ignore connect failures with --force, which
1594 # isn't implemented here yet.
1596 osc.prepare(ignore_connect_failure=0)
1597 except CommandError, e:
1598 print "Error preparing OSC %s\n" % osc.uuid
1600 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
# cleanup: a config-only LOV was never instantiated, so it cannot be cleaned.
1603 for (osc, index, gen, active) in self.osclist:
1604 target_uuid = osc.target_uuid
1606 if is_prepared(self.name):
1607 Module.cleanup(self)
1608 if self.config_only:
1609 panic("Can't clean up config_only LOV ", self.name)
1611 def add_module(self, manager):
1612 if self.config_only:
1613 panic("Can't load modules for config_only LOV ", self.name)
1614 for (osc, index, gen, active) in self.osclist:
1615 osc.add_module(manager)
1617 manager.add_lustre_module('lov', 'lov')
1619 def correct_level(self, level, op=None):
# LMV (logical metadata volume): aggregates several MDS targets via per-MDS
# MDC clients.  (Elided listing; `class LMV` header is missing from this view.)
1623 def __init__(self, db, uuid, fs_name, name_override = None):
1624 Module.__init__(self, 'LMV', db)
1625 if name_override != None:
1626 self.name = "lmv_%s" % name_override
1627 self.devlist = self.db.get_refs('mds')
1629 self.desc_uuid = self.uuid
1631 self.fs_name = fs_name
# One MDC client per referenced MDS.
1632 for mds_uuid in self.devlist:
1633 mds = self.db.lookup(mds_uuid)
1635 panic("MDS not found!")
1636 mdc = MDC(mds, self.uuid, fs_name)
1638 self.mdclist.append(mdc)
1640 panic('mdc not found:', mds_uuid)
# prepare: bring up each MDC, then the LMV device itself.
1643 if is_prepared(self.name):
1645 for mdc in self.mdclist:
1647 # Only ignore connect failures with --force, which
1648 # isn't implemented here yet.
1649 mdc.prepare(ignore_connect_failure=0)
1650 except CommandError, e:
1651 print "Error preparing LMV %s\n" % mdc.uuid
1653 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1654 string.join(self.devlist))
1657 for mdc in self.mdclist:
1659 if is_prepared(self.name):
1660 Module.cleanup(self)
1662 def add_module(self, manager):
1663 for mdc in self.mdclist:
1664 mdc.add_module(manager)
1666 manager.add_lustre_module('lmv', 'lmv')
1668 def correct_level(self, level, op=None):
# MDS device module.  __init__ pulls the device/fs parameters out of the
# config db and derives the MDS identity from its 'target' reference.
# (Elided listing; several original lines are missing from this view.)
1671 class MDSDEV(Module):
1672 def __init__(self,db):
1673 Module.__init__(self, 'MDSDEV', db)
1674 self.devpath = self.db.get_val('devpath','')
1675 self.backdevpath = self.db.get_val('backdevpath','')
1676 self.size = self.db.get_val_int('devsize', 0)
1677 self.journal_size = self.db.get_val_int('journalsize', 0)
1678 self.fstype = self.db.get_val('fstype', '')
1679 self.backfstype = self.db.get_val('backfstype', '')
1680 self.nspath = self.db.get_val('nspath', '')
1681 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1682 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1683 self.obdtype = self.db.get_val('obdtype', '')
1684 self.root_squash = self.db.get_val('root_squash', '')
1685 self.no_root_squash = self.db.get_val('no_root_squash', '')
1686 # overwrite the original MDSDEV name and uuid with the MDS name and uuid
1687 target_uuid = self.db.get_first_ref('target')
1688 self.mds = self.db.lookup(target_uuid)
1689 self.name = self.mds.getName()
1690 self.client_uuids = self.mds.get_refs('client')
1696 self.master_uuid = ""
1699 # it is possible to have MDS with no clients. It is master MDS
1700 # in configuration with CMOBD.
1701 self.lmv_uuid = self.db.get_first_ref('lmv')
1703 self.lmv = self.db.lookup(self.lmv_uuid)
# When an LMV is present, client refs and master uuid come from it instead.
1705 self.client_uuids = self.lmv.get_refs('client')
1706 self.master_uuid = self.lmv_uuid
1708 # FIXME: if fstype not set, then determine based on kernel version
1709 self.format = self.db.get_val('autoformat', "no")
1710 if self.mds.get_val('failover', 0):
1711 self.failover_mds = 'f'
1713 self.failover_mds = 'n'
1714 active_uuid = get_active_target(self.mds)
1716 panic("No target device found:", target_uuid)
1717 if active_uuid == self.uuid:
1721 if self.active and config.group and config.group != self.mds.get_val('group'):
1724 # default inode size for the case when neither LOV nor
1725 # LMV is accessible.
1726 self.inode_size = 256
1728 inode_size = self.db.get_val_int('inodesize', 0)
1729 if not inode_size == 0:
1730 self.inode_size = inode_size
1732 # find the LOV for this MDS
1733 lovconfig_uuid = self.mds.get_first_ref('lovconfig')
1734 if lovconfig_uuid or self.lmv:
1736 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1737 lovconfig = self.lmv.lookup(lovconfig_uuid)
1738 lov_uuid = lovconfig.get_first_ref('lov')
1740 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1743 lovconfig = self.mds.lookup(lovconfig_uuid)
1744 lov_uuid = lovconfig.get_first_ref('lov')
1746 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1750 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1751 lovconfig = self.lmv.lookup(lovconfig_uuid)
1752 lov_uuid = lovconfig.get_first_ref('lov')
1754 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name',
1757 # default stripe count controls default inode_size
# Wider stripes need larger MDS inodes to hold the striping EA.
1758 stripe_count = lov.stripe_cnt
1759 if stripe_count > 77:
1760 self.inode_size = 4096
1761 elif stripe_count > 35:
1762 self.inode_size = 2048
1763 elif stripe_count > 13:
1764 self.inode_size = 1024
1765 elif stripe_count > 3:
1766 self.inode_size = 512
1768 self.inode_size = 256
# The MDSDEV adopts the target's uuid; its own uuid is kept as the dev uuid.
1770 self.target_dev_uuid = self.uuid
1771 self.uuid = target_uuid
# For a CMOBD master MDS, build the backing LMV client now.
1774 if self.master_uuid:
1775 client_uuid = self.name + "_lmv_" + "UUID"
1776 self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid,
1777 self.name, self.name)
1778 self.master_uuid = self.master.name
# Load every kernel module the MDS needs, including fsfilt glue for the
# configured fstype (and the backing fstype when running over smfs).
# (Elided listing; some original lines are missing from this view.)
1780 def add_module(self, manager):
1782 manager.add_lustre_module('mdc', 'mdc')
1783 manager.add_lustre_module('osc', 'osc')
1784 manager.add_lustre_module('ost', 'ost')
1785 manager.add_lustre_module('lov', 'lov')
1786 manager.add_lustre_module('mds', 'mds')
1788 if self.fstype == 'smfs':
1789 manager.add_lustre_module('smfs', 'smfs')
1791 if self.fstype == 'ldiskfs':
1792 manager.add_lustre_module('ldiskfs', 'ldiskfs')
1795 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1797 # if fstype is smfs, then we should also take care about backing
1799 if self.fstype == 'smfs':
1800 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
# 'snap' in the mount options requires the snapshot-capable fsfilt modules,
# which only exist for smfs.
1802 for option in string.split(self.mountfsoptions, ','):
1803 if option == 'snap':
1804 if not self.fstype == 'smfs':
1805 panic("mountoptions has 'snap', but fstype is not smfs.")
1806 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1807 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
1810 if self.master_uuid:
1811 self.master.add_module(manager)
# MDSDEV.prepare (fragment): sets up the block device, assembles the mount
# options, creates the MDT and mds devices, then applies root-squash config.
# (Elided listing; the `def prepare` line and others are missing from view.)
1814 if not config.record and is_prepared(self.name):
1817 debug(self.uuid, "not active")
1820 # run write_conf automatically, if --reformat used
1822 self.info(self.devpath, self.fstype, self.size, self.format)
1826 if self.master_uuid:
1827 self.master.prepare()
1829 # never reformat here
1830 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1831 self.format, self.journal_size, self.inode_size,
1832 self.mkfsoptions, self.backfstype, self.backdevpath)
1834 if not is_prepared('MDT'):
1835 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
# Merge defaults, the --mountfsoptions override, and per-device options.
1837 mountfsoptions = def_mount_options(self.fstype, 'mds')
1839 if config.mountfsoptions:
1841 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1843 mountfsoptions = config.mountfsoptions
1844 if self.mountfsoptions:
1845 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1847 if self.mountfsoptions:
1849 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1851 mountfsoptions = self.mountfsoptions
# smfs mounts by fstype name and carries the backing type/dev in the options.
1853 if self.fstype == 'smfs':
1854 realdev = self.fstype
1857 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1861 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1866 print 'MDS mount options: ' + mountfsoptions
# 'dumb' is a placeholder value the MDS code recognizes and skips.
1868 if not self.master_uuid:
1869 self.master_uuid = 'dumb'
1871 if not self.obdtype:
1872 self.obdtype = 'dumb'
1874 if not self.client_uuids:
1875 lctl.newdev("mds", self.name, self.uuid,
1876 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1877 'dumb', mountfsoptions,
1878 self.master_uuid, self.obdtype))
1880 lctl.newdev("mds", self.name, self.uuid,
1881 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1882 self.name, mountfsoptions,
1883 self.master_uuid, self.obdtype))
# Development convenience: point the group-hash upcall at the local helper.
1885 if development_mode():
1886 procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
1887 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
1888 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1889 print "MDS Warning: failed to set group-hash upcall"
1891 run("echo ", upcall, " > ", procentry)
1893 except CommandError, e:
1895 panic("MDS is missing the config log. Need to run " +
1896 "lconf --write_conf.")
# Command-line root-squash settings fall back to the per-device config values.
1900 if config.root_squash == None:
1901 config.root_squash = self.root_squash
1902 if config.no_root_squash == None:
1903 config.no_root_squash = self.no_root_squash
1904 if config.root_squash:
1905 if config.no_root_squash:
1906 nsnid = config.no_root_squash
1909 lctl.root_squash(self.name, config.root_squash, nsnid)
# Record the configuration logs on the MDS: mount it, replay each client's
# setup/cleanup into named logs, then do the same for every client node by
# re-running lconf with --record.  (Elided listing; lines missing from view.)
1911 def write_conf(self):
1912 if not self.client_uuids:
1916 if not is_prepared(self.name):
1917 self.info(self.devpath, self.fstype, self.format)
1919 blkdev = block_dev(self.devpath, self.size, self.fstype,
1920 config.reformat, self.format, self.journal_size,
1921 self.inode_size, self.mkfsoptions,
1922 self.backfstype, self.backdevpath)
1924 # Even for writing logs we mount mds with supplied mount options
1925 # because it will not mount smfs (if used) otherwise.
# Same option-merging logic as prepare(): defaults, --mountfsoptions,
# then per-device options.
1927 mountfsoptions = def_mount_options(self.fstype, 'mds')
1929 if config.mountfsoptions:
1931 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1933 mountfsoptions = config.mountfsoptions
1934 if self.mountfsoptions:
1935 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1937 if self.mountfsoptions:
1939 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1941 mountfsoptions = self.mountfsoptions
1943 if self.fstype == 'smfs':
1944 realdev = self.fstype
1947 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1951 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1956 print 'MDS mount options: ' + mountfsoptions
1958 if not self.obdtype:
1959 self.obdtype = 'dumb'
1961 # As mount options are passed by 4th param to config tool, we need
1962 # to pass something in 3rd param. But we do not want this 3rd param
1963 # be counted as a profile name for reading log on MDS setup, thus,
1964 # we pass there some predefined sign like 'dumb', which will be
1965 # checked in MDS code and skipped. Probably there is more nice way
1966 # like pass empty string and check it in config tool and pass null
1968 lctl.newdev("mds", self.name, self.uuid,
1969 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1970 'dumb', mountfsoptions,
1971 'dumb', self.obdtype))
1974 # record logs for all MDS clients
# Each client gets a setup log (<name>) and a teardown log (<name>-clean).
1975 for obd_uuid in self.client_uuids:
1976 log("recording client:", obd_uuid)
1978 client_uuid = generate_client_uuid(self.name)
1979 client = VOSC(self.db.lookup(obd_uuid), client_uuid,
1980 self.name, self.name)
1982 lctl.clear_log(self.name, self.name)
1983 lctl.record(self.name, self.name)
1985 lctl.mount_option(self.name, client.get_name(), "")
1987 process_updates(self.db, self.name, self.name, client)
1990 lctl.clear_log(self.name, self.name + '-clean')
1991 lctl.record(self.name, self.name + '-clean')
1993 lctl.del_mount_option(self.name)
1995 process_updates(self.db, self.name, self.name + '-clean', client)
1999 # record logs for each client
2005 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
2007 config_options = CONFIG_FILE
# Re-invoke this very script (sys.argv[0]) in --record mode once per client
# node that has a mountpoint or echoclient profile.
2009 for node_db in self.db.lookup_class('node'):
2010 client_name = node_db.getName()
2011 for prof_uuid in node_db.get_refs('profile'):
2012 prof_db = node_db.lookup(prof_uuid)
2013 # refactor this into a funtion to test "clientness"
2015 for ref_class, ref_uuid in prof_db.get_all_refs():
2016 if ref_class in ('mountpoint','echoclient'):
2017 debug("recording", client_name)
2018 old_noexec = config.noexec
2020 ret, out = run (sys.argv[0], noexec_opt,
2021 " -v --record --nomod",
2022 "--record_log", client_name,
2023 "--record_device", self.name,
2024 "--node", client_name,
2027 for s in out: log("record> ", string.strip(s))
2028 ret, out = run (sys.argv[0], noexec_opt,
2029 "--cleanup -v --record --nomod",
2030 "--record_log", client_name + "-clean",
2031 "--record_device", self.name,
2032 "--node", client_name,
2035 for s in out: log("record> ", string.strip(s))
2036 config.noexec = old_noexec
# Fragment: end of a cleanup path plus the "any mds devices left?" helper.
# (Elided listing; the enclosing `def` lines are partially missing.)
2039 lctl.cleanup(self.name, self.uuid, 0, 0)
2040 except CommandError, e:
2041 log(self.module_name, "cleanup failed: ", self.name)
2044 Module.cleanup(self)
2046 clean_dev(self.devpath, self.fstype, self.backfstype,
# msd_remaining: scan lctl's device list for any remaining 'mds' devices
# (third whitespace-separated field is the device type).
2049 def msd_remaining(self):
2050 out = lctl.device_list()
2052 if string.split(s)[2] in ('mds',):
2055 def safe_to_clean(self):
def safe_to_clean_modules(self):
    """Unloading the MDS modules is safe once no mds devices remain."""
    still_configured = self.msd_remaining()
    return not still_configured
# MDSDEV.cleanup (fragment): tear down this mds device, its LMV master (if
# any), and the shared MDT device when no mds devices remain.
# (Elided listing; `def cleanup`/`try` lines are missing from this view.)
2063 debug(self.uuid, "not active")
2066 if is_prepared(self.name):
2068 lctl.cleanup(self.name, self.uuid, config.force,
2070 except CommandError, e:
2071 log(self.module_name, "cleanup failed: ", self.name)
2074 Module.cleanup(self)
2076 if self.master_uuid:
2077 self.master.cleanup()
# The MDT device is shared; only remove it after the last mds is gone.
2078 if not self.msd_remaining() and is_prepared('MDT'):
2080 lctl.cleanup("MDT", "MDT_UUID", config.force,
2082 except CommandError, e:
2083 print "cleanup failed: ", self.name
2087 clean_dev(self.devpath, self.fstype, self.backfstype,
2090 def correct_level(self, level, op=None):
2091 #if self.master_uuid:
# OSD (object storage device) module.  __init__ mirrors MDSDEV.__init__:
# device parameters from the config db, identity from the 'target' ref.
# (Elided listing; the `class OSD` header is missing from this view.)
2096 def __init__(self, db):
2097 Module.__init__(self, 'OSD', db)
2098 self.osdtype = self.db.get_val('osdtype')
2099 self.devpath = self.db.get_val('devpath', '')
2100 self.backdevpath = self.db.get_val('backdevpath', '')
2101 self.size = self.db.get_val_int('devsize', 0)
2102 self.journal_size = self.db.get_val_int('journalsize', 0)
2103 self.inode_size = self.db.get_val_int('inodesize', 0)
2104 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2105 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2106 self.fstype = self.db.get_val('fstype', '')
2107 self.backfstype = self.db.get_val('backfstype', '')
2108 self.nspath = self.db.get_val('nspath', '')
# Adopt the target OST's name; note autoformat defaults to 'yes' here,
# unlike MDSDEV where it defaults to "no".
2109 target_uuid = self.db.get_first_ref('target')
2110 ost = self.db.lookup(target_uuid)
2111 self.name = ost.getName()
2112 self.format = self.db.get_val('autoformat', 'yes')
2113 if ost.get_val('failover', 0):
2114 self.failover_ost = 'f'
2116 self.failover_ost = 'n'
2118 active_uuid = get_active_target(ost)
2120 panic("No target device found:", target_uuid)
2121 if active_uuid == self.uuid:
2125 if self.active and config.group and config.group != ost.get_val('group'):
2128 self.target_dev_uuid = self.uuid
2129 self.uuid = target_uuid
# Load the OST stack plus fsfilt glue for the configured fstype.
# (Elided listing; some original lines are missing from this view.)
2131 def add_module(self, manager):
2133 manager.add_lustre_module('ost', 'ost')
2135 if self.fstype == 'smfs':
2136 manager.add_lustre_module('smfs', 'smfs')
2138 if self.fstype == 'ldiskfs':
2139 manager.add_lustre_module('ldiskfs', 'ldiskfs')
2141 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2142 if self.fstype == 'smfs':
2143 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
# NOTE(review): this iterates the mountfsoptions STRING character by
# character, so 'snap' can never match; MDSDEV.add_module splits the same
# value on ',' first (string.split(self.mountfsoptions, ',')). Likely a bug
# here -- confirm against upstream lconf before changing.
2145 for option in self.mountfsoptions:
2146 if option == 'snap':
2147 if not self.fstype == 'smfs':
2148 panic("mountoptions with snap, but fstype is not smfs\n")
2149 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2150 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2152 manager.add_lustre_module(self.osdtype, self.osdtype)
# OSD.prepare (fragment): set up the block device (skipped for obdecho),
# assemble mount options as in MDSDEV.prepare, create the osd device, and
# the shared OSS device if needed.  (Elided listing; lines missing.)
2154 # need to check /proc/mounts and /etc/mtab before
2155 # formatting anything.
2156 # FIXME: check if device is already formatted.
2158 if is_prepared(self.name):
2161 debug(self.uuid, "not active")
2163 self.info(self.osdtype, self.devpath, self.size, self.fstype,
2164 self.format, self.journal_size, self.inode_size)
# obdecho is a RAM-backed test device; it has no backing block device.
2166 if self.osdtype == 'obdecho':
2169 blkdev = block_dev(self.devpath, self.size, self.fstype,
2170 config.reformat, self.format, self.journal_size,
2171 self.inode_size, self.mkfsoptions, self.backfstype,
# Merge defaults, the --mountfsoptions override, and per-device options.
2174 mountfsoptions = def_mount_options(self.fstype, 'ost')
2176 if config.mountfsoptions:
2178 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
2180 mountfsoptions = config.mountfsoptions
2181 if self.mountfsoptions:
2182 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2184 if self.mountfsoptions:
2186 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2188 mountfsoptions = self.mountfsoptions
2190 if self.fstype == 'smfs':
2191 realdev = self.fstype
2194 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
2198 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
2203 print 'OSD mount options: ' + mountfsoptions
2205 lctl.newdev(self.osdtype, self.name, self.uuid,
2206 setup ="%s %s %s %s" %(realdev, self.fstype,
2209 if not is_prepared('OSS'):
2210 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
# Any obdfilter/obdecho devices still configured?  (Elided listing; the
# loop and return lines are missing from this view.)
2212 def osd_remaining(self):
2213 out = lctl.device_list()
2215 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2218 def safe_to_clean(self):
def safe_to_clean_modules(self):
    """Unloading the OST modules is safe once no OSD devices remain."""
    still_configured = self.osd_remaining()
    return not still_configured
# OSD.cleanup (fragment): tear down this osd device, the shared OSS device
# once no OSDs remain, and the backing device (except for obdecho).
# (Elided listing; `def cleanup`/`try` lines are missing from this view.)
2226 debug(self.uuid, "not active")
2228 if is_prepared(self.name):
2231 lctl.cleanup(self.name, self.uuid, config.force,
2233 except CommandError, e:
2234 log(self.module_name, "cleanup failed: ", self.name)
# OSS is shared by all OSDs; only remove it after the last one is gone.
2237 if not self.osd_remaining() and is_prepared('OSS'):
2239 lctl.cleanup("OSS", "OSS_UUID", config.force,
2241 except CommandError, e:
2242 print "cleanup failed: ", self.name
2245 if not self.osdtype == 'obdecho':
2246 clean_dev(self.devpath, self.fstype, self.backfstype,
2249 def correct_level(self, level, op=None):
# Resolve the mgmt service uuid for the filesystem behind a mountpoint name.
# (Elided listing; the early-return branches are missing from this view.)
2252 def mgmt_uuid_for_fs(mtpt_name):
2255 mtpt_db = toplustreDB.lookup_name(mtpt_name)
2256 fs_uuid = mtpt_db.get_first_ref('filesystem')
2257 fs = toplustreDB.lookup(fs_uuid)
2260 return fs.get_first_ref('mgmt')
2262 # Generic client module, used by OSC and MDC
# Binds a client device to the active server for its target and derives a
# host-unique device name.  (Elided listing; lines missing from this view.)
2263 class Client(Module):
2264 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2266 self.target_name = tgtdb.getName()
2267 self.target_uuid = tgtdb.getUUID()
2268 self.module_dir = module_dir
2269 self.module = module
2273 self.tgt_dev_uuid = get_active_target(tgtdb)
2274 if not self.tgt_dev_uuid:
2275 panic("No target device found for target(1):", self.target_name)
2280 self.module = module
2281 self.module_name = string.upper(module)
# Default device name embeds module, local hostname, target and fs, unless
# an explicit self_name was supplied.
2283 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2284 self.target_name, fs_name)
2286 self.name = self_name
2288 self.lookup_server(self.tgt_dev_uuid)
2289 mgmt_uuid = mgmt_uuid_for_fs(fs_name)
2291 self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
2294 self.fs_name = fs_name
2295 if not self.module_dir:
2296 self.module_dir = module
2298 def add_module(self, manager):
2299 manager.add_lustre_module(self.module_dir, self.module)
def lookup_server(self, srv_uuid):
    """Resolve and cache the network endpoints of the server owning srv_uuid."""
    self._server_nets = get_ost_net(self.db, srv_uuid)
    if not self._server_nets:
        panic ("Unable to find a server for:", srv_uuid)
# Client accessors plus prepare/cleanup: connect to the best local server
# (adding routes when none is local), create the client device, and undo all
# of that on cleanup.  (Elided listing; lines missing from this view.)
2310 def get_servers(self):
2311 return self._server_nets
2313 def prepare(self, ignore_connect_failure = 0):
2314 self.info(self.target_uuid)
2315 if not config.record and is_prepared(self.name):
2318 srv = choose_local_server(self.get_servers())
# No directly reachable server: connect through gateway routes instead.
2322 routes = find_route(self.get_servers())
2323 if len(routes) == 0:
2324 panic ("no route to", self.target_uuid)
2325 for (srv, r) in routes:
2326 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2327 except CommandError, e:
2328 if not ignore_connect_failure:
# A target listed in --inactive (or flagged inactive) is set up disabled.
2331 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2332 debug("%s inactive" % self.target_uuid)
2333 inactive_p = "inactive"
2335 debug("%s active" % self.target_uuid)
2337 lctl.newdev(self.module, self.name, self.uuid,
2338 setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
2339 inactive_p, self.mgmt_name))
2342 if is_prepared(self.name):
2343 Module.cleanup(self)
2345 srv = choose_local_server(self.get_servers())
2347 lctl.disconnect(srv)
2349 for (srv, r) in find_route(self.get_servers()):
2350 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2351 except CommandError, e:
2352 log(self.module_name, "cleanup failed: ", self.name)
2356 def correct_level(self, level, op=None):
2359 def deactivate(self):
2361 lctl.deactivate(self.name)
2362 except CommandError, e:
2363 log(self.module_name, "deactivate failed: ", self.name)
# MDC and OSC are thin Client subclasses; the class header lines and the
# permits_inactive() return values are missing from this elided view.
2368 def __init__(self, db, uuid, fs_name):
2369 Client.__init__(self, db, uuid, 'mdc', fs_name)
2371 def permits_inactive(self):
2375 def __init__(self, db, uuid, fs_name):
2376 Client.__init__(self, db, uuid, 'osc', fs_name)
2378 def permits_inactive(self):
def mgmtcli_name_for_uuid(uuid):
    """Derive the management-client device name for a management uuid."""
    devname = 'MGMTCLI_%s' % uuid
    return devname
# Client for the management service: fixed device name derived from the
# management db uuid, modules loaded from the 'mgmt' directory.
2384 class ManagementClient(Client):
2385 def __init__(self, db, uuid):
2386 Client.__init__(self, db, uuid, 'mgmt_cli', '',
2387 self_name = mgmtcli_name_for_uuid(db.getUUID()),
2388 module_dir = 'mgmt')
# Cache-Miss OBD: pairs a master device with a cache device, building the
# appropriate client wrappers (LOV/MDC/LMV) for each side by class.
# (Elided listing; some original lines are missing from this view.)
2390 class CMOBD(Module):
2391 def __init__(self, db):
2392 Module.__init__(self, 'CMOBD', db)
2393 self.name = self.db.getName();
2394 self.uuid = generate_client_uuid(self.name)
2395 self.master_uuid = self.db.get_first_ref('masterobd')
2396 self.cache_uuid = self.db.get_first_ref('cacheobd')
2398 master_obd = self.db.lookup(self.master_uuid)
2400 panic('master obd not found:', self.master_uuid)
2402 cache_obd = self.db.lookup(self.cache_uuid)
2404 panic('cache obd not found:', self.cache_uuid)
2406 master_class = master_obd.get_class()
2407 cache_class = cache_obd.get_class()
# Wrap each side in the client type matching its config class.
2409 if master_class == 'ost' or master_class == 'lov':
2410 self.master = LOV(master_obd, self.master_uuid, self.name,
2411 "%s_master" % (self.name));
2412 self.cache = LOV(cache_obd, self.cache_uuid, self.name,
2413 "%s_cache" % (self.name));
2414 if master_class == 'mds':
2415 self.master = get_mdc(db, self.name, self.master_uuid)
2416 if cache_class == 'mds':
2417 self.cache = get_mdc(db, self.name, self.cache_uuid)
2419 if master_class == 'lmv':
2420 self.master = LMV(master_obd, self.master_uuid, self.name,
2421 "%s_master" % (self.name));
2422 if cache_class == 'lmv':
2423 self.cache = LMV(cache_obd, self.cache_uuid, self.name,
2424 "%s_cache" % (self.name));
2426 # need to check /proc/mounts and /etc/mtab before
2427 # formatting anything.
2428 # FIXME: check if device is already formatted.
# prepare: the master side is brought up first, then the cmobd device.
2430 self.master.prepare()
2431 if not config.record and is_prepared(self.name):
2433 self.info(self.master_uuid, self.cache_uuid)
2434 lctl.newdev("cmobd", self.name, self.uuid,
2435 setup ="%s %s" %(self.master_uuid,
2442 def get_master_name(self):
2443 return self.master.name
2444 def get_cache_name(self):
2445 return self.cache.name
2448 if is_prepared(self.name):
2449 Module.cleanup(self)
2450 self.master.cleanup()
2452 def add_module(self, manager):
2453 manager.add_lustre_module('cmobd', 'cmobd')
2454 self.master.add_module(manager)
2456 def correct_level(self, level, op=None):
# Caching OBD: like CMOBD but both sides are prepared/cleaned, and setup is
# passed device NAMES rather than uuids.  (Elided listing; the `class COBD`
# header and some body lines are missing from this view.)
2460 def __init__(self, db, uuid, name):
2461 Module.__init__(self, 'COBD', db)
2462 self.name = self.db.getName();
2463 self.uuid = generate_client_uuid(self.name)
2464 self.master_uuid = self.db.get_first_ref('masterobd')
2465 self.cache_uuid = self.db.get_first_ref('cacheobd')
2467 master_obd = self.db.lookup(self.master_uuid)
2469 panic('master obd not found:', self.master_uuid)
2471 cache_obd = self.db.lookup(self.cache_uuid)
2473 panic('cache obd not found:', self.cache_uuid)
2475 master_class = master_obd.get_class()
2476 cache_class = cache_obd.get_class()
# Wrap each side in the client type matching its config class.
2478 if master_class == 'ost' or master_class == 'lov':
2479 self.master = LOV(master_obd, self.master_uuid, name,
2480 "%s_master" % (self.name));
2481 self.cache = LOV(cache_obd, self.cache_uuid, name,
2482 "%s_cache" % (self.name));
2483 if master_class == 'mds':
2484 self.master = get_mdc(db, name, self.master_uuid)
2485 if cache_class == 'mds':
2486 self.cache = get_mdc(db, name, self.cache_uuid)
2488 if master_class == 'lmv':
2489 self.master = LMV(master_obd, self.master_uuid, self.name,
2490 "%s_master" % (self.name));
2491 if cache_class == 'lmv':
2492 self.cache = LMV(cache_obd, self.cache_uuid, self.name,
2493 "%s_cache" % (self.name));
2495 # need to check /proc/mounts and /etc/mtab before
2496 # formatting anything.
2497 # FIXME: check if device is already formatted.
2504 def get_master_name(self):
2505 return self.master.name
2507 def get_cache_name(self):
2508 return self.cache.name
# prepare: both sides come up before the cobd device itself.
2511 self.master.prepare()
2512 self.cache.prepare()
2513 if not config.record and is_prepared(self.name):
2515 self.info(self.master_uuid, self.cache_uuid)
2516 lctl.newdev("cobd", self.name, self.uuid,
2517 setup ="%s %s" %(self.master.name,
2521 if is_prepared(self.name):
2522 Module.cleanup(self)
2523 self.master.cleanup()
2524 self.cache.cleanup()
2526 def add_module(self, manager):
2527 manager.add_lustre_module('cobd', 'cobd')
2528 self.master.add_module(manager)
2530 # virtual interface for OSC and LOV
# Dispatches on the config class to wrap a LOV, COBD, or plain OSC behind a
# single interface.  (Elided listing; the `class VOSC` header and delegating
# method bodies are missing from this view.)
2532 def __init__(self, db, client_uuid, name, name_override = None):
2533 Module.__init__(self, 'VOSC', db)
2534 if db.get_class() == 'lov':
2535 self.osc = LOV(db, client_uuid, name, name_override)
2537 elif db.get_class() == 'cobd':
2538 self.osc = COBD(db, client_uuid, name)
2541 self.osc = OSC(db, client_uuid, name)
2545 return self.osc.get_uuid()
2548 return self.osc.get_name()
2556 def add_module(self, manager):
2557 self.osc.add_module(manager)
2559 def correct_level(self, level, op=None):
2560 return self.osc.correct_level(level, op)
2562 # virtual interface for MDC and LMV
# Same dispatch pattern as VOSC, for the metadata side: LMV, COBD, or MDC.
# (Elided listing; the `class VMDC` header is missing from this view.)
2564 def __init__(self, db, client_uuid, name, name_override = None):
2565 Module.__init__(self, 'VMDC', db)
2566 if db.get_class() == 'lmv':
2567 self.mdc = LMV(db, client_uuid, name)
2568 elif db.get_class() == 'cobd':
2569 self.mdc = COBD(db, client_uuid, name)
2571 self.mdc = MDC(db, client_uuid, name)
2574 return self.mdc.uuid
2577 return self.mdc.name
2585 def add_module(self, manager):
2586 self.mdc.add_module(manager)
2588 def correct_level(self, level, op=None):
2589 return self.mdc.correct_level(level, op)
# Echo client for testing: wraps its referenced obd in a VOSC and creates an
# echo_client device on top of it.  (Elided listing; lines missing.)
2591 class ECHO_CLIENT(Module):
2592 def __init__(self,db):
2593 Module.__init__(self, 'ECHO_CLIENT', db)
2594 self.obd_uuid = self.db.get_first_ref('obd')
2595 obd = self.db.lookup(self.obd_uuid)
2596 self.uuid = generate_client_uuid(self.name)
2597 self.osc = VOSC(obd, self.uuid, self.name)
2600 if not config.record and is_prepared(self.name):
2603 self.osc.prepare() # XXX This is so cheating. -p
2604 self.info(self.obd_uuid)
2606 lctl.newdev("echo_client", self.name, self.uuid,
2607 setup = self.osc.get_name())
2610 if is_prepared(self.name):
2611 Module.cleanup(self)
2614 def add_module(self, manager):
2615 self.osc.add_module(manager)
2616 manager.add_lustre_module('obdecho', 'obdecho')
2618 def correct_level(self, level, op=None):
# Build a pseudo-random client uuid incorporating up to 19 chars of the name,
# truncated to the 36-char uuid length.  (Original line 2623 -- the name
# component of the format arguments -- is missing from this elided view.)
2621 def generate_client_uuid(name):
2622 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2624 int(random.random() * 1048576),
2625 int(random.random() * 1048576))
2626 return client_uuid[:36]
# Client mountpoint: builds VOSC/VMDC clients for the filesystem and mounts
# lustre_lite at the configured path.  (Elided listing; lines missing.)
2628 class Mountpoint(Module):
2629 def __init__(self,db):
2630 Module.__init__(self, 'MTPT', db)
2631 self.path = self.db.get_val('path')
2632 self.clientoptions = self.db.get_val('clientoptions', '')
2633 self.fs_uuid = self.db.get_first_ref('filesystem')
2634 fs = self.db.lookup(self.fs_uuid)
# Prefer an LMV reference; fall back to a single MDS.
2635 self.mds_uuid = fs.get_first_ref('lmv')
2636 if not self.mds_uuid:
2637 self.mds_uuid = fs.get_first_ref('mds')
2638 self.obd_uuid = fs.get_first_ref('obd')
2639 self.mgmt_uuid = fs.get_first_ref('mgmt')
2640 client_uuid = generate_client_uuid(self.name)
2642 ost = self.db.lookup(self.obd_uuid)
2644 panic("no ost: ", self.obd_uuid)
2646 mds = self.db.lookup(self.mds_uuid)
2648 panic("no mds: ", self.mds_uuid)
2650 self.vosc = VOSC(ost, client_uuid, self.name)
2651 self.vmdc = VMDC(mds, client_uuid, self.name)
2654 self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
# prepare: bring up clients then run mount(8); in record/dump mode only the
# mount_option config entry is emitted.
2660 if not config.record and fs_is_mounted(self.path):
2661 log(self.path, "already mounted.")
2665 self.mgmtcli.prepare()
2668 vmdc_name = self.vmdc.get_name()
2670 self.info(self.path, self.mds_uuid, self.obd_uuid)
2671 if config.record or config.lctl_dump:
2672 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
2675 if config.clientoptions:
2676 if self.clientoptions:
2677 self.clientoptions = self.clientoptions + ',' + \
2678 config.clientoptions
2680 self.clientoptions = config.clientoptions
2681 if self.clientoptions:
2682 self.clientoptions = ',' + self.clientoptions
2683 # Linux kernel will deal with async and not pass it to ll_fill_super,
2684 # so replace it with Lustre async
2685 self.clientoptions = string.replace(self.clientoptions, "async",
2688 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
2689 (self.vosc.get_name(), vmdc_name, self.clientoptions,
2690 config.config, self.path)
2691 run("mkdir", self.path)
2696 panic("mount failed:", self.path, ":", string.join(val))
# cleanup: umount (with -f under --force), verify, then tear down clients.
2699 self.info(self.path, self.mds_uuid,self.obd_uuid)
2701 if config.record or config.lctl_dump:
2702 lctl.del_mount_option(local_node_name)
2704 if fs_is_mounted(self.path):
2706 (rc, out) = run("umount", "-f", self.path)
2708 (rc, out) = run("umount", self.path)
2710 raise CommandError('umount', out, rc)
2712 if fs_is_mounted(self.path):
2713 panic("fs is still mounted:", self.path)
2718 self.mgmtcli.cleanup()
2720 def add_module(self, manager):
2721 manager.add_lustre_module('mdc', 'mdc')
2724 self.mgmtcli.add_module(manager)
2726 self.vosc.add_module(manager)
2727 self.vmdc.add_module(manager)
2729 manager.add_lustre_module('llite', 'llite')
2731 def correct_level(self, level, op=None):
2734 # ============================================================
2735 # misc query functions
# Collect Network objects for every network of the node hosting osd_uuid.
# (Elided listing; some original lines are missing from this view.)
2737 def get_ost_net(self, osd_uuid):
2741 osd = self.lookup(osd_uuid)
2742 node_uuid = osd.get_first_ref('node')
2743 node = self.lookup(node_uuid)
2745 panic("unable to find node for osd_uuid:", osd_uuid,
# NOTE(review): `node_uuid_` (trailing underscore) does not match the
# `node_uuid` assigned above -- looks like a NameError on this error path;
# confirm against upstream lconf.
2746 " node_ref:", node_uuid_)
2747 for net_uuid in node.get_networks():
2748 db = node.lookup(net_uuid)
2749 srv_list.append(Network(db))
2753 # the order of iniitailization is based on level.
# Map a service's config class to its numeric startup level; services are
# started in ascending level order and clamped to --minlevel/--maxlevel.
# (Elided listing; the per-type level constants are missing from this view.)
2754 def getServiceLevel(self):
2755 type = self.get_class()
2757 if type in ('network',):
2759 elif type in ('routetbl',):
2761 elif type in ('ldlm',):
2763 elif type in ('osd', 'cobd'):
2765 elif type in ('mdsdev',):
2767 elif type in ('lmv',):
2769 elif type in ('cmobd',):
2771 elif type in ('mountpoint', 'echoclient'):
2774 panic("Unknown type: ", type)
2776 if ret < config.minlevel or ret > config.maxlevel:
2781 # return list of services in a profile. list is a list of tuples
2782 # [(level, db_object),]
# (Elided listing; the return statement is missing from this view.)
2783 def getServices(self):
2785 for ref_class, ref_uuid in self.get_all_refs():
2786 servdb = self.lookup(ref_uuid)
2788 level = getServiceLevel(servdb)
# Services outside the min/max level window come back with a falsy level
# and are not appended (per the surrounding elided control flow).
2790 list.append((level, servdb))
2792 panic('service not found: ' + ref_uuid)
2798 ############################################################
2800 # FIXME: clean this mess up!
2802 # OSC is no longer in the xml, so we have to fake it.
2803 # this is getting ugly and begging for another refactoring
2804 def get_osc(ost_db, uuid, fs_name):
    # Fabricate an OSC client object for the given OST database entry
    # (the 'return osc' line is elided in this view).
2805 osc = OSC(ost_db, uuid, fs_name)
2808 def get_mdc(db, fs_name, mds_uuid):
    # Fabricate an MDC client object for the given MDS uuid; errors out when
    # the MDS cannot be found (guard and return lines elided in this view).
2809 mds_db = db.lookup(mds_uuid);
2811 error("no mds:", mds_uuid)
2812 mdc = MDC(mds_db, mds_uuid, fs_name)
2815 ############################################################
2816 # routing ("rooting")
2818 # list of (nettype, cluster_id, nid)
2821 def find_local_clusters(node_db):
    # Record every network this node belongs to in the global
    # 'local_clusters' list, and create an AcceptorHandler for each
    # distinct server port (duplicate ports are a fatal config error).
2822 global local_clusters
2823 for netuuid in node_db.get_networks():
2824 net = node_db.lookup(netuuid)
2826 debug("add_local", netuuid)
2827 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2829 if acceptors.has_key(srv.port):
2830 panic("duplicate port:", srv.port)
2831 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2833 # This node is a gateway.
2835 def node_is_router():
    # Body elided in this view — presumably returns the global 'is_router'
    # flag set by doHost; confirm against the full source.
# A node must load kptlrouter when the config declares any router anywhere
# (needs_router), or when this node is itself a router (is_router).
def node_needs_router():
    """Return a truthy value when the portals router module is required."""
    if needs_router:
        return needs_router
    return is_router
2844 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2845 # Currently, these local routes are only added to kptlrouter route
2846 # table if they are needed to connect to a specific server. This
2847 # should be changed so all available routes are loaded, and the
2848 # ptlrouter can make all the decisions.
2851 def find_local_routes(lustre):
2852 """ Scan the lustre config looking for routers . Build list of
    # Populates the global 'local_routes' with routes reachable through a
    # gateway on one of this node's local clusters, and sets 'needs_router'
    # (assignment lines elided in this view).
2854 global local_routes, needs_router
2856 list = lustre.lookup_class('node')
2858 if router.get_val_int('router', 0):
2860 for (local_type, local_cluster_id, local_nid) in local_clusters:
2862 for netuuid in router.get_networks():
2863 db = router.lookup(netuuid)
    # A router network is usable as a gateway only when it shares both
    # nettype and cluster id with one of our local clusters.
2864 if (local_type == db.get_val('nettype') and
2865 local_cluster_id == db.get_val('clusterid')):
2866 gw = db.get_val('nid')
2869 debug("find_local_routes: gw is", gw)
2870 for route in router.get_local_routes(local_type, gw):
2871 local_routes.append(route)
2872 debug("find_local_routes:", local_routes)
2875 def choose_local_server(srv_list):
    # Pick the first server reachable on one of this node's local clusters
    # (the 'return srv' line is elided in this view).
2876 for srv in srv_list:
2877 if local_cluster(srv.net_type, srv.cluster_id):
2880 def local_cluster(net_type, cluster_id):
    # Predicate: is (net_type, cluster_id) one of this node's local clusters?
    # (return lines elided in this view).
2881 for cluster in local_clusters:
2882 if net_type == cluster[0] and cluster_id == cluster[1]:
2886 def local_interface(net_type, cluster_id, nid):
    # Predicate: does this node own the exact (net_type, cluster_id, nid)
    # interface? (return lines elided in this view).
2887 for cluster in local_clusters:
2888 if (net_type == cluster[0] and cluster_id == cluster[1]
2889 and nid == cluster[2]):
2893 def find_route(srv_list):
    # For each server, collect the local routes whose [lo, hi] nid range and
    # target cluster id cover it; returns (srv, route) pairs (result-list
    # init, 'to =' assignment and return are elided in this view).
2895 frm_type = local_clusters[0][0]
2896 for srv in srv_list:
2897 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2898 to_type = srv.net_type
2900 cluster_id = srv.cluster_id
2901 debug ('looking for route to', to_type, to)
2902 for r in local_routes:
2903 debug("find_route: ", r)
    # Route tuple layout is (nettype, gw, tgt_cluster_id, lo, hi).
2904 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2905 result.append((srv, r))
2908 def get_active_target(db):
    # Return the uuid of the device to activate for this target: the
    # --select override for this target name if given, else the 'active'
    # reference from the config (branching/return lines elided in this view).
2909 target_uuid = db.getUUID()
2910 target_name = db.getName()
2911 node_name = get_select(target_name)
2913 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2915 tgt_dev_uuid = db.get_first_ref('active')
2918 def get_server_by_nid_uuid(db, nid_uuid):
    # Find the Network whose nid uuid matches (the Network construction and
    # return lines are elided in this view).
2919 for n in db.lookup_class("network"):
2921 if net.nid_uuid == nid_uuid:
2925 ############################################################
    # Body of newService(db): factory mapping a config-db class name to the
    # corresponding service wrapper object. The 'def' line and most 'n ='
    # constructor branches are elided in this view.
2929 type = db.get_class()
2930 debug('Service:', type, db.getName(), db.getUUID())
2935 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2936 elif type == 'network':
2938 elif type == 'routetbl':
2942 elif type == 'cobd':
2943 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2944 elif type == 'cmobd':
2946 elif type == 'mdsdev':
2948 elif type == 'mountpoint':
2950 elif type == 'echoclient':
2955 panic ("unknown service type:", type)
2959 # Prepare the system to run lustre using a particular profile
2960 # in the configuration.
2961 # * load & the modules
2962 # * setup networking for the current node
2963 # * make sure partitions are in place and prepared
2964 # * initialize devices with lctl
2965 # Levels is important, and needs to be enforced.
2966 def for_each_profile(db, prof_list, operation):
    # Apply 'operation' (doSetup/doCleanup/doLoadModules/...) to the
    # services of every profile in prof_list.
2967 for prof_uuid in prof_list:
2968 prof_db = db.lookup(prof_uuid)
2970 panic("profile:", prof_uuid, "not found.")
2971 services = getServices(prof_db)
2974 def magic_get_osc(db, rec, lov):
    # Resolve the OSC for an 'add' update record: take lov uuid/fs name from
    # the passed-in lov when available, otherwise chase
    # lov -> filesystem -> mountpoint through the raw XML to recover the
    # fs name. Panics on every malformed-xml dead end.
2976 lov_uuid = lov.get_uuid()
2977 lov_name = lov.osc.fs_name
2979 lov_uuid = rec.getAttribute('lov_uuidref')
2980 # FIXME: better way to find the mountpoint?
2981 filesystems = db.root_node.getElementsByTagName('filesystem')
2983 for fs in filesystems:
2984 ref = fs.getElementsByTagName('obd_ref')
2985 if ref[0].getAttribute('uuidref') == lov_uuid:
2986 fsuuid = fs.getAttribute('uuid')
2990 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
2992 mtpts = db.root_node.getElementsByTagName('mountpoint')
2995 ref = fs.getElementsByTagName('filesystem_ref')
2996 if ref[0].getAttribute('uuidref') == fsuuid:
2997 lov_name = fs.getAttribute('name')
3001 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3003 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3005 ost_uuid = rec.getAttribute('ost_uuidref')
3006 obd = db.lookup(ost_uuid)
3009 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3011 osc = get_osc(obd, lov_uuid, lov_name)
    # NOTE(review): 'obd_uuid' below is not assigned in the visible lines;
    # looks like it should be 'ost_uuid' — confirm against the full source.
3013 panic('osc not found:', obd_uuid)
3016 # write logs for update records. sadly, logs of all types -- and updates in
3017 # particular -- are something of an afterthought. lconf needs rewritten with
3018 # these as core concepts. so this is a pretty big hack.
3019 def process_update_record(db, update, lov):
    # Replay one <update> XML element: for each add/deactivate/delete child
    # record, drive lctl and the OSC object through the matching transition.
    # (try:/else:/continue lines are elided in this view.)
3020 for rec in update.childNodes:
3021 if rec.nodeType != rec.ELEMENT_NODE:
3024 log("found "+rec.nodeName+" record in update version " +
3025 str(update.getAttribute('version')))
    # All record kinds carry the same four required attributes.
3027 lov_uuid = rec.getAttribute('lov_uuidref')
3028 ost_uuid = rec.getAttribute('ost_uuidref')
3029 index = rec.getAttribute('index')
3030 gen = rec.getAttribute('generation')
3032 if not lov_uuid or not ost_uuid or not index or not gen:
3033 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3036 tmplov = db.lookup(lov_uuid)
3038 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3039 lov_name = tmplov.getName()
3041 lov_name = lov.osc.name
3043 # ------------------------------------------------------------- add
3044 if rec.nodeName == 'add':
3046 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3049 osc = magic_get_osc(db, rec, lov)
3052 # Only ignore connect failures with --force, which
3053 # isn't implemented here yet.
3054 osc.prepare(ignore_connect_failure=0)
3055 except CommandError, e:
3056 print "Error preparing OSC %s\n" % osc.uuid
3059 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3061 # ------------------------------------------------------ deactivate
3062 elif rec.nodeName == 'deactivate':
3066 osc = magic_get_osc(db, rec, lov)
3070 except CommandError, e:
3071 print "Error deactivating OSC %s\n" % osc.uuid
3074 # ---------------------------------------------------------- delete
3075 elif rec.nodeName == 'delete':
3079 osc = magic_get_osc(db, rec, lov)
3085 except CommandError, e:
3086 print "Error cleaning up OSC %s\n" % osc.uuid
3089 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3091 def process_updates(db, log_device, log_name, lov = None):
    # Replay every non-empty <update> element into its own config log named
    # "<log_name>-<version>", recorded on log_device via lctl
    # (loop header and end-record lines elided in this view).
3092 updates = db.root_node.getElementsByTagName('update')
3094 if not u.childNodes:
3095 log("ignoring empty update record (version " +
3096 str(u.getAttribute('version')) + ")")
3099 version = u.getAttribute('version')
3100 real_name = "%s-%s" % (log_name, version)
3101 lctl.clear_log(log_device, real_name)
3102 lctl.record(log_device, real_name)
3104 process_update_record(db, u, lov)
3108 def doWriteconf(services):
    # --write_conf pass: only MDS devices get their config rewritten
    # (loop header and the write_conf call are elided in this view).
3112 if s[1].get_class() == 'mdsdev':
3113 n = newService(s[1])
3116 def doSetup(services):
    # Instantiate every service, re-sort by (possibly corrected) level, then
    # prepare each in ascending order (loop headers, sort and prepare calls
    # elided in this view).
3121 n = newService(s[1])
3123 slist.append((n.level, n))
3126 nl = n[1].correct_level(n[0])
3127 nlist.append((nl, n[1]))
3132 def doLoadModules(services):
    # Collect the module list of every service into the global module
    # manager, then load them all in one pass.
3136 # adding all needed modules from all services
3138 n = newService(s[1])
3139 n.add_module(mod_manager)
3141 # loading all registered modules
3142 mod_manager.load_modules()
3144 def doUnloadModules(services):
    # Mirror of doLoadModules for cleanup: register only modules whose
    # services say they are safe to clean, then unload them.
3148 # adding all needed modules from all services
3150 n = newService(s[1])
3151 if n.safe_to_clean_modules():
3152 n.add_module(mod_manager)
3154 # unloading all registered modules
3155 mod_manager.cleanup_modules()
3157 def doCleanup(services):
    # Tear services down in descending level order; only those reporting
    # safe_to_clean() are actually cleaned (loop headers, sort and cleanup
    # calls elided in this view).
3163 n = newService(s[1])
3165 slist.append((n.level, n))
3168 nl = n[1].correct_level(n[0])
3169 nlist.append((nl, n[1]))
3174 if n[1].safe_to_clean():
3179 def doHost(lustreDB, hosts):
    # Main per-node driver: find this host's node entry, read its tunables,
    # discover local clusters/routes, then run the requested mode
    # (write_conf / recover / cleanup / setup) over its profiles.
    # Many guard and else: lines are elided in this view.
3180 global is_router, local_node_name
3183 node_db = lustreDB.lookup_name(h, 'node')
3187 panic('No host entry found.')
    # Node-level tunables; most can be overridden on the command line later.
3189 local_node_name = node_db.get_val('name', 0)
3190 is_router = node_db.get_val_int('router', 0)
3191 lustre_upcall = node_db.get_val('lustreUpcall', '')
3192 portals_upcall = node_db.get_val('portalsUpcall', '')
3193 timeout = node_db.get_val_int('timeout', 0)
3194 ptldebug = node_db.get_val('ptldebug', '')
3195 subsystem = node_db.get_val('subsystem', '')
3197 find_local_clusters(node_db)
3199 find_local_routes(lustreDB)
3201 # Two step process: (1) load modules, (2) setup lustre
3202 # if not cleaning, load modules first.
3203 prof_list = node_db.get_refs('profile')
3205 if config.write_conf:
3206 for_each_profile(node_db, prof_list, doLoadModules)
3208 for_each_profile(node_db, prof_list, doWriteconf)
3209 for_each_profile(node_db, prof_list, doUnloadModules)
3212 elif config.recover:
3213 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3214 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3215 "--client_uuid <UUID> --conn_uuid <UUID>")
3216 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3218 elif config.cleanup:
3220 # the command line can override this value
3222 # ugly hack, only need to run lctl commands for --dump
3223 if config.lctl_dump or config.record:
3224 for_each_profile(node_db, prof_list, doCleanup)
    # Full cleanup path: push tunables down, then cleanup + unload.
3227 sys_set_timeout(timeout)
3228 sys_set_ptldebug(ptldebug)
3229 sys_set_subsystem(subsystem)
3230 sys_set_lustre_upcall(lustre_upcall)
3231 sys_set_portals_upcall(portals_upcall)
3233 for_each_profile(node_db, prof_list, doCleanup)
3234 for_each_profile(node_db, prof_list, doUnloadModules)
3238 # ugly hack, only need to run lctl commands for --dump
3239 if config.lctl_dump or config.record:
3240 sys_set_timeout(timeout)
3241 sys_set_lustre_upcall(lustre_upcall)
3242 for_each_profile(node_db, prof_list, doSetup)
    # Normal setup: bump socket buffers, load modules, then configure.
3246 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3247 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3249 for_each_profile(node_db, prof_list, doLoadModules)
3251 sys_set_debug_path()
3252 sys_set_ptldebug(ptldebug)
3253 sys_set_subsystem(subsystem)
3254 script = config.gdb_script
3255 run(lctl.lctl, ' modules >', script)
3257 log ("The GDB module script is in", script)
3258 # pause, so user has time to break and
3261 sys_set_timeout(timeout)
3262 sys_set_lustre_upcall(lustre_upcall)
3263 sys_set_portals_upcall(portals_upcall)
3265 for_each_profile(node_db, prof_list, doSetup)
3268 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
    # Fail a client over to the currently active target: find the new
    # server, disconnect the old nid (best effort), then tell lctl to
    # recover the client against the new nid (guard/try lines elided here).
3269 tgt = lustreDB.lookup(tgt_uuid)
3271 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3272 new_uuid = get_active_target(tgt)
3274 raise Lustre.LconfError("doRecovery: no active target found for: " +
3276 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3278 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3280 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3282 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
    # Best-effort disconnect of the stale connection; failure is only logged.
3285 lctl.disconnect(oldnet)
3286 except CommandError, e:
3287 log("recover: disconnect", nid_uuid, "failed: ")
3292 except CommandError, e:
3293 log("recover: connect failed")
3296 lctl.recover(client_uuid, net.nid_uuid)
3299 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
    # Derive config.lustre / config.portals module search paths from the
    # location of the running script (development mode) or from the
    # --lustre/--portals command-line options.
3300 base = os.path.dirname(cmd)
3301 if development_mode():
3302 if not config.lustre:
3303 debug('using objdir module paths')
3304 config.lustre = (os.path.join(base, ".."))
3305 # normalize the portals dir, using command line arg if set
3307 portals_dir = config.portals
3308 dir = os.path.join(config.lustre, portals_dir)
3309 config.portals = dir
3310 debug('config.portals', config.portals)
3311 elif config.lustre and config.portals:
3313 # if --lustre and --portals, normalize portals
3314 # can ignore PORTALS_DIR here, since it is probably useless here
3315 config.portals = os.path.join(config.lustre, config.portals)
3316 debug('config.portals B', config.portals)
3318 def sysctl(path, val):
    # Write 'val' to /proc/sys/<path> (noexec guard, write and close lines
    # are elided in this view).
3319 debug("+ sysctl", path, val)
3323 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals debug dump path at the configured location."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
3333 def sys_set_lustre_upcall(upcall):
3334 # the command overrides the value in the node config
    # Precedence: --lustre_upcall > --upcall > node-config value
    # (the elif/if lines around 3337-3339 are elided in this view).
3335 if config.lustre_upcall:
3336 upcall = config.lustre_upcall
3338 upcall = config.upcall
3340 lctl.set_lustre_upcall(upcall)
3342 def sys_set_portals_upcall(upcall):
3343 # the command overrides the value in the node config
    # Precedence: --portals_upcall > --upcall > node-config value
    # (the elif/if lines around 3346-3348 are elided in this view).
3344 if config.portals_upcall:
3345 upcall = config.portals_upcall
3347 upcall = config.upcall
3349 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout via lctl.

    The --timeout command-line value overrides the node-config value;
    nothing is set unless a positive timeout results.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # 'is not None' is the correct identity test for None (was '!= None',
    # which invokes __eq__ and is flagged by PEP 8); behavior is unchanged
    # for the int/None values this receives.
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
3358 def sys_tweak_socknal ():
3359 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
    # On 2.6 kernels, parse MemTotal from /proc/meminfo and raise
    # vm/min_free_kbytes accordingly (several loop/else lines elided here).
3360 if sys_get_branch() == '2.6':
3361 fp = open('/proc/meminfo')
3362 lines = fp.readlines()
3367 if a[0] == 'MemTotal:':
3369 debug("memtotal" + memtotal)
    # Boxes under 256MB get 1/16th of RAM reserved; larger boxes
    # presumably get a fixed floor (that branch is elided in this view).
3370 if int(memtotal) < 262144:
3371 minfree = int(memtotal) / 16
3374 debug("+ minfree ", minfree)
3375 sysctl("vm/min_free_kbytes", minfree)
3376 if config.single_socket:
3377 sysctl("socknal/typed", 0)
3379 def sys_optimize_elan ():
    # Tune Quadrics Elan interrupt punt loops wherever the proc file exists
    # (the 'for p in procfiles:' line is elided in this view).
3380 procfiles = ["/proc/elan/config/eventint_punt_loops",
3381 "/proc/qsnet/elan3/config/eventint_punt_loops",
3382 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3384 if os.access(p, os.W_OK):
3385 run ("echo 1 > " + p)
3387 def sys_set_ptldebug(ptldebug):
    # Evaluate a symbolic debug-flag expression (e.g. "trace|inode") against
    # ptldebug_names and write it to portals/debug. Command line overrides
    # the node value (guard/except-body lines elided in this view).
3389 ptldebug = config.ptldebug
    # NOTE(review): eval() on an option string — acceptable only because the
    # input comes from the local admin, not from untrusted data.
3392 val = eval(ptldebug, ptldebug_names)
3393 val = "0x%x" % (val)
3394 sysctl('portals/debug', val)
3395 except NameError, e:
3398 def sys_set_subsystem(subsystem):
    # Same scheme as sys_set_ptldebug, but for the subsystem debug mask
    # (try:/except-body lines elided in this view).
3399 if config.subsystem:
3400 subsystem = config.subsystem
    # NOTE(review): eval() on an option string — admin-supplied, not
    # untrusted input.
3403 val = eval(subsystem, subsystem_names)
3404 val = "0x%x" % (val)
3405 sysctl('portals/subsystem_debug', val)
3406 except NameError, e:
3409 def sys_set_netmem_max(path, max):
    # Raise a /proc/sys/net limit to at least 'max' (the read of the current
    # value and the comparison guard are elided in this view).
    # NOTE(review): parameter 'max' shadows the builtin; left unchanged in
    # this documentation-only pass.
3410 debug("setting", path, "to at least", max)
3418 fp = open(path, 'w')
3419 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals/obd character device nodes when they are missing."""
    wanted = (
        ('/dev/portals', 'mknod /dev/portals c 10 240'),
        ('/dev/obd', 'mknod /dev/obd c 10 241'),
    )
    for devnode, mknod_cmd in wanted:
        if not os.access(devnode, os.R_OK):
            run(mknod_cmd)
3430 # Add dir to the global PATH, if not already there.
3431 def add_to_path(new_dir):
    # Append new_dir to os.environ['PATH'] unless it is already present
    # (the early 'return' line is elided in this view).
3432 syspath = string.split(os.environ['PATH'], ':')
3433 if new_dir in syspath:
3435 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
3437 def default_debug_path():
    # Default debug dump path; prefers the '/r' chroot-style prefix when it
    # exists (the return lines are elided in this view).
3438 path = '/tmp/lustre-log'
3439 if os.path.isdir('/r'):
3444 def default_gdb_script():
    # Default gdb module script path; mirrors default_debug_path's '/r'
    # preference (the plain-'script' return line is elided in this view).
3445 script = '/tmp/ogdb'
3446 if os.path.isdir('/r'):
3447 return '/r' + script
    # Directories that must always be searchable for helper binaries.
3452 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3453 # ensure basic elements are in the system path
3454 def sanitise_path():
    # Add each default directory to PATH (the add_to_path call is elided in
    # this view).
3455 for dir in DEFAULT_PATH:
3458 # global hack for the --select handling
3460 def init_select(args):
3461 # args = [service=nodeA,service2=nodeB service3=nodeC]
    # Parse repeated --select options into the global tgt_select mapping
    # of service name -> node name (outer loop/global lines elided here).
3464 list = string.split(arg, ',')
3466 srv, node = string.split(entry, '=')
3467 tgt_select[srv] = node
3469 def get_select(srv):
    # Return the --select override node for service 'srv', if any
    # (the fall-through 'return None' is elided in this view).
3470 if tgt_select.has_key(srv):
3471 return tgt_select[srv]
    # Short aliases for the Lustre.Options argument kinds used below.
3475 FLAG = Lustre.Options.FLAG
3476 PARAM = Lustre.Options.PARAM
3477 INTPARAM = Lustre.Options.INTPARAM
3478 PARAMLIST = Lustre.Options.PARAMLIST
    # Command-line option table consumed by Lustre.Options in main():
    # (name, help text[, kind[, default]]). The 'lconf_options = [' line is
    # elided in this view.
3480 ('verbose,v', "Print system commands as they are run"),
3481 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3482 ('config', "Cluster config name used for LDAP query", PARAM),
3483 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3484 ('node', "Load config for <nodename>", PARAM),
3485 ('cleanup,d', "Cleans up config. (Shutdown)"),
3486 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3488 ('single_socket', "socknal option: only use one socket instead of bundle",
3490 ('failover',"""Used to shut down without saving state.
3491 This will allow this node to "give up" a service to a
3492 another node for failover purposes. This will not
3493 be a clean shutdown.""",
3495 ('gdb', """Prints message after creating gdb module script
3496 and sleeps for 5 seconds."""),
3497 ('noexec,n', """Prints the commands and steps that will be run for a
3498 config without executing them. This can used to check if a
3499 config file is doing what it should be doing"""),
3500 ('nomod', "Skip load/unload module step."),
3501 ('nosetup', "Skip device setup/cleanup step."),
3502 ('reformat', "Reformat all devices (without question)"),
3503 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3504 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3505 ('clientoptions', "Additional options for Lustre", PARAM),
3506 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3508 ('write_conf', "Save all the client config information on mds."),
3509 ('record', "Write config information on mds."),
3510 ('record_log', "Name of config record log.", PARAM),
3511 ('record_device', "MDS device name that will record the config commands",
3513 ('root_squash', "MDS squash root to appointed uid",
3515 ('no_root_squash', "Don't squash root for appointed nid",
3517 ('minlevel', "Minimum level of services to configure/cleanup",
3519 ('maxlevel', """Maximum level of services to configure/cleanup
3520 Levels are aproximatly like:
3525 70 - mountpoint, echo_client, osc, mdc, lov""",
3527 ('lustre', """Base directory of lustre sources. This parameter will
3528 cause lconf to load modules from a source tree.""", PARAM),
3529 ('portals', """Portals source directory. If this is a relative path,
3530 then it is assumed to be relative to lustre. """, PARAM),
3531 ('timeout', "Set recovery timeout", INTPARAM),
3532 ('upcall', "Set both portals and lustre upcall script", PARAM),
3533 ('lustre_upcall', "Set lustre upcall script", PARAM),
3534 ('portals_upcall', "Set portals upcall script", PARAM),
3535 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3536 ('ptldebug', "Set the portals debug level", PARAM),
3537 ('subsystem', "Set the portals debug subsystem", PARAM),
3538 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3539 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3540 # Client recovery options
3541 ('recover', "Recover a device"),
3542 ('group', "The group of devices to configure or cleanup", PARAM),
3543 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3544 ('client_uuid', "The failed client (required for recovery)", PARAM),
3545 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3547 ('inactive', """The name of an inactive service, to be ignored during
3548 mounting (currently OST-only). Can be repeated.""",
    # Body of main(): parse options, load the config (file, HTTP URL or
    # LDAP), seed the PRNG, set up lctl (optionally in dump/record mode),
    # and drive doHost. The 'def main():' line and many guard/else lines
    # are elided in this view.
3553 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3555 # in the upcall this is set to SIG_IGN
3556 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3558 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3560 config, args = cl.parse(sys.argv[1:])
3561 except Lustre.OptionError, e:
3565 setupModulePath(sys.argv[0])
3567 host = socket.gethostname()
3569 # the PRNG is normally seeded with time(), which is not so good for starting
3570 # time-synchronized clusters
3571 input = open('/dev/urandom', 'r')
3573 print 'Unable to open /dev/urandom!'
3575 seed = input.read(32)
3581 init_select(config.select)
3584 # allow config to be fetched via HTTP, but only with python2
3585 if sys.version[0] != '1' and args[0].startswith('http://'):
3588 config_file = urllib2.urlopen(args[0])
3589 except (urllib2.URLError, socket.error), err:
3590 if hasattr(err, 'args'):
3592 print "Could not access '%s': %s" %(args[0], err)
3594 elif not os.access(args[0], os.R_OK):
3595 print 'File not found or readable:', args[0]
3599 config_file = open(args[0], 'r')
3601 dom = xml.dom.minidom.parse(config_file)
3603 panic("%s does not appear to be a config file." % (args[0]))
3604 sys.exit(1) # make sure to die here, even in debug mode.
3606 CONFIG_FILE = args[0]
3607 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3608 if not config.config:
3609 config.config = os.path.basename(args[0])# use full path?
3610 if config.config[-4:] == '.xml':
3611 config.config = config.config[:-4]
3612 elif config.ldapurl:
3613 if not config.config:
3614 panic("--ldapurl requires --config name")
3615 dn = "config=%s,fs=lustre" % (config.config)
3616 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3617 elif config.ptldebug or config.subsystem:
    # No config source but debug flags given: just poke the sysctls and exit.
3618 sys_set_ptldebug(None)
3619 sys_set_subsystem(None)
3622 print 'Missing config file or ldap URL.'
3623 print 'see lconf --help for command summary'
3626 toplustreDB = lustreDB
    # Refuse to run against a config generated by a different lconf version.
3628 ver = lustreDB.get_version()
3630 panic("No version found in config data, please recreate.")
3631 if ver != Lustre.CONFIG_VERSION:
3632 panic("Config version", ver, "does not match lconf version",
3633 Lustre.CONFIG_VERSION)
3637 node_list.append(config.node)
3640 node_list.append(host)
3641 node_list.append('localhost')
3643 debug("configuring for host: ", node_list)
    # Per-host suffix keeps dump/script files from colliding across nodes.
3646 config.debug_path = config.debug_path + '-' + host
3647 config.gdb_script = config.gdb_script + '-' + host
3649 lctl = LCTLInterface('lctl')
3651 if config.lctl_dump:
3652 lctl.use_save_file(config.lctl_dump)
3655 if not (config.record_device and config.record_log):
3656 panic("When recording, both --record_log and --record_device must be specified.")
3657 lctl.clear_log(config.record_device, config.record_log)
3658 lctl.record(config.record_device, config.record_log)
3660 # init module manager
3661 mod_manager = kmod_manager(config.lustre, config.portals)
3663 doHost(lustreDB, node_list)
3665 if not config.record:
3670 process_updates(lustreDB, config.record_device, config.record_log)
3672 if __name__ == "__main__":
    # Script entry point: run main() and translate known exception types
    # into exit codes (the try:/main() call and handler bodies are elided
    # in this view).
3675 except Lustre.LconfError, e:
3677 # traceback.print_exc(file=sys.stdout)
3679 except CommandError, e:
    # A cleanup that hit errors still exits non-zero via the first recorded
    # cleanup error.
3683 if first_cleanup_error:
3684 sys.exit(first_cleanup_error)