3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = 'portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
107 "socknal" : (1 << 11),
108 "qswnal" : (1 << 12),
109 "pinger" : (1 << 13),
110 "filter" : (1 << 14),
116 "ptlrouter" : (1 << 20),
118 "openibnal" : (1 << 22),
# Remember only the FIRST failing return code seen during cleanup;
# later failures must not overwrite it.
first_cleanup_error = 0

def cleanup_error(rc):
    """Latch *rc* as the first cleanup error, if none was recorded yet."""
    global first_cleanup_error
    if not first_cleanup_error:
        first_cleanup_error = rc
129 # ============================================================
130 # debugging and error funcs
132 def fixme(msg = "this feature"):
133 raise Lustre.LconfError, msg + ' not implemented yet.'
136 msg = string.join(map(str,args))
137 if not config.noexec:
138 raise Lustre.LconfError(msg)
143 msg = string.join(map(str,args))
148 print string.strip(s)
152 msg = string.join(map(str,args))
155 # ack, python's builtin int() does not support '0x123' syntax.
156 # eval can do it, although what a hack!
160 return eval(s, {}, {})
163 except SyntaxError, e:
164 raise ValueError("not a number")
166 raise ValueError("not a number")
168 # ============================================================
169 # locally defined exceptions
170 class CommandError (exceptions.Exception):
171 def __init__(self, cmd_name, cmd_err, rc=None):
172 self.cmd_name = cmd_name
173 self.cmd_err = cmd_err
178 if type(self.cmd_err) == types.StringType:
180 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
182 print "! %s: %s" % (self.cmd_name, self.cmd_err)
183 elif type(self.cmd_err) == types.ListType:
185 print "! %s (error %d):" % (self.cmd_name, self.rc)
187 print "! %s:" % (self.cmd_name)
188 for s in self.cmd_err:
189 print "> %s" %(string.strip(s))
194 # ============================================================
195 # handle daemons, like the acceptor
197 """ Manage starting and stopping a daemon. Assumes daemon manages
198 it's own pid file. """
200 def __init__(self, cmd):
206 log(self.command, "already running.")
208 self.path = find_prog(self.command)
210 panic(self.command, "not found.")
211 ret, out = runcmd(self.path +' '+ self.command_line())
213 raise CommandError(self.path, out, ret)
217 pid = self.read_pidfile()
219 log ("killing process", pid)
221 #time.sleep(1) # let daemon die
223 log("unable to kill", self.command, e)
225 log("unable to kill", self.command)
228 pid = self.read_pidfile()
238 def read_pidfile(self):
240 fp = open(self.pidfile(), 'r')
247 def clean_pidfile(self):
248 """ Remove a stale pidfile """
249 log("removing stale pidfile:", self.pidfile())
251 os.unlink(self.pidfile())
253 log(self.pidfile(), e)
255 class AcceptorHandler(DaemonHandler):
256 def __init__(self, port, net_type):
257 DaemonHandler.__init__(self, "acceptor")
262 return "/var/run/%s-%d.pid" % (self.command, self.port)
264 def command_line(self):
265 return string.join(map(str,(self.flags, self.port)))
269 # start the acceptors
271 if config.lctl_dump or config.record:
273 for port in acceptors.keys():
274 daemon = acceptors[port]
275 if not daemon.running():
278 def run_one_acceptor(port):
279 if config.lctl_dump or config.record:
281 if acceptors.has_key(port):
282 daemon = acceptors[port]
283 if not daemon.running():
286 panic("run_one_acceptor: No acceptor defined for port:", port)
288 def stop_acceptor(port):
289 if acceptors.has_key(port):
290 daemon = acceptors[port]
295 # ============================================================
296 # handle lctl interface
299 Manage communication with lctl
302 def __init__(self, cmd):
304 Initialize close by finding the lctl binary.
306 self.lctl = find_prog(cmd)
308 self.record_device = ''
311 debug('! lctl not found')
314 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    """Divert subsequent lctl command batches into dump file *file*."""
    self.save_file = file
def record(self, dev_name, logname):
    """Begin recording lctl commands into config log *logname* on *dev_name*."""
    log("Recording log", logname, "on", dev_name)
    self.record_device, self.record_log = dev_name, logname
def end_record(self):
    """Finish the current recording session and forget the target log/device."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_device, self.record_log = None, None
def set_nonblock(self, fd):
    """Put file descriptor *fd* into non-blocking mode."""
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
336 the cmds are written to stdin of lctl
337 lctl doesn't return errors when run in script mode, so
339 should modify command line to accept multiple commands, or
340 create complex command line options
344 cmds = '\n dump ' + self.save_file + '\n' + cmds
345 elif self.record_device:
349 %s""" % (self.record_device, self.record_log, cmds)
351 debug("+", cmd_line, cmds)
352 if config.noexec: return (0, [])
354 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
355 child.tochild.write(cmds + "\n")
356 child.tochild.close()
357 # print "LCTL:", cmds
359 # From "Python Cookbook" from O'Reilly
360 outfile = child.fromchild
361 outfd = outfile.fileno()
362 self.set_nonblock(outfd)
363 errfile = child.childerr
364 errfd = errfile.fileno()
365 self.set_nonblock(errfd)
367 outdata = errdata = ''
370 ready = select.select([outfd,errfd],[],[]) # Wait for input
371 if outfd in ready[0]:
372 outchunk = outfile.read()
373 if outchunk == '': outeof = 1
374 outdata = outdata + outchunk
375 if errfd in ready[0]:
376 errchunk = errfile.read()
377 if errchunk == '': erreof = 1
378 errdata = errdata + errchunk
379 if outeof and erreof: break
380 # end of "borrowed" code
383 if os.WIFEXITED(ret):
384 rc = os.WEXITSTATUS(ret)
387 if rc or len(errdata):
388 raise CommandError(self.lctl, errdata, rc)
391 def runcmd(self, *args):
393 run lctl using the command line
395 cmd = string.join(map(str,args))
396 debug("+", self.lctl, cmd)
397 rc, out = run(self.lctl, cmd)
399 raise CommandError(self.lctl, out, rc)
403 def clear_log(self, dev, log):
404 """ clear an existing log """
409 quit """ % (dev, log)
412 def network(self, net, nid):
417 quit """ % (net, nid)
420 def root_squash(self, name, uid, nid):
424 quit""" % (name, uid, nid)
427 # create a new connection
428 def add_uuid(self, net_type, uuid, nid):
429 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
432 def add_peer(self, net_type, nid, hostaddr, port):
433 if net_type in ('tcp',) and not config.lctl_dump:
438 nid, hostaddr, port )
440 elif net_type in ('openib',) and not config.lctl_dump:
def connect(self, srv):
    """Make server *srv* known locally.

    Registers the server's NID uuid; on tcp/openib networks (and when not
    merely dumping commands) also registers its peer address mapping.
    """
    self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
    needs_peer = srv.net_type in ('tcp', 'openib') and not config.lctl_dump
    if needs_peer:
        self.add_peer(srv.net_type, srv.nid, srv.hostaddr, srv.port)
454 def recover(self, dev_name, new_conn):
457 recover %s""" %(dev_name, new_conn)
460 # add a route to a range
461 def add_route(self, net, gw, lo, hi):
469 except CommandError, e:
473 def del_route(self, net, gw, lo, hi):
478 quit """ % (net, gw, lo, hi)
481 # add a route to a host
482 def add_route_host(self, net, uuid, gw, tgt):
483 self.add_uuid(net, uuid, tgt)
491 except CommandError, e:
495 # add a route to a range
496 def del_route_host(self, net, uuid, gw, tgt):
502 quit """ % (net, gw, tgt)
505 def del_peer(self, net_type, nid, hostaddr):
506 if net_type in ('tcp',) and not config.lctl_dump:
510 del_peer %s %s single_share
514 elif net_type in ('openib',) and not config.lctl_dump:
518 del_peer %s single_share
523 # disconnect one connection
def disconnect(self, srv):
    """Undo connect(): drop *srv*'s NID uuid and, on tcp/openib networks
    (when not merely dumping commands), its peer mapping."""
    self.del_uuid(srv.nid_uuid)
    drops_peer = srv.net_type in ('tcp', 'openib') and not config.lctl_dump
    if drops_peer:
        self.del_peer(srv.net_type, srv.nid, srv.hostaddr)
529 def del_uuid(self, uuid):
537 def disconnectAll(self, net):
545 def attach(self, type, name, uuid):
548 quit""" % (type, name, uuid)
551 def setup(self, name, setup = ""):
555 quit""" % (name, setup)
558 def add_conn(self, name, conn_uuid):
562 quit""" % (name, conn_uuid)
566 # create a new device with lctl
567 def newdev(self, type, name, uuid, setup = ""):
568 self.attach(type, name, uuid);
570 self.setup(name, setup)
571 except CommandError, e:
572 self.cleanup(name, uuid, 0)
577 def cleanup(self, name, uuid, force, failover = 0):
578 if failover: force = 1
584 quit""" % (name, ('', 'force')[force],
585 ('', 'failover')[failover])
589 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
590 stripe_sz, stripe_off, pattern, devlist = None):
593 lov_setup %s %d %d %d %s %s
594 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
598 # add an OBD to a LOV
599 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
601 lov_modify_tgts add %s %s %s %s
602 quit""" % (name, obd_uuid, index, gen)
606 def lmv_setup(self, name, uuid, desc_uuid, devlist):
610 quit""" % (name, uuid, desc_uuid, devlist)
613 # delete an OBD from a LOV
614 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
616 lov_modify_tgts del %s %s %s %s
617 quit""" % (name, obd_uuid, index, gen)
621 def deactivate(self, name):
629 def dump(self, dump_file):
632 quit""" % (dump_file)
635 # get list of devices
636 def device_list(self):
637 devices = '/proc/fs/lustre/devices'
639 if os.access(devices, os.R_OK):
641 fp = open(devices, 'r')
649 def lustre_version(self):
650 rc, out = self.runcmd('version')
654 def mount_option(self, profile, osc, mdc):
656 mount_option %s %s %s
657 quit""" % (profile, osc, mdc)
660 # delete mount options
661 def del_mount_option(self, profile):
667 def set_timeout(self, timeout):
673 def set_lustre_upcall(self, upcall):
678 # ============================================================
679 # Various system-level functions
680 # (ideally moved to their own module)
682 # Run a command and return the output and status.
683 # stderr is sent to /dev/null, could use popen3 to
684 # save it if necessary
687 if config.noexec: return (0, [])
688 f = os.popen(cmd + ' 2>&1')
698 cmd = string.join(map(str,args))
701 # Run a command in the background.
702 def run_daemon(*args):
703 cmd = string.join(map(str,args))
705 if config.noexec: return 0
706 f = os.popen(cmd + ' 2>&1')
714 # Determine full path to use for an external command
715 # searches dirname(argv[0]) first, then PATH
717 syspath = string.split(os.environ['PATH'], ':')
718 cmdpath = os.path.dirname(sys.argv[0])
719 syspath.insert(0, cmdpath);
721 syspath.insert(0, os.path.join(config.portals, 'utils/'))
723 prog = os.path.join(d,cmd)
724 if os.access(prog, os.X_OK):
728 # Recursively look for file starting at base dir
729 def do_find_file(base, mod):
730 fullname = os.path.join(base, mod)
731 if os.access(fullname, os.R_OK):
733 for d in os.listdir(base):
734 dir = os.path.join(base,d)
735 if os.path.isdir(dir):
736 module = do_find_file(dir, mod)
740 # is the path a block device?
747 return stat.S_ISBLK(s[stat.ST_MODE])
749 # find the journal device from mkfs options
755 while i < len(x) - 1:
756 if x[i] == '-J' and x[i+1].startswith('device='):
764 # build fs according to type
766 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
772 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
774 # devsize is in 1k, and fs block count is in 4k
775 block_cnt = devsize/4
777 if fstype in ('ext3', 'extN', 'ldiskfs'):
778 # ext3 journal size is in megabytes
779 # but don't set jsize if mkfsoptions indicates a separate journal device
780 if jsize == 0 and jdev(mkfsoptions) == '':
782 if not is_block(dev):
783 ret, out = runcmd("ls -l %s" %dev)
784 devsize = int(string.split(out[0])[4]) / 1024
786 # sfdisk works for symlink, hardlink, and realdev
787 ret, out = runcmd("sfdisk -s %s" %dev)
789 devsize = int(out[0])
791 # sfdisk -s will fail for too large block device,
792 # then, read the size of partition from /proc/partitions
794 # get the realpath of the device
795 # it may be the real device, such as /dev/hda7
796 # or the hardlink created via mknod for a device
797 if 'realpath' in dir(os.path):
798 real_dev = os.path.realpath(dev)
802 while os.path.islink(real_dev) and (link_count < 20):
803 link_count = link_count + 1
804 dev_link = os.readlink(real_dev)
805 if os.path.isabs(dev_link):
808 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
810 panic("Entountered too many symbolic links resolving block device:", dev)
812 # get the major and minor number of the realpath via ls
813 # it seems python(os.stat) does not return
814 # the st_rdev member of the stat structure
815 ret, out = runcmd("ls -l %s" %real_dev)
816 major = string.split(string.split(out[0])[4], ",")[0]
817 minor = string.split(out[0])[5]
819 # get the devsize from /proc/partitions with the major and minor number
820 ret, out = runcmd("cat /proc/partitions")
823 if string.split(line)[0] == major and string.split(line)[1] == minor:
824 devsize = int(string.split(line)[2])
827 if devsize > 1024 * 1024:
828 jsize = ((devsize / 102400) * 4)
831 if jsize: jopt = "-J size=%d" %(jsize,)
832 if isize: iopt = "-I %d" %(isize,)
833 mkfs = 'mkfs.ext2 -j -b 4096 '
834 if not isblock or config.force:
836 if jdev(mkfsoptions) != '':
837 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
839 jmkfs = jmkfs + '-F '
840 jmkfs = jmkfs + jdev(mkfsoptions)
841 (ret, out) = run (jmkfs)
843 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
845 elif fstype == 'reiserfs':
846 # reiserfs journal size is in blocks
847 if jsize: jopt = "--journal_size %d" %(jsize,)
848 mkfs = 'mkreiserfs -ff'
850 panic('unsupported fs type: ', fstype)
852 if config.mkfsoptions != None:
853 mkfs = mkfs + ' ' + config.mkfsoptions
854 if mkfsoptions != None:
855 mkfs = mkfs + ' ' + mkfsoptions
856 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
858 panic("Unable to build fs:", dev, string.join(out))
859 # enable hash tree indexing on fsswe
860 if fstype in ('ext3', 'extN', 'ldiskfs'):
861 htree = 'echo "feature FEATURE_C5" | debugfs -w'
862 (ret, out) = run (htree, dev)
864 panic("Unable to enable htree:", dev)
866 # some systems use /dev/loopN, some /dev/loop/N
870 if not os.access(loop + str(0), os.R_OK):
872 if not os.access(loop + str(0), os.R_OK):
873 panic ("can't access loop devices")
876 # find loop device assigned to the file
877 def find_assigned_loop(file):
879 for n in xrange(0, MAX_LOOP_DEVICES):
881 if os.access(dev, os.R_OK):
882 (stat, out) = run('losetup', dev)
883 if out and stat == 0:
884 m = re.search(r'\((.*)\)', out[0])
885 if m and file == m.group(1):
889 # create file if necessary and assign the first free loop device
890 def init_loop(file, size, fstype, journal_size, inode_size,
891 mkfsoptions, reformat, autoformat, backfstype, backfile):
894 realfstype = backfstype
896 if is_block(realfile):
897 if reformat or (need_format(realfstype, realfile) and autoformat == 'yes'):
898 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
904 dev = find_assigned_loop(realfile)
906 print 'WARNING: file ', realfile, 'already mapped to', dev
909 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
911 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
912 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
914 panic("Unable to create backing store: ", realfile)
916 mkfs(realfile, size, realfstype, journal_size, inode_size,
917 mkfsoptions, isblock=0)
921 # find next free loop
922 for n in xrange(0, MAX_LOOP_DEVICES):
924 if os.access(dev, os.R_OK):
925 (stat, out) = run('losetup', dev)
927 run('losetup', dev, realfile)
928 print "attach " + realfile + " <-> " + dev
931 print "out of loop devices"
933 print "out of loop devices"
936 # undo loop assignment
937 def clean_loop(dev, fstype, backfstype, backdev):
943 if not is_block(realfile):
944 dev = find_assigned_loop(realfile)
946 print "detach " + dev + " <-> " + realfile
947 ret, out = run('losetup -d', dev)
949 log('unable to clean loop device:', dev, 'for file:', realfile)
952 # determine if dev is formatted as a <fstype> filesystem
953 def need_format(fstype, dev):
954 # FIXME don't know how to implement this
# finalizes passed device
def clean_dev(dev, fstype, backfstype, backdev):
    """Finalize *dev*: release its loop mapping when it is loopback-backed
    (smfs targets, or any path that is not a block device)."""
    uses_loop = (fstype == 'smfs') or not is_block(dev)
    if uses_loop:
        clean_loop(dev, fstype, backfstype, backdev)
962 # initialize a block device if needed
963 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
964 inode_size, mkfsoptions, backfstype, backdev):
968 if fstype == 'smfs' or not is_block(dev):
969 dev = init_loop(dev, size, fstype, journal_size, inode_size,
970 mkfsoptions, reformat, autoformat, backfstype, backdev)
971 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
972 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
975 # panic("device:", dev,
976 # "not prepared, and autoformat is not set.\n",
977 # "Rerun with --reformat option to format ALL filesystems")
982 """lookup IP address for an interface"""
983 rc, out = run("/sbin/ifconfig", iface)
986 addr = string.split(out[1])[1]
987 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return the default mount options for *fstype* and *target*.

    fstype -- filesystem type, e.g. 'ext3' or 'ldiskfs'
    target -- service kind the device backs: 'mds' or 'ost'

    For ext3/ldiskfs the base option is 'errors=remount-ro'; OSTs on 2.4
    kernels additionally get 'asyncdel'.  Returns None for filesystem
    types that have no default options.
    """
    if fstype in ('ext3', 'ldiskfs'):
        mountfsoptions = "errors=remount-ro"
        # sys_get_branch() is only consulted for OSTs, so the (possibly
        # unavailable) /proc probe is skipped for other targets.
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        return mountfsoptions
    return None
999 def sys_get_elan_position_file():
1000 procfiles = ["/proc/elan/device0/position",
1001 "/proc/qsnet/elan4/device0/position",
1002 "/proc/qsnet/elan3/device0/position"]
1004 if os.access(p, os.R_OK):
1008 def sys_get_local_nid(net_type, wildcard, cluster_id):
1009 """Return the local nid."""
1011 if sys_get_elan_position_file():
1012 local = sys_get_local_address('elan', '*', cluster_id)
1014 local = sys_get_local_address(net_type, wildcard, cluster_id)
1017 def sys_get_local_address(net_type, wildcard, cluster_id):
1018 """Return the local address for the network type."""
1020 if net_type in ('tcp','openib',):
1022 iface, star = string.split(wildcard, ':')
1023 local = if2addr(iface)
1025 panic ("unable to determine ip for:", wildcard)
1027 host = socket.gethostname()
1028 local = socket.gethostbyname(host)
1029 elif net_type == 'elan':
1030 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1031 f = sys_get_elan_position_file()
1033 panic ("unable to determine local Elan ID")
1036 lines = fp.readlines()
1040 if a[0] == 'NodeId':
1044 nid = my_int(cluster_id) + my_int(elan_id)
1045 local = "%d" % (nid)
1046 except ValueError, e:
1050 elif net_type == 'gm':
1051 fixme("automatic local address for GM")
1055 def sys_get_branch():
1056 """Returns kernel release"""
1058 fp = open('/proc/sys/kernel/osrelease')
1059 lines = fp.readlines()
1063 version = string.split(l)
1064 a = string.split(version[0], '.')
1065 return a[0] + '.' + a[1]
1071 # XXX: instead of device_list, ask for $name and see what we get
1072 def is_prepared(name):
1073 """Return true if a device exists for the name"""
1074 if config.lctl_dump:
1076 if (config.noexec or config.record) and config.cleanup:
1079 # expect this format:
1080 # 1 UP ldlm ldlm ldlm_UUID 2
1081 out = lctl.device_list()
1083 if name == string.split(s)[3]:
1085 except CommandError, e:
1089 def network_is_prepared():
1090 """If the any device exists, then assume that all networking
1091 has been configured"""
1092 out = lctl.device_list()
1095 def fs_is_mounted(path):
1096 """Return true if path is a mounted lustre filesystem"""
1098 fp = open('/proc/mounts')
1099 lines = fp.readlines()
1103 if a[1] == path and a[2] == 'lustre_lite':
1109 def kmod_find(src_dir, dev_dir, modname):
1110 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1111 for modext in '.ko', '.o':
1112 module = modbase + modext
1114 if os.access(module, os.R_OK):
1120 def kmod_info(modname):
1121 """Returns reference count for passed module name."""
1123 fp = open('/proc/modules')
1124 lines = fp.readlines()
1127 # please forgive my tired fingers for this one
1128 ret = filter(lambda word, mod = modname: word[0] == mod,
1129 map(lambda line: string.split(line), lines))
1133 except Exception, e:
1137 """Presents kernel module"""
1138 def __init__(self, src_dir, dev_dir, name):
1139 self.src_dir = src_dir
1140 self.dev_dir = dev_dir
1145 log ('loading module:', self.name, 'srcdir',
1146 self.src_dir, 'devdir', self.dev_dir)
1148 module = kmod_find(self.src_dir, self.dev_dir,
1151 panic('module not found:', self.name)
1152 (rc, out) = run('/sbin/insmod', module)
1154 raise CommandError('insmod', out, rc)
1156 (rc, out) = run('/sbin/modprobe', self.name)
1158 raise CommandError('modprobe', out, rc)
1162 log('unloading module:', self.name)
1163 (rc, out) = run('/sbin/rmmod', self.name)
1165 log('unable to unload module:', self.name +
1166 "(" + self.refcount() + ")")
1170 """Returns module info if any."""
1171 return kmod_info(self.name)
1174 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1181 """Returns module refcount."""
1188 """Returns 1 if module is used, otherwise 0 is returned."""
1194 if users and users != '(unused)' and users != '-':
1202 """Returns 1 if module is busy, otherwise 0 is returned."""
1203 if self.loaded() and (self.used() or self.refcount() != '0'):
1209 """Manage kernel modules"""
1210 def __init__(self, lustre_dir, portals_dir):
1211 self.lustre_dir = lustre_dir
1212 self.portals_dir = portals_dir
1213 self.kmodule_list = []
1215 def find_module(self, modname):
1216 """Find module by module name"""
1217 for mod in self.kmodule_list:
1218 if mod.name == modname:
1222 def add_portals_module(self, dev_dir, modname):
1223 """Append a module to list of modules to load."""
1225 mod = self.find_module(modname)
1227 mod = kmod(self.portals_dir, dev_dir, modname)
1228 self.kmodule_list.append(mod)
1230 def add_lustre_module(self, dev_dir, modname):
1231 """Append a module to list of modules to load."""
1233 mod = self.find_module(modname)
1235 mod = kmod(self.lustre_dir, dev_dir, modname)
1236 self.kmodule_list.append(mod)
1238 def load_modules(self):
1239 """Load all the modules in the list in the order they appear."""
1240 for mod in self.kmodule_list:
1241 if mod.loaded() and not config.noexec:
1245 def cleanup_modules(self):
1246 """Unload the modules in the list in reverse order."""
1247 rev = self.kmodule_list
1250 if (not mod.loaded() or mod.busy()) and not config.noexec:
1253 if mod.name == 'portals' and config.dump:
1254 lctl.dump(config.dump)
1257 # ============================================================
1258 # Classes to prepare and cleanup the various objects
1261 """ Base class for the rest of the modules. The default cleanup method is
1262 defined here, as well as some utilitiy funcs.
1264 def __init__(self, module_name, db):
1266 self.module_name = module_name
1267 self.name = self.db.getName()
1268 self.uuid = self.db.getUUID()
1271 self.kmod_manager = mod_manager
1273 def info(self, *args):
1274 msg = string.join(map(str,args))
1275 print self.module_name + ":", self.name, self.uuid, msg
1278 """ default cleanup, used for most modules """
1281 lctl.cleanup(self.name, self.uuid, config.force)
1282 except CommandError, e:
1283 log(self.module_name, "cleanup failed: ", self.name)
1287 def add_module(self, manager):
1288 """Adds all needed modules in the order they appear."""
1291 def safe_to_clean(self):
1294 def safe_to_clean_modules(self):
1295 return self.safe_to_clean()
1297 class Network(Module):
1298 def __init__(self,db):
1299 Module.__init__(self, 'NETWORK', db)
1300 self.net_type = self.db.get_val('nettype')
1301 self.nid = self.db.get_val('nid', '*')
1302 self.cluster_id = self.db.get_val('clusterid', "0")
1303 self.port = self.db.get_val_int('port', 0)
1306 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1308 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1309 self.generic_nid = 1
1310 debug("nid:", self.nid)
1312 self.generic_nid = 0
1314 self.nid_uuid = self.nid_to_uuid(self.nid)
1316 self.hostaddr = self.db.get_val('hostaddr', self.nid)
1317 if '*' in self.hostaddr:
1318 self.hostaddr = sys_get_local_address(self.net_type, self.hostaddr, self.cluster_id)
1319 if not self.hostaddr:
1320 panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id)
1321 debug("hostaddr:", self.hostaddr)
1323 def add_module(self, manager):
1324 manager.add_portals_module("libcfs", 'libcfs')
1325 manager.add_portals_module("portals", 'portals')
1326 if node_needs_router():
1327 manager.add_portals_module("router", 'kptlrouter')
1328 if self.net_type == 'tcp':
1329 manager.add_portals_module("knals/socknal", 'ksocknal')
1330 if self.net_type == 'elan':
1331 manager.add_portals_module("knals/qswnal", 'kqswnal')
1332 if self.net_type == 'gm':
1333 self.add_portals_module("knals/gmnal", 'kgmnal')
1334 if self.net_type == 'openib':
1335 self.add_portals_module("knals/openibnal", 'kopenibnal')
def nid_to_uuid(self, nid):
    """Return the canonical UUID string under which *nid* is registered."""
    return "NID_%s_UUID" % (nid,)
1341 if not config.record and network_is_prepared():
1343 self.info(self.net_type, self.nid, self.port)
1344 if not (config.record and self.generic_nid):
1345 lctl.network(self.net_type, self.nid)
1346 if self.net_type == 'tcp':
1348 if self.net_type == 'elan':
1350 if self.port and node_is_router():
1351 run_one_acceptor(self.port)
1352 self.connect_peer_gateways()
1354 def connect_peer_gateways(self):
1355 for router in self.db.lookup_class('node'):
1356 if router.get_val_int('router', 0):
1357 for netuuid in router.get_networks():
1358 net = self.db.lookup(netuuid)
1360 if (gw.cluster_id == self.cluster_id and
1361 gw.net_type == self.net_type):
1362 if gw.nid != self.nid:
1365 def disconnect_peer_gateways(self):
1366 for router in self.db.lookup_class('node'):
1367 if router.get_val_int('router', 0):
1368 for netuuid in router.get_networks():
1369 net = self.db.lookup(netuuid)
1371 if (gw.cluster_id == self.cluster_id and
1372 gw.net_type == self.net_type):
1373 if gw.nid != self.nid:
1376 except CommandError, e:
1377 print "disconnect failed: ", self.name
1381 def safe_to_clean(self):
1382 return not network_is_prepared()
1385 self.info(self.net_type, self.nid, self.port)
1387 stop_acceptor(self.port)
1388 if node_is_router():
1389 self.disconnect_peer_gateways()
1391 def correct_level(self, level, op=None):
1394 class RouteTable(Module):
1395 def __init__(self,db):
1396 Module.__init__(self, 'ROUTES', db)
1398 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1400 # only setup connections for tcp and openib NALs
1403 if not net_type in ('tcp','openib'):
1406 # connect to target if route is to single node and this node is the gw
1407 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1408 if not local_cluster(net_type, tgt_cluster_id):
1409 panic("target", lo, " not on the local cluster")
1410 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1411 # connect to gateway if this node is not the gw
1412 elif (local_cluster(net_type, gw_cluster_id)
1413 and not local_interface(net_type, gw_cluster_id, gw)):
1414 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1419 panic("no server for nid", lo)
1422 return Network(srvdb)
1425 if not config.record and network_is_prepared():
1428 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1429 lctl.add_route(net_type, gw, lo, hi)
1430 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1434 def safe_to_clean(self):
1435 return not network_is_prepared()
1438 if network_is_prepared():
1439 # the network is still being used, don't clean it up
1441 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1442 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1445 lctl.disconnect(srv)
1446 except CommandError, e:
1447 print "disconnect failed: ", self.name
1452 lctl.del_route(net_type, gw, lo, hi)
1453 except CommandError, e:
1454 print "del_route failed: ", self.name
1458 class Management(Module):
1459 def __init__(self, db):
1460 Module.__init__(self, 'MGMT', db)
1462 def add_module(self, manager):
1463 manager.add_lustre_module('lvfs', 'lvfs')
1464 manager.add_lustre_module('obdclass', 'obdclass')
1465 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1466 manager.add_lustre_module('mgmt', 'mgmt_svc')
1469 if not config.record and is_prepared(self.name):
1472 lctl.newdev("mgmt", self.name, self.uuid)
1474 def safe_to_clean(self):
1478 if is_prepared(self.name):
1479 Module.cleanup(self)
1481 def correct_level(self, level, op=None):
1484 # This is only needed to load the modules; the LDLM device
1485 # is now created automatically.
1487 def __init__(self,db):
1488 Module.__init__(self, 'LDLM', db)
1490 def add_module(self, manager):
1491 manager.add_lustre_module('lvfs', 'lvfs')
1492 manager.add_lustre_module('obdclass', 'obdclass')
1493 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1501 def correct_level(self, level, op=None):
# --- LOV: logical object volume, aggregates OSCs with striping parameters ---
# NOTE(review): sparse extraction; gaps in the embedded line numbers mark
# missing lines (else branches, try statements, etc.). Code kept byte-identical.
1505 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1506 Module.__init__(self, 'LOV', db)
1507 if name_override != None:
1508 self.name = "lov_%s" % name_override
# Striping parameters come from the XML config, with defaults.
1509 self.mds_uuid = self.db.get_first_ref('mds')
1510 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1511 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1512 self.pattern = self.db.get_val_int('stripepattern', 0)
1513 self.devlist = self.db.get_lov_tgts('lov_tgt')
1514 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1517 self.desc_uuid = self.uuid
1518 self.uuid = generate_client_uuid(self.name)
1519 self.fs_name = fs_name
1521 self.config_only = 1
1523 self.config_only = None
1524 mds = self.db.lookup(self.mds_uuid)
1525 self.mds_name = mds.getName()
# Build one OSC per LOV target; each entry is (osc, index, gen, active).
1526 for (obd_uuid, index, gen, active) in self.devlist:
1529 self.obdlist.append(obd_uuid)
1530 obd = self.db.lookup(obd_uuid)
1531 osc = get_osc(obd, self.uuid, fs_name)
1533 self.osclist.append((osc, index, gen, active))
1535 panic('osc not found:', obd_uuid)
# prepare: set up the LOV device, then each underlying OSC.
1541 if not config.record and is_prepared(self.name):
1543 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1544 self.stripe_off, self.pattern, self.devlist,
1546 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1547 self.stripe_sz, self.stripe_off, self.pattern,
1548 string.join(self.obdlist))
1549 for (osc, index, gen, active) in self.osclist:
1550 target_uuid = osc.target_uuid
1552 # Only ignore connect failures with --force, which
1553 # isn't implemented here yet.
1555 osc.prepare(ignore_connect_failure=0)
1556 except CommandError, e:
1557 print "Error preparing OSC %s\n" % osc.uuid
1559 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
# cleanup: tear down OSCs, then the LOV itself (refused for config_only).
1562 for (osc, index, gen, active) in self.osclist:
1563 target_uuid = osc.target_uuid
1565 if is_prepared(self.name):
1566 Module.cleanup(self)
1567 if self.config_only:
1568 panic("Can't clean up config_only LOV ", self.name)
1570 def add_module(self, manager):
1571 if self.config_only:
1572 panic("Can't load modules for config_only LOV ", self.name)
1573 for (osc, index, gen, active) in self.osclist:
1574 osc.add_module(manager)
1576 manager.add_lustre_module('lov', 'lov')
1578 def correct_level(self, level, op=None):
# --- LMV: logical metadata volume, aggregates MDCs (one per MDS) ---
# NOTE(review): sparse extraction; code kept byte-identical, comments added.
1582 def __init__(self, db, uuid, fs_name, name_override = None):
1583 Module.__init__(self, 'LMV', db)
1584 if name_override != None:
1585 self.name = "lmv_%s" % name_override
1586 self.devlist = self.db.get_refs('mds')
1588 self.desc_uuid = self.uuid
1590 self.fs_name = fs_name
# Build one MDC client per referenced MDS.
1592 for mds_uuid in self.devlist:
1593 mds = self.db.lookup(mds_uuid)
1595 panic("MDS not found!")
1596 mdc = MDC(mds, self.uuid, fs_name)
1598 self.mdclist.append(mdc)
1600 panic('mdc not found:', mds_uuid)
# prepare: bring up each MDC, then configure the LMV device itself.
1603 if is_prepared(self.name):
1606 self.info(self.name)
1607 for mdc in self.mdclist:
1609 # Only ignore connect failures with --force, which
1610 # isn't implemented here yet.
1611 mdc.prepare(ignore_connect_failure=0)
1612 except CommandError, e:
1613 print "Error preparing LMV %s\n" % mdc.uuid
1615 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1616 string.join(self.devlist))
1619 for mdc in self.mdclist:
1621 if is_prepared(self.name):
1622 Module.cleanup(self)
1624 def add_module(self, manager):
1625 for mdc in self.mdclist:
1626 mdc.add_module(manager)
1628 manager.add_lustre_module('lmv', 'lmv')
1630 def correct_level(self, level, op=None):
# --- MDSDEV: metadata server device (block device + MDS service) ---
# NOTE(review): sparse extraction; gaps in the embedded original line numbers
# mark missing lines. Code kept byte-identical; only comments were added.
1633 class MDSDEV(Module):
1634 def __init__(self,db):
1635 Module.__init__(self, 'MDSDEV', db)
# Device/filesystem parameters from the XML config.
1636 self.devpath = self.db.get_val('devpath','')
1637 self.backdevpath = self.db.get_val('backdevpath','')
1638 self.size = self.db.get_val_int('devsize', 0)
1639 self.journal_size = self.db.get_val_int('journalsize', 0)
1640 self.fstype = self.db.get_val('fstype', '')
1641 self.backfstype = self.db.get_val('backfstype', '')
1642 self.nspath = self.db.get_val('nspath', '')
1643 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1644 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1645 self.obdtype = self.db.get_val('obdtype', '')
1646 self.root_squash = self.db.get_val('root_squash', '')
1647 self.no_root_squash = self.db.get_val('no_root_squash', '')
1648 # overwrite the original MDSDEV name and uuid with the MDS name and uuid
1649 target_uuid = self.db.get_first_ref('target')
1650 self.mds = self.db.lookup(target_uuid)
1651 self.name = self.mds.getName()
1652 self.client_uuids = self.mds.get_refs('client')
1658 self.master_uuid = ""
1661 # it is possible to have MDS with no clients. It is master MDS
1662 # in configuration with CMOBD.
1663 self.lmv_uuid = self.db.get_first_ref('lmv')
1665 self.lmv = self.db.lookup(self.lmv_uuid)
1667 self.client_uuids = self.lmv.get_refs('client')
1668 self.master_uuid = self.lmv_uuid
1670 # FIXME: if fstype not set, then determine based on kernel version
1671 self.format = self.db.get_val('autoformat', "no")
1672 if self.mds.get_val('failover', 0):
1673 self.failover_mds = 'f'
1675 self.failover_mds = 'n'
1676 active_uuid = get_active_target(self.mds)
1678 panic("No target device found:", target_uuid)
1679 if active_uuid == self.uuid:
1683 if self.active and config.group and config.group != self.mds.get_val('group'):
1686 # default inode inode for case when neither LOV either
1687 # LMV is accessible.
1688 self.inode_size = 256
1690 inode_size = self.db.get_val_int('inodesize', 0)
1691 if not inode_size == 0:
1692 self.inode_size = inode_size
1694 # find the LOV for this MDS
1695 lovconfig_uuid = self.mds.get_first_ref('lovconfig')
1696 if lovconfig_uuid or self.lmv:
1698 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1699 lovconfig = self.lmv.lookup(lovconfig_uuid)
1700 lov_uuid = lovconfig.get_first_ref('lov')
1702 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1705 lovconfig = self.mds.lookup(lovconfig_uuid)
1706 lov_uuid = lovconfig.get_first_ref('lov')
1708 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1712 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1713 lovconfig = self.lmv.lookup(lovconfig_uuid)
1714 lov_uuid = lovconfig.get_first_ref('lov')
1716 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name',
# Wider stripes need larger on-disk inodes to hold the LOV EA.
1719 # default stripe count controls default inode_size
1720 stripe_count = lov.stripe_cnt
1721 if stripe_count > 77:
1722 self.inode_size = 4096
1723 elif stripe_count > 35:
1724 self.inode_size = 2048
1725 elif stripe_count > 13:
1726 self.inode_size = 1024
1727 elif stripe_count > 3:
1728 self.inode_size = 512
1730 self.inode_size = 256
1732 self.target_dev_uuid = self.uuid
1733 self.uuid = target_uuid
# A master LMV is wrapped when this MDS participates in a CMOBD setup.
1736 if self.master_uuid:
1737 client_uuid = self.name + "_lmv_" + "UUID"
1738 self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid,
1739 self.name, self.name)
1740 self.master_uuid = self.master.name
# Load every kernel module the MDS stack needs (clients, lov, mds, fsfilt).
1742 def add_module(self, manager):
1744 manager.add_lustre_module('mdc', 'mdc')
1745 manager.add_lustre_module('osc', 'osc')
1746 manager.add_lustre_module('ost', 'ost')
1747 manager.add_lustre_module('lov', 'lov')
1748 manager.add_lustre_module('mds', 'mds')
1750 if self.fstype == 'smfs':
1751 manager.add_lustre_module('smfs', 'smfs')
1753 if self.fstype == 'ldiskfs':
1754 manager.add_lustre_module('ldiskfs', 'ldiskfs')
1757 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1759 # if fstype is smfs, then we should also take care about backing
1761 if self.fstype == 'smfs':
1762 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
# 'snap' mount option is only valid on top of smfs.
1764 for option in string.split(self.mountfsoptions, ','):
1765 if option == 'snap':
1766 if not self.fstype == 'smfs':
1767 panic("mountoptions has 'snap', but fstype is not smfs.")
1768 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1769 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
1772 if self.master_uuid:
1773 self.master.add_module(manager)
# prepare fragment: set up the block device, MDT, mount options and the MDS
# device itself, then apply root-squash settings.
# NOTE(review): the enclosing "def prepare" line is among the missing lines of
# this sparse extraction. Code kept byte-identical; comments added.
1776 if not config.record and is_prepared(self.name):
1779 debug(self.uuid, "not active")
1782 # run write_conf automatically, if --reformat used
1784 self.info(self.devpath, self.fstype, self.size, self.format)
1788 if self.master_uuid:
1789 self.master.prepare()
1791 # never reformat here
1792 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1793 self.format, self.journal_size, self.inode_size,
1794 self.mkfsoptions, self.backfstype, self.backdevpath)
1796 if not is_prepared('MDT'):
1797 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
# Merge default, command-line and per-device mount options (in that order).
1799 mountfsoptions = def_mount_options(self.fstype, 'mds')
1801 if config.mountfsoptions:
1803 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1805 mountfsoptions = config.mountfsoptions
1806 if self.mountfsoptions:
1807 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1809 if self.mountfsoptions:
1811 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1813 mountfsoptions = self.mountfsoptions
# smfs mounts via type=/dev= options pointing at the backing fs.
1815 if self.fstype == 'smfs':
1816 realdev = self.fstype
1819 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1823 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1828 print 'MDS mount options: ' + mountfsoptions
# 'dumb' is a placeholder the MDS code recognizes and skips (see write_conf).
1830 if not self.master_uuid:
1831 self.master_uuid = 'dumb'
1833 if not self.obdtype:
1834 self.obdtype = 'dumb'
1836 if not self.client_uuids:
1837 lctl.newdev("mds", self.name, self.uuid,
1838 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1839 'dumb', mountfsoptions,
1840 self.master_uuid, self.obdtype))
1842 lctl.newdev("mds", self.name, self.uuid,
1843 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1844 self.name, mountfsoptions,
1845 self.master_uuid, self.obdtype))
# Development trees install the group-hash upcall next to lconf.
1847 if development_mode():
1848 procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
1849 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
1850 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1851 print "MDS Warning: failed to set group-hash upcall"
1853 run("echo ", upcall, " > ", procentry)
1855 except CommandError, e:
1857 panic("MDS is missing the config log. Need to run " +
1858 "lconf --write_conf.")
# Command-line root-squash settings override the per-device config.
1862 if config.root_squash == None:
1863 config.root_squash = self.root_squash
1864 if config.no_root_squash == None:
1865 config.no_root_squash = self.no_root_squash
1866 if config.root_squash:
1867 if config.no_root_squash:
1868 nsnid = config.no_root_squash
1871 lctl.root_squash(self.name, config.root_squash, nsnid)
# write_conf: mount the MDS once to record per-client setup/cleanup logs.
# NOTE(review): sparse extraction; code kept byte-identical, comments added.
1873 def write_conf(self):
1874 if not self.client_uuids:
1878 if not is_prepared(self.name):
1879 self.info(self.devpath, self.fstype, self.format)
1881 blkdev = block_dev(self.devpath, self.size, self.fstype,
1882 config.reformat, self.format, self.journal_size,
1883 self.inode_size, self.mkfsoptions,
1884 self.backfstype, self.backdevpath)
1886 # Even for writing logs we mount mds with supplied mount options
1887 # because it will not mount smfs (if used) otherwise.
1889 mountfsoptions = def_mount_options(self.fstype, 'mds')
1891 if config.mountfsoptions:
1893 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1895 mountfsoptions = config.mountfsoptions
1896 if self.mountfsoptions:
1897 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1899 if self.mountfsoptions:
1901 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1903 mountfsoptions = self.mountfsoptions
1905 if self.fstype == 'smfs':
1906 realdev = self.fstype
1909 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1913 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1918 print 'MDS mount options: ' + mountfsoptions
1920 if not self.obdtype:
1921 self.obdtype = 'dumb'
1923 # As mount options are passed by 4th param to config tool, we need
1924 # to pass something in 3rd param. But we do not want this 3rd param
1925 # be counted as a profile name for reading log on MDS setup, thus,
1926 # we pass there some predefined sign like 'dumb', which will be
1927 # checked in MDS code and skipped. Probably there is more nice way
1928 # like pass empty string and check it in config tool and pass null
1930 lctl.newdev("mds", self.name, self.uuid,
1931 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1932 'dumb', mountfsoptions,
1933 'dumb', self.obdtype))
# Record a setup log and a matching '-clean' teardown log per MDS client.
1936 # record logs for all MDS clients
1937 for obd_uuid in self.client_uuids:
1938 log("recording client:", obd_uuid)
1940 client_uuid = generate_client_uuid(self.name)
1941 client = VOSC(self.db.lookup(obd_uuid), client_uuid,
1942 self.name, self.name)
1944 lctl.clear_log(self.name, self.name)
1945 lctl.record(self.name, self.name)
1947 lctl.mount_option(self.name, client.get_name(), "")
1949 process_updates(self.db, self.name, self.name, client)
1952 lctl.clear_log(self.name, self.name + '-clean')
1953 lctl.record(self.name, self.name + '-clean')
1955 lctl.del_mount_option(self.name)
1957 process_updates(self.db, self.name, self.name + '-clean', client)
# Fragment: re-invoke lconf itself with --record for every client node found
# in the configuration, producing per-client setup and '-clean' logs.
1961 # record logs for each client
1967 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1969 config_options = CONFIG_FILE
1971 for node_db in self.db.lookup_class('node'):
1972 client_name = node_db.getName()
1973 for prof_uuid in node_db.get_refs('profile'):
1974 prof_db = node_db.lookup(prof_uuid)
1975 # refactor this into a function to test "clientness"
# A profile referencing a mountpoint or echoclient marks a client node.
1977 for ref_class, ref_uuid in prof_db.get_all_refs():
1978 if ref_class in ('mountpoint','echoclient'):
1979 debug("recording", client_name)
1980 old_noexec = config.noexec
1982 ret, out = run (sys.argv[0], noexec_opt,
1983 " -v --record --nomod",
1984 "--record_log", client_name,
1985 "--record_device", self.name,
1986 "--node", client_name,
1989 for s in out: log("record> ", string.strip(s))
1990 ret, out = run (sys.argv[0], noexec_opt,
1991 "--cleanup -v --record --nomod",
1992 "--record_log", client_name + "-clean",
1993 "--record_device", self.name,
1994 "--node", client_name,
1997 for s in out: log("record> ", string.strip(s))
1998 config.noexec = old_noexec
# Fragment: MDSDEV teardown — cleanup the MDS device, the shared MDT device
# (once no MDS remains), and the underlying block device.
2001 lctl.cleanup(self.name, self.uuid, 0, 0)
2002 except CommandError, e:
2003 log(self.module_name, "cleanup failed: ", self.name)
2006 Module.cleanup(self)
2008 clean_dev(self.devpath, self.fstype, self.backfstype,
# True while any 'mds' device is still listed by lctl.
2011 def msd_remaining(self):
2012 out = lctl.device_list()
2014 if string.split(s)[2] in ('mds',):
2017 def safe_to_clean(self):
2020 def safe_to_clean_modules(self):
2021 return not self.msd_remaining()
2025 debug(self.uuid, "not active")
2028 if is_prepared(self.name):
2030 lctl.cleanup(self.name, self.uuid, config.force,
2032 except CommandError, e:
2033 log(self.module_name, "cleanup failed: ", self.name)
2036 Module.cleanup(self)
2038 if self.master_uuid:
2039 self.master.cleanup()
# The shared MDT device goes away with the last MDS on this node.
2040 if not self.msd_remaining() and is_prepared('MDT'):
2042 lctl.cleanup("MDT", "MDT_UUID", config.force,
2044 except CommandError, e:
2045 print "cleanup failed: ", self.name
2049 clean_dev(self.devpath, self.fstype, self.backfstype,
2052 def correct_level(self, level, op=None):
2053 #if self.master_uuid:
# --- OSD: object storage device (obdfilter/obdecho + backing filesystem) ---
# NOTE(review): sparse extraction; class header line is among the missing
# lines. Code kept byte-identical; only comments were added.
2058 def __init__(self, db):
2059 Module.__init__(self, 'OSD', db)
2060 self.osdtype = self.db.get_val('osdtype')
2061 self.devpath = self.db.get_val('devpath', '')
2062 self.backdevpath = self.db.get_val('backdevpath', '')
2063 self.size = self.db.get_val_int('devsize', 0)
2064 self.journal_size = self.db.get_val_int('journalsize', 0)
2065 self.inode_size = self.db.get_val_int('inodesize', 0)
2066 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2067 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2068 self.fstype = self.db.get_val('fstype', '')
2069 self.backfstype = self.db.get_val('backfstype', '')
2070 self.nspath = self.db.get_val('nspath', '')
# Take the OST target's name/uuid in place of the OSD's own.
2071 target_uuid = self.db.get_first_ref('target')
2072 ost = self.db.lookup(target_uuid)
2073 self.name = ost.getName()
2074 self.format = self.db.get_val('autoformat', 'yes')
2075 if ost.get_val('failover', 0):
2076 self.failover_ost = 'f'
2078 self.failover_ost = 'n'
2080 active_uuid = get_active_target(ost)
2082 panic("No target device found:", target_uuid)
2083 if active_uuid == self.uuid:
2087 if self.active and config.group and config.group != ost.get_val('group'):
2090 self.target_dev_uuid = self.uuid
2091 self.uuid = target_uuid
2093 def add_module(self, manager):
2095 manager.add_lustre_module('ost', 'ost')
2097 if self.fstype == 'smfs':
2098 manager.add_lustre_module('smfs', 'smfs')
2100 if self.fstype == 'ldiskfs':
2101 manager.add_lustre_module('ldiskfs', 'ldiskfs')
2103 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2104 if self.fstype == 'smfs':
2105 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
# NOTE(review): unlike the MDS path, this iterates the option string's
# characters rather than string.split(..., ',') — looks suspicious, but the
# surrounding lines are incomplete here, so left untouched; verify upstream.
2107 for option in self.mountfsoptions:
2108 if option == 'snap':
2109 if not self.fstype == 'smfs':
2110 panic("mountoptions with snap, but fstype is not smfs\n")
2111 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2112 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2114 manager.add_lustre_module(self.osdtype, self.osdtype)
2116 # need to check /proc/mounts and /etc/mtab before
2117 # formatting anything.
2118 # FIXME: check if device is already formatted.
2120 if is_prepared(self.name):
2123 debug(self.uuid, "not active")
2125 self.info(self.osdtype, self.devpath, self.size, self.fstype,
2126 self.format, self.journal_size, self.inode_size)
# obdecho needs no backing block device.
2128 if self.osdtype == 'obdecho':
2131 blkdev = block_dev(self.devpath, self.size, self.fstype,
2132 config.reformat, self.format, self.journal_size,
2133 self.inode_size, self.mkfsoptions, self.backfstype,
2136 mountfsoptions = def_mount_options(self.fstype, 'ost')
2138 if config.mountfsoptions:
2140 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
2142 mountfsoptions = config.mountfsoptions
2143 if self.mountfsoptions:
2144 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2146 if self.mountfsoptions:
2148 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2150 mountfsoptions = self.mountfsoptions
2152 if self.fstype == 'smfs':
2153 realdev = self.fstype
2156 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
2160 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
2165 print 'OSD mount options: ' + mountfsoptions
2167 lctl.newdev(self.osdtype, self.name, self.uuid,
2168 setup ="%s %s %s %s" %(realdev, self.fstype,
# The shared OSS device is created once per node.
2171 if not is_prepared('OSS'):
2172 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2174 def osd_remaining(self):
2175 out = lctl.device_list()
2177 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2180 def safe_to_clean(self):
2183 def safe_to_clean_modules(self):
2184 return not self.osd_remaining()
2188 debug(self.uuid, "not active")
2190 if is_prepared(self.name):
2193 lctl.cleanup(self.name, self.uuid, config.force,
2195 except CommandError, e:
2196 log(self.module_name, "cleanup failed: ", self.name)
2199 if not self.osd_remaining() and is_prepared('OSS'):
2201 lctl.cleanup("OSS", "OSS_UUID", config.force,
2203 except CommandError, e:
2204 print "cleanup failed: ", self.name
2207 if not self.osdtype == 'obdecho':
2208 clean_dev(self.devpath, self.fstype, self.backfstype,
2211 def correct_level(self, level, op=None):
2214 # Generic client module, used by OSC and MDC
2215 class Client(Module):
2216 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2218 self.target_name = tgtdb.getName()
2219 self.target_uuid = tgtdb.getUUID()
2220 self.module_dir = module_dir
2221 self.module = module
2224 self.backup_targets = []
# Resolve the currently-active device for the target (failover aware).
2226 self.tgt_dev_uuid = get_active_target(tgtdb)
2227 if not self.tgt_dev_uuid:
2228 panic("No target device found for target(1):", self.target_name)
2230 self.kmod_manager = kmod_manager(config.lustre, config.portals)
2234 self.module = module
2235 self.module_name = string.upper(module)
# Default device name encodes module, host, target and fs for uniqueness.
2237 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2238 self.target_name, fs_name)
2240 self.name = self_name
2242 self.lookup_server(self.tgt_dev_uuid)
2244 self.lookup_backup_targets()
2245 self.fs_name = fs_name
2246 if not self.module_dir:
2247 self.module_dir = module
2249 def add_module(self, manager):
2250 manager.add_lustre_module(self.module_dir, self.module)
2252 def lookup_server(self, srv_uuid):
2253 """ Lookup a server's network information """
2254 self._server_nets = get_ost_net(self.db, srv_uuid)
2255 if len(self._server_nets) == 0:
2256 panic ("Unable to find a server for:", srv_uuid)
2261 def get_servers(self):
2262 return self._server_nets
2263 def lookup_backup_targets(self):
2264 """ Lookup alternative network information """
# Scan all profiles for other devices serving the same target uuid —
# those are the failover (backup) devices for this client.
2265 prof_list = toplustreDB.get_refs('profile')
2266 for prof_uuid in prof_list:
2267 prof_db = toplustreDB.lookup(prof_uuid)
2269 panic("profile:", prof_uuid, "not found.")
2270 for ref_class, ref_uuid in prof_db.get_all_refs():
2271 if ref_class in ('osd', 'mdsdev'):
2272 devdb = toplustreDB.lookup(ref_uuid)
2273 uuid = devdb.get_first_ref('target')
2274 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2275 self.backup_targets.append(ref_uuid)
# prepare (first part): connect to the primary server — directly when local,
# otherwise via routes — then create the client device.
2277 def prepare(self, ignore_connect_failure = 0):
2278 self.info(self.target_uuid)
2279 if not config.record and is_prepared(self.name):
2282 srv = choose_local_server(self.get_servers())
2286 routes = find_route(self.get_servers())
2287 if len(routes) == 0:
2288 panic ("no route to", self.target_uuid)
2289 for (srv, r) in routes:
2290 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2291 except CommandError, e:
2292 if not ignore_connect_failure:
# Targets may be configured inactive (--inactive) and are then skipped.
2295 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2296 debug("%s inactive" % self.target_uuid)
2297 inactive_p = "inactive"
2299 debug("%s active" % self.target_uuid)
2301 lctl.newdev(self.module, self.name, self.uuid,
2302 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2304 for tgt_dev_uuid in self.backup_targets:
2305 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2306 if len(this_nets) == 0:
2307 panic ("Unable to find a server for:", tgt_dev_uuid)
2308 srv = choose_local_server(this_nets)
2312 routes = find_route(this_nets);
2313 if len(routes) == 0:
2314 panic("no route to", tgt_dev_uuid)
2315 for (srv, r) in routes:
2316 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2318 lctl.add_conn(self.name, srv.nid_uuid);
# cleanup (first part): remove the client device, then disconnect the
# primary server and tear down any routes that were added for it.
2321 if is_prepared(self.name):
2322 Module.cleanup(self)
2324 srv = choose_local_server(self.get_servers())
2326 lctl.disconnect(srv)
2328 for (srv, r) in find_route(self.get_servers()):
2329 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2330 except CommandError, e:
2331 log(self.module_name, "cleanup failed: ", self.name)
2335 for tgt_dev_uuid in self.backup_targets:
2336 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2337 srv = choose_local_server(this_net)
2339 lctl.disconnect(srv)
2341 for (srv, r) in find_route(this_net):
2342 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2345 def correct_level(self, level, op=None):
# Mark the device inactive; failure is logged, not fatal.
2348 def deactivate(self):
2350 lctl.deactivate(self.name)
2351 except CommandError, e:
2352 log(self.module_name, "deactivate failed: ", self.name)
# MDC and OSC are thin Client subclasses; class header lines are among the
# missing lines of this sparse extraction.
2357 def __init__(self, db, uuid, fs_name):
2358 Client.__init__(self, db, uuid, 'mdc', fs_name)
2360 def permits_inactive(self):
2364 def __init__(self, db, uuid, fs_name):
2365 Client.__init__(self, db, uuid, 'osc', fs_name)
2367 def permits_inactive(self):
def mgmtcli_name_for_uuid(uuid):
    """Derive the management-client device name for the given UUID."""
    return "MGMTCLI_%s" % (uuid,)
class ManagementClient(Client):
    """Client device for the management service (mgmt_cli module)."""
    def __init__(self, db, uuid):
        # Fixed module/fs-name; the device name is derived from the db UUID.
        Client.__init__(self, db, uuid, 'mgmt_cli', '',
                        module_dir = 'mgmt',
                        self_name = mgmtcli_name_for_uuid(db.getUUID()))
# --- CMOBD: cache-master OBD pairing a master and a cache device ---
# NOTE(review): sparse extraction; code kept byte-identical, comments added.
2379 class CMOBD(Module):
2380 def __init__(self, db):
2381 Module.__init__(self, 'CMOBD', db)
2382 self.name = self.db.getName();
2383 self.uuid = generate_client_uuid(self.name)
2384 self.master_uuid = self.db.get_first_ref('masterobd')
2385 self.cache_uuid = self.db.get_first_ref('cacheobd')
2387 master_obd = self.db.lookup(self.master_uuid)
2389 panic('master obd not found:', self.master_uuid)
2391 cache_obd = self.db.lookup(self.cache_uuid)
2393 panic('cache obd not found:', self.cache_uuid)
2395 master_class = master_obd.get_class()
2396 cache_class = cache_obd.get_class()
# Wrap master/cache in the client type matching their device class.
2398 if master_class == 'ost' or master_class == 'lov':
2399 self.master = LOV(master_obd, self.master_uuid, self.name,
2400 "%s_master" % (self.name));
2401 self.cache = LOV(cache_obd, self.cache_uuid, self.name,
2402 "%s_cache" % (self.name));
2403 if master_class == 'mds':
2404 self.master = get_mdc(db, self.name, self.master_uuid)
2405 if cache_class == 'mds':
2406 self.cache = get_mdc(db, self.name, self.cache_uuid)
2408 if master_class == 'lmv':
2409 self.master = LMV(master_obd, self.master_uuid, self.name,
2410 "%s_master" % (self.name));
2411 if cache_class == 'lmv':
2412 self.cache = LMV(cache_obd, self.cache_uuid, self.name,
2413 "%s_cache" % (self.name));
2415 # need to check /proc/mounts and /etc/mtab before
2416 # formatting anything.
2417 # FIXME: check if device is already formatted.
2419 self.master.prepare()
2420 if not config.record and is_prepared(self.name):
2422 self.info(self.master_uuid, self.cache_uuid)
2423 lctl.newdev("cmobd", self.name, self.uuid,
2424 setup ="%s %s" %(self.master_uuid,
2431 def get_master_name(self):
2432 return self.master.name
2433 def get_cache_name(self):
2434 return self.cache.name
# CMOBD teardown / module loading fragments.
2437 if is_prepared(self.name):
2438 Module.cleanup(self)
2439 self.master.cleanup()
2441 def add_module(self, manager):
2442 manager.add_lustre_module('cmobd', 'cmobd')
2443 self.master.add_module(manager)
2445 def correct_level(self, level, op=None):
# --- COBD: caching OBD; same master/cache pairing idea as CMOBD ---
# NOTE(review): sparse extraction; class header line is among the missing
# lines. Code kept byte-identical; only comments were added.
2449 def __init__(self, db, uuid, name):
2450 Module.__init__(self, 'COBD', db)
2451 self.name = self.db.getName();
2452 self.uuid = generate_client_uuid(self.name)
2453 self.master_uuid = self.db.get_first_ref('masterobd')
2454 self.cache_uuid = self.db.get_first_ref('cacheobd')
2456 master_obd = self.db.lookup(self.master_uuid)
2458 panic('master obd not found:', self.master_uuid)
2460 cache_obd = self.db.lookup(self.cache_uuid)
2462 panic('cache obd not found:', self.cache_uuid)
2464 master_class = master_obd.get_class()
2465 cache_class = cache_obd.get_class()
2467 if master_class == 'ost' or master_class == 'lov':
2468 self.master = LOV(master_obd, self.master_uuid, name,
2469 "%s_master" % (self.name));
2470 self.cache = LOV(cache_obd, self.cache_uuid, name,
2471 "%s_cache" % (self.name));
2472 if master_class == 'mds':
2473 self.master = get_mdc(db, name, self.master_uuid)
2474 if cache_class == 'mds':
2475 self.cache = get_mdc(db, name, self.cache_uuid)
2477 if master_class == 'lmv':
2478 self.master = LMV(master_obd, self.master_uuid, self.name,
2479 "%s_master" % (self.name));
2480 if cache_class == 'lmv':
2481 self.cache = LMV(cache_obd, self.cache_uuid, self.name,
2482 "%s_cache" % (self.name));
2484 # need to check /proc/mounts and /etc/mtab before
2485 # formatting anything.
2486 # FIXME: check if device is already formatted.
2493 def get_master_name(self):
2494 return self.master.name
2496 def get_cache_name(self):
2497 return self.cache.name
# prepare: bring up both halves, then the cobd device joining them.
2500 self.master.prepare()
2501 self.cache.prepare()
2502 if not config.record and is_prepared(self.name):
2504 self.info(self.master_uuid, self.cache_uuid)
2505 lctl.newdev("cobd", self.name, self.uuid,
2506 setup ="%s %s" %(self.master.name,
2510 if is_prepared(self.name):
2511 Module.cleanup(self)
2512 self.master.cleanup()
2513 self.cache.cleanup()
2515 def add_module(self, manager):
2516 manager.add_lustre_module('cobd', 'cobd')
2517 self.master.add_module(manager)
2519 # virtual interface for OSC and LOV
# Dispatches to LOV, COBD or OSC depending on the device class in the db.
2521 def __init__(self, db, client_uuid, name, name_override = None):
2522 Module.__init__(self, 'VOSC', db)
2523 if db.get_class() == 'lov':
2524 self.osc = LOV(db, client_uuid, name, name_override)
2526 elif db.get_class() == 'cobd':
2527 self.osc = COBD(db, client_uuid, name)
2530 self.osc = OSC(db, client_uuid, name)
2534 return self.osc.get_uuid()
2537 return self.osc.get_name()
2545 def add_module(self, manager):
2546 self.osc.add_module(manager)
2548 def correct_level(self, level, op=None):
2549 return self.osc.correct_level(level, op)
2551 # virtual interface for MDC and LMV
# Dispatches to LMV, COBD or MDC depending on the device class in the db.
2553 def __init__(self, db, client_uuid, name, name_override = None):
2554 Module.__init__(self, 'VMDC', db)
2555 if db.get_class() == 'lmv':
2556 self.mdc = LMV(db, client_uuid, name)
2557 elif db.get_class() == 'cobd':
2558 self.mdc = COBD(db, client_uuid, name)
2560 self.mdc = MDC(db, client_uuid, name)
2563 return self.mdc.uuid
2566 return self.mdc.name
2574 def add_module(self, manager):
2575 self.mdc.add_module(manager)
2577 def correct_level(self, level, op=None):
2578 return self.mdc.correct_level(level, op)
# --- ECHO_CLIENT: test client layered over a VOSC ---
2580 class ECHO_CLIENT(Module):
2581 def __init__(self,db):
2582 Module.__init__(self, 'ECHO_CLIENT', db)
2583 self.obd_uuid = self.db.get_first_ref('obd')
2584 obd = self.db.lookup(self.obd_uuid)
2585 self.uuid = generate_client_uuid(self.name)
2586 self.osc = VOSC(obd, self.uuid, self.name)
2589 if not config.record and is_prepared(self.name):
2592 self.osc.prepare() # XXX This is so cheating. -p
2593 self.info(self.obd_uuid)
2595 lctl.newdev("echo_client", self.name, self.uuid,
2596 setup = self.osc.get_name())
2599 if is_prepared(self.name):
2600 Module.cleanup(self)
2603 def add_module(self, manager):
2604 self.osc.add_module(manager)
2605 manager.add_lustre_module('obdecho', 'obdecho')
2607 def correct_level(self, level, op=None):
# Build a quasi-unique client uuid from random hex parts around the name,
# truncated to the 36-character uuid limit.
# NOTE(review): original line 2612 (the middle format argument, presumably
# the name) is among the missing lines of this sparse extraction.
2610 def generate_client_uuid(name):
2611 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2613 int(random.random() * 1048576),
2614 int(random.random() * 1048576))
2615 return client_uuid[:36]
2617 def my_rstrip(s, chars):
2618 """my_rstrip(s, chars) -> strips any instances of the characters
2619 found in chars from the right side of string s"""
2620 # XXX required because python versions pre 2.2.3 don't allow
2621 # string.rstrip() to take alternate char lists
# Try the modern two-argument rstrip first; fall back to a manual scan on
# old interpreters that raise TypeError.
2625 ns = string.rstrip(s, '/')
2626 except TypeError, e:
2627 for i in range(len(s) - 1, 0, -1):
# --- Mountpoint: client-side lustre_lite mount of a filesystem ---
# NOTE(review): sparse extraction; code kept byte-identical, comments added.
2635 class Mountpoint(Module):
2636 def __init__(self,db):
2637 Module.__init__(self, 'MTPT', db)
2638 self.path = self.db.get_val('path')
2639 self.clientoptions = self.db.get_val('clientoptions', '')
2640 self.fs_uuid = self.db.get_first_ref('filesystem')
2641 fs = self.db.lookup(self.fs_uuid)
# Prefer an LMV reference for metadata; fall back to a plain MDS.
2642 self.mds_uuid = fs.get_first_ref('lmv')
2643 if not self.mds_uuid:
2644 self.mds_uuid = fs.get_first_ref('mds')
2645 self.obd_uuid = fs.get_first_ref('obd')
2646 self.mgmt_uuid = fs.get_first_ref('mgmt')
2647 client_uuid = generate_client_uuid(self.name)
2649 ost = self.db.lookup(self.obd_uuid)
2651 panic("no ost: ", self.obd_uuid)
2653 mds = self.db.lookup(self.mds_uuid)
2655 panic("no mds: ", self.mds_uuid)
2657 self.vosc = VOSC(ost, client_uuid, self.name)
2658 self.vmdc = VMDC(mds, client_uuid, self.name)
2661 self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
# prepare: bring up the data/metadata clients, then mount lustre_lite.
2667 if not config.record and fs_is_mounted(self.path):
2668 log(self.path, "already mounted.")
2672 self.mgmtcli.prepare()
2675 vmdc_name = self.vmdc.get_name()
2677 self.info(self.path, self.mds_uuid, self.obd_uuid)
2678 if config.record or config.lctl_dump:
2679 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
# Merge command-line and per-mountpoint client options.
2682 if config.clientoptions:
2683 if self.clientoptions:
2684 self.clientoptions = self.clientoptions + ',' + \
2685 config.clientoptions
2687 self.clientoptions = config.clientoptions
2688 if self.clientoptions:
2689 self.clientoptions = ',' + self.clientoptions
2690 # Linux kernel will deal with async and not pass it to ll_fill_super,
2691 # so replace it with Lustre async
2692 self.clientoptions = string.replace(self.clientoptions, "async",
2695 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
2696 (self.vosc.get_name(), vmdc_name, self.clientoptions,
2697 config.config, self.path)
2698 run("mkdir", self.path)
2703 panic("mount failed:", self.path, ":", string.join(val))
# cleanup: unmount (forcefully with --force), then tear down the clients.
2706 self.info(self.path, self.mds_uuid,self.obd_uuid)
2708 if config.record or config.lctl_dump:
2709 lctl.del_mount_option(local_node_name)
2711 if fs_is_mounted(self.path):
2713 (rc, out) = run("umount", "-f", self.path)
2715 (rc, out) = run("umount", self.path)
2717 raise CommandError('umount', out, rc)
2719 if fs_is_mounted(self.path):
2720 panic("fs is still mounted:", self.path)
2725 self.mgmtcli.cleanup()
2727 def add_module(self, manager):
2728 manager.add_lustre_module('mdc', 'mdc')
2731 self.mgmtcli.add_module(manager)
2733 self.vosc.add_module(manager)
2734 self.vmdc.add_module(manager)
2736 manager.add_lustre_module('llite', 'llite')
2738 def correct_level(self, level, op=None):
2741 # ============================================================
2742 # misc query functions
2744 def get_ost_net(self, osd_uuid):
2748 osd = self.lookup(osd_uuid)
2749 node_uuid = osd.get_first_ref('node')
2750 node = self.lookup(node_uuid)
2752 panic("unable to find node for osd_uuid:", osd_uuid,
2753 " node_ref:", node_uuid_)
2754 for net_uuid in node.get_networks():
2755 db = node.lookup(net_uuid)
2756 srv_list.append(Network(db))
2760 # the order of initialization is based on level.
# Map a service class to its startup level; bounds-checked against the
# --minlevel/--maxlevel command-line filters.
2761 def getServiceLevel(self):
2762 type = self.get_class()
2764 if type in ('network',):
2766 elif type in ('routetbl',):
2768 elif type in ('ldlm',):
2770 elif type in ('osd', 'cobd'):
2772 elif type in ('mdsdev',):
2774 elif type in ('lmv',):
2776 elif type in ('cmobd',):
2778 elif type in ('mountpoint', 'echoclient'):
2781 panic("Unknown type: ", type)
2783 if ret < config.minlevel or ret > config.maxlevel:
2788 # return list of services in a profile. list is a list of tuples
2789 # [(level, db_object),]
2790 def getServices(self):
2792 for ref_class, ref_uuid in self.get_all_refs():
2793 servdb = self.lookup(ref_uuid)
2795 level = getServiceLevel(servdb)
2797 list.append((level, servdb))
2799 panic('service not found: ' + ref_uuid)
2805 ############################################################
2807 # FIXME: clean this mess up!
2809 # OSC is no longer in the xml, so we have to fake it.
2810 # this is getting ugly and begging for another refactoring
2811 def get_osc(ost_db, uuid, fs_name):
2812 osc = OSC(ost_db, uuid, fs_name)
2815 def get_mdc(db, fs_name, mds_uuid):
2816 mds_db = db.lookup(mds_uuid);
2818 error("no mds:", mds_uuid)
2819 mdc = MDC(mds_db, mds_uuid, fs_name)
2822 ############################################################
2823 # routing ("rooting")
2824 # list of (nettype, cluster_id, nid)
2827 def find_local_clusters(node_db):
2828 global local_clusters
2829 for netuuid in node_db.get_networks():
2830 net = node_db.lookup(netuuid)
2832 debug("add_local", netuuid)
2833 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2835 if acceptors.has_key(srv.port):
2836 panic("duplicate port:", srv.port)
2837 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2839 # This node is a gateway.
2841 def node_is_router():
2844 # If there are any routers found in the config, then this will be true
2845 # and all nodes will load kptlrouter.
def node_needs_router():
    # kptlrouter must be loaded either when some route in the config
    # requires it or when this node is itself configured as a router.
    if needs_router:
        return needs_router
    return is_router
2850 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2851 # Currently, these local routes are only added to kptlrouter route
2852 # table if they are needed to connect to a specific server. This
2853 # should be changed so all available routes are loaded, and the
2854 # ptlrouter can make all the decisions.
2857 def find_local_routes(lustre):
2858 """ Scan the lustre config looking for routers . Build list of
2860 global local_routes, needs_router
2862 list = lustre.lookup_class('node')
2864 if router.get_val_int('router', 0):
2866 for (local_type, local_cluster_id, local_nid) in local_clusters:
2868 for netuuid in router.get_networks():
2869 db = router.lookup(netuuid)
2870 if (local_type == db.get_val('nettype') and
2871 local_cluster_id == db.get_val('clusterid')):
2872 gw = db.get_val('nid')
2875 debug("find_local_routes: gw is", gw)
2876 for route in router.get_local_routes(local_type, gw):
2877 local_routes.append(route)
2878 debug("find_local_routes:", local_routes)
2881 def choose_local_server(srv_list):
2882 for srv in srv_list:
2883 if local_cluster(srv.net_type, srv.cluster_id):
2886 def local_cluster(net_type, cluster_id):
2887 for cluster in local_clusters:
2888 if net_type == cluster[0] and cluster_id == cluster[1]:
2892 def local_interface(net_type, cluster_id, nid):
2893 for cluster in local_clusters:
2894 if (net_type == cluster[0] and cluster_id == cluster[1]
2895 and nid == cluster[2]):
2899 def find_route(srv_list):
2901 frm_type = local_clusters[0][0]
2902 for srv in srv_list:
2903 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2904 to_type = srv.net_type
2906 cluster_id = srv.cluster_id
2907 debug ('looking for route to', to_type, to)
2908 for r in local_routes:
2909 debug("find_route: ", r)
2910 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2911 result.append((srv, r))
2914 def get_active_target(db):
2915 target_uuid = db.getUUID()
2916 target_name = db.getName()
2917 node_name = get_select(target_name)
2919 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2921 tgt_dev_uuid = db.get_first_ref('active')
2924 def get_server_by_nid_uuid(db, nid_uuid):
2925 for n in db.lookup_class("network"):
2927 if net.nid_uuid == nid_uuid:
2931 ############################################################
2935 type = db.get_class()
2936 debug('Service:', type, db.getName(), db.getUUID())
2941 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2942 elif type == 'network':
2944 elif type == 'routetbl':
2948 elif type == 'cobd':
2949 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2950 elif type == 'cmobd':
2952 elif type == 'mdsdev':
2954 elif type == 'mountpoint':
2956 elif type == 'echoclient':
2961 panic ("unknown service type:", type)
2965 # Prepare the system to run lustre using a particular profile
2966 # in the configuration.
2967 # * load the modules
2968 # * setup networking for the current node
2969 # * make sure partitions are in place and prepared
2970 # * initialize devices with lctl
2971 # Levels is important, and needs to be enforced.
2972 def for_each_profile(db, prof_list, operation):
2973 for prof_uuid in prof_list:
2974 prof_db = db.lookup(prof_uuid)
2976 panic("profile:", prof_uuid, "not found.")
2977 services = getServices(prof_db)
2980 def magic_get_osc(db, rec, lov):
2982 lov_uuid = lov.get_uuid()
2983 lov_name = lov.osc.fs_name
2985 lov_uuid = rec.getAttribute('lov_uuidref')
2986 # FIXME: better way to find the mountpoint?
2987 filesystems = db.root_node.getElementsByTagName('filesystem')
2989 for fs in filesystems:
2990 ref = fs.getElementsByTagName('obd_ref')
2991 if ref[0].getAttribute('uuidref') == lov_uuid:
2992 fsuuid = fs.getAttribute('uuid')
2996 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
2998 mtpts = db.root_node.getElementsByTagName('mountpoint')
3001 ref = fs.getElementsByTagName('filesystem_ref')
3002 if ref[0].getAttribute('uuidref') == fsuuid:
3003 lov_name = fs.getAttribute('name')
3007 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3009 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3011 ost_uuid = rec.getAttribute('ost_uuidref')
3012 obd = db.lookup(ost_uuid)
3015 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3017 osc = get_osc(obd, lov_uuid, lov_name)
3019 panic('osc not found:', obd_uuid)
3022 # write logs for update records. sadly, logs of all types -- and updates in
3023 # particular -- are something of an afterthought. lconf needs rewritten with
3024 # these as core concepts. so this is a pretty big hack.
3025 def process_update_record(db, update, lov):
3026 for rec in update.childNodes:
3027 if rec.nodeType != rec.ELEMENT_NODE:
3030 log("found "+rec.nodeName+" record in update version " +
3031 str(update.getAttribute('version')))
3033 lov_uuid = rec.getAttribute('lov_uuidref')
3034 ost_uuid = rec.getAttribute('ost_uuidref')
3035 index = rec.getAttribute('index')
3036 gen = rec.getAttribute('generation')
3038 if not lov_uuid or not ost_uuid or not index or not gen:
3039 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3042 tmplov = db.lookup(lov_uuid)
3044 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3045 lov_name = tmplov.getName()
3047 lov_name = lov.osc.name
3049 # ------------------------------------------------------------- add
3050 if rec.nodeName == 'add':
3052 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3055 osc = magic_get_osc(db, rec, lov)
3058 # Only ignore connect failures with --force, which
3059 # isn't implemented here yet.
3060 osc.prepare(ignore_connect_failure=0)
3061 except CommandError, e:
3062 print "Error preparing OSC %s\n" % osc.uuid
3065 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3067 # ------------------------------------------------------ deactivate
3068 elif rec.nodeName == 'deactivate':
3072 osc = magic_get_osc(db, rec, lov)
3076 except CommandError, e:
3077 print "Error deactivating OSC %s\n" % osc.uuid
3080 # ---------------------------------------------------------- delete
3081 elif rec.nodeName == 'delete':
3085 osc = magic_get_osc(db, rec, lov)
3091 except CommandError, e:
3092 print "Error cleaning up OSC %s\n" % osc.uuid
3095 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3097 def process_updates(db, log_device, log_name, lov = None):
3098 updates = db.root_node.getElementsByTagName('update')
3100 if not u.childNodes:
3101 log("ignoring empty update record (version " +
3102 str(u.getAttribute('version')) + ")")
3105 version = u.getAttribute('version')
3106 real_name = "%s-%s" % (log_name, version)
3107 lctl.clear_log(log_device, real_name)
3108 lctl.record(log_device, real_name)
3110 process_update_record(db, u, lov)
3114 def doWriteconf(services):
3118 if s[1].get_class() == 'mdsdev':
3119 n = newService(s[1])
3122 def doSetup(services):
3127 n = newService(s[1])
3129 slist.append((n.level, n))
3132 nl = n[1].correct_level(n[0])
3133 nlist.append((nl, n[1]))
3138 def doLoadModules(services):
3142 # adding all needed modules from all services
3144 n = newService(s[1])
3145 n.add_module(mod_manager)
3147 # loading all registered modules
3148 mod_manager.load_modules()
3150 def doUnloadModules(services):
3154 # adding all needed modules from all services
3156 n = newService(s[1])
3157 if n.safe_to_clean_modules():
3158 n.add_module(mod_manager)
3160 # unloading all registered modules
3161 mod_manager.cleanup_modules()
3163 def doCleanup(services):
3169 n = newService(s[1])
3171 slist.append((n.level, n))
3174 nl = n[1].correct_level(n[0])
3175 nlist.append((nl, n[1]))
3180 if n[1].safe_to_clean():
3185 def doHost(lustreDB, hosts):
3186 global is_router, local_node_name
3189 node_db = lustreDB.lookup_name(h, 'node')
3193 panic('No host entry found.')
3195 local_node_name = node_db.get_val('name', 0)
3196 is_router = node_db.get_val_int('router', 0)
3197 lustre_upcall = node_db.get_val('lustreUpcall', '')
3198 portals_upcall = node_db.get_val('portalsUpcall', '')
3199 timeout = node_db.get_val_int('timeout', 0)
3200 ptldebug = node_db.get_val('ptldebug', '')
3201 subsystem = node_db.get_val('subsystem', '')
3203 find_local_clusters(node_db)
3205 find_local_routes(lustreDB)
3207 # Two step process: (1) load modules, (2) setup lustre
3208 # if not cleaning, load modules first.
3209 prof_list = node_db.get_refs('profile')
3211 if config.write_conf:
3212 for_each_profile(node_db, prof_list, doLoadModules)
3214 for_each_profile(node_db, prof_list, doWriteconf)
3215 for_each_profile(node_db, prof_list, doUnloadModules)
3218 elif config.recover:
3219 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3220 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3221 "--client_uuid <UUID> --conn_uuid <UUID>")
3222 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3224 elif config.cleanup:
3226 # the command line can override this value
3228 # ugly hack, only need to run lctl commands for --dump
3229 if config.lctl_dump or config.record:
3230 for_each_profile(node_db, prof_list, doCleanup)
3233 sys_set_timeout(timeout)
3234 sys_set_ptldebug(ptldebug)
3235 sys_set_subsystem(subsystem)
3236 sys_set_lustre_upcall(lustre_upcall)
3237 sys_set_portals_upcall(portals_upcall)
3239 for_each_profile(node_db, prof_list, doCleanup)
3240 for_each_profile(node_db, prof_list, doUnloadModules)
3244 # ugly hack, only need to run lctl commands for --dump
3245 if config.lctl_dump or config.record:
3246 sys_set_timeout(timeout)
3247 sys_set_lustre_upcall(lustre_upcall)
3248 for_each_profile(node_db, prof_list, doSetup)
3252 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3253 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3255 for_each_profile(node_db, prof_list, doLoadModules)
3257 sys_set_debug_path()
3258 sys_set_ptldebug(ptldebug)
3259 sys_set_subsystem(subsystem)
3260 script = config.gdb_script
3261 run(lctl.lctl, ' modules >', script)
3263 log ("The GDB module script is in", script)
3264 # pause, so user has time to break and
3267 sys_set_timeout(timeout)
3268 sys_set_lustre_upcall(lustre_upcall)
3269 sys_set_portals_upcall(portals_upcall)
3271 for_each_profile(node_db, prof_list, doSetup)
3274 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3275 tgt = lustreDB.lookup(tgt_uuid)
3277 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3278 new_uuid = get_active_target(tgt)
3280 raise Lustre.LconfError("doRecovery: no active target found for: " +
3282 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3284 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3286 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3288 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
3291 lctl.disconnect(oldnet)
3292 except CommandError, e:
3293 log("recover: disconnect", nid_uuid, "failed: ")
3298 except CommandError, e:
3299 log("recover: connect failed")
3302 lctl.recover(client_uuid, net.nid_uuid)
3305 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3306 base = os.path.dirname(cmd)
3307 if development_mode():
3308 if not config.lustre:
3309 debug('using objdir module paths')
3310 config.lustre = (os.path.join(base, ".."))
3311 # normalize the portals dir, using command line arg if set
3313 portals_dir = config.portals
3314 dir = os.path.join(config.lustre, portals_dir)
3315 config.portals = dir
3316 debug('config.portals', config.portals)
3317 elif config.lustre and config.portals:
3319 # if --lustre and --portals, normalize portals
3320 # can ignore PORTALS_DIR here, since it is probably useless here
3321 config.portals = os.path.join(config.lustre, config.portals)
3322 debug('config.portals B', config.portals)
3324 def sysctl(path, val):
3325 debug("+ sysctl", path, val)
3329 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    # Point the kernel's debug-dump path at the configured location.
    path = config.debug_path
    sysctl('portals/debug_path', path)
3339 def sys_set_lustre_upcall(upcall):
3340 # the command overrides the value in the node config
3341 if config.lustre_upcall:
3342 upcall = config.lustre_upcall
3344 upcall = config.upcall
3346 lctl.set_lustre_upcall(upcall)
3348 def sys_set_portals_upcall(upcall):
3349 # the command overrides the value in the node config
3350 if config.portals_upcall:
3351 upcall = config.portals_upcall
3353 upcall = config.upcall
3355 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout via lctl.

    The command-line --timeout value overrides the value taken from
    the node config; a missing or non-positive timeout is ignored.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # compare against None with 'is not' (identity), not '!='
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
3364 def sys_tweak_socknal ():
3365 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3366 if sys_get_branch() == '2.6':
3367 fp = open('/proc/meminfo')
3368 lines = fp.readlines()
3373 if a[0] == 'MemTotal:':
3375 debug("memtotal" + memtotal)
3376 if int(memtotal) < 262144:
3377 minfree = int(memtotal) / 16
3380 debug("+ minfree ", minfree)
3381 sysctl("vm/min_free_kbytes", minfree)
3382 if config.single_socket:
3383 sysctl("socknal/typed", 0)
3385 def sys_optimize_elan ():
3386 procfiles = ["/proc/elan/config/eventint_punt_loops",
3387 "/proc/qsnet/elan3/config/eventint_punt_loops",
3388 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3390 if os.access(p, os.W_OK):
3391 run ("echo 1 > " + p)
3393 def sys_set_ptldebug(ptldebug):
3395 ptldebug = config.ptldebug
3398 val = eval(ptldebug, ptldebug_names)
3399 val = "0x%x" % (val)
3400 sysctl('portals/debug', val)
3401 except NameError, e:
3404 def sys_set_subsystem(subsystem):
3405 if config.subsystem:
3406 subsystem = config.subsystem
3409 val = eval(subsystem, subsystem_names)
3410 val = "0x%x" % (val)
3411 sysctl('portals/subsystem_debug', val)
3412 except NameError, e:
3415 def sys_set_netmem_max(path, max):
3416 debug("setting", path, "to at least", max)
3424 fp = open(path, 'w')
3425 fp.write('%d\n' %(max))
def sys_make_devices():
    # Create the portals and obd character-device nodes when absent.
    for dev, mknod_cmd in (('/dev/portals', 'mknod /dev/portals c 10 240'),
                           ('/dev/obd', 'mknod /dev/obd c 10 241')):
        if not os.access(dev, os.R_OK):
            run(mknod_cmd)
3436 # Add dir to the global PATH, if not already there.
3437 def add_to_path(new_dir):
3438 syspath = string.split(os.environ['PATH'], ':')
3439 if new_dir in syspath:
3441 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
3443 def default_debug_path():
3444 path = '/tmp/lustre-log'
3445 if os.path.isdir('/r'):
3450 def default_gdb_script():
3451 script = '/tmp/ogdb'
3452 if os.path.isdir('/r'):
3453 return '/r' + script
3458 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3459 # ensure basic elements are in the system path
3460 def sanitise_path():
3461 for dir in DEFAULT_PATH:
3464 # global hack for the --select handling
3466 def init_select(args):
3467 # args = [service=nodeA,service2=nodeB service3=nodeC]
3470 list = string.split(arg, ',')
3472 srv, node = string.split(entry, '=')
3473 tgt_select[srv] = node
3475 def get_select(srv):
3476 if tgt_select.has_key(srv):
3477 return tgt_select[srv]
3481 FLAG = Lustre.Options.FLAG
3482 PARAM = Lustre.Options.PARAM
3483 INTPARAM = Lustre.Options.INTPARAM
3484 PARAMLIST = Lustre.Options.PARAMLIST
3486 ('verbose,v', "Print system commands as they are run"),
3487 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3488 ('config', "Cluster config name used for LDAP query", PARAM),
3489 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3490 ('node', "Load config for <nodename>", PARAM),
3491 ('cleanup,d', "Cleans up config. (Shutdown)"),
3492 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3494 ('single_socket', "socknal option: only use one socket instead of bundle",
3496 ('failover',"""Used to shut down without saving state.
3497 This will allow this node to "give up" a service to a
3498 another node for failover purposes. This will not
3499 be a clean shutdown.""",
3501 ('gdb', """Prints message after creating gdb module script
3502 and sleeps for 5 seconds."""),
3503 ('noexec,n', """Prints the commands and steps that will be run for a
3504 config without executing them. This can used to check if a
3505 config file is doing what it should be doing"""),
3506 ('nomod', "Skip load/unload module step."),
3507 ('nosetup', "Skip device setup/cleanup step."),
3508 ('reformat', "Reformat all devices (without question)"),
3509 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3510 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3511 ('clientoptions', "Additional options for Lustre", PARAM),
3512 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3514 ('write_conf', "Save all the client config information on mds."),
3515 ('record', "Write config information on mds."),
3516 ('record_log', "Name of config record log.", PARAM),
3517 ('record_device', "MDS device name that will record the config commands",
3519 ('root_squash', "MDS squash root to appointed uid",
3521 ('no_root_squash', "Don't squash root for appointed nid",
3523 ('minlevel', "Minimum level of services to configure/cleanup",
3525 ('maxlevel', """Maximum level of services to configure/cleanup
3526 Levels are aproximatly like:
3531 70 - mountpoint, echo_client, osc, mdc, lov""",
3533 ('lustre', """Base directory of lustre sources. This parameter will
3534 cause lconf to load modules from a source tree.""", PARAM),
3535 ('portals', """Portals source directory. If this is a relative path,
3536 then it is assumed to be relative to lustre. """, PARAM),
3537 ('timeout', "Set recovery timeout", INTPARAM),
3538 ('upcall', "Set both portals and lustre upcall script", PARAM),
3539 ('lustre_upcall', "Set lustre upcall script", PARAM),
3540 ('portals_upcall', "Set portals upcall script", PARAM),
3541 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3542 ('ptldebug', "Set the portals debug level", PARAM),
3543 ('subsystem', "Set the portals debug subsystem", PARAM),
3544 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3545 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3546 # Client recovery options
3547 ('recover', "Recover a device"),
3548 ('group', "The group of devices to configure or cleanup", PARAM),
3549 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3550 ('client_uuid', "The failed client (required for recovery)", PARAM),
3551 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3553 ('inactive', """The name of an inactive service, to be ignored during
3554 mounting (currently OST-only). Can be repeated.""",
3559 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3561 # in the upcall this is set to SIG_IGN
3562 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3564 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3566 config, args = cl.parse(sys.argv[1:])
3567 except Lustre.OptionError, e:
3571 setupModulePath(sys.argv[0])
3573 host = socket.gethostname()
3575 # the PRNG is normally seeded with time(), which is not so good for starting
3576 # time-synchronized clusters
3577 input = open('/dev/urandom', 'r')
3579 print 'Unable to open /dev/urandom!'
3581 seed = input.read(32)
3587 init_select(config.select)
3590 # allow config to be fetched via HTTP, but only with python2
3591 if sys.version[0] != '1' and args[0].startswith('http://'):
3594 config_file = urllib2.urlopen(args[0])
3595 except (urllib2.URLError, socket.error), err:
3596 if hasattr(err, 'args'):
3598 print "Could not access '%s': %s" %(args[0], err)
3600 elif not os.access(args[0], os.R_OK):
3601 print 'File not found or readable:', args[0]
3605 config_file = open(args[0], 'r')
3607 dom = xml.dom.minidom.parse(config_file)
3609 panic("%s does not appear to be a config file." % (args[0]))
3610 sys.exit(1) # make sure to die here, even in debug mode.
3612 CONFIG_FILE = args[0]
3613 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3614 if not config.config:
3615 config.config = os.path.basename(args[0])# use full path?
3616 if config.config[-4:] == '.xml':
3617 config.config = config.config[:-4]
3618 elif config.ldapurl:
3619 if not config.config:
3620 panic("--ldapurl requires --config name")
3621 dn = "config=%s,fs=lustre" % (config.config)
3622 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3623 elif config.ptldebug or config.subsystem:
3624 sys_set_ptldebug(None)
3625 sys_set_subsystem(None)
3628 print 'Missing config file or ldap URL.'
3629 print 'see lconf --help for command summary'
3632 toplustreDB = lustreDB
3634 ver = lustreDB.get_version()
3636 panic("No version found in config data, please recreate.")
3637 if ver != Lustre.CONFIG_VERSION:
3638 panic("Config version", ver, "does not match lconf version",
3639 Lustre.CONFIG_VERSION)
3643 node_list.append(config.node)
3646 node_list.append(host)
3647 node_list.append('localhost')
3649 debug("configuring for host: ", node_list)
3652 config.debug_path = config.debug_path + '-' + host
3653 config.gdb_script = config.gdb_script + '-' + host
3655 lctl = LCTLInterface('lctl')
3657 if config.lctl_dump:
3658 lctl.use_save_file(config.lctl_dump)
3661 if not (config.record_device and config.record_log):
3662 panic("When recording, both --record_log and --record_device must be specified.")
3663 lctl.clear_log(config.record_device, config.record_log)
3664 lctl.record(config.record_device, config.record_log)
3666 # init module manager
3667 mod_manager = kmod_manager(config.lustre, config.portals)
3669 doHost(lustreDB, node_list)
3671 if not config.record:
3676 process_updates(lustreDB, config.record_device, config.record_log)
3678 if __name__ == "__main__":
3681 except Lustre.LconfError, e:
3683 # traceback.print_exc(file=sys.stdout)
3685 except CommandError, e:
3689 if first_cleanup_error:
3690 sys.exit(first_cleanup_error)