3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = 'portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
107 "socknal" : (1 << 11),
108 "qswnal" : (1 << 12),
109 "pinger" : (1 << 13),
110 "filter" : (1 << 14),
116 "ptlrouter" : (1 << 20),
126 first_cleanup_error = 0
127 def cleanup_error(rc):
128 global first_cleanup_error
129 if not first_cleanup_error:
130 first_cleanup_error = rc
132 # ============================================================
133 # debugging and error funcs
135 def fixme(msg = "this feature"):
136 raise Lustre.LconfError, msg + ' not implemented yet.'
139 msg = string.join(map(str,args))
140 if not config.noexec:
141 raise Lustre.LconfError(msg)
146 msg = string.join(map(str,args))
151 print string.strip(s)
155 msg = string.join(map(str,args))
158 # ack, python's builtin int() does not support '0x123' syntax.
159 # eval can do it, although what a hack!
163 return eval(s, {}, {})
166 except SyntaxError, e:
167 raise ValueError("not a number")
169 raise ValueError("not a number")
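# Note: a roughly equivalent, eval-free sketch would be int(s, 0), which
# auto-detects the '0x' prefix, e.g. int('0x1f', 0) == 31 and int('17', 0) == 17,
# wrapped in the same try/except ValueError handling as above.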
171 # ============================================================
172 # locally defined exceptions
173 class CommandError (exceptions.Exception):
174 def __init__(self, cmd_name, cmd_err, rc=None):
175 self.cmd_name = cmd_name
176 self.cmd_err = cmd_err
181 if type(self.cmd_err) == types.StringType:
183 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
185 print "! %s: %s" % (self.cmd_name, self.cmd_err)
186 elif type(self.cmd_err) == types.ListType:
188 print "! %s (error %d):" % (self.cmd_name, self.rc)
190 print "! %s:" % (self.cmd_name)
191 for s in self.cmd_err:
192 print "> %s" %(string.strip(s))
197 # ============================================================
198 # handle daemons, like the acceptor
200 """ Manage starting and stopping a daemon. Assumes daemon manages
201 its own pid file. """
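    # Usage sketch (illustrative, based on the subclass below): an
    # AcceptorHandler supplies pidfile() and command_line(); callers check
    # running() before starting it and use clean_pidfile() to clear stale pids.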
203 def __init__(self, cmd):
209 log(self.command, "already running.")
211 self.path = find_prog(self.command)
213 panic(self.command, "not found.")
214 ret, out = runcmd(self.path +' '+ self.command_line())
216 raise CommandError(self.path, out, ret)
220 pid = self.read_pidfile()
222 log ("killing process", pid)
224 #time.sleep(1) # let daemon die
226 log("unable to kill", self.command, e)
228 log("unable to kill", self.command)
231 pid = self.read_pidfile()
241 def read_pidfile(self):
243 fp = open(self.pidfile(), 'r')
250 def clean_pidfile(self):
251 """ Remove a stale pidfile """
252 log("removing stale pidfile:", self.pidfile())
254 os.unlink(self.pidfile())
256 log(self.pidfile(), e)
258 class AcceptorHandler(DaemonHandler):
259 def __init__(self, port, net_type):
260 DaemonHandler.__init__(self, "acceptor")
265 return "/var/run/%s-%d.pid" % (self.command, self.port)
267 def command_line(self):
268 return string.join(map(str,(self.flags, self.port)))
272 # start the acceptors
274 if config.lctl_dump or config.record:
276 for port in acceptors.keys():
277 daemon = acceptors[port]
278 if not daemon.running():
281 def run_one_acceptor(port):
282 if config.lctl_dump or config.record:
284 if acceptors.has_key(port):
285 daemon = acceptors[port]
286 if not daemon.running():
289 panic("run_one_acceptor: No acceptor defined for port:", port)
291 def stop_acceptor(port):
292 if acceptors.has_key(port):
293 daemon = acceptors[port]
298 # ============================================================
299 # handle lctl interface
302 Manage communication with lctl
305 def __init__(self, cmd):
307 Initialize the class by finding the lctl binary.
309 self.lctl = find_prog(cmd)
311 self.record_device = ''
314 debug('! lctl not found')
317 raise CommandError('lctl', "unable to find lctl binary.")
319 def use_save_file(self, file):
320 self.save_file = file
322 def record(self, dev_name, logname):
323 log("Recording log", logname, "on", dev_name)
324 self.record_device = dev_name
325 self.record_log = logname
327 def end_record(self):
328 log("End recording log", self.record_log, "on", self.record_device)
329 self.record_device = None
330 self.record_log = None
332 def set_nonblock(self, fd):
333 fl = fcntl.fcntl(fd, F_GETFL)
334 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
339 the cmds are written to stdin of lctl
340 lctl doesn't return errors when run in script mode, so
342 should modify command line to accept multiple commands, or
343 create complex command line options
347 cmds = '\n dump ' + self.save_file + '\n' + cmds
348 elif self.record_device:
352 %s""" % (self.record_device, self.record_log, cmds)
354 debug("+", cmd_line, cmds)
355 if config.noexec: return (0, [])
357 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
358 child.tochild.write(cmds + "\n")
359 child.tochild.close()
360 # print "LCTL:", cmds
362 # From "Python Cookbook" from O'Reilly
363 outfile = child.fromchild
364 outfd = outfile.fileno()
365 self.set_nonblock(outfd)
366 errfile = child.childerr
367 errfd = errfile.fileno()
368 self.set_nonblock(errfd)
370 outdata = errdata = ''
373 ready = select.select([outfd,errfd],[],[]) # Wait for input
374 if outfd in ready[0]:
375 outchunk = outfile.read()
376 if outchunk == '': outeof = 1
377 outdata = outdata + outchunk
378 if errfd in ready[0]:
379 errchunk = errfile.read()
380 if errchunk == '': erreof = 1
381 errdata = errdata + errchunk
382 if outeof and erreof: break
383 # end of "borrowed" code
386 if os.WIFEXITED(ret):
387 rc = os.WEXITSTATUS(ret)
390 if rc or len(errdata):
391 raise CommandError(self.lctl, errdata, rc)
394 def runcmd(self, *args):
396 run lctl using the command line
398 cmd = string.join(map(str,args))
399 debug("+", self.lctl, cmd)
400 rc, out = run(self.lctl, cmd)
402 raise CommandError(self.lctl, out, rc)
406 def clear_log(self, dev, log):
407 """ clear an existing log """
412 quit """ % (dev, log)
415 def root_squash(self, name, uid, nid):
419 quit""" % (name, uid, nid)
422 def network(self, net, nid):
427 quit """ % (net, netid, nid)
431 def add_interface(self, net, ip, netmask = ""):
432 """ add an interface """
436 quit """ % (net, ip, netmask)
439 # delete an interface
440 def del_interface(self, net, ip):
441 """ delete an interface """
448 # create a new connection
449 def add_uuid(self, net_type, uuid, nid):
450 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
453 def add_peer(self, net_type, nid, hostaddr, port):
454 if net_type in ('tcp',) and not config.lctl_dump:
459 nid, hostaddr, port )
461 elif net_type in ('openib','iib',) and not config.lctl_dump:
469 def connect(self, srv):
470 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
471 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
473 hostaddr = string.split(srv.hostaddr[0], '/')[0]
474 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
477 def recover(self, dev_name, new_conn):
480 recover %s""" %(dev_name, new_conn)
483 # add a route to a range
484 def add_route(self, net, gw, lo, hi):
492 except CommandError, e:
496 def del_route(self, net, gw, lo, hi):
501 quit """ % (net, gw, lo, hi)
504 # add a route to a host
505 def add_route_host(self, net, uuid, gw, tgt):
506 self.add_uuid(net, uuid, tgt)
514 except CommandError, e:
518 # add a route to a range
519 def del_route_host(self, net, uuid, gw, tgt):
525 quit """ % (net, gw, tgt)
529 def del_peer(self, net_type, nid, hostaddr):
530 if net_type in ('tcp',) and not config.lctl_dump:
534 del_peer %s %s single_share
538 elif net_type in ('openib','iib',) and not config.lctl_dump:
542 del_peer %s single_share
547 # disconnect one connection
548 def disconnect(self, srv):
549 self.del_uuid(srv.nid_uuid)
550 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
552 hostaddr = string.split(srv.hostaddr[0], '/')[0]
553 self.del_peer(srv.net_type, srv.nid, hostaddr)
555 def del_uuid(self, uuid):
563 def disconnectAll(self, net):
571 def attach(self, type, name, uuid):
574 quit""" % (type, name, uuid)
577 def setup(self, name, setup = ""):
581 quit""" % (name, setup)
584 def add_conn(self, name, conn_uuid):
588 quit""" % (name, conn_uuid)
592 # create a new device with lctl
593 def newdev(self, type, name, uuid, setup = ""):
594 self.attach(type, name, uuid);
596 self.setup(name, setup)
597 except CommandError, e:
598 self.cleanup(name, uuid, 0)
603 def cleanup(self, name, uuid, force, failover = 0):
604 if failover: force = 1
610 quit""" % (name, ('', 'force')[force],
611 ('', 'failover')[failover])
615 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
616 stripe_sz, stripe_off, pattern, devlist = None):
619 lov_setup %s %d %d %d %s %s
620 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
624 # add an OBD to a LOV
625 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
627 lov_modify_tgts add %s %s %s %s
628 quit""" % (name, obd_uuid, index, gen)
632 def lmv_setup(self, name, uuid, desc_uuid, devlist):
636 quit""" % (name, uuid, desc_uuid, devlist)
639 # delete an OBD from a LOV
640 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
642 lov_modify_tgts del %s %s %s %s
643 quit""" % (name, obd_uuid, index, gen)
647 def deactivate(self, name):
655 def dump(self, dump_file):
658 quit""" % (dump_file)
661 # get list of devices
662 def device_list(self):
663 devices = '/proc/fs/lustre/devices'
665 if os.access(devices, os.R_OK):
667 fp = open(devices, 'r')
675 def lustre_version(self):
676 rc, out = self.runcmd('version')
680 def mount_option(self, profile, osc, mdc):
682 mount_option %s %s %s
683 quit""" % (profile, osc, mdc)
686 # delete mount options
687 def del_mount_option(self, profile):
693 def set_timeout(self, timeout):
699 def set_lustre_upcall(self, upcall):
704 # ============================================================
705 # Various system-level functions
706 # (ideally moved to their own module)
708 # Run a command and return the output and status.
709 # stderr is merged into stdout (via 2>&1); popen3 could be used
710 # to capture it separately if necessary
713 if config.noexec: return (0, [])
714 f = os.popen(cmd + ' 2>&1')
724 cmd = string.join(map(str,args))
727 # Run a command in the background.
728 def run_daemon(*args):
729 cmd = string.join(map(str,args))
731 if config.noexec: return 0
732 f = os.popen(cmd + ' 2>&1')
740 # Determine full path to use for an external command
741 # searches dirname(argv[0]) first, then PATH
743 syspath = string.split(os.environ['PATH'], ':')
744 cmdpath = os.path.dirname(sys.argv[0])
745 syspath.insert(0, cmdpath);
747 syspath.insert(0, os.path.join(config.portals, 'utils/'))
749 prog = os.path.join(d,cmd)
750 if os.access(prog, os.X_OK):
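# Illustrative example (hypothetical paths): find_prog('lctl') searches
# dirname(sys.argv[0]) and the configured portals 'utils/' dir before the
# directories in $PATH, returning the first executable match.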
754 # Recursively look for file starting at base dir
755 def do_find_file(base, mod):
756 fullname = os.path.join(base, mod)
757 if os.access(fullname, os.R_OK):
759 for d in os.listdir(base):
760 dir = os.path.join(base,d)
761 if os.path.isdir(dir):
762 module = do_find_file(dir, mod)
766 def find_module(src_dir, dev_dir, modname):
767 modbase = src_dir +'/'+ dev_dir +'/'+ modname
768 for modext in '.ko', '.o':
769 module = modbase + modext
771 if os.access(module, os.R_OK):
777 # is the path a block device?
784 return stat.S_ISBLK(s[stat.ST_MODE])
786 # find the journal device from mkfs options
792 while i < len(x) - 1:
793 if x[i] == '-J' and x[i+1].startswith('device='):
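# Illustrative example (hypothetical values): with mkfsoptions
# '-J device=/dev/sdb1 -m 0', jdev() yields '/dev/sdb1'; with no
# '-J device=' pair it yields ''.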
799 # build fs according to type
801 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
807 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
809 # devsize is in 1k, and fs block count is in 4k
810 block_cnt = devsize/4
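    # e.g. (hypothetical) a 2097152 KB (2 GB) device gives 524288 4 KB blocks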
812 if fstype in ('ext3', 'extN', 'ldiskfs'):
813 # ext3 journal size is in megabytes
814 # but don't set jsize if mkfsoptions indicates a separate journal device
815 if jsize == 0 and jdev(mkfsoptions) == '':
817 if not is_block(dev):
818 ret, out = runcmd("ls -l %s" %dev)
819 devsize = int(string.split(out[0])[4]) / 1024
821 # sfdisk works for symlink, hardlink, and realdev
822 ret, out = runcmd("sfdisk -s %s" %dev)
824 devsize = int(out[0])
826 # sfdisk -s will fail for a block device that is too large;
827 # in that case, read the size of the partition from /proc/partitions
829 # get the realpath of the device
830 # it may be the real device, such as /dev/hda7
831 # or the hardlink created via mknod for a device
832 if 'realpath' in dir(os.path):
833 real_dev = os.path.realpath(dev)
837 while os.path.islink(real_dev) and (link_count < 20):
838 link_count = link_count + 1
839 dev_link = os.readlink(real_dev)
840 if os.path.isabs(dev_link):
843 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
845 panic("Entountered too many symbolic links resolving block device:", dev)
847 # get the major and minor number of the realpath via ls
848 # it seems python(os.stat) does not return
849 # the st_rdev member of the stat structure
850 ret, out = runcmd("ls -l %s" %real_dev)
851 major = string.split(string.split(out[0])[4], ",")[0]
852 minor = string.split(out[0])[5]
854 # get the devsize from /proc/partitions with the major and minor number
855 ret, out = runcmd("cat /proc/partitions")
858 if string.split(line)[0] == major and string.split(line)[1] == minor:
859 devsize = int(string.split(line)[2])
862 if devsize > 1024 * 1024:
863 jsize = ((devsize / 102400) * 4)
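            # e.g. (hypothetical) devsize 2097152 KB (2 GB) -> jsize (2097152/102400)*4 = 80 MB,
            # i.e. roughly 4 MB of journal per 100 MB of device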
866 if jsize: jopt = "-J size=%d" %(jsize,)
867 if isize: iopt = "-I %d" %(isize,)
868 mkfs = 'mkfs.ext2 -j -b 4096 '
869 if not isblock or config.force:
871 if jdev(mkfsoptions) != '':
872 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
874 jmkfs = jmkfs + '-F '
875 jmkfs = jmkfs + jdev(mkfsoptions)
876 (ret, out) = run (jmkfs)
878 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
879 elif fstype == 'reiserfs':
880 # reiserfs journal size is in blocks
881 if jsize: jopt = "--journal_size %d" %(jsize,)
882 mkfs = 'mkreiserfs -ff'
884 panic('unsupported fs type: ', fstype)
886 if config.mkfsoptions != None:
887 mkfs = mkfs + ' ' + config.mkfsoptions
888 if mkfsoptions != None:
889 mkfs = mkfs + ' ' + mkfsoptions
890 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
892 panic("Unable to build fs:", dev, string.join(out))
893 # enable hash tree indexing on the filesystem
894 if fstype in ('ext3', 'extN', 'ldiskfs'):
895 htree = 'echo "feature FEATURE_C5" | debugfs -w'
896 (ret, out) = run (htree, dev)
898 panic("Unable to enable htree:", dev)
900 # some systems use /dev/loopN, some /dev/loop/N
904 if not os.access(loop + str(0), os.R_OK):
906 if not os.access(loop + str(0), os.R_OK):
907 panic ("can't access loop devices")
910 # find loop device assigned to the file
911 def find_assigned_loop(file):
913 for n in xrange(0, MAX_LOOP_DEVICES):
915 if os.access(dev, os.R_OK):
916 (stat, out) = run('losetup', dev)
917 if out and stat == 0:
918 m = re.search(r'\((.*)\)', out[0])
919 if m and file == m.group(1):
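# Illustrative 'losetup /dev/loopN' output (format varies with util-linux
# version): '/dev/loop0: [0805]:12345 (/tmp/lustre-ost1)'; the regex above
# pulls the backing file out of the parentheses.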
925 # create file if necessary and assign the first free loop device
926 def init_loop(file, size, fstype, journal_size, inode_size,
927 mkfsoptions, reformat, autoformat, backfstype, backfile):
930 realfstype = backfstype
931 if is_block(backfile):
932 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
933 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
939 dev = find_assigned_loop(realfile)
941 print 'WARNING file:', realfile, 'already mapped to', dev
944 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
946 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
947 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
949 panic("Unable to create backing store:", realfile)
951 mkfs(realfile, size, realfstype, journal_size, inode_size,
952 mkfsoptions, isblock=0)
955 # find next free loop
956 for n in xrange(0, MAX_LOOP_DEVICES):
958 if os.access(dev, os.R_OK):
959 (stat, out) = run('losetup', dev)
961 run('losetup', dev, realfile)
964 print "out of loop devices"
966 print "out of loop devices"
969 # undo loop assignment
970 def clean_loop(file):
971 dev = find_assigned_loop(file)
973 ret, out = run('losetup -d', dev)
975 log('unable to clean loop device:', dev, 'for file:', file)
978 # determine if dev is formatted as a <fstype> filesystem
979 def need_format(fstype, dev):
980 # FIXME don't know how to implement this
983 # initialize a block device if needed
984 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
985 inode_size, mkfsoptions, backfstype, backdev):
989 if fstype == 'smfs' or not is_block(dev):
990 dev = init_loop(dev, size, fstype, journal_size, inode_size,
991 mkfsoptions, reformat, autoformat, backfstype, backdev)
992 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
993 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
996 # panic("device:", dev,
997 # "not prepared, and autoformat is not set.\n",
998 # "Rerun with --reformat option to format ALL filesystems")
1003 """lookup IP address for an interface"""
1004 rc, out = run("/sbin/ifconfig", iface)
1007 addr = string.split(out[1])[1]
1008 ip = string.split(addr, ':')[1]
1011 def def_mount_options(fstype, target):
1012 """returns deafult mount options for passed fstype and target (mds, ost)"""
1013 if fstype == 'ext3' or fstype == 'ldiskfs':
1014 mountfsoptions = "errors=remount-ro"
1015 if target == 'ost' and sys_get_branch() == '2.4':
1016 mountfsoptions = "%s,asyncdel" % (mountfsoptions)
1017 return mountfsoptions
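# Illustrative example (hypothetical): def_mount_options('ext3', 'ost') returns
# 'errors=remount-ro,asyncdel' on a 2.4 kernel and 'errors=remount-ro' otherwise.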
1020 def sys_get_elan_position_file():
1021 procfiles = ["/proc/elan/device0/position",
1022 "/proc/qsnet/elan4/device0/position",
1023 "/proc/qsnet/elan3/device0/position"]
1025 if os.access(p, os.R_OK):
1029 def sys_get_local_nid(net_type, wildcard, cluster_id):
1030 """Return the local nid."""
1032 if sys_get_elan_position_file():
1033 local = sys_get_local_address('elan', '*', cluster_id)
1035 local = sys_get_local_address(net_type, wildcard, cluster_id)
1038 def sys_get_local_address(net_type, wildcard, cluster_id):
1039 """Return the local address for the network type."""
1041 if net_type in ('tcp','openib','iib',):
1043 iface, star = string.split(wildcard, ':')
1044 local = if2addr(iface)
1046 panic ("unable to determine ip for:", wildcard)
1048 host = socket.gethostname()
1049 local = socket.gethostbyname(host)
1050 elif net_type == 'elan':
1051 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1052 f = sys_get_elan_position_file()
1054 panic ("unable to determine local Elan ID")
1057 lines = fp.readlines()
1061 if a[0] == 'NodeId':
1065 nid = my_int(cluster_id) + my_int(elan_id)
1066 local = "%d" % (nid)
1067 except ValueError, e:
1071 elif net_type == 'gm':
1072 fixme("automatic local address for GM")
1076 def sys_get_branch():
1077 """Returns kernel release"""
1079 fp = open('/proc/sys/kernel/osrelease')
1080 lines = fp.readlines()
1084 version = string.split(l)
1085 a = string.split(version[0], '.')
1086 return a[0] + '.' + a[1]
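# e.g. (hypothetical) an osrelease of '2.4.21-27.EL' makes sys_get_branch()
# return '2.4', which def_mount_options() above uses to decide on 'asyncdel'.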
1092 def mod_loaded(modname):
1093 """Check if a module is already loaded. Look in /proc/modules for it."""
1095 fp = open('/proc/modules')
1096 lines = fp.readlines()
1098 # please forgive my tired fingers for this one
1099 ret = filter(lambda word, mod=modname: word == mod,
1100 map(lambda line: string.split(line)[0], lines))
1102 except Exception, e:
1105 # XXX: instead of device_list, ask for $name and see what we get
1106 def is_prepared(name):
1107 """Return true if a device exists for the name"""
1108 if config.lctl_dump:
1110 if (config.noexec or config.record) and config.cleanup:
1113 # expect this format:
1114 # 1 UP ldlm ldlm ldlm_UUID 2
1115 out = lctl.device_list()
1117 if name == string.split(s)[3]:
1119 except CommandError, e:
1123 def is_network_prepared():
1124 """If the any device exists, then assume that all networking
1125 has been configured"""
1126 out = lctl.device_list()
1129 def fs_is_mounted(path):
1130 """Return true if path is a mounted lustre filesystem"""
1132 fp = open('/proc/mounts')
1133 lines = fp.readlines()
1137 if a[1] == path and a[2] == 'lustre_lite':
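# Illustrative /proc/mounts line (hypothetical): 'lustre /mnt/lustre lustre_lite rw 0 0';
# fields 1 and 2 are the mount point and fs type checked above.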
1145 """Manage kernel modules"""
1146 def __init__(self, lustre_dir, portals_dir):
1147 self.lustre_dir = lustre_dir
1148 self.portals_dir = portals_dir
1149 self.kmodule_list = []
1151 def add_portals_module(self, dev_dir, modname):
1152 """Append a module to list of modules to load."""
1153 self.kmodule_list.append((self.portals_dir, dev_dir, modname))
1155 def add_lustre_module(self, dev_dir, modname):
1156 """Append a module to list of modules to load."""
1157 self.kmodule_list.append((self.lustre_dir, dev_dir, modname))
1159 def load_module(self):
1160 """Load all the modules in the list in the order they appear."""
1161 for src_dir, dev_dir, mod in self.kmodule_list:
1162 if mod_loaded(mod) and not config.noexec:
1164 log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir)
1166 module = find_module(src_dir, dev_dir, mod)
1168 panic('module not found:', mod)
1169 (rc, out) = run('/sbin/insmod', module)
1171 raise CommandError('insmod', out, rc)
1173 (rc, out) = run('/sbin/modprobe', mod)
1175 raise CommandError('modprobe', out, rc)
1177 def cleanup_module(self):
1178 """Unload the modules in the list in reverse order."""
1179 rev = self.kmodule_list
1181 for src_dir, dev_dir, mod in rev:
1182 if not mod_loaded(mod) and not config.noexec:
1185 if mod == 'portals' and config.dump:
1186 lctl.dump(config.dump)
1187 log('unloading module:', mod)
1188 (rc, out) = run('/sbin/rmmod', mod)
1190 log('! unable to unload module:', mod)
1193 # ============================================================
1194 # Classes to prepare and cleanup the various objects
1197 """ Base class for the rest of the modules. The default cleanup method is
1198 defined here, as well as some utility funcs.
1200 def __init__(self, module_name, db):
1202 self.module_name = module_name
1203 self.name = self.db.getName()
1204 self.uuid = self.db.getUUID()
1207 self.kmod = kmod(config.lustre, config.portals)
1209 def info(self, *args):
1210 msg = string.join(map(str,args))
1211 print self.module_name + ":", self.name, self.uuid, msg
1214 """ default cleanup, used for most modules """
1217 lctl.cleanup(self.name, self.uuid, config.force)
1218 except CommandError, e:
1219 log(self.module_name, "cleanup failed: ", self.name)
1223 def add_portals_module(self, dev_dir, modname):
1224 """Append a module to list of modules to load."""
1225 self.kmod.add_portals_module(dev_dir, modname)
1227 def add_lustre_module(self, dev_dir, modname):
1228 """Append a module to list of modules to load."""
1229 self.kmod.add_lustre_module(dev_dir, modname)
1231 def load_module(self):
1232 """Load all the modules in the list in the order they appear."""
1233 self.kmod.load_module()
1235 def cleanup_module(self):
1236 """Unload the modules in the list in reverse order."""
1237 if self.safe_to_clean():
1238 self.kmod.cleanup_module()
1240 def safe_to_clean(self):
1243 def safe_to_clean_modules(self):
1244 return self.safe_to_clean()
1246 class Network(Module):
1247 def __init__(self,db):
1248 Module.__init__(self, 'NETWORK', db)
1249 self.net_type = self.db.get_val('nettype')
1250 self.nid = self.db.get_val('nid', '*')
1251 self.cluster_id = self.db.get_val('clusterid', "0")
1252 self.port = self.db.get_val_int('port', 0)
1255 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1257 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1258 self.generic_nid = 1
1259 debug("nid:", self.nid)
1261 self.generic_nid = 0
1263 self.nid_uuid = self.nid_to_uuid(self.nid)
1265 self.hostaddr = self.db.get_hostaddr()
1266 if len(self.hostaddr) == 0:
1267 self.hostaddr.append(self.nid)
1268 if '*' in self.hostaddr[0]:
1269 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1270 if not self.hostaddr[0]:
1271 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1272 debug("hostaddr:", self.hostaddr[0])
1274 self.add_portals_module("libcfs", 'libcfs')
1275 self.add_portals_module("portals", 'portals')
1276 if node_needs_router():
1277 self.add_portals_module("router", 'kptlrouter')
1278 if self.net_type == 'tcp':
1279 self.add_portals_module("knals/socknal", 'ksocknal')
1280 if self.net_type == 'elan':
1281 self.add_portals_module("knals/qswnal", 'kqswnal')
1282 if self.net_type == 'gm':
1283 self.add_portals_module("knals/gmnal", 'kgmnal')
1284 if self.net_type == 'openib':
1285 self.add_portals_module("knals/openibnal", 'kopenibnal')
1286 if self.net_type == 'iib':
1287 self.add_portals_module("knals/iibnal", 'kiibnal')
1289 def nid_to_uuid(self, nid):
1290 return "NID_%s_UUID" %(nid,)
1293 if not config.record and is_network_prepared():
1295 self.info(self.net_type, self.nid, self.port)
1296 if not (config.record and self.generic_nid):
1297 lctl.network(self.net_type, self.nid)
1298 if self.net_type == 'tcp':
1300 for hostaddr in self.db.get_hostaddr():
1301 ip = string.split(hostaddr, '/')[0]
1302 if len(string.split(hostaddr, '/')) == 2:
1303 netmask = string.split(hostaddr, '/')[1]
1306 lctl.add_interface(self.net_type, ip, netmask)
1307 lctl.newnet(self.netid, self.net_type)
1308 if not (config.record and self.generic_nid):
1309 lctl.network(self.net_type, self.netid, self.nid)
1310 bindaddrlist = self.db.get_bindaddrlist(self.netid)
1311 for bindaddr in bindaddrlist:
1312 lctl.bind(self.net_type, self.netid, bindaddr)
1313 if self.net_type == 'elan':
1315 if self.port and node_is_router():
1316 run_one_acceptor(self.port)
1317 self.connect_peer_gateways()
1319 def connect_peer_gateways(self):
1320 for router in self.db.lookup_class('node'):
1321 if router.get_val_int('router', 0):
1322 for netuuid in router.get_networks():
1323 net = self.db.lookup(netuuid)
1325 if (gw.cluster_id == self.cluster_id and
1326 gw.net_type == self.net_type):
1327 if gw.nid != self.nid:
1330 def disconnect_peer_gateways(self):
1331 for router in self.db.lookup_class('node'):
1332 if router.get_val_int('router', 0):
1333 for netuuid in router.get_networks():
1334 net = self.db.lookup(netuuid)
1336 if (gw.cluster_id == self.cluster_id and
1337 gw.net_type == self.net_type):
1338 if gw.nid != self.nid:
1341 except CommandError, e:
1342 print "disconnect failed: ", self.name
1346 def safe_to_clean(self):
1347 return not is_network_prepared()
1350 self.info(self.net_type, self.nid, self.port)
1352 stop_acceptor(self.port)
1353 if node_is_router():
1354 self.disconnect_peer_gateways()
1355 if self.net_type == 'tcp':
1356 for hostaddr in self.db.get_hostaddr():
1357 ip = string.split(hostaddr, '/')[0]
1358 lctl.del_interface(self.net_type, ip)
1360 def correct_level(self, level, op=None):
1363 class RouteTable(Module):
1364 def __init__(self,db):
1365 Module.__init__(self, 'ROUTES', db)
1367 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1369 # only setup connections for tcp, openib, and iib NALs
1371 if not net_type in ('tcp','openib','iib',):
1374 # connect to target if route is to single node and this node is the gw
1375 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1376 if not local_cluster(net_type, tgt_cluster_id):
1377 panic("target", lo, " not on the local cluster")
1378 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1379 # connect to gateway if this node is not the gw
1380 elif (local_cluster(net_type, gw_cluster_id)
1381 and not local_interface(net_type, gw_cluster_id, gw)):
1382 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1387 panic("no server for nid", lo)
1390 return Network(srvdb)
1393 if not config.record and is_network_prepared():
1396 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1397 lctl.add_route(net_type, gw, lo, hi)
1398 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1402 def safe_to_clean(self):
1403 return not is_network_prepared()
1406 if is_network_prepared():
1407 # the network is still being used, don't clean it up
1409 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1410 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1413 lctl.disconnect(srv)
1414 except CommandError, e:
1415 print "disconnect failed: ", self.name
1420 lctl.del_route(net_type, gw, lo, hi)
1421 except CommandError, e:
1422 print "del_route failed: ", self.name
1426 # This is only needed to load the modules; the LDLM device
1427 # is now created automatically.
1429 def __init__(self,db):
1430 Module.__init__(self, 'LDLM', db)
1431 self.add_lustre_module('lvfs', 'lvfs')
1432 self.add_lustre_module('obdclass', 'obdclass')
1433 self.add_lustre_module('ptlrpc', 'ptlrpc')
1441 def correct_level(self, level, op=None):
1446 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1447 Module.__init__(self, 'LOV', db)
1448 if name_override != None:
1449 self.name = "lov_%s" % name_override
1450 self.add_lustre_module('lov', 'lov')
1451 self.mds_uuid = self.db.get_first_ref('mds')
1452 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1453 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1454 self.pattern = self.db.get_val_int('stripepattern', 0)
1455 self.devlist = self.db.get_lov_tgts('lov_tgt')
1456 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1459 self.desc_uuid = self.uuid
1460 self.uuid = generate_client_uuid(self.name)
1461 self.fs_name = fs_name
1463 self.config_only = 1
1465 self.config_only = None
1466 mds = self.db.lookup(self.mds_uuid)
1467 self.mds_name = mds.getName()
1468 for (obd_uuid, index, gen, active) in self.devlist:
1471 self.obdlist.append(obd_uuid)
1472 obd = self.db.lookup(obd_uuid)
1473 osc = get_osc(obd, self.uuid, fs_name)
1475 self.osclist.append((osc, index, gen, active))
1477 panic('osc not found:', obd_uuid)
1483 if not config.record and is_prepared(self.name):
1485 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1486 self.stripe_off, self.pattern, self.devlist,
1488 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1489 self.stripe_sz, self.stripe_off, self.pattern,
1490 string.join(self.obdlist))
1491 for (osc, index, gen, active) in self.osclist:
1492 target_uuid = osc.target_uuid
1494 # Only ignore connect failures with --force, which
1495 # isn't implemented here yet.
1497 osc.prepare(ignore_connect_failure=0)
1498 except CommandError, e:
1499 print "Error preparing OSC %s\n" % osc.uuid
1501 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1504 for (osc, index, gen, active) in self.osclist:
1505 target_uuid = osc.target_uuid
1507 if is_prepared(self.name):
1508 Module.cleanup(self)
1509 if self.config_only:
1510 panic("Can't clean up config_only LOV ", self.name)
1512 def load_module(self):
1513 if self.config_only:
1514 panic("Can't load modules for config_only LOV ", self.name)
1515 for (osc, index, gen, active) in self.osclist:
1518 Module.load_module(self)
1520 def cleanup_module(self):
1521 if self.config_only:
1522 panic("Can't cleanup modules for config_only LOV ", self.name)
1523 Module.cleanup_module(self)
1524 for (osc, index, gen, active) in self.osclist:
1526 osc.cleanup_module()
1529 def correct_level(self, level, op=None):
1533 def __init__(self, db, uuid, fs_name, name_override = None):
1534 Module.__init__(self, 'LMV', db)
1535 if name_override != None:
1536 self.name = "lmv_%s" % name_override
1537 self.add_lustre_module('lmv', 'lmv')
1538 self.devlist = self.db.get_refs('mds')
1540 self.desc_uuid = self.uuid
1542 self.fs_name = fs_name
1543 for mds_uuid in self.devlist:
1544 mds = self.db.lookup(mds_uuid)
1546 panic("MDS not found!")
1547 mdc = MDC(mds, self.uuid, fs_name)
1549 self.mdclist.append(mdc)
1551 panic('mdc not found:', mds_uuid)
1554 if is_prepared(self.name):
1556 for mdc in self.mdclist:
1558 # Only ignore connect failures with --force, which
1559 # isn't implemented here yet.
1560 mdc.prepare(ignore_connect_failure=0)
1561 except CommandError, e:
1562 print "Error preparing LMV %s\n" % mdc.uuid
1564 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1565 string.join(self.devlist))
1568 for mdc in self.mdclist:
1570 if is_prepared(self.name):
1571 Module.cleanup(self)
1573 def load_module(self):
1574 for mdc in self.mdclist:
1577 Module.load_module(self)
1579 def cleanup_module(self):
1580 Module.cleanup_module(self)
1581 for mdc in self.mdclist:
1582 mdc.cleanup_module()
1585 def correct_level(self, level, op=None):
1588 class MDSDEV(Module):
1589 def __init__(self,db):
1590 Module.__init__(self, 'MDSDEV', db)
1591 self.devpath = self.db.get_val('devpath','')
1592 self.backdevpath = self.db.get_val('backdevpath','')
1593 self.size = self.db.get_val_int('devsize', 0)
1594 self.journal_size = self.db.get_val_int('journalsize', 0)
1595 self.fstype = self.db.get_val('fstype', '')
1596 self.backfstype = self.db.get_val('backfstype', '')
1597 self.nspath = self.db.get_val('nspath', '')
1598 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1599 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1600 self.root_squash = self.db.get_val('root_squash', '')
1601 self.no_root_squash = self.db.get_val('no_root_squash', '')
1602 self.cachetype = self.db.get_val('cachetype', '')
1603 # overwrite the original MDSDEV name and uuid with the MDS name and uuid
1604 target_uuid = self.db.get_first_ref('target')
1605 mds = self.db.lookup(target_uuid)
1606 self.name = mds.getName()
1607 self.filesystem_uuids = mds.get_refs('filesystem')
1610 self.master_mds = ""
1611 if not self.filesystem_uuids:
1612 self.lmv_uuid = self.db.get_first_ref('lmv')
1613 if not self.lmv_uuid:
1614 panic("ALERT: can't find lvm uuid")
1616 self.lmv = self.db.lookup(self.lmv_uuid)
1618 self.filesystem_uuids = self.lmv.get_refs('filesystem')
1619 self.master_mds = self.lmv_uuid
1620 # FIXME: if fstype not set, then determine based on kernel version
1621 self.format = self.db.get_val('autoformat', "no")
1622 if mds.get_val('failover', 0):
1623 self.failover_mds = 'f'
1625 self.failover_mds = 'n'
1626 active_uuid = get_active_target(mds)
1628 panic("No target device found:", target_uuid)
1629 if active_uuid == self.uuid:
1633 if self.active and config.group and config.group != mds.get_val('group'):
1636 self.inode_size = self.db.get_val_int('inodesize', 0)
1637 if self.inode_size == 0:
1638 # find the LOV for this MDS
1639 lovconfig_uuid = mds.get_first_ref('lovconfig')
1640 if not lovconfig_uuid:
1641 if not self.lmv_uuid:
1642 panic("No LOV found for lovconfig ", lovconfig.name)
1645 panic("No LMV initialized and not lovconfig_uuid found")
1647 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1648 lovconfig = self.lmv.lookup(lovconfig_uuid)
1649 lov_uuid = lovconfig.get_first_ref('lov')
1651 panic("No LOV found for lovconfig ", lovconfig.name)
1653 lovconfig = mds.lookup(lovconfig_uuid)
1654 lov_uuid = lovconfig.get_first_ref('lov')
1656 panic("No LOV found for lovconfig ", lovconfig.name)
1659 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1660 lovconfig = self.lmv.lookup(lovconfig_uuid)
1661 lov_uuid = lovconfig.get_first_ref('lov')
1663 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1)
1665 # default stripe count controls default inode_size
1666 if (lov.stripe_cnt > 0):
1667 stripe_count = lov.stripe_cnt
1669 stripe_count = len(lov.devlist)
1670 if stripe_count > 77:
1671 self.inode_size = 4096
1672 elif stripe_count > 35:
1673 self.inode_size = 2048
1674 elif stripe_count > 13:
1675 self.inode_size = 1024
1676 elif stripe_count > 3:
1677 self.inode_size = 512
1679 self.inode_size = 256
1681 self.target_dev_uuid = self.uuid
1682 self.uuid = target_uuid
1685 client_uuid = generate_client_uuid(self.name)
1686 client_uuid = self.name + "_lmv_" + "UUID"
1687 self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid, self.name, self.name)
1688 self.master_mds = self.master.name
1691 self.add_lustre_module('mdc', 'mdc')
1692 self.add_lustre_module('osc', 'osc')
1693 self.add_lustre_module('lov', 'lov')
1694 self.add_lustre_module('lmv', 'lmv')
1695 self.add_lustre_module('ost', 'ost')
1696 self.add_lustre_module('mds', 'mds')
1698 if self.fstype == 'smfs':
1699 self.add_lustre_module('smfs', 'smfs')
1701 if self.fstype == 'ldiskfs':
1702 self.add_lustre_module('ldiskfs', 'ldiskfs')
1705 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1707 # if fstype is smfs, then we should also load fsfilt for the backing fstype
1709 if self.fstype == 'smfs':
1710 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
1712 for options in string.split(self.mountfsoptions, ','):
1713 if options == 'snap':
1714 if not self.fstype == 'smfs':
1715 panic("mountoptions with snap, but fstype is not smfs\n")
1716 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1717 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
1718 def load_module(self):
1720 Module.load_module(self)
1723 if not config.record and is_prepared(self.name):
1726 debug(self.uuid, "not active")
1729 # run write_conf automatically, if --reformat used
1731 self.info(self.devpath, self.fstype, self.size, self.format)
1735 self.master.prepare()
1736 # never reformat here
1737 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1738 self.format, self.journal_size, self.inode_size,
1739 self.mkfsoptions, self.backfstype, self.backdevpath)
1741 if not is_prepared('MDT'):
1742 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
1744 mountfsoptions = def_mount_options(self.fstype, 'mds')
1746 if config.mountfsoptions:
1748 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1750 mountfsoptions = config.mountfsoptions
1751 if self.mountfsoptions:
1752 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1754 if self.mountfsoptions:
1756 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1758 mountfsoptions = self.mountfsoptions
1760 if self.fstype == 'smfs':
1761 realdev = self.fstype
1764 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1768 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1773 print 'MDS mount options: ' + mountfsoptions
1775 if not self.master_mds:
1776 self.master_mds = 'dumb'
1777 if not self.cachetype:
1778 self.cachetype = 'dumb'
1779 lctl.newdev("mds", self.name, self.uuid,
1780 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1781 self.name, mountfsoptions,
1782 self.master_mds, self.cachetype))
1784 if development_mode():
1785 procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
1786 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
1787 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1788 print "MDS Warning: failed to set group-hash upcall"
1790 run("echo ", upcall, " > ", procentry)
1792 except CommandError, e:
1794 panic("MDS is missing the config log. Need to run " +
1795 "lconf --write_conf.")
1799 if config.root_squash == None:
1800 config.root_squash = self.root_squash
1801 if config.no_root_squash == None:
1802 config.no_root_squash = self.no_root_squash
1803 if config.root_squash:
1804 if config.no_root_squash:
1805 nsnid = config.no_root_squash
1808 lctl.root_squash(self.name, config.root_squash, nsnid)
1810 def write_conf(self):
1812 if not is_prepared(self.name):
1813 self.info(self.devpath, self.fstype, self.format)
1815 blkdev = block_dev(self.devpath, self.size, self.fstype,
1816 config.reformat, self.format, self.journal_size,
1817 self.inode_size, self.mkfsoptions,
1818 self.backfstype, self.backdevpath)
1820 # Even when only writing logs we mount the MDS with the supplied mount
1821 # options, because smfs (if used) will not mount otherwise.
1823 mountfsoptions = def_mount_options(self.fstype, 'mds')
1825 if config.mountfsoptions:
1827 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1829 mountfsoptions = config.mountfsoptions
1830 if self.mountfsoptions:
1831 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1833 if self.mountfsoptions:
1835 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1837 mountfsoptions = self.mountfsoptions
1839 if self.fstype == 'smfs':
1840 realdev = self.fstype
1843 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1847 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1852 print 'MDS mount options: ' + mountfsoptions
1854 # As mount options are passed as the 4th param to the config tool, we need
1855 # to pass something as the 3rd param. But we do not want this 3rd param
1856 # to be counted as a profile name for reading the log on MDS setup, so
1857 # we pass a predefined token like 'dumb', which is checked in the MDS
1858 # code and skipped. A nicer way would probably be to pass an empty string
1859 # and have the config tool check for it and pass null
1861 lctl.newdev("mds", self.name, self.uuid,
1862 setup ="%s %s %s %s" %(realdev, self.fstype,
1863 'dumb', mountfsoptions))
1866 # record logs for the MDS lov
1867 for uuid in self.filesystem_uuids:
1868 log("recording clients for filesystem:", uuid)
1869 fs = self.db.lookup(uuid)
1871 # this is ugly, should be organized nicely later.
1872 target_uuid = self.db.get_first_ref('target')
1873 mds = self.db.lookup(target_uuid)
1875 lovconfig_uuid = mds.get_first_ref('lovconfig')
1877 lovconfig = mds.lookup(lovconfig_uuid)
1878 obd_uuid = lovconfig.get_first_ref('lov')
1880 obd_uuid = fs.get_first_ref('obd')
1882 client_uuid = generate_client_uuid(self.name)
1883 client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name,
1886 lctl.clear_log(self.name, self.name)
1887 lctl.record(self.name, self.name)
1889 lctl.mount_option(self.name, client.get_name(), "")
1891 process_updates(self.db, self.name, self.name, client)
1894 lctl.clear_log(self.name, self.name + '-clean')
1895 lctl.record(self.name, self.name + '-clean')
1897 lctl.del_mount_option(self.name)
1899 process_updates(self.db, self.name, self.name + '-clean', client)
1903 # record logs for each client
1909 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1911 config_options = CONFIG_FILE
1913 for node_db in self.db.lookup_class('node'):
1914 client_name = node_db.getName()
1915 for prof_uuid in node_db.get_refs('profile'):
1916 prof_db = node_db.lookup(prof_uuid)
1917 # refactor this into a function to test "clientness"
1919 for ref_class, ref_uuid in prof_db.get_all_refs():
1920 if ref_class in ('mountpoint','echoclient'):
1921 debug("recording", client_name)
1922 old_noexec = config.noexec
1924 ret, out = run (sys.argv[0], noexec_opt,
1925 " -v --record --nomod",
1926 "--record_log", client_name,
1927 "--record_device", self.name,
1928 "--node", client_name,
1931 for s in out: log("record> ", string.strip(s))
1932 ret, out = run (sys.argv[0], noexec_opt,
1933 "--cleanup -v --record --nomod",
1934 "--record_log", client_name + "-clean",
1935 "--record_device", self.name,
1936 "--node", client_name,
1939 for s in out: log("record> ", string.strip(s))
1940 config.noexec = old_noexec
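            # The re-invocation is roughly (hypothetical values):
            #   lconf -v --record --nomod --record_log client1 \
            #         --record_device mds1 --node client1 <config_options>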
1943 lctl.cleanup(self.name, self.uuid, 0, 0)
1944 except CommandError, e:
1945 log(self.module_name, "cleanup failed: ", self.name)
1948 Module.cleanup(self)
1950 if self.fstype == 'smfs':
1951 clean_loop(self.backdevpath)
1953 clean_loop(self.devpath)
1955 def msd_remaining(self):
1956 out = lctl.device_list()
1958 if string.split(s)[2] in ('mds',):
1961 def safe_to_clean(self):
1964 def safe_to_clean_modules(self):
1965 return not self.msd_remaining()
1969 debug(self.uuid, "not active")
1972 if is_prepared(self.name):
1974 lctl.cleanup(self.name, self.uuid, config.force,
1976 except CommandError, e:
1977 log(self.module_name, "cleanup failed: ", self.name)
1980 Module.cleanup(self)
1983 self.master.cleanup()
1984 if not self.msd_remaining() and is_prepared('MDT'):
1986 lctl.cleanup("MDT", "MDT_UUID", config.force,
1988 except CommandError, e:
1989 print "cleanup failed: ", self.name
1993 if self.fstype == 'smfs':
1994 clean_loop(self.backdevpath)
1996 clean_loop(self.devpath)
1998 def correct_level(self, level, op=None):
1999 #if self.master_mds:
2004 def __init__(self, db):
2005 Module.__init__(self, 'OSD', db)
2006 self.osdtype = self.db.get_val('osdtype')
2007 self.devpath = self.db.get_val('devpath', '')
2008 self.backdevpath = self.db.get_val('backdevpath', '')
2009 self.size = self.db.get_val_int('devsize', 0)
2010 self.journal_size = self.db.get_val_int('journalsize', 0)
2011 self.inode_size = self.db.get_val_int('inodesize', 0)
2012 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2013 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2014 self.fstype = self.db.get_val('fstype', '')
2015 self.backfstype = self.db.get_val('backfstype', '')
2016 self.nspath = self.db.get_val('nspath', '')
2017 target_uuid = self.db.get_first_ref('target')
2018 ost = self.db.lookup(target_uuid)
2019 self.name = ost.getName()
2020 self.format = self.db.get_val('autoformat', 'yes')
2021 if ost.get_val('failover', 0):
2022 self.failover_ost = 'f'
2024 self.failover_ost = 'n'
2026 active_uuid = get_active_target(ost)
2028 panic("No target device found:", target_uuid)
2029 if active_uuid == self.uuid:
2033 if self.active and config.group and config.group != ost.get_val('group'):
2036 self.target_dev_uuid = self.uuid
2037 self.uuid = target_uuid
2039 self.add_lustre_module('ost', 'ost')
2040 if self.fstype == 'smfs':
2041 self.add_lustre_module('smfs', 'smfs')
2042 # FIXME: should we default to ext3 here?
2043 if self.fstype == 'ldiskfs':
2044 self.add_lustre_module('ldiskfs', 'ldiskfs')
2046 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2047 if self.fstype == 'smfs':
2048 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2050 for options in self.mountfsoptions:
2051 if options == 'snap':
2052 if not self.fstype == 'smfs':
2053 panic("mountoptions with snap, but fstype is not smfs\n")
2054 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2055 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2057 self.add_lustre_module(self.osdtype, self.osdtype)
2059 def load_module(self):
2061 Module.load_module(self)
2063 # need to check /proc/mounts and /etc/mtab before
2064 # formatting anything.
2065 # FIXME: check if device is already formatted.
2067 if is_prepared(self.name):
2070 debug(self.uuid, "not active")
2072 self.info(self.osdtype, self.devpath, self.size, self.fstype,
2073 self.format, self.journal_size, self.inode_size)
2075 if self.osdtype == 'obdecho':
2078 blkdev = block_dev(self.devpath, self.size, self.fstype,
2079 config.reformat, self.format, self.journal_size,
2080 self.inode_size, self.mkfsoptions, self.backfstype,
2083 mountfsoptions = def_mount_options(self.fstype, 'ost')
2085 if config.mountfsoptions:
2087 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
2089 mountfsoptions = config.mountfsoptions
2090 if self.mountfsoptions:
2091 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2093 if self.mountfsoptions:
2095 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2097 mountfsoptions = self.mountfsoptions
2099 if self.fstype == 'smfs':
2100 realdev = self.fstype
2103 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
2107 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
2112 print 'OSD mount options: ' + mountfsoptions
2114 lctl.newdev(self.osdtype, self.name, self.uuid,
2115 setup ="%s %s %s %s" %(realdev, self.fstype,
2118 if not is_prepared('OSS'):
2119 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2121 def osd_remaining(self):
2122 out = lctl.device_list()
2124 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2127 def safe_to_clean(self):
2130 def safe_to_clean_modules(self):
2131 return not self.osd_remaining()
2135 debug(self.uuid, "not active")
2137 if is_prepared(self.name):
2140 lctl.cleanup(self.name, self.uuid, config.force,
2142 except CommandError, e:
2143 log(self.module_name, "cleanup failed: ", self.name)
2146 if not self.osd_remaining() and is_prepared('OSS'):
2148 lctl.cleanup("OSS", "OSS_UUID", config.force,
2150 except CommandError, e:
2151 print "cleanup failed: ", self.name
2154 if not self.osdtype == 'obdecho':
2155 if self.fstype == 'smfs':
2156 clean_loop(self.backdevpath)
2158 clean_loop(self.devpath)
2160 def correct_level(self, level, op=None):
2163 # Generic client module, used by OSC and MDC
2164 class Client(Module):
2165 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2167 self.target_name = tgtdb.getName()
2168 self.target_uuid = tgtdb.getUUID()
2171 self.backup_targets = []
2173 self.tgt_dev_uuid = get_active_target(tgtdb)
2174 if not self.tgt_dev_uuid:
2175 panic("No target device found for target(1):", self.target_name)
2177 self.kmod = kmod(config.lustre, config.portals)
2181 self.module = module
2182 self.module_name = string.upper(module)
2184 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2185 self.target_name, fs_name)
2187 self.name = self_name
2189 self.lookup_server(self.tgt_dev_uuid)
2191 self.lookup_backup_targets()
2192 self.fs_name = fs_name
2195 self.add_lustre_module(module_dir, module)
2197 def lookup_server(self, srv_uuid):
2198 """ Lookup a server's network information """
2199 self._server_nets = get_ost_net(self.db, srv_uuid)
2200 if len(self._server_nets) == 0:
2201 panic ("Unable to find a server for:", srv_uuid)
2204 def get_servers(self):
2205 return self._server_nets
2206 def lookup_backup_targets(self):
2207 """ Lookup alternative network information """
2208 prof_list = toplustreDB.get_refs('profile')
2209 for prof_uuid in prof_list:
2210 prof_db = toplustreDB.lookup(prof_uuid)
2212 panic("profile:", prof_uuid, "not found.")
2213 for ref_class, ref_uuid in prof_db.get_all_refs():
2214 if ref_class in ('osd', 'mdsdev'):
2215 devdb = toplustreDB.lookup(ref_uuid)
2216 uuid = devdb.get_first_ref('target')
2217 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2218 self.backup_targets.append(ref_uuid)
2220 def prepare(self, ignore_connect_failure = 0):
2221 self.info(self.target_uuid)
2222 if not config.record and is_prepared(self.name):
2225 srv = choose_local_server(self.get_servers())
2229 routes = find_route(self.get_servers())
2230 if len(routes) == 0:
2231 panic ("no route to", self.target_uuid)
2232 for (srv, r) in routes:
2233 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2234 except CommandError, e:
2235 if not ignore_connect_failure:
2238 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2239 debug("%s inactive" % self.target_uuid)
2240 inactive_p = "inactive"
2242 debug("%s active" % self.target_uuid)
2244 lctl.newdev(self.module, self.name, self.uuid,
2245 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2247 for tgt_dev_uuid in self.backup_targets:
2248 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2249 if len(this_nets) == 0:
2250 panic ("Unable to find a server for:", tgt_dev_uuid)
2251 srv = choose_local_server(this_nets)
2255 routes = find_route(this_nets);
2256 if len(routes) == 0:
2257 panic("no route to", tgt_dev_uuid)
2258 for (srv, r) in routes:
2259 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2261 lctl.add_conn(self.name, srv.nid_uuid);
2264 if is_prepared(self.name):
2265 Module.cleanup(self)
2267 srv = choose_local_server(self.get_servers())
2269 lctl.disconnect(srv)
2271 for (srv, r) in find_route(self.get_servers()):
2272 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2273 except CommandError, e:
2274 log(self.module_name, "cleanup failed: ", self.name)
2278 for tgt_dev_uuid in self.backup_targets:
2279 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2280 srv = choose_local_server(this_net)
2282 lctl.disconnect(srv)
2284 for (srv, r) in find_route(this_net):
2285 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2288 def correct_level(self, level, op=None):
2291 def deactivate(self):
2293 lctl.deactivate(self.name)
2294 except CommandError, e:
2295 log(self.module_name, "deactivate failed: ", self.name)
2300 def __init__(self, db, uuid, fs_name):
2301 Client.__init__(self, db, uuid, 'mdc', fs_name)
2303 def permits_inactive(self):
2307 def __init__(self, db, uuid, fs_name):
2308 Client.__init__(self, db, uuid, 'osc', fs_name)
2310 def permits_inactive(self):
2314 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
2315 Module.__init__(self, 'VLOV', db)
2316 if name_override != None:
2317 self.name = "lov_%s" % name_override
2318 self.add_lustre_module('lov', 'lov')
2319 self.stripe_sz = 65536
2323 self.desc_uuid = self.uuid
2324 self.uuid = generate_client_uuid(self.name)
2325 self.fs_name = fs_name
2326 self.osc = get_osc(db, self.uuid, fs_name)
2328 panic('osc not found:', self.uuid)
2330 self.config_only = 1
2332 self.config_only = None
2338 if not config.record and is_prepared(self.name):
2340 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
2341 self.stripe_sz, self.stripe_off, self.pattern)
2342 target_uuid = self.osc.target_uuid
2345 self.osc.prepare(ignore_connect_failure=0)
2346 except CommandError, e:
2347 print "Error preparing OSC %s\n" % osc.uuid
2349 lctl.lov_add_obd(self.name, self.uuid, target_uuid, 0, 1)
2352 target_uuid = self.osc.target_uuid
2354 if is_prepared(self.name):
2355 Module.cleanup(self)
2356 if self.config_only:
2357 panic("Can't clean up config_only LOV ", self.name)
2359 def load_module(self):
2360 if self.config_only:
2361 panic("Can't load modules for config_only LOV ", self.name)
2362 self.osc.load_module()
2363 Module.load_module(self)
2365 def cleanup_module(self):
2366 if self.config_only:
2367 panic("Can't cleanup modules for config_only LOV ", self.name)
2368 Module.cleanup_module(self)
2369 self.osc.cleanup_module()
2371 def correct_level(self, level, op=None):
2374 class CMOBD(Module):
2375 def __init__(self,db):
2376 Module.__init__(self, 'CMOBD', db)
2377 self.name = self.db.getName();
2378 self.uuid = generate_client_uuid(self.name)
2379 self.master_uuid = self.db.get_first_ref('masterobd')
2380 self.cache_uuid = self.db.get_first_ref('cacheobd')
2381 self.add_lustre_module('cmobd', 'cmobd')
2382 master_obd = self.db.lookup(self.master_uuid)
2384 panic('master obd not found:', self.master_uuid)
2385 cache_obd = self.db.lookup(self.cache_uuid)
2387 panic('cache obd not found:', self.cache_uuid)
2389 if master_obd.get_class() == 'ost':
2390 self.client_uuid = generate_client_uuid(self.name)
2391 self.master= VLOV(master_obd, self.client_uuid, self.name,
2392 "%s_master" % (self.name))
2393 self.master_uuid = self.master.get_uuid()
2395 self.master = get_mdc(db, self.name, self.master_uuid)
2396 # need to check /proc/mounts and /etc/mtab before
2397 # formatting anything.
2398 # FIXME: check if device is already formatted.
2400 self.master.prepare()
2401 if not config.record and is_prepared(self.name):
2403 self.info(self.master_uuid, self.cache_uuid)
2404 lctl.newdev("cmobd", self.name, self.uuid,
2405 setup ="%s %s" %(self.master_uuid,
2409 if is_prepared(self.name):
2410 Module.cleanup(self)
2411 self.master.cleanup()
2413 def load_module(self):
2414 self.master.load_module()
2415 Module.load_module(self)
2417 def cleanup_module(self):
2418 Module.cleanup_module(self)
2419 self.master.cleanup_module()
2421 def correct_level(self, level, op=None):
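# The constructor below belongs to COBD (caching OBD): it pairs the
# 'realobd' and 'cacheobd' references, wrapping both in LOV or MDC client
# stacks as appropriate and joining them with a "cobd" device.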
2425 def __init__(self, db, uuid, name, type, name_override = None):
2426 Module.__init__(self, 'COBD', db)
2427 self.name = self.db.getName()
2428 self.uuid = generate_client_uuid(self.name)
2429 self.real_uuid = self.db.get_first_ref('realobd')
2430 self.cache_uuid = self.db.get_first_ref('cacheobd')
2431 self.add_lustre_module('cobd', 'cobd')
2432 real_obd = self.db.lookup(self.real_uuid)
2434 panic('real obd not found:', self.real_uuid)
2435 cache_obd = self.db.lookup(self.cache_uuid)
2437 panic('cache obd not found:', self.cache_uuid)
2439 self.real = LOV(real_obd, self.real_uuid, name,
2440 "%s_real" % (self.name))
2441 self.cache = LOV(cache_obd, self.cache_uuid, name,
2442 "%s_cache" % (self.name))
2444 self.real = get_mdc(db, name, self.real_uuid)
2445 self.cache = get_mdc(db, name, self.cache_uuid)
2446 # need to check /proc/mounts and /etc/mtab before
2447 # formatting anything.
2448 # FIXME: check if device is already formatted.
2453 def get_real_name(self):
2454 return self.real.name
2455 def get_cache_name(self):
2456 return self.cache.name
2459 self.cache.prepare()
2460 if not config.record and is_prepared(self.name):
2462 self.info(self.real_uuid, self.cache_uuid)
2463 lctl.newdev("cobd", self.name, self.uuid,
2464 setup ="%s %s" %(self.real.name,
2468 if is_prepared(self.name):
2469 Module.cleanup(self)
2471 self.cache.cleanup()
2473 def load_module(self):
2474 self.real.load_module()
2475 Module.load_module(self)
2477 def cleanup_module(self):
2478 Module.cleanup_module(self)
2479 self.real.cleanup_module()
2481 # virtual interface for OSC and LOV
2483 def __init__(self, db, client_uuid, name, name_override = None):
2484 Module.__init__(self, 'VOSC', db)
2485 if db.get_class() == 'lov':
2486 self.osc = LOV(db, client_uuid, name, name_override)
2488 elif db.get_class() == 'cobd':
2489 self.osc = COBD(db, client_uuid, name, 'obd')
2492 self.osc = OSC(db, client_uuid, name)
2495 return self.osc.get_uuid()
2497 return self.osc.get_name()
2502 def load_module(self):
2503 self.osc.load_module()
2504 def cleanup_module(self):
2505 self.osc.cleanup_module()
2506 def correct_level(self, level, op=None):
2507 return self.osc.correct_level(level, op)
2509 # virtual interface for MDC and LMV
2511 def __init__(self, db, client_uuid, name, name_override = None):
2512 Module.__init__(self, 'VMDC', db)
2513 if db.get_class() == 'lmv':
2514 self.mdc = LMV(db, client_uuid, name)
2515 elif db.get_class() == 'cobd':
2516 self.mdc = COBD(db, client_uuid, name, 'mds')
2518 self.mdc = MDC(db, client_uuid, name)
2520 return self.mdc.uuid
2522 return self.mdc.name
2527 def load_module(self):
2528 self.mdc.load_module()
2529 def cleanup_module(self):
2530 self.mdc.cleanup_module()
2531 def correct_level(self, level, op=None):
2532 return self.mdc.correct_level(level, op)
2534 class ECHO_CLIENT(Module):
2535 def __init__(self,db):
2536 Module.__init__(self, 'ECHO_CLIENT', db)
2537 self.add_lustre_module('obdecho', 'obdecho')
2538 self.obd_uuid = self.db.get_first_ref('obd')
2539 obd = self.db.lookup(self.obd_uuid)
2540 self.uuid = generate_client_uuid(self.name)
2541 self.osc = VOSC(obd, self.uuid, self.name)
2544 if not config.record and is_prepared(self.name):
2547 self.osc.prepare() # XXX This is so cheating. -p
2548 self.info(self.obd_uuid)
2550 lctl.newdev("echo_client", self.name, self.uuid,
2551 setup = self.osc.get_name())
2554 if is_prepared(self.name):
2555 Module.cleanup(self)
2558 def load_module(self):
2559 self.osc.load_module()
2560 Module.load_module(self)
2562 def cleanup_module(self):
2563 Module.cleanup_module(self)
2564 self.osc.cleanup_module()
2566 def correct_level(self, level, op=None):
2569 def generate_client_uuid(name):
2570 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2572 int(random.random() * 1048576),
2573 int(random.random() * 1048576))
2574 return client_uuid[:36]
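# Illustrative only (hypothetical name, random values): for a name such as
# 'MNT_client' the format string above would produce something like
#   '0f3a1_MNT_client_0a2b34c5d6'
# i.e. 5 hex digits, the name truncated to 19 characters, then 10 more hex
# digits, clipped to at most 36 characters overall.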
2576 def my_rstrip(s, chars):
2577 """my_rstrip(s, chars) -> strips any instances of the characters
2578 found in chars from the right side of string s"""
2579 # XXX required because python versions pre 2.2.3 don't allow
2580 # string.rstrip() to take alternate char lists
2584 ns = string.rstrip(s, '/')
2585 except TypeError, e:
2586 for i in range(len(s) - 1, 0, -1):
2594 class Mountpoint(Module):
2595 def __init__(self,db):
2596 Module.__init__(self, 'MTPT', db)
2597 self.path = self.db.get_val('path')
2598 self.clientoptions = self.db.get_val('clientoptions', '')
2599 self.fs_uuid = self.db.get_first_ref('filesystem')
2600 fs = self.db.lookup(self.fs_uuid)
2601 self.mds_uuid = fs.get_first_ref('lmv')
2602 if not self.mds_uuid:
2603 self.mds_uuid = fs.get_first_ref('mds')
2604 self.obd_uuid = fs.get_first_ref('obd')
2605 client_uuid = generate_client_uuid(self.name)
2607 ost = self.db.lookup(self.obd_uuid)
2609 panic("no ost: ", self.obd_uuid)
2611 mds = self.db.lookup(self.mds_uuid)
2613 panic("no mds: ", self.mds_uuid)
2615 self.add_lustre_module('mdc', 'mdc')
2616 self.add_lustre_module('lmv', 'lmv')
2617 self.add_lustre_module('llite', 'llite')
2619 self.vosc = VOSC(ost, client_uuid, self.name)
2620 self.vmdc = VMDC(mds, client_uuid, self.name)
2623 if not config.record and fs_is_mounted(self.path):
2624 log(self.path, "already mounted.")
2629 vmdc_name = self.vmdc.get_name()
2631 self.info(self.path, self.mds_uuid, self.obd_uuid)
2632 if config.record or config.lctl_dump:
2633 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
2636 if config.clientoptions:
2637 if self.clientoptions:
2638 self.clientoptions = self.clientoptions + ',' + \
2639 config.clientoptions
2641 self.clientoptions = config.clientoptions
2642 if self.clientoptions:
2643 self.clientoptions = ',' + self.clientoptions
2644 # Linux kernel will deal with async and not pass it to ll_fill_super,
2645 # so replace it with Lustre async
2646 self.clientoptions = string.replace(self.clientoptions, "async",
2649 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
2650 (self.vosc.get_name(), vmdc_name, self.clientoptions,
2651 config.config, self.path)
2652 run("mkdir", self.path)
2657 panic("mount failed:", self.path, ":", string.join(val))
2660 self.info(self.path, self.mds_uuid,self.obd_uuid)
2662 if config.record or config.lctl_dump:
2663 lctl.del_mount_option(local_node_name)
2665 if fs_is_mounted(self.path):
2667 (rc, out) = run("umount", "-f", self.path)
2669 (rc, out) = run("umount", self.path)
2671 raise CommandError('umount', out, rc)
2673 if fs_is_mounted(self.path):
2674 panic("fs is still mounted:", self.path)
2679 def load_module(self):
2680 self.vosc.load_module()
2681 Module.load_module(self)
2683 def cleanup_module(self):
2684 Module.cleanup_module(self)
2685 self.vosc.cleanup_module()
2687 def correct_level(self, level, op=None):
2690 # ============================================================
2691 # misc query functions
2693 def get_ost_net(self, osd_uuid):
2697 osd = self.lookup(osd_uuid)
2698 node_uuid = osd.get_first_ref('node')
2699 node = self.lookup(node_uuid)
2701 panic("unable to find node for osd_uuid:", osd_uuid,
2702 " node_ref:", node_uuid)
2703 for net_uuid in node.get_networks():
2704 db = node.lookup(net_uuid)
2705 srv_list.append(Network(db))
2709 # the order of initialization is based on level.
2710 def getServiceLevel(self):
2711 type = self.get_class()
2713 if type in ('network',):
2715 elif type in ('routetbl',):
2717 elif type in ('ldlm',):
2719 elif type in ('osd', 'cobd'):
2721 elif type in ('mdsdev',):
2723 elif type in ('lmv',):
2725 elif type in ('cmobd',):
2727 elif type in ('mountpoint', 'echoclient'):
2730 panic("Unknown type: ", type)
2732 if ret < config.minlevel or ret > config.maxlevel:
2737 # return list of services in a profile. list is a list of tuples
2738 # [(level, db_object),]
2739 def getServices(self):
2741 for ref_class, ref_uuid in self.get_all_refs():
2742 servdb = self.lookup(ref_uuid)
2744 level = getServiceLevel(servdb)
2746 list.append((level, servdb))
2748 panic('service not found: ' + ref_uuid)
2754 ############################################################
2756 # FIXME: clean this mess up!
2758 # OSC is no longer in the xml, so we have to fake it.
2759 # this is getting ugly and begging for another refactoring
2760 def get_osc(ost_db, uuid, fs_name):
2761 osc = OSC(ost_db, uuid, fs_name)
2764 def get_mdc(db, fs_name, mds_uuid):
2765 mds_db = db.lookup(mds_uuid)
2767 error("no mds:", mds_uuid)
2768 mdc = MDC(mds_db, mds_uuid, fs_name)
2771 ############################################################
2772 # routing ("rooting")
2774 # list of (nettype, cluster_id, nid)
2777 def find_local_clusters(node_db):
2778 global local_clusters
2779 for netuuid in node_db.get_networks():
2780 net = node_db.lookup(netuuid)
2782 debug("add_local", netuuid)
2783 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2785 if acceptors.has_key(srv.port):
2786 panic("duplicate port:", srv.port)
2787 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2789 # This node is a gateway.
2791 def node_is_router():
2794 # If there are any routers found in the config, then this will be true
2795 # and all nodes will load kptlrouter.
2797 def node_needs_router():
2798 return needs_router or is_router
2800 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2801 # Currently, these local routes are only added to kptlrouter route
2802 # table if they are needed to connect to a specific server. This
2803 # should be changed so all available routes are loaded, and the
2804 # ptlrouter can make all the decisions.
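# Illustrative only (hypothetical values): a route entry would look like
#   ('tcp', <gateway nid>, <target cluster id>, <lo nid>, <hi nid>)
# and find_route() below matches a server whose nid falls within [lo, hi].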
2807 def find_local_routes(lustre):
2808 """ Scan the lustre config looking for routers.  Build list of
2810 global local_routes, needs_router
2812 list = lustre.lookup_class('node')
2814 if router.get_val_int('router', 0):
2816 for (local_type, local_cluster_id, local_nid) in local_clusters:
2818 for netuuid in router.get_networks():
2819 db = router.lookup(netuuid)
2820 if (local_type == db.get_val('nettype') and
2821 local_cluster_id == db.get_val('clusterid')):
2822 gw = db.get_val('nid')
2825 debug("find_local_routes: gw is", gw)
2826 for route in router.get_local_routes(local_type, gw):
2827 local_routes.append(route)
2828 debug("find_local_routes:", local_routes)
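# Pick the first server in srv_list that sits on one of this node's local
# clusters (see local_cluster() below); presumably None when no local
# server is found.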
2831 def choose_local_server(srv_list):
2832 for srv in srv_list:
2833 if local_cluster(srv.net_type, srv.cluster_id):
2836 def local_cluster(net_type, cluster_id):
2837 for cluster in local_clusters:
2838 if net_type == cluster[0] and cluster_id == cluster[1]:
2842 def local_interface(net_type, cluster_id, nid):
2843 for cluster in local_clusters:
2844 if (net_type == cluster[0] and cluster_id == cluster[1]
2845 and nid == cluster[2]):
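# For each server in srv_list, collect (srv, route) pairs for every local
# route whose target cluster id matches and whose nid range covers the
# server's nid.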
2849 def find_route(srv_list):
2851 frm_type = local_clusters[0][0]
2852 for srv in srv_list:
2853 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2854 to_type = srv.net_type
2856 cluster_id = srv.cluster_id
2857 debug ('looking for route to', to_type, to)
2858 for r in local_routes:
2859 debug("find_route: ", r)
2860 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2861 result.append((srv, r))
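# Resolve which device should currently serve this target: a --select
# override for the target name takes precedence, otherwise the target's
# 'active' reference is used.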
2864 def get_active_target(db):
2865 target_uuid = db.getUUID()
2866 target_name = db.getName()
2867 node_name = get_select(target_name)
2869 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2871 tgt_dev_uuid = db.get_first_ref('active')
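# Scan the configured network entries for the one whose nid_uuid matches;
# presumably returns the corresponding Network object, or None.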
2874 def get_server_by_nid_uuid(db, nid_uuid):
2875 for n in db.lookup_class("network"):
2877 if net.nid_uuid == nid_uuid:
2881 ############################################################
2885 type = db.get_class()
2886 debug('Service:', type, db.getName(), db.getUUID())
2891 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2892 elif type == 'network':
2894 elif type == 'routetbl':
2898 elif type == 'cobd':
2899 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2900 elif type == 'cmobd':
2902 elif type == 'mdsdev':
2904 elif type == 'mountpoint':
2906 elif type == 'echoclient':
2911 panic ("unknown service type:", type)
2915 # Prepare the system to run lustre using a particular profile
2916 # in the configuration.
2917 # * load the modules
2918 # * setup networking for the current node
2919 # * make sure partitions are in place and prepared
2920 # * initialize devices with lctl
2921 # Level ordering is important, and needs to be enforced.
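# The 'operation' argument is one of the do* helpers defined below
# (doModules, doSetup, doWriteconf, doCleanup, doUnloadModules), applied to
# the list of services in each profile.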
2922 def for_each_profile(db, prof_list, operation):
2923 for prof_uuid in prof_list:
2924 prof_db = db.lookup(prof_uuid)
2926 panic("profile:", prof_uuid, "not found.")
2927 services = getServices(prof_db)
2930 def magic_get_osc(db, rec, lov):
2932 lov_uuid = lov.get_uuid()
2933 lov_name = lov.osc.fs_name
2935 lov_uuid = rec.getAttribute('lov_uuidref')
2936 # FIXME: better way to find the mountpoint?
2937 filesystems = db.root_node.getElementsByTagName('filesystem')
2939 for fs in filesystems:
2940 ref = fs.getElementsByTagName('obd_ref')
2941 if ref[0].getAttribute('uuidref') == lov_uuid:
2942 fsuuid = fs.getAttribute('uuid')
2946 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
2948 mtpts = db.root_node.getElementsByTagName('mountpoint')
2951 ref = fs.getElementsByTagName('filesystem_ref')
2952 if ref[0].getAttribute('uuidref') == fsuuid:
2953 lov_name = fs.getAttribute('name')
2957 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
2959 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
2961 ost_uuid = rec.getAttribute('ost_uuidref')
2962 obd = db.lookup(ost_uuid)
2965 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
2967 osc = get_osc(obd, lov_uuid, lov_name)
2969 panic('osc not found:', ost_uuid)
2972 # write logs for update records. sadly, logs of all types -- and updates in
2973 # particular -- are something of an afterthought. lconf needs to be rewritten with
2974 # these as core concepts. so this is a pretty big hack.
2975 def process_update_record(db, update, lov):
2976 for rec in update.childNodes:
2977 if rec.nodeType != rec.ELEMENT_NODE:
2980 log("found "+rec.nodeName+" record in update version " +
2981 str(update.getAttribute('version')))
2983 lov_uuid = rec.getAttribute('lov_uuidref')
2984 ost_uuid = rec.getAttribute('ost_uuidref')
2985 index = rec.getAttribute('index')
2986 gen = rec.getAttribute('generation')
2988 if not lov_uuid or not ost_uuid or not index or not gen:
2989 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
2992 tmplov = db.lookup(lov_uuid)
2994 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
2995 lov_name = tmplov.getName()
2997 lov_name = lov.osc.name
2999 # ------------------------------------------------------------- add
3000 if rec.nodeName == 'add':
3002 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3005 osc = magic_get_osc(db, rec, lov)
3008 # Only ignore connect failures with --force, which
3009 # isn't implemented here yet.
3010 osc.prepare(ignore_connect_failure=0)
3011 except CommandError, e:
3012 print "Error preparing OSC %s\n" % osc.uuid
3015 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3017 # ------------------------------------------------------ deactivate
3018 elif rec.nodeName == 'deactivate':
3022 osc = magic_get_osc(db, rec, lov)
3026 except CommandError, e:
3027 print "Error deactivating OSC %s\n" % osc.uuid
3030 # ---------------------------------------------------------- delete
3031 elif rec.nodeName == 'delete':
3035 osc = magic_get_osc(db, rec, lov)
3041 except CommandError, e:
3042 print "Error cleaning up OSC %s\n" % osc.uuid
3045 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
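# Replay each non-empty <update> element into its own config log named
# "<log_name>-<version>" on log_device, handing the individual records to
# process_update_record() above.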
3047 def process_updates(db, log_device, log_name, lov = None):
3048 updates = db.root_node.getElementsByTagName('update')
3050 if not u.childNodes:
3051 log("ignoring empty update record (version " +
3052 str(u.getAttribute('version')) + ")")
3055 version = u.getAttribute('version')
3056 real_name = "%s-%s" % (log_name, version)
3057 lctl.clear_log(log_device, real_name)
3058 lctl.record(log_device, real_name)
3060 process_update_record(db, u, lov)
3064 def doWriteconf(services):
3068 if s[1].get_class() == 'mdsdev':
3069 n = newService(s[1])
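# doSetup and doCleanup below follow the same pattern: build a
# (level, service) list for the profile, recompute each level via
# correct_level(), and then, presumably in sorted level order, prepare or
# clean up each service.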
3072 def doSetup(services):
3077 n = newService(s[1])
3079 slist.append((n.level, n))
3082 nl = n[1].correct_level(n[0])
3083 nlist.append((nl, n[1]))
3088 def doModules(services):
3092 n = newService(s[1])
3095 def doCleanup(services):
3100 n = newService(s[1])
3102 slist.append((n.level, n))
3105 nl = n[1].correct_level(n[0])
3106 nlist.append((nl, n[1]))
3110 if n[1].safe_to_clean():
3113 def doUnloadModules(services):
3118 n = newService(s[1])
3119 if n.safe_to_clean_modules():
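# doHost(): look up this host's node entry in the config, read its per-node
# settings (upcalls, timeout, ptldebug, subsystem), set up local clusters
# and routes, then run the profile operations appropriate to --write_conf,
# --recover, --cleanup, or a normal setup.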
3124 def doHost(lustreDB, hosts):
3125 global is_router, local_node_name
3128 node_db = lustreDB.lookup_name(h, 'node')
3132 panic('No host entry found.')
3134 local_node_name = node_db.get_val('name', 0)
3135 is_router = node_db.get_val_int('router', 0)
3136 lustre_upcall = node_db.get_val('lustreUpcall', '')
3137 portals_upcall = node_db.get_val('portalsUpcall', '')
3138 timeout = node_db.get_val_int('timeout', 0)
3139 ptldebug = node_db.get_val('ptldebug', '')
3140 subsystem = node_db.get_val('subsystem', '')
3142 find_local_clusters(node_db)
3144 find_local_routes(lustreDB)
3146 # Two step process: (1) load modules, (2) setup lustre
3147 # if not cleaning, load modules first.
3148 prof_list = node_db.get_refs('profile')
3150 if config.write_conf:
3151 for_each_profile(node_db, prof_list, doModules)
3153 for_each_profile(node_db, prof_list, doWriteconf)
3154 for_each_profile(node_db, prof_list, doUnloadModules)
3157 elif config.recover:
3158 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3159 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3160 "--client_uuid <UUID> --conn_uuid <UUID>")
3161 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3163 elif config.cleanup:
3165 # the command line can override this value
3167 # ugly hack, only need to run lctl commands for --dump
3168 if config.lctl_dump or config.record:
3169 for_each_profile(node_db, prof_list, doCleanup)
3172 sys_set_timeout(timeout)
3173 sys_set_ptldebug(ptldebug)
3174 sys_set_subsystem(subsystem)
3175 sys_set_lustre_upcall(lustre_upcall)
3176 sys_set_portals_upcall(portals_upcall)
3178 for_each_profile(node_db, prof_list, doCleanup)
3179 for_each_profile(node_db, prof_list, doUnloadModules)
3183 # ugly hack, only need to run lctl commands for --dump
3184 if config.lctl_dump or config.record:
3185 sys_set_timeout(timeout)
3186 sys_set_lustre_upcall(lustre_upcall)
3187 for_each_profile(node_db, prof_list, doSetup)
3191 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3192 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3194 for_each_profile(node_db, prof_list, doModules)
3196 sys_set_debug_path()
3197 sys_set_ptldebug(ptldebug)
3198 sys_set_subsystem(subsystem)
3199 script = config.gdb_script
3200 run(lctl.lctl, ' modules >', script)
3202 log ("The GDB module script is in", script)
3203 # pause, so user has time to break and
3206 sys_set_timeout(timeout)
3207 sys_set_lustre_upcall(lustre_upcall)
3208 sys_set_portals_upcall(portals_upcall)
3210 for_each_profile(node_db, prof_list, doSetup)
3213 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3214 tgt = lustreDB.lookup(tgt_uuid)
3216 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3217 new_uuid = get_active_target(tgt)
3219 raise Lustre.LconfError("doRecovery: no active target found for: " +
3221 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3223 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3225 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid)
3227 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
3230 lctl.disconnect(oldnet)
3231 except CommandError, e:
3232 log("recover: disconnect", nid_uuid, "failed: ")
3237 except CommandError, e:
3238 log("recover: connect failed")
3241 lctl.recover(client_uuid, net.nid_uuid)
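# In development mode this derives config.lustre (and from it
# config.portals) from the directory of the lconf binary; otherwise, when
# both --lustre and --portals are given, the portals path is made relative
# to the lustre tree.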
3244 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3245 base = os.path.dirname(cmd)
3246 if development_mode():
3247 if not config.lustre:
3248 debug('using objdir module paths')
3249 config.lustre = (os.path.join(base, ".."))
3250 # normalize the portals dir, using command line arg if set
3252 portals_dir = config.portals
3253 dir = os.path.join(config.lustre, portals_dir)
3254 config.portals = dir
3255 debug('config.portals', config.portals)
3256 elif config.lustre and config.portals:
3258 # if --lustre and --portals, normalize portals
3259 # can ignore PORTALS_DIR here, since it is probably useless here
3260 config.portals = os.path.join(config.lustre, config.portals)
3261 debug('config.portals B', config.portals)
3263 def sysctl(path, val):
3264 debug("+ sysctl", path, val)
3268 fp = open(os.path.join('/proc/sys', path), 'w')
3275 def sys_set_debug_path():
3276 sysctl('portals/debug_path', config.debug_path)
3278 def sys_set_lustre_upcall(upcall):
3279 # the command line overrides the value in the node config
3280 if config.lustre_upcall:
3281 upcall = config.lustre_upcall
3283 upcall = config.upcall
3285 lctl.set_lustre_upcall(upcall)
3287 def sys_set_portals_upcall(upcall):
3288 # the command line overrides the value in the node config
3289 if config.portals_upcall:
3290 upcall = config.portals_upcall
3292 upcall = config.upcall
3294 sysctl('portals/upcall', upcall)
3296 def sys_set_timeout(timeout):
3297 # the command line overrides the value in the node config
3298 if config.timeout and config.timeout > 0:
3299 timeout = config.timeout
3300 if timeout != None and timeout > 0:
3301 lctl.set_timeout(timeout)
3303 def sys_tweak_socknal ():
3304 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3305 if sys_get_branch() == '2.6':
3306 fp = open('/proc/meminfo')
3307 lines = fp.readlines()
3312 if a[0] == 'MemTotal:':
3314 debug("memtotal " + memtotal)
3315 if int(memtotal) < 262144:
3316 minfree = int(memtotal) / 16
3319 debug("+ minfree ", minfree)
3320 sysctl("vm/min_free_kbytes", minfree)
3321 if config.single_socket:
3322 sysctl("socknal/typed", 0)
3324 def sys_optimize_elan ():
3325 procfiles = ["/proc/elan/config/eventint_punt_loops",
3326 "/proc/qsnet/elan3/config/eventint_punt_loops",
3327 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3329 if os.access(p, os.W_OK):
3330 run ("echo 1 > " + p)
3332 def sys_set_ptldebug(ptldebug):
3334 ptldebug = config.ptldebug
3337 val = eval(ptldebug, ptldebug_names)
3338 val = "0x%x" % (val)
3339 sysctl('portals/debug', val)
3340 except NameError, e:
3343 def sys_set_subsystem(subsystem):
3344 if config.subsystem:
3345 subsystem = config.subsystem
3348 val = eval(subsystem, subsystem_names)
3349 val = "0x%x" % (val)
3350 sysctl('portals/subsystem_debug', val)
3351 except NameError, e:
3354 def sys_set_netmem_max(path, max):
3355 debug("setting", path, "to at least", max)
3363 fp = open(path, 'w')
3364 fp.write('%d\n' %(max))
3368 def sys_make_devices():
3369 if not os.access('/dev/portals', os.R_OK):
3370 run('mknod /dev/portals c 10 240')
3371 if not os.access('/dev/obd', os.R_OK):
3372 run('mknod /dev/obd c 10 241')
3375 # Add dir to the global PATH, if not already there.
3376 def add_to_path(new_dir):
3377 syspath = string.split(os.environ['PATH'], ':')
3378 if new_dir in syspath:
3380 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
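# default_debug_path()/default_gdb_script(): place the debug dump and gdb
# script under /tmp, or under /r/tmp when a /r directory exists (presumably
# the alternate root used by the Lustre test environments).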
3382 def default_debug_path():
3383 path = '/tmp/lustre-log'
3384 if os.path.isdir('/r'):
3389 def default_gdb_script():
3390 script = '/tmp/ogdb'
3391 if os.path.isdir('/r'):
3392 return '/r' + script
3397 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3398 # ensure basic elements are in the system path
3399 def sanitise_path():
3400 for dir in DEFAULT_PATH:
3403 # global hack for the --select handling
3405 def init_select(args):
3406 # args = [service=nodeA,service2=nodeB service3=nodeC]
3409 list = string.split(arg, ',')
3411 srv, node = string.split(entry, '=')
3412 tgt_select[srv] = node
3414 def get_select(srv):
3415 if tgt_select.has_key(srv):
3416 return tgt_select[srv]
3420 FLAG = Lustre.Options.FLAG
3421 PARAM = Lustre.Options.PARAM
3422 INTPARAM = Lustre.Options.INTPARAM
3423 PARAMLIST = Lustre.Options.PARAMLIST
3425 ('verbose,v', "Print system commands as they are run"),
3426 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3427 ('config', "Cluster config name used for LDAP query", PARAM),
3428 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3429 ('node', "Load config for <nodename>", PARAM),
3430 ('cleanup,d', "Cleans up config. (Shutdown)"),
3431 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3433 ('single_socket', "socknal option: only use one socket instead of bundle",
3435 ('failover',"""Used to shut down without saving state.
3436 This will allow this node to "give up" a service to
3437 another node for failover purposes. This will not
3438 be a clean shutdown.""",
3440 ('gdb', """Prints message after creating gdb module script
3441 and sleeps for 5 seconds."""),
3442 ('noexec,n', """Prints the commands and steps that will be run for a
3443 config without executing them. This can be used to check if a
3444 config file is doing what it should be doing"""),
3445 ('nomod', "Skip load/unload module step."),
3446 ('nosetup', "Skip device setup/cleanup step."),
3447 ('reformat', "Reformat all devices (without question)"),
3448 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3449 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3450 ('clientoptions', "Additional mount options for Lustre clients", PARAM),
3451 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3453 ('write_conf', "Save all the client config information on mds."),
3454 ('record', "Write config information on mds."),
3455 ('record_log', "Name of config record log.", PARAM),
3456 ('record_device', "MDS device name that will record the config commands",
3458 ('root_squash', "MDS squash root to appointed uid",
3460 ('no_root_squash', "Don't squash root for appointed nid",
3462 ('minlevel', "Minimum level of services to configure/cleanup",
3464 ('maxlevel', """Maximum level of services to configure/cleanup
3465 Levels are approximately like:
3470 70 - mountpoint, echo_client, osc, mdc, lov""",
3472 ('lustre', """Base directory of lustre sources. This parameter will
3473 cause lconf to load modules from a source tree.""", PARAM),
3474 ('portals', """Portals source directory. If this is a relative path,
3475 then it is assumed to be relative to lustre. """, PARAM),
3476 ('timeout', "Set recovery timeout", INTPARAM),
3477 ('upcall', "Set both portals and lustre upcall script", PARAM),
3478 ('lustre_upcall', "Set lustre upcall script", PARAM),
3479 ('portals_upcall', "Set portals upcall script", PARAM),
3480 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3481 ('ptldebug', "Set the portals debug level", PARAM),
3482 ('subsystem', "Set the portals debug subsystem", PARAM),
3483 ('gdb_script', "Full name of gdb debug script", PARAM, default_gdb_script()),
3484 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3485 # Client recovery options
3486 ('recover', "Recover a device"),
3487 ('group', "The group of devices to configure or cleanup", PARAM),
3488 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3489 ('client_uuid', "The failed client (required for recovery)", PARAM),
3490 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3492 ('inactive', """The name of an inactive service, to be ignored during
3493 mounting (currently OST-only). Can be repeated.""",
3498 global lctl, config, toplustreDB, CONFIG_FILE
3500 # in the upcall this is set to SIG_IGN
3501 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3503 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3505 config, args = cl.parse(sys.argv[1:])
3506 except Lustre.OptionError, e:
3510 setupModulePath(sys.argv[0])
3512 host = socket.gethostname()
3514 # the PRNG is normally seeded with time(), which is not so good for starting
3515 # time-synchronized clusters
3516 input = open('/dev/urandom', 'r')
3518 print 'Unable to open /dev/urandom!'
3520 seed = input.read(32)
3526 init_select(config.select)
3529 # allow config to be fetched via HTTP, but only with python2
3530 if sys.version[0] != '1' and args[0].startswith('http://'):
3533 config_file = urllib2.urlopen(args[0])
3534 except (urllib2.URLError, socket.error), err:
3535 if hasattr(err, 'args'):
3537 print "Could not access '%s': %s" %(args[0], err)
3539 elif not os.access(args[0], os.R_OK):
3540 print 'File not found or readable:', args[0]
3544 config_file = open(args[0], 'r')
3546 dom = xml.dom.minidom.parse(config_file)
3548 panic("%s does not appear to be a config file." % (args[0]))
3549 sys.exit(1) # make sure to die here, even in debug mode.
3551 CONFIG_FILE = args[0]
3552 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3553 if not config.config:
3554 config.config = os.path.basename(args[0])  # use full path?
3555 if config.config[-4:] == '.xml':
3556 config.config = config.config[:-4]
3557 elif config.ldapurl:
3558 if not config.config:
3559 panic("--ldapurl requires --config name")
3560 dn = "config=%s,fs=lustre" % (config.config)
3561 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3562 elif config.ptldebug or config.subsystem:
3563 sys_set_ptldebug(None)
3564 sys_set_subsystem(None)
3567 print 'Missing config file or ldap URL.'
3568 print 'see lconf --help for command summary'
3571 toplustreDB = lustreDB
3573 ver = lustreDB.get_version()
3575 panic("No version found in config data, please recreate.")
3576 if ver != Lustre.CONFIG_VERSION:
3577 panic("Config version", ver, "does not match lconf version",
3578 Lustre.CONFIG_VERSION)
3582 node_list.append(config.node)
3585 node_list.append(host)
3586 node_list.append('localhost')
3588 debug("configuring for host: ", node_list)
3591 config.debug_path = config.debug_path + '-' + host
3592 config.gdb_script = config.gdb_script + '-' + host
3594 lctl = LCTLInterface('lctl')
3596 if config.lctl_dump:
3597 lctl.use_save_file(config.lctl_dump)
3600 if not (config.record_device and config.record_log):
3601 panic("When recording, both --record_log and --record_device must be specified.")
3602 lctl.clear_log(config.record_device, config.record_log)
3603 lctl.record(config.record_device, config.record_log)
3605 doHost(lustreDB, node_list)
3607 if not config.record:
3612 process_updates(lustreDB, config.record_device, config.record_log)
3614 if __name__ == "__main__":
3617 except Lustre.LconfError, e:
3619 # traceback.print_exc(file=sys.stdout)
3621 except CommandError, e:
3625 if first_cleanup_error:
3626 sys.exit(first_cleanup_error)