3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = 'portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
107 "socknal" : (1 << 11),
108 "qswnal" : (1 << 12),
109 "pinger" : (1 << 13),
110 "filter" : (1 << 14),
116 "ptlrouter" : (1 << 20),
first_cleanup_error = 0

def cleanup_error(rc):
    """Remember the exit code of the first cleanup step that failed.

    Later failures do not overwrite the saved code, so the script can
    report the original error when it finally exits.
    """
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
132 # ============================================================
133 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort by raising LconfError reporting that *msg* is not implemented."""
    # Call-style raise: semantically identical to the legacy two-argument
    # raise statement, but consistent with the `raise Lustre.LconfError(msg)`
    # form used elsewhere in this file and forward-compatible with Python 3.
    raise Lustre.LconfError(msg + ' not implemented yet.')
139 msg = string.join(map(str,args))
140 if not config.noexec:
141 raise Lustre.LconfError(msg)
146 msg = string.join(map(str,args))
151 print string.strip(s)
155 msg = string.join(map(str,args))
158 # ack, python's builtin int() does not support '0x123' syntax.
159 # eval can do it, although what a hack!
163 return eval(s, {}, {})
166 except SyntaxError, e:
167 raise ValueError("not a number")
169 raise ValueError("not a number")
171 # ============================================================
172 # locally defined exceptions
173 class CommandError (exceptions.Exception):
174 def __init__(self, cmd_name, cmd_err, rc=None):
175 self.cmd_name = cmd_name
176 self.cmd_err = cmd_err
181 if type(self.cmd_err) == types.StringType:
183 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
185 print "! %s: %s" % (self.cmd_name, self.cmd_err)
186 elif type(self.cmd_err) == types.ListType:
188 print "! %s (error %d):" % (self.cmd_name, self.rc)
190 print "! %s:" % (self.cmd_name)
191 for s in self.cmd_err:
192 print "> %s" %(string.strip(s))
197 # ============================================================
198 # handle daemons, like the acceptor
200 """ Manage starting and stopping a daemon. Assumes daemon manages
201 it's own pid file. """
203 def __init__(self, cmd):
209 log(self.command, "already running.")
211 self.path = find_prog(self.command)
213 panic(self.command, "not found.")
214 ret, out = runcmd(self.path +' '+ self.command_line())
216 raise CommandError(self.path, out, ret)
220 pid = self.read_pidfile()
222 log ("killing process", pid)
224 #time.sleep(1) # let daemon die
226 log("unable to kill", self.command, e)
228 log("unable to kill", self.command)
231 pid = self.read_pidfile()
241 def read_pidfile(self):
243 fp = open(self.pidfile(), 'r')
250 def clean_pidfile(self):
251 """ Remove a stale pidfile """
252 log("removing stale pidfile:", self.pidfile())
254 os.unlink(self.pidfile())
256 log(self.pidfile(), e)
258 class AcceptorHandler(DaemonHandler):
259 def __init__(self, port, net_type):
260 DaemonHandler.__init__(self, "acceptor")
265 return "/var/run/%s-%d.pid" % (self.command, self.port)
267 def command_line(self):
268 return string.join(map(str,(self.flags, self.port)))
272 # start the acceptors
274 if config.lctl_dump or config.record:
276 for port in acceptors.keys():
277 daemon = acceptors[port]
278 if not daemon.running():
281 def run_one_acceptor(port):
282 if config.lctl_dump or config.record:
284 if acceptors.has_key(port):
285 daemon = acceptors[port]
286 if not daemon.running():
289 panic("run_one_acceptor: No acceptor defined for port:", port)
291 def stop_acceptor(port):
292 if acceptors.has_key(port):
293 daemon = acceptors[port]
298 # ============================================================
299 # handle lctl interface
302 Manage communication with lctl
305 def __init__(self, cmd):
307 Initialize close by finding the lctl binary.
309 self.lctl = find_prog(cmd)
311 self.record_device = ''
314 debug('! lctl not found')
317 raise CommandError('lctl', "unable to find lctl binary.")
319 def use_save_file(self, file):
320 self.save_file = file
def record(self, dev_name, logname):
    """Start recording a configuration log *logname* on device *dev_name*."""
    log("Recording log", logname, "on", dev_name)
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Stop recording and clear the current record target."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Switch file descriptor *fd* to non-blocking I/O so reads never stall."""
    current = fcntl.fcntl(fd, F_GETFL)
    nonblocking = current | os.O_NDELAY
    fcntl.fcntl(fd, F_SETFL, nonblocking)
339 the cmds are written to stdin of lctl
340 lctl doesn't return errors when run in script mode, so
342 should modify command line to accept multiple commands, or
343 create complex command line options
347 cmds = '\n dump ' + self.save_file + '\n' + cmds
348 elif self.record_device:
352 %s""" % (self.record_device, self.record_log, cmds)
354 debug("+", cmd_line, cmds)
355 if config.noexec: return (0, [])
357 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
358 child.tochild.write(cmds + "\n")
359 child.tochild.close()
360 # print "LCTL:", cmds
362 # From "Python Cookbook" from O'Reilly
363 outfile = child.fromchild
364 outfd = outfile.fileno()
365 self.set_nonblock(outfd)
366 errfile = child.childerr
367 errfd = errfile.fileno()
368 self.set_nonblock(errfd)
370 outdata = errdata = ''
373 ready = select.select([outfd,errfd],[],[]) # Wait for input
374 if outfd in ready[0]:
375 outchunk = outfile.read()
376 if outchunk == '': outeof = 1
377 outdata = outdata + outchunk
378 if errfd in ready[0]:
379 errchunk = errfile.read()
380 if errchunk == '': erreof = 1
381 errdata = errdata + errchunk
382 if outeof and erreof: break
383 # end of "borrowed" code
386 if os.WIFEXITED(ret):
387 rc = os.WEXITSTATUS(ret)
390 if rc or len(errdata):
391 raise CommandError(self.lctl, errdata, rc)
394 def runcmd(self, *args):
396 run lctl using the command line
398 cmd = string.join(map(str,args))
399 debug("+", self.lctl, cmd)
400 rc, out = run(self.lctl, cmd)
402 raise CommandError(self.lctl, out, rc)
406 def clear_log(self, dev, log):
407 """ clear an existing log """
412 quit """ % (dev, log)
415 def root_squash(self, name, uid, nid):
419 quit""" % (name, uid, nid)
422 def network(self, net, nid):
427 quit """ % (net, nid)
431 def add_interface(self, net, ip, netmask = ""):
432 """ add an interface """
436 quit """ % (net, ip, netmask)
439 # delete an interface
440 def del_interface(self, net, ip):
441 """ delete an interface """
448 # create a new connection
449 def add_uuid(self, net_type, uuid, nid):
450 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
453 def add_peer(self, net_type, nid, hostaddr, port):
454 if net_type in ('tcp',) and not config.lctl_dump:
459 nid, hostaddr, port )
461 elif net_type in ('openib','iib',) and not config.lctl_dump:
469 def connect(self, srv):
470 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
471 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
473 hostaddr = string.split(srv.hostaddr[0], '/')[0]
474 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
477 def recover(self, dev_name, new_conn):
480 recover %s""" %(dev_name, new_conn)
483 # add a route to a range
484 def add_route(self, net, gw, lo, hi):
492 except CommandError, e:
496 def del_route(self, net, gw, lo, hi):
501 quit """ % (net, gw, lo, hi)
504 # add a route to a host
505 def add_route_host(self, net, uuid, gw, tgt):
506 self.add_uuid(net, uuid, tgt)
514 except CommandError, e:
518 # add a route to a range
519 def del_route_host(self, net, uuid, gw, tgt):
525 quit """ % (net, gw, tgt)
529 def del_peer(self, net_type, nid, hostaddr):
530 if net_type in ('tcp',) and not config.lctl_dump:
534 del_peer %s %s single_share
538 elif net_type in ('openib','iib',) and not config.lctl_dump:
542 del_peer %s single_share
547 # disconnect one connection
548 def disconnect(self, srv):
549 self.del_uuid(srv.nid_uuid)
550 if srv.net_type in ('tcp','openib','iib',) and not config.lctl_dump:
552 hostaddr = string.split(srv.hostaddr[0], '/')[0]
553 self.del_peer(srv.net_type, srv.nid, hostaddr)
555 def del_uuid(self, uuid):
563 def disconnectAll(self, net):
571 def attach(self, type, name, uuid):
574 quit""" % (type, name, uuid)
577 def setup(self, name, setup = ""):
581 quit""" % (name, setup)
584 def add_conn(self, name, conn_uuid):
588 quit""" % (name, conn_uuid)
592 # create a new device with lctl
593 def newdev(self, type, name, uuid, setup = ""):
594 self.attach(type, name, uuid);
596 self.setup(name, setup)
597 except CommandError, e:
598 self.cleanup(name, uuid, 0)
603 def cleanup(self, name, uuid, force, failover = 0):
604 if failover: force = 1
610 quit""" % (name, ('', 'force')[force],
611 ('', 'failover')[failover])
615 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
616 stripe_sz, stripe_off, pattern, devlist = None):
619 lov_setup %s %d %d %d %s %s
620 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
624 # add an OBD to a LOV
625 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
627 lov_modify_tgts add %s %s %s %s
628 quit""" % (name, obd_uuid, index, gen)
632 def lmv_setup(self, name, uuid, desc_uuid, devlist):
636 quit""" % (name, uuid, desc_uuid, devlist)
639 # delete an OBD from a LOV
640 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
642 lov_modify_tgts del %s %s %s %s
643 quit""" % (name, obd_uuid, index, gen)
647 def deactivate(self, name):
655 def dump(self, dump_file):
658 quit""" % (dump_file)
661 # get list of devices
662 def device_list(self):
663 devices = '/proc/fs/lustre/devices'
665 if os.access(devices, os.R_OK):
667 fp = open(devices, 'r')
675 def lustre_version(self):
676 rc, out = self.runcmd('version')
680 def mount_option(self, profile, osc, mdc):
682 mount_option %s %s %s
683 quit""" % (profile, osc, mdc)
686 # delete mount options
687 def del_mount_option(self, profile):
693 def set_timeout(self, timeout):
699 def set_lustre_upcall(self, upcall):
704 # ============================================================
705 # Various system-level functions
706 # (ideally moved to their own module)
708 # Run a command and return the output and status.
709 # stderr is sent to /dev/null, could use popen3 to
710 # save it if necessary
713 if config.noexec: return (0, [])
714 f = os.popen(cmd + ' 2>&1')
724 cmd = string.join(map(str,args))
727 # Run a command in the background.
728 def run_daemon(*args):
729 cmd = string.join(map(str,args))
731 if config.noexec: return 0
732 f = os.popen(cmd + ' 2>&1')
740 # Determine full path to use for an external command
741 # searches dirname(argv[0]) first, then PATH
743 syspath = string.split(os.environ['PATH'], ':')
744 cmdpath = os.path.dirname(sys.argv[0])
745 syspath.insert(0, cmdpath);
747 syspath.insert(0, os.path.join(config.portals, 'utils/'))
749 prog = os.path.join(d,cmd)
750 if os.access(prog, os.X_OK):
754 # Recursively look for file starting at base dir
755 def do_find_file(base, mod):
756 fullname = os.path.join(base, mod)
757 if os.access(fullname, os.R_OK):
759 for d in os.listdir(base):
760 dir = os.path.join(base,d)
761 if os.path.isdir(dir):
762 module = do_find_file(dir, mod)
766 def find_module(src_dir, dev_dir, modname):
767 modbase = src_dir +'/'+ dev_dir +'/'+ modname
768 for modext in '.ko', '.o':
769 module = modbase + modext
771 if os.access(module, os.R_OK):
777 # is the path a block device?
784 return stat.S_ISBLK(s[stat.ST_MODE])
786 # find the journal device from mkfs options
792 while i < len(x) - 1:
793 if x[i] == '-J' and x[i+1].startswith('device='):
799 # build fs according to type
801 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
807 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
809 # devsize is in 1k, and fs block count is in 4k
810 block_cnt = devsize/4
812 if fstype in ('ext3', 'extN', 'ldiskfs'):
813 # ext3 journal size is in megabytes
814 # but don't set jsize if mkfsoptions indicates a separate journal device
815 if jsize == 0 and jdev(mkfsoptions) == '':
817 if not is_block(dev):
818 ret, out = runcmd("ls -l %s" %dev)
819 devsize = int(string.split(out[0])[4]) / 1024
821 # sfdisk works for symlink, hardlink, and realdev
822 ret, out = runcmd("sfdisk -s %s" %dev)
824 devsize = int(out[0])
826 # sfdisk -s will fail for too large block device,
827 # then, read the size of partition from /proc/partitions
829 # get the realpath of the device
830 # it may be the real device, such as /dev/hda7
831 # or the hardlink created via mknod for a device
832 if 'realpath' in dir(os.path):
833 real_dev = os.path.realpath(dev)
837 while os.path.islink(real_dev) and (link_count < 20):
838 link_count = link_count + 1
839 dev_link = os.readlink(real_dev)
840 if os.path.isabs(dev_link):
843 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
845 panic("Entountered too many symbolic links resolving block device:", dev)
847 # get the major and minor number of the realpath via ls
848 # it seems python(os.stat) does not return
849 # the st_rdev member of the stat structure
850 ret, out = runcmd("ls -l %s" %real_dev)
851 major = string.split(string.split(out[0])[4], ",")[0]
852 minor = string.split(out[0])[5]
854 # get the devsize from /proc/partitions with the major and minor number
855 ret, out = runcmd("cat /proc/partitions")
858 if string.split(line)[0] == major and string.split(line)[1] == minor:
859 devsize = int(string.split(line)[2])
862 if devsize > 1024 * 1024:
863 jsize = ((devsize / 102400) * 4)
866 if jsize: jopt = "-J size=%d" %(jsize,)
867 if isize: iopt = "-I %d" %(isize,)
868 mkfs = 'mkfs.ext2 -j -b 4096 '
869 if not isblock or config.force:
871 if jdev(mkfsoptions) != '':
872 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
874 jmkfs = jmkfs + '-F '
875 jmkfs = jmkfs + jdev(mkfsoptions)
876 (ret, out) = run (jmkfs)
878 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
879 elif fstype == 'reiserfs':
880 # reiserfs journal size is in blocks
881 if jsize: jopt = "--journal_size %d" %(jsize,)
882 mkfs = 'mkreiserfs -ff'
884 panic('unsupported fs type: ', fstype)
886 if config.mkfsoptions != None:
887 mkfs = mkfs + ' ' + config.mkfsoptions
888 if mkfsoptions != None:
889 mkfs = mkfs + ' ' + mkfsoptions
890 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
892 panic("Unable to build fs:", dev, string.join(out))
893 # enable hash tree indexing on fsswe
894 if fstype in ('ext3', 'extN', 'ldiskfs'):
895 htree = 'echo "feature FEATURE_C5" | debugfs -w'
896 (ret, out) = run (htree, dev)
898 panic("Unable to enable htree:", dev)
900 # some systems use /dev/loopN, some /dev/loop/N
904 if not os.access(loop + str(0), os.R_OK):
906 if not os.access(loop + str(0), os.R_OK):
907 panic ("can't access loop devices")
910 # find loop device assigned to the file
911 def find_assigned_loop(file):
913 for n in xrange(0, MAX_LOOP_DEVICES):
915 if os.access(dev, os.R_OK):
916 (stat, out) = run('losetup', dev)
917 if out and stat == 0:
918 m = re.search(r'\((.*)\)', out[0])
919 if m and file == m.group(1):
925 # create file if necessary and assign the first free loop device
926 def init_loop(file, size, fstype, journal_size, inode_size,
927 mkfsoptions, reformat, autoformat, backfstype, backfile):
930 realfstype = backfstype
931 if is_block(backfile):
932 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
933 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
939 dev = find_assigned_loop(realfile)
941 print 'WARNING file:', realfile, 'already mapped to', dev
944 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
946 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
947 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
949 panic("Unable to create backing store:", realfile)
951 mkfs(realfile, size, realfstype, journal_size, inode_size,
952 mkfsoptions, isblock=0)
955 # find next free loop
956 for n in xrange(0, MAX_LOOP_DEVICES):
958 if os.access(dev, os.R_OK):
959 (stat, out) = run('losetup', dev)
961 run('losetup', dev, realfile)
964 print "out of loop devices"
966 print "out of loop devices"
969 # undo loop assignment
970 def clean_loop(file):
971 dev = find_assigned_loop(file)
973 ret, out = run('losetup -d', dev)
975 log('unable to clean loop device:', dev, 'for file:', file)
978 # determine if dev is formatted as a <fstype> filesystem
979 def need_format(fstype, dev):
980 # FIXME don't know how to implement this
983 # initialize a block device if needed
984 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
985 inode_size, mkfsoptions, backfstype, backdev):
989 if fstype == 'smfs' or not is_block(dev):
990 dev = init_loop(dev, size, fstype, journal_size, inode_size,
991 mkfsoptions, reformat, autoformat, backfstype, backdev)
992 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
993 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
996 # panic("device:", dev,
997 # "not prepared, and autoformat is not set.\n",
998 # "Rerun with --reformat option to format ALL filesystems")
1003 """lookup IP address for an interface"""
1004 rc, out = run("/sbin/ifconfig", iface)
1007 addr = string.split(out[1])[1]
1008 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for the given fstype and target.

    *target* is 'mds' or 'ost'.  Returns '' (rather than an implicit
    None fall-through) for filesystem types that need no special options,
    so callers can always treat the result as a string.
    """
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        # 2.4 kernels want asyncdel on the OST (deferred deletes)
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        return mountfsoptions
    return ""
1020 def sys_get_elan_position_file():
1021 procfiles = ["/proc/elan/device0/position",
1022 "/proc/qsnet/elan4/device0/position",
1023 "/proc/qsnet/elan3/device0/position"]
1025 if os.access(p, os.R_OK):
1029 def sys_get_local_nid(net_type, wildcard, cluster_id):
1030 """Return the local nid."""
1032 if sys_get_elan_position_file():
1033 local = sys_get_local_address('elan', '*', cluster_id)
1035 local = sys_get_local_address(net_type, wildcard, cluster_id)
1038 def sys_get_local_address(net_type, wildcard, cluster_id):
1039 """Return the local address for the network type."""
1041 if net_type in ('tcp','openib','iib',):
1043 iface, star = string.split(wildcard, ':')
1044 local = if2addr(iface)
1046 panic ("unable to determine ip for:", wildcard)
1048 host = socket.gethostname()
1049 local = socket.gethostbyname(host)
1050 elif net_type == 'elan':
1051 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1052 f = sys_get_elan_position_file()
1054 panic ("unable to determine local Elan ID")
1057 lines = fp.readlines()
1061 if a[0] == 'NodeId':
1065 nid = my_int(cluster_id) + my_int(elan_id)
1066 local = "%d" % (nid)
1067 except ValueError, e:
1071 elif net_type == 'gm':
1072 fixme("automatic local address for GM")
1076 def sys_get_branch():
1077 """Returns kernel release"""
1079 fp = open('/proc/sys/kernel/osrelease')
1080 lines = fp.readlines()
1084 version = string.split(l)
1085 a = string.split(version[0], '.')
1086 return a[0] + '.' + a[1]
1092 def mod_loaded(modname):
1093 """Check if a module is already loaded. Look in /proc/modules for it."""
1095 fp = open('/proc/modules')
1096 lines = fp.readlines()
1098 # please forgive my tired fingers for this one
1099 ret = filter(lambda word, mod=modname: word == mod,
1100 map(lambda line: string.split(line)[0], lines))
1102 except Exception, e:
1105 # XXX: instead of device_list, ask for $name and see what we get
1106 def is_prepared(name):
1107 """Return true if a device exists for the name"""
1108 if config.lctl_dump:
1110 if (config.noexec or config.record) and config.cleanup:
1113 # expect this format:
1114 # 1 UP ldlm ldlm ldlm_UUID 2
1115 out = lctl.device_list()
1117 if name == string.split(s)[3]:
1119 except CommandError, e:
1123 def is_network_prepared():
1124 """If the any device exists, then assume that all networking
1125 has been configured"""
1126 out = lctl.device_list()
1129 def fs_is_mounted(path):
1130 """Return true if path is a mounted lustre filesystem"""
1132 fp = open('/proc/mounts')
1133 lines = fp.readlines()
1137 if a[1] == path and a[2] == 'lustre_lite':
1145 """Manage kernel modules"""
1146 def __init__(self, lustre_dir, portals_dir):
1147 self.lustre_dir = lustre_dir
1148 self.portals_dir = portals_dir
1149 self.kmodule_list = []
def add_portals_module(self, dev_dir, modname):
    """Queue a portals-tree kernel module for later loading."""
    entry = (self.portals_dir, dev_dir, modname)
    self.kmodule_list.append(entry)
def add_lustre_module(self, dev_dir, modname):
    """Queue a lustre-tree kernel module for later loading."""
    entry = (self.lustre_dir, dev_dir, modname)
    self.kmodule_list.append(entry)
1159 def load_module(self):
1160 """Load all the modules in the list in the order they appear."""
1161 for src_dir, dev_dir, mod in self.kmodule_list:
1162 if mod_loaded(mod) and not config.noexec:
1164 log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir)
1166 module = find_module(src_dir, dev_dir, mod)
1168 panic('module not found:', mod)
1169 (rc, out) = run('/sbin/insmod', module)
1171 raise CommandError('insmod', out, rc)
1173 (rc, out) = run('/sbin/modprobe', mod)
1175 raise CommandError('modprobe', out, rc)
1177 def cleanup_module(self):
1178 """Unload the modules in the list in reverse order."""
1179 rev = self.kmodule_list
1181 for src_dir, dev_dir, mod in rev:
1182 if not mod_loaded(mod) and not config.noexec:
1185 if mod == 'portals' and config.dump:
1186 lctl.dump(config.dump)
1187 log('unloading module:', mod)
1188 (rc, out) = run('/sbin/rmmod', mod)
1190 log('! unable to unload module:', mod)
1193 # ============================================================
1194 # Classes to prepare and cleanup the various objects
1197 """ Base class for the rest of the modules. The default cleanup method is
1198 defined here, as well as some utilitiy funcs.
1200 def __init__(self, module_name, db):
1202 self.module_name = module_name
1203 self.name = self.db.getName()
1204 self.uuid = self.db.getUUID()
1207 self.kmod = kmod(config.lustre, config.portals)
1209 def info(self, *args):
1210 msg = string.join(map(str,args))
1211 print self.module_name + ":", self.name, self.uuid, msg
1214 """ default cleanup, used for most modules """
1217 lctl.cleanup(self.name, self.uuid, config.force)
1218 except CommandError, e:
1219 log(self.module_name, "cleanup failed: ", self.name)
1223 def add_portals_module(self, dev_dir, modname):
1224 """Append a module to list of modules to load."""
1225 self.kmod.add_portals_module(dev_dir, modname)
1227 def add_lustre_module(self, dev_dir, modname):
1228 """Append a module to list of modules to load."""
1229 self.kmod.add_lustre_module(dev_dir, modname)
1231 def load_module(self):
1232 """Load all the modules in the list in the order they appear."""
1233 self.kmod.load_module()
1235 def cleanup_module(self):
1236 """Unload the modules in the list in reverse order."""
1237 if self.safe_to_clean():
1238 self.kmod.cleanup_module()
1240 def safe_to_clean(self):
1243 def safe_to_clean_modules(self):
1244 return self.safe_to_clean()
1246 class Network(Module):
1247 def __init__(self,db):
1248 Module.__init__(self, 'NETWORK', db)
1249 self.net_type = self.db.get_val('nettype')
1250 self.nid = self.db.get_val('nid', '*')
1251 self.cluster_id = self.db.get_val('clusterid', "0")
1252 self.port = self.db.get_val_int('port', 0)
1255 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1257 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1258 self.generic_nid = 1
1259 debug("nid:", self.nid)
1261 self.generic_nid = 0
1263 self.nid_uuid = self.nid_to_uuid(self.nid)
1265 self.hostaddr = self.db.get_hostaddr()
1266 if len(self.hostaddr) == 0:
1267 self.hostaddr.append(self.nid)
1268 if '*' in self.hostaddr[0]:
1269 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1270 if not self.hostaddr[0]:
1271 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1272 debug("hostaddr:", self.hostaddr[0])
1274 self.add_portals_module("libcfs", 'libcfs')
1275 self.add_portals_module("portals", 'portals')
1276 if node_needs_router():
1277 self.add_portals_module("router", 'kptlrouter')
1278 if self.net_type == 'tcp':
1279 self.add_portals_module("knals/socknal", 'ksocknal')
1280 if self.net_type == 'elan':
1281 self.add_portals_module("knals/qswnal", 'kqswnal')
1282 if self.net_type == 'gm':
1283 self.add_portals_module("knals/gmnal", 'kgmnal')
1284 if self.net_type == 'openib':
1285 self.add_portals_module("knals/openibnal", 'kopenibnal')
1286 if self.net_type == 'iib':
1287 self.add_portals_module("knals/iibnal", 'kiibnal')
def nid_to_uuid(self, nid):
    """Map a network id to the UUID name used for it in config commands."""
    return "NID_%s_UUID" % (nid,)
1293 if not config.record and is_network_prepared():
1295 self.info(self.net_type, self.nid, self.port)
1296 if not (config.record and self.generic_nid):
1297 lctl.network(self.net_type, self.nid)
1298 if self.net_type == 'tcp':
1300 for hostaddr in self.db.get_hostaddr():
1301 ip = string.split(hostaddr, '/')[0]
1302 if len(string.split(hostaddr, '/')) == 2:
1303 netmask = string.split(hostaddr, '/')[1]
1306 lctl.add_interface(self.net_type, ip, netmask)
1307 if self.net_type == 'elan':
1309 if self.port and node_is_router():
1310 run_one_acceptor(self.port)
1311 self.connect_peer_gateways()
1313 def connect_peer_gateways(self):
1314 for router in self.db.lookup_class('node'):
1315 if router.get_val_int('router', 0):
1316 for netuuid in router.get_networks():
1317 net = self.db.lookup(netuuid)
1319 if (gw.cluster_id == self.cluster_id and
1320 gw.net_type == self.net_type):
1321 if gw.nid != self.nid:
1324 def disconnect_peer_gateways(self):
1325 for router in self.db.lookup_class('node'):
1326 if router.get_val_int('router', 0):
1327 for netuuid in router.get_networks():
1328 net = self.db.lookup(netuuid)
1330 if (gw.cluster_id == self.cluster_id and
1331 gw.net_type == self.net_type):
1332 if gw.nid != self.nid:
1335 except CommandError, e:
1336 print "disconnect failed: ", self.name
1340 def safe_to_clean(self):
1341 return not is_network_prepared()
1344 self.info(self.net_type, self.nid, self.port)
1346 stop_acceptor(self.port)
1347 if node_is_router():
1348 self.disconnect_peer_gateways()
1349 if self.net_type == 'tcp':
1350 for hostaddr in self.db.get_hostaddr():
1351 ip = string.split(hostaddr, '/')[0]
1352 lctl.del_interface(self.net_type, ip)
1354 def correct_level(self, level, op=None):
1357 class RouteTable(Module):
1358 def __init__(self,db):
1359 Module.__init__(self, 'ROUTES', db)
1361 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1363 # only setup connections for tcp, openib, and iib NALs
1365 if not net_type in ('tcp','openib','iib',):
1368 # connect to target if route is to single node and this node is the gw
1369 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1370 if not local_cluster(net_type, tgt_cluster_id):
1371 panic("target", lo, " not on the local cluster")
1372 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1373 # connect to gateway if this node is not the gw
1374 elif (local_cluster(net_type, gw_cluster_id)
1375 and not local_interface(net_type, gw_cluster_id, gw)):
1376 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1381 panic("no server for nid", lo)
1384 return Network(srvdb)
1387 if not config.record and is_network_prepared():
1390 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1391 lctl.add_route(net_type, gw, lo, hi)
1392 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1396 def safe_to_clean(self):
1397 return not is_network_prepared()
1400 if is_network_prepared():
1401 # the network is still being used, don't clean it up
1403 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1404 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1407 lctl.disconnect(srv)
1408 except CommandError, e:
1409 print "disconnect failed: ", self.name
1414 lctl.del_route(net_type, gw, lo, hi)
1415 except CommandError, e:
1416 print "del_route failed: ", self.name
1420 # This is only needed to load the modules; the LDLM device
1421 # is now created automatically.
1423 def __init__(self,db):
1424 Module.__init__(self, 'LDLM', db)
1425 self.add_lustre_module('lvfs', 'lvfs')
1426 self.add_lustre_module('obdclass', 'obdclass')
1427 self.add_lustre_module('ptlrpc', 'ptlrpc')
1435 def correct_level(self, level, op=None):
1440 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1441 Module.__init__(self, 'LOV', db)
1442 if name_override != None:
1443 self.name = "lov_%s" % name_override
1444 self.add_lustre_module('lov', 'lov')
1445 self.mds_uuid = self.db.get_first_ref('mds')
1446 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1447 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1448 self.pattern = self.db.get_val_int('stripepattern', 0)
1449 self.devlist = self.db.get_lov_tgts('lov_tgt')
1450 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1453 self.desc_uuid = self.uuid
1454 self.uuid = generate_client_uuid(self.name)
1455 self.fs_name = fs_name
1457 self.config_only = 1
1459 self.config_only = None
1460 mds = self.db.lookup(self.mds_uuid)
1461 self.mds_name = mds.getName()
1462 for (obd_uuid, index, gen, active) in self.devlist:
1465 self.obdlist.append(obd_uuid)
1466 obd = self.db.lookup(obd_uuid)
1467 osc = get_osc(obd, self.uuid, fs_name)
1469 self.osclist.append((osc, index, gen, active))
1471 panic('osc not found:', obd_uuid)
1477 if not config.record and is_prepared(self.name):
1479 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1480 self.stripe_off, self.pattern, self.devlist,
1482 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1483 self.stripe_sz, self.stripe_off, self.pattern,
1484 string.join(self.obdlist))
1485 for (osc, index, gen, active) in self.osclist:
1486 target_uuid = osc.target_uuid
1488 # Only ignore connect failures with --force, which
1489 # isn't implemented here yet.
1491 osc.prepare(ignore_connect_failure=0)
1492 except CommandError, e:
1493 print "Error preparing OSC %s\n" % osc.uuid
1495 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1498 for (osc, index, gen, active) in self.osclist:
1499 target_uuid = osc.target_uuid
1501 if is_prepared(self.name):
1502 Module.cleanup(self)
1503 if self.config_only:
1504 panic("Can't clean up config_only LOV ", self.name)
# Load the lov kernel module and the modules of each member OSC.
# Refuses to run for a config-only (placeholder) LOV.
1506 def load_module(self):
1507 if self.config_only:
1508 panic("Can't load modules for config_only LOV ", self.name)
# NOTE(review): the per-OSC load call inside this loop is on a line
# missing from this copy.
1509 for (osc, index, gen, active) in self.osclist:
1512 Module.load_module(self)
# Unload the lov module first, then each member OSC's modules.
# Refuses to run for a config-only (placeholder) LOV.
1514 def cleanup_module(self):
1515 if self.config_only:
1516 panic("Can't cleanup modules for config_only LOV ", self.name)
1517 Module.cleanup_module(self)
1518 for (osc, index, gen, active) in self.osclist:
1520 osc.cleanup_module()
1523 def correct_level(self, level, op=None):
# LMV.__init__: logical metadata volume client; builds one MDC client
# for every MDS referenced by this LMV.  NOTE(review): some lines are
# missing from this copy (the original numbering jumps).
1527 def __init__(self, db, uuid, fs_name, name_override = None):
1528 Module.__init__(self, 'LMV', db)
1529 if name_override != None:
1530 self.name = "lmv_%s" % name_override
1531 self.add_lustre_module('lmv', 'lmv')
1532 self.devlist = self.db.get_refs('mds')
1534 self.desc_uuid = self.uuid
1536 self.fs_name = fs_name
# One MDC per MDS target; a dangling reference is fatal.
1537 for mds_uuid in self.devlist:
1538 mds = self.db.lookup(mds_uuid)
1540 panic("MDS not found!")
1541 mdc = MDC(mds, self.uuid, fs_name)
1543 self.mdclist.append(mdc)
1545 panic('mdc not found:', mds_uuid)
1548 if is_prepared(self.name):
1550 for mdc in self.mdclist:
1552 # Only ignore connect failures with --force, which
1553 # isn't implemented here yet.
1554 mdc.prepare(ignore_connect_failure=0)
1555 except CommandError, e:
1556 print "Error preparing LMV %s\n" % mdc.uuid
1558 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1559 string.join(self.devlist))
1562 for mdc in self.mdclist:
1564 if is_prepared(self.name):
1565 Module.cleanup(self)
# Load each member MDC's modules, then the lmv module itself.
# NOTE(review): the per-MDC call inside the loop is on a missing line.
1567 def load_module(self):
1568 for mdc in self.mdclist:
1571 Module.load_module(self)
# Unload lmv first, then each member MDC's modules (reverse of load).
1573 def cleanup_module(self):
1574 Module.cleanup_module(self)
1575 for mdc in self.mdclist:
1576 mdc.cleanup_module()
1579 def correct_level(self, level, op=None):
# MDSDEV: the metadata server device.  __init__ reads the device
# configuration, resolves the owning MDS target, derives a default
# inode size from the LOV stripe count, and queues the kernel modules
# the MDS needs.  NOTE(review): the original line numbering jumps in
# this copy, so several branches (else-clauses, guards) are absent.
1582 class MDSDEV(Module):
1583 def __init__(self,db):
1584 Module.__init__(self, 'MDSDEV', db)
# Device geometry and filesystem options from the config db.
1585 self.devpath = self.db.get_val('devpath','')
1586 self.backdevpath = self.db.get_val('backdevpath','')
1587 self.size = self.db.get_val_int('devsize', 0)
1588 self.journal_size = self.db.get_val_int('journalsize', 0)
1589 self.fstype = self.db.get_val('fstype', '')
1590 self.backfstype = self.db.get_val('backfstype', '')
1591 self.nspath = self.db.get_val('nspath', '')
1592 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1593 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1594 self.root_squash = self.db.get_val('root_squash', '')
1595 self.no_root_squash = self.db.get_val('no_root_squash', '')
1596 self.cachetype = self.db.get_val('cachetype', '')
# overwrite the original MDSDEV name and uuid with the MDS name and uuid
1598 target_uuid = self.db.get_first_ref('target')
1599 mds = self.db.lookup(target_uuid)
1600 self.name = mds.getName()
1601 self.filesystem_uuids = mds.get_refs('filesystem')
# No filesystems on the MDS itself means they hang off an LMV;
# resolve it and use the LMV as the master MDS.
1604 self.master_mds = ""
1605 if not self.filesystem_uuids:
1606 self.lmv_uuid = self.db.get_first_ref('lmv')
1607 if not self.lmv_uuid:
# NOTE(review): message says "lvm"; presumably means "lmv".
1608 panic("ALERT: can't find lvm uuid")
1610 self.lmv = self.db.lookup(self.lmv_uuid)
1612 self.filesystem_uuids = self.lmv.get_refs('filesystem')
1613 self.master_mds = self.lmv_uuid
# FIXME: if fstype not set, then determine based on kernel version
1615 self.format = self.db.get_val('autoformat', "no")
1616 if mds.get_val('failover', 0):
1617 self.failover_mds = 'f'
1619 self.failover_mds = 'n'
1620 active_uuid = get_active_target(mds)
1622 panic("No target device found:", target_uuid)
1623 if active_uuid == self.uuid:
1627 if self.active and config.group and config.group != mds.get_val('group'):
# If no explicit inode size was configured, derive one from the
# LOV's stripe count (wider stripes need bigger inodes to hold EAs).
1630 self.inode_size = self.db.get_val_int('inodesize', 0)
1631 if self.inode_size == 0:
1632 # find the LOV for this MDS
1633 lovconfig_uuid = mds.get_first_ref('lovconfig')
1634 if not lovconfig_uuid:
1635 if not self.lmv_uuid:
1636 panic("No LOV found for lovconfig ", lovconfig.name)
1639 panic("No LMV initialized and not lovconfig_uuid found")
1641 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1642 lovconfig = self.lmv.lookup(lovconfig_uuid)
1643 lov_uuid = lovconfig.get_first_ref('lov')
1645 panic("No LOV found for lovconfig ", lovconfig.name)
1647 lovconfig = mds.lookup(lovconfig_uuid)
1648 lov_uuid = lovconfig.get_first_ref('lov')
1650 panic("No LOV found for lovconfig ", lovconfig.name)
1653 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1654 lovconfig = self.lmv.lookup(lovconfig_uuid)
1655 lov_uuid = lovconfig.get_first_ref('lov')
# config_only LOV: just parse the config, do not instantiate OSCs.
1657 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1)
1659 # default stripe count controls default inode_size
1660 if (lov.stripe_cnt > 0):
1661 stripe_count = lov.stripe_cnt
1663 stripe_count = len(lov.devlist)
1664 if stripe_count > 77:
1665 self.inode_size = 4096
1666 elif stripe_count > 35:
1667 self.inode_size = 2048
1668 elif stripe_count > 13:
1669 self.inode_size = 1024
1670 elif stripe_count > 3:
1671 self.inode_size = 512
1673 self.inode_size = 256
1675 self.target_dev_uuid = self.uuid
1676 self.uuid = target_uuid
# NOTE(review): the second assignment overwrites the generated uuid
# with a fixed-form name-based uuid; the guard lines around these are
# missing from this copy — verify against the full source.
1679 client_uuid = generate_client_uuid(self.name)
1680 client_uuid = self.name + "_lmv_" + "UUID"
1681 self.master = LMV(self.db.lookup(self.lmv_uuid), client_uuid, self.name, self.name)
1682 self.master_mds = self.master.name
# Kernel modules the MDS stack needs, in load order.
1685 self.add_lustre_module('mdc', 'mdc')
1686 self.add_lustre_module('osc', 'osc')
1687 self.add_lustre_module('lov', 'lov')
1688 self.add_lustre_module('lmv', 'lmv')
1689 self.add_lustre_module('ost', 'ost')
1690 self.add_lustre_module('mds', 'mds')
1692 if self.fstype == 'smfs':
1693 self.add_lustre_module('smfs', 'smfs')
1695 if self.fstype == 'ldiskfs':
1696 self.add_lustre_module('ldiskfs', 'ldiskfs')
1699 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1701 # if fstype is smfs, then we should also take care about backing
1703 if self.fstype == 'smfs':
1704 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
# 'snap' in the mount options pulls in the snapshot fsfilt modules;
# it is only valid on top of smfs.
1706 for options in string.split(self.mountfsoptions, ','):
1707 if options == 'snap':
1708 if not self.fstype == 'smfs':
1709 panic("mountoptions with snap, but fstype is not smfs\n")
1710 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1711 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
# Load the MDS-side kernel modules queued in __init__.
1712 def load_module(self):
1714 Module.load_module(self)
1717 if not config.record and is_prepared(self.name):
1720 debug(self.uuid, "not active")
1723 # run write_conf automatically, if --reformat used
1725 self.info(self.devpath, self.fstype, self.size, self.format)
1729 self.master.prepare()
1730 # never reformat here
1731 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1732 self.format, self.journal_size, self.inode_size,
1733 self.mkfsoptions, self.backfstype, self.backdevpath)
1735 if not is_prepared('MDT'):
1736 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
1738 mountfsoptions = def_mount_options(self.fstype, 'mds')
1740 if config.mountfsoptions:
1742 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1744 mountfsoptions = config.mountfsoptions
1745 if self.mountfsoptions:
1746 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1748 if self.mountfsoptions:
1750 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1752 mountfsoptions = self.mountfsoptions
1754 if self.fstype == 'smfs':
1755 realdev = self.fstype
1758 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1762 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1767 print 'MDS mount options: ' + mountfsoptions
1769 if not self.master_mds:
1770 self.master_mds = 'dumb'
1771 if not self.cachetype:
1772 self.cachetype = 'dumb'
1773 lctl.newdev("mds", self.name, self.uuid,
1774 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1775 self.name, mountfsoptions,
1776 self.master_mds, self.cachetype))
1778 if development_mode():
1779 procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
1780 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
1781 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1782 print "MDS Warning: failed to set group-hash upcall"
1784 run("echo ", upcall, " > ", procentry)
1786 except CommandError, e:
1788 panic("MDS is missing the config log. Need to run " +
1789 "lconf --write_conf.")
1793 if config.root_squash == None:
1794 config.root_squash = self.root_squash
1795 if config.no_root_squash == None:
1796 config.no_root_squash = self.no_root_squash
1797 if config.root_squash:
1798 if config.no_root_squash:
1799 nsnid = config.no_root_squash
1802 lctl.root_squash(self.name, config.root_squash, nsnid)
# write_conf: mount the MDS device and record the configuration llogs
# on it — the per-filesystem client setup/cleanup logs and one
# setup/cleanup log pair per client node (recorded by re-running lconf
# with --record).  NOTE(review): several lines are missing from this
# copy (the original numbering jumps), including some try:/else: lines.
1804 def write_conf(self):
1806 if not is_prepared(self.name):
1807 self.info(self.devpath, self.fstype, self.format)
1809 blkdev = block_dev(self.devpath, self.size, self.fstype,
1810 config.reformat, self.format, self.journal_size,
1811 self.inode_size, self.mkfsoptions,
1812 self.backfstype, self.backdevpath)
1814 # Even for writing logs we mount mds with supplied mount options
1815 # because it will not mount smfs (if used) otherwise.
1817 mountfsoptions = def_mount_options(self.fstype, 'mds')
# Merge option sources: defaults, then --mountfsoptions, then the
# per-device options from the config db.
1819 if config.mountfsoptions:
1821 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
1823 mountfsoptions = config.mountfsoptions
1824 if self.mountfsoptions:
1825 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1827 if self.mountfsoptions:
1829 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
1831 mountfsoptions = self.mountfsoptions
# smfs mounts by type/dev pair rather than by block device.
1833 if self.fstype == 'smfs':
1834 realdev = self.fstype
1837 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1841 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
1846 print 'MDS mount options: ' + mountfsoptions
1848 # As mount options are passed by 4th param to config tool, we need
1849 # to pass something in 3rd param. But we do not want this 3rd param
1850 # be counted as a profile name for reading log on MDS setup, thus,
1851 # we pass there some predefined sign like 'dumb', which will be
1852 # checked in MDS code and skipped. Probably there is more nice way
1853 # like pass empty string and check it in config tool and pass null
1855 lctl.newdev("mds", self.name, self.uuid,
1856 setup ="%s %s %s %s" %(realdev, self.fstype,
1857 'dumb', mountfsoptions))
1860 # record logs for the MDS lov
1861 for uuid in self.filesystem_uuids:
1862 log("recording clients for filesystem:", uuid)
1863 fs = self.db.lookup(uuid)
1865 # this is ugly, should be organized nice later.
1866 target_uuid = self.db.get_first_ref('target')
1867 mds = self.db.lookup(target_uuid)
1869 lovconfig_uuid = mds.get_first_ref('lovconfig')
1871 lovconfig = mds.lookup(lovconfig_uuid)
1872 obd_uuid = lovconfig.get_first_ref('lov')
1874 obd_uuid = fs.get_first_ref('obd')
1876 client_uuid = generate_client_uuid(self.name)
1877 client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name,
# Record the setup log, then the matching '-clean' teardown log.
1880 lctl.clear_log(self.name, self.name)
1881 lctl.record(self.name, self.name)
1883 lctl.mount_option(self.name, client.get_name(), "")
1885 process_updates(self.db, self.name, self.name, client)
1888 lctl.clear_log(self.name, self.name + '-clean')
1889 lctl.record(self.name, self.name + '-clean')
1891 lctl.del_mount_option(self.name)
1893 process_updates(self.db, self.name, self.name + '-clean', client)
1897 # record logs for each client
1903 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1905 config_options = CONFIG_FILE
# Re-invoke this very script (sys.argv[0]) with --record for every
# client node profile so its setup/cleanup llogs land on the MDS.
1907 for node_db in self.db.lookup_class('node'):
1908 client_name = node_db.getName()
1909 for prof_uuid in node_db.get_refs('profile'):
1910 prof_db = node_db.lookup(prof_uuid)
1911 # refactor this into a function to test "clientness"
1913 for ref_class, ref_uuid in prof_db.get_all_refs():
1914 if ref_class in ('mountpoint','echoclient'):
1915 debug("recording", client_name)
1916 old_noexec = config.noexec
1918 ret, out = run (sys.argv[0], noexec_opt,
1919 " -v --record --nomod",
1920 "--record_log", client_name,
1921 "--record_device", self.name,
1922 "--node", client_name,
1925 for s in out: log("record> ", string.strip(s))
1926 ret, out = run (sys.argv[0], noexec_opt,
1927 "--cleanup -v --record --nomod",
1928 "--record_log", client_name + "-clean",
1929 "--record_device", self.name,
1930 "--node", client_name,
1933 for s in out: log("record> ", string.strip(s))
1934 config.noexec = old_noexec
1937 lctl.cleanup(self.name, self.uuid, 0, 0)
1938 except CommandError, e:
1939 log(self.module_name, "cleanup failed: ", self.name)
1942 Module.cleanup(self)
1944 if self.fstype == 'smfs':
1945 clean_loop(self.backdevpath)
1947 clean_loop(self.devpath)
# True when any 'mds' device is still registered with lctl.
# NOTE(review): the loop and return lines are missing from this copy.
1949 def msd_remaining(self):
1950 out = lctl.device_list()
1952 if string.split(s)[2] in ('mds',):
1955 def safe_to_clean(self):
# Modules may be unloaded only once no MDS devices remain.
1958 def safe_to_clean_modules(self):
1959 return not self.msd_remaining()
1963 debug(self.uuid, "not active")
1966 if is_prepared(self.name):
1968 lctl.cleanup(self.name, self.uuid, config.force,
1970 except CommandError, e:
1971 log(self.module_name, "cleanup failed: ", self.name)
1974 Module.cleanup(self)
1977 self.master.cleanup()
1978 if not self.msd_remaining() and is_prepared('MDT'):
1980 lctl.cleanup("MDT", "MDT_UUID", config.force,
1982 except CommandError, e:
1983 print "cleanup failed: ", self.name
1987 if self.fstype == 'smfs':
1988 clean_loop(self.backdevpath)
1990 clean_loop(self.devpath)
1992 def correct_level(self, level, op=None):
1993 #if self.master_mds:
1998 def __init__(self, db):
1999 Module.__init__(self, 'OSD', db)
2000 self.osdtype = self.db.get_val('osdtype')
2001 self.devpath = self.db.get_val('devpath', '')
2002 self.backdevpath = self.db.get_val('backdevpath', '')
2003 self.size = self.db.get_val_int('devsize', 0)
2004 self.journal_size = self.db.get_val_int('journalsize', 0)
2005 self.inode_size = self.db.get_val_int('inodesize', 0)
2006 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2007 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2008 self.fstype = self.db.get_val('fstype', '')
2009 self.backfstype = self.db.get_val('backfstype', '')
2010 self.nspath = self.db.get_val('nspath', '')
2011 target_uuid = self.db.get_first_ref('target')
2012 ost = self.db.lookup(target_uuid)
2013 self.name = ost.getName()
2014 self.format = self.db.get_val('autoformat', 'yes')
2015 if ost.get_val('failover', 0):
2016 self.failover_ost = 'f'
2018 self.failover_ost = 'n'
2020 active_uuid = get_active_target(ost)
2022 panic("No target device found:", target_uuid)
2023 if active_uuid == self.uuid:
2027 if self.active and config.group and config.group != ost.get_val('group'):
2030 self.target_dev_uuid = self.uuid
2031 self.uuid = target_uuid
2033 self.add_lustre_module('ost', 'ost')
2034 if self.fstype == 'smfs':
2035 self.add_lustre_module('smfs', 'smfs')
2036 # FIXME: should we default to ext3 here?
2037 if self.fstype == 'ldiskfs':
2038 self.add_lustre_module('ldiskfs', 'ldiskfs')
2040 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2041 if self.fstype == 'smfs':
2042 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2044 for options in self.mountfsoptions:
2045 if options == 'snap':
2046 if not self.fstype == 'smfs':
2047 panic("mountoptions with snap, but fstype is not smfs\n")
2048 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2049 self.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2051 self.add_lustre_module(self.osdtype, self.osdtype)
# Load the OSD-side kernel modules queued in __init__.
2053 def load_module(self):
2055 Module.load_module(self)
2057 # need to check /proc/mounts and /etc/mtab before
2058 # formatting anything.
2059 # FIXME: check if device is already formatted.
2061 if is_prepared(self.name):
2064 debug(self.uuid, "not active")
2066 self.info(self.osdtype, self.devpath, self.size, self.fstype,
2067 self.format, self.journal_size, self.inode_size)
2069 if self.osdtype == 'obdecho':
2072 blkdev = block_dev(self.devpath, self.size, self.fstype,
2073 config.reformat, self.format, self.journal_size,
2074 self.inode_size, self.mkfsoptions, self.backfstype,
2077 mountfsoptions = def_mount_options(self.fstype, 'ost')
2079 if config.mountfsoptions:
2081 mountfsoptions = mountfsoptions + ',' + config.mountfsoptions
2083 mountfsoptions = config.mountfsoptions
2084 if self.mountfsoptions:
2085 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2087 if self.mountfsoptions:
2089 mountfsoptions = mountfsoptions + ',' + self.mountfsoptions
2091 mountfsoptions = self.mountfsoptions
2093 if self.fstype == 'smfs':
2094 realdev = self.fstype
2097 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
2101 mountfsoptions = "type=%s,dev=%s" % (self.backfstype,
2106 print 'OSD mount options: ' + mountfsoptions
2108 lctl.newdev(self.osdtype, self.name, self.uuid,
2109 setup ="%s %s %s %s" %(realdev, self.fstype,
2112 if not is_prepared('OSS'):
2113 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
# True when any obdfilter/obdecho device is still registered with lctl.
# NOTE(review): the loop and return lines are missing from this copy.
2115 def osd_remaining(self):
2116 out = lctl.device_list()
2118 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2121 def safe_to_clean(self):
# Modules may be unloaded only once no OSD devices remain.
2124 def safe_to_clean_modules(self):
2125 return not self.osd_remaining()
2129 debug(self.uuid, "not active")
2131 if is_prepared(self.name):
2134 lctl.cleanup(self.name, self.uuid, config.force,
2136 except CommandError, e:
2137 log(self.module_name, "cleanup failed: ", self.name)
2140 if not self.osd_remaining() and is_prepared('OSS'):
2142 lctl.cleanup("OSS", "OSS_UUID", config.force,
2144 except CommandError, e:
2145 print "cleanup failed: ", self.name
2148 if not self.osdtype == 'obdecho':
2149 if self.fstype == 'smfs':
2150 clean_loop(self.backdevpath)
2152 clean_loop(self.devpath)
2154 def correct_level(self, level, op=None):
2157 # Generic client module, used by OSC and MDC
2158 class Client(Module):
2159 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2161 self.target_name = tgtdb.getName()
2162 self.target_uuid = tgtdb.getUUID()
2165 self.backup_targets = []
2167 self.tgt_dev_uuid = get_active_target(tgtdb)
2168 if not self.tgt_dev_uuid:
2169 panic("No target device found for target(1):", self.target_name)
2171 self.kmod = kmod(config.lustre, config.portals)
2175 self.module = module
2176 self.module_name = string.upper(module)
2178 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2179 self.target_name, fs_name)
2181 self.name = self_name
2183 self.lookup_server(self.tgt_dev_uuid)
2185 self.lookup_backup_targets()
2186 self.fs_name = fs_name
2189 self.add_lustre_module(module_dir, module)
2191 def lookup_server(self, srv_uuid):
2192 """ Lookup a server's network information """
2193 self._server_nets = get_ost_net(self.db, srv_uuid)
2194 if len(self._server_nets) == 0:
2195 panic ("Unable to find a server for:", srv_uuid)
2198 def get_servers(self):
2199 return self._server_nets
2200 def lookup_backup_targets(self):
2201 """ Lookup alternative network information """
2202 prof_list = toplustreDB.get_refs('profile')
2203 for prof_uuid in prof_list:
2204 prof_db = toplustreDB.lookup(prof_uuid)
2206 panic("profile:", prof_uuid, "not found.")
2207 for ref_class, ref_uuid in prof_db.get_all_refs():
2208 if ref_class in ('osd', 'mdsdev'):
2209 devdb = toplustreDB.lookup(ref_uuid)
2210 uuid = devdb.get_first_ref('target')
2211 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2212 self.backup_targets.append(ref_uuid)
2214 def prepare(self, ignore_connect_failure = 0):
2215 self.info(self.target_uuid)
2216 if not config.record and is_prepared(self.name):
2219 srv = choose_local_server(self.get_servers())
2223 routes = find_route(self.get_servers())
2224 if len(routes) == 0:
2225 panic ("no route to", self.target_uuid)
2226 for (srv, r) in routes:
2227 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2228 except CommandError, e:
2229 if not ignore_connect_failure:
2232 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2233 debug("%s inactive" % self.target_uuid)
2234 inactive_p = "inactive"
2236 debug("%s active" % self.target_uuid)
2238 lctl.newdev(self.module, self.name, self.uuid,
2239 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2241 for tgt_dev_uuid in self.backup_targets:
2242 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2243 if len(this_nets) == 0:
2244 panic ("Unable to find a server for:", tgt_dev_uuid)
2245 srv = choose_local_server(this_nets)
2249 routes = find_route(this_nets);
2250 if len(routes) == 0:
2251 panic("no route to", tgt_dev_uuid)
2252 for (srv, r) in routes:
2253 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2255 lctl.add_conn(self.name, srv.nid_uuid);
2258 if is_prepared(self.name):
2259 Module.cleanup(self)
2261 srv = choose_local_server(self.get_servers())
2263 lctl.disconnect(srv)
2265 for (srv, r) in find_route(self.get_servers()):
2266 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2267 except CommandError, e:
2268 log(self.module_name, "cleanup failed: ", self.name)
2272 for tgt_dev_uuid in self.backup_targets:
2273 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2274 srv = choose_local_server(this_net)
2276 lctl.disconnect(srv)
2278 for (srv, r) in find_route(this_net):
2279 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2282 def correct_level(self, level, op=None):
2285 def deactivate(self):
2287 lctl.deactivate(self.name)
2288 except CommandError, e:
2289 log(self.module_name, "deactivate failed: ", self.name)
# MDC: metadata client; a Client specialized to the 'mdc' module.
2294 def __init__(self, db, uuid, fs_name):
2295 Client.__init__(self, db, uuid, 'mdc', fs_name)
2297 def permits_inactive(self):
# OSC: object storage client; a Client specialized to the 'osc' module.
2301 def __init__(self, db, uuid, fs_name):
2302 Client.__init__(self, db, uuid, 'osc', fs_name)
2304 def permits_inactive(self):
# VLOV.__init__: single-OSC LOV wrapper (used by CMOBD over an OST
# master).  NOTE(review): several initializer lines are missing from
# this copy (the original numbering jumps).
2308 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
2309 Module.__init__(self, 'VLOV', db)
2310 if name_override != None:
2311 self.name = "lov_%s" % name_override
2312 self.add_lustre_module('lov', 'lov')
# Fixed 64KB stripe size for this virtual LOV.
2313 self.stripe_sz = 65536
2317 self.desc_uuid = self.uuid
2318 self.uuid = generate_client_uuid(self.name)
2319 self.fs_name = fs_name
2320 self.osc = get_osc(db, self.uuid, fs_name)
2322 panic('osc not found:', self.uuid)
# config_only guard lines are among those missing from this copy.
2324 self.config_only = 1
2326 self.config_only = None
2332 if not config.record and is_prepared(self.name):
2334 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
2335 self.stripe_sz, self.stripe_off, self.pattern)
2336 target_uuid = self.osc.target_uuid
2339 self.osc.prepare(ignore_connect_failure=0)
2340 except CommandError, e:
2341 print "Error preparing OSC %s\n" % osc.uuid
2343 lctl.lov_add_obd(self.name, self.uuid, target_uuid, 0, 1)
2346 target_uuid = self.osc.target_uuid
2348 if is_prepared(self.name):
2349 Module.cleanup(self)
2350 if self.config_only:
2351 panic("Can't clean up config_only LOV ", self.name)
# Load the member OSC's modules, then the lov module itself.
# Refuses to run for a config-only (placeholder) LOV.
2353 def load_module(self):
2354 if self.config_only:
2355 panic("Can't load modules for config_only LOV ", self.name)
2356 self.osc.load_module()
2357 Module.load_module(self)
# Unload lov first, then the member OSC's modules (reverse of load).
2359 def cleanup_module(self):
2360 if self.config_only:
2361 panic("Can't cleanup modules for config_only LOV ", self.name)
2362 Module.cleanup_module(self)
2363 self.osc.cleanup_module()
2365 def correct_level(self, level, op=None):
2368 class CMOBD(Module):
# CMOBD.__init__: cache-manager OBD, pairing a master obd with a cache
# obd.  An OST master is wrapped in a VLOV client, otherwise an MDC is
# used.  NOTE(review): some lines are missing from this copy.
2369 def __init__(self,db):
2370 Module.__init__(self, 'CMOBD', db)
2371 self.name = self.db.getName();
2372 self.uuid = generate_client_uuid(self.name)
2373 self.master_uuid = self.db.get_first_ref('masterobd')
2374 self.cache_uuid = self.db.get_first_ref('cacheobd')
2375 self.add_lustre_module('cmobd', 'cmobd')
2376 master_obd = self.db.lookup(self.master_uuid)
2378 panic('master obd not found:', self.master_uuid)
2379 cache_obd = self.db.lookup(self.cache_uuid)
2381 panic('cache obd not found:', self.cache_uuid)
2383 if master_obd.get_class() == 'ost':
2384 self.client_uuid = generate_client_uuid(self.name)
2385 self.master= VLOV(master_obd, self.client_uuid, self.name,
2386 "%s_master" % (self.name))
# Use the client's uuid as the master uuid from here on.
2387 self.master_uuid = self.master.get_uuid()
2389 self.master = get_mdc(db, self.name, self.master_uuid)
2390 # need to check /proc/mounts and /etc/mtab before
2391 # formatting anything.
2392 # FIXME: check if device is already formatted.
2394 self.master.prepare()
2395 if not config.record and is_prepared(self.name):
2397 self.info(self.master_uuid, self.cache_uuid)
2398 lctl.newdev("cmobd", self.name, self.uuid,
2399 setup ="%s %s" %(self.master_uuid,
2403 if is_prepared(self.name):
2404 Module.cleanup(self)
2405 self.master.cleanup()
# Load the master client's modules, then cmobd itself.
2407 def load_module(self):
2408 self.master.load_module()
2409 Module.load_module(self)
# Unload cmobd first, then the master client's modules.
2411 def cleanup_module(self):
2412 Module.cleanup_module(self)
2413 self.master.cleanup_module()
2415 def correct_level(self, level, op=None):
# COBD.__init__: caching OBD pairing a 'real' obd with a 'cache' obd.
# The real/cache members are LOVs or MDCs depending on 'type'; the
# branch guard lines are among those missing from this copy.
2419 def __init__(self, db, uuid, name, type, name_override = None):
2420 Module.__init__(self, 'COBD', db)
2421 self.name = self.db.getName();
2422 self.uuid = generate_client_uuid(self.name)
2423 self.real_uuid = self.db.get_first_ref('realobd')
2424 self.cache_uuid = self.db.get_first_ref('cacheobd')
2425 self.add_lustre_module('cobd', 'cobd')
2426 real_obd = self.db.lookup(self.real_uuid)
2428 panic('real obd not found:', self.real_uuid)
2429 cache_obd = self.db.lookup(self.cache_uuid)
2431 panic('cache obd not found:', self.cache_uuid)
2433 self.real = LOV(real_obd, self.real_uuid, name,
2434 "%s_real" % (self.name));
2435 self.cache = LOV(cache_obd, self.cache_uuid, name,
2436 "%s_cache" % (self.name));
2438 self.real = get_mdc(db, name, self.real_uuid)
2439 self.cache = get_mdc(db, name, self.cache_uuid)
2440 # need to check /proc/mounts and /etc/mtab before
2441 # formatting anything.
2442 # FIXME: check if device is already formatted.
# Name of the 'real' (backing) member device.
2447 def get_real_name(self):
2448 return self.real.name
# Name of the 'cache' member device.
2449 def get_cache_name(self):
2450 return self.cache.name
2453 self.cache.prepare()
2454 if not config.record and is_prepared(self.name):
2456 self.info(self.real_uuid, self.cache_uuid)
2457 lctl.newdev("cobd", self.name, self.uuid,
2458 setup ="%s %s" %(self.real.name,
2462 if is_prepared(self.name):
2463 Module.cleanup(self)
2465 self.cache.cleanup()
# Load the real member's modules, then cobd itself.
2467 def load_module(self):
2468 self.real.load_module()
2469 Module.load_module(self)
# Unload cobd first, then the real member's modules.
2471 def cleanup_module(self):
2472 Module.cleanup_module(self)
2473 self.real.cleanup_module()
2475 # virtual interface for OSC and LOV
# VOSC.__init__: pick the concrete OSC-like client (LOV, COBD or plain
# OSC) based on the class of the config db entry.
2477 def __init__(self, db, client_uuid, name, name_override = None):
2478 Module.__init__(self, 'VOSC', db)
2479 if db.get_class() == 'lov':
2480 self.osc = LOV(db, client_uuid, name, name_override)
2482 elif db.get_class() == 'cobd':
2483 self.osc = COBD(db, client_uuid, name, 'obd')
2486 self.osc = OSC(db, client_uuid, name)
2489 return self.osc.get_uuid()
2491 return self.osc.get_name()
# Delegate module load to the wrapped client.
2496 def load_module(self):
2497 self.osc.load_module()
# Delegate module cleanup to the wrapped client.
2498 def cleanup_module(self):
2499 self.osc.cleanup_module()
# Delegate service-level correction to the wrapped client.
2500 def correct_level(self, level, op=None):
2501 return self.osc.correct_level(level, op)
2503 # virtual interface for MDC and LMV
# VMDC.__init__: pick the concrete MDC-like client (LMV, COBD or plain
# MDC) based on the class of the config db entry.
2505 def __init__(self, db, client_uuid, name, name_override = None):
2506 Module.__init__(self, 'VMDC', db)
2507 if db.get_class() == 'lmv':
2508 self.mdc = LMV(db, client_uuid, name)
2509 elif db.get_class() == 'cobd':
2510 self.mdc = COBD(db, client_uuid, name, 'mds')
2512 self.mdc = MDC(db, client_uuid, name)
2514 return self.mdc.uuid
2516 return self.mdc.name
# Delegate module load to the wrapped client.
2521 def load_module(self):
2522 self.mdc.load_module()
# Delegate module cleanup to the wrapped client.
2523 def cleanup_module(self):
2524 self.mdc.cleanup_module()
# Delegate service-level correction to the wrapped client.
2525 def correct_level(self, level, op=None):
2526 return self.mdc.correct_level(level, op)
# ECHO_CLIENT: test client layered on a VOSC over the configured obd.
# NOTE(review): the original line numbering jumps in this copy, so the
# prepare/cleanup method headers and some bodies are absent.
2528 class ECHO_CLIENT(Module):
2529 def __init__(self,db):
2530 Module.__init__(self, 'ECHO_CLIENT', db)
2531 self.add_lustre_module('obdecho', 'obdecho')
2532 self.obd_uuid = self.db.get_first_ref('obd')
2533 obd = self.db.lookup(self.obd_uuid)
2534 self.uuid = generate_client_uuid(self.name)
2535 self.osc = VOSC(obd, self.uuid, self.name)
2538 if not config.record and is_prepared(self.name):
2541 self.osc.prepare() # XXX This is so cheating. -p
2542 self.info(self.obd_uuid)
2544 lctl.newdev("echo_client", self.name, self.uuid,
2545 setup = self.osc.get_name())
2548 if is_prepared(self.name):
2549 Module.cleanup(self)
# Load/unload delegate to the underlying VOSC.
2552 def load_module(self):
2553 self.osc.load_module()
2554 Module.load_module(self)
2556 def cleanup_module(self):
2557 Module.cleanup_module(self)
2558 self.osc.cleanup_module()
2560 def correct_level(self, level, op=None):
def generate_client_uuid(name):
    """Build a pseudo-random client uuid embedding (up to 19 chars of) name.

    The format produces random-hex_name_random-hex-pair; the result is
    clamped to 36 characters, the uuid width lconf uses elsewhere.
    """
    # BUGFIX: the format string has four conversions (%05x, %.19s, %05x,
    # %05x) but the 'name' argument was missing from the tuple, which
    # raises "not enough arguments for format string" at runtime.
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
# Tries string.rstrip with a char-list argument (Python >= 2.2.3); on
# TypeError falls back to a manual right-to-left scan for older
# interpreters.  NOTE(review): the try: line and the fallback body
# after the for are missing from this copy.
2570 def my_rstrip(s, chars):
2571 """my_rstrip(s, chars) -> strips any instances of the characters
2572 found in chars from the right side of string s"""
2573 # XXX required because python versions pre 2.2.3 don't allow
2574 #string.rstrip() to take alternate char lists
2578 ns = string.rstrip(s, '/')
2579 except TypeError, e:
2580 for i in range(len(s) - 1, 0, -1):
# Mountpoint: a client filesystem mount.  Builds the VOSC/VMDC client
# pair for the filesystem and drives mount/umount of lustre_lite.
# NOTE(review): the original line numbering jumps in this copy, so the
# prepare/cleanup method headers and several branches are absent.
2588 class Mountpoint(Module):
2589 def __init__(self,db):
2590 Module.__init__(self, 'MTPT', db)
2591 self.path = self.db.get_val('path')
2592 self.clientoptions = self.db.get_val('clientoptions', '')
2593 self.fs_uuid = self.db.get_first_ref('filesystem')
2594 fs = self.db.lookup(self.fs_uuid)
# Prefer an LMV for metadata; fall back to a plain MDS reference.
2595 self.mds_uuid = fs.get_first_ref('lmv')
2596 if not self.mds_uuid:
2597 self.mds_uuid = fs.get_first_ref('mds')
2598 self.obd_uuid = fs.get_first_ref('obd')
2599 client_uuid = generate_client_uuid(self.name)
2601 ost = self.db.lookup(self.obd_uuid)
2603 panic("no ost: ", self.obd_uuid)
2605 mds = self.db.lookup(self.mds_uuid)
2607 panic("no mds: ", self.mds_uuid)
2609 self.add_lustre_module('mdc', 'mdc')
2610 self.add_lustre_module('lmv', 'lmv')
2611 self.add_lustre_module('llite', 'llite')
2613 self.vosc = VOSC(ost, client_uuid, self.name)
2614 self.vmdc = VMDC(mds, client_uuid, self.name)
2617 if not config.record and fs_is_mounted(self.path):
2618 log(self.path, "already mounted.")
2623 vmdc_name = self.vmdc.get_name()
2625 self.info(self.path, self.mds_uuid, self.obd_uuid)
2626 if config.record or config.lctl_dump:
2627 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
# Merge --clientoptions with the per-mountpoint options.
2630 if config.clientoptions:
2631 if self.clientoptions:
2632 self.clientoptions = self.clientoptions + ',' + \
2633 config.clientoptions
2635 self.clientoptions = config.clientoptions
2636 if self.clientoptions:
2637 self.clientoptions = ',' + self.clientoptions
2638 # Linux kernel will deal with async and not pass it to ll_fill_super,
2639 # so replace it with Lustre async
2640 self.clientoptions = string.replace(self.clientoptions, "async",
2643 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
2644 (self.vosc.get_name(), vmdc_name, self.clientoptions,
2645 config.config, self.path)
2646 run("mkdir", self.path)
2651 panic("mount failed:", self.path, ":", string.join(val))
2654 self.info(self.path, self.mds_uuid,self.obd_uuid)
2656 if config.record or config.lctl_dump:
2657 lctl.del_mount_option(local_node_name)
# Unmount (force first with --force; plain umount otherwise) and
# verify the filesystem is actually gone.
2659 if fs_is_mounted(self.path):
2661 (rc, out) = run("umount", "-f", self.path)
2663 (rc, out) = run("umount", self.path)
2665 raise CommandError('umount', out, rc)
2667 if fs_is_mounted(self.path):
2668 panic("fs is still mounted:", self.path)
2673 def load_module(self):
2674 self.vosc.load_module()
2675 Module.load_module(self)
2677 def cleanup_module(self):
2678 Module.cleanup_module(self)
2679 self.vosc.cleanup_module()
2681 def correct_level(self, level, op=None):
2684 # ============================================================
2685 # misc query functions
2687 def get_ost_net(self, osd_uuid):
2691 osd = self.lookup(osd_uuid)
2692 node_uuid = osd.get_first_ref('node')
2693 node = self.lookup(node_uuid)
2695 panic("unable to find node for osd_uuid:", osd_uuid,
2696 " node_ref:", node_uuid_)
2697 for net_uuid in node.get_networks():
2698 db = node.lookup(net_uuid)
2699 srv_list.append(Network(db))
2703 # the order of iniitailization is based on level.
# Map a config-db entry class to its startup level (initialization is
# ordered by level).  NOTE(review): the per-branch "ret = N" lines are
# missing from this copy, as is the final return.
2704 def getServiceLevel(self):
2705 type = self.get_class()
2707 if type in ('network',):
2709 elif type in ('routetbl',):
2711 elif type in ('ldlm',):
2713 elif type in ('osd', 'cobd'):
2715 elif type in ('mdsdev',):
2717 elif type in ('lmv',):
2719 elif type in ('cmobd',):
2721 elif type in ('mountpoint', 'echoclient'):
2724 panic("Unknown type: ", type)
# Levels outside --minlevel/--maxlevel are filtered out here.
2726 if ret < config.minlevel or ret > config.maxlevel:
2731 # return list of services in a profile. list is a list of tuples
2732 # [(level, db_object),]
2733 def getServices(self):
2735 for ref_class, ref_uuid in self.get_all_refs():
2736 servdb = self.lookup(ref_uuid)
2738 level = getServiceLevel(servdb)
2740 list.append((level, servdb))
# NOTE(review): the accumulator is named 'list', shadowing the builtin.
# reached when self.lookup(ref_uuid) found nothing:
2742 panic('service not found: ' + ref_uuid)
2748 ############################################################
2750 # FIXME: clean this mess up!
2752 # OSC is no longer in the xml, so we have to fake it.
2753 # this is getting ugly and begging for another refactoring
# Fabricate an OSC wrapper for an OST record (OSCs no longer appear in
# the xml directly).
2754 def get_osc(ost_db, uuid, fs_name):
2755 osc = OSC(ost_db, uuid, fs_name)
# Build an MDC for the given MDS uuid; error() when the uuid is unknown.
2758 def get_mdc(db, fs_name, mds_uuid):
2759 mds_db = db.lookup(mds_uuid);
2761 error("no mds:", mds_uuid)
2762 mdc = MDC(mds_db, mds_uuid, fs_name)
2765 ############################################################
2766 # routing ("rooting")
2768 # list of (nettype, cluster_id, nid)
# Record this node's own (nettype, cluster_id, nid) tuples in the global
# 'local_clusters', and register an AcceptorHandler per server port.
2771 def find_local_clusters(node_db):
2772 global local_clusters
2773 for netuuid in node_db.get_networks():
2774 net = node_db.lookup(netuuid)
# NOTE(review): 'srv' is assigned on a line missing from this extract
# (presumably constructed from 'net') -- confirm against full source.
2776 debug("add_local", netuuid)
2777 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
# a given acceptor port may only be claimed once per node
2779 if acceptors.has_key(srv.port):
2780 panic("duplicate port:", srv.port)
2781 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2783 # This node is a gateway.
# Body not visible in this extract.
2785 def node_is_router():
2788 # If there are any routers found in the config, then this will be true
2789 # and all nodes will load kptlrouter.
def node_needs_router():
    """True when kptlrouter is required on this node.

    That is the case either when some router exists anywhere in the
    configuration (needs_router) or when this node is itself a router
    (is_router).
    """
    if needs_router:
        return needs_router
    return is_router
2794 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2795 # Currently, these local routes are only added to kptlrouter route
2796 # table if they are needed to connect to a specific server. This
2797 # should be changed so all available routes are loaded, and the
2798 # ptlrouter can make all the decisions.
# Walk every node marked router=1 and collect, into the global
# 'local_routes', the routes reachable from one of our local clusters.
2801 def find_local_routes(lustre):
2802 """ Scan the lustre config looking for routers . Build list of
2804 global local_routes, needs_router
2806 list = lustre.lookup_class('node')
2808 if router.get_val_int('router', 0):
# match each of the router's networks against our local clusters
2810 for (local_type, local_cluster_id, local_nid) in local_clusters:
2812 for netuuid in router.get_networks():
2813 db = router.lookup(netuuid)
2814 if (local_type == db.get_val('nettype') and
2815 local_cluster_id == db.get_val('clusterid')):
2816 gw = db.get_val('nid')
2819 debug("find_local_routes: gw is", gw)
2820 for route in router.get_local_routes(local_type, gw):
2821 local_routes.append(route)
2822 debug("find_local_routes:", local_routes)
# Pick the first server that lives on one of our local clusters.
2825 def choose_local_server(srv_list):
2826 for srv in srv_list:
2827 if local_cluster(srv.net_type, srv.cluster_id):
# True iff (net_type, cluster_id) matches one of our local clusters.
2830 def local_cluster(net_type, cluster_id):
2831 for cluster in local_clusters:
2832 if net_type == cluster[0] and cluster_id == cluster[1]:
# Like local_cluster, but the nid must match as well.
2836 def local_interface(net_type, cluster_id, nid):
2837 for cluster in local_clusters:
2838 if (net_type == cluster[0] and cluster_id == cluster[1]
2839 and nid == cluster[2]):
# Return [(srv, route), ...] for servers reachable through a known
# local route; a route matches when the target nid falls inside the
# route's [lo, hi] range and the cluster ids agree.
2843 def find_route(srv_list):
2845 frm_type = local_clusters[0][0]
2846 for srv in srv_list:
2847 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2848 to_type = srv.net_type
# NOTE(review): 'to' is assigned on a line missing from this extract
# (presumably srv.nid) -- confirm against the full source.
2850 cluster_id = srv.cluster_id
2851 debug ('looking for route to', to_type, to)
2852 for r in local_routes:
2853 debug("find_route: ", r)
# r is (nettype, gw, tgt_cluster_id, lo, hi) -- see comment above
2854 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2855 result.append((srv, r))
# Resolve the device uuid of the currently-active instance of a target,
# honouring any --select override registered for the target's name.
2858 def get_active_target(db):
2859 target_uuid = db.getUUID()
2860 target_name = db.getName()
2861 node_name = get_select(target_name)
2863 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
# fallback when no node was selected: follow the 'active' reference
2865 tgt_dev_uuid = db.get_first_ref('active')
# Find the Network whose nid_uuid matches; part of the loop body
# (the Network construction) is missing from this extract.
2868 def get_server_by_nid_uuid(db, nid_uuid):
2869 for n in db.lookup_class("network"):
2871 if net.nid_uuid == nid_uuid:
2875 ############################################################
# --- fragment: body of newService(db); the def line and most branch
# bodies are missing from this extract ---
2879 type = db.get_class()
2880 debug('Service:', type, db.getName(), db.getUUID())
# LOV/COBD get a placeholder uuid here because the real uuid is
# supplied elsewhere; the marker string makes accidental use obvious.
2885 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2886 elif type == 'network':
2888 elif type == 'routetbl':
2892 elif type == 'cobd':
2893 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2894 elif type == 'cmobd':
2896 elif type == 'mdsdev':
2898 elif type == 'mountpoint':
2900 elif type == 'echoclient':
2905 panic ("unknown service type:", type)
2909 # Prepare the system to run lustre using a particular profile
2910 # in a the configuration.
2911 # * load & the modules
2912 # * setup networking for the current node
2913 # * make sure partitions are in place and prepared
2914 # * initialize devices with lctl
2915 # Levels is important, and needs to be enforced.
# Apply 'operation' (one of the do* phase functions) to the services of
# each profile in prof_list, in order.
2916 def for_each_profile(db, prof_list, operation):
2917 for prof_uuid in prof_list:
2918 prof_db = db.lookup(prof_uuid)
2920 panic("profile:", prof_uuid, "not found.")
2921 services = getServices(prof_db)
# (the call applying 'operation' to 'services' is missing from extract)
# Resolve the OSC for an 'add' update record: take the lov uuid/name
# from the passed-in lov when available, otherwise dig both out of the
# xml by chasing obd_ref -> filesystem -> mountpoint.
2924 def magic_get_osc(db, rec, lov):
2926 lov_uuid = lov.get_uuid()
2927 lov_name = lov.osc.fs_name
2929 lov_uuid = rec.getAttribute('lov_uuidref')
2930 # FIXME: better way to find the mountpoint?
2931 filesystems = db.root_node.getElementsByTagName('filesystem')
2933 for fs in filesystems:
2934 ref = fs.getElementsByTagName('obd_ref')
2935 if ref[0].getAttribute('uuidref') == lov_uuid:
2936 fsuuid = fs.getAttribute('uuid')
2940 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
# map the filesystem uuid to its mountpoint to recover the lov name
2942 mtpts = db.root_node.getElementsByTagName('mountpoint')
2945 ref = fs.getElementsByTagName('filesystem_ref')
2946 if ref[0].getAttribute('uuidref') == fsuuid:
2947 lov_name = fs.getAttribute('name')
2951 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
2953 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
2955 ost_uuid = rec.getAttribute('ost_uuidref')
2956 obd = db.lookup(ost_uuid)
2959 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
2961 osc = get_osc(obd, lov_uuid, lov_name)
# NOTE(review): 'obd_uuid' is not defined in the visible code -- this
# panic path would raise NameError; probably meant 'ost_uuid'.
2963 panic('osc not found:', obd_uuid)
2966 # write logs for update records. sadly, logs of all types -- and updates in
2967 # particular -- are something of an afterthought. lconf needs rewritten with
2968 # these as core concepts. so this is a pretty big hack.
# Emit config-log commands for each child record (add / deactivate /
# delete) of one <update> element.  Self-described in the file as a
# "pretty big hack" -- updates were an afterthought in lconf.
2969 def process_update_record(db, update, lov):
2970 for rec in update.childNodes:
# skip text/comment nodes between the element records
2971 if rec.nodeType != rec.ELEMENT_NODE:
2974 log("found "+rec.nodeName+" record in update version " +
2975 str(update.getAttribute('version')))
# every record kind requires all four of these attributes
2977 lov_uuid = rec.getAttribute('lov_uuidref')
2978 ost_uuid = rec.getAttribute('ost_uuidref')
2979 index = rec.getAttribute('index')
2980 gen = rec.getAttribute('generation')
2982 if not lov_uuid or not ost_uuid or not index or not gen:
2983 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
2986 tmplov = db.lookup(lov_uuid)
2988 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
2989 lov_name = tmplov.getName()
2991 lov_name = lov.osc.name
2993 # ------------------------------------------------------------- add
2994 if rec.nodeName == 'add':
2996 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
2999 osc = magic_get_osc(db, rec, lov)
3002 # Only ignore connect failures with --force, which
3003 # isn't implemented here yet.
3004 osc.prepare(ignore_connect_failure=0)
# prepare failures are reported but do not abort the update replay
3005 except CommandError, e:
3006 print "Error preparing OSC %s\n" % osc.uuid
3009 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3011 # ------------------------------------------------------ deactivate
3012 elif rec.nodeName == 'deactivate':
3016 osc = magic_get_osc(db, rec, lov)
3020 except CommandError, e:
3021 print "Error deactivating OSC %s\n" % osc.uuid
3024 # ---------------------------------------------------------- delete
3025 elif rec.nodeName == 'delete':
3029 osc = magic_get_osc(db, rec, lov)
3035 except CommandError, e:
3036 print "Error cleaning up OSC %s\n" % osc.uuid
3039 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
# Replay every <update> element from the config into its own config log
# named "<log_name>-<version>" on log_device, skipping empty records.
3041 def process_updates(db, log_device, log_name, lov = None):
3042 updates = db.root_node.getElementsByTagName('update')
3044 if not u.childNodes:
3045 log("ignoring empty update record (version " +
3046 str(u.getAttribute('version')) + ")")
3049 version = u.getAttribute('version')
3050 real_name = "%s-%s" % (log_name, version)
# start a fresh recording log for this update's commands
3051 lctl.clear_log(log_device, real_name)
3052 lctl.record(log_device, real_name)
3054 process_update_record(db, u, lov)
# Phase helpers, each applied to one profile's service list by
# for_each_profile().  (loop headers / action calls are partly missing
# from this extract)
# write_conf phase: only mdsdev services get their config written
3058 def doWriteconf(services):
3062 if s[1].get_class() == 'mdsdev':
3063 n = newService(s[1])
# setup phase: instantiate every service, re-sort by corrected level
3066 def doSetup(services):
3071 n = newService(s[1])
3073 slist.append((n.level, n))
3076 nl = n[1].correct_level(n[0])
3077 nlist.append((nl, n[1]))
# module-load phase
3082 def doModules(services):
3086 n = newService(s[1])
# cleanup phase: mirrors doSetup's level correction, but tears down
# only services reporting safe_to_clean()
3089 def doCleanup(services):
3094 n = newService(s[1])
3096 slist.append((n.level, n))
3099 nl = n[1].correct_level(n[0])
3100 nlist.append((nl, n[1]))
3104 if n[1].safe_to_clean():
# module-unload phase, gated on safe_to_clean_modules()
3107 def doUnloadModules(services):
3112 n = newService(s[1])
3113 if n.safe_to_clean_modules():
# Top-level per-node driver: locate this host's node entry, pull its
# per-node settings, then run the requested phase (write_conf /
# recover / cleanup / setup) over the node's profiles.
3118 def doHost(lustreDB, hosts):
3119 global is_router, local_node_name
# try each candidate hostname until one matches a node entry
3122 node_db = lustreDB.lookup_name(h, 'node')
3126 panic('No host entry found.')
# per-node configuration values (command line may override some later)
3128 local_node_name = node_db.get_val('name', 0)
3129 is_router = node_db.get_val_int('router', 0)
3130 lustre_upcall = node_db.get_val('lustreUpcall', '')
3131 portals_upcall = node_db.get_val('portalsUpcall', '')
3132 timeout = node_db.get_val_int('timeout', 0)
3133 ptldebug = node_db.get_val('ptldebug', '')
3134 subsystem = node_db.get_val('subsystem', '')
3136 find_local_clusters(node_db)
3138 find_local_routes(lustreDB)
3140 # Two step process: (1) load modules, (2) setup lustre
3141 # if not cleaning, load modules first.
3142 prof_list = node_db.get_refs('profile')
3144 if config.write_conf:
3145 for_each_profile(node_db, prof_list, doModules)
3147 for_each_profile(node_db, prof_list, doWriteconf)
3148 for_each_profile(node_db, prof_list, doUnloadModules)
3151 elif config.recover:
3152 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3153 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3154 "--client_uuid <UUID> --conn_uuid <UUID>")
3155 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3157 elif config.cleanup:
3159 # the command line can override this value
3161 # ugly hack, only need to run lctl commands for --dump
3162 if config.lctl_dump or config.record:
3163 for_each_profile(node_db, prof_list, doCleanup)
# full cleanup: apply sysctl/upcall settings, then tear down and unload
3166 sys_set_timeout(timeout)
3167 sys_set_ptldebug(ptldebug)
3168 sys_set_subsystem(subsystem)
3169 sys_set_lustre_upcall(lustre_upcall)
3170 sys_set_portals_upcall(portals_upcall)
3172 for_each_profile(node_db, prof_list, doCleanup)
3173 for_each_profile(node_db, prof_list, doUnloadModules)
3177 # ugly hack, only need to run lctl commands for --dump
3178 if config.lctl_dump or config.record:
3179 sys_set_timeout(timeout)
3180 sys_set_lustre_upcall(lustre_upcall)
3181 for_each_profile(node_db, prof_list, doSetup)
# normal setup path: bump socket buffer ceilings before loading modules
3185 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3186 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3188 for_each_profile(node_db, prof_list, doModules)
3190 sys_set_debug_path()
3191 sys_set_ptldebug(ptldebug)
3192 sys_set_subsystem(subsystem)
# generate the gdb helper script listing loaded modules
3193 script = config.gdb_script
3194 run(lctl.lctl, ' modules >', script)
3196 log ("The GDB module script is in", script)
3197 # pause, so user has time to break and
3200 sys_set_timeout(timeout)
3201 sys_set_lustre_upcall(lustre_upcall)
3202 sys_set_portals_upcall(portals_upcall)
3204 for_each_profile(node_db, prof_list, doSetup)
# Failover recovery: find the currently-active instance of the failed
# target, pick a locally-reachable server for it, disconnect the old
# nid (best effort) and reconnect/recover the client.
3207 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3208 tgt = lustreDB.lookup(tgt_uuid)
3210 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3211 new_uuid = get_active_target(tgt)
3213 raise Lustre.LconfError("doRecovery: no active target found for: " +
3215 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3217 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3219 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3221 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# disconnecting the old server is best-effort: failure is only logged
3224 lctl.disconnect(oldnet)
3225 except CommandError, e:
3226 log("recover: disconnect", nid_uuid, "failed: ")
3231 except CommandError, e:
3232 log("recover: connect failed")
3235 lctl.recover(client_uuid, net.nid_uuid)
# Derive config.lustre / config.portals (module search paths) from the
# location of the lconf binary in development mode, or normalize the
# --lustre/--portals command line values otherwise.
3238 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3239 base = os.path.dirname(cmd)
3240 if development_mode():
3241 if not config.lustre:
3242 debug('using objdir module paths')
3243 config.lustre = (os.path.join(base, ".."))
3244 # normalize the portals dir, using command line arg if set
3246 portals_dir = config.portals
3247 dir = os.path.join(config.lustre, portals_dir)
3248 config.portals = dir
3249 debug('config.portals', config.portals)
3250 elif config.lustre and config.portals:
3252 # if --lustre and --portals, normalize portals
3253 # can ignore PORTALS_DIR here, since it is probably useless here
3254 config.portals = os.path.join(config.lustre, config.portals)
3255 debug('config.portals B', config.portals)
# Write 'val' into /proc/sys/<path>.  (the noexec guard and error
# handling are missing from this extract)
3257 def sysctl(path, val):
3258 debug("+ sysctl", path, val)
3262 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the kernel's portals debug dump at the configured path."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
# Install the lustre recovery upcall via lctl; --lustre_upcall (or the
# generic --upcall) on the command line overrides the node config value.
3272 def sys_set_lustre_upcall(upcall):
3273 # the command overrides the value in the node config
3274 if config.lustre_upcall:
3275 upcall = config.lustre_upcall
# fallback branch -- its 'elif' line is missing from this extract
3277 upcall = config.upcall
3279 lctl.set_lustre_upcall(upcall)
# Same idea for the portals upcall, installed through /proc sysctl.
3281 def sys_set_portals_upcall(upcall):
3282 # the command overrides the value in the node config
3283 if config.portals_upcall:
3284 upcall = config.portals_upcall
# fallback branch -- its 'elif' line is missing from this extract
3286 upcall = config.upcall
3288 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout through lctl.

    'timeout' is the node-config value; a positive --timeout on the
    command line overrides it.  No lctl call is made when the resulting
    value is missing or non-positive.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # identity test ('is not None') is the correct way to check for
    # None, rather than '!= None' (PEP 8)
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
# Memory/socket tuning for the socknal on 2.6 kernels.
3297 def sys_tweak_socknal ():
3298 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3299 if sys_get_branch() == '2.6':
3300 fp = open('/proc/meminfo')
3301 lines = fp.readlines()
# scan /proc/meminfo for the MemTotal line (value is in kB)
3306 if a[0] == 'MemTotal:':
3308 debug("memtotal" + memtotal)
# on boxes under 256MB, reserve 1/16 of RAM instead of the default
3309 if int(memtotal) < 262144:
3310 minfree = int(memtotal) / 16
3313 debug("+ minfree ", minfree)
3314 sysctl("vm/min_free_kbytes", minfree)
3315 if config.single_socket:
3316 sysctl("socknal/typed", 0)
# Quadrics Elan tuning: turn on eventint punt loops wherever the
# corresponding /proc knob is present and writable.
3318 def sys_optimize_elan ():
3319 procfiles = ["/proc/elan/config/eventint_punt_loops",
3320 "/proc/qsnet/elan3/config/eventint_punt_loops",
3321 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3323 if os.access(p, os.W_OK):
3324 run ("echo 1 > " + p)
# Evaluate the symbolic debug-flag expression against ptldebug_names
# (from kp30.h) and write the resulting hex mask to portals/debug.
# --ptldebug overrides the node config value.
3326 def sys_set_ptldebug(ptldebug):
3328 ptldebug = config.ptldebug
3331 val = eval(ptldebug, ptldebug_names)
3332 val = "0x%x" % (val)
3333 sysctl('portals/debug', val)
# an unknown flag name in the expression raises NameError from eval
3334 except NameError, e:
# Same as sys_set_ptldebug but for the subsystem mask.
3337 def sys_set_subsystem(subsystem):
3338 if config.subsystem:
3339 subsystem = config.subsystem
3342 val = eval(subsystem, subsystem_names)
3343 val = "0x%x" % (val)
3344 sysctl('portals/subsystem_debug', val)
3345 except NameError, e:
# Raise a net.core buffer limit to at least 'max'.  (the read/compare
# of the current value is missing from this extract)
3348 def sys_set_netmem_max(path, max):
3349 debug("setting", path, "to at least", max)
3357 fp = open(path, 'w')
3358 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd control device nodes if absent."""
    for dev_node, mknod_cmd in (('/dev/portals', 'mknod /dev/portals c 10 240'),
                                ('/dev/obd', 'mknod /dev/obd c 10 241')):
        if not os.access(dev_node, os.R_OK):
            run(mknod_cmd)
3369 # Add dir to the global PATH, if not already there.
3370 def add_to_path(new_dir):
3371 syspath = string.split(os.environ['PATH'], ':')
3372 if new_dir in syspath:
# (the early 'return' for the already-present case is missing here)
3374 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
# Default dump location; prefer an /r-prefixed path when /r exists.
3376 def default_debug_path():
3377 path = '/tmp/lustre-log'
3378 if os.path.isdir('/r'):
# Default gdb script location; same /r convention as above.
3383 def default_gdb_script():
3384 script = '/tmp/ogdb'
3385 if os.path.isdir('/r'):
3386 return '/r' + script
3391 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3392 # ensure basic elements are in the system path
3393 def sanitise_path():
3394 for dir in DEFAULT_PATH:
3397 # global hack for the --select handling
# Parse --select arguments of the form service=node[,service=node...]
# into the global tgt_select mapping.
3399 def init_select(args):
3400 # args = [service=nodeA,service2=nodeB service3=nodeC]
3403 list = string.split(arg, ',')
3405 srv, node = string.split(entry, '=')
3406 tgt_select[srv] = node
# Return the node selected for this service, if any was given.
3408 def get_select(srv):
3409 if tgt_select.has_key(srv):
3410 return tgt_select[srv]
# Option-type shorthands from Lustre.Options, used in the table below.
3414 FLAG = Lustre.Options.FLAG
3415 PARAM = Lustre.Options.PARAM
3416 INTPARAM = Lustre.Options.INTPARAM
3417 PARAMLIST = Lustre.Options.PARAMLIST
# Command line option table: entries are (name, help[, type[, default]]).
# (the 'lconf_options = [' line and some entries are missing from this
# extract)
3419 ('verbose,v', "Print system commands as they are run"),
3420 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3421 ('config', "Cluster config name used for LDAP query", PARAM),
3422 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3423 ('node', "Load config for <nodename>", PARAM),
3424 ('cleanup,d', "Cleans up config. (Shutdown)"),
3425 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3427 ('single_socket', "socknal option: only use one socket instead of bundle",
3429 ('failover',"""Used to shut down without saving state.
3430 This will allow this node to "give up" a service to a
3431 another node for failover purposes. This will not
3432 be a clean shutdown.""",
3434 ('gdb', """Prints message after creating gdb module script
3435 and sleeps for 5 seconds."""),
3436 ('noexec,n', """Prints the commands and steps that will be run for a
3437 config without executing them. This can used to check if a
3438 config file is doing what it should be doing"""),
3439 ('nomod', "Skip load/unload module step."),
3440 ('nosetup', "Skip device setup/cleanup step."),
3441 ('reformat', "Reformat all devices (without question)"),
3442 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3443 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3444 ('clientoptions', "Additional options for Lustre", PARAM),
3445 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3447 ('write_conf', "Save all the client config information on mds."),
3448 ('record', "Write config information on mds."),
3449 ('record_log', "Name of config record log.", PARAM),
3450 ('record_device', "MDS device name that will record the config commands",
3452 ('root_squash', "MDS squash root to appointed uid",
3454 ('no_root_squash', "Don't squash root for appointed nid",
3456 ('minlevel', "Minimum level of services to configure/cleanup",
3458 ('maxlevel', """Maximum level of services to configure/cleanup
3459 Levels are aproximatly like:
3464 70 - mountpoint, echo_client, osc, mdc, lov""",
3466 ('lustre', """Base directory of lustre sources. This parameter will
3467 cause lconf to load modules from a source tree.""", PARAM),
3468 ('portals', """Portals source directory. If this is a relative path,
3469 then it is assumed to be relative to lustre. """, PARAM),
3470 ('timeout', "Set recovery timeout", INTPARAM),
3471 ('upcall', "Set both portals and lustre upcall script", PARAM),
3472 ('lustre_upcall', "Set lustre upcall script", PARAM),
3473 ('portals_upcall', "Set portals upcall script", PARAM),
3474 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3475 ('ptldebug', "Set the portals debug level", PARAM),
3476 ('subsystem', "Set the portals debug subsystem", PARAM),
3477 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3478 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3479 # Client recovery options
3480 ('recover', "Recover a device"),
3481 ('group', "The group of devices to configure or cleanup", PARAM),
3482 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3483 ('client_uuid', "The failed client (required for recovery)", PARAM),
3484 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3486 ('inactive', """The name of an inactive service, to be ignored during
3487 mounting (currently OST-only). Can be repeated.""",
# --- fragment: body of main() plus the script entry point; the 'def
# main():' line and several branches are missing from this extract ---
3492 global lctl, config, toplustreDB, CONFIG_FILE
3494 # in the upcall this is set to SIG_IGN
3495 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
# parse command line against the option table above
3497 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3499 config, args = cl.parse(sys.argv[1:])
3500 except Lustre.OptionError, e:
3504 setupModulePath(sys.argv[0])
3506 host = socket.gethostname()
3508 # the PRNG is normally seeded with time(), which is not so good for starting
3509 # time-synchronized clusters
3510 input = open('/dev/urandom', 'r')
3512 print 'Unable to open /dev/urandom!'
3514 seed = input.read(32)
3520 init_select(config.select)
# first positional arg is the config source: URL, file, or via LDAP
3523 # allow config to be fetched via HTTP, but only with python2
3524 if sys.version[0] != '1' and args[0].startswith('http://'):
3527 config_file = urllib2.urlopen(args[0])
3528 except (urllib2.URLError, socket.error), err:
3529 if hasattr(err, 'args'):
3531 print "Could not access '%s': %s" %(args[0], err)
3533 elif not os.access(args[0], os.R_OK):
3534 print 'File not found or readable:', args[0]
3538 config_file = open(args[0], 'r')
3540 dom = xml.dom.minidom.parse(config_file)
3542 panic("%s does not appear to be a config file." % (args[0]))
3543 sys.exit(1) # make sure to die here, even in debug mode.
3545 CONFIG_FILE = args[0]
3546 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
# default the config name from the file name, stripping any '.xml'
3547 if not config.config:
3548 config.config = os.path.basename(args[0])# use full path?
3549 if config.config[-4:] == '.xml':
3550 config.config = config.config[:-4]
3551 elif config.ldapurl:
3552 if not config.config:
3553 panic("--ldapurl requires --config name")
3554 dn = "config=%s,fs=lustre" % (config.config)
3555 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
# with no config source, --ptldebug/--subsystem alone are still honoured
3556 elif config.ptldebug or config.subsystem:
3557 sys_set_ptldebug(None)
3558 sys_set_subsystem(None)
3561 print 'Missing config file or ldap URL.'
3562 print 'see lconf --help for command summary'
3565 toplustreDB = lustreDB
# refuse configs produced by a mismatched lconf version
3567 ver = lustreDB.get_version()
3569 panic("No version found in config data, please recreate.")
3570 if ver != Lustre.CONFIG_VERSION:
3571 panic("Config version", ver, "does not match lconf version",
3572 Lustre.CONFIG_VERSION)
# candidate node names: --node if given, else hostname and localhost
3576 node_list.append(config.node)
3579 node_list.append(host)
3580 node_list.append('localhost')
3582 debug("configuring for host: ", node_list)
# keep per-host debug/gdb files distinct on shared filesystems
3585 config.debug_path = config.debug_path + '-' + host
3586 config.gdb_script = config.gdb_script + '-' + host
3588 lctl = LCTLInterface('lctl')
3590 if config.lctl_dump:
3591 lctl.use_save_file(config.lctl_dump)
3594 if not (config.record_device and config.record_log):
3595 panic("When recording, both --record_log and --record_device must be specified.")
3596 lctl.clear_log(config.record_device, config.record_log)
3597 lctl.record(config.record_device, config.record_log)
3599 doHost(lustreDB, node_list)
3601 if not config.record:
# after recording the main log, replay any <update> records too
3606 process_updates(lustreDB, config.record_device, config.record_log)
3608 if __name__ == "__main__":
3611 except Lustre.LconfError, e:
3613 # traceback.print_exc(file=sys.stdout)
3615 except CommandError, e:
# propagate the first cleanup failure as the process exit status
3619 if first_cleanup_error:
3620 sys.exit(first_cleanup_error)