3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
37 # Maximum number of devices to search for.
38 # (the /dev/loop* nodes need to be created beforehand)
39 MAX_LOOP_DEVICES = 256
43 print """usage: lconf config.xml
45 config.xml Lustre configuration in xml format.
46 --get <url> URL to fetch a config file
47 --node <nodename> Load config for <nodename>
48 -d | --cleanup Cleans up config. (Shutdown)
49 -v | --verbose Print system commands as they are run
50 -h | --help Print this help
51 --gdb Prints message after creating gdb module script
52 and sleeps for 5 seconds.
53 -n | --noexec Prints the commands and steps that will be run for a
54 config without executing them. This can used to check if a
55 config file is doing what it should be doing. (Implies -v)
56 --nomod Skip load/unload module step.
57 --nosetup Skip device setup/cleanup step.
60 --ldap server LDAP server with lustre config database
61 --makeldiff Translate xml source to LDIFF
62 --reformat Reformat all devices (will confirm)
63 This are perhaps not needed:
64 --lustre="src dir" Base directory of lustre sources. Used to search
66 --portals=src Portals source
70 # ============================================================
71 # Config parameters, encapsulated in a class
86 self._gdb_script = '/tmp/ogdb'
87 self._debug_path = '/tmp/lustre-log'
90 def verbose(self, flag = None):
91 if flag: self._verbose = flag
94 def noexec(self, flag = None):
95 if flag: self._noexec = flag
def reformat(self, flag = None):
    """Get, and optionally set, the reformat option.

    A true `flag` is stored in self._reformat before the value is read
    back; a false or omitted `flag` leaves the stored value untouched,
    so this accessor can switch the option on but not off again.

    Returns the current value of self._reformat.
    """
    if not flag:
        return self._reformat
    self._reformat = flag
    return self._reformat
102 def cleanup(self, flag = None):
103 if flag: self._cleanup = flag
106 def gdb(self, flag = None):
107 if flag: self._gdb = flag
110 def nomod(self, flag = None):
111 if flag: self._nomod = flag
114 def nosetup(self, flag = None):
115 if flag: self._nosetup = flag
118 def node(self, val = None):
119 if val: self._node = val
122 def url(self, val = None):
123 if val: self._url = val
126 def gdb_script(self):
127 if os.path.isdir('/r'):
128 return '/r' + self._gdb_script
130 return self._gdb_script
132 def debug_path(self):
133 if os.path.isdir('/r'):
134 return '/r' + self._debug_path
136 return self._debug_path
138 def src_dir(self, val = None):
139 if val: self._url = val
144 # ============================================================
145 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with an LconfError saying that `msg` is not implemented yet.

    msg -- short description of the missing feature.

    Always raises LconfError; never returns.
    """
    # Call-style raise, matching the other LconfError raise site in this
    # file; it builds the same exception as the old "raise Class, value"
    # statement form.
    raise LconfError(msg + ' not implemented yet.')
151 msg = string.join(map(str,args))
152 if not config.noexec():
153 raise LconfError(msg)
156 msg = string.join(map(str,args))
161 print string.strip(s)
165 msg = string.join(map(str,args))
168 # ============================================================
169 # locally defined exceptions
170 class CommandError (exceptions.Exception):
171 def __init__(self, cmd_name, cmd_err, rc=None):
172 self.cmd_name = cmd_name
173 self.cmd_err = cmd_err
178 if type(self.cmd_err) == types.StringType:
180 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
182 print "! %s: %s" % (self.cmd_name, self.cmd_err)
183 elif type(self.cmd_err) == types.ListType:
185 print "! %s (error %d):" % (self.cmd_name, self.rc)
187 print "! %s:" % (self.cmd_name)
188 for s in self.cmd_err:
189 print "> %s" %(string.strip(s))
193 class LconfError (exceptions.Exception):
194 def __init__(self, args):
198 # ============================================================
199 # handle lctl interface
202 Manage communication with lctl
205 def __init__(self, cmd):
207 Initialize close by finding the lctl binary.
209 self.lctl = find_prog(cmd)
212 debug('! lctl not found')
215 raise CommandError('lctl', "unable to find lctl binary.")
220 the cmds are written to stdin of lctl
221 lctl doesn't return errors when run in script mode, so
223 should modify command line to accept multiple commands, or
224 create complex command line options
226 debug("+", self.lctl, cmds)
227 if config.noexec(): return (0, [])
228 p = popen2.Popen3(self.lctl, 1)
229 p.tochild.write(cmds + "\n")
231 out = p.fromchild.readlines()
232 err = p.childerr.readlines()
235 raise CommandError(self.lctl, err, ret)
239 def network(self, net, nid):
240 """ initialized network and add "self" """
241 # Idea: "mynid" could be used for all network types to add "self," and then
242 # this special case would be gone and the "self" hack would be hidden.
248 quit""" % (net, nid, nid)
257 # create a new connection
258 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
266 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
272 quit""" % (net, servuuid, nid, nid, port, )
276 # add a route to a range
277 def add_route(self, net, gw, lo, hi):
281 """ % (net, gw, lo, hi)
284 # add a route to a host
285 def add_route_host(self, net, uuid, gw, tgt):
290 """ % (net, uuid, tgt, gw, tgt)
293 # disconnect one connection
294 def disconnect(self, net, nid, port, servuuid):
299 quit""" % (net, nid, servuuid)
302 # disconnect all connections
303 def disconnectAll(self, net):
311 # create a new device with lctl
312 def newdev(self, attach, setup = ""):
317 quit""" % (attach, setup)
321 def cleanup(self, name, uuid):
330 def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
334 lovconfig %s %d %d %d %s %s
335 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
338 # ============================================================
339 # Various system-level functions
340 # (ideally moved to their own module)
342 # Run a command and return the output and status.
343 # stderr is merged into stdout (2>&1); could use popen3 to
344 # capture it separately if necessary
346 cmd = string.join(map(str,args))
348 if config.noexec(): return (0, [])
349 f = os.popen(cmd + ' 2>&1')
358 # Run a command in the background.
359 def run_daemon(*args):
360 cmd = string.join(map(str,args))
362 if config.noexec(): return 0
363 f = os.popen(cmd + ' 2>&1')
371 # Determine full path to use for an external command
372 # search order: ../../portals/linux/utils/ under dirname(argv[0]), then dirname(argv[0]), then PATH
374 syspath = string.split(os.environ['PATH'], ':')
375 cmdpath = os.path.dirname(sys.argv[0])
376 syspath.insert(0, cmdpath);
377 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
379 prog = os.path.join(d,cmd)
380 if os.access(prog, os.X_OK):
384 # Recursively look for file starting at base dir
385 def do_find_file(base, mod):
386 fullname = os.path.join(base, mod)
387 if os.access(fullname, os.R_OK):
389 for d in os.listdir(base):
390 dir = os.path.join(base,d)
391 if os.path.isdir(dir):
392 module = do_find_file(dir, mod)
396 def find_module(src_dir, modname):
397 mod = '%s.o' % (modname)
398 search = (src_dir + "/lustre", src_dir + "/portals/linux")
401 module = do_find_file(d, mod)
408 # is the path a block device?
415 return stat.S_ISBLK(s[stat.ST_MODE])
417 # build fs according to type
419 def mkfs(fstype, dev):
420 if(fstype in ('ext3', 'extN')):
421 mkfs = 'mkfs.ext2 -j -b 4096'
423 print 'unsupported fs type: ', fstype
424 if not is_block(dev):
428 (ret, out) = run (mkfs, force, dev)
430 panic("Unable to build fs:", dev)
431 # enable hash tree indexing on fs
433 htree = 'echo "feature FEATURE_C5" | debugfs -w'
434 (ret, out) = run (htree, dev)
436 panic("Unable to enable htree:", dev)
438 # some systems use /dev/loopN, some /dev/loop/N
442 if not os.access(loop + str(0), os.R_OK):
444 if not os.access(loop + str(0), os.R_OK):
445 panic ("can't access loop devices")
448 # find loop device assigned to the file
451 for n in xrange(0, MAX_LOOP_DEVICES):
453 if os.access(dev, os.R_OK):
454 (stat, out) = run('losetup', dev)
455 if (out and stat == 0):
456 m = re.search(r'\((.*)\)', out[0])
457 if m and file == m.group(1):
463 # create file if necessary and assign the first free loop device
464 def init_loop(file, size, fstype):
465 dev = find_loop(file)
467 print 'WARNING file:', file, 'already mapped to', dev
469 if not os.access(file, os.R_OK | os.W_OK):
470 run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
472 # find next free loop
473 for n in xrange(0, MAX_LOOP_DEVICES):
475 if os.access(dev, os.R_OK):
476 (stat, out) = run('losetup', dev)
478 run('losetup', dev, file)
481 print "out of loop devices"
483 print "out of loop devices"
486 # undo loop assignment
487 def clean_loop(file):
488 dev = find_loop(file)
490 ret, out = run('losetup -d', dev)
492 log('unable to clean loop device:', dev, 'for file:', file)
495 # determine if dev is formatted as a <fstype> filesystem
496 def need_format(fstype, dev):
497 # FIXME don't know how to implement this
500 # initialize a block device if needed
501 def block_dev(dev, size, fstype, format):
502 if config.noexec(): return dev
503 if not is_block(dev):
504 dev = init_loop(dev, size, fstype)
505 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
509 # panic("device:", dev,
510 # "not prepared, and autoformat is not set.\n",
511 # "Rerun with --reformat option to format ALL filesystems")
515 def get_local_address(net_type):
516 """Return the local address for the network type."""
518 if net_type == 'tcp':
520 host = socket.gethostname()
521 local = socket.gethostbyname(host)
522 elif net_type == 'elan':
523 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
525 fp = open('/proc/elan/device0/position', 'r')
526 lines = fp.readlines()
535 elif net_type == 'gm':
536 fixme("automatic local address for GM")
541 # ============================================================
542 # Classes to prepare and cleanup the various objects
545 """ Base class for the rest of the modules. The default cleanup method is
546 defined here, as well as some utility funcs.
548 def __init__(self, module_name, dom_node):
549 self.dom_node = dom_node
550 self.module_name = module_name
551 self.name = get_attr(dom_node, 'name')
552 self.uuid = get_attr(dom_node, 'uuid')
553 self.kmodule_list = []
557 def info(self, *args):
558 msg = string.join(map(str,args))
559 print self.module_name + ":", self.name, self.uuid, msg
def lookup_server(self, srv_uuid):
    """Find the network entry for server `srv_uuid` and keep it on self.

    The lookup goes through get_ost_net(), starting at the parent of
    this module's DOM node.  The result is wrapped in a Network object
    and stored in self._server.
    """
    config_root = self.dom_node.parentNode
    server_net = get_ost_net(config_root, srv_uuid)
    self._server = Network(server_net)
567 def get_server(self):
571 """ default cleanup, used for most modules """
573 srv = self.get_server()
576 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
577 except CommandError, e:
578 log(self.module_name, "disconnect failed: ", self.name)
581 lctl.cleanup(self.name, self.uuid)
582 except CommandError, e:
583 log(self.module_name, "cleanup failed: ", self.name)
def add_module(self, modname):
    """Queue kernel module `modname` at the end of self.kmodule_list."""
    pending = self.kmodule_list
    pending.append(modname)
590 def mod_loaded(self, modname):
591 """Check if a module is already loaded. Look in /proc/modules for it."""
592 fp = open('/proc/modules')
593 lines = fp.readlines()
595 # please forgive my tired fingers for this one
596 ret = filter(lambda word, mod=modname: word == mod,
597 map(lambda line: string.split(line)[0], lines))
600 def load_module(self):
601 """Load all the modules in the list in the order they appear."""
602 for mod in self.kmodule_list:
603 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
604 if self.mod_loaded(mod) and not config.noexec():
606 log ('loading module:', mod)
608 module = find_module(config.src_dir(), mod)
610 panic('module not found:', mod)
611 (rc, out) = run('/sbin/insmod', module)
613 raise CommandError('insmod', out, rc)
615 (rc, out) = run('/sbin/modprobe', mod)
617 raise CommandError('modprobe', out, rc)
619 def cleanup_module(self):
620 """Unload the modules in the list in reverse order."""
621 rev = self.kmodule_list
624 if not self.mod_loaded(mod):
626 log('unloading module:', mod)
629 (rc, out) = run('/sbin/rmmod', mod)
631 log('! unable to unload module:', mod)
635 class Network(Module):
636 def __init__(self,dom_node):
637 Module.__init__(self, 'NETWORK', dom_node)
638 self.net_type = get_attr(dom_node,'type')
639 self.nid = get_text(dom_node, 'server', '*')
640 self.port = get_text_int(dom_node, 'port', 0)
641 self.send_buf = get_text_int(dom_node, 'send_buf', 65536)
642 self.read_buf = get_text_int(dom_node, 'read_buf', 65536)
644 self.nid = get_local_address(self.net_type)
646 panic("unable to set nid for", self.net_type)
648 self.add_module('portals')
649 if self.net_type == 'tcp':
650 self.add_module('ksocknal')
651 if self.net_type == 'elan':
652 self.add_module('kqswnal')
653 if self.net_type == 'gm':
654 self.add_module('kgmnal')
655 self.add_module('obdclass')
656 self.add_module('ptlrpc')
659 self.info(self.net_type, self.nid, self.port)
660 if self.net_type == 'tcp':
661 ret = run_daemon(TCP_ACCEPTOR, self.port)
663 raise CommandError(TCP_ACCEPTOR, 'failed', ret)
665 ret = self.dom_node.getElementsByTagName('route_tbl')
667 for r in a.getElementsByTagName('route'):
668 lctl.add_route(self.net_type, self.nid, get_attr(r, 'lo'),
669 get_attr(r,'hi', ''))
671 lctl.network(self.net_type, self.nid)
672 lctl.newdev(attach = "ptlrpc RPCDEV")
675 self.info(self.net_type, self.nid, self.port)
677 lctl.cleanup("RPCDEV", "")
678 except CommandError, e:
679 print "cleanup failed: ", self.name
682 lctl.disconnectAll(self.net_type)
683 except CommandError, e:
684 print "disconnectAll failed: ", self.name
686 if self.net_type == 'tcp':
687 # yikes, this is ugly! need to save pid in /var/something
688 run("killall acceptor")
def __init__(self,dom_node):
    """Set up the 'LDLM' module for `dom_node` and queue its kernel module."""
    Module.__init__(self, 'LDLM', dom_node)
    for kmod in ('ldlm',):
        self.add_module(kmod)
696 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
def __init__(self,dom_node):
    """Read the 'LOV' striping configuration from `dom_node`.

    Stripe size, offset and pattern come from integer attributes
    (defaults 65536, 0 and 0).  The MDS is found through the node's
    first 'mds' reference, looked up among the siblings of `dom_node`.
    The stripe count is the number of 'osc' references.  The 'osc' and
    'lov' kernel modules are queued, in that order.
    """
    Module.__init__(self, 'LOV', dom_node)

    # Striping parameters.
    self.stripe_sz = get_attr_int(dom_node, 'stripesize', 65536)
    self.stripe_off = get_attr_int(dom_node, 'stripeoffset', 0)
    self.pattern = get_attr_int(dom_node, 'pattern', 0)

    # Resolve the referenced MDS to its name.
    self.mdsuuid = get_first_ref(dom_node, 'mds')
    self.mdsname = getName(lookup(dom_node.parentNode, self.mdsuuid))

    # One stripe per referenced OSC.
    self.devlist = get_all_refs(dom_node, 'osc')
    self.stripe_cnt = len(self.devlist)

    for kmod in ('osc', 'lov'):
        self.add_module(kmod)
714 self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
715 self.devlist, self.mdsname)
716 lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
717 self.stripe_sz, self.stripe_off, self.pattern,
718 string.join(self.devlist))
def __init__(self,dom_node):
    """Read the 'MDS' device settings from `dom_node`.

    Stores the device name and size from get_device(), the 'fstype'
    text and the 'autoformat' text (default "no").  Kernel modules are
    queued in load order: 'extN' first when that is the fstype, then
    'mds', then the fstype-specific 'mds_<fstype>' module.
    """
    Module.__init__(self, 'MDS', dom_node)
    self.devname, self.size = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', "no")

    kmods = []
    if self.fstype == 'extN':
        kmods.append('extN')
    kmods.append('mds')
    kmods.append('mds_%s' % (self.fstype))
    for kmod in kmods:
        self.add_module(kmod)
733 self.info(self.devname, self.fstype, self.format)
734 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
735 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
736 setup ="%s %s" %(blkdev, self.fstype))
739 clean_loop(self.devname)
def __init__(self,dom_node):
    """Set up the 'MDC' module for `dom_node`.

    Records the uuid of the node's first 'mds' reference, looks up that
    server's network information, and queues the 'mdc' kernel module.
    """
    Module.__init__(self, 'MDC', dom_node)
    target_uuid = get_first_ref(dom_node, 'mds')
    self.mds_uuid = target_uuid
    self.lookup_server(target_uuid)
    self.add_module('mdc')
749 self.info(self.mds_uuid)
750 srv = self.get_server()
751 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
752 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
753 setup ="%s %s" %(self.mds_uuid, srv.uuid))
def __init__(self, dom_node):
    """Read the 'OBD' device settings from `dom_node`.

    Stores the obd type (the node's 'type' attribute), the device name
    and size from get_device(), the 'fstype' text and the 'autoformat'
    text (default 'yes').  Queues 'extN' first when that is the fstype,
    then the module named after the obd type.
    """
    Module.__init__(self, 'OBD', dom_node)
    self.obdtype = get_attr(dom_node, 'type')
    self.devname, self.size = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', 'yes')

    kmods = []
    if self.fstype == 'extN':
        kmods.append('extN')
    kmods.append(self.obdtype)
    for kmod in kmods:
        self.add_module(kmod)
766 # need to check /proc/mounts and /etc/mtab before
767 # formatting anything.
768 # FIXME: check if device is already formatted.
770 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
771 if self.obdtype == 'obdecho':
774 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
775 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
776 setup ="%s %s" %(blkdev, self.fstype))
779 if not self.obdtype == 'obdecho':
780 clean_loop(self.devname)
def __init__(self,dom_node):
    """Set up the 'OST' module for `dom_node`.

    Records the uuid of the node's first 'obd' reference and queues the
    'ost' kernel module.
    """
    Module.__init__(self, 'OST', dom_node)
    backing_obd = get_first_ref(dom_node, 'obd')
    self.obd_uuid = backing_obd
    self.add_module('ost')
789 self.info(self.obd_uuid)
790 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
791 setup ="%s" % (self.obd_uuid))
def __init__(self,dom_node):
    """Set up the 'OSC' module for `dom_node`.

    Records the uuids of the node's first 'obd' and 'ost' references,
    looks up the network information of that OST, and queues the 'osc'
    kernel module.
    """
    Module.__init__(self, 'OSC', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    ost_uuid = get_first_ref(dom_node, 'ost')
    self.ost_uuid = ost_uuid
    self.lookup_server(ost_uuid)
    self.add_module('osc')
802 self.info(self.obd_uuid, self.ost_uuid)
803 srv = self.get_server()
805 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_buf, srv.read_buf)
808 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
810 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
811 setup ="%s %s" %(self.obd_uuid, srv.uuid))
814 class Mountpoint(Module):
def __init__(self,dom_node):
    """Read the 'MTPT' (mount point) settings from `dom_node`.

    Stores the 'path' text, the uuid of the first 'mdc' reference, and
    the uuid of the first 'osc' reference (kept as self.lov_uuid).
    Queues the 'osc' and 'llite' kernel modules, in that order.
    """
    Module.__init__(self, 'MTPT', dom_node)
    self.path = get_text(dom_node, 'path')
    self.mdc_uuid = get_first_ref(dom_node, 'mdc')
    # The attribute is named lov_uuid but is read from the 'osc' reference.
    self.lov_uuid = get_first_ref(dom_node, 'osc')
    for kmod in ('osc', 'llite'):
        self.add_module(kmod)
824 l = lookup(self.dom_node.parentNode, self.lov_uuid)
825 if l.nodeName == 'lov':
827 for osc_uuid in lov.devlist:
828 osc = lookup(self.dom_node.parentNode, osc_uuid)
833 panic('osc not found:', osc_uuid)
834 lctl.newdev(attach="lov %s %s" % (lov.name, lov.uuid),
835 setup ="%s" % (self.mdc_uuid))
840 self.info(self.path, self.mdc_uuid,self.lov_uuid)
841 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
842 (self.lov_uuid, self.mdc_uuid, self.path)
843 run("mkdir", self.path)
846 panic("mount failed:", self.path)
848 self.info(self.path, self.mdc_uuid,self.lov_uuid)
849 (rc, out) = run("umount", self.path)
851 log("umount failed, cleanup will most likely not work.")
852 l = lookup(self.dom_node.parentNode, self.lov_uuid)
853 if l.nodeName == 'lov':
855 for osc_uuid in lov.devlist:
856 osc = lookup(self.dom_node.parentNode, osc_uuid)
861 panic('osc not found:', osc_uuid)
866 class Router(Module):
def __init__(self,dom_node):
    """Set up the 'ROUTER' module for `dom_node` and queue 'kptlrouter'."""
    Module.__init__(self, 'ROUTER', dom_node)
    for kmod in ('kptlrouter',):
        self.add_module(kmod)
875 # ============================================================
876 # XML processing and query
877 # TODO: Change query funcs to use XPath, which is much cleaner
880 list = obd.getElementsByTagName('device')
884 size = get_attr_int(dev, 'size', 0)
885 return dev.firstChild.data, size
888 # Get the text content from the first matching child
889 # If there is no content (or it is all whitespace), return
891 def get_text(dom_node, tag, default=""):
892 list = dom_node.getElementsByTagName(tag)
896 if dom_node.firstChild:
897 txt = string.strip(dom_node.firstChild.data)
902 def get_text_int(dom_node, tag, default=0):
903 list = dom_node.getElementsByTagName(tag)
908 if dom_node.firstChild:
909 txt = string.strip(dom_node.firstChild.data)
914 panic("text value is not integer:", txt)
917 def get_attr(dom_node, attr, default=""):
918 v = dom_node.getAttribute(attr)
923 def get_attr_int(dom_node, attr, default=0):
925 v = dom_node.getAttribute(attr)
930 panic("attr value is not integer", v)
933 def get_first_ref(dom_node, tag):
934 """ Get the first uuidref of the type TAG. Use when only
935 one is expected. Returns the uuid."""
937 refname = '%s_ref' % tag
938 list = dom_node.getElementsByTagName(refname)
940 uuid = getRef(list[0])
943 def get_all_refs(dom_node, tag):
944 """ Get all the refs of type TAG. Returns list of uuids. """
946 refname = '%s_ref' % tag
947 list = dom_node.getElementsByTagName(refname)
950 uuids.append(getRef(i))
953 def get_ost_net(dom_node, uuid):
954 ost = lookup(dom_node, uuid)
955 uuid = get_first_ref(ost, 'network')
958 return lookup(dom_node, uuid)
960 def lookup(dom_node, uuid):
961 for n in dom_node.childNodes:
962 if n.nodeType == n.ELEMENT_NODE:
963 if getUUID(n) == uuid:
970 # Get name attribute of dom_node
def getName(dom_node):
    """Return the value of the 'name' attribute of `dom_node`."""
    name = dom_node.getAttribute('name')
    return name
def getRef(dom_node):
    """Return the value of the 'uuidref' attribute of `dom_node`."""
    ref = dom_node.getAttribute('uuidref')
    return ref
977 # Get uuid attribute of dom_node
def getUUID(dom_node):
    """Return the value of the 'uuid' attribute of `dom_node`."""
    uuid = dom_node.getAttribute('uuid')
    return uuid
981 # the tag name is the service type
982 # fixme: this should do some checks to make sure the dom_node is a service
def getServiceType(dom_node):
    """Return the service type of `dom_node`, which is its tag name.

    No check is made that the node really describes a service.
    """
    service_type = dom_node.nodeName
    return service_type
987 # determine what "level" a particular node is at.
988 # the order of initialization is based on level.
989 def getServiceLevel(dom_node):
990 type = getServiceType(dom_node)
991 if type in ('ptlrouter',):
993 if type in ('network',):
995 elif type in ('device', 'ldlm'):
997 elif type in ('obd', 'mdd'):
999 elif type in ('mds','ost'):
1001 elif type in ('mdc','osc'):
1003 elif type in ('lov',):
1005 elif type in ('mountpoint',):
1010 # return list of services in a profile. list is a list of tuples
1011 # [(level, dom_node),]
1012 def getServices(lustreNode, profileNode):
1014 for n in profileNode.childNodes:
1015 if n.nodeType == n.ELEMENT_NODE:
1016 servNode = lookup(lustreNode, getRef(n))
1019 panic('service not found: ' + getRef(n))
1020 level = getServiceLevel(servNode)
1021 list.append((level, servNode))
1025 def getByName(lustreNode, name, tag):
1026 ndList = lustreNode.getElementsByTagName(tag)
1028 if getName(nd) == name:
1035 ############################################################
1036 # routing ("rooting")
1041 def init_node(dom_node):
1043 netlist = dom_node.getElementsByTagName('network')
1044 for dom_net in netlist:
1045 type = get_attr(dom_net, 'type')
1046 gw = get_text(dom_net, 'server')
1047 local_node.append((type, gw))
1050 def get_routes(type, gw, dom_net):
1051 """ Return the routes as a list of tuples of the form:
1052 [(type, gw, lo, hi),]"""
1054 tbl = dom_net.getElementsByTagName('route_tbl')
1055 routes = tbl[0].getElementsByTagName('route')
1057 lo = get_attr(r, 'lo')
1058 hi = get_attr(r, 'hi', '')
1059 res.append((type, gw, lo, hi))
1063 def init_route_config(lustre):
1064 """ Scan the lustre config looking for routers. Build list of
1068 list = lustre.getElementsByTagName('node')
1070 if get_attr(node, 'router'):
1071 for (local_type, local_nid) in local_node:
1073 netlist = node.getElementsByTagName('network')
1074 for dom_net in netlist:
1075 if local_type == get_attr(dom_net, 'type'):
1076 gw = get_text(dom_net, 'server')
1080 for dom_net in netlist:
1081 if local_type != get_attr(dom_net, 'type'):
1082 for route in get_routes(local_type, gw, dom_net):
1083 routes.append(route)
1088 for iface in local_node:
1089 if net.net_type == iface[0]:
1093 def find_route(net):
1094 global local_node, routes
1095 frm_type = local_node[0][0]
1096 to_type = net.net_type
1098 debug ('looking for route to', to_type,to)
1107 ############################################################
1110 def startService(dom_node, module_flag):
1111 type = getServiceType(dom_node)
1112 debug('Service:', type, getName(dom_node), getUUID(dom_node))
1113 # there must be a more dynamic way of doing this...
1119 elif type == 'network':
1120 n = Network(dom_node)
1131 elif type == 'mountpoint':
1132 n = Mountpoint(dom_node)
1133 elif type == 'ptlrouter':
1134 n = Router(dom_node)
1136 panic ("unknown service type:", type)
1141 if config.cleanup():
1146 if config.nosetup():
1148 if config.cleanup():
1154 # Prepare the system to run lustre using a particular profile
1155 # in the configuration.
1156 # * load the modules
1157 # * setup networking for the current node
1158 # * make sure partitions are in place and prepared
1159 # * initialize devices with lctl
1160 # Levels is important, and needs to be enforced.
1161 def startProfile(lustreNode, profileNode, module_flag):
1163 panic("profile:", profile, "not found.")
1164 services = getServices(lustreNode, profileNode)
1165 if config.cleanup():
1168 startService(s[1], module_flag)
1173 def doHost(lustreNode, hosts):
1177 dom_node = getByName(lustreNode, h, 'node')
1182 print 'No host entry found.'
1185 if not get_attr(dom_node, 'router'):
1187 init_route_config(lustreNode)
1189 # Two step process: (1) load modules, (2) setup lustre
1190 # if not cleaning, load modules first.
1191 module_flag = not config.cleanup()
1192 reflist = dom_node.getElementsByTagName('profile')
1193 for profile in reflist:
1194 startProfile(lustreNode, profile, module_flag)
1196 if not config.cleanup():
1197 sys_set_debug_path()
1198 script = config.gdb_script()
1199 run(lctl.lctl, ' modules >', script)
1201 # dump /tmp/ogdb and sleep/pause here
1202 log ("The GDB module script is in", script)
1205 module_flag = not module_flag
1206 for profile in reflist:
1207 startProfile(lustreNode, profile, module_flag)
1209 ############################################################
1210 # Command line processing
1212 def parse_cmdline(argv):
1214 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1215 "portals=", "makeldiff", "cleanup", "noexec",
1216 "help", "node=", "get=", "nomod", "nosetup"]
1220 opts, args = getopt.getopt(argv, short_opts, long_opts)
1221 except getopt.error:
1226 if o in ("-h", "--help"):
1228 if o in ("-d","--cleanup"):
1230 if o in ("-v", "--verbose"):
1232 if o in ("-n", "--noexec"):
1235 if o == "--portals":
1239 if o == "--reformat":
1249 if o == "--nosetup":
1257 s = urllib.urlopen(url)
def setupModulePath(cmd):
    """Point config.src_dir() at the source tree when run from a build dir.

    cmd -- path the script was invoked with (the caller passes sys.argv[0]).

    If the directory holding `cmd` contains a readable Makefile, the
    source directory is set two levels above that directory.  Otherwise
    nothing is changed.
    """
    base = os.path.dirname(cmd)
    # os.path.join keeps an empty dirname (script started by bare name)
    # relative to the current directory.  Plain "+" concatenation turned
    # it into "/Makefile" and "/../../" at the filesystem root.
    if os.access(os.path.join(base, "Makefile"), os.R_OK):
        config.src_dir(os.path.join(base, "../../"))
1268 def sys_set_debug_path():
1269 debug("debug path: ", config.debug_path())
1273 fp = open('/proc/sys/portals/debug_path', 'w')
1274 fp.write(config.debug_path())
def sys_make_devices():
    """Create the /dev/portals and /dev/obd device nodes if needed.

    Each node is created with mknod (character device, major 10, minor
    240 for portals and 241 for obd) only when its path is not
    currently readable.
    """
    wanted = (
        ('/dev/portals', 'mknod /dev/portals c 10 240'),
        ('/dev/obd', 'mknod /dev/obd c 10 241'),
    )
    for node, mknod_cmd in wanted:
        if os.access(node, os.R_OK):
            continue
        run(mknod_cmd)
1286 # Initialize or shutdown lustre according to a configuration file
1287 # * prepare the system for lustre
1288 # * configure devices with lctl
1289 # Shutdown does steps in reverse
1292 global TCP_ACCEPTOR, lctl
1293 args = parse_cmdline(sys.argv[1:])
1295 if not os.access(args[0], os.R_OK | os.W_OK):
1296 print 'File not found:', args[0]
1298 dom = xml.dom.minidom.parse(args[0])
1300 xmldata = fetch(config.url())
1301 dom = xml.dom.minidom.parseString(xmldata)
1307 node_list.append(config.node())
1309 host = socket.gethostname()
1311 node_list.append(host)
1312 node_list.append('localhost')
1313 debug("configuring for host: ", node_list)
1315 TCP_ACCEPTOR = find_prog('acceptor')
1316 if not TCP_ACCEPTOR:
1318 TCP_ACCEPTOR = 'acceptor'
1319 debug('! acceptor not found')
1321 panic('acceptor not found')
1323 lctl = LCTLInterface('lctl')
1325 setupModulePath(sys.argv[0])
1327 doHost(dom.documentElement, node_list)
1329 if __name__ == "__main__":
1332 except LconfError, e:
1334 except CommandError, e: