3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
37 # Maximum number of devices to search for.
38 # (the /dev/loop* nodes need to be created beforehand)
39 MAX_LOOP_DEVICES = 256
43 print """usage: lconf config.xml
45 config.xml Lustre configuration in xml format.
46 --get <url> URL to fetch a config file
47 --node <nodename> Load config for <nodename>
48 -d | --cleanup Cleans up config. (Shutdown)
49 -v | --verbose Print system commands as they are run
50 -h | --help Print this help
51 --gdb Prints message after creating gdb module script
52 and sleeps for 5 seconds.
53 -n | --noexec Prints the commands and steps that will be run for a
54 config without executing them. This can used to check if a
55 config file is doing what it should be doing. (Implies -v)
56 --nomod Skip load/unload module step.
57 --nosetup Skip device setup/cleanup step.
60 --ldap server LDAP server with lustre config database
61 --makeldiff Translate xml source to LDIFF
62 --reformat Reformat all devices (will confirm)
63 This are perhaps not needed:
64 --lustre="src dir" Base directory of lustre sources. Used to search
66 --portals=src Portals source
70 # ============================================================
71 # Config parameters, encapsulated in a class
86 self._gdb_script = '/tmp/ogdb'
87 self._debug_path = '/tmp/lustre-log'
90 def verbose(self, flag = None):
91 if flag: self._verbose = flag
94 def noexec(self, flag = None):
95 if flag: self._noexec = flag
def reformat(self, flag = None):
    """Return the reformat setting, storing FLAG first when it is true.

    A false FLAG (including the default None) leaves the stored value
    untouched, so this accessor cannot switch the option back off.
    """
    if not flag:
        return self._reformat
    self._reformat = flag
    return self._reformat
102 def cleanup(self, flag = None):
103 if flag: self._cleanup = flag
106 def gdb(self, flag = None):
107 if flag: self._gdb = flag
110 def nomod(self, flag = None):
111 if flag: self._nomod = flag
114 def nosetup(self, flag = None):
115 if flag: self._nosetup = flag
118 def node(self, val = None):
119 if val: self._node = val
122 def url(self, val = None):
123 if val: self._url = val
126 def gdb_script(self):
127 if os.path.isdir('/r'):
128 return '/r' + self._gdb_script
130 return self._gdb_script
132 def debug_path(self):
133 if os.path.isdir('/r'):
134 return '/r' + self._debug_path
136 return self._debug_path
138 def src_dir(self, val = None):
139 if val: self._url = val
144 # ============================================================
145 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with an LconfError reporting that MSG is not implemented.

    msg -- short description of the missing feature; it becomes the
           start of the error text.
    """
    text = msg + ' not implmemented yet.'
    raise LconfError(text)
151 msg = string.join(map(str,args))
152 if not config.noexec():
153 raise LconfError(msg)
158 msg = string.join(map(str,args))
163 print string.strip(s)
167 msg = string.join(map(str,args))
170 # ============================================================
171 # locally defined exceptions
172 class CommandError (exceptions.Exception):
173 def __init__(self, cmd_name, cmd_err, rc=None):
174 self.cmd_name = cmd_name
175 self.cmd_err = cmd_err
180 if type(self.cmd_err) == types.StringType:
182 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
184 print "! %s: %s" % (self.cmd_name, self.cmd_err)
185 elif type(self.cmd_err) == types.ListType:
187 print "! %s (error %d):" % (self.cmd_name, self.rc)
189 print "! %s:" % (self.cmd_name)
190 for s in self.cmd_err:
191 print "> %s" %(string.strip(s))
195 class LconfError (exceptions.Exception):
196 def __init__(self, args):
200 # ============================================================
201 # handle lctl interface
204 Manage communication with lctl
207 def __init__(self, cmd):
209 Initialize close by finding the lctl binary.
211 self.lctl = find_prog(cmd)
214 debug('! lctl not found')
217 raise CommandError('lctl', "unable to find lctl binary.")
222 the cmds are written to stdin of lctl
223 lctl doesn't return errors when run in script mode, so
225 should modify command line to accept multiple commands, or
226 create complex command line options
228 debug("+", self.lctl, cmds)
229 if config.noexec(): return (0, [])
230 p = popen2.Popen3(self.lctl, 1)
231 p.tochild.write(cmds + "\n")
233 out = p.fromchild.readlines()
234 err = p.childerr.readlines()
237 raise CommandError(self.lctl, err, ret)
241 def network(self, net, nid):
242 """ initialized network and add "self" """
243 # Idea: "mynid" could be used for all network types to add "self," and then
244 # this special case would be gone and the "self" hack would be hidden.
250 quit""" % (net, nid, nid)
259 # create a new connection
260 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
268 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
274 quit""" % (net, servuuid, nid, nid, port, )
278 # add a route to a range
279 def add_route(self, net, gw, lo, hi):
283 quit """ % (net, gw, lo, hi)
287 # delete a route to a range
288 def del_route(self, net, gw, lo, hi):
292 quit """ % (net, lo, hi)
295 # add a route to a host
296 def add_route_host(self, net, uuid, gw, tgt):
301 quit """ % (net, uuid, tgt, gw, tgt)
304 # disconnect one connection
305 def disconnect(self, net, nid, port, servuuid):
310 quit""" % (net, nid, servuuid)
313 # disconnect all connections
314 def disconnectAll(self, net):
322 # create a new device with lctl
323 def newdev(self, attach, setup = ""):
328 quit""" % (attach, setup)
332 def cleanup(self, name, uuid):
341 def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
345 lovconfig %s %d %d %d %s %s
346 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
349 # ============================================================
350 # Various system-level functions
351 # (ideally moved to their own module)
353 # Run a command and return the output and status.
354 # stderr is merged into stdout (2>&1); could use popen3 to
355 # capture it separately if necessary
357 cmd = string.join(map(str,args))
359 if config.noexec(): return (0, [])
360 f = os.popen(cmd + ' 2>&1')
369 # Run a command in the background.
370 def run_daemon(*args):
371 cmd = string.join(map(str,args))
373 if config.noexec(): return 0
374 f = os.popen(cmd + ' 2>&1')
382 # Determine full path to use for an external command
383 # searches dirname(argv[0]) first, then PATH
385 syspath = string.split(os.environ['PATH'], ':')
386 cmdpath = os.path.dirname(sys.argv[0])
387 syspath.insert(0, cmdpath);
388 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
390 prog = os.path.join(d,cmd)
391 if os.access(prog, os.X_OK):
395 # Recursively look for file starting at base dir
396 def do_find_file(base, mod):
397 fullname = os.path.join(base, mod)
398 if os.access(fullname, os.R_OK):
400 for d in os.listdir(base):
401 dir = os.path.join(base,d)
402 if os.path.isdir(dir):
403 module = do_find_file(dir, mod)
407 def find_module(src_dir, modname):
408 mod = '%s.o' % (modname)
409 search = (src_dir + "/lustre", src_dir + "/portals/linux")
412 module = do_find_file(d, mod)
419 # is the path a block device?
426 return stat.S_ISBLK(s[stat.ST_MODE])
428 # build fs according to type
430 def mkfs(fstype, dev):
431 if(fstype in ('ext3', 'extN')):
432 mkfs = 'mkfs.ext2 -j -b 4096'
434 print 'unsupported fs type: ', fstype
435 if not is_block(dev):
439 (ret, out) = run (mkfs, force, dev)
441 panic("Unable to build fs:", dev)
442 # enable hash tree indexing on fs
444 htree = 'echo "feature FEATURE_C5" | debugfs -w'
445 (ret, out) = run (htree, dev)
447 panic("Unable to enable htree:", dev)
449 # some systems use /dev/loopN, some /dev/loop/N
453 if not os.access(loop + str(0), os.R_OK):
455 if not os.access(loop + str(0), os.R_OK):
456 panic ("can't access loop devices")
459 # find loop device assigned to thefile
462 for n in xrange(0, MAX_LOOP_DEVICES):
464 if os.access(dev, os.R_OK):
465 (stat, out) = run('losetup', dev)
466 if (out and stat == 0):
467 m = re.search(r'\((.*)\)', out[0])
468 if m and file == m.group(1):
474 # create file if necessary and assign the first free loop device
475 def init_loop(file, size, fstype):
476 dev = find_loop(file)
478 print 'WARNING file:', file, 'already mapped to', dev
480 if not os.access(file, os.R_OK | os.W_OK):
481 run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
483 # find next free loop
484 for n in xrange(0, MAX_LOOP_DEVICES):
486 if os.access(dev, os.R_OK):
487 (stat, out) = run('losetup', dev)
489 run('losetup', dev, file)
492 print "out of loop devices"
494 print "out of loop devices"
497 # undo loop assignment
498 def clean_loop(file):
499 dev = find_loop(file)
501 ret, out = run('losetup -d', dev)
503 log('unable to clean loop device:', dev, 'for file:', file)
506 # determine if dev is formatted as a <fstype> filesystem
507 def need_format(fstype, dev):
508 # FIXME don't know how to implement this
511 # initialize a block device if needed
512 def block_dev(dev, size, fstype, format):
513 if config.noexec(): return dev
514 if not is_block(dev):
515 dev = init_loop(dev, size, fstype)
516 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
520 # panic("device:", dev,
521 # "not prepared, and autoformat is not set.\n",
522 # "Rerun with --reformat option to format ALL filesystems")
526 def get_local_address(net_type):
527 """Return the local address for the network type."""
529 if net_type == 'tcp':
531 host = socket.gethostname()
532 local = socket.gethostbyname(host)
533 elif net_type == 'elan':
534 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
536 fp = open('/proc/elan/device0/position', 'r')
537 lines = fp.readlines()
546 elif net_type == 'gm':
547 fixme("automatic local address for GM")
552 # ============================================================
553 # Classes to prepare and cleanup the various objects
556 """ Base class for the rest of the modules. The default cleanup method is
557 defined here, as well as some utilitiy funcs.
559 def __init__(self, module_name, dom_node):
560 self.dom_node = dom_node
561 self.module_name = module_name
562 self.name = get_attr(dom_node, 'name')
563 self.uuid = get_attr(dom_node, 'uuid')
564 self.kmodule_list = []
568 def info(self, *args):
569 msg = string.join(map(str,args))
570 print self.module_name + ":", self.name, self.uuid, msg
def lookup_server(self, srv_uuid):
    """Find the network of server SRV_UUID and remember it on this module.

    The network node is resolved with get_ost_net(), starting from the
    parent of this module's dom node, wrapped in a Network object and
    stored in self._server.
    """
    parent = self.dom_node.parentNode
    net_node = get_ost_net(parent, srv_uuid)
    self._server = Network(net_node)
578 def get_server(self):
582 """ default cleanup, used for most modules """
584 srv = self.get_server()
587 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
588 except CommandError, e:
589 log(self.module_name, "disconnect failed: ", self.name)
592 lctl.cleanup(self.name, self.uuid)
593 except CommandError, e:
594 log(self.module_name, "cleanup failed: ", self.name)
def add_module(self, modname):
    """Queue kernel module MODNAME at the end of this object's module list."""
    pending = self.kmodule_list
    pending.append(modname)
601 def mod_loaded(self, modname):
602 """Check if a module is already loaded. Look in /proc/modules for it."""
603 fp = open('/proc/modules')
604 lines = fp.readlines()
606 # please forgive my tired fingers for this one
607 ret = filter(lambda word, mod=modname: word == mod,
608 map(lambda line: string.split(line)[0], lines))
611 def load_module(self):
612 """Load all the modules in the list in the order they appear."""
613 for mod in self.kmodule_list:
614 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
615 if self.mod_loaded(mod) and not config.noexec():
617 log ('loading module:', mod)
619 module = find_module(config.src_dir(), mod)
621 panic('module not found:', mod)
622 (rc, out) = run('/sbin/insmod', module)
624 raise CommandError('insmod', out, rc)
626 (rc, out) = run('/sbin/modprobe', mod)
628 raise CommandError('modprobe', out, rc)
630 def cleanup_module(self):
631 """Unload the modules in the list in reverse order."""
632 rev = self.kmodule_list
635 if not self.mod_loaded(mod):
637 log('unloading module:', mod)
640 (rc, out) = run('/sbin/rmmod', mod)
642 log('! unable to unload module:', mod)
646 class Network(Module):
647 def __init__(self,dom_node):
648 Module.__init__(self, 'NETWORK', dom_node)
649 self.net_type = get_attr(dom_node,'type')
650 self.nid = get_text(dom_node, 'server', '*')
651 self.port = get_text_int(dom_node, 'port', 0)
652 self.send_mem = get_text_int(dom_node, 'send_mem', 65536)
653 self.recv_mem = get_text_int(dom_node, 'recv_mem', 65536)
655 self.nid = get_local_address(self.net_type)
657 panic("unable to set nid for", self.net_type)
659 self.add_module('portals')
660 if node_needs_router():
661 self.add_module('kptlrouter')
662 if self.net_type == 'tcp':
663 self.add_module('ksocknal')
664 if self.net_type == 'elan':
665 self.add_module('kqswnal')
666 if self.net_type == 'gm':
667 self.add_module('kgmnal')
668 self.add_module('obdclass')
669 self.add_module('ptlrpc')
672 self.info(self.net_type, self.nid, self.port)
673 if self.net_type == 'tcp':
674 ret = run_daemon(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
676 raise CommandError(TCP_ACCEPTOR, 'failed', ret)
677 ret = self.dom_node.getElementsByTagName('route_tbl')
679 for r in a.getElementsByTagName('route'):
680 net_type = get_attr(r, 'net_type')
681 gw = get_attr(r, 'gw')
682 lo = get_attr(r, 'lo')
683 hi = get_attr(r,'hi', '')
684 lctl.add_route(net_type, gw, lo, hi)
685 if self.net_type == 'tcp' and hi == '':
686 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
688 panic("no server for nid", lo)
690 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
693 lctl.network(self.net_type, self.nid)
694 lctl.newdev(attach = "ptlrpc RPCDEV")
697 self.info(self.net_type, self.nid, self.port)
698 ret = self.dom_node.getElementsByTagName('route_tbl')
700 for r in a.getElementsByTagName('route'):
701 lo = get_attr(r, 'lo')
702 hi = get_attr(r,'hi', '')
703 if self.net_type == 'tcp' and hi == '':
704 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
706 panic("no server for nid", lo)
708 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
709 lctl.del_route(self.net_type, self.nid, lo, hi)
711 lctl.cleanup("RPCDEV", "")
712 except CommandError, e:
713 print "cleanup failed: ", self.name
716 lctl.disconnectAll(self.net_type)
717 except CommandError, e:
718 print "disconnectAll failed: ", self.name
720 if self.net_type == 'tcp':
721 # yikes, this ugly! need to save pid in /var/something
722 run("killall acceptor")
def __init__(self,dom_node):
    """Build the LDLM service object for DOM_NODE.

    Registers the 'ldlm' kernel module in this object's module list.
    """
    Module.__init__(self, 'LDLM', dom_node)
    for modname in ('ldlm',):
        self.add_module(modname)
730 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
def __init__(self,dom_node):
    """Build the LOV service object from its DOM_NODE description.

    Reads the stripe size, stripe offset and pattern attributes (defaults
    65536, 0 and 0), resolves the referenced mds to its name, and collects
    the uuids of all referenced oscs; the stripe count is the number of
    those oscs.
    """
    Module.__init__(self, 'LOV', dom_node)
    self.stripe_sz = get_attr_int(dom_node, 'stripesize', 65536)
    self.stripe_off = get_attr_int(dom_node, 'stripeoffset', 0)
    self.pattern = get_attr_int(dom_node, 'pattern', 0)
    self.mdsuuid = get_first_ref(dom_node, 'mds')
    # the mds entry is a sibling of this lov node, so search from the parent
    self.mdsname = getName(lookup(dom_node.parentNode, self.mdsuuid))
    osc_uuids = get_all_refs(dom_node, 'osc')
    self.devlist = osc_uuids
    self.stripe_cnt = len(osc_uuids)
746 self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
747 self.devlist, self.mdsname)
748 lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
749 self.stripe_sz, self.stripe_off, self.pattern,
750 string.join(self.devlist))
def __init__(self,dom_node):
    """Build the MDS service object from its DOM_NODE description.

    Records the backing device and its size, the filesystem type and the
    autoformat setting (default "no"), then registers the kernel modules:
    'extN' first when that is the filesystem type, followed by 'mds' and
    'mds_<fstype>'.
    """
    Module.__init__(self, 'MDS', dom_node)
    (self.devname, self.size) = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', "no")
    needed = ['mds', 'mds_%s' % (self.fstype)]
    if self.fstype == 'extN':
        needed.insert(0, 'extN')
    for modname in needed:
        self.add_module(modname)
765 self.info(self.devname, self.fstype, self.format)
766 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
767 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
768 setup ="%s %s" %(blkdev, self.fstype))
771 clean_loop(self.devname)
def __init__(self,dom_node):
    """Build the MDC service object from its DOM_NODE description.

    Stores the uuid of the referenced mds, looks up that server's network
    via lookup_server(), and registers the 'mdc' kernel module.
    """
    Module.__init__(self, 'MDC', dom_node)
    mds_uuid = get_first_ref(dom_node, 'mds')
    self.mds_uuid = mds_uuid
    self.lookup_server(mds_uuid)
    self.add_module('mdc')
781 self.info(self.mds_uuid)
782 srv = self.get_server()
783 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
784 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
785 setup ="%s %s" %(self.mds_uuid, srv.uuid))
def __init__(self, dom_node):
    """Build the OBD service object from its DOM_NODE description.

    Records the obd type, the backing device and its size, the filesystem
    type and the autoformat setting (default 'yes'), then registers the
    kernel modules: 'extN' first when that is the filesystem type,
    followed by the module named after the obd type.
    """
    Module.__init__(self, 'OBD', dom_node)
    self.obdtype = get_attr(dom_node, 'type')
    (self.devname, self.size) = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', 'yes')
    needed = [self.obdtype]
    if self.fstype == 'extN':
        needed.insert(0, 'extN')
    for modname in needed:
        self.add_module(modname)
798 # need to check /proc/mounts and /etc/mtab before
799 # formatting anything.
800 # FIXME: check if device is already formatted.
802 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
803 if self.obdtype == 'obdecho':
806 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
807 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
808 setup ="%s %s" %(blkdev, self.fstype))
811 if not self.obdtype == 'obdecho':
812 clean_loop(self.devname)
def __init__(self,dom_node):
    """Build the OST service object from its DOM_NODE description.

    Stores the uuid of the referenced obd and registers the 'ost'
    kernel module.
    """
    Module.__init__(self, 'OST', dom_node)
    obd_ref = get_first_ref(dom_node, 'obd')
    self.obd_uuid = obd_ref
    self.add_module('ost')
821 self.info(self.obd_uuid)
822 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
823 setup ="%s" % (self.obd_uuid))
def __init__(self,dom_node):
    """Build the OSC service object from its DOM_NODE description.

    Stores the uuids of the referenced obd and ost, looks up the ost's
    network via lookup_server(), and registers the 'osc' kernel module.
    """
    Module.__init__(self, 'OSC', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    ost_ref = get_first_ref(dom_node, 'ost')
    self.ost_uuid = ost_ref
    self.lookup_server(ost_ref)
    self.add_module('osc')
834 self.info(self.obd_uuid, self.ost_uuid)
835 srv = self.get_server()
837 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
841 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
843 panic ("no route to", srv.nid)
845 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
846 setup ="%s %s" %(self.obd_uuid, srv.uuid))
849 class Mountpoint(Module):
def __init__(self,dom_node):
    """Build the mount point object from its DOM_NODE description.

    Records the mount path and the uuids of the referenced mdc and osc
    (the latter kept as lov_uuid), then registers the 'osc', 'lov' and
    'llite' kernel modules in that order.
    """
    Module.__init__(self, 'MTPT', dom_node)
    self.path = get_text(dom_node, 'path')
    self.mdc_uuid = get_first_ref(dom_node, 'mdc')
    self.lov_uuid = get_first_ref(dom_node, 'osc')
    # should add lov only if needed
    for modname in ('osc', 'lov', 'llite'):
        self.add_module(modname)
861 l = lookup(self.dom_node.parentNode, self.lov_uuid)
862 if l.nodeName == 'lov':
864 for osc_uuid in lov.devlist:
865 osc = lookup(self.dom_node.parentNode, osc_uuid)
870 panic('osc not found:', osc_uuid)
871 lctl.newdev(attach="lov %s %s" % (lov.name, lov.uuid),
872 setup ="%s" % (self.mdc_uuid))
877 self.info(self.path, self.mdc_uuid,self.lov_uuid)
878 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
879 (self.lov_uuid, self.mdc_uuid, self.path)
880 run("mkdir", self.path)
883 panic("mount failed:", self.path)
885 self.info(self.path, self.mdc_uuid,self.lov_uuid)
886 (rc, out) = run("umount", self.path)
888 log("umount failed, cleanup will most likely not work.")
889 l = lookup(self.dom_node.parentNode, self.lov_uuid)
890 if l.nodeName == 'lov':
892 for osc_uuid in lov.devlist:
893 osc = lookup(self.dom_node.parentNode, osc_uuid)
898 panic('osc not found:', osc_uuid)
904 # ============================================================
905 # XML processing and query
906 # TODO: Change query funcs to use XPath, which is much cleaner
909 list = obd.getElementsByTagName('device')
913 size = get_attr_int(dev, 'size', 0)
914 return dev.firstChild.data, size
917 # Get the text content from the first matching child
918 # If there is no content (or it is all whitespace), return
920 def get_text(dom_node, tag, default=""):
921 list = dom_node.getElementsByTagName(tag)
925 if dom_node.firstChild:
926 txt = string.strip(dom_node.firstChild.data)
931 def get_text_int(dom_node, tag, default=0):
932 list = dom_node.getElementsByTagName(tag)
937 if dom_node.firstChild:
938 txt = string.strip(dom_node.firstChild.data)
943 panic("text value is not integer:", txt)
946 def get_attr(dom_node, attr, default=""):
947 v = dom_node.getAttribute(attr)
952 def get_attr_int(dom_node, attr, default=0):
954 v = dom_node.getAttribute(attr)
959 panic("attr value is not integer", v)
962 def get_first_ref(dom_node, tag):
963 """ Get the first uuidref of the type TAG. Used one only
964 one is expected. Returns the uuid."""
966 refname = '%s_ref' % tag
967 list = dom_node.getElementsByTagName(refname)
969 uuid = getRef(list[0])
972 def get_all_refs(dom_node, tag):
973 """ Get all the refs of type TAG. Returns list of uuids. """
975 refname = '%s_ref' % tag
976 list = dom_node.getElementsByTagName(refname)
979 uuids.append(getRef(i))
982 def get_ost_net(dom_node, uuid):
983 ost = lookup(dom_node, uuid)
984 uuid = get_first_ref(ost, 'network')
987 return lookup(dom_node, uuid)
989 def nid2server(dom_node, nid):
990 netlist = dom_node.getElementsByTagName('network')
991 for net_node in netlist:
992 if get_text(net_node, 'server') == nid:
993 return Network(net_node)
996 def lookup(dom_node, uuid):
997 for n in dom_node.childNodes:
998 if n.nodeType == n.ELEMENT_NODE:
999 if getUUID(n) == uuid:
1006 # Get name attribute of dom_node
def getName(dom_node):
    """Return the value of DOM_NODE's 'name' attribute."""
    name = dom_node.getAttribute('name')
    return name
def getRef(dom_node):
    """Return the value of DOM_NODE's 'uuidref' attribute."""
    ref = dom_node.getAttribute('uuidref')
    return ref
1013 # Get uuid attribute of dom_node
def getUUID(dom_node):
    """Return the value of DOM_NODE's 'uuid' attribute."""
    uuid = dom_node.getAttribute('uuid')
    return uuid
1017 # the tag name is the service type
1018 # fixme: this should do some checks to make sure the dom_node is a service
def getServiceType(dom_node):
    """Return DOM_NODE's tag name, which is used as the service type."""
    service_type = dom_node.nodeName
    return service_type
1023 # determine what "level" a particular node is at.
1024 # the order of initialization is based on level.
1025 def getServiceLevel(dom_node):
1026 type = getServiceType(dom_node)
1027 if type in ('network',):
1029 elif type in ('device', 'ldlm'):
1031 elif type in ('obd', 'mdd'):
1033 elif type in ('mds','ost'):
1035 elif type in ('mdc','osc'):
1037 elif type in ('lov',):
1039 elif type in ('mountpoint',):
1044 # return list of services in a profile. list is a list of tuples
1045 # [(level, dom_node),]
1046 def getServices(lustreNode, profileNode):
1048 for n in profileNode.childNodes:
1049 if n.nodeType == n.ELEMENT_NODE:
1050 servNode = lookup(lustreNode, getRef(n))
1053 panic('service not found: ' + getRef(n))
1054 level = getServiceLevel(servNode)
1055 list.append((level, servNode))
1059 def getByName(lustreNode, name, tag):
1060 ndList = lustreNode.getElementsByTagName(tag)
1062 if getName(nd) == name:
1069 ############################################################
1070 # routing ("rooting")
def init_node(dom_node):
    """Record a (type, server) pair for every network under DOM_NODE.

    One tuple per 'network' element is appended to the global local_node
    list: the element's 'type' attribute and the text of its 'server'
    child.
    """
    global local_node, router_flag
    for net in dom_node.getElementsByTagName('network'):
        local_node.append((get_attr(net, 'type'), get_text(net, 'server')))
1084 def node_needs_router():
1087 def get_routes(type, gw, dom_net):
1088 """ Return the routes as a list of tuples of the form:
1089 [(type, gw, lo, hi),]"""
1091 tbl = dom_net.getElementsByTagName('route_tbl')
1093 routes = t.getElementsByTagName('route')
1095 lo = get_attr(r, 'lo')
1096 hi = get_attr(r, 'hi', '')
1097 res.append((type, gw, lo, hi))
1101 def init_route_config(lustre):
1102 """ Scan the lustre config looking for routers. Build list of
1104 global routes, router_flag
1106 list = lustre.getElementsByTagName('node')
1108 if get_attr(node, 'router'):
1110 for (local_type, local_nid) in local_node:
1112 netlist = node.getElementsByTagName('network')
1113 for dom_net in netlist:
1114 if local_type == get_attr(dom_net, 'type'):
1115 gw = get_text(dom_net, 'server')
1119 for dom_net in netlist:
1120 if local_type != get_attr(dom_net, 'type'):
1121 for route in get_routes(local_type, gw, dom_net):
1122 routes.append(route)
1127 for iface in local_node:
1128 if net.net_type == iface[0]:
1132 def find_route(net):
1133 global local_node, routes
1134 frm_type = local_node[0][0]
1135 to_type = net.net_type
1137 debug ('looking for route to', to_type,to)
1146 ############################################################
1149 def startService(dom_node, module_flag):
1150 type = getServiceType(dom_node)
1151 debug('Service:', type, getName(dom_node), getUUID(dom_node))
1152 # there must be a more dynamic way of doing this...
1158 elif type == 'network':
1159 n = Network(dom_node)
1170 elif type == 'mountpoint':
1171 n = Mountpoint(dom_node)
1173 panic ("unknown service type:", type)
1178 if config.cleanup():
1183 if config.nosetup():
1185 if config.cleanup():
1191 # Prepare the system to run lustre using a particular profile
1192 # in the configuration.
1193 # * load & the modules
1194 # * setup networking for the current node
1195 # * make sure partitions are in place and prepared
1196 # * initialize devices with lctl
1197 # Levels are important, and need to be enforced.
1198 def startProfile(lustreNode, profileNode, module_flag):
1200 panic("profile:", profile, "not found.")
1201 services = getServices(lustreNode, profileNode)
1202 if config.cleanup():
1205 startService(s[1], module_flag)
1210 def doHost(lustreNode, hosts):
1214 dom_node = getByName(lustreNode, h, 'node')
1219 print 'No host entry found.'
1222 if not get_attr(dom_node, 'router'):
1224 init_route_config(lustreNode)
1229 # Two step process: (1) load modules, (2) setup lustre
1230 # if not cleaning, load modules first.
1231 module_flag = not config.cleanup()
1232 reflist = dom_node.getElementsByTagName('profile')
1233 for profile in reflist:
1234 startProfile(lustreNode, profile, module_flag)
1236 if not config.cleanup():
1237 sys_set_debug_path()
1238 script = config.gdb_script()
1239 run(lctl.lctl, ' modules >', script)
1241 # dump /tmp/ogdb and sleep/pause here
1242 log ("The GDB module script is in", script)
1245 module_flag = not module_flag
1246 for profile in reflist:
1247 startProfile(lustreNode, profile, module_flag)
1249 ############################################################
1250 # Command line processing
1252 def parse_cmdline(argv):
1254 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1255 "portals=", "makeldiff", "cleanup", "noexec",
1256 "help", "node=", "get=", "nomod", "nosetup"]
1260 opts, args = getopt.getopt(argv, short_opts, long_opts)
1261 except getopt.error:
1266 if o in ("-h", "--help"):
1268 if o in ("-d","--cleanup"):
1270 if o in ("-v", "--verbose"):
1272 if o in ("-n", "--noexec"):
1275 if o == "--portals":
1279 if o == "--reformat":
1289 if o == "--nosetup":
1297 s = urllib.urlopen(url)
def setupModulePath(cmd):
    """Tell config where the sources are when CMD sits next to a Makefile.

    If a readable Makefile exists in CMD's directory, the path two levels
    above that directory is passed to config.src_dir(); otherwise nothing
    is changed.
    """
    here = os.path.dirname(cmd)
    if not os.access(here + "/Makefile", os.R_OK):
        return
    config.src_dir(here + "/../../")
1308 def sys_set_debug_path():
1309 debug("debug path: ", config.debug_path())
1313 fp = open('/proc/sys/portals/debug_path', 'w')
1314 fp.write(config.debug_path())
1319 #/proc/sys/net/core/rmem_max
1320 #/proc/sys/net/core/wmem_max
1321 def sys_set_netmem_max(path, max):
1322 debug("setting", path, "to at least", max)
1330 fp = open(path, 'w')
1331 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the /dev/portals and /dev/obd nodes when they are not readable.

    Each missing node is made by handing the matching mknod command line
    to run().
    """
    wanted = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for node, mknod_cmd in wanted:
        if not os.access(node, os.R_OK):
            run(mknod_cmd)
1341 # Initialize or shutdown lustre according to a configuration file
1342 # * prepare the system for lustre
1343 # * configure devices with lctl
1344 # Shutdown does steps in reverse
1347 global TCP_ACCEPTOR, lctl, MAXTCPBUF
1348 args = parse_cmdline(sys.argv[1:])
1350 if not os.access(args[0], os.R_OK | os.W_OK):
1351 print 'File not found:', args[0]
1353 dom = xml.dom.minidom.parse(args[0])
1355 xmldata = fetch(config.url())
1356 dom = xml.dom.minidom.parseString(xmldata)
1362 node_list.append(config.node())
1364 host = socket.gethostname()
1366 node_list.append(host)
1367 node_list.append('localhost')
1368 debug("configuring for host: ", node_list)
1370 TCP_ACCEPTOR = find_prog('acceptor')
1371 if not TCP_ACCEPTOR:
1373 TCP_ACCEPTOR = 'acceptor'
1374 debug('! acceptor not found')
1376 panic('acceptor not found')
1378 lctl = LCTLInterface('lctl')
1380 setupModulePath(sys.argv[0])
1382 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
1383 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
1384 doHost(dom.documentElement, node_list)
1386 if __name__ == "__main__":
1389 except LconfError, e:
1391 except CommandError, e: