3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
37 # Maximum number of devices to search for.
38 # (the /dev/loop* nodes need to be created beforehand)
39 MAX_LOOP_DEVICES = 256
43 print """usage: lconf config.xml
45 config.xml Lustre configuration in xml format.
46 --get <url> URL to fetch a config file
47 --node <nodename> Load config for <nodename>
48 -d | --cleanup Cleans up config. (Shutdown)
49 -v | --verbose Print system commands as they are run
50 -h | --help Print this help
51 --gdb Prints message after creating gdb module script
52 and sleeps for 5 seconds.
53 -n | --noexec Prints the commands and steps that will be run for a
54 config without executing them. This can used to check if a
55 config file is doing what it should be doing. (Implies -v)
56 --nomod Skip load/unload module step.
57 --nosetup Skip device setup/cleanup step.
60 --ldap server LDAP server with lustre config database
61 --makeldiff Translate xml source to LDIFF
62 --reformat Reformat all devices (will confirm)
63 This are perhaps not needed:
64 --lustre="src dir" Base directory of lustre sources. Used to search
66 --portals=src Portals source
70 # ============================================================
71 # Config parameters, encapsulated in a class
86 self._gdb_script = '/tmp/ogdb'
87 self._debug_path = '/tmp/lustre-log'
90 def verbose(self, flag = None):
91 if flag: self._verbose = flag
94 def noexec(self, flag = None):
95 if flag: self._noexec = flag
def reformat(self, flag = None):
    """Get or set the reformat flag.

    A truthy *flag* becomes the new stored value; the current value
    is returned either way (falsy arguments leave it untouched).
    """
    if flag:
        self._reformat = flag
    return self._reformat
102 def cleanup(self, flag = None):
103 if flag: self._cleanup = flag
106 def gdb(self, flag = None):
107 if flag: self._gdb = flag
110 def nomod(self, flag = None):
111 if flag: self._nomod = flag
114 def nosetup(self, flag = None):
115 if flag: self._nosetup = flag
118 def node(self, val = None):
119 if val: self._node = val
122 def url(self, val = None):
123 if val: self._url = val
126 def gdb_script(self):
127 if os.path.isdir('/r'):
128 return '/r' + self._gdb_script
130 return self._gdb_script
132 def debug_path(self):
133 if os.path.isdir('/r'):
134 return '/r' + self._debug_path
136 return self._debug_path
138 def src_dir(self, val = None):
139 if val: self._url = val
144 # ============================================================
145 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort by raising LconfError reporting that *msg* is unimplemented.

    Placeholder for configuration paths that are not written yet
    (e.g. automatic local address detection for GM networks).
    """
    # Call-form raise works in both Python 2 and 3 (the original
    # "raise E, msg" form is py2-only); also fixes the "implmemented"
    # typo in the user-visible message.
    raise LconfError(msg + ' not implemented yet.')
151 msg = string.join(map(str,args))
152 if not config.noexec():
153 raise LconfError(msg)
158 msg = string.join(map(str,args))
163 print string.strip(s)
167 msg = string.join(map(str,args))
170 # ============================================================
171 # locally defined exceptions
172 class CommandError (exceptions.Exception):
173 def __init__(self, cmd_name, cmd_err, rc=None):
174 self.cmd_name = cmd_name
175 self.cmd_err = cmd_err
180 if type(self.cmd_err) == types.StringType:
182 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
184 print "! %s: %s" % (self.cmd_name, self.cmd_err)
185 elif type(self.cmd_err) == types.ListType:
187 print "! %s (error %d):" % (self.cmd_name, self.rc)
189 print "! %s:" % (self.cmd_name)
190 for s in self.cmd_err:
191 print "> %s" %(string.strip(s))
195 class LconfError (exceptions.Exception):
196 def __init__(self, args):
200 # ============================================================
201 # handle lctl interface
204 Manage communication with lctl
207 def __init__(self, cmd):
209 Initialize close by finding the lctl binary.
211 self.lctl = find_prog(cmd)
214 debug('! lctl not found')
217 raise CommandError('lctl', "unable to find lctl binary.")
222 the cmds are written to stdin of lctl
223 lctl doesn't return errors when run in script mode, so
225 should modify command line to accept multiple commands, or
226 create complex command line options
228 debug("+", self.lctl, cmds)
229 if config.noexec(): return (0, [])
230 p = popen2.Popen3(self.lctl, 1)
231 p.tochild.write(cmds + "\n")
233 out = p.fromchild.readlines()
234 err = p.childerr.readlines()
237 raise CommandError(self.lctl, err, ret)
241 def network(self, net, nid):
242 """ initialized network and add "self" """
243 # Idea: "mynid" could be used for all network types to add "self," and then
244 # this special case would be gone and the "self" hack would be hidden.
250 quit""" % (net, nid, nid)
259 # create a new connection
260 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
268 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
274 quit""" % (net, servuuid, nid, nid, port, )
278 # add a route to a range
279 def add_route(self, net, gw, lo, hi):
283 quit """ % (net, gw, lo, hi)
287 # add a route to a range
288 def del_route(self, net, gw, lo, hi):
295 # add a route to a host
296 def add_route_host(self, net, uuid, gw, tgt):
301 quit """ % (net, uuid, tgt, gw, tgt)
304 # disconnect one connection
305 def disconnect(self, net, nid, port, servuuid):
310 quit""" % (net, nid, servuuid)
313 # disconnect all connections
314 def disconnectAll(self, net):
322 # create a new device with lctl
323 def newdev(self, attach, setup = ""):
328 quit""" % (attach, setup)
332 def cleanup(self, name, uuid):
341 def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
345 lovconfig %s %d %d %d %s %s
346 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
349 # ============================================================
350 # Various system-level functions
351 # (ideally moved to their own module)
353 # Run a command and return the output and status.
354 # stderr is sent to /dev/null, could use popen3 to
355 # save it if necessary
357 cmd = string.join(map(str,args))
359 if config.noexec(): return (0, [])
360 f = os.popen(cmd + ' 2>&1')
369 # Run a command in the background.
370 def run_daemon(*args):
371 cmd = string.join(map(str,args))
373 if config.noexec(): return 0
374 f = os.popen(cmd + ' 2>&1')
382 # Determine full path to use for an external command
383 # searches dirname(argv[0]) first, then PATH
385 syspath = string.split(os.environ['PATH'], ':')
386 cmdpath = os.path.dirname(sys.argv[0])
387 syspath.insert(0, cmdpath);
388 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
390 prog = os.path.join(d,cmd)
391 if os.access(prog, os.X_OK):
395 # Recursively look for file starting at base dir
396 def do_find_file(base, mod):
397 fullname = os.path.join(base, mod)
398 if os.access(fullname, os.R_OK):
400 for d in os.listdir(base):
401 dir = os.path.join(base,d)
402 if os.path.isdir(dir):
403 module = do_find_file(dir, mod)
407 def find_module(src_dir, modname):
408 mod = '%s.o' % (modname)
409 search = (src_dir + "/lustre", src_dir + "/portals/linux")
412 module = do_find_file(d, mod)
419 # is the path a block device?
426 return stat.S_ISBLK(s[stat.ST_MODE])
428 # build fs according to type
430 def mkfs(fstype, dev):
431 if(fstype in ('ext3', 'extN')):
432 mkfs = 'mkfs.ext2 -j -b 4096'
434 print 'unsupported fs type: ', fstype
435 if not is_block(dev):
439 (ret, out) = run (mkfs, force, dev)
441 panic("Unable to build fs:", dev)
442 # enable hash tree indexing on fs
444 htree = 'echo "feature FEATURE_C5" | debugfs -w'
445 (ret, out) = run (htree, dev)
447 panic("Unable to enable htree:", dev)
449 # some systems use /dev/loopN, some /dev/loop/N
453 if not os.access(loop + str(0), os.R_OK):
455 if not os.access(loop + str(0), os.R_OK):
456 panic ("can't access loop devices")
459 # find loop device assigned to thefile
462 for n in xrange(0, MAX_LOOP_DEVICES):
464 if os.access(dev, os.R_OK):
465 (stat, out) = run('losetup', dev)
466 if (out and stat == 0):
467 m = re.search(r'\((.*)\)', out[0])
468 if m and file == m.group(1):
474 # create file if necessary and assign the first free loop device
475 def init_loop(file, size, fstype):
476 dev = find_loop(file)
478 print 'WARNING file:', file, 'already mapped to', dev
480 if not os.access(file, os.R_OK | os.W_OK):
481 run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
483 # find next free loop
484 for n in xrange(0, MAX_LOOP_DEVICES):
486 if os.access(dev, os.R_OK):
487 (stat, out) = run('losetup', dev)
489 run('losetup', dev, file)
492 print "out of loop devices"
494 print "out of loop devices"
497 # undo loop assignment
498 def clean_loop(file):
499 dev = find_loop(file)
501 ret, out = run('losetup -d', dev)
503 log('unable to clean loop device:', dev, 'for file:', file)
506 # determine if dev is formatted as a <fstype> filesystem
507 def need_format(fstype, dev):
508 # FIXME don't know how to implement this
511 # initialize a block device if needed
512 def block_dev(dev, size, fstype, format):
513 if config.noexec(): return dev
514 if not is_block(dev):
515 dev = init_loop(dev, size, fstype)
516 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
520 # panic("device:", dev,
521 # "not prepared, and autoformat is not set.\n",
522 # "Rerun with --reformat option to format ALL filesystems")
526 def get_local_address(net_type):
527 """Return the local address for the network type."""
529 if net_type == 'tcp':
531 host = socket.gethostname()
532 local = socket.gethostbyname(host)
533 elif net_type == 'elan':
534 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
536 fp = open('/proc/elan/device0/position', 'r')
537 lines = fp.readlines()
546 elif net_type == 'gm':
547 fixme("automatic local address for GM")
552 # ============================================================
553 # Classes to prepare and cleanup the various objects
556 """ Base class for the rest of the modules. The default cleanup method is
defined here, as well as some utility funcs.
559 def __init__(self, module_name, dom_node):
560 self.dom_node = dom_node
561 self.module_name = module_name
562 self.name = get_attr(dom_node, 'name')
563 self.uuid = get_attr(dom_node, 'uuid')
564 self.kmodule_list = []
def info(self, *args):
    # Print an informational line for this module instance, prefixed
    # with its type, name and uuid (e.g. "MDS: mds1 <uuid> <msg>").
    msg = string.join(map(str,args))
    print self.module_name + ":", self.name, self.uuid, msg
def lookup_server(self, srv_uuid):
    """Resolve *srv_uuid* to its network config and cache it.

    Looks the server's <network> element up from this node's parent
    and stores it wrapped in a Network object on self._server.
    """
    parent = self.dom_node.parentNode
    self._server = Network(get_ost_net(parent, srv_uuid))
578 def get_server(self):
582 """ default cleanup, used for most modules """
584 srv = self.get_server()
585 if srv and local_net(srv):
587 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
588 except CommandError, e:
589 log(self.module_name, "disconnect failed: ", self.name)
592 lctl.cleanup(self.name, self.uuid)
593 except CommandError, e:
594 log(self.module_name, "cleanup failed: ", self.name)
def add_module(self, modname):
    """Queue *modname* at the end of the kernel-module load list."""
    self.kmodule_list.append(modname)
601 def mod_loaded(self, modname):
602 """Check if a module is already loaded. Look in /proc/modules for it."""
603 fp = open('/proc/modules')
604 lines = fp.readlines()
606 # please forgive my tired fingers for this one
607 ret = filter(lambda word, mod=modname: word == mod,
608 map(lambda line: string.split(line)[0], lines))
611 def load_module(self):
612 """Load all the modules in the list in the order they appear."""
613 for mod in self.kmodule_list:
614 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
615 if self.mod_loaded(mod) and not config.noexec():
617 log ('loading module:', mod)
619 module = find_module(config.src_dir(), mod)
621 panic('module not found:', mod)
622 (rc, out) = run('/sbin/insmod', module)
624 raise CommandError('insmod', out, rc)
626 (rc, out) = run('/sbin/modprobe', mod)
628 raise CommandError('modprobe', out, rc)
630 def cleanup_module(self):
631 """Unload the modules in the list in reverse order."""
632 rev = self.kmodule_list
635 if not self.mod_loaded(mod):
637 log('unloading module:', mod)
640 (rc, out) = run('/sbin/rmmod', mod)
642 log('! unable to unload module:', mod)
646 class Network(Module):
647 def __init__(self,dom_node):
648 Module.__init__(self, 'NETWORK', dom_node)
649 self.net_type = get_attr(dom_node,'type')
650 self.nid = get_text(dom_node, 'server', '*')
651 self.port = get_text_int(dom_node, 'port', 0)
652 self.send_mem = get_text_int(dom_node, 'send_mem', 65536)
653 self.recv_mem = get_text_int(dom_node, 'recv_mem', 65536)
655 self.nid = get_local_address(self.net_type)
657 panic("unable to set nid for", self.net_type)
659 self.add_module('portals')
660 if node_needs_router():
661 self.add_module('kptlrouter')
662 if self.net_type == 'tcp':
663 self.add_module('ksocknal')
664 if self.net_type == 'elan':
665 self.add_module('kqswnal')
666 if self.net_type == 'gm':
667 self.add_module('kgmnal')
668 self.add_module('obdclass')
669 self.add_module('ptlrpc')
672 self.info(self.net_type, self.nid, self.port)
673 if self.net_type == 'tcp':
674 ret = run_daemon(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
676 raise CommandError(TCP_ACCEPTOR, 'failed', ret)
677 ret = self.dom_node.getElementsByTagName('route_tbl')
679 for r in a.getElementsByTagName('route'):
680 net_type = get_attr(r, 'type')
681 gw = get_attr(r, 'gw')
682 lo = get_attr(r, 'lo')
683 hi = get_attr(r,'hi', '')
684 lctl.add_route(net_type, gw, lo, hi)
685 if self.net_type == 'tcp' and hi == '':
686 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
688 panic("no server for nid", lo)
690 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
693 lctl.network(self.net_type, self.nid)
694 lctl.newdev(attach = "ptlrpc RPCDEV")
697 self.info(self.net_type, self.nid, self.port)
698 ret = self.dom_node.getElementsByTagName('route_tbl')
700 for r in a.getElementsByTagName('route'):
701 lo = get_attr(r, 'lo')
702 hi = get_attr(r,'hi', '')
703 if self.net_type == 'tcp' and hi == '':
704 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
706 panic("no server for nid", lo)
709 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
710 except CommandError, e:
711 print "disconnect failed: ", self.name
714 lctl.del_route(self.net_type, self.nid, lo, hi)
715 except CommandError, e:
716 print "del_route failed: ", self.name
720 lctl.cleanup("RPCDEV", "")
721 except CommandError, e:
722 print "cleanup failed: ", self.name
725 lctl.disconnectAll(self.net_type)
726 except CommandError, e:
727 print "disconnectAll failed: ", self.name
729 if self.net_type == 'tcp':
730 # yikes, this ugly! need to save pid in /var/something
731 run("killall acceptor")
def __init__(self,dom_node):
    # Lock manager service: needs only the ldlm kernel module.
    Module.__init__(self, 'LDLM', dom_node)
    self.add_module('ldlm')
739 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
def __init__(self,dom_node):
    # Logical object volume: striping configuration over a set of OSCs.
    Module.__init__(self, 'LOV', dom_node)
    # Striping parameters from the XML attributes, with defaults.
    self.stripe_sz = get_attr_int(dom_node, 'stripesize', 65536)
    self.stripe_off = get_attr_int(dom_node, 'stripeoffset', 0)
    self.pattern = get_attr_int(dom_node, 'pattern', 0)
    # The MDS this LOV is configured against.
    self.mdsuuid = get_first_ref(dom_node, 'mds')
    mds= lookup(dom_node.parentNode, self.mdsuuid)
    self.mdsname = getName(mds)
    # One stripe per referenced OSC device.
    self.devlist = get_all_refs(dom_node, 'osc')
    self.stripe_cnt = len(self.devlist)
755 self.info(self.mdsuuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
756 self.devlist, self.mdsname)
757 lctl.lovconfig(self.uuid, self.mdsname, self.stripe_cnt,
758 self.stripe_sz, self.stripe_off, self.pattern,
759 string.join(self.devlist))
def __init__(self,dom_node):
    # Metadata server: backed by a local block device (or loop file)
    # formatted with self.fstype.
    Module.__init__(self, 'MDS', dom_node)
    self.devname, self.size = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    # autoformat defaults to "no" here, unlike OBD's "yes" —
    # avoids clobbering existing metadata.
    self.format = get_text(dom_node, 'autoformat', "no")
    if self.fstype == 'extN':
        self.add_module('extN')
    self.add_module('mds')
    # fs-specific MDS backend module, e.g. mds_extN
    self.add_module('mds_%s' % (self.fstype))
774 self.info(self.devname, self.fstype, self.format)
775 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
776 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
777 setup ="%s %s" %(blkdev, self.fstype))
780 clean_loop(self.devname)
def __init__(self,dom_node):
    # Metadata client: connects to the MDS referenced by this node.
    Module.__init__(self, 'MDC', dom_node)
    self.mds_uuid = get_first_ref(dom_node, 'mds')
    # Resolve and cache the MDS's network information for prepare().
    self.lookup_server(self.mds_uuid)
    self.add_module('mdc')
790 self.info(self.mds_uuid)
791 srv = self.get_server()
792 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
793 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
794 setup ="%s %s" %(self.mds_uuid, srv.uuid))
def __init__(self, dom_node):
    # Object device; obdtype selects the driver module to load
    # (e.g. obdfilter, obdecho).
    Module.__init__(self, 'OBD', dom_node)
    self.obdtype = get_attr(dom_node, 'type')
    self.devname, self.size = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    # NOTE: OBDs autoformat by default ("yes"), unlike the MDS.
    self.format = get_text(dom_node, 'autoformat', 'yes')
    if self.fstype == 'extN':
        self.add_module('extN')
    self.add_module(self.obdtype)
807 # need to check /proc/mounts and /etc/mtab before
808 # formatting anything.
809 # FIXME: check if device is already formatted.
811 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
812 if self.obdtype == 'obdecho':
815 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
816 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
817 setup ="%s %s" %(blkdev, self.fstype))
820 if not self.obdtype == 'obdecho':
821 clean_loop(self.devname)
def __init__(self,dom_node):
    # Object storage target: server front end for a single OBD.
    Module.__init__(self, 'OST', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    self.add_module('ost')
830 self.info(self.obd_uuid)
831 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
832 setup ="%s" % (self.obd_uuid))
def __init__(self,dom_node):
    # Object storage client: talks to a remote OST's OBD.
    Module.__init__(self, 'OSC', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    self.ost_uuid = get_first_ref(dom_node, 'ost')
    # Resolve and cache the OST's network information for prepare().
    self.lookup_server(self.ost_uuid)
    self.add_module('osc')
843 self.info(self.obd_uuid, self.ost_uuid)
844 srv = self.get_server()
846 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
850 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
852 panic ("no route to", srv.nid)
854 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
855 setup ="%s %s" %(self.obd_uuid, srv.uuid))
858 class Mountpoint(Module):
def __init__(self,dom_node):
    # Client mount point for a lustre_lite filesystem.
    Module.__init__(self, 'MTPT', dom_node)
    self.path = get_text(dom_node, 'path')
    self.mdc_uuid = get_first_ref(dom_node, 'mdc')
    # The 'osc' ref may actually point at a LOV — presumably resolved
    # later when the mountpoint is prepared; TODO confirm.
    self.lov_uuid = get_first_ref(dom_node, 'osc')
    self.add_module('osc')
    # should add lov only if needed
    self.add_module('lov')
    self.add_module('llite')
870 l = lookup(self.dom_node.parentNode, self.lov_uuid)
871 if l.nodeName == 'lov':
873 for osc_uuid in lov.devlist:
874 osc = lookup(self.dom_node.parentNode, osc_uuid)
879 panic('osc not found:', osc_uuid)
880 lctl.newdev(attach="lov %s %s" % (lov.name, lov.uuid),
881 setup ="%s" % (self.mdc_uuid))
886 self.info(self.path, self.mdc_uuid,self.lov_uuid)
887 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
888 (self.lov_uuid, self.mdc_uuid, self.path)
889 run("mkdir", self.path)
892 panic("mount failed:", self.path)
894 self.info(self.path, self.mdc_uuid,self.lov_uuid)
895 (rc, out) = run("umount", self.path)
897 log("umount failed, cleanup will most likely not work.")
898 l = lookup(self.dom_node.parentNode, self.lov_uuid)
899 if l.nodeName == 'lov':
901 for osc_uuid in lov.devlist:
902 osc = lookup(self.dom_node.parentNode, osc_uuid)
907 panic('osc not found:', osc_uuid)
913 # ============================================================
914 # XML processing and query
# TODO: Change query funcs to use XPath, which is much cleaner
918 list = obd.getElementsByTagName('device')
922 size = get_attr_int(dev, 'size', 0)
923 return dev.firstChild.data, size
926 # Get the text content from the first matching child
927 # If there is no content (or it is all whitespace), return
929 def get_text(dom_node, tag, default=""):
930 list = dom_node.getElementsByTagName(tag)
934 if dom_node.firstChild:
935 txt = string.strip(dom_node.firstChild.data)
940 def get_text_int(dom_node, tag, default=0):
941 list = dom_node.getElementsByTagName(tag)
946 if dom_node.firstChild:
947 txt = string.strip(dom_node.firstChild.data)
952 panic("text value is not integer:", txt)
955 def get_attr(dom_node, attr, default=""):
956 v = dom_node.getAttribute(attr)
961 def get_attr_int(dom_node, attr, default=0):
963 v = dom_node.getAttribute(attr)
968 panic("attr value is not integer", v)
971 def get_first_ref(dom_node, tag):
""" Get the first uuidref of the type TAG. Used when only
one is expected. Returns the uuid."""
975 refname = '%s_ref' % tag
976 list = dom_node.getElementsByTagName(refname)
978 uuid = getRef(list[0])
981 def get_all_refs(dom_node, tag):
982 """ Get all the refs of type TAG. Returns list of uuids. """
984 refname = '%s_ref' % tag
985 list = dom_node.getElementsByTagName(refname)
988 uuids.append(getRef(i))
991 def get_ost_net(dom_node, uuid):
992 ost = lookup(dom_node, uuid)
993 uuid = get_first_ref(ost, 'network')
996 return lookup(dom_node, uuid)
def nid2server(dom_node, nid):
    # Find the <network> element whose <server> text equals *nid*
    # and wrap it in a Network object.
    # NOTE(review): in the visible code no explicit value is returned
    # when nothing matches — callers appear to treat that as "no server".
    netlist = dom_node.getElementsByTagName('network')
    for net_node in netlist:
        if get_text(net_node, 'server') == nid:
            return Network(net_node)
1005 def lookup(dom_node, uuid):
1006 for n in dom_node.childNodes:
1007 if n.nodeType == n.ELEMENT_NODE:
1008 if getUUID(n) == uuid:
1015 # Get name attribute of dom_node
def getName(dom_node):
    """Return the 'name' attribute of *dom_node* ('' if absent)."""
    return dom_node.getAttribute('name')
def getRef(dom_node):
    """Return the 'uuidref' attribute of *dom_node* ('' if absent)."""
    return dom_node.getAttribute('uuidref')
def getUUID(dom_node):
    """Return the 'uuid' attribute of *dom_node* ('' if absent)."""
    # (The original comment said "name attribute" — copy/paste slip;
    # this accessor reads 'uuid'.)
    return dom_node.getAttribute('uuid')
def getServiceType(dom_node):
    """Return the service type of *dom_node*.

    The element's tag name doubles as the service type (e.g. 'mds',
    'ost', 'network').  FIXME: no check that the node actually is a
    service element.
    """
    return dom_node.nodeName
1032 # determine what "level" a particular node is at.
# the order of initialization is based on level.
1034 def getServiceLevel(dom_node):
1035 type = getServiceType(dom_node)
1036 if type in ('network',):
1038 elif type in ('device', 'ldlm'):
1040 elif type in ('obd', 'mdd'):
1042 elif type in ('mds','ost'):
1044 elif type in ('mdc','osc'):
1046 elif type in ('lov',):
1048 elif type in ('mountpoint',):
1053 # return list of services in a profile. list is a list of tuples
1054 # [(level, dom_node),]
1055 def getServices(lustreNode, profileNode):
1057 for n in profileNode.childNodes:
1058 if n.nodeType == n.ELEMENT_NODE:
1059 servNode = lookup(lustreNode, getRef(n))
1062 panic('service not found: ' + getRef(n))
1063 level = getServiceLevel(servNode)
1064 list.append((level, servNode))
1068 def getByName(lustreNode, name, tag):
1069 ndList = lustreNode.getElementsByTagName(tag)
1071 if getName(nd) == name:
1078 ############################################################
1079 # routing ("rooting")
def init_node(dom_node):
    # Record this node's network interfaces in the global local_node
    # list as (net_type, address) tuples, read from its <network>
    # elements.
    global local_node, router_flag
    netlist = dom_node.getElementsByTagName('network')
    for dom_net in netlist:
        type = get_attr(dom_net, 'type')
        gw = get_text(dom_net, 'server')
        local_node.append((type, gw))
1093 def node_needs_router():
1096 def get_routes(type, gw, dom_net):
1097 """ Return the routes as a list of tuples of the form:
1098 [(type, gw, lo, hi),]"""
1100 tbl = dom_net.getElementsByTagName('route_tbl')
1102 routes = t.getElementsByTagName('route')
1104 lo = get_attr(r, 'lo')
1105 hi = get_attr(r, 'hi', '')
1106 res.append((type, gw, lo, hi))
1110 def init_route_config(lustre):
1111 """ Scan the lustre config looking for routers. Build list of
1113 global routes, router_flag
1115 list = lustre.getElementsByTagName('node')
1117 if get_attr(node, 'router'):
1119 for (local_type, local_nid) in local_node:
1121 netlist = node.getElementsByTagName('network')
1122 for dom_net in netlist:
1123 if local_type == get_attr(dom_net, 'type'):
1124 gw = get_text(dom_net, 'server')
1128 for dom_net in netlist:
1129 if local_type != get_attr(dom_net, 'type'):
1130 for route in get_routes(local_type, gw, dom_net):
1131 routes.append(route)
1136 for iface in local_node:
1137 if net.net_type == iface[0]:
1141 def find_route(net):
1142 global local_node, routes
1143 frm_type = local_node[0][0]
1144 to_type = net.net_type
1146 debug ('looking for route to', to_type,to)
1155 ############################################################
1158 def startService(dom_node, module_flag):
1159 type = getServiceType(dom_node)
1160 debug('Service:', type, getName(dom_node), getUUID(dom_node))
1161 # there must be a more dynamic way of doing this...
1167 elif type == 'network':
1168 n = Network(dom_node)
1179 elif type == 'mountpoint':
1180 n = Mountpoint(dom_node)
1182 panic ("unknown service type:", type)
1187 if config.cleanup():
1192 if config.nosetup():
1194 if config.cleanup():
1200 # Prepare the system to run lustre using a particular profile
1201 # in a the configuration.
1202 # * load & the modules
1203 # * setup networking for the current node
1204 # * make sure partitions are in place and prepared
1205 # * initialize devices with lctl
1206 # Levels is important, and needs to be enforced.
1207 def startProfile(lustreNode, profileNode, module_flag):
1209 panic("profile:", profile, "not found.")
1210 services = getServices(lustreNode, profileNode)
1211 if config.cleanup():
1214 startService(s[1], module_flag)
1219 def doHost(lustreNode, hosts):
1223 dom_node = getByName(lustreNode, h, 'node')
1228 print 'No host entry found.'
1231 if not get_attr(dom_node, 'router'):
1233 init_route_config(lustreNode)
1238 # Two step process: (1) load modules, (2) setup lustre
1239 # if not cleaning, load modules first.
1240 module_flag = not config.cleanup()
1241 reflist = dom_node.getElementsByTagName('profile')
1242 for profile in reflist:
1243 startProfile(lustreNode, profile, module_flag)
1245 if not config.cleanup():
1246 sys_set_debug_path()
1247 script = config.gdb_script()
1248 run(lctl.lctl, ' modules >', script)
1250 # dump /tmp/ogdb and sleep/pause here
1251 log ("The GDB module script is in", script)
1254 module_flag = not module_flag
1255 for profile in reflist:
1256 startProfile(lustreNode, profile, module_flag)
1258 ############################################################
1259 # Command line processing
1261 def parse_cmdline(argv):
1263 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1264 "portals=", "makeldiff", "cleanup", "noexec",
1265 "help", "node=", "get=", "nomod", "nosetup"]
1269 opts, args = getopt.getopt(argv, short_opts, long_opts)
1270 except getopt.error:
1275 if o in ("-h", "--help"):
1277 if o in ("-d","--cleanup"):
1279 if o in ("-v", "--verbose"):
1281 if o in ("-n", "--noexec"):
1284 if o == "--portals":
1288 if o == "--reformat":
1298 if o == "--nosetup":
1306 s = urllib.urlopen(url)
def setupModulePath(cmd):
    # If lconf is being run out of a source tree (a Makefile sits next
    # to the script at *cmd*), point config.src_dir two levels up so
    # kernel modules are loaded from the build tree.
    base = os.path.dirname(cmd)
    if os.access(base+"/Makefile", os.R_OK):
        config.src_dir(base + "/../../")
1317 def sys_set_debug_path():
1318 debug("debug path: ", config.debug_path())
1322 fp = open('/proc/sys/portals/debug_path', 'w')
1323 fp.write(config.debug_path())
1328 #/proc/sys/net/core/rmem_max
1329 #/proc/sys/net/core/wmem_max
1330 def sys_set_netmem_max(path, max):
1331 debug("setting", path, "to at least", max)
1339 fp = open(path, 'w')
1340 fp.write('%d\n' %(max))
def sys_make_devices():
    # Ensure the portals and obd character device nodes exist
    # (char major 10, minors 240/241) before talking to the drivers.
    if not os.access('/dev/portals', os.R_OK):
        run('mknod /dev/portals c 10 240')
    if not os.access('/dev/obd', os.R_OK):
        run('mknod /dev/obd c 10 241')
1350 # Initialize or shutdown lustre according to a configuration file
1351 # * prepare the system for lustre
1352 # * configure devices with lctl
1353 # Shutdown does steps in reverse
1356 global TCP_ACCEPTOR, lctl, MAXTCPBUF
1357 args = parse_cmdline(sys.argv[1:])
1359 if not os.access(args[0], os.R_OK | os.W_OK):
1360 print 'File not found:', args[0]
1362 dom = xml.dom.minidom.parse(args[0])
1364 xmldata = fetch(config.url())
1365 dom = xml.dom.minidom.parseString(xmldata)
1371 node_list.append(config.node())
1373 host = socket.gethostname()
1375 node_list.append(host)
1376 node_list.append('localhost')
1377 debug("configuring for host: ", node_list)
1379 TCP_ACCEPTOR = find_prog('acceptor')
1380 if not TCP_ACCEPTOR:
1382 TCP_ACCEPTOR = 'acceptor'
1383 debug('! acceptor not found')
1385 panic('acceptor not found')
1387 lctl = LCTLInterface('lctl')
1389 setupModulePath(sys.argv[0])
1391 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
1392 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
1393 doHost(dom.documentElement, node_list)
1395 if __name__ == "__main__":
1398 except LconfError, e:
1400 except CommandError, e: