3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
# Default TCP socket buffer size (1 MB), used for socknal send_mem /
# recv_mem when the XML config gives no explicit values.
DEFAULT_TCPBUF = 1048576

# Maximum number of devices to search for.
# (the /dev/loop* nodes need to be created beforehand)
MAX_LOOP_DEVICES = 256
# [review] Body of usage() — the 'def usage():' line and the trailing
# sys.exit() are elided in this excerpt.  Prints command-line help.
print """usage: lconf config.xml

config.xml          Lustre configuration in xml format.
--get <url>         URL to fetch a config file
--node <nodename>   Load config for <nodename>
-d | --cleanup      Cleans up config. (Shutdown)
-v | --verbose      Print system commands as they are run
-h | --help         Print this help
--gdb               Prints message after creating gdb module script
                    and sleeps for 5 seconds.
-n | --noexec       Prints the commands and steps that will be run for a
                    config without executing them. This can used to check if a
                    config file is doing what it should be doing. (Implies -v)
--nomod             Skip load/unload module step.
--nosetup           Skip device setup/cleanup step.
--reformat          Reformat all devices (without question)
--ldap server       LDAP server with lustre config database
--makeldiff         Translate xml source to LDIFF
This are perhaps not needed:
--lustre="src dir"  Base directory of lustre sources. Used to search
                    for modules.
--portals=src       Portals source
"""
71 # ============================================================
72 # Config parameters, encapsulated in a class
# [review] Fragment of Config.__init__ (the 'def __init__' line is
# elided): default paths for the gdb module script, the kernel debug
# log, and the optional lctl dump target.
self._gdb_script = '/tmp/ogdb'
self._debug_path = '/tmp/lustre-log'
self._dump_file = None
def verbose(self, flag = None):
    # Get/set the verbose flag; a truthy flag installs a new value.
    # [review] the 'return self._verbose' line is elided in this excerpt.
    if flag: self._verbose = flag
def noexec(self, flag = None):
    # Get/set the dry-run flag; commands are printed, not executed.
    # [review] the 'return self._noexec' line is elided in this excerpt.
    if flag: self._noexec = flag
def reformat(self, flag = None):
    """Get or set the reformat flag.

    A truthy *flag* becomes the new value; the current value is
    returned either way.
    """
    if flag:
        self._reformat = flag
    return self._reformat
def cleanup(self, flag = None):
    # Get/set the cleanup (shutdown) mode flag.
    # [review] the 'return self._cleanup' line is elided in this excerpt.
    if flag: self._cleanup = flag
def gdb(self, flag = None):
    # Get/set the gdb-script pause flag.
    # [review] the 'return self._gdb' line is elided in this excerpt.
    if flag: self._gdb = flag
def nomod(self, flag = None):
    # Get/set the skip-module-load/unload flag.
    # [review] the 'return self._nomod' line is elided in this excerpt.
    if flag: self._nomod = flag
def nosetup(self, flag = None):
    # Get/set the skip-device-setup/cleanup flag.
    # [review] the 'return self._nosetup' line is elided in this excerpt.
    if flag: self._nosetup = flag
def node(self, val = None):
    # Get/set the node name whose profile should be configured.
    # [review] the 'return self._node' line is elided in this excerpt.
    if val: self._node = val
def url(self, val = None):
    # Get/set the URL from which to fetch the XML config.
    # [review] the 'return self._url' line is elided in this excerpt.
    if val: self._url = val
def gdb_script(self):
    # Return the gdb module-script path, relocated under the /r prefix
    # when that directory exists (diskless/chroot-style installs).
    if os.path.isdir('/r'):
        return '/r' + self._gdb_script
    # [review] an 'else:' line appears elided here; behavior is the same.
    return self._gdb_script
def debug_path(self):
    # Return the debug-log path, relocated under the /r prefix when
    # that directory exists.
    if os.path.isdir('/r'):
        return '/r' + self._debug_path
    # [review] an 'else:' line appears elided here; behavior is the same.
    return self._debug_path
def src_dir(self, val = None):
    # Get/set the base directory of the lustre source tree (module search).
    # [review] the 'return self._src_dir' line is elided in this excerpt.
    if val: self._src_dir = val
def dump_file(self, val = None):
    """Get or set the lctl debug-dump file path.

    A truthy *val* becomes the new value; the current value is
    returned either way.
    """
    if val:
        self._dump_file = val
    return self._dump_file
150 # ============================================================
151 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort by raising LconfError marking *msg* as unimplemented.

    Fixes the misspelled 'implmemented' in the error text and uses the
    call-style raise already used elsewhere in this file (rather than
    the legacy 'raise Class, value' form).
    """
    raise LconfError(msg + ' not implemented yet.')
157 msg = string.join(map(str,args))
158 if not config.noexec():
159 raise LconfError(msg)
164 msg = string.join(map(str,args))
169 print string.strip(s)
173 msg = string.join(map(str,args))
176 # ============================================================
177 # locally defined exceptions
class CommandError (exceptions.Exception):
    """Raised when an external command (lctl, insmod, mount, ...) fails.

    Holds the command name, its error output (a single string or a
    list of output lines), and — when known — the numeric return code.
    """
    def __init__(self, cmd_name, cmd_err, rc=None):
        self.cmd_name = cmd_name
        self.cmd_err = cmd_err
        # [review] a 'self.rc = rc' line appears elided in this excerpt;
        # the reporting code below reads self.rc.

    # [review] the 'def' line of the error-reporting method below, and
    # the 'if self.rc:' / 'else:' lines around the print pairs, are
    # elided in this excerpt.
        if type(self.cmd_err) == types.StringType:
            print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
            print "! %s: %s" % (self.cmd_name, self.cmd_err)
        elif type(self.cmd_err) == types.ListType:
            print "! %s (error %d):" % (self.cmd_name, self.rc)
            print "! %s:" % (self.cmd_name)
            for s in self.cmd_err:
                print "> %s" %(string.strip(s))
class LconfError (exceptions.Exception):
    """Generic lconf failure (configuration or runtime error)."""
    def __init__(self, args):
        # [review] the body (presumably 'self.args = args') is elided
        # in this excerpt.
206 # ============================================================
207 # handle lctl interface
210 Manage communication with lctl
def __init__(self, cmd):
    """
    Initialize close by finding the lctl binary.
    """
    self.lctl = find_prog(cmd)
    # [review] the 'if not self.lctl:' / config.noexec() handling lines
    # are elided in this excerpt; on failure in real-run mode a
    # CommandError is raised.
    debug('! lctl not found')
    raise CommandError('lctl', "unable to find lctl binary.")
228 the cmds are written to stdin of lctl
229 lctl doesn't return errors when run in script mode, so
231 should modify command line to accept multiple commands, or
232 create complex command line options
234 debug("+", self.lctl, cmds)
235 if config.noexec(): return (0, [])
236 p = popen2.Popen3(self.lctl, 1)
237 p.tochild.write(cmds + "\n")
239 out = p.fromchild.readlines()
240 err = p.childerr.readlines()
243 raise CommandError(self.lctl, err, ret)
def network(self, net, nid):
    """ initialized network and add "self" """
    # Idea: "mynid" could be used for all network types to add "self," and then
    # this special case would be gone and the "self" hack would be hidden.
    # [review] the head of the lctl command template (network/mynid/
    # add_uuid lines) and the self.run call are elided in this excerpt.
    quit""" % (net, nid, nid)
# create a new connection
def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
    # [review] the branch structure (tcp template with buffer sizes vs
    # the non-tcp template) is elided; only the two template tails and
    # their argument tuples survive in this excerpt.
    quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
    quit""" % (net, servuuid, nid, nid, port, )
# add a route to a range
def add_route(self, net, gw, lo, hi):
    # [review] the head of the lctl add_route command template and the
    # self.run call are elided in this excerpt.
    quit """ % (net, gw, lo, hi)
# add a route to a range
def del_route(self, net, gw, lo, hi):
    # [review] the body (del_route lctl command template and self.run
    # call) is elided in this excerpt; mirrors add_route.
# add a route to a host
def add_route_host(self, net, uuid, gw, tgt):
    # [review] the head of the command template (add_uuid + add_route
    # lines) and the self.run call are elided in this excerpt.
    quit """ % (net, uuid, tgt, gw, tgt)
# disconnect one connection
def disconnect(self, net, nid, port, servuuid):
    # [review] the head of the disconnect/del_uuid template and the
    # self.run call are elided in this excerpt.
    quit""" % (net, nid, servuuid)
def disconnectAll(self, net):
    # [review] the body (lctl command template tearing down every
    # connection on *net*, plus the self.run call) is elided here.
# create a new device with lctl
def newdev(self, attach, setup = ""):
    # [review] the head of the newdev/attach/setup template and the
    # self.run call are elided in this excerpt.
    quit""" % (attach, setup)
def cleanup(self, name, uuid):
    # [review] the body (lctl cleanup/detach template for the named
    # device, plus the self.run call) is elided in this excerpt.
def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
    # [review] the head of the template (device attach lines consuming
    # the leading tuple items) is elided; the 7-item tuple feeds the
    # full multi-line template.
    lovconfig %s %d %d %d %s %s
    quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
def dump(self, dump_file):
    # Dump the kernel debug log to *dump_file* via lctl.
    # [review] the head of the 'debug_kernel' template and the self.run
    # call are elided in this excerpt.
    quit""" % (dump_file)
362 # ============================================================
363 # Various system-level functions
364 # (ideally moved to their own module)
366 # Run a command and return the output and status.
367 # stderr is sent to /dev/null, could use popen3 to
368 # save it if necessary
370 cmd = string.join(map(str,args))
372 if config.noexec(): return (0, [])
373 f = os.popen(cmd + ' 2>&1')
# Run a command in the background.
def run_daemon(*args):
    cmd = string.join(map(str,args))
    # [review] a debug() trace line is elided here; in noexec mode the
    # command is not started.
    if config.noexec(): return 0
    f = os.popen(cmd + ' 2>&1')
    # [review] the tail (reading/closing the pipe and returning its
    # status) is elided in this excerpt.
395 # Determine full path to use for an external command
396 # searches dirname(argv[0]) first, then PATH
398 syspath = string.split(os.environ['PATH'], ':')
399 cmdpath = os.path.dirname(sys.argv[0])
400 syspath.insert(0, cmdpath);
401 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
403 prog = os.path.join(d,cmd)
404 if os.access(prog, os.X_OK):
# Recursively look for file starting at base dir
def do_find_file(base, mod):
    # Depth-first search for *mod* under *base*; returns the full path
    # when found.
    fullname = os.path.join(base, mod)
    if os.access(fullname, os.R_OK):
        # [review] 'return fullname' line elided in this excerpt.
    for d in os.listdir(base):
        dir = os.path.join(base,d)
        if os.path.isdir(dir):
            module = do_find_file(dir, mod)
            # [review] 'if module: return module' tail elided here.
def find_module(src_dir, dev_dir, modname):
    # Locate the kernel object modname.o, first at the expected
    # src_dir/dev_dir path, otherwise (elided) by recursive search.
    mod = '%s.o' % (modname)
    module = src_dir +'/'+ dev_dir +'/'+ mod
    if os.access(module, os.R_OK):
        # [review] 'return module' and the do_find_file() fallback are
        # elided in this excerpt.
430 # is the path a block device?
437 return stat.S_ISBLK(s[stat.ST_MODE])
# build fs according to type
# (currently only ext3/extN are supported, via mkfs.ext2 -j)
def mkfs(fstype, dev):
    # Format *dev*, then force-enable htree directory indexing.
    if(fstype in ('ext3', 'extN')):
        mkfs = 'mkfs.ext2 -j -b 4096'
    # [review] the 'else:' line, a '-F' force-flag setup for non-block
    # (loop file) targets, and the 'if ret:' checks are elided in this
    # excerpt; the visible lines below are fragments of those branches.
        print 'unsupported fs type: ', fstype
    if not is_block(dev):
    (ret, out) = run (mkfs, force, dev)
        panic("Unable to build fs:", dev)
    # enable hash tree indexing on fs
    htree = 'echo "feature FEATURE_C5" | debugfs -w'
    (ret, out) = run (htree, dev)
        panic("Unable to enable htree:", dev)
460 # some systems use /dev/loopN, some /dev/loop/N
464 if not os.access(loop + str(0), os.R_OK):
466 if not os.access(loop + str(0), os.R_OK):
467 panic ("can't access loop devices")
470 # find loop device assigned to thefile
473 for n in xrange(0, MAX_LOOP_DEVICES):
475 if os.access(dev, os.R_OK):
476 (stat, out) = run('losetup', dev)
477 if (out and stat == 0):
478 m = re.search(r'\((.*)\)', out[0])
479 if m and file == m.group(1):
# create file if necessary and assign the first free loop device
def init_loop(file, size, fstype):
    dev = find_loop(file)
    # [review] the 'if dev:' guard line is elided here.
        print 'WARNING file:', file, 'already mapped to', dev
    # Create a sparse backing file of *size* KB if it does not exist.
    if not os.access(file, os.R_OK | os.W_OK):
        run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
    # find next free loop
    for n in xrange(0, MAX_LOOP_DEVICES):
        dev = loop + str(n)  # [review] exact line elided; reconstructed meaning only in this comment — the code line here is from the excerpt:
        if os.access(dev, os.R_OK):
            (stat, out) = run('losetup', dev)
            # [review] the 'if stat:' free-device test is elided; a free
            # device gets the file attached and is returned.
            run('losetup', dev, file)
    print "out of loop devices"
    print "out of loop devices"
    # [review] the surrounding error/return lines for the two
    # "out of loop devices" messages are elided in this excerpt.
# undo loop assignment
def clean_loop(file):
    # Detach the loop device currently backed by *file*, if any.
    dev = find_loop(file)
    # [review] the 'if dev:' guard and the 'if ret:' check lines are
    # elided in this excerpt.
    ret, out = run('losetup -d', dev)
        log('unable to clean loop device:', dev, 'for file:', file)
# determine if dev is formatted as a <fstype> filesystem
def need_format(fstype, dev):
    # FIXME don't know how to implement this
    # [review] the return line (presumably an unconditional truthy
    # placeholder) is elided in this excerpt.
# initialize a block device if needed
def block_dev(dev, size, fstype, format):
    # Map a regular file to a loop device when needed, and (elided)
    # run mkfs when reformat is forced or the device needs formatting.
    if config.noexec(): return dev
    if not is_block(dev):
        dev = init_loop(dev, size, fstype)
    if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
        # [review] the mkfs(fstype, dev) call and 'return dev' tail are
        # elided in this excerpt.
#        panic("device:", dev,
#              "not prepared, and autoformat is not set.\n",
#              "Rerun with --reformat option to format ALL filesystems")
def get_local_address(net_type):
    """Return the local address for the network type."""
    # tcp -> hostname's IP; elan -> NodeId from /proc; gm -> unimplemented.
    if net_type == 'tcp':
        host = socket.gethostname()
        local = socket.gethostbyname(host)
    elif net_type == 'elan':
        # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
        fp = open('/proc/elan/device0/position', 'r')
        lines = fp.readlines()
        # [review] the NodeId-parsing loop and the 'return local' tail
        # are elided in this excerpt.
    elif net_type == 'gm':
        fixme("automatic local address for GM")
563 # ============================================================
564 # Classes to prepare and cleanup the various objects
567 """ Base class for the rest of the modules. The default cleanup method is
568 defined here, as well as some utilitiy funcs.
def __init__(self, module_name, dom_node):
    # Common service-module state: the XML node, a display name, the
    # name/uuid attributes, and the kernel-module load list.
    self.dom_node = dom_node
    self.module_name = module_name
    self.name = get_attr(dom_node, 'name')
    self.uuid = get_attr(dom_node, 'uuid')
    self.kmodule_list = []
    # [review] additional attribute initialization lines (e.g. the
    # cached _server) appear elided in this excerpt.
def info(self, *args):
    # Print a one-line status message tagged with module, name and uuid.
    msg = string.join(map(str,args))
    print self.module_name + ":", self.name, self.uuid, msg
def lookup_server(self, srv_uuid):
    """ Lookup a server's network information """
    # Resolve the server uuid to its <network> node and cache a
    # Network wrapper for later connect/disconnect calls.
    net = get_ost_net(self.dom_node.parentNode, srv_uuid)
    self._server = Network(net)
def get_server(self):
    # [review] the body (presumably 'return self._server') is elided
    # in this excerpt.
593 """ default cleanup, used for most modules """
595 srv = self.get_server()
596 if srv and local_net(srv):
598 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
599 except CommandError, e:
600 log(self.module_name, "disconnect failed: ", self.name)
603 lctl.cleanup(self.name, self.uuid)
604 except CommandError, e:
605 log(self.module_name, "cleanup failed: ", self.name)
def add_module(self, dev_dir, modname):
    """Queue (dev_dir, modname) for load_module to insmod later."""
    entry = (dev_dir, modname)
    self.kmodule_list.append(entry)
def mod_loaded(self, modname):
    """Check if a module is already loaded. Look in /proc/modules for it."""
    fp = open('/proc/modules')
    lines = fp.readlines()
    # [review] an fp.close() line appears elided in this excerpt.
    # please forgive my tired fingers for this one
    ret = filter(lambda word, mod=modname: word == mod,
                 map(lambda line: string.split(line)[0], lines))
    # [review] the 'return ret' line is elided in this excerpt.
def load_module(self):
    """Load all the modules in the list in the order they appear."""
    for dev_dir, mod in self.kmodule_list:
        # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
        # Skip modules that are already loaded (real-run mode only).
        if self.mod_loaded(mod) and not config.noexec():
            # [review] a 'continue' line is elided here.
        log ('loading module:', mod)
        # Prefer an insmod of the object found in the source tree,
        # otherwise (elided 'else:') fall back to modprobe.
        if config.src_dir():
            module = find_module(config.src_dir(),dev_dir, mod)
            # [review] 'if not module:' guard line elided here.
            panic('module not found:', mod)
            (rc, out) = run('/sbin/insmod', module)
            # [review] 'if rc:' guard line elided here.
            raise CommandError('insmod', out, rc)
            (rc, out) = run('/sbin/modprobe', mod)
            # [review] 'if rc:' guard line elided here.
            raise CommandError('modprobe', out, rc)
def cleanup_module(self):
    """Unload the modules in the list in reverse order."""
    rev = self.kmodule_list
    # [review] a 'rev.reverse()' line appears elided in this excerpt.
    for dev_dir, mod in rev:
        if not self.mod_loaded(mod):
            # [review] a 'continue' line is elided here.
        # Flush the kernel debug log before portals disappears.
        if mod == 'portals' and config.dump_file():
            lctl.dump(config.dump_file())
        log('unloading module:', mod)
        # [review] a noexec guard appears elided here.
        (rc, out) = run('/sbin/rmmod', mod)
        # [review] 'if rc:' guard line elided here.
        log('! unable to unload module:', mod)
class Network(Module):
    """Per-node network service: loads the NAL modules, starts the tcp
    acceptor, registers the local nid and configures routes/connections.
    [review] Many lines of this class are elided in this excerpt; each
    elision is marked below."""
    def __init__(self,dom_node):
        Module.__init__(self, 'NETWORK', dom_node)
        self.net_type = get_attr(dom_node,'type')
        self.nid = get_text(dom_node, 'server', '*')
        self.port = get_text_int(dom_node, 'port', 0)
        self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
        self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
        # [review] the "if self.nid == '*':" guard line is elided here;
        # a wildcard nid is replaced by the autodetected local address.
        self.nid = get_local_address(self.net_type)
        # [review] 'if not self.nid:' guard line elided here.
        panic("unable to set nid for", self.net_type)
        self.add_module('portals/linux/oslib/', 'portals')
        if node_needs_router():
            self.add_module('portals/linux/router', 'kptlrouter')
        if self.net_type == 'tcp':
            self.add_module('portals/linux/socknal', 'ksocknal')
        if self.net_type == 'elan':
            self.add_module('portals/linux/rqswnal', 'kqswnal')
        if self.net_type == 'gm':
            self.add_module('portals/linux/gmnal', 'kgmnal')
        self.add_module('lustre/obdclass', 'obdclass')
        self.add_module('lustre/ptlrpc', 'ptlrpc')

    # [review] the 'def prepare(self):' line is elided in this excerpt.
        self.info(self.net_type, self.nid, self.port)
        if self.net_type == 'tcp':
            ret = run_daemon(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
            # [review] 'if ret:' guard line elided here.
            raise CommandError(TCP_ACCEPTOR, 'failed', ret)
        ret = self.dom_node.getElementsByTagName('route_tbl')
        # [review] the enclosing 'for a in ret:' line is elided here.
        for r in a.getElementsByTagName('route'):
            net_type = get_attr(r, 'type')
            gw = get_attr(r, 'gw')
            lo = get_attr(r, 'lo')
            hi = get_attr(r,'hi', '')
            lctl.add_route(net_type, gw, lo, hi)
            # For tcp single-host routes, also connect to the gateway.
            if self.net_type == 'tcp' and hi == '':
                srv = nid2server(self.dom_node.parentNode.parentNode, lo)
                # [review] 'if not srv:' guard line elided here.
                panic("no server for nid", lo)
                # [review] an 'else:' line is elided here.
                lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
        lctl.network(self.net_type, self.nid)
        lctl.newdev(attach = "ptlrpc RPCDEV")

    # [review] the 'def cleanup(self):' line is elided in this excerpt.
        self.info(self.net_type, self.nid, self.port)
        ret = self.dom_node.getElementsByTagName('route_tbl')
        # [review] the enclosing 'for a in ret:' line is elided here.
        for r in a.getElementsByTagName('route'):
            lo = get_attr(r, 'lo')
            hi = get_attr(r,'hi', '')
            if self.net_type == 'tcp' and hi == '':
                srv = nid2server(self.dom_node.parentNode.parentNode, lo)
                # [review] 'if not srv:' guard line elided here.
                panic("no server for nid", lo)
                # [review] 'else:' and 'try:' lines elided here.
                lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
            except CommandError, e:
                print "disconnect failed: ", self.name
            # [review] a 'try:' line is elided here.
            lctl.del_route(self.net_type, self.nid, lo, hi)
            except CommandError, e:
                print "del_route failed: ", self.name
        # [review] a 'try:' line is elided here.
        lctl.cleanup("RPCDEV", "")
        except CommandError, e:
            print "cleanup failed: ", self.name
        # [review] a 'try:' line is elided here.
        lctl.disconnectAll(self.net_type)
        except CommandError, e:
            print "disconnectAll failed: ", self.name
        if self.net_type == 'tcp':
            # yikes, this ugly! need to save pid in /var/something
            run("killall acceptor")
def __init__(self,dom_node):
    # Lock manager service: only needs the ldlm kernel module.
    Module.__init__(self, 'LDLM', dom_node)
    self.add_module('lustre/ldlm', 'ldlm')
753 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
def __init__(self,dom_node):
    # Striping config: resolve the owning MDS and collect the OSC
    # device list plus stripe size/offset/pattern from <devices>.
    Module.__init__(self, 'LOV', dom_node)
    self.mds_uuid = get_first_ref(dom_node, 'mds')
    mds= lookup(dom_node.parentNode, self.mds_uuid)
    self.mds_name = getName(mds)
    devs = dom_node.getElementsByTagName('devices')
    # [review] the lines selecting the first <devices> node (dev_node)
    # are elided in this excerpt.
    self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
    self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
    self.pattern = get_attr_int(dev_node, 'pattern', 0)
    self.devlist = get_all_refs(dev_node, 'osc')
    self.stripe_cnt = len(self.devlist)
    self.add_module('lustre/mdc', 'mdc')
    self.add_module('lustre/lov', 'lov')
774 for osc_uuid in self.devlist:
775 osc = lookup(self.dom_node.parentNode, osc_uuid)
780 panic('osc not found:', osc_uuid)
781 mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
782 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
783 self.devlist, self.mds_name)
784 lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
785 setup ="%s" % (mdc_uuid))
788 for osc_uuid in self.devlist:
789 osc = lookup(self.dom_node.parentNode, osc_uuid)
794 panic('osc not found:', osc_uuid)
796 cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
def load_module(self):
    # Load the modules of every member OSC first, then our own.
    for osc_uuid in self.devlist:
        osc = lookup(self.dom_node.parentNode, osc_uuid)
        # [review] the 'if osc:' branch calling osc.load_module() is
        # elided in this excerpt.
        panic('osc not found:', osc_uuid)
    Module.load_module(self)
def cleanup_module(self):
    # Unload our own modules first, then each member OSC's (reverse of
    # load_module).
    Module.cleanup_module(self)
    for osc_uuid in self.devlist:
        osc = lookup(self.dom_node.parentNode, osc_uuid)
        # [review] the 'if osc:' branch calling osc.cleanup_module() is
        # elided in this excerpt.
        panic('osc not found:', osc_uuid)
class LOVConfig(Module):
    """Pushes a LOV striping description to the kernel via lctl
    lovconfig (run on the MDS side)."""
    def __init__(self,dom_node):
        Module.__init__(self, 'LOVConfig', dom_node)
        self.lov_uuid = get_first_ref(dom_node, 'lov')
        l = lookup(dom_node.parentNode, self.lov_uuid)
        # [review] the lines validating 'l' and wrapping it as
        # 'self.lov = LOV(l)' are elided in this excerpt.

    # [review] the 'def prepare(self):' line and the 'lov = self.lov'
    # binding are elided in this excerpt.
        self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern,
                  lov.devlist, lov.mds_name)
        lctl.lovconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
                       lov.stripe_sz, lov.stripe_off, lov.pattern,
                       string.join(lov.devlist))
def __init__(self, dom_node):
    """Parse an MDS service node: backing device, filesystem type,
    autoformat policy, and the kernel modules it requires."""
    Module.__init__(self, 'MDS', dom_node)
    (self.devname, self.size) = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', "no")
    # extN needs its own patched-ext2 module loaded before mds itself.
    if self.fstype == 'extN':
        self.add_module('lustre/extN', 'extN')
    self.add_module('lustre/mds', 'mds')
    self.add_module('lustre/mds', 'mds_' + self.fstype)
850 self.info(self.devname, self.fstype, self.format)
851 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
852 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
853 setup ="%s %s" %(blkdev, self.fstype))
856 clean_loop(self.devname)
# Very unusual case, as there is no MDC element in the XML anymore
# Builds itself from an MDS node
# [review] the 'class MDC(Module):' line is elided in this excerpt.
def __init__(self,dom_node):
    # Synthesize an MDC from the MDS node: name/uuid are derived from
    # the local hostname rather than read from XML.
    self.mds = MDS(dom_node)
    self.dom_node = dom_node
    self.module_name = 'MDC'
    self.kmodule_list = []
    # [review] additional initialization lines are elided here.
    host = socket.gethostname()
    self.name = 'MDC_'+host
    self.uuid = self.name+'_UUID'
    # [review] one line is elided here.
    self.lookup_server(self.mds.uuid)
    self.add_module('lustre/mdc', 'mdc')
877 self.info(self.mds.uuid)
878 srv = self.get_server()
879 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
880 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
881 setup ="%s %s" %(self.mds.uuid, srv.uuid))
def __init__(self, dom_node):
    """Parse an OBD service node: driver type, backing device,
    filesystem type, autoformat policy, and required modules."""
    Module.__init__(self, 'OBD', dom_node)
    self.obdtype = get_attr(dom_node, 'type')
    (self.devname, self.size) = get_device(dom_node)
    self.fstype = get_text(dom_node, 'fstype')
    self.format = get_text(dom_node, 'autoformat', 'yes')
    # extN needs its own patched-ext2 module loaded first.
    if self.fstype == 'extN':
        self.add_module('lustre/extN', 'extN')
    # The driver module directory is named after the obd type itself.
    self.add_module('lustre/' + self.obdtype, self.obdtype)
894 # need to check /proc/mounts and /etc/mtab before
895 # formatting anything.
896 # FIXME: check if device is already formatted.
898 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
899 if self.obdtype == 'obdecho':
902 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
903 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
904 setup ="%s %s" %(blkdev, self.fstype))
907 if not self.obdtype == 'obdecho':
908 clean_loop(self.devname)
def __init__(self,dom_node):
    # OST service: records the backing OBD's uuid and needs the ost
    # kernel module.
    Module.__init__(self, 'OST', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    self.add_module('lustre/ost', 'ost')
917 self.info(self.obd_uuid)
918 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
919 setup ="%s" % (self.obd_uuid))
# virtual interface for OSC and LOV
# [review] the 'class VOSC(Module):' line is elided in this excerpt.
def __init__(self,dom_node):
    # Wrap either a LOV (for <lov> nodes) or a plain OSC, presenting a
    # uniform osc-like interface to callers.
    Module.__init__(self, 'VOSC', dom_node)
    if dom_node.nodeName == 'lov':
        self.osc = LOV(dom_node)
    # [review] the 'else:' line is elided in this excerpt.
    self.osc = OSC(dom_node)
def load_module(self):
    """Delegate module loading to the wrapped OSC/LOV object."""
    self.osc.load_module()
def cleanup_module(self):
    """Delegate module unloading to the wrapped OSC/LOV object."""
    self.osc.cleanup_module()
def __init__(self,dom_node):
    # OSC client: remembers its OBD and OST uuids and resolves the
    # OST server's network information for connecting later.
    Module.__init__(self, 'OSC', dom_node)
    self.obd_uuid = get_first_ref(dom_node, 'obd')
    self.ost_uuid = get_first_ref(dom_node, 'ost')
    self.lookup_server(self.ost_uuid)
    self.add_module('lustre/osc', 'osc')
949 self.info(self.obd_uuid, self.ost_uuid)
950 srv = self.get_server()
952 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
956 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
958 panic ("no route to", srv.nid)
960 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
961 setup ="%s %s" %(self.obd_uuid, srv.uuid))
class Mountpoint(Module):
    """Client mountpoint: prepares an MDC plus the OSC/LOV stack and
    mounts lustre_lite at *path*.
    [review] Several lines of this class are elided in this excerpt;
    each elision is marked below."""
    def __init__(self,dom_node):
        Module.__init__(self, 'MTPT', dom_node)
        self.path = get_text(dom_node, 'path')
        self.mds_uuid = get_first_ref(dom_node, 'mds')
        self.lov_uuid = get_first_ref(dom_node, 'osc')
        self.add_module('lustre/mdc', 'mdc')
        self.add_module('lustre/llite', 'llite')
        l = lookup(self.dom_node.parentNode, self.lov_uuid)
        # [review] the lines wrapping 'l' as 'self.osc = VOSC(l)' are
        # elided in this excerpt.

    # [review] the 'def prepare(self):' line and the osc.prepare()
    # call are elided in this excerpt.
        mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
        self.info(self.path, self.mds_uuid,self.lov_uuid)
        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
              (self.lov_uuid, mdc_uuid, self.path)
        run("mkdir", self.path)
        # [review] the '(rc, out) = run(cmd)' and 'if rc:' lines are
        # elided in this excerpt.
        panic("mount failed:", self.path)

    # [review] the 'def cleanup(self):' line is elided in this excerpt.
        self.info(self.path, self.mds_uuid,self.lov_uuid)
        (rc, out) = run("umount", self.path)
        # [review] 'if rc:' guard line elided here.
        log("umount failed, cleanup will most likely not work.")
        l = lookup(self.dom_node.parentNode, self.lov_uuid)
        # [review] the osc cleanup call is elided here.
        cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)

    def load_module(self):
        # Member OSC/LOV modules first, then our own (mdc, llite).
        self.osc.load_module()
        Module.load_module(self)
    def cleanup_module(self):
        # Reverse order of load_module.
        Module.cleanup_module(self)
        self.osc.cleanup_module()
1004 # ============================================================
1005 # XML processing and query
1006 # TODO: Change query funcs to use XPath, which is muc cleaner
def get_device(obd):
    # Return (device-path, size) from the first <device> child of obd.
    list = obd.getElementsByTagName('device')
    # [review] the lines selecting the first list element as 'dev' and
    # normalizing it are elided in this excerpt.
    size = get_attr_int(dev, 'size', 0)
    return dev.firstChild.data, size
# Get the text content from the first matching child
# If there is no content (or it is all whitespace), return
# the default
def get_text(dom_node, tag, default=""):
    list = dom_node.getElementsByTagName(tag)
    # [review] the lines rebinding dom_node to the first match and the
    # default-return tail are elided in this excerpt.
    dom_node.normalize()
    if dom_node.firstChild:
        txt = string.strip(dom_node.firstChild.data)
        # [review] 'if txt: return txt' and 'return default' elided.
def get_text_int(dom_node, tag, default=0):
    # Like get_text, but converts the text to int; panics when the
    # value is not numeric.
    list = dom_node.getElementsByTagName(tag)
    # [review] the lines rebinding dom_node to the first match and the
    # int() conversion inside try/except are elided in this excerpt.
    dom_node.normalize()
    if dom_node.firstChild:
        txt = string.strip(dom_node.firstChild.data)
        # [review] 'try: return int(txt)' lines elided here.
        panic("text value is not integer:", txt)
def get_attr(dom_node, attr, default=""):
    # Return attribute *attr* of dom_node, or *default* when unset.
    v = dom_node.getAttribute(attr)
    # [review] the 'if v: return v' / 'return default' tail is elided
    # in this excerpt.
def get_attr_int(dom_node, attr, default=0):
    # Like get_attr but converts to int; panics on non-numeric values.
    v = dom_node.getAttribute(attr)
    # [review] the 'if v:' / 'try: return int(v)' lines are elided in
    # this excerpt.
    panic("attr value is not integer", v)
def get_first_ref(dom_node, tag):
    """ Get the first uuidref of the type TAG. Used one only
    one is expected.  Returns the uuid."""
    # <tag>_ref elements carry a 'uuidref' attribute (see getRef).
    refname = '%s_ref' % tag
    list = dom_node.getElementsByTagName(refname)
    # [review] the 'if list:' guard and 'return uuid' tail are elided
    # in this excerpt.
    uuid = getRef(list[0])
def get_all_refs(dom_node, tag):
    """ Get all the refs of type TAG. Returns list of uuids. """
    # [review] the 'uuids = []' initialization, the loop header over
    # 'list', and the 'return uuids' tail are elided in this excerpt.
    refname = '%s_ref' % tag
    list = dom_node.getElementsByTagName(refname)
    uuids.append(getRef(i))
def get_ost_net(dom_node, uuid):
    # Resolve a server uuid to its <network> node: look up the server,
    # take its first network ref, then look up that network node.
    ost = lookup(dom_node, uuid)
    uuid = get_first_ref(ost, 'network')
    # [review] a 'if not uuid: return None' guard appears elided here.
    return lookup(dom_node, uuid)
def nid2server(dom_node, nid):
    # Find the <network> element whose <server> text matches *nid* and
    # wrap it as a Network object.
    netlist = dom_node.getElementsByTagName('network')
    for net_node in netlist:
        if get_text(net_node, 'server') == nid:
            return Network(net_node)
    # [review] the 'return None' miss-tail is elided in this excerpt.
def lookup(dom_node, uuid):
    # Linear scan of dom_node's element children for a matching uuid.
    for n in dom_node.childNodes:
        if n.nodeType == n.ELEMENT_NODE:
            if getUUID(n) == uuid:
                # [review] the 'return n' hit and the recursive/miss
                # tail are elided in this excerpt.
def getName(dom_node):
    """Return the value of *dom_node*'s 'name' attribute ('' if unset)."""
    attr_val = dom_node.getAttribute('name')
    return attr_val
def getRef(dom_node):
    """Return the value of *dom_node*'s 'uuidref' attribute ('' if unset)."""
    ref = dom_node.getAttribute('uuidref')
    return ref
def getUUID(dom_node):
    """Return the value of *dom_node*'s 'uuid' attribute ('' if unset)."""
    uuid_val = dom_node.getAttribute('uuid')
    return uuid_val
def getServiceType(dom_node):
    """Service type of *dom_node* — by convention, its element tag name.

    FIXME: should verify that the node really is a service element.
    """
    return dom_node.nodeName
# determine what "level" a particular node is at.
# the order of iniitailization is based on level.
def getServiceLevel(dom_node):
    # Maps a service element to its startup ordering level (networks
    # first, mountpoints last).
    type = getServiceType(dom_node)
    # [review] the 'level = N' / return lines of each branch are elided
    # in this excerpt; only the branch conditions survive.
    if type in ('network',):
    elif type in ('device', 'ldlm'):
    elif type in ('obd', 'mdd'):
    elif type in ('mds','ost'):
    elif type in ('mdc','osc'):
    elif type in ('lov', 'lovconfig'):
    elif type in ('mountpoint',):
# return list of services in a profile. list is a list of tuples
# [(level, dom_node),]
def getServices(lustreNode, profileNode):
    # [review] the 'list = []' initialization and the sorted-return
    # tail are elided in this excerpt.
    for n in profileNode.childNodes:
        if n.nodeType == n.ELEMENT_NODE:
            servNode = lookup(lustreNode, getRef(n))
            # [review] 'if not servNode:' guard line elided here.
            panic('service not found: ' + getRef(n))
            level = getServiceLevel(servNode)
            list.append((level, servNode))
def getByName(lustreNode, name, tag):
    # Return the first <tag> element whose 'name' attribute matches.
    ndList = lustreNode.getElementsByTagName(tag)
    # [review] the 'for nd in ndList:' header and 'return nd'/'return
    # None' lines are elided in this excerpt.
    if getName(nd) == name:
1167 ############################################################
1169 # FIXME: clean this mess up!
def prepare_mdc(dom_node, mds_uuid):
    # Build and prepare an MDC for the given MDS; returns its uuid.
    # [review] the global-state handling and the MDC construction /
    # prepare / return lines are elided in this excerpt.
    mds_node = lookup(dom_node, mds_uuid);
    # [review] 'if not mds_node:' guard line elided here.
    panic("no mds:", mds_uuid)
def cleanup_mdc(dom_node, mds_uuid):
    # Tear down the MDC previously prepared for this MDS.
    # [review] the global-state handling and the MDC cleanup lines are
    # elided in this excerpt.
    mds_node = lookup(dom_node, mds_uuid);
    # [review] 'if not mds_node:' guard line elided here.
    panic("no mds:", mds_uuid)
1197 ############################################################
1198 # routing ("rooting")
def init_node(dom_node):
    # Record this node's (net_type, address) pairs in the global
    # local_node list, used later for routing decisions.
    global local_node, router_flag
    netlist = dom_node.getElementsByTagName('network')
    for dom_net in netlist:
        type = get_attr(dom_net, 'type')
        gw = get_text(dom_net, 'server')
        local_node.append((type, gw))
    # [review] any router_flag handling appears elided in this excerpt.
def node_needs_router():
    # [review] the body (presumably 'return router_flag') is elided in
    # this excerpt.
def get_routes(type, gw, dom_net):
    """ Return the routes as a list of tuples of the form:
    [(type, gw, lo, hi),]"""
    # [review] the 'res = []' initialization, the 'for t in tbl:' and
    # 'for r in routes:' loop headers, and 'return res' are elided.
    tbl = dom_net.getElementsByTagName('route_tbl')
    routes = t.getElementsByTagName('route')
    lo = get_attr(r, 'lo')
    hi = get_attr(r, 'hi', '')
    res.append((type, gw, lo, hi))
def init_route_config(lustre):
    """ Scan the lustre config looking for routers.  Build list of
    routes reachable through each router on one of our local nets. """
    global routes, router_flag
    # [review] the 'routes = []' initialization and loop header
    # 'for node in list:' are elided in this excerpt.
    list = lustre.getElementsByTagName('node')
    if get_attr(node, 'router'):
        # [review] a 'router_flag = 1' style line appears elided here.
        for (local_type, local_nid) in local_node:
            # [review] a 'gw = None' reset line appears elided here.
            netlist = node.getElementsByTagName('network')
            # First pass: find the router's gateway address on a net we
            # share; second pass: collect its routes to other net types.
            for dom_net in netlist:
                if local_type == get_attr(dom_net, 'type'):
                    gw = get_text(dom_net, 'server')
                    # [review] a 'break' and the 'if not gw: continue'
                    # guard are elided in this excerpt.
            for dom_net in netlist:
                if local_type != get_attr(dom_net, 'type'):
                    for route in get_routes(local_type, gw, dom_net):
                        routes.append(route)
1255 for iface in local_node:
1256 if net.net_type == iface[0]:
def find_route(net):
    # Find a route from one of our local net types to *net*'s type.
    global local_node, routes
    frm_type = local_node[0][0]
    to_type = net.net_type
    # [review] the 'to = net.nid' binding, the debug argument, and the
    # matching loop over 'routes' with its return are elided here.
    debug ('looking for route to', to_type,to)
1274 ############################################################
def startService(dom_node, module_flag):
    # Instantiate the right Module subclass for this service element
    # and (in the elided tail) either load/unload its modules or run
    # prepare/cleanup depending on module_flag and config.cleanup().
    type = getServiceType(dom_node)
    debug('Service:', type, getName(dom_node), getUUID(dom_node))
    # there must be a more dynamic way of doing this...
    # [review] several 'elif type == ...' constructor branches (ldlm,
    # lov, mds, ost, obd, osc, mdc) are elided in this excerpt.
    elif type == 'lovconfig':
        n = LOVConfig(dom_node)
    elif type == 'network':
        n = Network(dom_node)
    elif type == 'mountpoint':
        n = Mountpoint(dom_node)
    # [review] the final 'else:' line is elided here.
    panic ("unknown service type:", type)
1308 if config.cleanup():
1313 if config.nosetup():
1315 if config.cleanup():
# Prepare the system to run lustre using a particular profile
# in a the configuration.
#  * load & the modules
#  * setup networking for the current node
#  * make sure partitions are in place and prepared
#  * initialize devices with lctl
# Levels is important, and needs to be enforced.
def startProfile(lustreNode, profileNode, module_flag):
    # [review] the 'if not profileNode:' guard line is elided here.
    panic("profile:", profile, "not found.")
    services = getServices(lustreNode, profileNode)
    # Cleanup runs the service list in reverse start order.
    if config.cleanup():
        # [review] the 'services.reverse()' line and the loop header
        # 'for s in services:' are elided in this excerpt.
    startService(s[1], module_flag)
def doHost(lustreNode, hosts):
    # Select this host's <node> entry, set up routing, then run its
    # profiles in two passes: module load/unload and device setup.
    # [review] the loop header over 'hosts' and several guard lines are
    # elided in this excerpt.
    dom_node = getByName(lustreNode, h, 'node')
    # [review] 'if dom_node: break' style lines elided here.
    print 'No host entry found.'
    # [review] a 'sys.exit(1)' style line appears elided here.
    if not get_attr(dom_node, 'router'):
        # [review] an init_node(dom_node) call appears elided here.
    init_route_config(lustreNode)
    # [review] router-only early handling appears elided here.

    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    module_flag = not config.cleanup()
    reflist = dom_node.getElementsByTagName('profile')
    for profile in reflist:
        startProfile(lustreNode, profile, module_flag)

    if not config.cleanup():
        sys_set_debug_path()
        script = config.gdb_script()
        run(lctl.lctl, ' modules >', script)
        # [review] the 'if config.gdb():' guard is elided here.
        # dump /tmp/ogdb and sleep/pause here
        log ("The GDB module script is in", script)

    # Second pass with the flag flipped (setup after load, or unload
    # after cleanup).
    module_flag = not module_flag
    for profile in reflist:
        startProfile(lustreNode, profile, module_flag)
1379 ############################################################
1380 # Command line processing
def parse_cmdline(argv):
    # Parse command-line options into the global config object and
    # return the remaining positional args (the xml file).
    # [review] the short_opts definition, the closing of the long_opts
    # list, the usage() error path, the option loop header, and most
    # config-setting bodies are elided in this excerpt.
    long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
                 "portals=", "makeldiff", "cleanup", "noexec",
                 "help", "node=", "get=", "nomod", "nosetup",
    opts, args = getopt.getopt(argv, short_opts, long_opts)
    except getopt.error:
        if o in ("-h", "--help"):
        if o in ("-d","--cleanup"):
        if o in ("-v", "--verbose"):
        if o in ("-n", "--noexec"):
        if o == "--portals":
        if o == "--reformat":
        if o == "--nosetup":
1430 s = urllib.urlopen(url)
def setupModulePath(cmd):
    # Infer the lustre source dir from the location of this script: a
    # Makefile next to it means we are running from the source tree.
    base = os.path.dirname(cmd)
    if os.access(base+"/Makefile", os.R_OK):
        config.src_dir(base + "/../../")
def sys_set_debug_path():
    # Point the kernel's portals debug output at the configured path.
    debug("debug path: ", config.debug_path())
    # [review] a noexec guard and a try: around the /proc write appear
    # elided in this excerpt.
    fp = open('/proc/sys/portals/debug_path', 'w')
    fp.write(config.debug_path())
    # [review] fp.close() and the except handler are elided here.
#/proc/sys/net/core/rmem_max
#/proc/sys/net/core/wmem_max
def sys_set_netmem_max(path, max):
    # Raise the kernel socket-buffer limit at *path* to at least *max*.
    debug("setting", path, "to at least", max)
    # [review] the noexec guard, the read of the current value, and the
    # 'if current < max:' comparison are elided in this excerpt.
    fp = open(path, 'w')
    fp.write('%d\n' %(max))
    # [review] an fp.close() line appears elided here.
def sys_make_devices():
    """Create the /dev/portals and /dev/obd character devices if absent."""
    for node_path, node_spec in (('/dev/portals', 'c 10 240'),
                                 ('/dev/obd', 'c 10 241')):
        if not os.access(node_path, os.R_OK):
            run('mknod %s %s' % (node_path, node_spec))
1474 # Initialize or shutdown lustre according to a configuration file
1475 # * prepare the system for lustre
1476 # * configure devices with lctl
1477 # Shutdown does steps in reverse
1480 global TCP_ACCEPTOR, lctl, MAXTCPBUF
1481 host = socket.gethostname()
1483 args = parse_cmdline(sys.argv[1:])
1485 if not os.access(args[0], os.R_OK | os.W_OK):
1486 print 'File not found:', args[0]
1488 dom = xml.dom.minidom.parse(args[0])
1490 xmldata = fetch(config.url())
1491 dom = xml.dom.minidom.parseString(xmldata)
1497 node_list.append(config.node())
1500 node_list.append(host)
1501 node_list.append('localhost')
1502 debug("configuring for host: ", node_list)
1505 config._debug_path = config._debug_path + '-' + host
1506 config._gdb_script = config._gdb_script + '-' + host
1508 TCP_ACCEPTOR = find_prog('acceptor')
1509 if not TCP_ACCEPTOR:
1511 TCP_ACCEPTOR = 'acceptor'
1512 debug('! acceptor not found')
1514 panic('acceptor not found')
1516 lctl = LCTLInterface('lctl')
1518 setupModulePath(sys.argv[0])
1520 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
1521 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
1522 doHost(dom.documentElement, node_list)
if __name__ == "__main__":
    # [review] the 'try: main()' line and the error-printing bodies of
    # the handlers are elided in this excerpt.
    except LconfError, e:
    except CommandError, e: