3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
36 DEFAULT_TCPBUF = 1048576
38 # Maximum number of devices to search for.
39 # (the /dev/loop* nodes need to be created beforehand)
40 MAX_LOOP_DEVICES = 256
44 print """usage: lconf config.xml
46 config.xml Lustre configuration in xml format.
47 --get <url> URL to fetch a config file
48 --node <nodename> Load config for <nodename>
49 -d | --cleanup Cleans up config. (Shutdown)
50 -v | --verbose Print system commands as they are run
51 -h | --help Print this help
52 --gdb Prints message after creating gdb module script
53 and sleeps for 5 seconds.
54 -n | --noexec Prints the commands and steps that will be run for a
55 config without executing them. This can used to check if a
56 config file is doing what it should be doing. (Implies -v)
57 --nomod Skip load/unload module step.
58 --nosetup Skip device setup/cleanup step.
59 --reformat Reformat all devices (without question)
62 --ldap server LDAP server with lustre config database
63 --makeldiff Translate xml source to LDIFF
64 This are perhaps not needed:
65 --lustre="src dir" Base directory of lustre sources. Used to search
67 --portals=src Portals source
71 # ============================================================
72 # Config parameters, encapsulated in a class
87 self._gdb_script = '/tmp/ogdb'
88 self._debug_path = '/tmp/lustre-log'
89 self._dump_file = None
92 def verbose(self, flag = None):
93 if flag: self._verbose = flag
96 def noexec(self, flag = None):
97 if flag: self._noexec = flag
def reformat(self, flag = None):
    """Return the reformat setting, storing FLAG first when it is true.

    A false or omitted FLAG leaves the stored value untouched, so the
    call then acts as a plain getter.
    """
    if not flag:
        return self._reformat
    self._reformat = flag
    return self._reformat
104 def cleanup(self, flag = None):
105 if flag: self._cleanup = flag
108 def gdb(self, flag = None):
109 if flag: self._gdb = flag
112 def nomod(self, flag = None):
113 if flag: self._nomod = flag
116 def nosetup(self, flag = None):
117 if flag: self._nosetup = flag
120 def node(self, val = None):
121 if val: self._node = val
124 def url(self, val = None):
125 if val: self._url = val
128 def gdb_script(self):
129 if os.path.isdir('/r'):
130 return '/r' + self._gdb_script
132 return self._gdb_script
134 def debug_path(self):
135 if os.path.isdir('/r'):
136 return '/r' + self._debug_path
138 return self._debug_path
140 def src_dir(self, val = None):
141 if val: self._src_dir = val
def dump_file(self, val = None):
    """Return the dump file setting, storing VAL first when it is true.

    A false or omitted VAL leaves the stored value untouched.
    """
    if not val:
        return self._dump_file
    self._dump_file = val
    return self._dump_file
150 # ============================================================
151 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with an LconfError saying that MSG is not implemented.

    msg -- short description of the missing feature; it is prefixed to
           ' not implemented yet.' to form the exception text.

    Always raises LconfError; never returns.
    """
    # Call-style raise: same form as the other LconfError raise in this
    # file, and valid in both Python 2 and Python 3.
    raise LconfError(msg + ' not implemented yet.')
157 msg = string.join(map(str,args))
158 if not config.noexec():
159 raise LconfError(msg)
164 msg = string.join(map(str,args))
169 print string.strip(s)
173 msg = string.join(map(str,args))
176 # ============================================================
177 # locally defined exceptions
178 class CommandError (exceptions.Exception):
179 def __init__(self, cmd_name, cmd_err, rc=None):
180 self.cmd_name = cmd_name
181 self.cmd_err = cmd_err
186 if type(self.cmd_err) == types.StringType:
188 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
190 print "! %s: %s" % (self.cmd_name, self.cmd_err)
191 elif type(self.cmd_err) == types.ListType:
193 print "! %s (error %d):" % (self.cmd_name, self.rc)
195 print "! %s:" % (self.cmd_name)
196 for s in self.cmd_err:
197 print "> %s" %(string.strip(s))
201 class LconfError (exceptions.Exception):
202 def __init__(self, args):
206 # ============================================================
207 # handle lctl interface
210 Manage communication with lctl
213 def __init__(self, cmd):
215 Initialize close by finding the lctl binary.
217 self.lctl = find_prog(cmd)
220 debug('! lctl not found')
223 raise CommandError('lctl', "unable to find lctl binary.")
228 the cmds are written to stdin of lctl
229 lctl doesn't return errors when run in script mode, so
231 should modify command line to accept multiple commands, or
232 create complex command line options
234 debug("+", self.lctl, cmds)
235 if config.noexec(): return (0, [])
236 p = popen2.Popen3(self.lctl, 1)
237 p.tochild.write(cmds + "\n")
239 out = p.fromchild.readlines()
240 err = p.childerr.readlines()
243 raise CommandError(self.lctl, err, ret)
247 def network(self, net, nid):
248 """ initialized network and add "self" """
249 # Idea: "mynid" could be used for all network types to add "self," and then
250 # this special case would be gone and the "self" hack would be hidden.
256 quit""" % (net, nid, nid)
265 # create a new connection
266 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
274 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
280 quit""" % (net, servuuid, nid, nid, port, )
284 # add a route to a range
285 def add_route(self, net, gw, lo, hi):
289 quit """ % (net, gw, lo, hi)
293 # delete a route to a range
294 def del_route(self, net, gw, lo, hi):
301 # add a route to a host
302 def add_route_host(self, net, uuid, gw, tgt):
307 quit """ % (net, uuid, tgt, gw, tgt)
310 # delete a route to a host
311 def del_route_host(self, net, uuid, gw, tgt):
316 quit """ % (net, uuid, tgt)
319 # disconnect one connection
320 def disconnect(self, net, nid, port, servuuid):
325 quit""" % (net, nid, servuuid)
329 def disconnectAll(self, net):
337 # create a new device with lctl
338 def newdev(self, attach, setup = ""):
343 quit""" % (attach, setup)
347 def cleanup(self, name, uuid):
356 def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
360 lovconfig %s %d %d %d %s %s
361 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
365 def dump(self, dump_file):
368 quit""" % (dump_file)
371 # ============================================================
372 # Various system-level functions
373 # (ideally moved to their own module)
375 # Run a command and return the output and status.
376 # stderr is merged into stdout (2>&1); could use popen3 to
377 # capture it separately if necessary
379 cmd = string.join(map(str,args))
381 if config.noexec(): return (0, [])
382 f = os.popen(cmd + ' 2>&1')
391 # Run a command in the background.
392 def run_daemon(*args):
393 cmd = string.join(map(str,args))
395 if config.noexec(): return 0
396 f = os.popen(cmd + ' 2>&1')
404 # Determine full path to use for an external command
405 # searches dirname(argv[0]) first, then PATH
407 syspath = string.split(os.environ['PATH'], ':')
408 cmdpath = os.path.dirname(sys.argv[0])
409 syspath.insert(0, cmdpath);
410 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
412 prog = os.path.join(d,cmd)
413 if os.access(prog, os.X_OK):
417 # Recursively look for file starting at base dir
418 def do_find_file(base, mod):
419 fullname = os.path.join(base, mod)
420 if os.access(fullname, os.R_OK):
422 for d in os.listdir(base):
423 dir = os.path.join(base,d)
424 if os.path.isdir(dir):
425 module = do_find_file(dir, mod)
429 def find_module(src_dir, dev_dir, modname):
430 mod = '%s.o' % (modname)
431 module = src_dir +'/'+ dev_dir +'/'+ mod
433 if os.access(module, os.R_OK):
439 # is the path a block device?
446 return stat.S_ISBLK(s[stat.ST_MODE])
448 # build fs according to type
450 def mkfs(fstype, dev):
451 if(fstype in ('ext3', 'extN')):
452 mkfs = 'mkfs.ext2 -j -b 4096'
454 print 'unsupported fs type: ', fstype
455 if not is_block(dev):
459 (ret, out) = run (mkfs, force, dev)
461 panic("Unable to build fs:", dev)
462 # enable hash tree indexing on fs
464 htree = 'echo "feature FEATURE_C5" | debugfs -w'
465 (ret, out) = run (htree, dev)
467 panic("Unable to enable htree:", dev)
469 # some systems use /dev/loopN, some /dev/loop/N
473 if not os.access(loop + str(0), os.R_OK):
475 if not os.access(loop + str(0), os.R_OK):
476 panic ("can't access loop devices")
479 # find loop device assigned to thefile
482 for n in xrange(0, MAX_LOOP_DEVICES):
484 if os.access(dev, os.R_OK):
485 (stat, out) = run('losetup', dev)
486 if (out and stat == 0):
487 m = re.search(r'\((.*)\)', out[0])
488 if m and file == m.group(1):
494 # create file if necessary and assign the first free loop device
495 def init_loop(file, size, fstype):
496 dev = find_loop(file)
498 print 'WARNING file:', file, 'already mapped to', dev
500 if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
501 run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
503 # find next free loop
504 for n in xrange(0, MAX_LOOP_DEVICES):
506 if os.access(dev, os.R_OK):
507 (stat, out) = run('losetup', dev)
509 run('losetup', dev, file)
512 print "out of loop devices"
514 print "out of loop devices"
517 # undo loop assignment
518 def clean_loop(file):
519 dev = find_loop(file)
521 ret, out = run('losetup -d', dev)
523 log('unable to clean loop device:', dev, 'for file:', file)
526 # determine if dev is formatted as a <fstype> filesystem
527 def need_format(fstype, dev):
528 # FIXME don't know how to implement this
531 # initialize a block device if needed
532 def block_dev(dev, size, fstype, format):
533 if config.noexec(): return dev
534 if not is_block(dev):
535 dev = init_loop(dev, size, fstype)
536 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
540 # panic("device:", dev,
541 # "not prepared, and autoformat is not set.\n",
542 # "Rerun with --reformat option to format ALL filesystems")
547 """lookup IP address for an interface"""
548 rc, out = run("/sbin/ifconfig", iface)
551 addr = string.split(out[1])[1]
552 ip = string.split(addr, ':')[1]
555 def get_local_address(net_type, wildcard):
556 """Return the local address for the network type."""
558 if net_type == 'tcp':
560 iface, star = string.split(wildcard, ':')
561 local = if2addr(iface)
563 panic ("unable to determine ip for:", wildcard)
565 host = socket.gethostname()
566 local = socket.gethostbyname(host)
567 elif net_type == 'elan':
568 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
570 fp = open('/proc/elan/device0/position', 'r')
571 lines = fp.readlines()
580 elif net_type == 'gm':
581 fixme("automatic local address for GM")
586 # ============================================================
587 # Classes to prepare and cleanup the various objects
590 """ Base class for the rest of the modules. The default cleanup method is
591 defined here, as well as some utilitiy funcs.
593 def __init__(self, module_name, dom_node):
594 self.dom_node = dom_node
595 self.module_name = module_name
596 self.name = get_attr(dom_node, 'name')
597 self.uuid = get_attr(dom_node, 'uuid')
598 self.kmodule_list = []
602 def info(self, *args):
603 msg = string.join(map(str,args))
604 print self.module_name + ":", self.name, self.uuid, msg
607 def lookup_server(self, srv_uuid):
608 """ Lookup a server's network information """
609 net = get_ost_net(self.dom_node.parentNode, srv_uuid)
611 panic ("Unable to find a server for:", srv_uuid)
612 self._server = Network(net)
614 def get_server(self):
618 """ default cleanup, used for most modules """
620 srv = self.get_server()
621 if srv and local_net(srv):
623 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
624 except CommandError, e:
625 log(self.module_name, "disconnect failed: ", self.name)
628 lctl.cleanup(self.name, self.uuid)
629 except CommandError, e:
630 log(self.module_name, "cleanup failed: ", self.name)
def add_module(self, dev_dir, modname):
    """Append the (dev_dir, modname) pair to the list of modules to load."""
    entry = (dev_dir, modname)
    self.kmodule_list.append(entry)
637 def mod_loaded(self, modname):
638 """Check if a module is already loaded. Look in /proc/modules for it."""
639 fp = open('/proc/modules')
640 lines = fp.readlines()
642 # please forgive my tired fingers for this one
643 ret = filter(lambda word, mod=modname: word == mod,
644 map(lambda line: string.split(line)[0], lines))
647 def load_module(self):
648 """Load all the modules in the list in the order they appear."""
649 for dev_dir, mod in self.kmodule_list:
650 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
651 if self.mod_loaded(mod) and not config.noexec():
653 log ('loading module:', mod)
655 module = find_module(config.src_dir(),dev_dir, mod)
657 panic('module not found:', mod)
658 (rc, out) = run('/sbin/insmod', module)
660 raise CommandError('insmod', out, rc)
662 (rc, out) = run('/sbin/modprobe', mod)
664 raise CommandError('modprobe', out, rc)
666 def cleanup_module(self):
667 """Unload the modules in the list in reverse order."""
668 rev = self.kmodule_list
670 for dev_dir, mod in rev:
671 if not self.mod_loaded(mod):
674 if mod == 'portals' and config.dump_file():
675 lctl.dump(config.dump_file())
676 log('unloading module:', mod)
679 (rc, out) = run('/sbin/rmmod', mod)
681 log('! unable to unload module:', mod)
685 class Network(Module):
686 def __init__(self,dom_node):
687 Module.__init__(self, 'NETWORK', dom_node)
688 self.net_type = get_attr(dom_node,'type')
689 self.nid = get_text(dom_node, 'server', '*')
690 self.port = get_text_int(dom_node, 'port', 0)
691 self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
692 self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
694 self.nid = get_local_address(self.net_type, self.nid)
696 panic("unable to set nid for", self.net_type, self.nid)
697 debug("nid:", self.nid)
699 self.add_module('portals/linux/oslib/', 'portals')
700 if node_needs_router():
701 self.add_module('portals/linux/router', 'kptlrouter')
702 if self.net_type == 'tcp':
703 self.add_module('portals/linux/socknal', 'ksocknal')
704 if self.net_type == 'elan':
705 self.add_module('portals/linux/rqswnal', 'kqswnal')
706 if self.net_type == 'gm':
707 self.add_module('portals/linux/gmnal', 'kgmnal')
708 self.add_module('lustre/obdclass', 'obdclass')
709 self.add_module('lustre/ptlrpc', 'ptlrpc')
712 self.info(self.net_type, self.nid, self.port)
713 if self.net_type == 'tcp':
714 ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
716 raise CommandError(TCP_ACCEPTOR, out, ret)
717 ret = self.dom_node.getElementsByTagName('route_tbl')
719 for r in a.getElementsByTagName('route'):
720 net_type = get_attr(r, 'type')
721 gw = get_attr(r, 'gw')
722 lo = get_attr(r, 'lo')
723 hi = get_attr(r,'hi', '')
724 lctl.add_route(net_type, gw, lo, hi)
725 if self.net_type == 'tcp' and hi == '':
726 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
728 panic("no server for nid", lo)
730 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
733 lctl.network(self.net_type, self.nid)
734 lctl.newdev(attach = "ptlrpc RPCDEV")
737 self.info(self.net_type, self.nid, self.port)
738 ret = self.dom_node.getElementsByTagName('route_tbl')
740 for r in a.getElementsByTagName('route'):
741 lo = get_attr(r, 'lo')
742 hi = get_attr(r,'hi', '')
743 if self.net_type == 'tcp' and hi == '':
744 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
746 panic("no server for nid", lo)
749 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
750 except CommandError, e:
751 print "disconnect failed: ", self.name
754 lctl.del_route(self.net_type, self.nid, lo, hi)
755 except CommandError, e:
756 print "del_route failed: ", self.name
760 lctl.cleanup("RPCDEV", "")
761 except CommandError, e:
762 print "cleanup failed: ", self.name
765 lctl.disconnectAll(self.net_type)
766 except CommandError, e:
767 print "disconnectAll failed: ", self.name
769 if self.net_type == 'tcp':
770 # yikes, this ugly! need to save pid in /var/something
771 run("killall acceptor")
774 def __init__(self,dom_node):
775 Module.__init__(self, 'LDLM', dom_node)
776 self.add_module('lustre/ldlm', 'ldlm')
779 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
783 def __init__(self,dom_node):
784 Module.__init__(self, 'LOV', dom_node)
785 self.mds_uuid = get_first_ref(dom_node, 'mds')
786 mds= lookup(dom_node.parentNode, self.mds_uuid)
787 self.mds_name = getName(mds)
788 devs = dom_node.getElementsByTagName('devices')
791 self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
792 self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
793 self.pattern = get_attr_int(dev_node, 'pattern', 0)
794 self.devlist = get_all_refs(dev_node, 'osc')
795 self.stripe_cnt = len(self.devlist)
796 self.add_module('lustre/mdc', 'mdc')
797 self.add_module('lustre/lov', 'lov')
800 for osc_uuid in self.devlist:
801 osc = lookup(self.dom_node.parentNode, osc_uuid)
806 panic('osc not found:', osc_uuid)
807 mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
808 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
809 self.devlist, self.mds_name)
810 lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
811 setup ="%s" % (mdc_uuid))
814 for osc_uuid in self.devlist:
815 osc = lookup(self.dom_node.parentNode, osc_uuid)
820 panic('osc not found:', osc_uuid)
822 cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
823 def load_module(self):
824 for osc_uuid in self.devlist:
825 osc = lookup(self.dom_node.parentNode, osc_uuid)
831 panic('osc not found:', osc_uuid)
832 Module.load_module(self)
833 def cleanup_module(self):
834 Module.cleanup_module(self)
835 for osc_uuid in self.devlist:
836 osc = lookup(self.dom_node.parentNode, osc_uuid)
842 panic('osc not found:', osc_uuid)
844 class LOVConfig(Module):
845 def __init__(self,dom_node):
846 Module.__init__(self, 'LOVConfig', dom_node)
847 self.lov_uuid = get_first_ref(dom_node, 'lov')
848 l = lookup(dom_node.parentNode, self.lov_uuid)
853 self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern,
854 lov.devlist, lov.mds_name)
855 lctl.lovconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
856 lov.stripe_sz, lov.stripe_off, lov.pattern,
857 string.join(lov.devlist))
865 def __init__(self,dom_node):
866 Module.__init__(self, 'MDS', dom_node)
867 self.devname, self.size = get_device(dom_node)
868 self.fstype = get_text(dom_node, 'fstype')
869 self.format = get_text(dom_node, 'autoformat', "no")
870 if self.fstype == 'extN':
871 self.add_module('lustre/extN', 'extN')
872 self.add_module('lustre/mds', 'mds')
873 self.add_module('lustre/mds', 'mds_%s' % (self.fstype))
876 self.info(self.devname, self.fstype, self.format)
877 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
878 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
879 setup ="%s %s" %(blkdev, self.fstype))
882 clean_loop(self.devname)
884 # Very unusual case, as there is no MDC element in the XML anymore
885 # Builds itself from an MDS node
887 def __init__(self,dom_node):
888 self.mds = MDS(dom_node)
889 self.dom_node = dom_node
890 self.module_name = 'MDC'
891 self.kmodule_list = []
895 host = socket.gethostname()
896 self.name = 'MDC_'+host
897 self.uuid = self.name+'_UUID'
899 self.lookup_server(self.mds.uuid)
900 self.add_module('lustre/mdc', 'mdc')
903 self.info(self.mds.uuid)
904 srv = self.get_server()
905 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
906 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
907 setup ="%s %s" %(self.mds.uuid, srv.uuid))
910 def __init__(self, dom_node):
911 Module.__init__(self, 'OBD', dom_node)
912 self.obdtype = get_attr(dom_node, 'type')
913 self.devname, self.size = get_device(dom_node)
914 self.fstype = get_text(dom_node, 'fstype')
915 self.format = get_text(dom_node, 'autoformat', 'yes')
916 if self.fstype == 'extN':
917 self.add_module('lustre/extN', 'extN')
918 self.add_module('lustre/' + self.obdtype, self.obdtype)
920 # need to check /proc/mounts and /etc/mtab before
921 # formatting anything.
922 # FIXME: check if device is already formatted.
924 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
925 if self.obdtype == 'obdecho':
928 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
929 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
930 setup ="%s %s" %(blkdev, self.fstype))
933 if not self.obdtype == 'obdecho':
934 clean_loop(self.devname)
937 def __init__(self,dom_node):
938 Module.__init__(self, 'OST', dom_node)
939 self.obd_uuid = get_first_ref(dom_node, 'obd')
940 self.add_module('lustre/ost', 'ost')
943 self.info(self.obd_uuid)
944 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
945 setup ="%s" % (self.obd_uuid))
948 # virtual interface for OSC and LOV
950 def __init__(self,dom_node):
951 Module.__init__(self, 'VOSC', dom_node)
952 if dom_node.nodeName == 'lov':
953 self.osc = LOV(dom_node)
955 self.osc = OSC(dom_node)
def load_module(self):
    """Delegate module loading to the wrapped osc object."""
    wrapped = self.osc
    wrapped.load_module()
def cleanup_module(self):
    """Delegate module unloading to the wrapped osc object."""
    wrapped = self.osc
    wrapped.cleanup_module()
967 def __init__(self,dom_node):
968 Module.__init__(self, 'OSC', dom_node)
969 self.obd_uuid = get_first_ref(dom_node, 'obd')
970 self.ost_uuid = get_first_ref(dom_node, 'ost')
971 self.lookup_server(self.ost_uuid)
972 self.add_module('lustre/osc', 'osc')
975 self.info(self.obd_uuid, self.ost_uuid)
976 srv = self.get_server()
978 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
982 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
984 panic ("no route to", srv.nid)
986 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
987 setup ="%s %s" %(self.obd_uuid, srv.uuid))
990 srv = self.get_server()
994 self.info(self.obd_uuid, self.ost_uuid)
997 lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
1001 class Mountpoint(Module):
1002 def __init__(self,dom_node):
1003 Module.__init__(self, 'MTPT', dom_node)
1004 self.path = get_text(dom_node, 'path')
1005 self.mds_uuid = get_first_ref(dom_node, 'mds')
1006 self.lov_uuid = get_first_ref(dom_node, 'osc')
1007 self.add_module('lustre/mdc', 'mdc')
1008 self.add_module('lustre/llite', 'llite')
1009 l = lookup(self.dom_node.parentNode, self.lov_uuid)
1014 mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
1016 self.info(self.path, self.mds_uuid,self.lov_uuid)
1017 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
1018 (self.lov_uuid, mdc_uuid, self.path)
1019 run("mkdir", self.path)
1022 panic("mount failed:", self.path)
1025 self.info(self.path, self.mds_uuid,self.lov_uuid)
1026 (rc, out) = run("umount", self.path)
1028 log("umount failed, cleanup will most likely not work.")
1029 l = lookup(self.dom_node.parentNode, self.lov_uuid)
1031 cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
def load_module(self):
    """Load the osc object's modules, then this object's own list."""
    # Order matters: the osc side is loaded before the modules queued
    # on this object.
    osc = self.osc
    osc.load_module()
    Module.load_module(self)
def cleanup_module(self):
    """Unload this object's own modules, then the osc object's."""
    # Mirror image of load_module: own modules go first, osc last.
    Module.cleanup_module(self)
    osc = self.osc
    osc.cleanup_module()
1041 # ============================================================
1042 # XML processing and query
1043 # TODO: Change query funcs to use XPath, which is much cleaner
1045 def get_device(obd):
1046 list = obd.getElementsByTagName('device')
1050 size = get_attr_int(dev, 'size', 0)
1051 return dev.firstChild.data, size
1054 # Get the text content from the first matching child
1055 # If there is no content (or it is all whitespace), return
1057 def get_text(dom_node, tag, default=""):
1058 list = dom_node.getElementsByTagName(tag)
1061 dom_node.normalize()
1062 if dom_node.firstChild:
1063 txt = string.strip(dom_node.firstChild.data)
1068 def get_text_int(dom_node, tag, default=0):
1069 list = dom_node.getElementsByTagName(tag)
1073 dom_node.normalize()
1074 if dom_node.firstChild:
1075 txt = string.strip(dom_node.firstChild.data)
1080 panic("text value is not integer:", txt)
1083 def get_attr(dom_node, attr, default=""):
1084 v = dom_node.getAttribute(attr)
1089 def get_attr_int(dom_node, attr, default=0):
1091 v = dom_node.getAttribute(attr)
1096 panic("attr value is not integer", v)
1099 def get_first_ref(dom_node, tag):
1100 """ Get the first uuidref of the type TAG. Used one only
1101 one is expected. Returns the uuid."""
1103 refname = '%s_ref' % tag
1104 list = dom_node.getElementsByTagName(refname)
1106 uuid = getRef(list[0])
1109 def get_all_refs(dom_node, tag):
1110 """ Get all the refs of type TAG. Returns list of uuids. """
1112 refname = '%s_ref' % tag
1113 list = dom_node.getElementsByTagName(refname)
1116 uuids.append(getRef(i))
1119 def get_ost_net(dom_node, uuid):
1120 ost = lookup(dom_node, uuid)
1121 uuid = get_first_ref(ost, 'network')
1124 return lookup(dom_node, uuid)
1126 def nid2server(dom_node, nid):
1127 netlist = dom_node.getElementsByTagName('network')
1128 for net_node in netlist:
1129 if get_text(net_node, 'server') == nid:
1130 return Network(net_node)
1133 def lookup(dom_node, uuid):
1134 for n in dom_node.childNodes:
1135 if n.nodeType == n.ELEMENT_NODE:
1136 if getUUID(n) == uuid:
1143 # Get name attribute of dom_node
def getName(dom_node):
    """Return the value of dom_node's 'name' attribute."""
    name = dom_node.getAttribute('name')
    return name
def getRef(dom_node):
    """Return the value of dom_node's 'uuidref' attribute."""
    ref = dom_node.getAttribute('uuidref')
    return ref
1150 # Get uuid attribute of dom_node
def getUUID(dom_node):
    """Return the value of dom_node's 'uuid' attribute."""
    uuid = dom_node.getAttribute('uuid')
    return uuid
1154 # the tag name is the service type
1155 # fixme: this should do some checks to make sure the dom_node is a service
def getServiceType(dom_node):
    """Return the service type of dom_node, which is its tag name."""
    tag = dom_node.nodeName
    return tag
1160 # determine what "level" a particular node is at.
1161 # the order of initialization is based on level.
1162 def getServiceLevel(dom_node):
1163 type = getServiceType(dom_node)
1164 if type in ('network',):
1166 elif type in ('device', 'ldlm'):
1168 elif type in ('obd', 'mdd'):
1170 elif type in ('mds','ost'):
1172 elif type in ('mdc','osc'):
1174 elif type in ('lov', 'lovconfig'):
1176 elif type in ('mountpoint',):
1181 # return list of services in a profile. list is a list of tuples
1182 # [(level, dom_node),]
1183 def getServices(lustreNode, profileNode):
1185 for n in profileNode.childNodes:
1186 if n.nodeType == n.ELEMENT_NODE:
1187 servNode = lookup(lustreNode, getRef(n))
1190 panic('service not found: ' + getRef(n))
1191 level = getServiceLevel(servNode)
1192 list.append((level, servNode))
1196 def getByName(lustreNode, name, tag):
1197 ndList = lustreNode.getElementsByTagName(tag)
1199 if getName(nd) == name:
1204 ############################################################
1206 # FIXME: clean this mess up!
1209 def prepare_mdc(dom_node, mds_uuid):
1211 mds_node = lookup(dom_node, mds_uuid);
1213 panic("no mds:", mds_uuid)
1222 def cleanup_mdc(dom_node, mds_uuid):
1224 mds_node = lookup(dom_node, mds_uuid);
1226 panic("no mds:", mds_uuid)
1234 ############################################################
1235 # routing ("rooting")
def init_node(dom_node):
    """Record every network found below DOM_NODE in the global local_node.

    For each 'network' element, a (type, server) tuple is appended, built
    from the element's 'type' attribute and the text of its 'server' tag.
    """
    global local_node, router_flag
    for net_elem in dom_node.getElementsByTagName('network'):
        net_type = get_attr(net_elem, 'type')
        server = get_text(net_elem, 'server')
        local_node.append((net_type, server))
1249 def node_needs_router():
1252 def get_routes(type, gw, dom_net):
1253 """ Return the routes as a list of tuples of the form:
1254 [(type, gw, lo, hi),]"""
1256 tbl = dom_net.getElementsByTagName('route_tbl')
1258 routes = t.getElementsByTagName('route')
1260 lo = get_attr(r, 'lo')
1261 hi = get_attr(r, 'hi', '')
1262 res.append((type, gw, lo, hi))
1266 def init_route_config(lustre):
1267 """ Scan the lustre config looking for routers. Build list of
1269 global routes, router_flag
1271 list = lustre.getElementsByTagName('node')
1273 if get_attr(node, 'router'):
1275 for (local_type, local_nid) in local_node:
1277 netlist = node.getElementsByTagName('network')
1278 for dom_net in netlist:
1279 if local_type == get_attr(dom_net, 'type'):
1280 gw = get_text(dom_net, 'server')
1284 for dom_net in netlist:
1285 if local_type != get_attr(dom_net, 'type'):
1286 for route in get_routes(local_type, gw, dom_net):
1287 routes.append(route)
1292 for iface in local_node:
1293 if net.net_type == iface[0]:
1297 def find_route(net):
1298 global local_node, routes
1299 frm_type = local_node[0][0]
1300 to_type = net.net_type
1302 debug ('looking for route to', to_type,to)
1311 ############################################################
1314 def startService(dom_node, module_flag):
1315 type = getServiceType(dom_node)
1316 debug('Service:', type, getName(dom_node), getUUID(dom_node))
1317 # there must be a more dynamic way of doing this...
1323 elif type == 'lovconfig':
1324 n = LOVConfig(dom_node)
1325 elif type == 'network':
1326 n = Network(dom_node)
1337 elif type == 'mountpoint':
1338 n = Mountpoint(dom_node)
1340 panic ("unknown service type:", type)
1345 if config.cleanup():
1350 if config.nosetup():
1352 if config.cleanup():
1358 # Prepare the system to run lustre using a particular profile
1359 # in the configuration.
1360 # * load the modules
1361 # * setup networking for the current node
1362 # * make sure partitions are in place and prepared
1363 # * initialize devices with lctl
1364 # Levels is important, and needs to be enforced.
1365 def startProfile(lustreNode, profileNode, module_flag):
1367 panic("profile:", profile, "not found.")
1368 services = getServices(lustreNode, profileNode)
1369 if config.cleanup():
1372 startService(s[1], module_flag)
1377 def doHost(lustreNode, hosts):
1381 dom_node = getByName(lustreNode, h, 'node')
1386 print 'No host entry found.'
1389 if not get_attr(dom_node, 'router'):
1391 init_route_config(lustreNode)
1396 # Two step process: (1) load modules, (2) setup lustre
1397 # if not cleaning, load modules first.
1398 module_flag = not config.cleanup()
1399 reflist = dom_node.getElementsByTagName('profile')
1400 for profile in reflist:
1401 startProfile(lustreNode, profile, module_flag)
1403 if not config.cleanup():
1404 sys_set_debug_path()
1405 script = config.gdb_script()
1406 run(lctl.lctl, ' modules >', script)
1408 # dump /tmp/ogdb and sleep/pause here
1409 log ("The GDB module script is in", script)
1412 module_flag = not module_flag
1413 for profile in reflist:
1414 startProfile(lustreNode, profile, module_flag)
1416 ############################################################
1417 # Command line processing
1419 def parse_cmdline(argv):
1421 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1422 "portals=", "makeldiff", "cleanup", "noexec",
1423 "help", "node=", "get=", "nomod", "nosetup",
1428 opts, args = getopt.getopt(argv, short_opts, long_opts)
1429 except getopt.error:
1434 if o in ("-h", "--help"):
1436 if o in ("-d","--cleanup"):
1438 if o in ("-v", "--verbose"):
1440 if o in ("-n", "--noexec"):
1443 if o == "--portals":
1447 if o == "--reformat":
1457 if o == "--nosetup":
1467 s = urllib.urlopen(url)
def setupModulePath(cmd):
    """Point config.src_dir at the source tree when run from a build dir.

    If a readable Makefile sits in the directory part of CMD, the source
    directory is set to two levels above that directory; otherwise
    nothing is changed.
    """
    cmd_dir = os.path.dirname(cmd)
    makefile = cmd_dir + "/Makefile"
    if not os.access(makefile, os.R_OK):
        return
    config.src_dir(cmd_dir + "/../../")
1478 def sys_set_debug_path():
1479 debug("debug path: ", config.debug_path())
1483 fp = open('/proc/sys/portals/debug_path', 'w')
1484 fp.write(config.debug_path())
1489 #/proc/sys/net/core/rmem_max
1490 #/proc/sys/net/core/wmem_max
1491 def sys_set_netmem_max(path, max):
1492 debug("setting", path, "to at least", max)
1500 fp = open(path, 'w')
1501 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the /dev/portals and /dev/obd nodes when not readable."""
    # (device path, command that creates it), in creation order
    device_table = (
        ('/dev/portals', 'mknod /dev/portals c 10 240'),
        ('/dev/obd', 'mknod /dev/obd c 10 241'),
    )
    for dev_path, mknod_cmd in device_table:
        if not os.access(dev_path, os.R_OK):
            run(mknod_cmd)
1511 # Initialize or shutdown lustre according to a configuration file
1512 # * prepare the system for lustre
1513 # * configure devices with lctl
1514 # Shutdown does steps in reverse
1517 global TCP_ACCEPTOR, lctl, MAXTCPBUF
1518 host = socket.gethostname()
1520 args = parse_cmdline(sys.argv[1:])
1522 if not os.access(args[0], os.R_OK | os.W_OK):
1523 print 'File not found:', args[0]
1525 dom = xml.dom.minidom.parse(args[0])
1527 xmldata = fetch(config.url())
1528 dom = xml.dom.minidom.parseString(xmldata)
1534 node_list.append(config.node())
1537 node_list.append(host)
1538 node_list.append('localhost')
1539 debug("configuring for host: ", node_list)
1542 config._debug_path = config._debug_path + '-' + host
1543 config._gdb_script = config._gdb_script + '-' + host
1545 TCP_ACCEPTOR = find_prog('acceptor')
1546 if not TCP_ACCEPTOR:
1548 TCP_ACCEPTOR = 'acceptor'
1549 debug('! acceptor not found')
1551 panic('acceptor not found')
1553 lctl = LCTLInterface('lctl')
1555 setupModulePath(sys.argv[0])
1557 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
1558 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
1559 doHost(dom.documentElement, node_list)
1561 if __name__ == "__main__":
1564 except LconfError, e:
1566 except CommandError, e: