3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
29 import string, os, stat, popen2, socket, time
31 import xml.dom.minidom
36 DEFAULT_TCPBUF = 1048576
38 # Maximum number of devices to search for.
39 # (the /dev/loop* nodes need to be created beforehand)
40 MAX_LOOP_DEVICES = 256
44 print """usage: lconf config.xml
46 config.xml Lustre configuration in xml format.
47 --get <url> URL to fetch a config file
48 --node <nodename> Load config for <nodename>
49 -d | --cleanup Cleans up config. (Shutdown)
50 -v | --verbose Print system commands as they are run
51 -h | --help Print this help
52 --gdb Prints message after creating gdb module script
53 and sleeps for 5 seconds.
54 -n | --noexec Prints the commands and steps that will be run for a
55 config without executing them. This can used to check if a
56 config file is doing what it should be doing. (Implies -v)
57 --nomod Skip load/unload module step.
58 --nosetup Skip device setup/cleanup step.
59 --reformat Reformat all devices (without question)
62 --ldap server LDAP server with lustre config database
63 --makeldiff Translate xml source to LDIFF
64 This are perhaps not needed:
65 --lustre="src dir" Base directory of lustre sources. Used to search
67 --portals=src Portals source
71 # ============================================================
72 # Config parameters, encapsulated in a class
87 self._gdb_script = '/tmp/ogdb'
88 self._debug_path = '/tmp/lustre-log'
89 self._dump_file = None
92 def verbose(self, flag = None):
93 if flag: self._verbose = flag
96 def noexec(self, flag = None):
97 if flag: self._noexec = flag
100 def reformat(self, flag = None):
101 if flag: self._reformat = flag
102 return self._reformat
104 def cleanup(self, flag = None):
105 if flag: self._cleanup = flag
108 def gdb(self, flag = None):
109 if flag: self._gdb = flag
112 def nomod(self, flag = None):
113 if flag: self._nomod = flag
116 def nosetup(self, flag = None):
117 if flag: self._nosetup = flag
120 def node(self, val = None):
121 if val: self._node = val
124 def url(self, val = None):
125 if val: self._url = val
128 def gdb_script(self):
129 if os.path.isdir('/r'):
130 return '/r' + self._gdb_script
132 return self._gdb_script
134 def debug_path(self):
135 if os.path.isdir('/r'):
136 return '/r' + self._debug_path
138 return self._debug_path
140 def src_dir(self, val = None):
141 if val: self._src_dir = val
144 def dump_file(self, val = None):
145 if val: self._dump_file = val
146 return self._dump_file
150 # ============================================================
151 # debugging and error funcs
def fixme(msg = "this feature"):
    """Raise LconfError for a known-unimplemented code path.

    Fixes the "implmemented" typo in the message and uses the
    call-form raise (as the other raise sites in this file do)
    instead of the old comma syntax.
    """
    raise LconfError(msg + ' not implemented yet.')
157 msg = string.join(map(str,args))
158 if not config.noexec():
159 raise LconfError(msg)
164 msg = string.join(map(str,args))
169 print string.strip(s)
173 msg = string.join(map(str,args))
176 # ============================================================
177 # locally defined exceptions
178 class CommandError (exceptions.Exception):
179 def __init__(self, cmd_name, cmd_err, rc=None):
180 self.cmd_name = cmd_name
181 self.cmd_err = cmd_err
186 if type(self.cmd_err) == types.StringType:
188 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
190 print "! %s: %s" % (self.cmd_name, self.cmd_err)
191 elif type(self.cmd_err) == types.ListType:
193 print "! %s (error %d):" % (self.cmd_name, self.rc)
195 print "! %s:" % (self.cmd_name)
196 for s in self.cmd_err:
197 print "> %s" %(string.strip(s))
201 class LconfError (exceptions.Exception):
202 def __init__(self, args):
206 # ============================================================
207 # handle lctl interface
210 Manage communication with lctl
213 def __init__(self, cmd):
215 Initialize close by finding the lctl binary.
217 self.lctl = find_prog(cmd)
220 debug('! lctl not found')
223 raise CommandError('lctl', "unable to find lctl binary.")
228 the cmds are written to stdin of lctl
229 lctl doesn't return errors when run in script mode, so
231 should modify command line to accept multiple commands, or
232 create complex command line options
234 debug("+", self.lctl, cmds)
235 if config.noexec(): return (0, [])
236 p = popen2.Popen3(self.lctl, 1)
237 p.tochild.write(cmds + "\n")
239 out = p.fromchild.readlines()
240 err = p.childerr.readlines()
243 raise CommandError(self.lctl, err, ret)
247 def network(self, net, nid):
248 """ initialized network and add "self" """
249 # Idea: "mynid" could be used for all network types to add "self," and then
250 # this special case would be gone and the "self" hack would be hidden.
256 quit""" % (net, nid, nid)
265 # create a new connection
266 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
274 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
280 quit""" % (net, servuuid, nid, nid, port, )
284 # add a route to a range
285 def add_route(self, net, gw, lo, hi):
289 quit """ % (net, gw, lo, hi)
293 # add a route to a range
294 def del_route(self, net, gw, lo, hi):
301 # add a route to a host
302 def add_route_host(self, net, uuid, gw, tgt):
307 quit """ % (net, uuid, tgt, gw, tgt)
310 # add a route to a range
311 def del_route_host(self, net, uuid, gw, tgt):
316 quit """ % (net, uuid, tgt)
319 # disconnect one connection
320 def disconnect(self, net, nid, port, servuuid):
325 quit""" % (net, nid, servuuid)
329 def disconnectAll(self, net):
337 # create a new device with lctl
338 def newdev(self, attach, setup = ""):
343 quit""" % (attach, setup)
347 def cleanup(self, name, uuid):
356 def lovconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
360 lovconfig %s %d %d %d %s %s
361 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
365 def dump(self, dump_file):
368 quit""" % (dump_file)
371 # ============================================================
372 # Various system-level functions
373 # (ideally moved to their own module)
375 # Run a command and return the output and status.
376 # stderr is sent to /dev/null, could use popen3 to
377 # save it if necessary
379 cmd = string.join(map(str,args))
381 if config.noexec(): return (0, [])
382 f = os.popen(cmd + ' 2>&1')
391 # Run a command in the background.
392 def run_daemon(*args):
393 cmd = string.join(map(str,args))
395 if config.noexec(): return 0
396 f = os.popen(cmd + ' 2>&1')
404 # Determine full path to use for an external command
405 # searches dirname(argv[0]) first, then PATH
407 syspath = string.split(os.environ['PATH'], ':')
408 cmdpath = os.path.dirname(sys.argv[0])
409 syspath.insert(0, cmdpath);
410 syspath.insert(0, os.path.join(cmdpath, '../../portals/linux/utils/'))
412 prog = os.path.join(d,cmd)
413 if os.access(prog, os.X_OK):
417 # Recursively look for file starting at base dir
418 def do_find_file(base, mod):
419 fullname = os.path.join(base, mod)
420 if os.access(fullname, os.R_OK):
422 for d in os.listdir(base):
423 dir = os.path.join(base,d)
424 if os.path.isdir(dir):
425 module = do_find_file(dir, mod)
429 def find_module(src_dir, dev_dir, modname):
430 mod = '%s.o' % (modname)
431 module = src_dir +'/'+ dev_dir +'/'+ mod
433 if os.access(module, os.R_OK):
439 # is the path a block device?
446 return stat.S_ISBLK(s[stat.ST_MODE])
448 # build fs according to type
450 def mkfs(fstype, dev):
451 if(fstype in ('ext3', 'extN')):
452 mkfs = 'mkfs.ext2 -j -b 4096'
454 print 'unsupported fs type: ', fstype
455 if not is_block(dev):
459 (ret, out) = run (mkfs, force, dev)
461 panic("Unable to build fs:", dev)
462 # enable hash tree indexing on fs
464 htree = 'echo "feature FEATURE_C5" | debugfs -w'
465 (ret, out) = run (htree, dev)
467 panic("Unable to enable htree:", dev)
469 # some systems use /dev/loopN, some /dev/loop/N
473 if not os.access(loop + str(0), os.R_OK):
475 if not os.access(loop + str(0), os.R_OK):
476 panic ("can't access loop devices")
479 # find loop device assigned to thefile
482 for n in xrange(0, MAX_LOOP_DEVICES):
484 if os.access(dev, os.R_OK):
485 (stat, out) = run('losetup', dev)
486 if (out and stat == 0):
487 m = re.search(r'\((.*)\)', out[0])
488 if m and file == m.group(1):
494 # create file if necessary and assign the first free loop device
495 def init_loop(file, size, fstype):
496 dev = find_loop(file)
498 print 'WARNING file:', file, 'already mapped to', dev
500 if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
501 run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
503 # find next free loop
504 for n in xrange(0, MAX_LOOP_DEVICES):
506 if os.access(dev, os.R_OK):
507 (stat, out) = run('losetup', dev)
509 run('losetup', dev, file)
512 print "out of loop devices"
514 print "out of loop devices"
517 # undo loop assignment
518 def clean_loop(file):
519 dev = find_loop(file)
521 ret, out = run('losetup -d', dev)
523 log('unable to clean loop device:', dev, 'for file:', file)
526 # determine if dev is formatted as a <fstype> filesystem
527 def need_format(fstype, dev):
528 # FIXME don't know how to implement this
531 # initialize a block device if needed
532 def block_dev(dev, size, fstype, format):
533 if config.noexec(): return dev
534 if not is_block(dev):
535 dev = init_loop(dev, size, fstype)
536 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
540 # panic("device:", dev,
541 # "not prepared, and autoformat is not set.\n",
542 # "Rerun with --reformat option to format ALL filesystems")
546 def get_local_address(net_type):
547 """Return the local address for the network type."""
549 if net_type == 'tcp':
551 host = socket.gethostname()
552 local = socket.gethostbyname(host)
553 elif net_type == 'elan':
554 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
556 fp = open('/proc/elan/device0/position', 'r')
557 lines = fp.readlines()
566 elif net_type == 'gm':
567 fixme("automatic local address for GM")
572 # ============================================================
573 # Classes to prepare and cleanup the various objects
576 """ Base class for the rest of the modules. The default cleanup method is
577 defined here, as well as some utilitiy funcs.
579 def __init__(self, module_name, dom_node):
580 self.dom_node = dom_node
581 self.module_name = module_name
582 self.name = get_attr(dom_node, 'name')
583 self.uuid = get_attr(dom_node, 'uuid')
584 self.kmodule_list = []
    def info(self, *args):
        """Print a tagged status line: '<MODULE>: <name> <uuid> <args...>'."""
        # stringify and join the varargs into one message
        msg = string.join(map(str,args))
        print self.module_name + ":", self.name, self.uuid, msg
593 def lookup_server(self, srv_uuid):
594 """ Lookup a server's network information """
595 net = get_ost_net(self.dom_node.parentNode, srv_uuid)
596 self._server = Network(net)
598 def get_server(self):
602 """ default cleanup, used for most modules """
604 srv = self.get_server()
605 if srv and local_net(srv):
607 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
608 except CommandError, e:
609 log(self.module_name, "disconnect failed: ", self.name)
612 lctl.cleanup(self.name, self.uuid)
613 except CommandError, e:
614 log(self.module_name, "cleanup failed: ", self.name)
617 def add_module(self, dev_dir, modname):
618 """Append a module to list of modules to load."""
619 self.kmodule_list.append((dev_dir, modname))
621 def mod_loaded(self, modname):
622 """Check if a module is already loaded. Look in /proc/modules for it."""
623 fp = open('/proc/modules')
624 lines = fp.readlines()
626 # please forgive my tired fingers for this one
627 ret = filter(lambda word, mod=modname: word == mod,
628 map(lambda line: string.split(line)[0], lines))
631 def load_module(self):
632 """Load all the modules in the list in the order they appear."""
633 for dev_dir, mod in self.kmodule_list:
634 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
635 if self.mod_loaded(mod) and not config.noexec():
637 log ('loading module:', mod)
639 module = find_module(config.src_dir(),dev_dir, mod)
641 panic('module not found:', mod)
642 (rc, out) = run('/sbin/insmod', module)
644 raise CommandError('insmod', out, rc)
646 (rc, out) = run('/sbin/modprobe', mod)
648 raise CommandError('modprobe', out, rc)
650 def cleanup_module(self):
651 """Unload the modules in the list in reverse order."""
652 rev = self.kmodule_list
654 for dev_dir, mod in rev:
655 if not self.mod_loaded(mod):
658 if mod == 'portals' and config.dump_file():
659 lctl.dump(config.dump_file())
660 log('unloading module:', mod)
663 (rc, out) = run('/sbin/rmmod', mod)
665 log('! unable to unload module:', mod)
669 class Network(Module):
670 def __init__(self,dom_node):
671 Module.__init__(self, 'NETWORK', dom_node)
672 self.net_type = get_attr(dom_node,'type')
673 self.nid = get_text(dom_node, 'server', '*')
674 self.port = get_text_int(dom_node, 'port', 0)
675 self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
676 self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
678 self.nid = get_local_address(self.net_type)
680 panic("unable to set nid for", self.net_type)
682 self.add_module('portals/linux/oslib/', 'portals')
683 if node_needs_router():
684 self.add_module('portals/linux/router', 'kptlrouter')
685 if self.net_type == 'tcp':
686 self.add_module('portals/linux/socknal', 'ksocknal')
687 if self.net_type == 'elan':
688 self.add_module('portals/linux/rqswnal', 'kqswnal')
689 if self.net_type == 'gm':
690 self.add_module('portals/linux/gmnal', 'kgmnal')
691 self.add_module('lustre/obdclass', 'obdclass')
692 self.add_module('lustre/ptlrpc', 'ptlrpc')
695 self.info(self.net_type, self.nid, self.port)
696 if self.net_type == 'tcp':
697 ret = run_daemon(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, self.port)
699 raise CommandError(TCP_ACCEPTOR, 'failed', ret)
700 ret = self.dom_node.getElementsByTagName('route_tbl')
702 for r in a.getElementsByTagName('route'):
703 net_type = get_attr(r, 'type')
704 gw = get_attr(r, 'gw')
705 lo = get_attr(r, 'lo')
706 hi = get_attr(r,'hi', '')
707 lctl.add_route(net_type, gw, lo, hi)
708 if self.net_type == 'tcp' and hi == '':
709 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
711 panic("no server for nid", lo)
713 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
716 lctl.network(self.net_type, self.nid)
717 lctl.newdev(attach = "ptlrpc RPCDEV")
720 self.info(self.net_type, self.nid, self.port)
721 ret = self.dom_node.getElementsByTagName('route_tbl')
723 for r in a.getElementsByTagName('route'):
724 lo = get_attr(r, 'lo')
725 hi = get_attr(r,'hi', '')
726 if self.net_type == 'tcp' and hi == '':
727 srv = nid2server(self.dom_node.parentNode.parentNode, lo)
729 panic("no server for nid", lo)
732 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
733 except CommandError, e:
734 print "disconnect failed: ", self.name
737 lctl.del_route(self.net_type, self.nid, lo, hi)
738 except CommandError, e:
739 print "del_route failed: ", self.name
743 lctl.cleanup("RPCDEV", "")
744 except CommandError, e:
745 print "cleanup failed: ", self.name
748 lctl.disconnectAll(self.net_type)
749 except CommandError, e:
750 print "disconnectAll failed: ", self.name
752 if self.net_type == 'tcp':
753 # yikes, this ugly! need to save pid in /var/something
754 run("killall acceptor")
    def __init__(self,dom_node):
        """Configure the lock manager service; queues the ldlm module."""
        Module.__init__(self, 'LDLM', dom_node)
        self.add_module('lustre/ldlm', 'ldlm')
762 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
766 def __init__(self,dom_node):
767 Module.__init__(self, 'LOV', dom_node)
768 self.mds_uuid = get_first_ref(dom_node, 'mds')
769 mds= lookup(dom_node.parentNode, self.mds_uuid)
770 self.mds_name = getName(mds)
771 devs = dom_node.getElementsByTagName('devices')
774 self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
775 self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
776 self.pattern = get_attr_int(dev_node, 'pattern', 0)
777 self.devlist = get_all_refs(dev_node, 'osc')
778 self.stripe_cnt = len(self.devlist)
779 self.add_module('lustre/mdc', 'mdc')
780 self.add_module('lustre/lov', 'lov')
783 for osc_uuid in self.devlist:
784 osc = lookup(self.dom_node.parentNode, osc_uuid)
789 panic('osc not found:', osc_uuid)
790 mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
791 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, self.stripe_off, self.pattern,
792 self.devlist, self.mds_name)
793 lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
794 setup ="%s" % (mdc_uuid))
797 for osc_uuid in self.devlist:
798 osc = lookup(self.dom_node.parentNode, osc_uuid)
803 panic('osc not found:', osc_uuid)
805 cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
806 def load_module(self):
807 for osc_uuid in self.devlist:
808 osc = lookup(self.dom_node.parentNode, osc_uuid)
814 panic('osc not found:', osc_uuid)
815 Module.load_module(self)
816 def cleanup_module(self):
817 Module.cleanup_module(self)
818 for osc_uuid in self.devlist:
819 osc = lookup(self.dom_node.parentNode, osc_uuid)
825 panic('osc not found:', osc_uuid)
827 class LOVConfig(Module):
828 def __init__(self,dom_node):
829 Module.__init__(self, 'LOVConfig', dom_node)
830 self.lov_uuid = get_first_ref(dom_node, 'lov')
831 l = lookup(dom_node.parentNode, self.lov_uuid)
836 self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, lov.pattern,
837 lov.devlist, lov.mds_name)
838 lctl.lovconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
839 lov.stripe_sz, lov.stripe_off, lov.pattern,
840 string.join(lov.devlist))
    def __init__(self,dom_node):
        """Configure an MDS service from its XML node.

        Reads the backing device, filesystem type and autoformat policy,
        then queues the kernel modules the MDS needs.
        """
        Module.__init__(self, 'MDS', dom_node)
        self.devname, self.size = get_device(dom_node)
        self.fstype = get_text(dom_node, 'fstype')
        # autoformat defaults to "no": never reformat an MDS device unasked
        self.format = get_text(dom_node, 'autoformat', "no")
        if self.fstype == 'extN':
            # extN needs its own filesystem module loaded first
            self.add_module('lustre/extN', 'extN')
        self.add_module('lustre/mds', 'mds')
        # per-fstype glue module, e.g. mds_extN
        self.add_module('lustre/mds', 'mds_%s' % (self.fstype))
859 self.info(self.devname, self.fstype, self.format)
860 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
861 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
862 setup ="%s %s" %(blkdev, self.fstype))
865 clean_loop(self.devname)
867 # Very unusual case, as there is no MDC element in the XML anymore
868 # Builds itself from an MDS node
870 def __init__(self,dom_node):
871 self.mds = MDS(dom_node)
872 self.dom_node = dom_node
873 self.module_name = 'MDC'
874 self.kmodule_list = []
878 host = socket.gethostname()
879 self.name = 'MDC_'+host
880 self.uuid = self.name+'_UUID'
882 self.lookup_server(self.mds.uuid)
883 self.add_module('lustre/mdc', 'mdc')
886 self.info(self.mds.uuid)
887 srv = self.get_server()
888 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
889 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
890 setup ="%s %s" %(self.mds.uuid, srv.uuid))
    def __init__(self, dom_node):
        """Configure an OBD driver instance from its XML node."""
        Module.__init__(self, 'OBD', dom_node)
        self.obdtype = get_attr(dom_node, 'type')
        self.devname, self.size = get_device(dom_node)
        self.fstype = get_text(dom_node, 'fstype')
        # unlike the MDS, OBD devices autoformat by default
        self.format = get_text(dom_node, 'autoformat', 'yes')
        if self.fstype == 'extN':
            # extN needs its own filesystem module loaded first
            self.add_module('lustre/extN', 'extN')
        # the obdtype names both the source subdirectory and the module
        self.add_module('lustre/' + self.obdtype, self.obdtype)
903 # need to check /proc/mounts and /etc/mtab before
904 # formatting anything.
905 # FIXME: check if device is already formatted.
907 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
908 if self.obdtype == 'obdecho':
911 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
912 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
913 setup ="%s %s" %(blkdev, self.fstype))
916 if not self.obdtype == 'obdecho':
917 clean_loop(self.devname)
    def __init__(self,dom_node):
        """Configure an OST service; records the uuid of the OBD it exports."""
        Module.__init__(self, 'OST', dom_node)
        self.obd_uuid = get_first_ref(dom_node, 'obd')
        self.add_module('lustre/ost', 'ost')
926 self.info(self.obd_uuid)
927 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
928 setup ="%s" % (self.obd_uuid))
931 # virtual interface for OSC and LOV
933 def __init__(self,dom_node):
934 Module.__init__(self, 'VOSC', dom_node)
935 if dom_node.nodeName == 'lov':
936 self.osc = LOV(dom_node)
938 self.osc = OSC(dom_node)
    def load_module(self):
        # delegate to the wrapped LOV or OSC
        self.osc.load_module()
    def cleanup_module(self):
        # delegate to the wrapped LOV or OSC
        self.osc.cleanup_module()
    def __init__(self,dom_node):
        """Configure an OSC client from its XML node."""
        Module.__init__(self, 'OSC', dom_node)
        self.obd_uuid = get_first_ref(dom_node, 'obd')
        self.ost_uuid = get_first_ref(dom_node, 'ost')
        # cache the OST's network info for later connect/disconnect
        self.lookup_server(self.ost_uuid)
        self.add_module('lustre/osc', 'osc')
958 self.info(self.obd_uuid, self.ost_uuid)
959 srv = self.get_server()
961 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
965 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
967 panic ("no route to", srv.nid)
969 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
970 setup ="%s %s" %(self.obd_uuid, srv.uuid))
973 srv = self.get_server()
977 self.info(self.obd_uuid, self.ost_uuid)
980 lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
984 class Mountpoint(Module):
985 def __init__(self,dom_node):
986 Module.__init__(self, 'MTPT', dom_node)
987 self.path = get_text(dom_node, 'path')
988 self.mds_uuid = get_first_ref(dom_node, 'mds')
989 self.lov_uuid = get_first_ref(dom_node, 'osc')
990 self.add_module('lustre/mdc', 'mdc')
991 self.add_module('lustre/llite', 'llite')
992 l = lookup(self.dom_node.parentNode, self.lov_uuid)
997 mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
999 self.info(self.path, self.mds_uuid,self.lov_uuid)
1000 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
1001 (self.lov_uuid, mdc_uuid, self.path)
1002 run("mkdir", self.path)
1005 panic("mount failed:", self.path)
1008 self.info(self.path, self.mds_uuid,self.lov_uuid)
1009 (rc, out) = run("umount", self.path)
1011 log("umount failed, cleanup will most likely not work.")
1012 l = lookup(self.dom_node.parentNode, self.lov_uuid)
1014 cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
    def load_module(self):
        """Load the VOSC's modules first, then this module's own list."""
        self.osc.load_module()
        Module.load_module(self)
    def cleanup_module(self):
        """Unload in reverse of load order: own modules first, then the VOSC's."""
        Module.cleanup_module(self)
        self.osc.cleanup_module()
1024 # ============================================================
1025 # XML processing and query
1026 # TODO: Change query funcs to use XPath, which is much cleaner
1028 def get_device(obd):
1029 list = obd.getElementsByTagName('device')
1033 size = get_attr_int(dev, 'size', 0)
1034 return dev.firstChild.data, size
1037 # Get the text content from the first matching child
1038 # If there is no content (or it is all whitespace), return
1040 def get_text(dom_node, tag, default=""):
1041 list = dom_node.getElementsByTagName(tag)
1044 dom_node.normalize()
1045 if dom_node.firstChild:
1046 txt = string.strip(dom_node.firstChild.data)
1051 def get_text_int(dom_node, tag, default=0):
1052 list = dom_node.getElementsByTagName(tag)
1056 dom_node.normalize()
1057 if dom_node.firstChild:
1058 txt = string.strip(dom_node.firstChild.data)
1063 panic("text value is not integer:", txt)
1066 def get_attr(dom_node, attr, default=""):
1067 v = dom_node.getAttribute(attr)
1072 def get_attr_int(dom_node, attr, default=0):
1074 v = dom_node.getAttribute(attr)
1079 panic("attr value is not integer", v)
1082 def get_first_ref(dom_node, tag):
1083 """ Get the first uuidref of the type TAG. Used one only
1084 one is expected. Returns the uuid."""
1086 refname = '%s_ref' % tag
1087 list = dom_node.getElementsByTagName(refname)
1089 uuid = getRef(list[0])
1092 def get_all_refs(dom_node, tag):
1093 """ Get all the refs of type TAG. Returns list of uuids. """
1095 refname = '%s_ref' % tag
1096 list = dom_node.getElementsByTagName(refname)
1099 uuids.append(getRef(i))
1102 def get_ost_net(dom_node, uuid):
1103 ost = lookup(dom_node, uuid)
1104 uuid = get_first_ref(ost, 'network')
1107 return lookup(dom_node, uuid)
1109 def nid2server(dom_node, nid):
1110 netlist = dom_node.getElementsByTagName('network')
1111 for net_node in netlist:
1112 if get_text(net_node, 'server') == nid:
1113 return Network(net_node)
1116 def lookup(dom_node, uuid):
1117 for n in dom_node.childNodes:
1118 if n.nodeType == n.ELEMENT_NODE:
1119 if getUUID(n) == uuid:
1126 # Get name attribute of dom_node
def getName(dom_node):
    """Return the dom node's 'name' attribute ('' when absent)."""
    name_attr = dom_node.getAttribute('name')
    return name_attr
def getRef(dom_node):
    """Return the dom node's 'uuidref' attribute ('' when absent)."""
    ref_attr = dom_node.getAttribute('uuidref')
    return ref_attr
1133 # Get name attribute of dom_node
def getUUID(dom_node):
    """Return the dom node's 'uuid' attribute ('' when absent)."""
    uuid_attr = dom_node.getAttribute('uuid')
    return uuid_attr
1137 # the tag name is the service type
1138 # fixme: this should do some checks to make sure the dom_node is a service
def getServiceType(dom_node):
    """The service type is simply the element's tag name."""
    tag = dom_node.nodeName
    return tag
1143 # determine what "level" a particular node is at.
1144 # the order of initialization is based on level.
1145 def getServiceLevel(dom_node):
1146 type = getServiceType(dom_node)
1147 if type in ('network',):
1149 elif type in ('device', 'ldlm'):
1151 elif type in ('obd', 'mdd'):
1153 elif type in ('mds','ost'):
1155 elif type in ('mdc','osc'):
1157 elif type in ('lov', 'lovconfig'):
1159 elif type in ('mountpoint',):
1164 # return list of services in a profile. list is a list of tuples
1165 # [(level, dom_node),]
1166 def getServices(lustreNode, profileNode):
1168 for n in profileNode.childNodes:
1169 if n.nodeType == n.ELEMENT_NODE:
1170 servNode = lookup(lustreNode, getRef(n))
1173 panic('service not found: ' + getRef(n))
1174 level = getServiceLevel(servNode)
1175 list.append((level, servNode))
1179 def getByName(lustreNode, name, tag):
1180 ndList = lustreNode.getElementsByTagName(tag)
1182 if getName(nd) == name:
1187 ############################################################
1189 # FIXME: clean this mess up!
1192 def prepare_mdc(dom_node, mds_uuid):
1194 mds_node = lookup(dom_node, mds_uuid);
1196 panic("no mds:", mds_uuid)
1205 def cleanup_mdc(dom_node, mds_uuid):
1207 mds_node = lookup(dom_node, mds_uuid);
1209 panic("no mds:", mds_uuid)
1217 ############################################################
1218 # routing ("rooting")
def init_node(dom_node):
    """Record this node's (net_type, server) pairs in the global local_node."""
    global local_node, router_flag
    for dom_net in dom_node.getElementsByTagName('network'):
        net_type = get_attr(dom_net, 'type')
        server = get_text(dom_net, 'server')
        local_node.append((net_type, server))
1232 def node_needs_router():
1235 def get_routes(type, gw, dom_net):
1236 """ Return the routes as a list of tuples of the form:
1237 [(type, gw, lo, hi),]"""
1239 tbl = dom_net.getElementsByTagName('route_tbl')
1241 routes = t.getElementsByTagName('route')
1243 lo = get_attr(r, 'lo')
1244 hi = get_attr(r, 'hi', '')
1245 res.append((type, gw, lo, hi))
1249 def init_route_config(lustre):
1250 """ Scan the lustre config looking for routers. Build list of
1252 global routes, router_flag
1254 list = lustre.getElementsByTagName('node')
1256 if get_attr(node, 'router'):
1258 for (local_type, local_nid) in local_node:
1260 netlist = node.getElementsByTagName('network')
1261 for dom_net in netlist:
1262 if local_type == get_attr(dom_net, 'type'):
1263 gw = get_text(dom_net, 'server')
1267 for dom_net in netlist:
1268 if local_type != get_attr(dom_net, 'type'):
1269 for route in get_routes(local_type, gw, dom_net):
1270 routes.append(route)
1275 for iface in local_node:
1276 if net.net_type == iface[0]:
1280 def find_route(net):
1281 global local_node, routes
1282 frm_type = local_node[0][0]
1283 to_type = net.net_type
1285 debug ('looking for route to', to_type,to)
1294 ############################################################
1297 def startService(dom_node, module_flag):
1298 type = getServiceType(dom_node)
1299 debug('Service:', type, getName(dom_node), getUUID(dom_node))
1300 # there must be a more dynamic way of doing this...
1306 elif type == 'lovconfig':
1307 n = LOVConfig(dom_node)
1308 elif type == 'network':
1309 n = Network(dom_node)
1320 elif type == 'mountpoint':
1321 n = Mountpoint(dom_node)
1323 panic ("unknown service type:", type)
1328 if config.cleanup():
1333 if config.nosetup():
1335 if config.cleanup():
1341 # Prepare the system to run lustre using a particular profile
1342 # in the configuration.
1343 # * load & the modules
1344 # * setup networking for the current node
1345 # * make sure partitions are in place and prepared
1346 # * initialize devices with lctl
1347 # Levels is important, and needs to be enforced.
1348 def startProfile(lustreNode, profileNode, module_flag):
1350 panic("profile:", profile, "not found.")
1351 services = getServices(lustreNode, profileNode)
1352 if config.cleanup():
1355 startService(s[1], module_flag)
1360 def doHost(lustreNode, hosts):
1364 dom_node = getByName(lustreNode, h, 'node')
1369 print 'No host entry found.'
1372 if not get_attr(dom_node, 'router'):
1374 init_route_config(lustreNode)
1379 # Two step process: (1) load modules, (2) setup lustre
1380 # if not cleaning, load modules first.
1381 module_flag = not config.cleanup()
1382 reflist = dom_node.getElementsByTagName('profile')
1383 for profile in reflist:
1384 startProfile(lustreNode, profile, module_flag)
1386 if not config.cleanup():
1387 sys_set_debug_path()
1388 script = config.gdb_script()
1389 run(lctl.lctl, ' modules >', script)
1391 # dump /tmp/ogdb and sleep/pause here
1392 log ("The GDB module script is in", script)
1395 module_flag = not module_flag
1396 for profile in reflist:
1397 startProfile(lustreNode, profile, module_flag)
1399 ############################################################
1400 # Command line processing
1402 def parse_cmdline(argv):
1404 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1405 "portals=", "makeldiff", "cleanup", "noexec",
1406 "help", "node=", "get=", "nomod", "nosetup",
1411 opts, args = getopt.getopt(argv, short_opts, long_opts)
1412 except getopt.error:
1417 if o in ("-h", "--help"):
1419 if o in ("-d","--cleanup"):
1421 if o in ("-v", "--verbose"):
1423 if o in ("-n", "--noexec"):
1426 if o == "--portals":
1430 if o == "--reformat":
1440 if o == "--nosetup":
1450 s = urllib.urlopen(url)
def setupModulePath(cmd):
    """If *cmd* lives in a build tree (a Makefile beside it), point
    config at the source root two levels up."""
    base = os.path.dirname(cmd)
    makefile = base + "/Makefile"
    if os.access(makefile, os.R_OK):
        config.src_dir(base + "/../../")
1461 def sys_set_debug_path():
1462 debug("debug path: ", config.debug_path())
1466 fp = open('/proc/sys/portals/debug_path', 'w')
1467 fp.write(config.debug_path())
1472 #/proc/sys/net/core/rmem_max
1473 #/proc/sys/net/core/wmem_max
1474 def sys_set_netmem_max(path, max):
1475 debug("setting", path, "to at least", max)
1483 fp = open(path, 'w')
1484 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character devices if they are missing."""
    wanted = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd',     'mknod /dev/obd c 10 241'))
    for path, cmd in wanted:
        if not os.access(path, os.R_OK):
            run(cmd)
1494 # Initialize or shutdown lustre according to a configuration file
1495 # * prepare the system for lustre
1496 # * configure devices with lctl
1497 # Shutdown does steps in reverse
1500 global TCP_ACCEPTOR, lctl, MAXTCPBUF
1501 host = socket.gethostname()
1503 args = parse_cmdline(sys.argv[1:])
1505 if not os.access(args[0], os.R_OK | os.W_OK):
1506 print 'File not found:', args[0]
1508 dom = xml.dom.minidom.parse(args[0])
1510 xmldata = fetch(config.url())
1511 dom = xml.dom.minidom.parseString(xmldata)
1517 node_list.append(config.node())
1520 node_list.append(host)
1521 node_list.append('localhost')
1522 debug("configuring for host: ", node_list)
1525 config._debug_path = config._debug_path + '-' + host
1526 config._gdb_script = config._gdb_script + '-' + host
1528 TCP_ACCEPTOR = find_prog('acceptor')
1529 if not TCP_ACCEPTOR:
1531 TCP_ACCEPTOR = 'acceptor'
1532 debug('! acceptor not found')
1534 panic('acceptor not found')
1536 lctl = LCTLInterface('lctl')
1538 setupModulePath(sys.argv[0])
1540 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
1541 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
1542 doHost(dom.documentElement, node_list)
1544 if __name__ == "__main__":
1547 except LconfError, e:
1549 except CommandError, e: