3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
5 # This file is part of Lustre, http://www.lustre.org.
7 # Lustre is free software; you can redistribute it and/or
8 # modify it under the terms of version 2 of the GNU General Public
9 # License as published by the Free Software Foundation.
11 # Lustre is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with Lustre; if not, write to the Free Software
18 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 # lconf - lustre configuration tool
22 # lconf is the main driver script for starting and stopping
23 # lustre filesystem services.
25 # Based in part on the XML obdctl modifications done by Brian Behlendorf
27 import sys, getopt, types
28 import string, os, stat, popen2, socket, time, random, fcntl, FCNTL, select
30 import xml.dom.minidom
35 DEFAULT_TCPBUF = 1048576
37 # Maximum number of devices to search for.
38 # (the /dev/loop* nodes need to be created beforehand)
39 MAX_LOOP_DEVICES = 256
40 PORTALS_DIR = '@PORTALSLOC@'
first_cleanup_error = 0
def cleanup_error(rc):
    """Remember the first non-zero cleanup status seen; later errors are ignored."""
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
50 print """usage: lconf config.xml
52 config.xml Lustre configuration in xml format.
53 --ldapurl LDAP server URL, eg. ldap://localhost
54 --config Cluster config name used for LDAP query
55 --node <nodename> Load config for <nodename>
56 --select service=nodeA,service2=nodeB U
57 -d | --cleanup Cleans up config. (Shutdown)
58 -f | --force Forced unmounting and/or obd detach during cleanup
59 -v | --verbose Print system commands as they are run
60 -h | --help Print this help
61 --gdb Prints message after creating gdb module script
62 and sleeps for 5 seconds.
63 -n | --noexec Prints the commands and steps that will be run for a
64 config without executing them. This can used to check if a
65 config file is doing what it should be doing. (Implies -v)
66 --nomod Skip load/unload module step.
67 --nosetup Skip device setup/cleanup step.
68 --reformat Reformat all devices (without question)
69 --dump <file> Dump the kernel debug log before portals is unloaded
70 --minlevel <num> Specify the minimum level of services to configure/cleanup (default 0)
71 --maxlevel <num> Specify the maximum level of services to configure/cleanup (default 100)
72 Levels are aproximatly like:
79 70 - mountpoint, echo_client
80 --lustre=src_dir Base directory of lustre sources. This parameter will cause lconf
81 to load modules from a source tree.
82 --portals=src_dir Portals source directory. If this is a relative path, then it is
83 assumed to be relative to lustre.
87 --ldap server LDAP server with lustre config database
88 --makeldiff Translate xml source to LDIFF
89 This are perhaps not needed:
93 # ============================================================
94 # Config parameters, encapsulated in a class
110 self._gdb_script = '/tmp/ogdb'
111 self._debug_path = '/tmp/lustre-log'
112 self._dump_file = None
113 self._lustre_dir = ''
114 self._portals_dir = ''
118 self._recovery_upcall = ''
120 self._config_name = ''
123 def verbose(self, flag = None):
124 if flag: self._verbose = flag
127 def noexec(self, flag = None):
128 if flag: self._noexec = flag
131 def reformat(self, flag = None):
132 if flag: self._reformat = flag
133 return self._reformat
135 def cleanup(self, flag = None):
136 if flag: self._cleanup = flag
139 def gdb(self, flag = None):
140 if flag: self._gdb = flag
143 def nomod(self, flag = None):
144 if flag: self._nomod = flag
147 def nosetup(self, flag = None):
148 if flag: self._nosetup = flag
151 def force(self, flag = None):
152 if flag: self._force = flag
155 def node(self, val = None):
156 if val: self._node = val
159 def gdb_script(self):
160 if os.path.isdir('/r'):
161 return '/r' + self._gdb_script
163 return self._gdb_script
165 def debug_path(self):
166 if os.path.isdir('/r'):
167 return '/r' + self._debug_path
169 return self._debug_path
171 def dump_file(self, val = None):
172 if val: self._dump_file = val
173 return self._dump_file
174 def minlevel(self, val = None):
175 if val: self._minlevel = int(val)
176 return self._minlevel
178 def maxlevel(self, val = None):
179 if val: self._maxlevel = int(val)
180 return self._maxlevel
182 def portals_dir(self, val = None):
183 if val: self._portals_dir = val
184 return self._portals_dir
186 def lustre_dir(self, val = None):
187 if val: self._lustre_dir = val
188 return self._lustre_dir
190 def timeout(self, val = None):
191 if val: self._timeout = val
194 def recovery_upcall(self, val = None):
195 if val: self._recovery_upcall = val
196 return self._recovery_upcall
198 def ldapurl(self, val = None):
199 if val: self._ldapurl = val
202 def config_name(self, val = None):
203 if val: self._config_name = val
204 return self._config_name
206 def init_select(self, arg):
207 # arg = "service=nodeA,service2=nodeB"
208 list = string.split(arg, ',')
210 srv, node = string.split(entry, '=')
211 self._select[srv] = node
213 def select(self, srv):
214 if self._select.has_key(srv):
215 return self._select[srv]
221 # ============================================================
222 # debugging and error funcs
def fixme(msg = "this feature"):
    """Raise LconfError reporting that msg is not implemented yet."""
    # was: raise LconfError, msg + ' not implmemented yet.'
    # - fixes the "implmemented" typo in the user-visible message
    # - uses the call form of raise (valid in Python 2 and 3)
    raise LconfError(msg + ' not implemented yet.')
228 msg = string.join(map(str,args))
229 if not config.noexec():
230 raise LconfError(msg)
235 msg = string.join(map(str,args))
240 print string.strip(s)
244 msg = string.join(map(str,args))
247 # ============================================================
248 # locally defined exceptions
249 class CommandError (exceptions.Exception):
250 def __init__(self, cmd_name, cmd_err, rc=None):
251 self.cmd_name = cmd_name
252 self.cmd_err = cmd_err
257 if type(self.cmd_err) == types.StringType:
259 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
261 print "! %s: %s" % (self.cmd_name, self.cmd_err)
262 elif type(self.cmd_err) == types.ListType:
264 print "! %s (error %d):" % (self.cmd_name, self.rc)
266 print "! %s:" % (self.cmd_name)
267 for s in self.cmd_err:
268 print "> %s" %(string.strip(s))
272 class LconfError (exceptions.Exception):
273 def __init__(self, args):
277 # ============================================================
278 # handle lctl interface
281 Manage communication with lctl
284 def __init__(self, cmd):
286 Initialize close by finding the lctl binary.
288 self.lctl = find_prog(cmd)
291 debug('! lctl not found')
294 raise CommandError('lctl', "unable to find lctl binary.")
296 def set_nonblock(self, fd):
297 fl = fcntl.fcntl(fd, FCNTL.F_GETFL)
298 fcntl.fcntl(fd, FCNTL.F_SETFL, fl | os.O_NDELAY)
303 the cmds are written to stdin of lctl
304 lctl doesn't return errors when run in script mode, so
306 should modify command line to accept multiple commands, or
307 create complex command line options
309 debug("+", self.lctl, cmds)
310 if config.noexec(): return (0, [])
312 child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
313 child.tochild.write(cmds + "\n")
314 child.tochild.close()
316 # From "Python Cookbook" from O'Reilly
317 outfile = child.fromchild
318 outfd = outfile.fileno()
319 self.set_nonblock(outfd)
320 errfile = child.childerr
321 errfd = errfile.fileno()
322 self.set_nonblock(errfd)
324 outdata = errdata = ''
327 ready = select.select([outfd,errfd],[],[]) # Wait for input
328 if outfd in ready[0]:
329 outchunk = outfile.read()
330 if outchunk == '': outeof = 1
331 outdata = outdata + outchunk
332 if errfd in ready[0]:
333 errchunk = errfile.read()
334 if errchunk == '': erreof = 1
335 errdata = errdata + errchunk
336 if outeof and erreof: break
337 # end of "borrowed" code
340 if os.WIFEXITED(ret):
341 rc = os.WEXITSTATUS(ret)
344 if rc or len(errdata):
345 raise CommandError(self.lctl, errdata, rc)
348 def runcmd(self, *args):
350 run lctl using the command line
352 cmd = string.join(map(str,args))
353 debug("+", self.lctl, cmd)
354 rc, out = run(self.lctl, cmd)
356 raise CommandError(self.lctl, out, rc)
360 def network(self, net, nid):
361 """ initialized network and add "self" """
362 # Idea: "mynid" could be used for all network types to add "self," and then
363 # this special case would be gone and the "self" hack would be hidden.
364 if net in ('tcp', 'toe'):
369 quit""" % (net, nid, nid)
378 # create a new connection
379 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
380 if net in ('tcp', 'toe'):
387 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
393 quit""" % (net, servuuid, nid, nid, port, )
397 # add a route to a range
398 def add_route(self, net, gw, lo, hi):
402 quit """ % (net, gw, lo, hi)
406 def del_route(self, net, gw, lo, hi):
414 # add a route to a host
415 def add_route_host(self, net, uuid, gw, tgt):
420 quit """ % (net, uuid, tgt, gw, tgt)
423 # add a route to a range
424 def del_route_host(self, net, uuid, gw, tgt):
430 quit """ % (net, uuid, tgt)
433 # disconnect one connection
434 def disconnect(self, net, nid, port, servuuid):
440 quit""" % (net, nid, servuuid)
444 def disconnectAll(self, net):
453 # create a new device with lctl
454 def newdev(self, attach, setup = ""):
459 quit""" % (attach, setup)
463 def cleanup(self, name, uuid):
469 quit""" % (name, ('', 'force')[config.force()])
473 def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
477 lov_setconfig %s %d %d %d %s %s
478 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
482 def dump(self, dump_file):
485 quit""" % (dump_file)
488 # get list of devices
489 def device_list(self):
490 rc, out = self.runcmd('device_list')
494 def lustre_version(self):
495 rc, out = self.runcmd('version')
498 # ============================================================
499 # Various system-level functions
500 # (ideally moved to their own module)
502 # Run a command and return the output and status.
503 # stderr is sent to /dev/null, could use popen3 to
504 # save it if necessary
506 cmd = string.join(map(str,args))
508 if config.noexec(): return (0, [])
509 f = os.popen(cmd + ' 2>&1')
518 # Run a command in the background.
519 def run_daemon(*args):
520 cmd = string.join(map(str,args))
522 if config.noexec(): return 0
523 f = os.popen(cmd + ' 2>&1')
531 # Determine full path to use for an external command
532 # searches dirname(argv[0]) first, then PATH
534 syspath = string.split(os.environ['PATH'], ':')
535 cmdpath = os.path.dirname(sys.argv[0])
536 syspath.insert(0, cmdpath);
537 if config.portals_dir():
538 syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
540 prog = os.path.join(d,cmd)
541 if os.access(prog, os.X_OK):
545 # Recursively look for file starting at base dir
def do_find_file(base, mod):
    """Recursively search directory base for a readable file named mod.

    Returns the full path on success, None otherwise.
    (Fixes: local named `dir` shadowed the builtin; added an explicit
    `return None` for the not-found case.)
    """
    fullname = os.path.join(base, mod)
    if os.access(fullname, os.R_OK):
        return fullname
    for entry in os.listdir(base):
        subdir = os.path.join(base, entry)
        if os.path.isdir(subdir):
            found = do_find_file(subdir, mod)
            if found:
                return found
    return None
557 def find_module(src_dir, dev_dir, modname):
558 mod = '%s.o' % (modname)
559 module = src_dir +'/'+ dev_dir +'/'+ mod
561 if os.access(module, os.R_OK):
567 # is the path a block device?
574 return stat.S_ISBLK(s[stat.ST_MODE])
576 # build fs according to type
578 def mkfs(fstype, dev):
579 if(fstype in ('ext3', 'extN')):
580 mkfs = 'mkfs.ext2 -j -b 4096'
581 elif (fstype == 'reiserfs'):
582 mkfs = 'mkfs.reiserfs -f'
584 print 'unsupported fs type: ', fstype
585 if not is_block(dev):
586 if(fstype in ('ext3', 'extN')):
588 elif (fstype == 'reiserfs'):
591 print 'unsupported fs type: ', fstype
594 (ret, out) = run (mkfs, force, dev)
596 panic("Unable to build fs:", dev)
597 # enable hash tree indexing on fsswe
598 # FIXME: this check can probably go away on 2.5
600 htree = 'echo "feature FEATURE_C5" | debugfs -w'
601 (ret, out) = run (htree, dev)
603 panic("Unable to enable htree:", dev)
605 # some systems use /dev/loopN, some /dev/loop/N
609 if not os.access(loop + str(0), os.R_OK):
611 if not os.access(loop + str(0), os.R_OK):
612 panic ("can't access loop devices")
615 # find loop device assigned to thefile
618 for n in xrange(0, MAX_LOOP_DEVICES):
620 if os.access(dev, os.R_OK):
621 (stat, out) = run('losetup', dev)
622 if (out and stat == 0):
623 m = re.search(r'\((.*)\)', out[0])
624 if m and file == m.group(1):
630 # create file if necessary and assign the first free loop device
631 def init_loop(file, size, fstype):
632 dev = find_loop(file)
634 print 'WARNING file:', file, 'already mapped to', dev
636 if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
638 panic(file, "size must be larger than 8MB, currently set to:", size)
639 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
642 panic("Unable to create backing store:", file)
645 # find next free loop
646 for n in xrange(0, MAX_LOOP_DEVICES):
648 if os.access(dev, os.R_OK):
649 (stat, out) = run('losetup', dev)
651 run('losetup', dev, file)
654 print "out of loop devices"
656 print "out of loop devices"
659 # undo loop assignment
660 def clean_loop(file):
661 dev = find_loop(file)
663 ret, out = run('losetup -d', dev)
665 log('unable to clean loop device:', dev, 'for file:', file)
668 # determine if dev is formatted as a <fstype> filesystem
669 def need_format(fstype, dev):
670 # FIXME don't know how to implement this
673 # initialize a block device if needed
674 def block_dev(dev, size, fstype, format):
675 if config.noexec(): return dev
676 if not is_block(dev):
677 dev = init_loop(dev, size, fstype)
678 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
682 # panic("device:", dev,
683 # "not prepared, and autoformat is not set.\n",
684 # "Rerun with --reformat option to format ALL filesystems")
689 """lookup IP address for an interface"""
690 rc, out = run("/sbin/ifconfig", iface)
693 addr = string.split(out[1])[1]
694 ip = string.split(addr, ':')[1]
697 def get_local_address(net_type, wildcard):
698 """Return the local address for the network type."""
700 if net_type in ('tcp', 'toe'):
702 iface, star = string.split(wildcard, ':')
703 local = if2addr(iface)
705 panic ("unable to determine ip for:", wildcard)
707 host = socket.gethostname()
708 local = socket.gethostbyname(host)
709 elif net_type == 'elan':
710 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
712 fp = open('/proc/elan/device0/position', 'r')
713 lines = fp.readlines()
722 elif net_type == 'gm':
723 fixme("automatic local address for GM")
727 def is_prepared(uuid):
728 """Return true if a device exists for the uuid"""
729 # expect this format:
730 # 1 UP ldlm ldlm ldlm_UUID 2
732 out = lctl.device_list()
734 if uuid == string.split(s)[4]:
736 except CommandError, e:
740 def fs_is_mounted(path):
741 """Return true if path is a mounted lustre filesystem"""
743 fp = open('/proc/mounts')
744 lines = fp.readlines()
748 if a[1] == path and a[2] == 'lustre_lite':
755 # ============================================================
756 # Classes to prepare and cleanup the various objects
759 """ Base class for the rest of the modules. The default cleanup method is
760 defined here, as well as some utilitiy funcs.
762 def __init__(self, module_name, db):
764 self.module_name = module_name
765 self.name = self.db.getName()
766 self.uuid = self.db.getUUID()
767 self.kmodule_list = []
    def info(self, *args):
        # Log an identifying line for this service:
        #   "<MODULE>: <name> <uuid> <args...>"
        msg = string.join(map(str,args))
        print self.module_name + ":", self.name, self.uuid, msg
775 def lookup_server(self, srv_uuid):
776 """ Lookup a server's network information """
777 net = self.db.get_ost_net(srv_uuid)
779 panic ("Unable to find a server for:", srv_uuid)
780 self._server = Network(net)
782 def get_server(self):
786 """ default cleanup, used for most modules """
788 srv = self.get_server()
789 if srv and local_net(srv):
791 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
792 except CommandError, e:
793 log(self.module_name, "disconnect failed: ", self.name)
797 lctl.cleanup(self.name, self.uuid)
798 except CommandError, e:
799 log(self.module_name, "cleanup failed: ", self.name)
803 def add_portals_module(self, dev_dir, modname):
804 """Append a module to list of modules to load."""
805 self.kmodule_list.append((config.portals_dir(), dev_dir, modname))
807 def add_lustre_module(self, dev_dir, modname):
808 """Append a module to list of modules to load."""
809 self.kmodule_list.append((config.lustre_dir(), dev_dir, modname))
811 def mod_loaded(self, modname):
812 """Check if a module is already loaded. Look in /proc/modules for it."""
813 fp = open('/proc/modules')
814 lines = fp.readlines()
816 # please forgive my tired fingers for this one
817 ret = filter(lambda word, mod=modname: word == mod,
818 map(lambda line: string.split(line)[0], lines))
821 def load_module(self):
822 """Load all the modules in the list in the order they appear."""
823 for src_dir, dev_dir, mod in self.kmodule_list:
824 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
825 if self.mod_loaded(mod) and not config.noexec():
827 log ('loading module:', mod)
829 module = find_module(src_dir, dev_dir, mod)
831 panic('module not found:', mod)
832 (rc, out) = run('/sbin/insmod', module)
834 raise CommandError('insmod', out, rc)
836 (rc, out) = run('/sbin/modprobe', mod)
838 raise CommandError('modprobe', out, rc)
840 def cleanup_module(self):
841 """Unload the modules in the list in reverse order."""
842 rev = self.kmodule_list
844 for src_dir, dev_dir, mod in rev:
845 if not self.mod_loaded(mod):
848 if mod == 'portals' and config.dump_file():
849 lctl.dump(config.dump_file())
850 log('unloading module:', mod)
853 (rc, out) = run('/sbin/rmmod', mod)
855 log('! unable to unload module:', mod)
859 class Network(Module):
860 def __init__(self,db):
861 Module.__init__(self, 'NETWORK', db)
862 self.net_type = self.db.get_val('nettype')
863 self.nid = self.db.get_val('nid', '*')
864 self.port = self.db.get_val_int('port', 0)
865 self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
866 self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
868 self.nid = get_local_address(self.net_type, self.nid)
870 panic("unable to set nid for", self.net_type, self.nid)
871 debug("nid:", self.nid)
873 self.add_portals_module("linux/oslib", 'portals')
874 if node_needs_router():
875 self.add_portals_module("linux/router", 'kptlrouter')
876 if self.net_type == 'tcp':
877 self.add_portals_module("linux/socknal", 'ksocknal')
878 if self.net_type == 'toe':
879 self.add_portals_odule("/linux/toenal", 'ktoenal')
880 if self.net_type == 'elan':
881 self.add_portals_module("/linux/rqswnal", 'kqswnal')
882 if self.net_type == 'gm':
883 self.add_portals_module("/linux/gmnal", 'kgmnal')
884 self.add_lustre_module('obdclass', 'obdclass')
885 self.add_lustre_module('ptlrpc', 'ptlrpc')
888 self.info(self.net_type, self.nid, self.port)
889 if self.net_type in ('tcp', 'toe'):
890 nal_id = '' # default is socknal
891 if self.net_type == 'toe':
893 ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
895 raise CommandError(TCP_ACCEPTOR, out, ret)
896 for net_type, gw, lo, hi in self.db.get_route_tbl():
897 lctl.add_route(net_type, gw, lo, hi)
898 if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
899 srvdb = self.db.nid2server(lo)
901 panic("no server for nid", lo)
904 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
907 lctl.network(self.net_type, self.nid)
908 lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
911 self.info(self.net_type, self.nid, self.port)
912 for net_type, gw, lo, hi in self.db.get_route_tbl():
913 if self.net_type in ('tcp', 'toe') and hi == '':
914 srvdb = self.db.nid2server(lo)
916 panic("no server for nid", lo)
920 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
921 except CommandError, e:
922 print "disconnect failed: ", self.name
926 lctl.del_route(self.net_type, self.nid, lo, hi)
927 except CommandError, e:
928 print "del_route failed: ", self.name
933 lctl.cleanup("RPCDEV", "RPCDEV_UUID")
934 except CommandError, e:
935 print "cleanup failed: ", self.name
939 lctl.disconnectAll(self.net_type)
940 except CommandError, e:
941 print "disconnectAll failed: ", self.name
944 if self.net_type in ('tcp', 'toe'):
945 # yikes, this ugly! need to save pid in /var/something
946 run("killall acceptor")
949 def __init__(self,db):
950 Module.__init__(self, 'LDLM', db)
951 self.add_lustre_module('ldlm', 'ldlm')
953 if is_prepared(self.uuid):
956 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
960 def __init__(self,db):
961 Module.__init__(self, 'LOV', db)
962 self.mds_uuid = self.db.get_first_ref('mds')
963 mds= self.db.lookup(self.mds_uuid)
964 self.mds_name = mds.getName()
965 self.stripe_sz = self.db.get_val_int('stripesize', 65536)
966 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
967 self.pattern = self.db.get_val_int('stripepattern', 0)
968 self.devlist = self.db.get_refs('obd')
969 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
970 self.add_lustre_module('mdc', 'mdc')
971 self.add_lustre_module('lov', 'lov')
974 if is_prepared(self.uuid):
976 for obd_uuid in self.devlist:
977 obd = self.db.lookup(obd_uuid)
981 # Ignore connection failures, because the LOV will DTRT with
982 # an unconnected OSC.
983 osc.prepare(ignore_connect_failure=1)
985 print "Error preparing OSC %s (inactive)\n" % osc_uuid
987 panic('osc not found:', osc_uuid)
988 mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
989 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
990 self.stripe_off, self.pattern, self.devlist, self.mds_name)
991 lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
992 setup ="%s" % (mdc_uuid))
995 if not is_prepared(self.uuid):
997 for obd_uuid in self.devlist:
998 obd = self.db.lookup(obd_uuid)
1003 panic('osc not found:', osc_uuid)
1004 Module.cleanup(self)
1005 cleanup_mdc(self.db, self.mds_uuid)
1008 def load_module(self):
1009 for obd_uuid in self.devlist:
1010 obd = self.db.lookup(obd_uuid)
1016 panic('osc not found:', osc_uuid)
1017 Module.load_module(self)
1020 def cleanup_module(self):
1021 Module.cleanup_module(self)
1022 for obd_uuid in self.devlist:
1023 obd = self.db.lookup(obd_uuid)
1026 osc.cleanup_module()
1029 panic('osc not found:', osc_uuid)
1031 class LOVConfig(Module):
1032 def __init__(self,db):
1033 Module.__init__(self, 'LOVConfig', db)
1035 self.lov_uuid = self.db.get_first_ref('lov')
1036 l = self.db.lookup(self.lov_uuid)
1041 self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off,
1042 lov.pattern, lov.devlist, lov.mds_name)
1043 lctl.lov_setconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
1044 lov.stripe_sz, lov.stripe_off, lov.pattern,
1045 string.join(lov.devlist))
1051 class MDSDEV(Module):
1052 def __init__(self,db):
1053 Module.__init__(self, 'MDSDEV', db)
1054 self.devname = self.db.get_val('devpath','')
1055 self.size = self.db.get_val_int('devsize', 0)
1056 self.fstype = self.db.get_val('fstype', '')
1057 # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
1058 self.uuid = self.db.get_first_ref('mds')
1059 mds = self.db.lookup(self.uuid)
1060 self.name = mds.getName()
1061 self.lovconfig_uuids = mds.get_refs('lovconfig')
1062 # FIXME: if fstype not set, then determine based on kernel version
1063 self.format = self.db.get_val('autoformat', "no")
1064 if self.fstype == 'extN':
1065 self.add_lustre_module('extN', 'extN')
1066 self.add_lustre_module('mds', 'mds')
1068 self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
1071 if is_prepared(self.uuid):
1073 self.info(self.devname, self.fstype, self.format)
1074 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
1075 if not is_prepared('MDT_UUID'):
1076 lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
1078 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
1079 setup ="%s %s" %(blkdev, self.fstype))
1080 for uuid in self.lovconfig_uuids:
1081 db = self.db.lookup(uuid)
1082 lovconfig = LOVConfig(db)
1086 if is_prepared('MDT_UUID'):
1088 lctl.cleanup("MDT", "MDT_UUID")
1089 except CommandError, e:
1090 print "cleanup failed: ", self.name
1093 if not is_prepared(self.uuid):
1095 Module.cleanup(self)
1096 clean_loop(self.devname)
1098 # Very unusual case, as there is no MDC element in the XML anymore
1099 # Builds itself from an MDS node
1101 def __init__(self,db):
1102 self.mds_uuid = db.getUUID()
1103 self.mds_name = db.getName()
1105 node_name = config.select(self.mds_name)
1107 self.mdd_uuid = self.db.get_mdd(node_name, self.mds_uuid)
1109 self.mdd_uuid = db.get_first_ref('active')
1110 if not self.mdd_uuid:
1111 panic("No MDSDEV found for MDS service:", self.mds_name)
1112 self.module_name = 'MDC'
1113 self.kmodule_list = []
1117 host = socket.gethostname()
1118 self.name = 'MDC_%s' % (self.mds_name)
1119 self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
1120 int(random.random() * 1048576))
1122 self.lookup_server(self.mdd_uuid)
1123 self.add_lustre_module('mdc', 'mdc')
1126 if is_prepared(self.uuid):
1128 self.info(self.mds_uuid)
1129 srv = self.get_server()
1130 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
1131 lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
1132 setup ="%s %s" %(self.mds_uuid, srv.uuid))
1135 def __init__(self, db):
1136 Module.__init__(self, 'OBD', db)
1137 self.obdtype = self.db.get_val('obdtype')
1138 self.devname = self.db.get_val('devpath', '')
1139 self.size = self.db.get_val_int('devsize', 0)
1140 self.fstype = self.db.get_val('fstype', '')
1141 self.active_target = self.db.get_first_ref('active')
1142 # FIXME: if fstype not set, then determine based on kernel version
1143 self.format = self.db.get_val('autoformat', 'yes')
1144 if self.fstype == 'extN':
1145 self.add_lustre_module('extN', 'extN')
1146 self.add_lustre_module(self.obdtype, self.obdtype)
1148 self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
1150 # need to check /proc/mounts and /etc/mtab before
1151 # formatting anything.
1152 # FIXME: check if device is already formatted.
1154 if is_prepared(self.uuid):
1156 self.info(self.obdtype, self.devname, self.size, self.fstype, self.format)
1157 if self.obdtype == 'obdecho':
1160 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
1161 lctl.newdev(attach="%s %s %s" % (self.obdtype, self.name, self.uuid),
1162 setup ="%s %s" %(blkdev, self.fstype))
1164 if not is_prepared(self.uuid):
1166 Module.cleanup(self)
1167 if not self.obdtype == 'obdecho':
1168 clean_loop(self.devname)
1171 def __init__(self, db):
1172 Module.__init__(self, 'COBD', db)
1173 self.real_uuid = self.db.get_first_ref('realobd')
1174 self.cache_uuid = self.db.get_first_ref('cacheobd')
1175 self.add_lustre_module('cobd' , 'cobd')
1177 # need to check /proc/mounts and /etc/mtab before
1178 # formatting anything.
1179 # FIXME: check if device is already formatted.
1181 if is_prepared(self.uuid):
1183 self.info(self.real_uuid, self.cache_uuid)
1184 lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
1185 setup ="%s %s" %(self.real_uuid, self.cache_uuid))
1188 def __init__(self,db):
1189 Module.__init__(self, 'OST', db)
1190 self.obd_uuid = self.db.get_first_ref('obd')
1191 self.add_lustre_module('ost', 'ost')
1194 if is_prepared(self.uuid):
1196 self.info(self.obd_uuid)
1197 lctl.newdev(attach="ost %s %s" % (self.name, self.uuid),
1198 setup ="%s" % (self.obd_uuid))
1201 # virtual interface for OSC and LOV
1203 def __init__(self,db):
1204 Module.__init__(self, 'VOSC', db)
1205 if db.get_class() == 'lov':
1208 self.osc = get_osc(db)
1210 return self.osc.uuid
    def load_module(self):
        """Delegate kernel module loading to the wrapped OSC/LOV object."""
        self.osc.load_module()
    def cleanup_module(self):
        """Delegate kernel module unloading to the wrapped OSC/LOV object."""
        self.osc.cleanup_module()
1222 def __init__(self, db, obd_name, obd_uuid, ost_uuid):
1224 self.module_name = 'OSC'
1225 self.name = 'OSC_%s' % (obd_name)
1226 self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
1227 self.kmodule_list = []
1231 self.obd_uuid = obd_uuid
1232 self.ost_uuid = ost_uuid
1233 debug("OSC:", obd_uuid, ost_uuid)
1234 self.lookup_server(self.ost_uuid)
1235 self.add_lustre_module('osc', 'osc')
1237 def prepare(self, ignore_connect_failure = 0):
1238 if is_prepared(self.uuid):
1240 self.info(self.obd_uuid, self.ost_uuid)
1241 srv = self.get_server()
1244 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
1248 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
1250 panic ("no route to", srv.nid)
1251 except CommandError:
1252 if (ignore_connect_failure == 0):
1255 lctl.newdev(attach="osc %s %s" % (self.name, self.uuid),
1256 setup ="%s %s" %(self.obd_uuid, srv.uuid))
1259 srv = self.get_server()
1261 Module.cleanup(self)
1263 self.info(self.obd_uuid, self.ost_uuid)
1267 lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
1268 except CommandError, e:
1269 print "del_route failed: ", self.name
1272 Module.cleanup(self)
1275 class ECHO_CLIENT(Module):
1276 def __init__(self,db):
1277 Module.__init__(self, 'ECHO_CLIENT', db)
1278 self.add_lustre_module('obdecho', 'obdecho')
1279 self.obd_uuid = self.db.get_first_ref('obd')
1280 obd = self.db.lookup(self.obd_uuid)
1281 self.osc = VOSC(obd)
1284 if is_prepared(self.uuid):
1286 self.osc.prepare() # XXX This is so cheating. -p
1287 self.info(self.obd_uuid)
1289 lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
1290 setup = self.osc.get_uuid())
1293 if not is_prepared(self.uuid):
1297 def load_module(self):
1298 self.osc.load_module()
1299 Module.load_module(self)
1300 def cleanup_module(self):
1301 Module.cleanup_module(self)
1302 self.osc.cleanup_module()
1305 class Mountpoint(Module):
1306 def __init__(self,db):
1307 Module.__init__(self, 'MTPT', db)
1308 self.path = self.db.get_val('path')
1309 self.mds_uuid = self.db.get_first_ref('mds')
1310 self.obd_uuid = self.db.get_first_ref('obd')
1311 self.add_lustre_module('mdc', 'mdc')
1312 self.add_lustre_module('llite', 'llite')
1313 obd = self.db.lookup(self.obd_uuid)
1314 self.osc = VOSC(obd)
1319 mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
1320 self.info(self.path, self.mds_uuid, self.obd_uuid)
1321 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
1322 (self.osc.get_uuid(), mdc_uuid, self.path)
1323 run("mkdir", self.path)
1326 panic("mount failed:", self.path)
1329 self.info(self.path, self.mds_uuid,self.obd_uuid)
1330 if fs_is_mounted(self.path):
1332 (rc, out) = run("umount", "-f", self.path)
1334 (rc, out) = run("umount", self.path)
1336 raise CommandError('umount', out, rc)
1338 if fs_is_mounted(self.path):
1339 panic("fs is still mounted:", self.path)
1342 cleanup_mdc(self.db, self.mds_uuid)
1344 def load_module(self):
1345 self.osc.load_module()
1346 Module.load_module(self)
1347 def cleanup_module(self):
1348 Module.cleanup_module(self)
1349 self.osc.cleanup_module()
1352 # ============================================================
1353 # XML processing and query
1355 # OSC is no longer in the xml, so we have to fake it.
1356 # this is getting ugly and begging for another refactoring
1357 def get_osc(obd_dom):
1359 osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
    def lookup(self, uuid):
        """Return a new LustreDB instance for the node with this uuid."""
        return self._lookup_by_uuid(uuid)
    def lookup_name(self, name, class_name = ""):
        """Return a new LustreDB instance found by name, optionally restricted to class_name."""
        return self._lookup_by_name(name, class_name)
    def lookup_class(self, class_name):
        """Return a new LustreDB instance selected by element class."""
        return self._lookup_by_class(class_name)
1375 def get_val(self, tag, default=None):
1376 v = self._get_val(tag)
1381 debug("LustreDB", self.getName(), " no value for:", tag)
    def get_class(self):
        """Return this node's element class (e.g. 'lov'); delegates to the backend."""
        return self._get_class()
1387 def get_val_int(self, tag, default=0):
1388 str = self._get_val(tag)
1394 panic("text value is not integer:", str)
1396 def get_first_ref(self, tag):
1397 """ Get the first uuidref of the type TAG. Only
1398 one is expected. Returns the uuid."""
1399 uuids = self._get_refs(tag)
1404 def get_refs(self, tag):
1405 """ Get all the refs of type TAG. Returns list of uuids. """
1406 uuids = self._get_refs(tag)
1409 def get_all_refs(self):
1410 """ Get all the refs. Returns list of uuids. """
1411 uuids = self._get_all_refs()
1414 def get_ost_net(self, uuid):
1415 ost = self.lookup(uuid)
1416 uuid = ost.get_first_ref('network')
1419 return ost.lookup(uuid)
1421 def nid2server(self, nid):
1422 netlist = self.parent.parent.attrs['network']
1423 for net_db in netlist:
1424 if net_db.get_val('nid') == nid:
1428 # the tag name is the service type
1429 # fixme: this should do some checks to make sure the dom_node is a service
1431 # determine what "level" a particular node is at.
1433 # the order of initialization is based on level.
1434 def getServiceLevel(self):
1435 type = self.get_class()
1437 if type in ('network',):
1439 elif type in ('device', 'ldlm'):
1441 elif type in ('obd', 'mdd', 'cobd'):
1443 elif type in ('mdsdev','ost'):
1445 elif type in ('mdc','osc'):
1447 elif type in ('lov',):
1449 elif type in ('mountpoint', 'echoclient'):
1452 if ret < config.minlevel() or ret > config.maxlevel():
1457 # return list of services in a profile. list is a list of tuples
1458 # [(level, db_object),]
1459 def getServices(self):
1461 for ref_class, ref_uuid in self.get_all_refs():
1462 servdb = self.lookup(ref_uuid)
1464 level = servdb.getServiceLevel()
1466 list.append((level, servdb))
1468 panic('service not found: ' + ref_uuid)
1473 # Find the mdsdev attached to node_name that points to
1475 # node->profiles->mdsdev_refs->mds
1476 def get_mdd(self, node_name, mds_uuid):
1477 node_db = self.lookup_name(node_name)
1480 prof_list = node_db.get_refs('profile')
1481 for prof_uuid in prof_list:
1482 prof_db = node_db.lookup(prof_uuid)
1483 mdd_list = prof_db.get_refs('mdsdev')
1484 for mdd_uuid in mdd_list:
1485 mdd = self.lookup(mdd_uuid)
1486 if mdd.get_first_ref('mds') == mds_uuid:
1491 class LustreDB_XML(LustreDB):
1492 def __init__(self, dom, root_node):
1495 self.root_node = root_node
1497 def xmltext(self, dom_node, tag):
1498 list = dom_node.getElementsByTagName(tag)
1501 dom_node.normalize()
1502 if dom_node.firstChild:
1503 txt = string.strip(dom_node.firstChild.data)
1507 def xmlattr(self, dom_node, attr):
1508 return dom_node.getAttribute(attr)
1510 def _get_val(self, tag):
1511 """a value could be an attribute of the current node
1512 or the text value in a child node"""
1513 ret = self.xmlattr(self.dom_node, tag)
1515 ret = self.xmltext(self.dom_node, tag)
1518 def _get_class(self):
1519 return self.dom_node.nodeName
1522 # [(ref_class, ref_uuid),]
1523 def _get_all_refs(self):
1525 for n in self.dom_node.childNodes:
1526 if n.nodeType == n.ELEMENT_NODE:
1527 ref_uuid = self.xml_get_ref(n)
1528 ref_class = n.nodeName
1529 list.append((ref_class, ref_uuid))
1534 def _get_refs(self, tag):
1535 """ Get all the refs of type TAG. Returns list of uuids. """
1537 refname = '%s_ref' % tag
1538 reflist = self.dom_node.getElementsByTagName(refname)
1540 uuids.append(self.xml_get_ref(r))
1543 def xmllookup_by_uuid(self, dom_node, uuid):
1544 for n in dom_node.childNodes:
1545 if n.nodeType == n.ELEMENT_NODE:
1546 if self.xml_get_uuid(n) == uuid:
1549 n = self.xmllookup_by_uuid(n, uuid)
1553 def _lookup_by_uuid(self, uuid):
1554 dom = self. xmllookup_by_uuid(self.root_node, uuid)
1556 return LustreDB_XML(dom, self.root_node)
1558 def xmllookup_by_name(self, dom_node, name):
1559 for n in dom_node.childNodes:
1560 if n.nodeType == n.ELEMENT_NODE:
1561 if self.xml_get_name(n) == name:
1564 n = self.xmllookup_by_name(n, name)
1568 def _lookup_by_name(self, name, class_name):
1569 dom = self.xmllookup_by_name(self.root_node, name)
1571 return LustreDB_XML(dom, self.root_node)
1573 def xmllookup_by_class(self, dom_node, class_name):
1574 return dom_node.getElementsByTagName(class_name)
1576 def _lookup_by_class(self, class_name):
1578 domlist = self.xmllookup_by_class(self.root_node, class_name)
1579 for node in domlist:
1580 ret.append(LustreDB_XML(node, self.root_node))
1583 def xml_get_name(self, n):
1584 return n.getAttribute('name')
1587 return self.xml_get_name(self.dom_node)
1589 def xml_get_ref(self, n):
1590 return n.getAttribute('uuidref')
1592 def xml_get_uuid(self, dom_node):
1593 return dom_node.getAttribute('uuid')
1596 return self.xml_get_uuid(self.dom_node)
1598 def get_routes(self, type, gw):
1599 """ Return the routes as a list of tuples of the form:
1600 [(type, gw, lo, hi),]"""
1602 tbl = self.dom_node.getElementsByTagName('route_tbl')
1604 routes = t.getElementsByTagName('route')
1606 lo = self.xmlattr(r, 'lo')
1607 hi = self.xmlattr(r, 'hi', '')
1608 res.append((type, gw, lo, hi))
1611 def get_route_tbl(self):
1613 tbls = self.dom_node.getElementsByTagName('route_tbl')
1615 for r in tbl.getElementsByTagName('route'):
1616 net_type = self.xmlattr(r, 'type')
1617 gw = self.xmlattr(r, 'gw')
1618 lo = self.xmlattr(r, 'lo')
1619 hi = self.xmlattr(r,'hi', '')
1620 ret.append((net_type, gw, lo, hi))
1624 # ================================================================
1626 class LustreDB_LDAP(LustreDB):
1627 def __init__(self, name, attrs,
1630 url = "ldap://localhost",
1631 user = "cn=Manager, fs=lustre",
1637 self._parent = parent
1643 self._base = parent._base
1650 self.l = ldap.initialize(self._url)
1651 # Set LDAP protocol version used
1652 self.l.protocol_version=ldap.VERSION3
1653 # user and pw only needed if modifying db
1654 self.l.bind_s("", "", ldap.AUTH_SIMPLE);
1655 except ldap.LDAPerror, e:
1657 # FIXME, do something useful here
1662 def ldap_search(self, filter):
1663 """Return list of uuids matching the filter."""
1669 for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
1671 for v in attrs['uuid']:
1673 except ldap.NO_SUCH_OBJECT, e:
1675 except ldap.LDAPError, e:
1676 print e # FIXME: die here?
1679 ret.append(self._lookup_by_uuid(uuid))
1682 def _lookup_by_name(self, name, class_name):
1683 list = self.ldap_search("lustreName=%s" %(name))
1688 def _lookup_by_class(self, class_name):
1689 return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
1691 def _lookup_by_uuid(self, uuid):
1693 dn = "uuid=%s,%s" % (uuid, self._base)
1696 for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
1698 ret = LustreDB_LDAP(name, attrs, parent = self)
1700 except ldap.NO_SUCH_OBJECT, e:
1701 debug("NO_SUCH_OBJECT:", uuid)
1702 pass # just return empty list
1703 except ldap.LDAPError, e:
1704 print e # FIXME: die here?
1708 def _get_val(self, k):
1710 if self._attrs.has_key(k):
1712 if type(v) == types.ListType:
1718 def _get_class(self):
1719 return string.lower(self._attrs['objectClass'][0])
1722 # [(ref_class, ref_uuid),]
1723 def _get_all_refs(self):
1725 for k in self._attrs.keys():
1726 if re.search('.*Ref', k):
1727 for uuid in self._attrs[k]:
1728 list.append((k, uuid))
1731 def _get_refs(self, tag):
1732 """ Get all the refs of type TAG. Returns list of uuids. """
1734 refname = '%sRef' % tag
1735 if self._attrs.has_key(refname):
1736 return self._attrs[refname]
1740 return self._get_val('lustreName')
1743 return self._get_val('uuid')
1745 def get_route_tbl(self):
1748 ############################################################
1750 # FIXME: clean this mess up!
1753 def prepare_mdc(db, mds_uuid):
1755 mds_db = db.lookup(mds_uuid);
1757 panic("no mds:", mds_uuid)
1758 if saved_mdc.has_key(mds_uuid):
1759 return saved_mdc[mds_uuid]
1762 saved_mdc[mds_uuid] = mdc.uuid
1765 def cleanup_mdc(db, mds_uuid):
1767 mds_db = db.lookup(mds_uuid);
1769 panic("no mds:", mds_uuid)
1770 if not saved_mdc.has_key(mds_uuid):
1773 saved_mdc[mds_uuid] = mdc.uuid
1776 ############################################################
1777 # routing ("rooting")
1783 def init_node(node_db):
1784 global local_node, router_flag
1785 netlist = node_db.lookup_class('network')
1787 type = db.get_val('nettype')
1788 gw = db.get_val('nid')
1789 local_node.append((type, gw))
1791 def node_needs_router():
1794 def init_route_config(lustre):
1795 """ Scan the lustre config looking for routers. Build list of
1797 global routes, router_flag
1799 list = lustre.lookup_class('node')
1800 for node_db in list:
1801 if node_db.get_val_int('router', 0):
1803 for (local_type, local_nid) in local_node:
1805 netlist = node_db.lookup_class('network')
1807 if local_type == db.get_val('type'):
1808 gw = db.get_val('server')
1813 if local_type != db.get_val('type'):
1814 for route in db.get_routes(local_type, gw):
1815 routes.append(route)
1820 for iface in local_node:
1821 #debug("local_net a:", net.net_type, "b:", iface[0])
1822 if net.net_type == iface[0]:
1826 def find_route(net):
1827 global local_node, routes
1828 frm_type = local_node[0][0]
1829 to_type = net.net_type
1831 debug ('looking for route to', to_type,to)
1839 ############################################################
1842 def startService(db, module_flag):
1843 type = db.get_class()
1844 debug('Service:', type, db.getName(), db.getUUID())
1845 # there must be a more dynamic way of doing this...
1851 elif type == 'network':
1855 elif type == 'cobd':
1859 elif type == 'mdsdev':
1865 elif type == 'mountpoint':
1867 elif type == 'echoclient':
1870 panic ("unknown service type:", type)
1875 if config.cleanup():
1880 if config.nosetup():
1882 if config.cleanup():
1888 # Prepare the system to run lustre using a particular profile
1889 # in a the configuration.
1890 # * load & the modules
1891 # * setup networking for the current node
1892 # * make sure partitions are in place and prepared
1893 # * initialize devices with lctl
1894 # Levels is important, and needs to be enforced.
1895 def startProfile(prof_db, module_flag):
1897 panic("profile:", profile, "not found.")
1898 services = prof_db.getServices()
1899 if config.cleanup():
1902 startService(s[1], module_flag)
1907 def doHost(lustreDB, hosts):
1912 node_db = lustreDB.lookup_name(h, 'node')
1916 print 'No host entry found.'
1919 router_flag = node_db.get_val_int('router', 0)
1920 recovery_upcall = node_db.get_val('recovery_upcall', '')
1921 timeout = node_db.get_val_int('timeout', 0)
1925 init_route_config(lustreDB)
1927 # Two step process: (1) load modules, (2) setup lustre
1928 # if not cleaning, load modules first.
1929 module_flag = not config.cleanup()
1930 prof_list = node_db.get_refs('profile')
1931 for prof_uuid in prof_list:
1932 prof_db = node_db.lookup(prof_uuid)
1933 startProfile(prof_db, module_flag)
1935 if not config.cleanup():
1936 sys_set_debug_path()
1937 script = config.gdb_script()
1938 run(lctl.lctl, ' modules >', script)
1940 # dump /tmp/ogdb and sleep/pause here
1941 log ("The GDB module script is in", script)
1943 sys_set_timeout(timeout)
1944 sys_set_recovery_upcall(recovery_upcall)
1946 module_flag = not module_flag
1947 for prof_uuid in prof_list:
1948 prof_db = node_db.lookup(prof_uuid)
1949 startProfile(prof_db, module_flag)
1951 ############################################################
1952 # Command line processing
1954 def parse_cmdline(argv):
1955 short_opts = "hdnvf"
1956 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1957 "portals=", "makeldiff", "cleanup", "noexec",
1958 "help", "node=", "nomod", "nosetup",
1959 "dump=", "force", "minlevel=", "maxlevel=",
1960 "timeout=", "recovery_upcall=",
1961 "ldapurl=", "config=", "select="]
1966 opts, args = getopt.getopt(argv, short_opts, long_opts)
1967 except getopt.error:
1972 if o in ("-h", "--help"):
1974 if o in ("-d","--cleanup"):
1976 if o in ("-v", "--verbose"):
1978 if o in ("-n", "--noexec"):
1981 if o == "--portals":
1982 config.portals_dir(a)
1984 config.lustre_dir(a)
1985 if o == "--reformat":
1993 if o == "--nosetup":
1997 if o in ("-f", "--force"):
1999 if o == "--minlevel":
2001 if o == "--maxlevel":
2003 if o == "--timeout":
2005 if o == "--recovery_upcall":
2006 config.recovery_upcall(a)
2007 if o == "--ldapurl":
2010 config.config_name(a)
2012 config.init_select(a)
2020 s = urllib.urlopen(url)
2026 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
2027 base = os.path.dirname(cmd)
2028 if os.access(base+"/Makefile", os.R_OK):
2029 if not config.lustre_dir():
2030 config.lustre_dir(os.path.join(base, ".."))
2031 # normalize the portals dir, using command line arg if set
2032 if config.portals_dir():
2033 portals_dir = config.portals_dir()
2034 dir = os.path.join(config.lustre_dir(), portals_dir)
2035 config.portals_dir(dir)
2036 elif config.lustre_dir() and config.portals_dir():
2038 # if --lustre and --portals, normalize portals
2039 # can ignore POTRALS_DIR here, since it is probly useless here
2040 dir = config.portals_dir()
2041 dir = os.path.join(config.lustre_dir(), dir)
2042 config.portals_dir(dir)
2044 def sysctl(path, val):
2048 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Push the configured debug log path into /proc
    (portals/debug_path)."""
    debug("debug path: ", config.debug_path())
    sysctl('portals/debug_path', config.debug_path())
2059 def sys_set_recovery_upcall(upcall):
2060 # the command overrides the value in the node config
2061 if config.recovery_upcall():
2062 upcall = config.recovery_upcall()
2064 debug("setting recovery_upcall:", upcall)
2065 sysctl('lustre/recovery_upcall', upcall)
2067 def sys_set_timeout(timeout):
2068 # the command overrides the value in the node config
2069 if config.timeout() > 0:
2070 timeout = config.timeout()
2072 debug("setting timeout:", timeout)
2073 sysctl('lustre/timeout', timeout)
def sys_set_ptldebug(ptldebug):
    """Set the portals debug mask via /proc.

    A value given on the command line overrides the PTLDEBUG value
    taken from the node config."""
    if config.ptldebug():
        ptldebug = config.ptldebug()
    sysctl('portals/debug', ptldebug)
2081 def sys_set_netmem_max(path, max):
2082 debug("setting", path, "to at least", max)
2090 fp = open(path, 'w')
2091 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character devices if they are absent
    (both live on major 10, minors 240 and 241)."""
    for node, minor in (('/dev/portals', 240), ('/dev/obd', 241)):
        if not os.access(node, os.R_OK):
            run('mknod %s c 10 %d' % (node, minor))
2102 # Add dir to the global PATH, if not already there.
2103 def add_to_path(new_dir):
2104 syspath = string.split(os.environ['PATH'], ':')
2105 if new_dir in syspath:
2107 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
2110 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
2111 # ensure basic elements are in the system path
2112 def sanitise_path():
2113 for dir in DEFAULT_PATH:
2116 # Initialize or shutdown lustre according to a configuration file
2117 # * prepare the system for lustre
2118 # * configure devices with lctl
2119 # Shutdown does steps in reverse
2122 global TCP_ACCEPTOR, lctl, MAXTCPBUF
2124 host = socket.gethostname()
2126 # the PRNG is normally seeded with time(), which is not so good for starting
2127 # time-synchronized clusters
2128 input = open('/dev/urandom', 'r')
2130 print 'Unable to open /dev/urandom!'
2132 seed = input.read(32)
2138 args = parse_cmdline(sys.argv[1:])
2140 if not os.access(args[0], os.R_OK):
2141 print 'File not found or readable:', args[0]
2144 dom = xml.dom.minidom.parse(args[0])
2146 panic("%s does not appear to be a config file." % (args[0]))
2147 sys.exit(1) # make sure to die here, even in debug mode.
2148 db = LustreDB_XML(dom.documentElement, dom.documentElement)
2149 elif config.ldapurl():
2150 if not config.config_name():
2151 panic("--ldapurl requires --config name")
2152 dn = "config=%s,fs=lustre" % (config.config_name())
2153 db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
2159 node_list.append(config.node())
2162 node_list.append(host)
2163 node_list.append('localhost')
2164 debug("configuring for host: ", node_list)
2167 config._debug_path = config._debug_path + '-' + host
2168 config._gdb_script = config._gdb_script + '-' + host
2170 setupModulePath(sys.argv[0])
2172 TCP_ACCEPTOR = find_prog('acceptor')
2173 if not TCP_ACCEPTOR:
2175 TCP_ACCEPTOR = 'acceptor'
2176 debug('! acceptor not found')
2178 panic('acceptor not found')
2180 lctl = LCTLInterface('lctl')
2183 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
2184 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
2186 doHost(db, node_list)
2188 if __name__ == "__main__":
2191 except LconfError, e:
2193 except CommandError, e:
2197 if first_cleanup_error:
2198 sys.exit(first_cleanup_error)