3 # Copyright (C) 2002 Cluster File Systems, Inc.
4 # Author: Robert Read <rread@clusterfs.com>
5 # This file is part of Lustre, http://www.lustre.org.
7 # Lustre is free software; you can redistribute it and/or
8 # modify it under the terms of version 2 of the GNU General Public
9 # License as published by the Free Software Foundation.
11 # Lustre is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with Lustre; if not, write to the Free Software
18 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 # lconf - lustre configuration tool
22 # lconf is the main driver script for starting and stopping
23 # lustre filesystem services.
25 # Based in part on the XML obdctl modifications done by Brian Behlendorf
27 import sys, getopt, types
28 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import xml.dom.minidom
32 if sys.version[0] == '1':
33 from FCNTL import F_GETFL, F_SETFL
35 from fcntl import F_GETFL, F_SETFL
40 DEFAULT_TCPBUF = 1048576
42 # Maximum number of devices to search for.
43 # (the /dev/loop* nodes need to be created beforehand)
44 MAX_LOOP_DEVICES = 256
45 PORTALS_DIR = '@PORTALSLOC@'
first_cleanup_error = 0

def cleanup_error(rc):
    """Remember the first non-zero cleanup return code.

    Only the first error is recorded so that the original failure is
    what gets reported; later errors during cleanup are ignored.
    """
    global first_cleanup_error
    if not first_cleanup_error:
        first_cleanup_error = rc
55 print """usage: lconf config.xml
57 config.xml Lustre configuration in xml format.
58 --ldapurl LDAP server URL, eg. ldap://localhost
59 --config Cluster config name used for LDAP query
60 --node <nodename> Load config for <nodename>
61 --select service=nodeA,service2=nodeB U
62 -d | --cleanup Cleans up config. (Shutdown)
63 -f | --force Forced unmounting and/or obd detach during cleanup
64 -v | --verbose Print system commands as they are run
65 -h | --help Print this help
66 --gdb Prints message after creating gdb module script
67 and sleeps for 5 seconds.
68 -n | --noexec Prints the commands and steps that will be run for a
69 config without executing them. This can used to check if a
70 config file is doing what it should be doing. (Implies -v)
71 --nomod Skip load/unload module step.
72 --nosetup Skip device setup/cleanup step.
73 --reformat Reformat all devices (without question)
74 --dump <file> Dump the kernel debug log before portals is unloaded
75 --minlevel <num> Specify the minimum level of services to configure/cleanup (default 0)
76 --maxlevel <num> Specify the maximum level of services to configure/cleanup (default 100)
77 Levels are aproximatly like:
84 70 - mountpoint, echo_client
85 --lustre=src_dir Base directory of lustre sources. This parameter will cause lconf
86 to load modules from a source tree.
87 --portals=src_dir Portals source directory. If this is a relative path, then it is
88 assumed to be relative to lustre.
92 --ldap server LDAP server with lustre config database
93 --makeldiff Translate xml source to LDIFF
94 This are perhaps not needed:
98 # ============================================================
99 # Config parameters, encapsulated in a class
115 self._gdb_script = '/tmp/ogdb'
116 self._debug_path = '/tmp/lustre-log'
117 self._dump_file = None
118 self._lustre_dir = ''
119 self._portals_dir = ''
123 self._recovery_upcall = ''
125 self._config_name = ''
128 def verbose(self, flag = None):
129 if flag: self._verbose = flag
132 def noexec(self, flag = None):
133 if flag: self._noexec = flag
def reformat(self, flag = None):
    """Accessor for the reformat setting.

    A truthy *flag* updates the stored value; the current value is
    always returned.
    """
    if flag:
        self._reformat = flag
    return self._reformat
140 def cleanup(self, flag = None):
141 if flag: self._cleanup = flag
144 def gdb(self, flag = None):
145 if flag: self._gdb = flag
148 def nomod(self, flag = None):
149 if flag: self._nomod = flag
152 def nosetup(self, flag = None):
153 if flag: self._nosetup = flag
156 def force(self, flag = None):
157 if flag: self._force = flag
160 def node(self, val = None):
161 if val: self._node = val
164 def gdb_script(self):
165 if os.path.isdir('/r'):
166 return '/r' + self._gdb_script
168 return self._gdb_script
170 def debug_path(self):
171 if os.path.isdir('/r'):
172 return '/r' + self._debug_path
174 return self._debug_path
def dump_file(self, val = None):
    """Accessor for the kernel-debug dump file path (None when unset)."""
    if val:
        self._dump_file = val
    return self._dump_file
def minlevel(self, val = None):
    """Accessor for the minimum service level.

    A truthy *val* is converted with int() before being stored; the
    current level is always returned.
    """
    if val:
        self._minlevel = int(val)
    return self._minlevel
def maxlevel(self, val = None):
    """Accessor for the maximum service level.

    A truthy *val* is converted with int() before being stored; the
    current level is always returned.
    """
    if val:
        self._maxlevel = int(val)
    return self._maxlevel
def portals_dir(self, val = None):
    """Accessor for the portals source directory."""
    if val:
        self._portals_dir = val
    return self._portals_dir
def lustre_dir(self, val = None):
    """Accessor for the lustre source directory."""
    if val:
        self._lustre_dir = val
    return self._lustre_dir
195 def timeout(self, val = None):
196 if val: self._timeout = val
def recovery_upcall(self, val = None):
    """Accessor for the recovery upcall command path."""
    if val:
        self._recovery_upcall = val
    return self._recovery_upcall
203 def ldapurl(self, val = None):
204 if val: self._ldapurl = val
def config_name(self, val = None):
    """Accessor for the cluster configuration name (used for LDAP queries)."""
    if val:
        self._config_name = val
    return self._config_name
211 def init_select(self, arg):
212 # arg = "service=nodeA,service2=nodeB"
213 list = string.split(arg, ',')
215 srv, node = string.split(entry, '=')
216 self._select[srv] = node
218 def select(self, srv):
219 if self._select.has_key(srv):
220 return self._select[srv]
226 # ============================================================
227 # debugging and error funcs
def fixme(msg = "this feature"):
    """Raise LconfError reporting that *msg* is not implemented.

    Fixes the misspelled message text ('implmemented') and uses the
    call-form raise already used elsewhere in this file for LconfError.
    """
    raise LconfError(msg + ' not implemented yet.')
233 msg = string.join(map(str,args))
234 if not config.noexec():
235 raise LconfError(msg)
240 msg = string.join(map(str,args))
245 print string.strip(s)
249 msg = string.join(map(str,args))
252 # ============================================================
253 # locally defined exceptions
254 class CommandError (exceptions.Exception):
255 def __init__(self, cmd_name, cmd_err, rc=None):
256 self.cmd_name = cmd_name
257 self.cmd_err = cmd_err
262 if type(self.cmd_err) == types.StringType:
264 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
266 print "! %s: %s" % (self.cmd_name, self.cmd_err)
267 elif type(self.cmd_err) == types.ListType:
269 print "! %s (error %d):" % (self.cmd_name, self.rc)
271 print "! %s:" % (self.cmd_name)
272 for s in self.cmd_err:
273 print "> %s" %(string.strip(s))
277 class LconfError (exceptions.Exception):
278 def __init__(self, args):
282 # ============================================================
283 # handle lctl interface
286 Manage communication with lctl
289 def __init__(self, cmd):
291 Initialize close by finding the lctl binary.
293 self.lctl = find_prog(cmd)
296 debug('! lctl not found')
299 raise CommandError('lctl', "unable to find lctl binary.")
def set_nonblock(self, fd):
    """Switch file descriptor *fd* into non-blocking mode.

    Reads the current status flags and sets O_NDELAY on top of them so
    that reads from lctl's pipes never stall the select loop.
    """
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
308 the cmds are written to stdin of lctl
309 lctl doesn't return errors when run in script mode, so
311 should modify command line to accept multiple commands, or
312 create complex command line options
314 debug("+", self.lctl, cmds)
315 if config.noexec(): return (0, [])
317 child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
318 child.tochild.write(cmds + "\n")
319 child.tochild.close()
321 # From "Python Cookbook" from O'Reilly
322 outfile = child.fromchild
323 outfd = outfile.fileno()
324 self.set_nonblock(outfd)
325 errfile = child.childerr
326 errfd = errfile.fileno()
327 self.set_nonblock(errfd)
329 outdata = errdata = ''
332 ready = select.select([outfd,errfd],[],[]) # Wait for input
333 if outfd in ready[0]:
334 outchunk = outfile.read()
335 if outchunk == '': outeof = 1
336 outdata = outdata + outchunk
337 if errfd in ready[0]:
338 errchunk = errfile.read()
339 if errchunk == '': erreof = 1
340 errdata = errdata + errchunk
341 if outeof and erreof: break
342 # end of "borrowed" code
345 if os.WIFEXITED(ret):
346 rc = os.WEXITSTATUS(ret)
349 if rc or len(errdata):
350 raise CommandError(self.lctl, errdata, rc)
353 def runcmd(self, *args):
355 run lctl using the command line
357 cmd = string.join(map(str,args))
358 debug("+", self.lctl, cmd)
359 rc, out = run(self.lctl, cmd)
361 raise CommandError(self.lctl, out, rc)
365 def network(self, net, nid):
366 """ initialized network and add "self" """
367 # Idea: "mynid" could be used for all network types to add "self," and then
368 # this special case would be gone and the "self" hack would be hidden.
369 if net in ('tcp', 'toe'):
374 quit""" % (net, nid, nid)
383 # create a new connection
384 def connect(self, net, nid, port, servuuid, send_mem, recv_mem):
385 if net in ('tcp', 'toe'):
392 quit""" % (net, servuuid, nid, send_mem, recv_mem, nid, port, )
398 quit""" % (net, servuuid, nid, nid, port, )
402 # add a route to a range
403 def add_route(self, net, gw, lo, hi):
407 quit """ % (net, gw, lo, hi)
411 def del_route(self, net, gw, lo, hi):
419 # add a route to a host
420 def add_route_host(self, net, uuid, gw, tgt):
425 quit """ % (net, uuid, tgt, gw, tgt)
428 # add a route to a range
429 def del_route_host(self, net, uuid, gw, tgt):
435 quit """ % (net, uuid, tgt)
438 # disconnect one connection
439 def disconnect(self, net, nid, port, servuuid):
445 quit""" % (net, nid, servuuid)
449 def disconnectAll(self, net):
458 # create a new device with lctl
459 def newdev(self, attach, setup = ""):
464 quit""" % (attach, setup)
468 def cleanup(self, name, uuid):
474 quit""" % (name, ('', 'force')[config.force()])
478 def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist):
482 lov_setconfig %s %d %d %d %s %s
483 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
487 def dump(self, dump_file):
490 quit""" % (dump_file)
493 # get list of devices
494 def device_list(self):
495 rc, out = self.runcmd('device_list')
499 def lustre_version(self):
500 rc, out = self.runcmd('version')
503 # ============================================================
504 # Various system-level functions
505 # (ideally moved to their own module)
507 # Run a command and return the output and status.
508 # stderr is sent to /dev/null, could use popen3 to
509 # save it if necessary
511 cmd = string.join(map(str,args))
513 if config.noexec(): return (0, [])
514 f = os.popen(cmd + ' 2>&1')
523 # Run a command in the background.
524 def run_daemon(*args):
525 cmd = string.join(map(str,args))
527 if config.noexec(): return 0
528 f = os.popen(cmd + ' 2>&1')
536 # Determine full path to use for an external command
537 # searches dirname(argv[0]) first, then PATH
539 syspath = string.split(os.environ['PATH'], ':')
540 cmdpath = os.path.dirname(sys.argv[0])
541 syspath.insert(0, cmdpath);
542 if config.portals_dir():
543 syspath.insert(0, os.path.join(cmdpath, config.portals_dir()+'/linux/utils/'))
545 prog = os.path.join(d,cmd)
546 if os.access(prog, os.X_OK):
550 # Recursively look for file starting at base dir
551 def do_find_file(base, mod):
552 fullname = os.path.join(base, mod)
553 if os.access(fullname, os.R_OK):
555 for d in os.listdir(base):
556 dir = os.path.join(base,d)
557 if os.path.isdir(dir):
558 module = do_find_file(dir, mod)
562 def find_module(src_dir, dev_dir, modname):
563 mod = '%s.o' % (modname)
564 module = src_dir +'/'+ dev_dir +'/'+ mod
566 if os.access(module, os.R_OK):
572 # is the path a block device?
579 return stat.S_ISBLK(s[stat.ST_MODE])
581 # build fs according to type
583 def mkfs(dev, devsize, fstype):
586 # devsize is in 1k, and fs block count is in 4k
587 block_cnt = devsize/4
589 if(fstype in ('ext3', 'extN')):
590 mkfs = 'mkfs.ext2 -j -b 4096 -F '
591 elif (fstype == 'reiserfs'):
592 mkfs = 'mkreiserfs -ff'
594 print 'unsupported fs type: ', fstype
596 (ret, out) = run (mkfs, dev, block_cnt)
598 panic("Unable to build fs:", dev)
599 # enable hash tree indexing on fsswe
600 # FIXME: this check can probably go away on 2.5
602 htree = 'echo "feature FEATURE_C5" | debugfs -w'
603 (ret, out) = run (htree, dev)
605 panic("Unable to enable htree:", dev)
607 # some systems use /dev/loopN, some /dev/loop/N
611 if not os.access(loop + str(0), os.R_OK):
613 if not os.access(loop + str(0), os.R_OK):
614 panic ("can't access loop devices")
617 # find loop device assigned to thefile
620 for n in xrange(0, MAX_LOOP_DEVICES):
622 if os.access(dev, os.R_OK):
623 (stat, out) = run('losetup', dev)
624 if (out and stat == 0):
625 m = re.search(r'\((.*)\)', out[0])
626 if m and file == m.group(1):
632 # create file if necessary and assign the first free loop device
633 def init_loop(file, size, fstype):
634 dev = find_loop(file)
636 print 'WARNING file:', file, 'already mapped to', dev
638 if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
640 panic(file, "size must be larger than 8MB, currently set to:", size)
641 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
644 panic("Unable to create backing store:", file)
647 # find next free loop
648 for n in xrange(0, MAX_LOOP_DEVICES):
650 if os.access(dev, os.R_OK):
651 (stat, out) = run('losetup', dev)
653 run('losetup', dev, file)
656 print "out of loop devices"
658 print "out of loop devices"
661 # undo loop assignment
662 def clean_loop(file):
663 dev = find_loop(file)
665 ret, out = run('losetup -d', dev)
667 log('unable to clean loop device:', dev, 'for file:', file)
670 # determine if dev is formatted as a <fstype> filesystem
671 def need_format(fstype, dev):
672 # FIXME don't know how to implement this
675 # initialize a block device if needed
676 def block_dev(dev, size, fstype, format):
677 if config.noexec(): return dev
678 if not is_block(dev):
679 dev = init_loop(dev, size, fstype)
680 if config.reformat() or (need_format(fstype, dev) and format == 'yes'):
681 mkfs(dev, size, fstype)
684 # panic("device:", dev,
685 # "not prepared, and autoformat is not set.\n",
686 # "Rerun with --reformat option to format ALL filesystems")
691 """lookup IP address for an interface"""
692 rc, out = run("/sbin/ifconfig", iface)
695 addr = string.split(out[1])[1]
696 ip = string.split(addr, ':')[1]
699 def get_local_address(net_type, wildcard):
700 """Return the local address for the network type."""
702 if net_type in ('tcp', 'toe'):
704 iface, star = string.split(wildcard, ':')
705 local = if2addr(iface)
707 panic ("unable to determine ip for:", wildcard)
709 host = socket.gethostname()
710 local = socket.gethostbyname(host)
711 elif net_type == 'elan':
712 # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position'
714 fp = open('/proc/elan/device0/position', 'r')
715 lines = fp.readlines()
724 elif net_type == 'gm':
725 fixme("automatic local address for GM")
729 def is_prepared(uuid):
730 """Return true if a device exists for the uuid"""
731 # expect this format:
732 # 1 UP ldlm ldlm ldlm_UUID 2
734 out = lctl.device_list()
736 if uuid == string.split(s)[4]:
738 except CommandError, e:
742 def fs_is_mounted(path):
743 """Return true if path is a mounted lustre filesystem"""
745 fp = open('/proc/mounts')
746 lines = fp.readlines()
750 if a[1] == path and a[2] == 'lustre_lite':
757 # ============================================================
758 # Classes to prepare and cleanup the various objects
761 """ Base class for the rest of the modules. The default cleanup method is
762 defined here, as well as some utilitiy funcs.
764 def __init__(self, module_name, db):
766 self.module_name = module_name
767 self.name = self.db.getName()
768 self.uuid = self.db.getUUID()
769 self.kmodule_list = []
773 def info(self, *args):
774 msg = string.join(map(str,args))
775 print self.module_name + ":", self.name, self.uuid, msg
777 def lookup_server(self, srv_uuid):
778 """ Lookup a server's network information """
779 net = self.db.get_ost_net(srv_uuid)
781 panic ("Unable to find a server for:", srv_uuid)
782 self._server = Network(net)
784 def get_server(self):
788 """ default cleanup, used for most modules """
790 srv = self.get_server()
791 if srv and local_net(srv):
793 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
794 except CommandError, e:
795 log(self.module_name, "disconnect failed: ", self.name)
799 lctl.cleanup(self.name, self.uuid)
800 except CommandError, e:
801 log(self.module_name, "cleanup failed: ", self.name)
def add_portals_module(self, dev_dir, modname):
    """Queue a portals kernel module for loading by load_module()."""
    entry = (config.portals_dir(), dev_dir, modname)
    self.kmodule_list.append(entry)
def add_lustre_module(self, dev_dir, modname):
    """Queue a lustre kernel module for loading by load_module()."""
    entry = (config.lustre_dir(), dev_dir, modname)
    self.kmodule_list.append(entry)
813 def mod_loaded(self, modname):
814 """Check if a module is already loaded. Look in /proc/modules for it."""
815 fp = open('/proc/modules')
816 lines = fp.readlines()
818 # please forgive my tired fingers for this one
819 ret = filter(lambda word, mod=modname: word == mod,
820 map(lambda line: string.split(line)[0], lines))
823 def load_module(self):
824 """Load all the modules in the list in the order they appear."""
825 for src_dir, dev_dir, mod in self.kmodule_list:
826 # (rc, out) = run ('/sbin/lsmod | grep -s', mod)
827 if self.mod_loaded(mod) and not config.noexec():
829 log ('loading module:', mod)
831 module = find_module(src_dir, dev_dir, mod)
833 panic('module not found:', mod)
834 (rc, out) = run('/sbin/insmod', module)
836 raise CommandError('insmod', out, rc)
838 (rc, out) = run('/sbin/modprobe', mod)
840 raise CommandError('modprobe', out, rc)
842 def cleanup_module(self):
843 """Unload the modules in the list in reverse order."""
844 rev = self.kmodule_list
846 for src_dir, dev_dir, mod in rev:
847 if not self.mod_loaded(mod):
850 if mod == 'portals' and config.dump_file():
851 lctl.dump(config.dump_file())
852 log('unloading module:', mod)
855 (rc, out) = run('/sbin/rmmod', mod)
857 log('! unable to unload module:', mod)
861 class Network(Module):
862 def __init__(self,db):
863 Module.__init__(self, 'NETWORK', db)
864 self.net_type = self.db.get_val('nettype')
865 self.nid = self.db.get_val('nid', '*')
866 self.port = self.db.get_val_int('port', 0)
867 self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
868 self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
870 self.nid = get_local_address(self.net_type, self.nid)
872 panic("unable to set nid for", self.net_type, self.nid)
873 debug("nid:", self.nid)
874 self.add_portals_module("linux/oslib", 'portals')
875 if node_needs_router():
876 self.add_portals_module("linux/router", 'kptlrouter')
877 if self.net_type == 'tcp':
878 self.add_portals_module("linux/socknal", 'ksocknal')
879 if self.net_type == 'toe':
880 self.add_portals_module("/linux/toenal", 'ktoenal')
881 if self.net_type == 'elan':
882 self.add_portals_module("/linux/rqswnal", 'kqswnal')
883 if self.net_type == 'gm':
884 self.add_portals_module("/linux/gmnal", 'kgmnal')
885 self.add_lustre_module('obdclass', 'obdclass')
886 self.add_lustre_module('ptlrpc', 'ptlrpc')
889 self.info(self.net_type, self.nid, self.port)
890 if self.net_type in ('tcp', 'toe'):
891 nal_id = '' # default is socknal
892 if self.net_type == 'toe':
894 ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
896 raise CommandError(TCP_ACCEPTOR, out, ret)
897 for net_type, gw, lo, hi in self.db.get_route_tbl():
898 lctl.add_route(net_type, gw, lo, hi)
899 if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
900 srvdb = self.db.nid2server(lo)
902 panic("no server for nid", lo)
905 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
908 lctl.network(self.net_type, self.nid)
909 if not is_prepared("RPCDEV_UUID"):
910 lctl.newdev(attach = "ptlrpc RPCDEV RPCDEV_UUID")
913 self.info(self.net_type, self.nid, self.port)
914 for net_type, gw, lo, hi in self.db.get_route_tbl():
915 if self.net_type in ('tcp', 'toe') and hi == '':
916 srvdb = self.db.nid2server(lo)
918 panic("no server for nid", lo)
922 lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
923 except CommandError, e:
924 print "disconnect failed: ", self.name
928 lctl.del_route(self.net_type, self.nid, lo, hi)
929 except CommandError, e:
930 print "del_route failed: ", self.name
935 if is_prepared("RPCDEV_UUID"):
936 lctl.cleanup("RPCDEV", "RPCDEV_UUID")
937 except CommandError, e:
938 print "cleanup failed: RPCDEV"
942 lctl.disconnectAll(self.net_type)
943 except CommandError, e:
944 print "disconnectAll failed: ", self.name
947 if self.net_type in ('tcp', 'toe'):
948 # yikes, this ugly! need to save pid in /var/something
949 run("killall acceptor")
952 def __init__(self,db):
953 Module.__init__(self, 'LDLM', db)
954 self.add_lustre_module('ldlm', 'ldlm')
956 if is_prepared(self.uuid):
959 lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid),
963 def __init__(self,db):
964 Module.__init__(self, 'LOV', db)
965 self.add_lustre_module('mdc', 'mdc')
966 self.add_lustre_module('lov', 'lov')
967 self.mds_uuid = self.db.get_first_ref('mds')
968 mds= self.db.lookup(self.mds_uuid)
969 self.mds_name = mds.getName()
970 self.stripe_sz = self.db.get_val_int('stripesize', 65536)
971 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
972 self.pattern = self.db.get_val_int('stripepattern', 0)
973 self.devlist = self.db.get_refs('obd')
974 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
976 for obd_uuid in self.devlist:
977 obd = self.db.lookup(obd_uuid)
978 osc = get_osc(obd, self.name)
980 self.osclist.append(osc)
982 panic('osc not found:', obd_uuid)
985 if is_prepared(self.uuid):
987 for osc in self.osclist:
989 # Ignore connection failures, because the LOV will DTRT with
990 # an unconnected OSC.
991 osc.prepare(ignore_connect_failure=1)
993 print "Error preparing OSC %s (inactive)\n" % osc.uuid
994 self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
995 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
996 self.stripe_off, self.pattern, self.devlist, self.mds_name)
997 lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
998 setup ="%s" % (self.mdc_uuid))
1001 if is_prepared(self.uuid):
1002 Module.cleanup(self)
1003 for osc in self.osclist:
1005 cleanup_mdc(self.db, self.name, self.mds_uuid)
1007 def load_module(self):
1008 for osc in self.osclist:
1011 Module.load_module(self)
1013 def cleanup_module(self):
1014 Module.cleanup_module(self)
1015 for osc in self.osclist:
1016 osc.cleanup_module()
1019 class LOVConfig(Module):
1020 def __init__(self,db):
1021 Module.__init__(self, 'LOVConfig', db)
1023 self.lov_uuid = self.db.get_first_ref('lov')
1024 l = self.db.lookup(self.lov_uuid)
1029 self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off,
1030 lov.pattern, lov.devlist, lov.mds_name)
1031 lctl.lov_setconfig(lov.uuid, lov.mds_name, lov.stripe_cnt,
1032 lov.stripe_sz, lov.stripe_off, lov.pattern,
1033 string.join(lov.devlist))
1039 class MDSDEV(Module):
1040 def __init__(self,db):
1041 Module.__init__(self, 'MDSDEV', db)
1042 self.devname = self.db.get_val('devpath','')
1043 self.size = self.db.get_val_int('devsize', 0)
1044 self.fstype = self.db.get_val('fstype', '')
1045 # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
1046 self.uuid = self.db.get_first_ref('target')
1047 mds = self.db.lookup(self.uuid)
1048 self.name = mds.getName()
1049 self.lovconfig_uuids = mds.get_refs('lovconfig')
1050 # FIXME: if fstype not set, then determine based on kernel version
1051 self.format = self.db.get_val('autoformat', "no")
1052 if self.fstype == 'extN':
1053 self.add_lustre_module('extN', 'extN')
1054 self.add_lustre_module('mds', 'mds')
1056 self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
1059 if is_prepared(self.uuid):
1061 self.info(self.devname, self.fstype, self.format)
1062 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
1063 if not is_prepared('MDT_UUID'):
1064 lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
1066 lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
1067 setup ="%s %s" %(blkdev, self.fstype))
1068 for uuid in self.lovconfig_uuids:
1069 db = self.db.lookup(uuid)
1070 lovconfig = LOVConfig(db)
1074 if is_prepared('MDT_UUID'):
1076 lctl.cleanup("MDT", "MDT_UUID")
1077 except CommandError, e:
1078 print "cleanup failed: ", self.name
1081 if is_prepared(self.uuid):
1082 Module.cleanup(self)
1083 clean_loop(self.devname)
1086 def __init__(self, db):
1087 Module.__init__(self, 'OSD', db)
1088 self.osdtype = self.db.get_val('osdtype')
1089 self.devname = self.db.get_val('devpath', '')
1090 self.size = self.db.get_val_int('devsize', 0)
1091 self.fstype = self.db.get_val('fstype', '')
1092 self.uuid = self.db.get_first_ref('target')
1093 ost = self.db.lookup(self.uuid)
1094 self.name = ost.getName()
1095 # FIXME: if fstype not set, then determine based on kernel version
1096 self.format = self.db.get_val('autoformat', 'yes')
1097 if self.fstype == 'extN':
1098 self.add_lustre_module('extN', 'extN')
1099 self.add_lustre_module('ost', 'ost')
1100 self.add_lustre_module(self.osdtype, self.osdtype)
1102 self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype))
1104 # need to check /proc/mounts and /etc/mtab before
1105 # formatting anything.
1106 # FIXME: check if device is already formatted.
1108 if is_prepared(self.uuid):
1110 self.info(self.osdtype, self.devname, self.size, self.fstype, self.format)
1111 if self.osdtype == 'obdecho':
1114 blkdev = block_dev(self.devname, self.size, self.fstype, self.format)
1115 lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
1116 setup ="%s %s" %(blkdev, self.fstype))
1117 if not is_prepared('OSS_UUID'):
1118 lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
1122 if is_prepared('OSS_UUID'):
1124 lctl.cleanup("OSS", "OSS_UUID")
1125 except CommandError, e:
1126 print "cleanup failed: ", self.name
1129 if is_prepared(self.uuid):
1130 Module.cleanup(self)
1131 if not self.osdtype == 'obdecho':
1132 clean_loop(self.devname)
1134 # Generic client module, used by OSC and MDC
1135 class Client(Module):
1136 def __init__(self, db, module, owner, target_name, target_uuid):
1137 self.target_name = target_name
1138 self.target_uuid = target_uuid
1140 node_name = config.select(target_name)
1142 self.tgt_dev_uuid = self.db.get_target_device(node_name, target_uuid)
1144 self.tgt_dev_uuid = db.get_first_ref('active')
1145 if not self.tgt_dev_uuid:
1146 panic("No target device found for target:", target_name)
1147 self.kmodule_list = []
1151 self.module = module
1152 self.module_name = string.upper(module)
1153 self.name = '%s_%s_%s' % (self.module_name, owner, target_name)
1154 self.uuid = '%05x_%s_%05x' % (int(random.random() * 1048576), self.name,
1155 int(random.random() * 1048576))
1156 self.uuid = self.uuid[0:36]
1157 self.lookup_server(self.tgt_dev_uuid)
1158 self.add_lustre_module(module, module)
1160 def prepare(self, ignore_connect_failure = 0):
1161 if is_prepared(self.uuid):
1163 self.info(self.target_uuid)
1164 srv = self.get_server()
1168 lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
1170 #debug("NOT LOCAL NET")
1173 lctl.add_route_host(r[0], srv.uuid, r[1], r[2])
1175 panic ("no route to", srv.nid)
1176 except CommandError:
1177 if (ignore_connect_failure == 0):
1179 lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
1180 setup ="%s %s" %(self.target_uuid, srv.uuid))
1183 srv = self.get_server()
1185 Module.cleanup(self)
1187 self.info(self.targt_uuid)
1191 lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
1192 except CommandError, e:
1193 print "del_route failed: ", self.name
1196 Module.cleanup(self)
def __init__(self, db, owner, target_name, target_uuid):
    """MDC client: delegate to Client with the 'mdc' module type."""
    Client.__init__(self, db, 'mdc', owner, target_name, target_uuid)
def __init__(self, db, owner, target_name, target_uuid):
    """OSC client: delegate to Client with the 'osc' module type."""
    Client.__init__(self, db, 'osc', owner, target_name, target_uuid)
1210 def __init__(self, db):
1211 Module.__init__(self, 'COBD', db)
1212 self.real_uuid = self.db.get_first_ref('realobd')
1213 self.cache_uuid = self.db.get_first_ref('cacheobd')
1214 self.add_lustre_module('cobd' , 'cobd')
1216 # need to check /proc/mounts and /etc/mtab before
1217 # formatting anything.
1218 # FIXME: check if device is already formatted.
1220 if is_prepared(self.uuid):
1222 self.info(self.real_uuid, self.cache_uuid)
1223 lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
1224 setup ="%s %s" %(self.real_uuid, self.cache_uuid))
1227 # virtual interface for OSC and LOV
1229 def __init__(self,db, owner):
1230 Module.__init__(self, 'VOSC', db)
1231 if db.get_class() == 'lov':
1234 self.osc = get_osc(db, owner)
1236 return self.osc.uuid
def load_module(self):
    """Delegate module loading to the wrapped OSC/LOV object."""
    self.osc.load_module()
def cleanup_module(self):
    """Delegate module unloading to the wrapped OSC/LOV object."""
    self.osc.cleanup_module()
1246 return self.db.get_class() != 'lov'
1247 def get_mdc_uuid(self):
1248 if self.db.get_class() == 'lov':
1249 return self.osc.mdc_uuid
1253 class ECHO_CLIENT(Module):
1254 def __init__(self,db):
1255 Module.__init__(self, 'ECHO_CLIENT', db)
1256 self.add_lustre_module('obdecho', 'obdecho')
1257 self.obd_uuid = self.db.get_first_ref('obd')
1258 obd = self.db.lookup(self.obd_uuid)
1259 self.osc = VOSC(obd, self.name)
1262 if is_prepared(self.uuid):
1264 self.osc.prepare() # XXX This is so cheating. -p
1265 self.info(self.obd_uuid)
1267 lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
1268 setup = self.osc.get_uuid())
1271 if is_prepared(self.uuid):
1272 Module.cleanup(self)
def load_module(self):
    """Load the OSC stack first, then this module's own kernel modules."""
    self.osc.load_module()
    Module.load_module(self)
def cleanup_module(self):
    """Unload in reverse order: our modules first, then the OSC's."""
    Module.cleanup_module(self)
    self.osc.cleanup_module()
1283 class Mountpoint(Module):
1284 def __init__(self,db):
1285 Module.__init__(self, 'MTPT', db)
1286 self.path = self.db.get_val('path')
1287 self.mds_uuid = self.db.get_first_ref('mds')
1288 self.obd_uuid = self.db.get_first_ref('obd')
1289 obd = self.db.lookup(self.obd_uuid)
1290 self.vosc = VOSC(obd, self.name)
1291 if self.vosc.need_mdc():
1292 self.add_lustre_module('mdc', 'mdc')
1293 self.add_lustre_module('llite', 'llite')
1298 if self.vosc.need_mdc():
1299 mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
1301 mdc_uuid = self.vosc.get_mdc_uuid()
1302 self.info(self.path, self.mds_uuid, self.obd_uuid)
1303 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
1304 (self.vosc.get_uuid(), mdc_uuid, self.path)
1305 run("mkdir", self.path)
1308 panic("mount failed:", self.path)
1311 self.info(self.path, self.mds_uuid,self.obd_uuid)
1312 if fs_is_mounted(self.path):
1314 (rc, out) = run("umount", "-f", self.path)
1316 (rc, out) = run("umount", self.path)
1318 raise CommandError('umount', out, rc)
1320 if fs_is_mounted(self.path):
1321 panic("fs is still mounted:", self.path)
1324 if self.vosc.need_mdc():
1325 cleanup_mdc(self.db, self.name, self.mds_uuid)
def load_module(self):
    """Load the VOSC's modules first, then this mountpoint's own."""
    self.vosc.load_module()
    Module.load_module(self)
def cleanup_module(self):
    """Unload in reverse order: our modules first, then the VOSC's."""
    Module.cleanup_module(self)
    self.vosc.cleanup_module()
1335 # ============================================================
1336 # XML processing and query
def lookup(self, uuid):
    """Return a new LustreDB instance found by *uuid*."""
    return self._lookup_by_uuid(uuid)
def lookup_name(self, name, class_name = ""):
    """Return a new LustreDB instance found by *name*.

    An optional *class_name* restricts the search to that class.
    """
    return self._lookup_by_name(name, class_name)
def lookup_class(self, class_name):
    """Return a new LustreDB instance found by *class_name*."""
    return self._lookup_by_class(class_name)
1351 def get_val(self, tag, default=None):
1352 v = self._get_val(tag)
1357 debug("LustreDB", self.getName(), " no value for:", tag)
def get_class(self):
    """Return this node's class name via the backend helper."""
    return self._get_class()
# NOTE(review): recovered listing -- embedded line numbers jump throughout
# this region (e.g. 1364 -> 1370), so conversion/return logic is missing.
# get_val_int: fetch a value and convert to int, panicking on bad text.
1363 def get_val_int(self, tag, default=0):
# NOTE(review): local name 'str' shadows the builtin; rename when this code
# can be safely modified.
1364 str = self._get_val(tag)
1370 panic("text value is not integer:", str)
1372 def get_first_ref(self, tag):
1373 """ Get the first uuidref of the type TAG. Only
1374 one is expected. Returns the uuid."""
1375 uuids = self._get_refs(tag)
1380 def get_refs(self, tag):
1381 """ Get all the refs of type TAG. Returns list of uuids. """
1382 uuids = self._get_refs(tag)
1385 def get_all_refs(self):
1386 """ Get all the refs. Returns list of uuids. """
1387 uuids = self._get_all_refs()
# get_ost_net: follow an OST's 'network' ref and return that db object.
1390 def get_ost_net(self, uuid):
1391 ost = self.lookup(uuid)
1392 uuid = ost.get_first_ref('network')
1395 return ost.lookup(uuid)
# nid2server: find the network db entry whose nid matches; the return lines
# are among the missing source.
1397 def nid2server(self, nid):
1398 netlist = self.lookup_class('network')
1399 for net_db in netlist:
1400 if net_db.get_val('nid') == nid:
1404 # the tag name is the service type
1405 # fixme: this should do some checks to make sure the dom_node is a service
1407 # determine what "level" a particular node is at.
1409 # the order of initialization is based on level.
# getServiceLevel: map a service class to its startup "level" so services are
# initialized in dependency order. NOTE(review): recovered listing -- the
# 'ret = N' assignment lines between the branches are missing from this dump,
# so the actual level numbers cannot be stated here.
1410 def getServiceLevel(self):
1411 type = self.get_class()
1413 if type in ('network',):
1415 elif type in ('device', 'ldlm'):
1417 elif type in ('osd', 'mdd', 'cobd'):
1419 elif type in ('mdsdev','ost'):
1421 elif type in ('mdc','osc'):
1423 elif type in ('lov',):
1425 elif type in ('mountpoint', 'echoclient'):
# Services outside the user-selected --minlevel/--maxlevel window are
# presumably filtered out here (the body of this if is missing).
1428 if ret < config.minlevel() or ret > config.maxlevel():
1433 # return list of services in a profile. list is a list of tuples
1434 # [(level, db_object),]
1435 def getServices(self):
1437 for ref_class, ref_uuid in self.get_all_refs():
1438 servdb = self.lookup(ref_uuid)
1440 level = servdb.getServiceLevel()
1442 list.append((level, servdb))
1444 panic('service not found: ' + ref_uuid)
1449 # Find the target_device for target on a node
1450 # node->profiles->device_refs->target
# NOTE(review): recovered listing -- lines 1453-1454 and 1462-1464 (guards
# and return statements) are missing from this dump.
1451 def get_target_device(self, node_name, target_uuid):
1452 node_db = self.lookup_name(node_name)
1455 prof_list = node_db.get_refs('profile')
1456 for prof_uuid in prof_list:
1457 prof_db = node_db.lookup(prof_uuid)
1458 ref_list = prof_db.get_all_refs()
1459 for ref in ref_list:
# ref is a (ref_class, ref_uuid) tuple; ref[1] is the uuid.
1460 dev = self.lookup(ref[1])
1461 if dev and dev.get_first_ref('target') == target_uuid:
1465 # get all network uuids for this node
# NOTE(review): the 'ret' accumulator initialization and final return are
# among the missing lines.
1466 def get_networks(self):
1468 prof_list = self.get_refs('profile')
1469 for prof_uuid in prof_list:
1470 prof_db = self.lookup(prof_uuid)
1471 net_list = prof_db.get_refs('network')
1472 debug("get_networks():", prof_uuid, net_list)
1473 for net_uuid in net_list:
1474 ret.append(net_uuid)
# XML-backed config database: each instance wraps one DOM node plus the
# document root (kept for whole-tree lookups).
# NOTE(review): recovered listing -- __init__ lines 1479-1480 (presumably
# storing 'dom' as self.dom_node) and parts of xmltext are missing.
1477 class LustreDB_XML(LustreDB):
1478 def __init__(self, dom, root_node):
1481 self.root_node = root_node
# xmltext: return the stripped text content of the first child element TAG.
1483 def xmltext(self, dom_node, tag):
1484 list = dom_node.getElementsByTagName(tag)
1487 dom_node.normalize()
1488 if dom_node.firstChild:
1489 txt = string.strip(dom_node.firstChild.data)
def xmlattr(self, dom_node, attr):
    """Return the value of attribute ATTR on DOM_NODE ('' when unset)."""
    value = dom_node.getAttribute(attr)
    return value
# NOTE(review): recovered listing -- line 1500 (presumably the fallback
# condition) and the return statement are missing from this dump.
1496 def _get_val(self, tag):
1497 """a value could be an attribute of the current node
1498 or the text value in a child node"""
1499 ret = self.xmlattr(self.dom_node, tag)
1501 ret = self.xmltext(self.dom_node, tag)
1504 def _get_class(self):
1505 return self.dom_node.nodeName
1508 # [(ref_class, ref_uuid),]
# _get_all_refs: collect (element-name, uuidref) pairs from direct element
# children. NOTE(review): recovered listing -- the 'list' initialization and
# return statement are among the missing lines.
1509 def _get_all_refs(self):
1511 for n in self.dom_node.childNodes:
1512 if n.nodeType == n.ELEMENT_NODE:
1513 ref_uuid = self.xml_get_ref(n)
1514 ref_class = n.nodeName
1515 list.append((ref_class, ref_uuid))
# _get_refs: refs for TAG live in '<TAG>_ref' elements; collect their
# uuidref attributes. The loop header and return are missing here.
1520 def _get_refs(self, tag):
1521 """ Get all the refs of type TAG. Returns list of uuids. """
1523 refname = '%s_ref' % tag
1524 reflist = self.dom_node.getElementsByTagName(refname)
1526 uuids.append(self.xml_get_ref(r))
# Depth-first search of the DOM for the element carrying this uuid.
# NOTE(review): recovered listing -- the return statements (lines 1533-1534,
# 1536-1538) are missing from this dump.
1529 def xmllookup_by_uuid(self, dom_node, uuid):
1530 for n in dom_node.childNodes:
1531 if n.nodeType == n.ELEMENT_NODE:
1532 if self.xml_get_uuid(n) == uuid:
1535 n = self.xmllookup_by_uuid(n, uuid)
1539 def _lookup_by_uuid(self, uuid):
# NOTE(review): 'self. xmllookup_by_uuid' has a stray space after the dot;
# it parses, but should be normalized when this code can be modified.
1540 dom = self. xmllookup_by_uuid(self.root_node, uuid)
1542 return LustreDB_XML(dom, self.root_node)
# Depth-first search of the DOM for the element with this 'name' attribute.
# NOTE(review): recovered listing -- return statements (1548-1549, 1551-1553)
# and the guard before wrapping the result (1556) are missing.
1544 def xmllookup_by_name(self, dom_node, name):
1545 for n in dom_node.childNodes:
1546 if n.nodeType == n.ELEMENT_NODE:
1547 if self.xml_get_name(n) == name:
1550 n = self.xmllookup_by_name(n, name)
# NOTE(review): class_name appears unused in the visible lines of this
# backend's implementation.
1554 def _lookup_by_name(self, name, class_name):
1555 dom = self.xmllookup_by_name(self.root_node, name)
1557 return LustreDB_XML(dom, self.root_node)
def xmllookup_by_class(self, dom_node, class_name):
    """Return every descendant element of DOM_NODE whose tag is CLASS_NAME."""
    matches = dom_node.getElementsByTagName(class_name)
    return matches
# NOTE(review): recovered listing -- embedded line numbers jump throughout
# this region, so accumulator initializations, 'def' headers for getName /
# getUUID, loop headers and return statements are missing.
# _lookup_by_class: wrap every matching DOM element in a LustreDB_XML.
1562 def _lookup_by_class(self, class_name):
1564 domlist = self.xmllookup_by_class(self.root_node, class_name)
1565 for node in domlist:
1566 ret.append(LustreDB_XML(node, self.root_node))
# Attribute accessors: name / uuidref / uuid are plain XML attributes.
1569 def xml_get_name(self, n):
1570 return n.getAttribute('name')
# Body of getName (its def line is among the missing source).
1573 return self.xml_get_name(self.dom_node)
1575 def xml_get_ref(self, n):
1576 return n.getAttribute('uuidref')
1578 def xml_get_uuid(self, dom_node):
1579 return dom_node.getAttribute('uuid')
# Body of getUUID (its def line is among the missing source).
1582 return self.xml_get_uuid(self.dom_node)
# get_routes: collect (type, gw, lo, hi) tuples from <routetbl>/<route>
# elements; outer loop headers are missing here.
1584 def get_routes(self, type, gw):
1585 """ Return the routes as a list of tuples of the form:
1586 [(type, gw, lo, hi),]"""
1588 tbl = self.dom_node.getElementsByTagName('routetbl')
1590 routes = t.getElementsByTagName('route')
1592 lo = self.xmlattr(r, 'lo')
1593 hi = self.xmlattr(r, 'hi')
1594 res.append((type, gw, lo, hi))
# get_route_tbl: same data, but type/gw are read from each <route> element.
1597 def get_route_tbl(self):
1599 tbls = self.dom_node.getElementsByTagName('routetbl')
1601 for r in tbl.getElementsByTagName('route'):
1602 net_type = self.xmlattr(r, 'type')
1603 gw = self.xmlattr(r, 'gw')
1604 lo = self.xmlattr(r, 'lo')
1605 hi = self.xmlattr(r, 'hi')
1606 ret.append((net_type, gw, lo, hi))
1610 # ================================================================
# LDAP-backed config database. Mirrors the LustreDB_XML interface but reads
# objects from an LDAP directory (python-ldap).
# NOTE(review): recovered listing -- embedded line numbers jump heavily in
# this class; try/except scaffolding, accumulator initializations, 'def'
# headers for getName/getUUID and many return paths are missing. The visible
# 'except X, e' / 'print e' forms are Python 2 syntax.
1612 class LustreDB_LDAP(LustreDB):
1613 def __init__(self, name, attrs,
1616 url = "ldap://localhost",
1617 user = "cn=Manager, fs=lustre",
1623 self._parent = parent
# Child instances inherit the search base from their parent.
1629 self._base = parent._base
1636 self.l = ldap.initialize(self._url)
1637 # Set LDAP protocol version used
1638 self.l.protocol_version=ldap.VERSION3
1639 # user and pw only needed if modifying db
# Anonymous simple bind: read-only access.
1640 self.l.bind_s("", "", ldap.AUTH_SIMPLE);
1641 except ldap.LDAPError, e:
1643 # FIXME, do something useful here
1648 def ldap_search(self, filter):
1649 """Return list of uuids matching the filter."""
1655 for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL,
1657 for v in attrs['uuid']:
1659 except ldap.NO_SUCH_OBJECT, e:
1661 except ldap.LDAPError, e:
1662 print e # FIXME: die here?
1665 ret.append(self._lookup_by_uuid(uuid))
1668 def _lookup_by_name(self, name, class_name):
1669 list = self.ldap_search("lustreName=%s" %(name))
# LDAP object classes are stored upper-case, hence the string.upper().
1674 def _lookup_by_class(self, class_name):
1675 return self.ldap_search("objectclass=%s" %(string.upper(class_name)))
1677 def _lookup_by_uuid(self, uuid):
1679 dn = "uuid=%s,%s" % (uuid, self._base)
1682 for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
1684 ret = LustreDB_LDAP(name, attrs, parent = self)
1686 except ldap.NO_SUCH_OBJECT, e:
1687 debug("NO_SUCH_OBJECT:", uuid)
1688 pass # just return empty list
1689 except ldap.LDAPError, e:
1690 print e # FIXME: die here?
1694 def _get_val(self, k):
1696 if self._attrs.has_key(k):
# Multi-valued LDAP attributes arrive as lists.
1698 if type(v) == types.ListType:
1704 def _get_class(self):
1705 return string.lower(self._attrs['objectClass'][0])
1708 # [(ref_class, ref_uuid),]
1709 def _get_all_refs(self):
# Any attribute name ending in 'Ref' is treated as a reference list.
1711 for k in self._attrs.keys():
1712 if re.search('.*Ref', k):
1713 for uuid in self._attrs[k]:
1714 list.append((k, uuid))
1717 def _get_refs(self, tag):
1718 """ Get all the refs of type TAG. Returns list of uuids. """
1720 refname = '%sRef' % tag
1721 if self._attrs.has_key(refname):
1722 return self._attrs[refname]
# Bodies of getName / getUUID (their def lines are among the missing source).
1726 return self._get_val('lustreName')
1729 return self._get_val('uuid')
# get_route_tbl: body missing in this dump.
1731 def get_route_tbl(self):
1734 ############################################################
1736 # FIXME: clean this mess up!
1738 # OSC is no longer in the xml, so we have to fake it.
1739 # this is getting ugly and begging for another refactoring
# NOTE(review): recovered listing -- return statements and the prepare/
# cleanup method calls inside prepare_mdc/cleanup_mdc are missing.
# Build an OSC object on the fly from an OST's db entry.
1740 def get_osc(ost_db, owner):
1741 osc = OSC(ost_db, owner, ost_db.getName(), ost_db.getUUID())
# Build an MDC for the MDS identified by mds_uuid; panics if absent.
1744 def get_mdc(db, owner, mds_uuid):
1745 mds_db = db.lookup(mds_uuid);
1747 panic("no mds:", mds_uuid)
1748 mdc = MDC(mds_db, owner, mds_db.getName(), mds_uuid)
1751 def prepare_mdc(db, owner, mds_uuid):
1752 mdc = get_mdc(db, owner, mds_uuid)
1756 def cleanup_mdc(db, owner, mds_uuid):
1757 mdc = get_mdc(db, owner, mds_uuid)
1761 ############################################################
1762 # routing ("rooting")
# NOTE(review): recovered listing -- embedded line numbers jump throughout
# the routing helpers; global declarations, guards and return statements are
# missing. 'local_node' and 'routes' are module-level lists (their
# definitions are outside this fragment).
# Record (nettype, nid) for every network interface on this node.
1768 def add_local_interfaces(node_db):
1771 for netuuid in node_db.get_networks():
1772 net = node_db.lookup(netuuid)
1773 debug("add_local", netuuid)
1774 local_node.append((net.get_val('nettype'), net.get_val('nid')))
# node_needs_router: body missing in this dump.
1776 def node_needs_router():
1779 def init_route_config(lustre):
1780 """ Scan the lustre config looking for routers.  Build list of
1782 global routes, router_flag
# Pass 1 over router nodes: find a gateway nid on a network type we share.
1784 list = lustre.lookup_class('node')
1785 for node_db in list:
1786 if node_db.get_val_int('router', 0):
1788 #debug("init_route_config: found router", node_db.getName())
1789 for (local_type, local_nid) in local_node:
1790 #debug("init_route_config:", local_type, local_nid)
1792 for netuuid in node_db.get_networks():
1793 db = node_db.lookup(netuuid)
1794 if local_type == db.get_val('nettype'):
1795 gw = db.get_val('nid')
1797 #debug("init_route_config: gw is", gw)
# Pass 2: collect routes through that gateway for all other network types.
1800 for netuuid in node_db.get_networks():
1801 db = node_db.lookup(netuuid)
1802 #debug("init_route_config: tbl: ", db.get_route_tbl())
1803 if local_type != db.get_val('nettype'):
1804 for route in db.get_routes(local_type, gw):
1805 routes.append(route)
1806 #debug("init_route_config routes:", routes)
# Fragment of a local-network membership test (its def line is missing).
1811 for iface in local_node:
1812 #debug("local_net a:", net.net_type, "b:", iface[0])
1813 if net.net_type == iface[0]:
# find_route: pick a route from our first local interface type to the
# target network's type; the matching loop/return lines are missing.
1817 def find_route(net):
1818 global local_node, routes
1819 frm_type = local_node[0][0]
1820 to_type = net.net_type
1822 debug ('looking for route to', to_type,to)
1824 #debug("find_route: ", r)
1830 ############################################################
# Service-type dispatch fragment (apparently the body of a newService(db)
# factory; its def line and the per-branch constructor calls are among the
# missing source lines of this dump).
1834 type = db.get_class()
1835 debug('Service:', type, db.getName(), db.getUUID())
1841 elif type == 'network':
1845 elif type == 'cobd':
1847 elif type == 'mdsdev':
1849 elif type == 'mountpoint':
1851 elif type == 'echoclient':
# Unknown class names are fatal.
1854 panic ("unknown service type:", type)
1858 # Prepare the system to run lustre using a particular profile
1859 # in the configuration.
1860 # * load the modules
1861 # * setup networking for the current node
1862 # * make sure partitions are in place and prepared
1863 # * initialize devices with lctl
1864 # Levels is important, and needs to be enforced.
# Apply OPERATION (doSetup/doModules/doCleanup/...) to the services of each
# profile. NOTE(review): recovered listing -- the guard before the panic and
# the call to operation(services) are missing from this dump.
1865 def for_each_profile(db, prof_list, operation):
1866 for prof_uuid in prof_list:
1867 prof_db = db.lookup(prof_uuid)
# NOTE(review): 'profile' is not bound in any visible line of this function
# (likely should be prof_uuid) -- verify against the full source.
1869 panic("profile:", profile, "not found.")
1870 services = prof_db.getServices()
# Per-phase operations applied to a [(level, db_object)] service list; each
# wraps the db object via newService and invokes the matching phase method.
# NOTE(review): recovered listing -- the config.nosetup()/nomod() early
# returns, the 'for s in services:' loop headers and the n.prepare()/
# load_module()/cleanup()/cleanup_module() calls are missing from this dump.
1873 def doSetup(services):
1875 n = newService(s[1])
1878 def doModules(services):
1880 n = newService(s[1])
# Cleanup presumably iterates in reverse service order -- confirm against
# the full source.
1883 def doCleanup(services):
1886 n = newService(s[1])
1889 def doUnloadModules(services):
1892 n = newService(s[1])
# Configure (or clean up) all profiles for the first matching host name.
# NOTE(review): recovered listing -- loop headers, globals, early returns
# and several branch lines are missing from this dump.
1897 def doHost(lustreDB, hosts):
1902 node_db = lustreDB.lookup_name(h, 'node')
1906 print 'No host entry found.'
# Node-level settings; command-line values can override these later.
1909 router_flag = node_db.get_val_int('router', 0)
1910 recovery_upcall = node_db.get_val('recovery_upcall', '')
1911 timeout = node_db.get_val_int('timeout', 0)
1914 add_local_interfaces(node_db)
1915 init_route_config(lustreDB)
1917 # Two step process: (1) load modules, (2) setup lustre
1918 # if not cleaning, load modules first.
1919 prof_list = node_db.get_refs('profile')
# Cleanup path: tear services down, then unload modules.
1921 if config.cleanup():
1923 # the command line can override this value
1925 sys_set_timeout(timeout)
1926 sys_set_recovery_upcall(recovery_upcall)
1928 for_each_profile(node_db, prof_list, doCleanup)
1929 for_each_profile(node_db, prof_list, doUnloadModules)
# Setup path: load modules, emit the gdb helper script, then configure.
1932 for_each_profile(node_db, prof_list, doModules)
1934 sys_set_debug_path()
1935 script = config.gdb_script()
1936 run(lctl.lctl, ' modules >', script)
1938 log ("The GDB module script is in", script)
1939 # pause, so user has time to break and
1942 sys_set_timeout(timeout)
1943 sys_set_recovery_upcall(recovery_upcall)
1945 for_each_profile(node_db, prof_list, doSetup)
1947 ############################################################
1948 # Command line processing
# Parse argv with getopt and record each option via the config object's
# setter-style accessors. Returns the remaining positional args.
# NOTE(review): recovered listing -- the try:, the 'for o, a in opts:' loop
# header, the usage() calls and most option-handler bodies are missing.
1950 def parse_cmdline(argv):
1951 short_opts = "hdnvf"
1952 long_opts = ["ldap", "reformat", "lustre=", "verbose", "gdb",
1953 "portals=", "makeldiff", "cleanup", "noexec",
1954 "help", "node=", "nomod", "nosetup",
1955 "dump=", "force", "minlevel=", "maxlevel=",
1956 "timeout=", "recovery_upcall=",
1957 "ldapurl=", "config=", "select="]
1962 opts, args = getopt.getopt(argv, short_opts, long_opts)
1963 except getopt.error:
1968 if o in ("-h", "--help"):
1970 if o in ("-d","--cleanup"):
1972 if o in ("-v", "--verbose"):
1974 if o in ("-n", "--noexec"):
1977 if o == "--portals":
1978 config.portals_dir(a)
1980 config.lustre_dir(a)
1981 if o == "--reformat":
1989 if o == "--nosetup":
1993 if o in ("-f", "--force"):
1995 if o == "--minlevel":
1997 if o == "--maxlevel":
1999 if o == "--timeout":
2001 if o == "--recovery_upcall":
2002 config.recovery_upcall(a)
2003 if o == "--ldapurl":
2006 config.config_name(a)
2008 config.init_select(a)
# Fragment of a URL-fetch helper (its def line is among the missing source;
# urllib.urlopen is the Python 2 API).
2016 s = urllib.urlopen(url)
# Derive lustre/portals source directories from the location of the lconf
# command itself (when run from a build tree) or from --lustre/--portals.
# NOTE(review): recovered listing -- line 2033 and the tail of sysctl
# (config.noexec guard, fp.write/close) are missing from this dump.
2022 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
2023 base = os.path.dirname(cmd)
# Running from a source tree is detected by a Makefile next to the command.
2024 if os.access(base+"/Makefile", os.R_OK):
2025 if not config.lustre_dir():
2026 config.lustre_dir(os.path.join(base, ".."))
2027 # normalize the portals dir, using command line arg if set
2028 if config.portals_dir():
2029 portals_dir = config.portals_dir()
2030 dir = os.path.join(config.lustre_dir(), portals_dir)
2031 config.portals_dir(dir)
2032 elif config.lustre_dir() and config.portals_dir():
2034 # if --lustre and --portals, normalize portals
2035 # can ignore PORTALS_DIR here, since it is probably useless here
2036 dir = config.portals_dir()
2037 dir = os.path.join(config.lustre_dir(), dir)
2038 config.portals_dir(dir)
# Write VAL to /proc/sys/PATH (kernel sysctl interface).
2040 def sysctl(path, val):
2044 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals debug log at the configured debug path."""
    debug("debug path: ", config.debug_path())
    sysctl('portals/debug_path', config.debug_path())
# Push the recovery upcall / timeout values into the kernel via sysctl; a
# command-line value always wins over the node config value.
# NOTE(review): recovered listing -- lines 2059 and 2067 (presumably the
# 'only set when non-empty / positive' guards) are missing from this dump.
2055 def sys_set_recovery_upcall(upcall):
2056 # the command overrides the value in the node config
2057 if config.recovery_upcall():
2058 upcall = config.recovery_upcall()
2060 debug("setting recovery_upcall:", upcall)
2061 sysctl('lustre/recovery_upcall', upcall)
2063 def sys_set_timeout(timeout):
2064 # the command overrides the value in the node config
2065 if config.timeout() > 0:
2066 timeout = config.timeout()
2068 debug("setting timeout:", timeout)
2069 sysctl('lustre/timeout', timeout)
def sys_set_ptldebug(ptldebug):
    """Set the portals debug mask via sysctl.

    A mask given on the command line overrides the node-config value.
    """
    override = config.ptldebug()
    if override:
        ptldebug = override
    sysctl('portals/debug', ptldebug)
# Raise a /proc/sys/net/core/*mem_max value to at least MAX.
# NOTE(review): recovered listing -- lines 2079-2085 (presumably reading the
# current value and skipping the write when it is already large enough) and
# the fp.close() are missing from this dump. 'max' shadows the builtin.
2077 def sys_set_netmem_max(path, max):
2078 debug("setting", path, "to at least", max)
2086 fp = open(path, 'w')
2087 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes if absent."""
    # Both are misc devices (major 10); minors 240 and 241 respectively.
    for node, minor in (('/dev/portals', 240), ('/dev/obd', 241)):
        if not os.access(node, os.R_OK):
            run('mknod %s c 10 %d' % (node, minor))
2098 # Add dir to the global PATH, if not already there.
# NOTE(review): recovered listing -- line 2102 (presumably an early return
# when the dir is already present) and the body of sanitise_path (2110-2111)
# are missing from this dump.
2099 def add_to_path(new_dir):
2100 syspath = string.split(os.environ['PATH'], ':')
2101 if new_dir in syspath:
2103 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
2106 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
2107 # ensure basic elements are in the system path
2108 def sanitise_path():
2109 for dir in DEFAULT_PATH:
2112 # Initialize or shutdown lustre according to a configuration file
2113 # * prepare the system for lustre
2114 # * configure devices with lctl
2115 # Shutdown does steps in reverse
# Main driver fragment. NOTE(review): recovered listing -- the 'def main():'
# header, try/except scaffolding and many branch lines are missing from this
# dump; the visible 'print x' statements are Python 2 syntax.
2118 global TCP_ACCEPTOR, lctl, MAXTCPBUF
2120 host = socket.gethostname()
2122 # the PRNG is normally seeded with time(), which is not so good for starting
2123 # time-synchronized clusters
# NOTE(review): 'input' shadows the builtin.
2124 input = open('/dev/urandom', 'r')
2126 print 'Unable to open /dev/urandom!'
2128 seed = input.read(32)
2134 args = parse_cmdline(sys.argv[1:])
# XML config file path given as a positional argument.
2136 if not os.access(args[0], os.R_OK):
2137 print 'File not found or readable:', args[0]
2140 dom = xml.dom.minidom.parse(args[0])
2142 panic("%s does not appear to be a config file." % (args[0]))
2143 sys.exit(1) # make sure to die here, even in debug mode.
2144 db = LustreDB_XML(dom.documentElement, dom.documentElement)
# Alternatively the config comes from LDAP (--ldapurl + --config).
2145 elif config.ldapurl():
2146 if not config.config_name():
2147 panic("--ldapurl requires --config name")
2148 dn = "config=%s,fs=lustre" % (config.config_name())
2149 db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
# Candidate node names: explicit --node, else hostname then 'localhost'.
2155 node_list.append(config.node())
2158 node_list.append(host)
2159 node_list.append('localhost')
2160 debug("configuring for host: ", node_list)
# Make per-host debug/gdb file names unique.
2163 config._debug_path = config._debug_path + '-' + host
2164 config._gdb_script = config._gdb_script + '-' + host
2166 setupModulePath(sys.argv[0])
2168 TCP_ACCEPTOR = find_prog('acceptor')
2169 if not TCP_ACCEPTOR:
2171 TCP_ACCEPTOR = 'acceptor'
2172 debug('! acceptor not found')
2174 panic('acceptor not found')
2176 lctl = LCTLInterface('lctl')
2179 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
2180 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
2182 doHost(db, node_list)
# Script entry point fragment. NOTE(review): recovered listing -- the try:
# around main() and the exception-handler bodies are missing; 'except X, e'
# is Python 2 syntax. Exits with the first cleanup error recorded via
# cleanup_error() so partial-failure teardowns are reported.
2184 if __name__ == "__main__":
2187 except LconfError, e:
2189 except CommandError, e:
2193 if first_cleanup_error:
2194 sys.exit(first_cleanup_error)