3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
# Heuristic: when lconf is run from a source checkout (a Makefile sits next
# to the script), in-tree utility paths are preferred over installed ones.
# NOTE(review): this listing is truncated — the function's return statements
# (original lines 43-45) are not visible in this chunk; confirm against the
# full file before relying on the exact return values.
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = 'portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
94 "undefined" : (1 << 0),
104 "portals" : (1 << 10),
105 "socknal" : (1 << 11),
106 "qswnal" : (1 << 12),
107 "pinger" : (1 << 13),
108 "filter" : (1 << 14),
114 "ptlrouter" : (1 << 20),
# Holds the return code of the first cleanup step that failed; later
# failures are ignored so the most useful (earliest) error is reported.
first_cleanup_error = 0

def cleanup_error(rc):
    """Record *rc* as the cleanup error, unless one was already recorded."""
    global first_cleanup_error
    if first_cleanup_error:
        return
    first_cleanup_error = rc
126 # ============================================================
127 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with a standard 'not implemented' error for *msg*.

    Fixes the 'implmemented' typo in the message, and uses the call-form
    raise already used by the neighbouring error helpers in this file
    (e.g. `raise Lustre.LconfError(msg)`), instead of the legacy
    `raise Class, arg` statement form.
    """
    raise Lustre.LconfError(msg + ' not implemented yet.')
133 msg = string.join(map(str,args))
134 if not config.noexec:
135 raise Lustre.LconfError(msg)
140 msg = string.join(map(str,args))
145 print string.strip(s)
149 msg = string.join(map(str,args))
152 # ack, python's builtin int() does not support '0x123' syntax.
153 # eval can do it, although what a hack!
157 return eval(s, {}, {})
160 except SyntaxError, e:
161 raise ValueError("not a number")
163 raise ValueError("not a number")
165 # ============================================================
166 # locally defined exceptions
167 class CommandError (exceptions.Exception):
168 def __init__(self, cmd_name, cmd_err, rc=None):
169 self.cmd_name = cmd_name
170 self.cmd_err = cmd_err
175 if type(self.cmd_err) == types.StringType:
177 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
179 print "! %s: %s" % (self.cmd_name, self.cmd_err)
180 elif type(self.cmd_err) == types.ListType:
182 print "! %s (error %d):" % (self.cmd_name, self.rc)
184 print "! %s:" % (self.cmd_name)
185 for s in self.cmd_err:
186 print "> %s" %(string.strip(s))
191 # ============================================================
192 # handle daemons, like the acceptor
194 """ Manage starting and stopping a daemon. Assumes daemon manages
195 it's own pid file. """
197 def __init__(self, cmd):
203 log(self.command, "already running.")
205 self.path = find_prog(self.command)
207 panic(self.command, "not found.")
208 ret, out = runcmd(self.path +' '+ self.command_line())
210 raise CommandError(self.path, out, ret)
214 pid = self.read_pidfile()
216 log ("killing process", pid)
218 #time.sleep(1) # let daemon die
220 log("unable to kill", self.command, e)
222 log("unable to kill", self.command)
225 pid = self.read_pidfile()
235 def read_pidfile(self):
237 fp = open(self.pidfile(), 'r')
244 def clean_pidfile(self):
245 """ Remove a stale pidfile """
246 log("removing stale pidfile:", self.pidfile())
248 os.unlink(self.pidfile())
250 log(self.pidfile(), e)
252 class AcceptorHandler(DaemonHandler):
253 def __init__(self, port, net_type, send_mem, recv_mem, irq_aff):
254 DaemonHandler.__init__(self, "acceptor")
257 self.send_mem = send_mem
258 self.recv_mem = recv_mem
261 self.flags = self.flags + ' -i'
264 return "/var/run/%s-%d.pid" % (self.command, self.port)
266 def command_line(self):
267 return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port)))
271 # start the acceptors
273 if config.lctl_dump or config.record:
275 for port in acceptors.keys():
276 daemon = acceptors[port]
277 if not daemon.running():
280 def run_one_acceptor(port):
281 if config.lctl_dump or config.record:
283 if acceptors.has_key(port):
284 daemon = acceptors[port]
285 if not daemon.running():
288 panic("run_one_acceptor: No acceptor defined for port:", port)
290 def stop_acceptor(port):
291 if acceptors.has_key(port):
292 daemon = acceptors[port]
297 # ============================================================
298 # handle lctl interface
301 Manage communication with lctl
304 def __init__(self, cmd):
306 Initialize close by finding the lctl binary.
308 self.lctl = find_prog(cmd)
310 self.record_device = ''
313 debug('! lctl not found')
316 raise CommandError('lctl', "unable to find lctl binary.")
318 def use_save_file(self, file):
319 self.save_file = file
321 def record(self, dev_name, logname):
322 log("Recording log", logname, "on", dev_name)
323 self.record_device = dev_name
324 self.record_log = logname
326 def end_record(self):
327 log("End recording log", self.record_log, "on", self.record_device)
328 self.record_device = None
329 self.record_log = None
def set_nonblock(self, fd):
    """Put file descriptor *fd* into non-blocking (O_NDELAY) mode."""
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
338 the cmds are written to stdin of lctl
339 lctl doesn't return errors when run in script mode, so
341 should modify command line to accept multiple commands, or
342 create complex command line options
346 cmds = '\n dump ' + self.save_file + '\n' + cmds
347 elif self.record_device:
351 %s""" % (self.record_device, self.record_log, cmds)
353 debug("+", cmd_line, cmds)
354 if config.noexec: return (0, [])
356 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
357 child.tochild.write(cmds + "\n")
358 child.tochild.close()
360 # From "Python Cookbook" from O'Reilly
361 outfile = child.fromchild
362 outfd = outfile.fileno()
363 self.set_nonblock(outfd)
364 errfile = child.childerr
365 errfd = errfile.fileno()
366 self.set_nonblock(errfd)
368 outdata = errdata = ''
371 ready = select.select([outfd,errfd],[],[]) # Wait for input
372 if outfd in ready[0]:
373 outchunk = outfile.read()
374 if outchunk == '': outeof = 1
375 outdata = outdata + outchunk
376 if errfd in ready[0]:
377 errchunk = errfile.read()
378 if errchunk == '': erreof = 1
379 errdata = errdata + errchunk
380 if outeof and erreof: break
381 # end of "borrowed" code
384 if os.WIFEXITED(ret):
385 rc = os.WEXITSTATUS(ret)
388 if rc or len(errdata):
389 raise CommandError(self.lctl, errdata, rc)
392 def runcmd(self, *args):
394 run lctl using the command line
396 cmd = string.join(map(str,args))
397 debug("+", self.lctl, cmd)
398 rc, out = run(self.lctl, cmd)
400 raise CommandError(self.lctl, out, rc)
404 def clear_log(self, dev, log):
405 """ clear an existing log """
410 quit """ % (dev, log)
413 def network(self, net, nid):
418 quit """ % (net, nid)
421 # create a new connection
422 def add_uuid(self, net_type, uuid, nid):
423 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
426 def add_autoconn(self, net_type, send_mem, recv_mem, nid, hostaddr,
428 if net_type in ('tcp',) and not config.lctl_dump:
433 add_autoconn %s %s %d %s
437 nid, hostaddr, port, flags )
440 def connect(self, srv):
441 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
442 if srv.net_type in ('tcp',) and not config.lctl_dump:
446 self.add_autoconn(srv.net_type, srv.send_mem, srv.recv_mem,
447 srv.nid, srv.hostaddr, srv.port, flags)
450 def recover(self, dev_name, new_conn):
453 recover %s""" %(dev_name, new_conn)
456 # add a route to a range
457 def add_route(self, net, gw, lo, hi):
465 except CommandError, e:
469 def del_route(self, net, gw, lo, hi):
474 quit """ % (net, gw, lo, hi)
477 # add a route to a host
478 def add_route_host(self, net, uuid, gw, tgt):
479 self.add_uuid(net, uuid, tgt)
487 except CommandError, e:
491 # add a route to a range
492 def del_route_host(self, net, uuid, gw, tgt):
498 quit """ % (net, gw, tgt)
502 def del_autoconn(self, net_type, nid, hostaddr):
503 if net_type in ('tcp',) and not config.lctl_dump:
512 # disconnect one connection
513 def disconnect(self, srv):
514 self.del_uuid(srv.nid_uuid)
515 if srv.net_type in ('tcp',) and not config.lctl_dump:
516 self.del_autoconn(srv.net_type, srv.nid, srv.hostaddr)
518 def del_uuid(self, uuid):
526 def disconnectAll(self, net):
534 def attach(self, type, name, uuid):
537 quit""" % (type, name, uuid)
540 def setup(self, name, setup = ""):
544 quit""" % (name, setup)
548 # create a new device with lctl
549 def newdev(self, type, name, uuid, setup = ""):
550 self.attach(type, name, uuid);
552 self.setup(name, setup)
553 except CommandError, e:
554 self.cleanup(name, uuid, 0)
559 def cleanup(self, name, uuid, force, failover = 0):
560 if failover: force = 1
566 quit""" % (name, ('', 'force')[force],
567 ('', 'failover')[failover])
571 def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt,
572 stripe_sz, stripe_off,
576 lov_setup %s %d %d %d %s %s
577 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
582 def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off,
586 lov_setconfig %s %d %d %d %s %s
587 quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist)
591 def dump(self, dump_file):
594 quit""" % (dump_file)
597 # get list of devices
598 def device_list(self):
599 devices = '/proc/fs/lustre/devices'
601 if os.access(devices, os.R_OK):
603 fp = open(devices, 'r')
611 def lustre_version(self):
612 rc, out = self.runcmd('version')
616 def mount_option(self, profile, osc, mdc):
618 mount_option %s %s %s
619 quit""" % (profile, osc, mdc)
622 # delete mount options
623 def del_mount_option(self, profile):
629 def set_timeout(self, timeout):
635 # delete mount options
636 def set_lustre_upcall(self, upcall):
641 # ============================================================
642 # Various system-level functions
643 # (ideally moved to their own module)
645 # Run a command and return the output and status.
646 # stderr is sent to /dev/null, could use popen3 to
647 # save it if necessary
650 if config.noexec: return (0, [])
651 f = os.popen(cmd + ' 2>&1')
661 cmd = string.join(map(str,args))
664 # Run a command in the background.
665 def run_daemon(*args):
666 cmd = string.join(map(str,args))
668 if config.noexec: return 0
669 f = os.popen(cmd + ' 2>&1')
677 # Determine full path to use for an external command
678 # searches dirname(argv[0]) first, then PATH
680 syspath = string.split(os.environ['PATH'], ':')
681 cmdpath = os.path.dirname(sys.argv[0])
682 syspath.insert(0, cmdpath);
684 syspath.insert(0, os.path.join(config.portals, 'utils/'))
686 prog = os.path.join(d,cmd)
687 if os.access(prog, os.X_OK):
691 # Recursively look for file starting at base dir
692 def do_find_file(base, mod):
693 fullname = os.path.join(base, mod)
694 if os.access(fullname, os.R_OK):
696 for d in os.listdir(base):
697 dir = os.path.join(base,d)
698 if os.path.isdir(dir):
699 module = do_find_file(dir, mod)
703 def find_module(src_dir, dev_dir, modname):
704 modbase = src_dir +'/'+ dev_dir +'/'+ modname
705 for modext in '.ko', '.o':
706 module = modbase + modext
708 if os.access(module, os.R_OK):
714 # is the path a block device?
721 return stat.S_ISBLK(s[stat.ST_MODE])
723 # build fs according to type
725 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
731 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
733 # devsize is in 1k, and fs block count is in 4k
734 block_cnt = devsize/4
736 if fstype in ('ext3', 'extN', 'ldiskfs'):
737 # ext3 journal size is in megabytes
740 if not is_block(dev):
741 ret, out = runcmd("ls -l %s" %dev)
742 devsize = int(string.split(out[0])[4]) / 1024
744 ret, out = runcmd("sfdisk -s %s" %dev)
745 devsize = int(out[0])
746 if devsize > 1024 * 1024:
747 jsize = ((devsize / 102400) * 4)
750 if jsize: jopt = "-J size=%d" %(jsize,)
751 if isize: iopt = "-I %d" %(isize,)
752 mkfs = 'mkfs.ext2 -j -b 4096 '
753 if not isblock or config.force:
755 elif fstype == 'reiserfs':
756 # reiserfs journal size is in blocks
757 if jsize: jopt = "--journal_size %d" %(jsize,)
758 mkfs = 'mkreiserfs -ff'
760 panic('unsupported fs type: ', fstype)
762 if config.mkfsoptions != None:
763 mkfs = mkfs + ' ' + config.mkfsoptions
764 if mkfsoptions != None:
765 mkfs = mkfs + ' ' + mkfsoptions
766 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
768 panic("Unable to build fs:", dev, string.join(out))
769 # enable hash tree indexing on fsswe
770 if fstype in ('ext3', 'extN', 'ldiskfs'):
771 htree = 'echo "feature FEATURE_C5" | debugfs -w'
772 (ret, out) = run (htree, dev)
774 panic("Unable to enable htree:", dev)
776 # some systems use /dev/loopN, some /dev/loop/N
780 if not os.access(loop + str(0), os.R_OK):
782 if not os.access(loop + str(0), os.R_OK):
783 panic ("can't access loop devices")
786 # find loop device assigned to the file
787 def find_assigned_loop(file):
789 for n in xrange(0, MAX_LOOP_DEVICES):
791 if os.access(dev, os.R_OK):
792 (stat, out) = run('losetup', dev)
793 if out and stat == 0:
794 m = re.search(r'\((.*)\)', out[0])
795 if m and file == m.group(1):
801 # create file if necessary and assign the first free loop device
802 def init_loop(file, size, fstype, journal_size, inode_size,
803 mkfsoptions, reformat, backfstype, backfile):
806 realfstype = backfstype
811 dev = find_assigned_loop(realfile)
813 print 'WARNING file:', realfile, 'already mapped to', dev
816 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
818 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
819 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
821 panic("Unable to create backing store:", realfile)
823 mkfs(realfile, size, realfstype, journal_size, inode_size,
824 mkfsoptions, isblock=0)
827 # find next free loop
828 for n in xrange(0, MAX_LOOP_DEVICES):
830 if os.access(dev, os.R_OK):
831 (stat, out) = run('losetup', dev)
833 run('losetup', dev, realfile)
836 print "out of loop devices"
838 print "out of loop devices"
841 # undo loop assignment
842 def clean_loop(file):
843 dev = find_assigned_loop(file)
845 ret, out = run('losetup -d', dev)
847 log('unable to clean loop device:', dev, 'for file:', file)
850 # determine if dev is formatted as a <fstype> filesystem
851 def need_format(fstype, dev):
852 # FIXME don't know how to implement this
855 # initialize a block device if needed
856 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
857 inode_size, mkfsoptions, backfstype, backdev):
861 if fstype == 'smfs' or not is_block(dev):
862 dev = init_loop(dev, size, fstype, journal_size, inode_size,
863 mkfsoptions, reformat, backfstype, backdev)
864 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
865 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
868 # panic("device:", dev,
869 # "not prepared, and autoformat is not set.\n",
870 # "Rerun with --reformat option to format ALL filesystems")
875 """lookup IP address for an interface"""
876 rc, out = run("/sbin/ifconfig", iface)
879 addr = string.split(out[1])[1]
880 ip = string.split(addr, ':')[1]
883 def sys_get_elan_position_file():
884 procfiles = ["/proc/elan/device0/position",
885 "/proc/qsnet/elan4/device0/position",
886 "/proc/qsnet/elan3/device0/position"]
888 if os.access(p, os.R_OK):
892 def sys_get_local_nid(net_type, wildcard, cluster_id):
893 """Return the local nid."""
895 if sys_get_elan_position_file():
896 local = sys_get_local_address('elan', '*', cluster_id)
898 local = sys_get_local_address(net_type, wildcard, cluster_id)
901 def sys_get_local_address(net_type, wildcard, cluster_id):
902 """Return the local address for the network type."""
904 if net_type in ('tcp',):
906 iface, star = string.split(wildcard, ':')
907 local = if2addr(iface)
909 panic ("unable to determine ip for:", wildcard)
911 host = socket.gethostname()
912 local = socket.gethostbyname(host)
913 elif net_type == 'elan':
914 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
915 f = sys_get_elan_position_file()
917 panic ("unable to determine local Elan ID")
920 lines = fp.readlines()
928 nid = my_int(cluster_id) + my_int(elan_id)
930 except ValueError, e:
934 elif net_type == 'gm':
935 fixme("automatic local address for GM")
939 def mod_loaded(modname):
940 """Check if a module is already loaded. Look in /proc/modules for it."""
942 fp = open('/proc/modules')
943 lines = fp.readlines()
945 # please forgive my tired fingers for this one
946 ret = filter(lambda word, mod=modname: word == mod,
947 map(lambda line: string.split(line)[0], lines))
952 # XXX: instead of device_list, ask for $name and see what we get
953 def is_prepared(name):
954 """Return true if a device exists for the name"""
957 if (config.noexec or config.record) and config.cleanup:
960 # expect this format:
961 # 1 UP ldlm ldlm ldlm_UUID 2
962 out = lctl.device_list()
964 if name == string.split(s)[3]:
966 except CommandError, e:
970 def is_network_prepared():
971 """If the any device exists, then assume that all networking
972 has been configured"""
973 out = lctl.device_list()
976 def fs_is_mounted(path):
977 """Return true if path is a mounted lustre filesystem"""
979 fp = open('/proc/mounts')
980 lines = fp.readlines()
984 if a[1] == path and a[2] == 'lustre_lite':
992 """Manage kernel modules"""
993 def __init__(self, lustre_dir, portals_dir):
994 self.lustre_dir = lustre_dir
995 self.portals_dir = portals_dir
996 self.kmodule_list = []
998 def add_portals_module(self, dev_dir, modname):
999 """Append a module to list of modules to load."""
1000 self.kmodule_list.append((self.portals_dir, dev_dir, modname))
1002 def add_lustre_module(self, dev_dir, modname):
1003 """Append a module to list of modules to load."""
1004 self.kmodule_list.append((self.lustre_dir, dev_dir, modname))
1006 def load_module(self):
1007 """Load all the modules in the list in the order they appear."""
1008 for src_dir, dev_dir, mod in self.kmodule_list:
1009 if mod_loaded(mod) and not config.noexec:
1011 log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir)
1013 module = find_module(src_dir, dev_dir, mod)
1015 panic('module not found:', mod)
1016 (rc, out) = run('/sbin/insmod', module)
1018 raise CommandError('insmod', out, rc)
1020 (rc, out) = run('/sbin/modprobe', mod)
1022 raise CommandError('modprobe', out, rc)
1024 def cleanup_module(self):
1025 """Unload the modules in the list in reverse order."""
1026 rev = self.kmodule_list
1028 for src_dir, dev_dir, mod in rev:
1029 if not mod_loaded(mod) and not config.noexec:
1032 if mod == 'portals' and config.dump:
1033 lctl.dump(config.dump)
1034 log('unloading module:', mod)
1035 (rc, out) = run('/sbin/rmmod', mod)
1037 log('! unable to unload module:', mod)
1040 # ============================================================
1041 # Classes to prepare and cleanup the various objects
1044 """ Base class for the rest of the modules. The default cleanup method is
1045 defined here, as well as some utilitiy funcs.
1047 def __init__(self, module_name, db):
1049 self.module_name = module_name
1050 self.name = self.db.getName()
1051 self.uuid = self.db.getUUID()
1054 self.kmod = kmod(config.lustre, config.portals)
1056 def info(self, *args):
1057 msg = string.join(map(str,args))
1058 print self.module_name + ":", self.name, self.uuid, msg
1061 """ default cleanup, used for most modules """
1064 lctl.cleanup(self.name, self.uuid, config.force)
1065 except CommandError, e:
1066 log(self.module_name, "cleanup failed: ", self.name)
1070 def add_portals_module(self, dev_dir, modname):
1071 """Append a module to list of modules to load."""
1072 self.kmod.add_portals_module(dev_dir, modname)
1074 def add_lustre_module(self, dev_dir, modname):
1075 """Append a module to list of modules to load."""
1076 self.kmod.add_lustre_module(dev_dir, modname)
1078 def load_module(self):
1079 """Load all the modules in the list in the order they appear."""
1080 self.kmod.load_module()
1082 def cleanup_module(self):
1083 """Unload the modules in the list in reverse order."""
1084 if self.safe_to_clean():
1085 self.kmod.cleanup_module()
1087 def safe_to_clean(self):
1090 def safe_to_clean_modules(self):
1091 return self.safe_to_clean()
1093 class Network(Module):
1094 def __init__(self,db):
1095 Module.__init__(self, 'NETWORK', db)
1096 self.net_type = self.db.get_val('nettype')
1097 self.nid = self.db.get_val('nid', '*')
1098 self.cluster_id = self.db.get_val('clusterid', "0")
1099 self.port = self.db.get_val_int('port', 0)
1100 self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF)
1101 self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF)
1102 self.irq_affinity = self.db.get_val_int('irqaffinity', 0)
1105 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1107 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1108 self.generic_nid = 1
1109 debug("nid:", self.nid)
1111 self.generic_nid = 0
1113 self.nid_uuid = self.nid_to_uuid(self.nid)
1115 self.hostaddr = self.db.get_val('hostaddr', self.nid)
1116 if '*' in self.hostaddr:
1117 self.hostaddr = sys_get_local_address(self.net_type, self.hostaddr, self.cluster_id)
1118 if not self.hostaddr:
1119 panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id)
1120 debug("hostaddr:", self.hostaddr)
1122 self.add_portals_module("libcfs", 'libcfs')
1123 self.add_portals_module("portals", 'portals')
1124 if node_needs_router():
1125 self.add_portals_module("router", 'kptlrouter')
1126 if self.net_type == 'tcp':
1127 self.add_portals_module("knals/socknal", 'ksocknal')
1128 if self.net_type == 'elan':
1129 self.add_portals_module("knals/qswnal", 'kqswnal')
1130 if self.net_type == 'gm':
1131 self.add_portals_module("knals/gmnal", 'kgmnal')
def nid_to_uuid(self, nid):
    """Return the canonical UUID string used to identify network id *nid*."""
    return "NID_" + str(nid) + "_UUID"
1137 if is_network_prepared():
1139 self.info(self.net_type, self.nid, self.port)
1140 if not (config.record and self.generic_nid):
1141 lctl.network(self.net_type, self.nid)
1142 if self.net_type == 'tcp':
1144 if self.net_type == 'elan':
1146 if self.port and node_is_router():
1147 run_one_acceptor(self.port)
1148 self.connect_peer_gateways()
1150 def connect_peer_gateways(self):
1151 for router in self.db.lookup_class('node'):
1152 if router.get_val_int('router', 0):
1153 for netuuid in router.get_networks():
1154 net = self.db.lookup(netuuid)
1156 if (gw.cluster_id == self.cluster_id and
1157 gw.net_type == self.net_type):
1158 if gw.nid != self.nid:
1161 def disconnect_peer_gateways(self):
1162 for router in self.db.lookup_class('node'):
1163 if router.get_val_int('router', 0):
1164 for netuuid in router.get_networks():
1165 net = self.db.lookup(netuuid)
1167 if (gw.cluster_id == self.cluster_id and
1168 gw.net_type == self.net_type):
1169 if gw.nid != self.nid:
1172 except CommandError, e:
1173 print "disconnect failed: ", self.name
1177 def safe_to_clean(self):
1178 return not is_network_prepared()
1181 self.info(self.net_type, self.nid, self.port)
1183 stop_acceptor(self.port)
1184 if node_is_router():
1185 self.disconnect_peer_gateways()
1187 class RouteTable(Module):
1188 def __init__(self,db):
1189 Module.__init__(self, 'ROUTES', db)
1191 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1193 # only setup connections for tcp NALs
1195 if not net_type in ('tcp',):
1198 # connect to target if route is to single node and this node is the gw
1199 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1200 if not local_cluster(net_type, tgt_cluster_id):
1201 panic("target", lo, " not on the local cluster")
1202 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1203 # connect to gateway if this node is not the gw
1204 elif (local_cluster(net_type, gw_cluster_id)
1205 and not local_interface(net_type, gw_cluster_id, gw)):
1206 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1211 panic("no server for nid", lo)
1214 return Network(srvdb)
1217 if is_network_prepared():
1220 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1221 lctl.add_route(net_type, gw, lo, hi)
1222 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1226 def safe_to_clean(self):
1227 return not is_network_prepared()
1230 if is_network_prepared():
1231 # the network is still being used, don't clean it up
1233 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1234 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1237 lctl.disconnect(srv)
1238 except CommandError, e:
1239 print "disconnect failed: ", self.name
1244 lctl.del_route(net_type, gw, lo, hi)
1245 except CommandError, e:
1246 print "del_route failed: ", self.name
1250 class Management(Module):
1251 def __init__(self, db):
1252 Module.__init__(self, 'MGMT', db)
1253 self.add_lustre_module('lvfs', 'lvfs')
1254 self.add_lustre_module('obdclass', 'obdclass')
1255 self.add_lustre_module('ptlrpc', 'ptlrpc')
1256 self.add_lustre_module('mgmt', 'mgmt_svc')
1259 if is_prepared(self.name):
1262 lctl.newdev("mgmt", self.name, self.uuid)
1264 def safe_to_clean(self):
1268 if is_prepared(self.name):
1269 Module.cleanup(self)
1271 # This is only needed to load the modules; the LDLM device
1272 # is now created automatically.
1274 def __init__(self,db):
1275 Module.__init__(self, 'LDLM', db)
1276 self.add_lustre_module('lvfs', 'lvfs')
1277 self.add_lustre_module('obdclass', 'obdclass')
1278 self.add_lustre_module('ptlrpc', 'ptlrpc')
1287 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1288 Module.__init__(self, 'LOV', db)
1289 if name_override != None:
1290 self.name = "lov_%s" % name_override
1291 self.add_lustre_module('lov', 'lov')
1292 self.mds_uuid = self.db.get_first_ref('mds')
1293 self.stripe_sz = self.db.get_val_int('stripesize', 65536)
1294 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1295 self.pattern = self.db.get_val_int('stripepattern', 0)
1296 self.devlist = self.db.get_refs('obd')
1297 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1299 self.desc_uuid = self.uuid
1300 self.uuid = generate_client_uuid(self.name)
1301 self.fs_name = fs_name
1303 self.config_only = 1
1305 self.config_only = None
1306 mds= self.db.lookup(self.mds_uuid)
1307 self.mds_name = mds.getName()
1308 for obd_uuid in self.devlist:
1309 obd = self.db.lookup(obd_uuid)
1310 osc = get_osc(obd, self.uuid, fs_name)
1312 self.osclist.append(osc)
1314 panic('osc not found:', obd_uuid)
1317 if is_prepared(self.name):
1319 if self.config_only:
1320 panic("Can't prepare config_only LOV ", self.name)
1322 for osc in self.osclist:
1324 # Only ignore connect failures with --force, which
1325 # isn't implemented here yet.
1326 osc.prepare(ignore_connect_failure=0)
1327 except CommandError, e:
1328 print "Error preparing OSC %s\n" % osc.uuid
1330 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1331 self.stripe_off, self.pattern, self.devlist, self.mds_name)
1332 lctl.lov_setup(self.name, self.uuid,
1333 self.desc_uuid, self.mds_name, self.stripe_cnt,
1334 self.stripe_sz, self.stripe_off, self.pattern,
1335 string.join(self.devlist))
1338 if is_prepared(self.name):
1339 Module.cleanup(self)
1340 if self.config_only:
1341 panic("Can't clean up config_only LOV ", self.name)
1342 for osc in self.osclist:
1345 def load_module(self):
1346 if self.config_only:
1347 panic("Can't load modules for config_only LOV ", self.name)
1348 for osc in self.osclist:
1351 Module.load_module(self)
1353 def cleanup_module(self):
1354 if self.config_only:
1355 panic("Can't cleanup modules for config_only LOV ", self.name)
1356 Module.cleanup_module(self)
1357 for osc in self.osclist:
1358 osc.cleanup_module()
1361 class MDSDEV(Module):
1362 def __init__(self,db):
1363 Module.__init__(self, 'MDSDEV', db)
1364 self.devpath = self.db.get_val('devpath','')
1365 self.backdevpath = self.db.get_val('backdevpath','')
1366 self.size = self.db.get_val_int('devsize', 0)
1367 self.journal_size = self.db.get_val_int('journalsize', 0)
1368 self.fstype = self.db.get_val('fstype', '')
1369 self.backfstype = self.db.get_val('backfstype', '')
1370 self.nspath = self.db.get_val('nspath', '')
1371 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1372 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1373 # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
1374 target_uuid = self.db.get_first_ref('target')
1375 mds = self.db.lookup(target_uuid)
1376 self.name = mds.getName()
1377 self.filesystem_uuids = mds.get_refs('filesystem')
1378 # FIXME: if fstype not set, then determine based on kernel version
1379 self.format = self.db.get_val('autoformat', "no")
1380 if mds.get_val('failover', 0):
1381 self.failover_mds = 'f'
1383 self.failover_mds = 'n'
1384 active_uuid = get_active_target(mds)
1386 panic("No target device found:", target_uuid)
1387 if active_uuid == self.uuid:
1391 if self.active and config.group and config.group != mds.get_val('group'):
1394 self.inode_size = self.db.get_val_int('inodesize', 0)
1395 if self.inode_size == 0:
1396 # find the LOV for this MDS
1397 lovconfig_uuid = mds.get_first_ref('lovconfig')
1398 if not lovconfig_uuid:
1399 panic("No LOV config found for MDS ", mds.name)
1400 lovconfig = mds.lookup(lovconfig_uuid)
1401 lov_uuid = lovconfig.get_first_ref('lov')
1403 panic("No LOV found for lovconfig ", lovconfig.name)
1404 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1)
1406 # default stripe count controls default inode_size
1407 stripe_count = lov.stripe_cnt
1408 if stripe_count > 77:
1409 self.inode_size = 4096
1410 elif stripe_count > 35:
1411 self.inode_size = 2048
1412 elif stripe_count > 13:
1413 self.inode_size = 1024
1414 elif stripe_count > 3:
1415 self.inode_size = 512
1417 self.inode_size = 256
1419 self.target_dev_uuid = self.uuid
1420 self.uuid = target_uuid
1423 self.add_lustre_module('mdc', 'mdc')
1424 self.add_lustre_module('osc', 'osc')
1425 self.add_lustre_module('lov', 'lov')
1426 self.add_lustre_module('mds', 'mds')
1428 if self.fstype == 'smfs':
1429 self.add_lustre_module('smfs', 'smfs')
1431 if self.fstype == 'ldiskfs':
1432 self.add_lustre_module('ldiskfs', 'ldiskfs')
1435 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1437 # if fstype is smfs, then we should also take care about backing
1439 if self.fstype == 'smfs':
1440 self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
# NOTE(review): this region of the listing is sampled — the embedded line
# numbers jump (1442, 1444, 1447, ...), so branch/else/try lines are missing.
# Code below is reproduced verbatim; comments only annotate what is visible.
# MDSDEV.load_module: load kernel modules only when this MDS is active here.
1442 def load_module(self):
1444 Module.load_module(self)
# prepare (def line not visible): set up the MDS device via lctl, skipping
# if already prepared or not the active failover target.
1447 if is_prepared(self.name):
1450 debug(self.uuid, "not active")
1453 # run write_conf automatically, if --reformat used
1455 self.info(self.devpath, self.fstype, self.size, self.format)
1457 # never reformat here
1458 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1459 self.format, self.journal_size, self.inode_size,
1460 self.mkfsoptions, self.backfstype, self.backdevpath)
# a singleton MDT device backs all MDS devices on this node
1461 if not is_prepared('MDT'):
1462 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
# merge command-line mount options with per-device ones (CLI first)
1464 if config.mountfsoptions != None:
1465 mountfsoptions = config.mountfsoptions
1466 if self.mountfsoptions != None:
1467 mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions
1469 mountfsoptions = self.mountfsoptions
1471 # we count, that mountfsoptions is always not None for smfs
1472 if self.fstype == 'smfs':
1473 realdev = self.fstype
1474 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1480 if mountfsoptions != None:
1481 lctl.newdev("mds", self.name, self.uuid,
1482 setup ="%s %s %s %s" %(realdev, self.fstype,
1483 self.name, mountfsoptions))
1485 lctl.newdev("mds", self.name, self.uuid,
1486 setup ="%s %s %s" %(realdev, self.fstype,
1488 except CommandError, e:
1490 panic("MDS is missing the config log. Need to run " +
1491 "lconf --write_conf.")
# write_conf: mount the MDS once with the '@dumb' profile placeholder to
# record client configuration logs on the device, then tear it down.
1495 def write_conf(self):
1496 if is_prepared(self.name):
1498 self.info(self.devpath, self.fstype, self.format)
1500 blkdev = block_dev(self.devpath, self.size, self.fstype,
1501 config.reformat, self.format, self.journal_size,
1502 self.inode_size, self.mkfsoptions, self.backfstype,
1505 if config.mountfsoptions != None:
1506 mountfsoptions = config.mountfsoptions
1507 if self.mountfsoptions != None:
1508 mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions
1510 mountfsoptions = self.mountfsoptions
1512 # Even for writing logs we mount mds with supplied mount options
1513 # because it will not mount smfs (if used) otherwise.
1515 # we count, that mountfsoptions is always not None for smfs
1516 if self.fstype == 'smfs':
1517 realdev = self.fstype
1518 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1524 # As mount options are passed by 4th param to config tool, we need
1525 # to pass something in 3rd param. But we do not want this 3rd param
1526 # be counted as a profile name for reading log on MDS setup, thus,
1527 # we pass there some predefined sign @dumb, which will be checked
1528 # in MDS code and skipped.
1529 if mountfsoptions != None:
1530 lctl.newdev("mds", self.name, self.uuid,
1531 setup ="%s %s %s %s" %(realdev, self.fstype, 'dumb',
1534 lctl.newdev("mds", self.name, self.uuid,
1535 setup ="%s %s %s" %(realdev, self.fstype, 'dumb'))
1537 # record logs for the MDS lov
1538 for uuid in self.filesystem_uuids:
1539 log("recording clients for filesystem:", uuid)
1540 fs = self.db.lookup(uuid)
1541 obd_uuid = fs.get_first_ref('obd')
1542 client_uuid = generate_client_uuid(self.name)
1543 client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name,
# record both a setup log (<name>) and a cleanup log (<name>-clean)
1546 lctl.clear_log(self.name, self.name)
1547 lctl.record(self.name, self.name)
1549 lctl.mount_option(self.name, client.get_name(), "")
1553 lctl.clear_log(self.name, self.name + '-clean')
1554 lctl.record(self.name, self.name + '-clean')
1556 lctl.del_mount_option(self.name)
1561 # record logs for each client
1563 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1565 config_options = CONFIG_FILE
1567 for node_db in self.db.lookup_class('node'):
1568 client_name = node_db.getName()
1569 for prof_uuid in node_db.get_refs('profile'):
1570 prof_db = node_db.lookup(prof_uuid)
1571 # refactor this into a funtion to test "clientness"
1573 for ref_class, ref_uuid in prof_db.get_all_refs():
1574 if ref_class in ('mountpoint','echoclient'):
1575 debug("recording", client_name)
1576 old_noexec = config.noexec
# re-invoke lconf itself with --record to capture each client's config
1578 noexec_opt = ('', '-n')
1579 ret, out = run (sys.argv[0],
1580 noexec_opt[old_noexec == 1],
1581 " -v --record --nomod",
1582 "--record_log", client_name,
1583 "--record_device", self.name,
1584 "--node", client_name,
1587 for s in out: log("record> ", string.strip(s))
1588 ret, out = run (sys.argv[0],
1589 noexec_opt[old_noexec == 1],
1590 "--cleanup -v --record --nomod",
1591 "--record_log", client_name + "-clean",
1592 "--record_device", self.name,
1593 "--node", client_name,
1596 for s in out: log("record> ", string.strip(s))
1597 config.noexec = old_noexec
# tear the recording MDS back down; best-effort cleanup
1599 lctl.cleanup(self.name, self.uuid, 0, 0)
1600 except CommandError, e:
1601 log(self.module_name, "cleanup failed: ", self.name)
1604 Module.cleanup(self)
1606 if self.fstype == 'smfs':
1607 clean_loop(self.backdevpath)
1609 clean_loop(self.devpath)
# NOTE(review): sampled listing — loop/return lines are missing here.
# msd_remaining: true if any 'mds' device is still listed by lctl.
1611 def msd_remaining(self):
1612 out = lctl.device_list()
1614 if string.split(s)[2] in ('mds',):
1617 def safe_to_clean(self):
# modules may be unloaded only once no MDS devices remain
1620 def safe_to_clean_modules(self):
1621 return not self.msd_remaining()
# cleanup (def line not visible): tear down this MDS, then the shared MDT
# device if this was the last MDS; finally release loop devices.
1625 debug(self.uuid, "not active")
1628 if is_prepared(self.name):
1630 lctl.cleanup(self.name, self.uuid, config.force,
1632 except CommandError, e:
1633 log(self.module_name, "cleanup failed: ", self.name)
1636 Module.cleanup(self)
1637 if not self.msd_remaining() and is_prepared('MDT'):
1639 lctl.cleanup("MDT", "MDT_UUID", config.force,
1641 except CommandError, e:
1642 print "cleanup failed: ", self.name
1646 if self.fstype == 'smfs':
1647 clean_loop(self.backdevpath)
1649 clean_loop(self.devpath)
# NOTE(review): OSD class region; class header not visible and the listing
# is sampled (missing else/try lines). Reproduced verbatim with annotations.
# __init__: read OSD settings from the config db; becomes active only if
# this node holds the active failover target.
1652 def __init__(self, db):
1653 Module.__init__(self, 'OSD', db)
1654 self.osdtype = self.db.get_val('osdtype')
1655 self.devpath = self.db.get_val('devpath', '')
1656 self.backdevpath = self.db.get_val('backdevpath', '')
1657 self.size = self.db.get_val_int('devsize', 0)
1658 self.journal_size = self.db.get_val_int('journalsize', 0)
1659 self.inode_size = self.db.get_val_int('inodesize', 0)
1660 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1661 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1662 self.fstype = self.db.get_val('fstype', '')
1663 self.backfstype = self.db.get_val('backfstype', '')
1664 self.nspath = self.db.get_val('nspath', '')
1665 target_uuid = self.db.get_first_ref('target')
1666 ost = self.db.lookup(target_uuid)
1667 self.name = ost.getName()
1668 self.format = self.db.get_val('autoformat', 'yes')
# 'f'/'n' flag passed through to the obdfilter setup line
1669 if ost.get_val('failover', 0):
1670 self.failover_ost = 'f'
1672 self.failover_ost = 'n'
1674 active_uuid = get_active_target(ost)
1676 panic("No target device found:", target_uuid)
1677 if active_uuid == self.uuid:
1681 if self.active and config.group and config.group != ost.get_val('group'):
# present ourselves to the rest of lconf under the target's uuid
1684 self.target_dev_uuid = self.uuid
1685 self.uuid = target_uuid
1687 self.add_lustre_module('ost', 'ost')
1688 if self.fstype == 'smfs':
1689 self.add_lustre_module('smfs', 'smfs')
1690 # FIXME: should we default to ext3 here?
1691 if self.fstype == 'ldiskfs':
1692 self.add_lustre_module('ldiskfs', 'ldiskfs')
1694 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
1695 if self.fstype == 'smfs':
1696 self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
1698 self.add_lustre_module(self.osdtype, self.osdtype)
1700 def load_module(self):
1702 Module.load_module(self)
# prepare (def line not visible): format/attach the block device and create
# the obdfilter (or obdecho) device, plus the shared OSS device.
1704 # need to check /proc/mounts and /etc/mtab before
1705 # formatting anything.
1706 # FIXME: check if device is already formatted.
1708 if is_prepared(self.name):
1711 debug(self.uuid, "not active")
1713 self.info(self.osdtype, self.devpath, self.size, self.fstype,
1714 self.format, self.journal_size, self.inode_size)
1716 if self.osdtype == 'obdecho':
1719 blkdev = block_dev(self.devpath, self.size, self.fstype,
1720 config.reformat, self.format, self.journal_size,
1721 self.inode_size, self.mkfsoptions, self.backfstype,
1723 if config.mountfsoptions != None:
1724 mountfsoptions = config.mountfsoptions
1725 if self.mountfsoptions != None:
1726 mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions
1728 mountfsoptions = self.mountfsoptions
1730 # we count, that mountfsoptions is always not None for smfs
1731 if self.fstype == 'smfs':
1732 realdev = self.fstype
1733 mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions,
1739 if mountfsoptions != None:
1740 lctl.newdev(self.osdtype, self.name, self.uuid,
1741 setup ="%s %s %s %s" %(realdev, self.fstype,
1745 lctl.newdev(self.osdtype, self.name, self.uuid,
1746 setup ="%s %s %s" %(realdev, self.fstype,
1748 if not is_prepared('OSS'):
1749 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
# osd_remaining: true while any obdfilter/obdecho device is still listed
1751 def osd_remaining(self):
1752 out = lctl.device_list()
1754 if string.split(s)[2] in ('obdfilter', 'obdecho'):
1757 def safe_to_clean(self):
1760 def safe_to_clean_modules(self):
1761 return not self.osd_remaining()
# cleanup (def line not visible): tear down this OSD, then OSS if last,
# then release any loop devices (none for obdecho).
1765 debug(self.uuid, "not active")
1767 if is_prepared(self.name):
1770 lctl.cleanup(self.name, self.uuid, config.force,
1772 except CommandError, e:
1773 log(self.module_name, "cleanup failed: ", self.name)
1776 if not self.osd_remaining() and is_prepared('OSS'):
1778 lctl.cleanup("OSS", "OSS_UUID", config.force,
1780 except CommandError, e:
1781 print "cleanup failed: ", self.name
1784 if not self.osdtype == 'obdecho':
1785 if self.fstype == 'smfs':
1786 clean_loop(self.backdevpath)
1788 clean_loop(self.devpath)
# NOTE(review): sampled — guard lines (1791-92, 1796-97) are missing.
# Resolve the mgmt service UUID for the filesystem behind a mountpoint name.
1790 def mgmt_uuid_for_fs(mtpt_name):
1793 mtpt_db = toplevel.lookup_name(mtpt_name)
1794 fs_uuid = mtpt_db.get_first_ref('filesystem')
1795 fs = toplevel.lookup(fs_uuid)
1798 return fs.get_first_ref('mgmt')
# NOTE(review): sampled listing — e.g. the __init__ signature continuation
# (module_dir arg) and several branches are missing. Reproduced verbatim.
1800 # Generic client module, used by OSC and MDC
1801 class Client(Module):
1802 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
1804 self.target_name = tgtdb.getName()
1805 self.target_uuid = tgtdb.getUUID()
1808 self.tgt_dev_uuid = get_active_target(tgtdb)
1809 if not self.tgt_dev_uuid:
1810 panic("No target device found for target:", self.target_name)
1812 self.kmod = kmod(config.lustre, config.portals)
1816 self.module = module
1817 self.module_name = string.upper(module)
# default device name encodes module, host, target and fs;
# self_name (when given) overrides it
1819 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
1820 self.target_name, fs_name)
1822 self.name = self_name
1824 self.lookup_server(self.tgt_dev_uuid)
1825 mgmt_uuid = mgmt_uuid_for_fs(fs_name)
1827 self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
1830 self.fs_name = fs_name
1833 self.add_lustre_module(module_dir, module)
1835 def lookup_server(self, srv_uuid):
1836 """ Lookup a server's network information """
1837 self._server_nets = get_ost_net(self.db, srv_uuid)
1838 if len(self._server_nets) == 0:
1839 panic ("Unable to find a server for:", srv_uuid)
1841 def get_servers(self):
1842 return self._server_nets
# prepare: connect to the server (adding routes if remote) and create the
# client device; connect failures may be ignored for failover targets.
1844 def prepare(self, ignore_connect_failure = 0):
1845 self.info(self.target_uuid)
1846 if is_prepared(self.name):
1849 srv = choose_local_server(self.get_servers())
1853 routes = find_route(self.get_servers())
1854 if len(routes) == 0:
1855 panic ("no route to", self.target_uuid)
1856 for (srv, r) in routes:
1857 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
1858 except CommandError, e:
1859 if not ignore_connect_failure:
# targets listed in --inactive come up in the 'inactive' state
1862 if self.target_uuid in config.inactive and self.permits_inactive():
1863 debug("%s inactive" % self.target_uuid)
1864 inactive_p = "inactive"
1866 debug("%s active" % self.target_uuid)
1868 lctl.newdev(self.module, self.name, self.uuid,
1869 setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
1870 inactive_p, self.mgmt_name))
# cleanup (def line not visible): best-effort disconnect and route removal
1873 if is_prepared(self.name):
1874 Module.cleanup(self)
1876 srv = choose_local_server(self.get_servers())
1878 lctl.disconnect(srv)
1880 for (srv, r) in find_route(self.get_servers()):
1881 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
1882 except CommandError, e:
1883 log(self.module_name, "cleanup failed: ", self.name)
# MDC / OSC: thin Client subclasses; their permits_inactive() return
# values (not visible here) differ.
1889 def __init__(self, db, uuid, fs_name):
1890 Client.__init__(self, db, uuid, 'mdc', fs_name)
1892 def permits_inactive(self):
1896 def __init__(self, db, uuid, fs_name):
1897 Client.__init__(self, db, uuid, 'osc', fs_name)
1899 def permits_inactive(self):
def mgmtcli_name_for_uuid(uuid):
    """Return the canonical device name for a management client.

    The name is derived deterministically from the management service
    UUID, so every node names the client for a given service the same way.
    """
    return 'MGMTCLI_' + str(uuid)
class ManagementClient(Client):
    """Client for the management service ('mgmt_cli' module).

    Unlike ordinary clients, its device name is derived from the
    management service UUID rather than generated from host/target/fs.
    """
    def __init__(self, db, uuid):
        cli_name = mgmtcli_name_for_uuid(db.getUUID())
        Client.__init__(self, db, uuid, 'mgmt_cli', '',
                        self_name = cli_name,
                        module_dir = 'mgmt')
# NOTE(review): sampled listing — class headers / some method def lines
# missing. Reproduced verbatim with annotations.
# COBD __init__ (class header not visible): caching OBD pairing a real and
# a cache OBD.
1912 def __init__(self, db):
1913 Module.__init__(self, 'COBD', db)
1914 self.real_uuid = self.db.get_first_ref('realobd')
1915 self.cache_uuid = self.db.get_first_ref('cacheobd')
1916 self.add_lustre_module('cobd' , 'cobd')
1918 # need to check /proc/mounts and /etc/mtab before
1919 # formatting anything.
1920 # FIXME: check if device is already formatted.
1922 if is_prepared(self.name):
1924 self.info(self.real_uuid, self.cache_uuid)
1925 lctl.newdev("cobd", self.name, self.uuid,
1926 setup ="%s %s" %(self.real_uuid, self.cache_uuid))
1929 # virtual interface for OSC and LOV
# VOSC: delegates everything to either a LOV or an OSC, depending on the
# class of the config db entry.
1931 def __init__(self, db, uuid, fs_name, name_override = None):
1932 Module.__init__(self, 'VOSC', db)
1933 if db.get_class() == 'lov':
1934 self.osc = LOV(db, uuid, fs_name, name_override)
1936 self.osc = get_osc(db, uuid, fs_name)
1938 return self.osc.uuid
1940 return self.osc.name
1945 def load_module(self):
1946 self.osc.load_module()
1947 def cleanup_module(self):
1948 self.osc.cleanup_module()
# ECHO_CLIENT: test client stacked on a VOSC.
1951 class ECHO_CLIENT(Module):
1952 def __init__(self,db):
1953 Module.__init__(self, 'ECHO_CLIENT', db)
1954 self.add_lustre_module('obdecho', 'obdecho')
1955 self.obd_uuid = self.db.get_first_ref('obd')
1956 obd = self.db.lookup(self.obd_uuid)
1957 self.uuid = generate_client_uuid(self.name)
1958 self.osc = VOSC(obd, self.uuid, self.name)
1961 if is_prepared(self.name):
1964 self.osc.prepare() # XXX This is so cheating. -p
1965 self.info(self.obd_uuid)
1967 lctl.newdev("echo_client", self.name, self.uuid,
1968 setup = self.osc.get_name())
1971 if is_prepared(self.name):
1972 Module.cleanup(self)
1975 def load_module(self):
1976 self.osc.load_module()
1977 Module.load_module(self)
1979 def cleanup_module(self):
1980 Module.cleanup_module(self)
1981 self.osc.cleanup_module()
# generate_client_uuid: random 36-char uuid embedding the client name
# (one argument line of the format call is not visible in this listing).
1984 def generate_client_uuid(name):
1985 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
1987 int(random.random() * 1048576),
1988 int(random.random() * 1048576))
1989 return client_uuid[:36]
# NOTE(review): sampled listing — several guard/else lines missing.
# Mountpoint: client-side lustre_lite mount built from a VOSC + MDC
# (+ optional management client).
1992 class Mountpoint(Module):
1993 def __init__(self,db):
1994 Module.__init__(self, 'MTPT', db)
1995 self.path = self.db.get_val('path')
1996 self.fs_uuid = self.db.get_first_ref('filesystem')
1997 fs = self.db.lookup(self.fs_uuid)
1998 self.mds_uuid = fs.get_first_ref('mds')
1999 self.obd_uuid = fs.get_first_ref('obd')
2000 self.mgmt_uuid = fs.get_first_ref('mgmt')
2001 obd = self.db.lookup(self.obd_uuid)
2002 client_uuid = generate_client_uuid(self.name)
2003 self.vosc = VOSC(obd, client_uuid, self.name)
2004 self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
2006 self.add_lustre_module('mdc', 'mdc')
2007 self.add_lustre_module('llite', 'llite')
2009 self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
# prepare (def line not visible): bring up clients then mount; under
# --record/--lctl_dump only the mount_option ioctl is recorded.
2015 if fs_is_mounted(self.path):
2016 log(self.path, "already mounted.")
2020 self.mgmtcli.prepare()
2023 mdc_name = self.mdc.name
2025 self.info(self.path, self.mds_uuid, self.obd_uuid)
2026 if config.record or config.lctl_dump:
2027 lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name)
2029 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \
2030 (self.vosc.get_name(), mdc_name, config.config, self.path)
2031 run("mkdir", self.path)
2036 panic("mount failed:", self.path, ":", string.join(val))
# cleanup (def line not visible): umount (forced under --force), then
# tear down the client stack.
2039 self.info(self.path, self.mds_uuid,self.obd_uuid)
2041 if config.record or config.lctl_dump:
2042 lctl.del_mount_option(local_node_name)
2044 if fs_is_mounted(self.path):
2046 (rc, out) = run("umount", "-f", self.path)
2048 (rc, out) = run("umount", self.path)
2050 raise CommandError('umount', out, rc)
2052 if fs_is_mounted(self.path):
2053 panic("fs is still mounted:", self.path)
2058 self.mgmtcli.cleanup()
2060 def load_module(self):
2062 self.mgmtcli.load_module()
2063 self.vosc.load_module()
2064 Module.load_module(self)
2066 def cleanup_module(self):
2067 Module.cleanup_module(self)
2068 self.vosc.cleanup_module()
2070 self.mgmtcli.cleanup_module()
2073 # ============================================================
2074 # misc query functions
# NOTE(review): sampled — docstring/guard/return lines missing below.
# get_ost_net: list of Network objects for the node hosting an OSD.
2076 def get_ost_net(self, osd_uuid):
2080 osd = self.lookup(osd_uuid)
2081 node_uuid = osd.get_first_ref('node')
2082 node = self.lookup(node_uuid)
2084 panic("unable to find node for osd_uuid:", osd_uuid,
2085 " node_ref:", node_uuid)
2086 for net_uuid in node.get_networks():
2087 db = node.lookup(net_uuid)
2088 srv_list.append(Network(db))
2092 # the order of iniitailization is based on level.
# getServiceLevel: map a service class to its startup level; the ret
# assignments for each branch are not visible in this listing.
2093 def getServiceLevel(self):
2094 type = self.get_class()
2096 if type in ('network',):
2098 elif type in ('routetbl',):
2100 elif type in ('ldlm',):
2102 elif type in ('mgmt',):
2104 elif type in ('osd', 'cobd'):
2106 elif type in ('mdsdev',):
2108 elif type in ('mountpoint', 'echoclient'):
2111 panic("Unknown type: ", type)
# services outside [--minlevel, --maxlevel] are filtered out
2113 if ret < config.minlevel or ret > config.maxlevel:
2118 # return list of services in a profile. list is a list of tuples
2119 # [(level, db_object),]
2120 def getServices(self):
2122 for ref_class, ref_uuid in self.get_all_refs():
2123 servdb = self.lookup(ref_uuid)
2125 level = getServiceLevel(servdb)
2127 list.append((level, servdb))
2129 panic('service not found: ' + ref_uuid)
2135 ############################################################
2137 # FIXME: clean this mess up!
2139 # OSC is no longer in the xml, so we have to fake it.
2140 # this is getting ugly and begging for another refactoring
# NOTE(review): sampled — return/guard lines missing in both factories.
2141 def get_osc(ost_db, uuid, fs_name):
2142 osc = OSC(ost_db, uuid, fs_name)
2145 def get_mdc(db, uuid, fs_name, mds_uuid):
2146 mds_db = db.lookup(mds_uuid);
2148 panic("no mds:", mds_uuid)
2149 mdc = MDC(mds_db, uuid, fs_name)
2152 ############################################################
2153 # routing ("rooting")
2155 # list of (nettype, cluster_id, nid)
# find_local_clusters: record this node's networks and start acceptors
# for tcp-style nets (the srv construction line is not visible).
2158 def find_local_clusters(node_db):
2159 global local_clusters
2160 for netuuid in node_db.get_networks():
2161 net = node_db.lookup(netuuid)
2163 debug("add_local", netuuid)
2164 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2166 if acceptors.has_key(srv.port):
2167 panic("duplicate port:", srv.port)
2168 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type,
2169 srv.send_mem, srv.recv_mem,
2172 # This node is a gateway.
# NOTE(review): body not visible in this listing (presumably returns the
# is_router global — confirm against the full source).
2174 def node_is_router():
2177 # If there are any routers found in the config, then this will be true
2178 # and all nodes will load kptlrouter.
def node_needs_router():
    """True when kptlrouter must be loaded on this node: either the
    config contains routers, or this node is itself a router."""
    if needs_router:
        return needs_router
    return is_router
2183 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2184 # Currently, these local routes are only added to kptlrouter route
2185 # table if they are needed to connect to a specific server. This
2186 # should be changed so all available routes are loaded, and the
2187 # ptlrouter can make all the decisions.
# NOTE(review): sampled — initializations, else branches and several
# return statements are missing throughout this region.
2190 def find_local_routes(lustre):
2191 """ Scan the lustre config looking for routers . Build list of
2193 global local_routes, needs_router
2195 list = lustre.lookup_class('node')
2197 if router.get_val_int('router', 0):
2199 for (local_type, local_cluster_id, local_nid) in local_clusters:
2201 for netuuid in router.get_networks():
2202 db = router.lookup(netuuid)
# a router is usable when it sits on one of our local nets
2203 if (local_type == db.get_val('nettype') and
2204 local_cluster_id == db.get_val('clusterid')):
2205 gw = db.get_val('nid')
2208 debug("find_local_routes: gw is", gw)
2209 for route in router.get_local_routes(local_type, gw):
2210 local_routes.append(route)
2211 debug("find_local_routes:", local_routes)
# choose_local_server: first server reachable on a local cluster
2214 def choose_local_server(srv_list):
2215 for srv in srv_list:
2216 if local_cluster(srv.net_type, srv.cluster_id):
2219 def local_cluster(net_type, cluster_id):
2220 for cluster in local_clusters:
2221 if net_type == cluster[0] and cluster_id == cluster[1]:
2225 def local_interface(net_type, cluster_id, nid):
2226 for cluster in local_clusters:
2227 if (net_type == cluster[0] and cluster_id == cluster[1]
2228 and nid == cluster[2]):
# find_route: (server, route) pairs whose nid range covers the target
2232 def find_route(srv_list):
2234 frm_type = local_clusters[0][0]
2235 for srv in srv_list:
2236 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2237 to_type = srv.net_type
2239 cluster_id = srv.cluster_id
2240 debug ('looking for route to', to_type, to)
2241 for r in local_routes:
2242 debug("find_route: ", r)
2243 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2244 result.append((srv, r))
# get_active_target: --select override wins over the 'active' ref
2247 def get_active_target(db):
2248 target_uuid = db.getUUID()
2249 target_name = db.getName()
2250 node_name = get_select(target_name)
2252 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2254 tgt_dev_uuid = db.get_first_ref('active')
2257 def get_server_by_nid_uuid(db, nid_uuid):
2258 for n in db.lookup_class("network"):
2260 if net.nid_uuid == nid_uuid:
2264 ############################################################
# newService (def line not visible): factory mapping a config-db class
# name to the corresponding Module subclass; most constructor lines are
# missing from this listing.
2268 type = db.get_class()
2269 debug('Service:', type, db.getName(), db.getUUID())
2274 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2275 elif type == 'network':
2277 elif type == 'routetbl':
2281 elif type == 'cobd':
2283 elif type == 'mdsdev':
2285 elif type == 'mountpoint':
2287 elif type == 'echoclient':
2289 elif type == 'mgmt':
2292 panic ("unknown service type:", type)
2296 # Prepare the system to run lustre using a particular profile
2297 # in a the configuration.
2298 # * load & the modules
2299 # * setup networking for the current node
2300 # * make sure partitions are in place and prepared
2301 # * initialize devices with lctl
2302 # Levels is important, and needs to be enforced.
# NOTE(review): sampled — loop bodies and sort/guard lines are missing in
# the do* helpers below; code reproduced verbatim.
2303 def for_each_profile(db, prof_list, operation):
2304 for prof_uuid in prof_list:
2305 prof_db = db.lookup(prof_uuid)
2307 panic("profile:", profile, "not found.")
2308 services = getServices(prof_db)
# doWriteconf: only mdsdev services participate in --write_conf
2311 def doWriteconf(services):
2315 if s[1].get_class() == 'mdsdev':
2316 n = newService(s[1])
2319 def doSetup(services):
2323 n = newService(s[1])
2326 def doModules(services):
2330 n = newService(s[1])
# cleanup passes honor each service's safe_to_clean* checks
2333 def doCleanup(services):
2338 n = newService(s[1])
2339 if n.safe_to_clean():
2342 def doUnloadModules(services):
2347 n = newService(s[1])
2348 if n.safe_to_clean_modules():
# NOTE(review): sampled — loop headers, else branches and several guards
# are missing. doHost drives the whole per-node setup/cleanup sequence.
2353 def doHost(lustreDB, hosts):
2354 global is_router, local_node_name
2357 node_db = lustreDB.lookup_name(h, 'node')
2361 panic('No host entry found.')
# pull per-node tunables from the config db
2363 local_node_name = node_db.get_val('name', 0)
2364 is_router = node_db.get_val_int('router', 0)
2365 lustre_upcall = node_db.get_val('lustreUpcall', '')
2366 portals_upcall = node_db.get_val('portalsUpcall', '')
2367 timeout = node_db.get_val_int('timeout', 0)
2368 ptldebug = node_db.get_val('ptldebug', '')
2369 subsystem = node_db.get_val('subsystem', '')
2371 find_local_clusters(node_db)
2373 find_local_routes(lustreDB)
2375 # Two step process: (1) load modules, (2) setup lustre
2376 # if not cleaning, load modules first.
2377 prof_list = node_db.get_refs('profile')
2379 if config.write_conf:
2380 for_each_profile(node_db, prof_list, doModules)
2382 for_each_profile(node_db, prof_list, doWriteconf)
2383 for_each_profile(node_db, prof_list, doUnloadModules)
2385 elif config.recover:
2386 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
2387 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
2388 "--client_uuid <UUID> --conn_uuid <UUID>")
2389 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
2391 elif config.cleanup:
2393 # the command line can override this value
2395 # ugly hack, only need to run lctl commands for --dump
2396 if config.lctl_dump or config.record:
2397 for_each_profile(node_db, prof_list, doCleanup)
# full cleanup: push tunables first so cleanup upcalls work
2400 sys_set_timeout(timeout)
2401 sys_set_ptldebug(ptldebug)
2402 sys_set_subsystem(subsystem)
2403 sys_set_lustre_upcall(lustre_upcall)
2404 sys_set_portals_upcall(portals_upcall)
2406 for_each_profile(node_db, prof_list, doCleanup)
2407 for_each_profile(node_db, prof_list, doUnloadModules)
2410 # ugly hack, only need to run lctl commands for --dump
2411 if config.lctl_dump or config.record:
2412 sys_set_timeout(timeout)
2413 sys_set_lustre_upcall(lustre_upcall)
2414 for_each_profile(node_db, prof_list, doSetup)
2418 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
2419 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
2421 for_each_profile(node_db, prof_list, doModules)
2423 sys_set_debug_path()
2424 sys_set_ptldebug(ptldebug)
2425 sys_set_subsystem(subsystem)
2426 script = config.gdb_script
2427 run(lctl.lctl, ' modules >', script)
2429 log ("The GDB module script is in", script)
2430 # pause, so user has time to break and
2433 sys_set_timeout(timeout)
2434 sys_set_lustre_upcall(lustre_upcall)
2435 sys_set_portals_upcall(portals_upcall)
2437 for_each_profile(node_db, prof_list, doSetup)
# NOTE(review): sampled — several guard/else lines missing.
# doRecovery: reconnect a client device to the (possibly failed-over)
# active server for a target: disconnect old nid, then lctl recover.
2439 def doRecovery(db, lctl, tgt_uuid, client_uuid, nid_uuid):
2440 tgt = db.lookup(tgt_uuid)
2442 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
2443 new_uuid = get_active_target(tgt)
2445 raise Lustre.LconfError("doRecovery: no active target found for: " +
2447 net = choose_local_server(get_ost_net(db, new_uuid))
2449 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
2451 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
2453 oldnet = get_server_by_nid_uuid(db, nid_uuid)
# disconnect/connect are best-effort; recovery proceeds regardless
2455 lctl.disconnect(oldnet)
2456 except CommandError, e:
2457 log("recover: disconnect", nid_uuid, "failed: ")
2462 except CommandError, e:
2463 log("recover: connect failed")
2466 lctl.recover(client_uuid, net.nid_uuid)
# NOTE(review): sampled — elif/try lines missing below.
# setupModulePath: in development mode, derive module search paths from
# the source tree; otherwise normalize --portals against --lustre.
2469 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
2470 base = os.path.dirname(cmd)
2471 if development_mode():
2472 if not config.lustre:
2473 debug('using objdir module paths')
2474 config.lustre = (os.path.join(base, ".."))
2475 # normalize the portals dir, using command line arg if set
2477 portals_dir = config.portals
2478 dir = os.path.join(config.lustre, portals_dir)
2479 config.portals = dir
2480 debug('config.portals', config.portals)
2481 elif config.lustre and config.portals:
2483 # if --lustre and --portals, normalize portals
2484 # can ignore POTRALS_DIR here, since it is probly useless here
2485 config.portals = os.path.join(config.lustre, config.portals)
2486 debug('config.portals B', config.portals)
# sysctl: write a value under /proc/sys (noexec guard and close() lines
# are not visible in this listing)
2488 def sysctl(path, val):
2489 debug("+ sysctl", path, val)
2493 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Tell portals where to write debug dumps (from --debug_path)."""
    path = config.debug_path
    sysctl('portals/debug_path', path)
# NOTE(review): sampled — elif/guard lines missing in both setters.
# Command-line --lustre_upcall / --upcall override the node-config value.
2503 def sys_set_lustre_upcall(upcall):
2504 # the command overrides the value in the node config
2505 if config.lustre_upcall:
2506 upcall = config.lustre_upcall
2508 upcall = config.upcall
2510 lctl.set_lustre_upcall(upcall)
# Same precedence for the portals upcall, pushed via sysctl.
2512 def sys_set_portals_upcall(upcall):
2513 # the command overrides the value in the node config
2514 if config.portals_upcall:
2515 upcall = config.portals_upcall
2517 upcall = config.upcall
2519 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout.

    A positive --timeout on the command line takes precedence over the
    value from the node config; only positive, non-None values are pushed.
    """
    cmdline = config.timeout
    if cmdline and cmdline > 0:
        timeout = cmdline
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
def sys_tweak_socknal ():
    """Apply socknal tuning: --single_socket disables typed sockets."""
    if not config.single_socket:
        return
    sysctl("socknal/typed", 0)
# NOTE(review): sampled — loop header and else/except bodies missing below.
# sys_optimize_elan: zero the elan punt-loop tunables wherever present.
2532 def sys_optimize_elan ():
2533 procfiles = ["/proc/elan/config/eventint_punt_loops",
2534 "/proc/qsnet/elan3/config/eventint_punt_loops",
2535 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
2537 if os.access(p, os.R_OK):
2538 run ("echo 0 > " + p)
# sys_set_ptldebug: evaluate a symbolic mask (ptldebug_names from the
# file header) and write it to portals/debug.
2540 def sys_set_ptldebug(ptldebug):
2542 ptldebug = config.ptldebug
2545 val = eval(ptldebug, ptldebug_names)
2546 val = "0x%x" % (val)
2547 sysctl('portals/debug', val)
2548 except NameError, e:
# sys_set_subsystem: same scheme for portals/subsystem_debug.
2551 def sys_set_subsystem(subsystem):
2552 if config.subsystem:
2553 subsystem = config.subsystem
2556 val = eval(subsystem, subsystem_names)
2557 val = "0x%x" % (val)
2558 sysctl('portals/subsystem_debug', val)
2559 except NameError, e:
# sys_set_netmem_max: raise a net.core buffer limit to at least `max`
# (the read-current-value lines are not visible in this listing).
2562 def sys_set_netmem_max(path, max):
2563 debug("setting", path, "to at least", max)
2571 fp = open(path, 'w')
2572 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals/obd character device nodes if they are absent."""
    for node, majmin in (('/dev/portals', 'c 10 240'),
                         ('/dev/obd', 'c 10 241')):
        if not os.access(node, os.R_OK):
            run('mknod %s %s' % (node, majmin))
2583 # Add dir to the global PATH, if not already there.
# NOTE(review): sampled — return statements and else branches missing in
# the helpers below; code reproduced verbatim.
2584 def add_to_path(new_dir):
2585 syspath = string.split(os.environ['PATH'], ':')
2586 if new_dir in syspath:
2588 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
# default locations move under /r when a chroot-style /r exists
2590 def default_debug_path():
2591 path = '/tmp/lustre-log'
2592 if os.path.isdir('/r'):
2597 def default_gdb_script():
2598 script = '/tmp/ogdb'
2599 if os.path.isdir('/r'):
2600 return '/r' + script
2605 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
2606 # ensure basic elements are in the system path
2607 def sanitise_path():
2608 for dir in DEFAULT_PATH:
2611 # global hack for the --select handling
# init_select parses --select service=node pairs into tgt_select
2613 def init_select(args):
2614 # args = [service=nodeA,service2=nodeB service3=nodeC]
2617 list = string.split(arg, ',')
2619 srv, node = string.split(entry, '=')
2620 tgt_select[srv] = node
2622 def get_select(srv):
2623 if tgt_select.has_key(srv):
2624 return tgt_select[srv]
# NOTE(review): sampled — the `lconf_options = [` opener and several
# entries are missing; the visible tuples are reproduced verbatim.
# Option-type aliases from the Lustre.Options helper.
2628 FLAG = Lustre.Options.FLAG
2629 PARAM = Lustre.Options.PARAM
2630 INTPARAM = Lustre.Options.INTPARAM
2631 PARAMLIST = Lustre.Options.PARAMLIST
2633 ('verbose,v', "Print system commands as they are run"),
2634 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
2635 ('config', "Cluster config name used for LDAP query", PARAM),
2636 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
2637 ('node', "Load config for <nodename>", PARAM),
2638 ('cleanup,d', "Cleans up config. (Shutdown)"),
2639 ('force,f', "Forced unmounting and/or obd detach during cleanup",
2641 ('single_socket', "socknal option: only use one socket instead of bundle",
2643 ('failover',"""Used to shut down without saving state.
2644 This will allow this node to "give up" a service to a
2645 another node for failover purposes. This will not
2646 be a clean shutdown.""",
2648 ('gdb', """Prints message after creating gdb module script
2649 and sleeps for 5 seconds."""),
2650 ('noexec,n', """Prints the commands and steps that will be run for a
2651 config without executing them. This can used to check if a
2652 config file is doing what it should be doing"""),
2653 ('nomod', "Skip load/unload module step."),
2654 ('nosetup', "Skip device setup/cleanup step."),
2655 ('reformat', "Reformat all devices (without question)"),
2656 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
2657 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
2658 ('dump', "Dump the kernel debug log to file before portals is unloaded",
2660 ('write_conf', "Save all the client config information on mds."),
2661 ('record', "Write config information on mds."),
2662 ('record_log', "Name of config record log.", PARAM),
2663 ('record_device', "MDS device name that will record the config commands",
2665 ('minlevel', "Minimum level of services to configure/cleanup",
2667 ('maxlevel', """Maximum level of services to configure/cleanup
2668 Levels are aproximatly like:
2673 70 - mountpoint, echo_client, osc, mdc, lov""",
2675 ('lustre', """Base directory of lustre sources. This parameter will
2676 cause lconf to load modules from a source tree.""", PARAM),
2677 ('portals', """Portals source directory. If this is a relative path,
2678 then it is assumed to be relative to lustre. """, PARAM),
2679 ('timeout', "Set recovery timeout", INTPARAM),
2680 ('upcall', "Set both portals and lustre upcall script", PARAM),
2681 ('lustre_upcall', "Set lustre upcall script", PARAM),
2682 ('portals_upcall', "Set portals upcall script", PARAM),
2683 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
2684 ('ptldebug', "Set the portals debug level", PARAM),
2685 ('subsystem', "Set the portals debug subsystem", PARAM),
2686 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
2687 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
2688 # Client recovery options
2689 ('recover', "Recover a device"),
2690 ('group', "The group of devices to configure or cleanup", PARAM),
2691 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
2692 ('client_uuid', "The failed client (required for recovery)", PARAM),
2693 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
2695 ('inactive', """The name of an inactive service, to be ignored during
2696 mounting (currently OST-only). Can be repeated.""",
# NOTE(review): fragment — this is the interior of main(); the `def main():`
# line itself is elided from this chunk, as are many statements (the `try:`
# lines matching the visible `except` clauses, several sys.exit() calls,
# cleanup paths), and all indentation has been flattened by the extraction.
# Code bytes left untouched; comments only. Do not treat this span as
# runnable — reconcile against the full source before editing.
# Publishes the LCTLInterface, parsed options, and config-file path globally
# for the rest of the module.
2701 global lctl, config, toplevel, CONFIG_FILE
# Restore default SIGCHLD handling: in the upcall this is set to SIG_IGN,
# which would break the popen2/waitpid machinery used to run lctl.
2703 # in the upcall this is set to SIG_IGN
2704 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
# Parse command-line options against the lconf_options table above.
# (surrounding try: is elided; OptionError handler presumably exits)
2706 cl = Lustre.Options("lconf", "config.xml", lconf_options)
2708 config, args = cl.parse(sys.argv[1:])
2709 except Lustre.OptionError, e:
2713 setupModulePath(sys.argv[0])
2715 host = socket.gethostname()
# Re-seed the PRNG from /dev/urandom: time()-based seeding gives identical
# seeds across a time-synchronized cluster.
2717 # the PRNG is normally seeded with time(), which is not so good for starting
2718 # time-synchronized clusters
2719 input = open('/dev/urandom', 'r')
2721 print 'Unable to open /dev/urandom!'
2723 seed = input.read(32)
2729 init_select(config.select)
# Choose the config source: HTTP URL (python2 only, via urllib2), local
# XML file, LDAP URL, or bare ptldebug/subsystem adjustment.
2732 # allow config to be fetched via HTTP, but only with python2
2733 if sys.version[0] != '1' and args[0].startswith('http://'):
2736 config_file = urllib2.urlopen(args[0])
2737 except (urllib2.URLError, socket.error), err:
2738 if hasattr(err, 'args'):
2740 print "Could not access '%s': %s" %(args[0], err)
2742 elif not os.access(args[0], os.R_OK):
2743 print 'File not found or readable:', args[0]
2747 config_file = open(args[0], 'r')
2749 dom = xml.dom.minidom.parse(config_file)
2751 panic("%s does not appear to be a config file." % (args[0]))
2752 sys.exit(1) # make sure to die here, even in debug mode.
2753 CONFIG_FILE = args[0]
2754 db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
# Default --config name to the XML file's basename, minus any .xml suffix.
2755 if not config.config:
2756 config.config = os.path.basename(args[0])# use full path?
2757 if config.config[-4:] == '.xml':
2758 config.config = config.config[:-4]
2759 elif config.ldapurl:
2760 if not config.config:
2761 panic("--ldapurl requires --config name")
2762 dn = "config=%s,fs=lustre" % (config.config)
2763 db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
2764 elif config.ptldebug or config.subsystem:
2765 sys_set_ptldebug(None)
2766 sys_set_subsystem(None)
2769 print 'Missing config file or ldap URL.'
2770 print 'see lconf --help for command summary'
# Refuse to proceed on a config generated by a different lconf version.
2775 ver = db.get_version()
2777 panic("No version found in config data, please recreate.")
2778 if ver != Lustre.CONFIG_VERSION:
2779 panic("Config version", ver, "does not match lconf version",
2780 Lustre.CONFIG_VERSION)
# Build the list of node names to configure: explicit --node, else this
# host's name plus 'localhost' as fallbacks.
2784 node_list.append(config.node)
2787 node_list.append(host)
2788 node_list.append('localhost')
2790 debug("configuring for host: ", node_list)
# Per-host suffixes keep debug dumps/gdb scripts distinct across a cluster.
# (condition guarding these lines is elided)
2793 config.debug_path = config.debug_path + '-' + host
2794 config.gdb_script = config.gdb_script + '-' + host
2796 lctl = LCTLInterface('lctl')
# --lctl_dump: capture ioctls to a file instead of issuing them.
2798 if config.lctl_dump:
2799 lctl.use_save_file(config.lctl_dump)
# --record: both the target device and log name are required; clear any
# previous log of that name, then start recording.
2802 if not (config.record_device and config.record_log):
2803 panic("When recording, both --record_log and --record_device must be specified.")
2804 lctl.clear_log(config.record_device, config.record_log)
2805 lctl.record(config.record_device, config.record_log)
# Finally run configuration/cleanup for the matched node(s).
2807 doHost(db, node_list)
# NOTE(review): fragment — script entry point; the `try:` line, the main()
# call, and the bodies of both exception handlers are elided from this
# chunk. Code bytes left untouched; comments only.
2812 if __name__ == "__main__":
# Lustre.LconfError: configuration-level failure (handler body elided;
# the commented traceback line below suggests it prints e and exits).
2815 except Lustre.LconfError, e:
2817 # traceback.print_exc(file=sys.stdout)
# CommandError: a failed lctl/shell command (handler body elided).
2819 except CommandError, e:
# Propagate the first error seen during cleanup as the process exit code,
# so partially-failed cleanups are visible to callers/scripts.
2823 if first_cleanup_error:
2824 sys.exit(first_cleanup_error)