3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
# Holds the status of the earliest cleanup failure; zero means no
# failure has been recorded yet.
first_cleanup_error = 0

def cleanup_error(rc):
    """Record rc as the overall cleanup status, keeping only the first error.

    Later calls with a non-zero rc are ignored so the eventual exit
    status reflects the first thing that went wrong, not the last.
    """
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with a 'not implemented yet' error for the named feature.

    Raises Lustre.LconfError.  msg names the missing feature.
    """
    # Use the call form of raise: the old two-expression form
    # ("raise Exc, arg") is Python-2-only syntax, and the call form is
    # what the rest of this file already uses (e.g. the panic helper).
    raise Lustre.LconfError(msg + ' not implemented yet.')
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
173 # ============================================================
174 # locally defined exceptions
175 class CommandError (exceptions.Exception):
176 def __init__(self, cmd_name, cmd_err, rc=None):
177 self.cmd_name = cmd_name
178 self.cmd_err = cmd_err
183 if type(self.cmd_err) == types.StringType:
185 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187 print "! %s: %s" % (self.cmd_name, self.cmd_err)
188 elif type(self.cmd_err) == types.ListType:
190 print "! %s (error %d):" % (self.cmd_name, self.rc)
192 print "! %s:" % (self.cmd_name)
193 for s in self.cmd_err:
194 print "> %s" %(string.strip(s))
199 # ============================================================
200 # handle daemons, like the acceptor
202 """ Manage starting and stopping a daemon. Assumes daemon manages
its own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
229 #time.sleep(1) # let daemon die
231 log("unable to kill", self.command, e)
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
def command_line(self):
    """Return the acceptor daemon's argument string: "<flags> <port>"."""
    # ' '.join is the portable spelling of string.join(seq) (the
    # legacy Python 2 string-module function, default separator ' ');
    # it behaves identically on Python 2 and 3.
    return ' '.join(map(str, (self.flags, self.port)))
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize close by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    """Remember the file that subsequent lctl command dumps are written to."""
    self.save_file = file
def record(self, dev_name, logname):
    """Start recording a config log named logname on device dev_name."""
    log("Recording log", logname, "on", dev_name)
    # Remember the target so later commands are wrapped in a
    # record/endrecord sequence for this device and log.
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Stop recording: announce it and clear the recorded target."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Switch file descriptor fd into non-blocking mode."""
    # Read the current flags, then OR in O_NDELAY rather than
    # overwriting, so existing descriptor flags are preserved.
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From "Python Cookbook" from O'Reilly
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
405 def runcmd(self, *args):
407 run lctl using the command line
409 cmd = string.join(map(str,args))
410 debug("+", self.lctl, cmd)
411 rc, out = run(self.lctl, cmd)
413 raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # add a route to a range
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
632 self.attach(type, name, uuid);
634 self.setup(name, setup)
635 except CommandError, e:
636 self.cleanup(name, uuid, 0)
640 def cleanup(self, name, uuid, force, failover = 0):
641 if failover: force = 1
647 quit""" % (name, ('', 'force')[force],
648 ('', 'failover')[failover])
652 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
653 stripe_sz, stripe_off, pattern, devlist = None):
656 lov_setup %s %d %d %d %s %s
657 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
661 # add an OBD to a LOV
662 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
664 lov_modify_tgts add %s %s %s %s
665 quit""" % (name, obd_uuid, index, gen)
669 def lmv_setup(self, name, uuid, desc_uuid, devlist):
673 quit""" % (name, uuid, desc_uuid, devlist)
676 # delete an OBD from a LOV
677 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
679 lov_modify_tgts del %s %s %s %s
680 quit""" % (name, obd_uuid, index, gen)
684 def deactivate(self, name):
692 def dump(self, dump_file):
695 quit""" % (dump_file)
698 # get list of devices
699 def device_list(self):
700 devices = '/proc/fs/lustre/devices'
702 if os.access(devices, os.R_OK):
704 fp = open(devices, 'r')
712 def lustre_version(self):
713 rc, out = self.runcmd('version')
717 def mount_option(self, profile, osc, mdc):
719 mount_option %s %s %s
720 quit""" % (profile, osc, mdc)
723 # delete mount options
724 def del_mount_option(self, profile):
730 def set_timeout(self, timeout):
736 def set_lustre_upcall(self, upcall):
741 # ============================================================
742 # Various system-level functions
743 # (ideally moved to their own module)
745 # Run a command and return the output and status.
746 # stderr is sent to /dev/null, could use popen3 to
747 # save it if necessary
750 if config.noexec: return (0, [])
751 f = os.popen(cmd + ' 2>&1')
761 cmd = string.join(map(str,args))
764 # Run a command in the background.
765 def run_daemon(*args):
766 cmd = string.join(map(str,args))
768 if config.noexec: return 0
769 f = os.popen(cmd + ' 2>&1')
777 # Determine full path to use for an external command
778 # searches dirname(argv[0]) first, then PATH
780 syspath = string.split(os.environ['PATH'], ':')
781 cmdpath = os.path.dirname(sys.argv[0])
782 syspath.insert(0, cmdpath);
784 syspath.insert(0, os.path.join(config.portals, 'utils/'))
786 prog = os.path.join(d,cmd)
787 if os.access(prog, os.X_OK):
791 # Recursively look for file starting at base dir
792 def do_find_file(base, mod):
793 fullname = os.path.join(base, mod)
794 if os.access(fullname, os.R_OK):
796 for d in os.listdir(base):
797 dir = os.path.join(base,d)
798 if os.path.isdir(dir):
799 module = do_find_file(dir, mod)
803 # is the path a block device?
810 return stat.S_ISBLK(s[stat.ST_MODE])
812 # find the journal device from mkfs options
818 while i < len(x) - 1:
819 if x[i] == '-J' and x[i+1].startswith('device='):
825 # build fs according to type
827 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
833 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
835 # devsize is in 1k, and fs block count is in 4k
836 block_cnt = devsize/4
838 if fstype in ('ext3', 'extN', 'ldiskfs'):
839 # ext3 journal size is in megabytes
840 # but don't set jsize if mkfsoptions indicates a separate journal device
841 if jsize == 0 and jdev(mkfsoptions) == '':
843 if not is_block(dev):
844 ret, out = runcmd("ls -l %s" %dev)
845 devsize = int(string.split(out[0])[4]) / 1024
847 # sfdisk works for symlink, hardlink, and realdev
848 ret, out = runcmd("sfdisk -s %s" %dev)
850 devsize = int(out[0])
852 # sfdisk -s will fail for too large block device,
853 # then, read the size of partition from /proc/partitions
855 # get the realpath of the device
856 # it may be the real device, such as /dev/hda7
857 # or the hardlink created via mknod for a device
858 if 'realpath' in dir(os.path):
859 real_dev = os.path.realpath(dev)
863 while os.path.islink(real_dev) and (link_count < 20):
864 link_count = link_count + 1
865 dev_link = os.readlink(real_dev)
866 if os.path.isabs(dev_link):
869 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
871 panic("Entountered too many symbolic links resolving block device:", dev)
873 # get the major and minor number of the realpath via ls
874 # it seems python(os.stat) does not return
875 # the st_rdev member of the stat structure
876 ret, out = runcmd("ls -l %s" %real_dev)
877 major = string.split(string.split(out[0])[4], ",")[0]
878 minor = string.split(out[0])[5]
880 # get the devsize from /proc/partitions with the major and minor number
881 ret, out = runcmd("cat /proc/partitions")
884 if string.split(line)[0] == major and string.split(line)[1] == minor:
885 devsize = int(string.split(line)[2])
888 if devsize > 1024 * 1024:
889 jsize = ((devsize / 102400) * 4)
892 if jsize: jopt = "-J size=%d" %(jsize,)
893 if isize: iopt = "-I %d" %(isize,)
894 mkfs = 'mkfs.ext2 -j -b 4096 '
895 if not isblock or config.force:
897 if jdev(mkfsoptions) != '':
898 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
900 jmkfs = jmkfs + '-F '
901 jmkfs = jmkfs + jdev(mkfsoptions)
902 (ret, out) = run (jmkfs)
904 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
905 elif fstype == 'reiserfs':
906 # reiserfs journal size is in blocks
907 if jsize: jopt = "--journal_size %d" %(jsize,)
908 mkfs = 'mkreiserfs -ff'
910 panic('unsupported fs type: ', fstype)
912 if config.mkfsoptions != None:
913 mkfs = mkfs + ' ' + config.mkfsoptions
914 if mkfsoptions != None:
915 mkfs = mkfs + ' ' + mkfsoptions
916 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
918 panic("Unable to build fs:", dev, string.join(out))
919 # enable hash tree indexing on fsswe
920 if fstype in ('ext3', 'extN', 'ldiskfs'):
921 htree = 'echo "feature FEATURE_C5" | debugfs -w'
922 (ret, out) = run (htree, dev)
924 panic("Unable to enable htree:", dev)
926 # some systems use /dev/loopN, some /dev/loop/N
930 if not os.access(loop + str(0), os.R_OK):
932 if not os.access(loop + str(0), os.R_OK):
933 panic ("can't access loop devices")
936 # find loop device assigned to the file
937 def find_assigned_loop(file):
939 for n in xrange(0, MAX_LOOP_DEVICES):
941 if os.access(dev, os.R_OK):
942 (stat, out) = run('losetup', dev)
943 if out and stat == 0:
944 m = re.search(r'\((.*)\)', out[0])
945 if m and file == m.group(1):
949 # find free loop device
950 def find_free_loop(file):
953 # find next free loop
954 for n in xrange(0, MAX_LOOP_DEVICES):
956 if os.access(dev, os.R_OK):
957 (stat, out) = run('losetup', dev)
962 # create file if necessary and assign the first free loop device
963 def init_loop(file, size, fstype, journal_size, inode_size,
964 mkfsoptions, reformat, autoformat, backfstype, backfile):
967 realfstype = backfstype
968 if is_block(backfile):
969 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
970 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
976 dev = find_assigned_loop(realfile)
978 print 'WARNING: file', realfile, 'already mapped to', dev
981 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
982 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
984 panic("Unable to create backing store:", realfile)
985 mkfs(realfile, size, realfstype, journal_size, inode_size,
986 mkfsoptions, isblock=0)
988 dev = find_free_loop(realfile)
990 print "attach " + realfile + " <-> " + dev
991 run('losetup', dev, realfile)
994 print "out of loop devices"
997 # undo loop assignment
998 def clean_loop(dev, fstype, backfstype, backdev):
1003 if not is_block(realfile):
1004 dev = find_assigned_loop(realfile)
1006 print "detach " + dev + " <-> " + realfile
1007 ret, out = run('losetup -d', dev)
1009 log('unable to clean loop device', dev, 'for file', realfile)
# finalizes the passed device
1013 def clean_dev(dev, fstype, backfstype, backdev):
1014 if fstype == 'smfs' or not is_block(dev):
1015 clean_loop(dev, fstype, backfstype, backdev)
1017 # determine if dev is formatted as a <fstype> filesystem
1018 def need_format(fstype, dev):
1019 # FIXME don't know how to implement this
1022 # initialize a block device if needed
1023 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1024 inode_size, mkfsoptions, backfstype, backdev):
1028 if fstype == 'smfs' or not is_block(dev):
1029 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1030 mkfsoptions, reformat, autoformat, backfstype, backdev)
1031 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1032 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1035 # panic("device:", dev,
1036 # "not prepared, and autoformat is not set.\n",
1037 # "Rerun with --reformat option to format ALL filesystems")
1042 """lookup IP address for an interface"""
1043 rc, out = run("/sbin/ifconfig", iface)
1046 addr = string.split(out[1])[1]
1047 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for fstype on target ('mds' or 'ost').

    Only ext3/ldiskfs get defaults; other filesystem types return None.
    """
    if fstype not in ('ext3', 'ldiskfs'):
        return None
    options = "errors=remount-ro"
    # 2.4-series kernels additionally want asyncdel on OST mounts.
    if target == 'ost' and sys_get_branch() == '2.4':
        options = options + ",asyncdel"
    return options
1059 def sys_get_elan_position_file():
1060 procfiles = ["/proc/elan/device0/position",
1061 "/proc/qsnet/elan4/device0/position",
1062 "/proc/qsnet/elan3/device0/position"]
1064 if os.access(p, os.R_OK):
1068 def sys_get_local_nid(net_type, wildcard, cluster_id):
1069 """Return the local nid."""
1071 if sys_get_elan_position_file():
1072 local = sys_get_local_address('elan', '*', cluster_id)
1074 local = sys_get_local_address(net_type, wildcard, cluster_id)
1077 def sys_get_local_address(net_type, wildcard, cluster_id):
1078 """Return the local address for the network type."""
1080 if net_type in ('tcp','openib','iib','vib','ra'):
1082 iface, star = string.split(wildcard, ':')
1083 local = if2addr(iface)
1085 panic ("unable to determine ip for:", wildcard)
1087 host = socket.gethostname()
1088 local = socket.gethostbyname(host)
1089 elif net_type == 'elan':
1090 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1091 f = sys_get_elan_position_file()
1093 panic ("unable to determine local Elan ID")
1096 lines = fp.readlines()
1100 if a[0] == 'NodeId':
1104 nid = my_int(cluster_id) + my_int(elan_id)
1105 local = "%d" % (nid)
1106 except ValueError, e:
1110 elif net_type == 'lo':
1111 fixme("automatic local address for loopback")
1112 elif net_type == 'gm':
1113 fixme("automatic local address for GM")
1117 def sys_get_branch():
1118 """Returns kernel release"""
1120 fp = open('/proc/sys/kernel/osrelease')
1121 lines = fp.readlines()
1125 version = string.split(l)
1126 a = string.split(version[0], '.')
1127 return a[0] + '.' + a[1]
1132 # XXX: instead of device_list, ask for $name and see what we get
1133 def is_prepared(name):
1134 """Return true if a device exists for the name"""
1135 if config.lctl_dump:
1137 if (config.noexec or config.record) and config.cleanup:
1140 # expect this format:
1141 # 1 UP ldlm ldlm ldlm_UUID 2
1142 out = lctl.device_list()
1144 if name == string.split(s)[3]:
1146 except CommandError, e:
1150 def net_is_prepared():
1151 """If the any device exists, then assume that all networking
1152 has been configured"""
1153 out = lctl.device_list()
1156 def fs_is_mounted(path):
1157 """Return true if path is a mounted lustre filesystem"""
1159 fp = open('/proc/mounts')
1160 lines = fp.readlines()
1164 if a[1] == path and a[2] == 'lustre_lite':
1170 def kmod_find(src_dir, dev_dir, modname):
1171 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1172 for modext in '.ko', '.o':
1173 module = modbase + modext
1175 if os.access(module, os.R_OK):
1181 def kmod_info(modname):
1182 """Returns reference count for passed module name."""
1184 fp = open('/proc/modules')
1185 lines = fp.readlines()
1188 # please forgive my tired fingers for this one
1189 ret = filter(lambda word, mod = modname: word[0] == mod,
1190 map(lambda line: string.split(line), lines))
1194 except Exception, e:
1198 """Presents kernel module"""
1199 def __init__(self, src_dir, dev_dir, name):
1200 self.src_dir = src_dir
1201 self.dev_dir = dev_dir
1204 # FIXME we ignore the failure of loading gss module, because we might
1205 # don't need it at all.
1208 log ('loading module:', self.name, 'srcdir',
1209 self.src_dir, 'devdir', self.dev_dir)
1211 module = kmod_find(self.src_dir, self.dev_dir,
1213 if not module and self.name != 'ptlrpcs_gss':
1214 panic('module not found:', self.name)
1215 (rc, out) = run('/sbin/insmod', module)
1217 if self.name == 'ptlrpcs_gss':
1218 print "Warning: not support gss security!"
1220 raise CommandError('insmod', out, rc)
1222 (rc, out) = run('/sbin/modprobe', self.name)
1224 if self.name == 'ptlrpcs_gss':
1225 print "Warning: not support gss security!"
1227 raise CommandError('modprobe', out, rc)
1231 log('unloading module:', self.name)
1232 (rc, out) = run('/sbin/rmmod', self.name)
1234 log('unable to unload module:', self.name +
1235 "(" + self.refcount() + ")")
1239 """Returns module info if any."""
1240 return kmod_info(self.name)
1243 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1250 """Returns module refcount."""
1257 """Returns 1 if module is used, otherwise 0 is returned."""
1263 if users and users != '(unused)' and users != '-':
1271 """Returns 1 if module is busy, otherwise 0 is returned."""
1272 if self.loaded() and (self.used() or self.refcount() != '0'):
1278 """Manage kernel modules"""
1279 def __init__(self, lustre_dir, portals_dir):
1280 self.lustre_dir = lustre_dir
1281 self.portals_dir = portals_dir
1282 self.kmodule_list = []
1284 def find_module(self, modname):
1285 """Find module by module name"""
1286 for mod in self.kmodule_list:
1287 if mod.name == modname:
1291 def add_portals_module(self, dev_dir, modname):
1292 """Append a module to list of modules to load."""
1294 mod = self.find_module(modname)
1296 mod = kmod(self.portals_dir, dev_dir, modname)
1297 self.kmodule_list.append(mod)
1299 def add_lustre_module(self, dev_dir, modname):
1300 """Append a module to list of modules to load."""
1302 mod = self.find_module(modname)
1304 mod = kmod(self.lustre_dir, dev_dir, modname)
1305 self.kmodule_list.append(mod)
1307 def load_modules(self):
1308 """Load all the modules in the list in the order they appear."""
1309 for mod in self.kmodule_list:
1310 if mod.loaded() and not config.noexec:
1314 def cleanup_modules(self):
1315 """Unload the modules in the list in reverse order."""
1316 rev = self.kmodule_list
1319 if (not mod.loaded() or mod.busy()) and not config.noexec:
1322 if mod.name == 'portals' and config.dump:
1323 lctl.dump(config.dump)
1326 # ============================================================
1327 # Classes to prepare and cleanup the various objects
1330 """ Base class for the rest of the modules. The default cleanup method is
1331 defined here, as well as some utilitiy funcs.
1333 def __init__(self, module_name, db):
1335 self.module_name = module_name
1336 self.name = self.db.getName()
1337 self.uuid = self.db.getUUID()
1341 def info(self, *args):
1342 msg = string.join(map(str,args))
1343 print self.module_name + ":", self.name, self.uuid, msg
1346 """ default cleanup, used for most modules """
1349 lctl.cleanup(self.name, self.uuid, config.force)
1350 except CommandError, e:
1351 log(self.module_name, "cleanup failed: ", self.name)
1355 def add_module(self, manager):
1356 """Adds all needed modules in the order they appear."""
1359 def safe_to_clean(self):
1362 def safe_to_clean_modules(self):
1363 return self.safe_to_clean()
1365 class Network(Module):
1366 def __init__(self,db):
1367 Module.__init__(self, 'NETWORK', db)
1368 self.net_type = self.db.get_val('nettype')
1369 self.nid = self.db.get_val('nid', '*')
1370 self.cluster_id = self.db.get_val('clusterid', "0")
1371 self.port = self.db.get_val_int('port', 0)
1374 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1376 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1377 self.generic_nid = 1
1378 debug("nid:", self.nid)
1380 self.generic_nid = 0
1382 self.nid_uuid = self.nid_to_uuid(self.nid)
1383 self.hostaddr = self.db.get_hostaddr()
1384 if len(self.hostaddr) == 0:
1385 self.hostaddr.append(self.nid)
1386 if '*' in self.hostaddr[0]:
1387 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1388 if not self.hostaddr[0]:
1389 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1390 debug("hostaddr:", self.hostaddr[0])
1392 def add_module(self, manager):
1393 manager.add_portals_module("libcfs", 'libcfs')
1394 manager.add_portals_module("portals", 'portals')
1396 if node_needs_router():
1397 manager.add_portals_module("router", 'kptlrouter')
1398 if self.net_type == 'tcp':
1399 manager.add_portals_module("knals/socknal", 'ksocknal')
1400 if self.net_type == 'elan':
1401 manager.add_portals_module("knals/qswnal", 'kqswnal')
1402 if self.net_type == 'gm':
1403 manager.add_portals_module("knals/gmnal", 'kgmnal')
1404 if self.net_type == 'openib':
1405 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1406 if self.net_type == 'iib':
1407 manager.add_portals_module("knals/iibnal", 'kiibnal')
1408 if self.net_type == 'vib':
1409 self.add_portals_module("knals/vibnal", 'kvibnal')
1410 if self.net_type == 'lo':
1411 manager.add_portals_module("knals/lonal", 'klonal')
1412 if self.net_type == 'ra':
1413 manager.add_portals_module("knals/ranal", 'kranal')
1415 def nid_to_uuid(self, nid):
1416 return "NID_%s_UUID" %(nid,)
1419 if not config.record and net_is_prepared():
1421 self.info(self.net_type, self.nid, self.port)
1422 if not (config.record and self.generic_nid):
1423 lctl.network(self.net_type, self.nid)
1424 if self.net_type == 'tcp':
1426 for hostaddr in self.db.get_hostaddr():
1427 ip = string.split(hostaddr, '/')[0]
1428 if len(string.split(hostaddr, '/')) == 2:
1429 netmask = string.split(hostaddr, '/')[1]
1432 lctl.add_interface(self.net_type, ip, netmask)
1433 if self.net_type == 'elan':
1435 if self.port and node_is_router():
1436 run_one_acceptor(self.port)
1437 self.connect_peer_gateways()
1439 def connect_peer_gateways(self):
1440 for router in self.db.lookup_class('node'):
1441 if router.get_val_int('router', 0):
1442 for netuuid in router.get_networks():
1443 net = self.db.lookup(netuuid)
1445 if (gw.cluster_id == self.cluster_id and
1446 gw.net_type == self.net_type):
1447 if gw.nid != self.nid:
1450 def disconnect_peer_gateways(self):
1451 for router in self.db.lookup_class('node'):
1452 if router.get_val_int('router', 0):
1453 for netuuid in router.get_networks():
1454 net = self.db.lookup(netuuid)
1456 if (gw.cluster_id == self.cluster_id and
1457 gw.net_type == self.net_type):
1458 if gw.nid != self.nid:
1461 except CommandError, e:
1462 print "disconnect failed: ", self.name
1466 def safe_to_clean(self):
1467 return not net_is_prepared()
1470 self.info(self.net_type, self.nid, self.port)
1472 stop_acceptor(self.port)
1473 if node_is_router():
1474 self.disconnect_peer_gateways()
1475 if self.net_type == 'tcp':
1476 for hostaddr in self.db.get_hostaddr():
1477 ip = string.split(hostaddr, '/')[0]
1478 lctl.del_interface(self.net_type, ip)
1480 def correct_level(self, level, op=None):
1483 class RouteTable(Module):
1484 def __init__(self,db):
1485 Module.__init__(self, 'ROUTES', db)
1487 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1489 # only setup connections for tcp, openib, and iib NALs
1491 if not net_type in ('tcp','openib','iib','vib','ra'):
1494 # connect to target if route is to single node and this node is the gw
1495 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1496 if not local_cluster(net_type, tgt_cluster_id):
1497 panic("target", lo, " not on the local cluster")
1498 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1499 # connect to gateway if this node is not the gw
1500 elif (local_cluster(net_type, gw_cluster_id)
1501 and not local_interface(net_type, gw_cluster_id, gw)):
1502 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1507 panic("no server for nid", lo)
1510 return Network(srvdb)
1513 if not config.record and net_is_prepared():
1516 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1517 lctl.add_route(net_type, gw, lo, hi)
1518 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1522 def safe_to_clean(self):
1523 return not net_is_prepared()
1526 if net_is_prepared():
1527 # the network is still being used, don't clean it up
1529 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1530 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1533 lctl.disconnect(srv)
1534 except CommandError, e:
1535 print "disconnect failed: ", self.name
1540 lctl.del_route(net_type, gw, lo, hi)
1541 except CommandError, e:
1542 print "del_route failed: ", self.name
# Management service module: loads the kernel modules the 'mgmt' service
# needs and creates/destroys the mgmt device via lctl.
# NOTE(review): this listing has gaps (e.g. the `def prepare`/`def cleanup`
# lines and several bodies between originals 1554-1569 are missing).
1546 class Management(Module):
1547 def __init__(self, db):
1548 Module.__init__(self, 'MGMT', db)
# Register the module load order: lvfs -> obdclass -> ptlrpc -> mgmt_svc.
1550 def add_module(self, manager):
1551 manager.add_lustre_module('lvfs', 'lvfs')
1552 manager.add_lustre_module('obdclass', 'obdclass')
1553 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1554 manager.add_lustre_module('mgmt', 'mgmt_svc')
# (prepare body) skip when not recording and the device already exists.
1557 if not config.record and is_prepared(self.name):
1560 lctl.newdev("mgmt", self.name, self.uuid)
1562 def safe_to_clean(self):
# (cleanup body) only tear down a device that was actually created.
1566 if is_prepared(self.name):
1567 Module.cleanup(self)
1569 def correct_level(self, level, op=None):
1572 # This is only needed to load the modules; the LDLM device
1573 # is now created automatically.
# NOTE(review): the `class LDLM(Module):` header (original line ~1574)
# and the prepare/cleanup bodies (originals 1584-1590) are missing from
# this listing.
1575 def __init__(self,db):
1576 Module.__init__(self, 'LDLM', db)
# Module load order for the lock manager stack, including the ptlrpc
# security modules ('sec' and 'sec/gss').
1578 def add_module(self, manager):
1579 manager.add_lustre_module('lvfs', 'lvfs')
1580 manager.add_lustre_module('obdclass', 'obdclass')
1581 manager.add_lustre_module('sec', 'ptlrpcs')
1582 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1583 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1591 def correct_level(self, level, op=None):
# LOV (logical object volume) client module: aggregates a set of OSC
# targets with striping parameters read from the config DB.
# NOTE(review): the `class LOV(Module):` header and many body lines
# (originals 1594, 1605-06, 1610-13, 1617-18, etc.) are missing from
# this listing; `try:`/`else:` lines for the except-clauses below are
# among the lost lines.
1595 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1596 Module.__init__(self, 'LOV', db)
1597 if name_override != None:
1598 self.name = "lov_%s" % name_override
# Striping policy: size/offset/pattern defaults, target list, count
# defaulting to "all targets".
1599 self.mds_uuid = self.db.get_first_ref('mds')
1600 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1601 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1602 self.pattern = self.db.get_val_int('stripepattern', 0)
1603 self.devlist = self.db.get_lov_tgts('lov_tgt')
1604 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
# desc_uuid keeps the config UUID; self.uuid becomes a fresh client UUID.
1607 self.desc_uuid = self.uuid
1608 self.uuid = generate_client_uuid(self.name)
1609 self.fs_name = fs_name
1611 self.config_only = 1
1613 self.config_only = None
1614 mds = self.db.lookup(self.mds_uuid)
1615 self.mds_name = mds.getName()
# Build an OSC client for every (obd, index, gen, active) LOV target.
1616 for (obd_uuid, index, gen, active) in self.devlist:
1619 self.obdlist.append(obd_uuid)
1620 obd = self.db.lookup(obd_uuid)
1621 osc = get_osc(obd, self.uuid, fs_name)
1623 self.osclist.append((osc, index, gen, active))
1625 panic('osc not found:', obd_uuid)
# (prepare) set up the LOV device then attach each OSC to it.
1631 if not config.record and is_prepared(self.name):
1633 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1634 self.stripe_off, self.pattern, self.devlist,
1636 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1637 self.stripe_sz, self.stripe_off, self.pattern,
1638 string.join(self.obdlist))
1639 for (osc, index, gen, active) in self.osclist:
1640 target_uuid = osc.target_uuid
1642 # Only ignore connect failures with --force, which
1643 # isn't implemented here yet.
1645 osc.prepare(ignore_connect_failure=0)
1646 except CommandError, e:
1647 print "Error preparing OSC %s\n" % osc.uuid
1649 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
# (cleanup) tear down each OSC, then the LOV device itself.
1652 for (osc, index, gen, active) in self.osclist:
1653 target_uuid = osc.target_uuid
1655 if is_prepared(self.name):
1656 Module.cleanup(self)
# A config-only LOV was never instantiated as a device.
1657 if self.config_only:
1658 panic("Can't clean up config_only LOV ", self.name)
1660 def add_module(self, manager):
1661 if self.config_only:
1662 panic("Can't load modules for config_only LOV ", self.name)
1663 for (osc, index, gen, active) in self.osclist:
1664 osc.add_module(manager)
1666 manager.add_lustre_module('lov', 'lov')
1668 def correct_level(self, level, op=None):
# LMV (logical metadata volume) client module: aggregates MDC clients,
# one per MDS, mirroring how LOV aggregates OSCs.
# NOTE(review): the `class LMV(Module):` header and several body lines
# (originals 1671, 1676, 1680-83, 1687, 1690-95, etc.) are missing from
# this listing; `try:` lines for the except-clauses are among them.
1672 def __init__(self, db, uuid, fs_name, name_override = None):
1673 Module.__init__(self, 'LMV', db)
1674 if name_override != None:
1675 self.name = "lmv_%s" % name_override
# Target list: explicit lmv_tgt entries, else fall back to all mds refs.
1677 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1678 if self.devlist == None:
1679 self.devlist = self.db.get_refs('mds')
1682 self.desc_uuid = self.uuid
1684 self.fs_name = fs_name
# Build one MDC client per MDS target.
1685 for mds_uuid in self.devlist:
1686 mds = self.db.lookup(mds_uuid)
1688 panic("MDS not found!")
1689 mdc = MDC(mds, self.uuid, fs_name)
1691 self.mdclist.append(mdc)
1693 panic('mdc not found:', mds_uuid)
# (prepare) connect every MDC, then set up the LMV device over them.
1696 if is_prepared(self.name):
1700 for mdc in self.mdclist:
1702 # Only ignore connect failures with --force, which
1703 # isn't implemented here yet.
1704 mdc.prepare(ignore_connect_failure=0)
1705 except CommandError, e:
1706 print "Error preparing LMV %s\n" % mdc.uuid
1709 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1710 string.join(self.devlist))
# (cleanup) tear down each MDC, then the LMV device.
1713 for mdc in self.mdclist:
1715 if is_prepared(self.name):
1716 Module.cleanup(self)
1718 def add_module(self, manager):
1719 for mdc in self.mdclist:
1720 mdc.add_module(manager)
1722 manager.add_lustre_module('lmv', 'lmv')
1724 def correct_level(self, level, op=None):
# CONFDEV: wraps the on-disk config device of an MDS or OST target.
# It mounts the backing device as a "confobd", records the target's
# startup/cleanup llog configuration logs on it, and can replay/record
# per-client logs by re-invoking this very script with --record.
# NOTE(review): this listing has many missing lines (gaps in the embedded
# original numbering, e.g. 1742, 1746, 1752/54/56, 1759, 1763-66, the
# else-branches and try: lines throughout).  Comments only.
1727 class CONFDEV(Module):
1728 def __init__(self, db, name, target_uuid, uuid):
1729 Module.__init__(self, 'CONFDEV', db)
# Device geometry / fs parameters from the config DB.
1730 self.devpath = self.db.get_val('devpath','')
# NOTE(review): backdevpath also reads the 'devpath' key (not
# 'backdevpath') — looks intentional for the non-smfs case, but verify.
1731 self.backdevpath = self.db.get_val('devpath','')
1732 self.size = self.db.get_val_int('devsize', 0)
1733 self.journal_size = self.db.get_val_int('journalsize', 0)
1734 self.fstype = self.db.get_val('fstype', '')
1735 self.backfstype = self.db.get_val('backfstype', '')
1736 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1737 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1738 self.target = self.db.lookup(target_uuid)
1739 self.name = "conf_%s" % self.target.getName()
1740 self.client_uuids = self.target.get_refs('client')
1741 self.obdtype = self.db.get_val('obdtype', '')
# Security flavors; command-line overrides win over DB values.
1743 self.mds_sec = self.db.get_val('mds_sec', '')
1744 self.oss_sec = self.db.get_val('oss_sec', '')
1745 self.deny_sec = self.db.get_val('deny_sec', '')
1747 if config.mds_mds_sec:
1748 self.mds_sec = config.mds_mds_sec
1749 if config.mds_oss_sec:
1750 self.oss_sec = config.mds_oss_sec
1751 if config.mds_deny_sec:
# append the CLI deny list to an existing DB deny list
1753 self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
1755 self.deny_sec = config.mds_deny_sec
1757 if self.obdtype == None:
1758 self.obdtype = 'dumb'
1760 self.conf_name = name
1761 self.conf_uuid = uuid
1762 self.realdev = self.devpath
# An MDS may sit under an LMV; if so, the clients are the LMV's clients.
1767 lmv_uuid = self.db.get_first_ref('lmv')
1768 if lmv_uuid != None:
1769 self.lmv = self.db.lookup(lmv_uuid)
1770 if self.lmv != None:
1771 self.client_uuids = self.lmv.get_refs('client')
# Target-class specific flags: failover marker and autoformat default
# ("no" for mds shown here; "yes" branch below presumably for ost).
1773 if self.target.get_class() == 'mds':
1774 if self.target.get_val('failover', 0):
1775 self.failover_mds = 'f'
1777 self.failover_mds = 'n'
1778 self.format = self.db.get_val('autoformat', "no")
1780 self.format = self.db.get_val('autoformat', "yes")
1781 self.osdtype = self.db.get_val('osdtype')
1782 ost = self.db.lookup(target_uuid)
1783 if ost.get_val('failover', 0):
1784 self.failover_ost = 'f'
1786 self.failover_ost = 'n'
1788 self.inode_size = self.get_inode_size()
# Master LMV client used when writing the MDS config log.
1790 if self.lmv != None:
1791 client_uuid = self.name + "_lmv_UUID"
1792 self.master = LMV(self.lmv, client_uuid,
1793 self.conf_name, self.conf_name)
# Pick an MDS inode size: explicit 'inodesize', else derive one from the
# LOV's default stripe count (wider stripes need larger inodes for the EA).
1795 def get_inode_size(self):
1796 inode_size = self.db.get_val_int('inodesize', 0)
1797 if inode_size == 0 and self.target.get_class() == 'mds':
1799 # default inode size for case when neither LOV either
1800 # LMV is accessible.
1801 self.inode_size = 256
1803 # find the LOV for this MDS
1804 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1805 if lovconfig_uuid or self.lmv != None:
1806 if self.lmv != None:
1807 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1808 lovconfig = self.lmv.lookup(lovconfig_uuid)
1809 lov_uuid = lovconfig.get_first_ref('lov')
1810 if lov_uuid == None:
1811 panic(self.target.getName() + ": No LOV found for lovconfig ",
1814 lovconfig = self.target.lookup(lovconfig_uuid)
1815 lov_uuid = lovconfig.get_first_ref('lov')
1816 if lov_uuid == None:
1817 panic(self.target.getName() + ": No LOV found for lovconfig ",
1819 if self.lmv != None:
1820 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1821 lovconfig = self.lmv.lookup(lovconfig_uuid)
1822 lov_uuid = lovconfig.get_first_ref('lov')
1824 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1827 # default stripe count controls default inode_size
1828 if lov.stripe_cnt > 0:
1829 stripe_count = lov.stripe_cnt
1831 stripe_count = len(lov.devlist)
# thresholds bucket stripe_count into power-of-two inode sizes
# (assignment lines fell in the listing's gaps)
1832 if stripe_count > 77:
1834 elif stripe_count > 35:
1836 elif stripe_count > 13:
1838 elif stripe_count > 3:
# Assemble the mount option string: per-fstype defaults, CLI override,
# DB options, smfs back-device plumbing, and MDS-specific extras.
1845 def get_mount_options(self, blkdev):
1846 options = def_mount_options(self.fstype,
1847 self.target.get_class())
1849 if config.mountfsoptions:
1851 options = "%s,%s" %(options, config.mountfsoptions)
1853 options = config.mountfsoptions
1854 if self.mountfsoptions:
1855 options = "%s,%s" %(options, self.mountfsoptions)
1857 if self.mountfsoptions:
1859 options = "%s,%s" %(options, self.mountfsoptions)
1861 options = self.mountfsoptions
# smfs needs the backing fs type and device encoded in the options
1863 if self.fstype == 'smfs':
1865 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1868 options = "type=%s,dev=%s" %(self.backfstype,
1871 if self.target.get_class() == 'mds':
1873 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1875 options = "iopen_nopriv"
# (prepare) format/attach the block device and create the confobd on it.
1880 if is_prepared(self.name):
1883 blkdev = block_dev(self.devpath, self.size, self.fstype,
1884 config.reformat, self.format, self.journal_size,
1885 self.inode_size, self.mkfsoptions, self.backfstype,
1888 if self.fstype == 'smfs':
1893 mountfsoptions = self.get_mount_options(blkdev)
1895 self.info(self.target.get_class(), realdev, mountfsoptions,
1896 self.fstype, self.size, self.format)
1898 lctl.newdev("confobd", self.name, self.uuid,
1899 setup ="%s %s %s" %(realdev, self.fstype,
# remember the resolved values for write_conf()
1902 self.mountfsoptions = mountfsoptions
1903 self.realdev = realdev
1905 def add_module(self, manager):
1906 manager.add_lustre_module('obdclass', 'confobd')
# Record the target's configuration llogs on the config device.
1908 def write_conf(self):
# OST: record its own '-conf' log plus the shared OSS log.
1909 if self.target.get_class() == 'ost':
1911 lctl.clear_log(self.name, self.target.getName() + '-conf')
1912 lctl.record(self.name, self.target.getName() + '-conf')
1913 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1914 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1916 self.mountfsoptions))
1918 lctl.clear_log(self.name, 'OSS-conf')
1919 lctl.record(self.name, 'OSS-conf')
1920 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
# MDS: record attach + security flavors + device setup in its '-conf' log.
1925 if self.target.get_class() == 'mds':
1926 if self.master != None:
1927 master_name = self.master.name
1929 master_name = 'dumb'
1932 lctl.clear_log(self.name, self.target.getName() + '-conf')
1933 lctl.record(self.name, self.target.getName() + '-conf')
1934 lctl.attach("mds", self.conf_name, self.conf_uuid)
1936 lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
1938 lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
1940 for flavor in string.split(self.deny_sec, ','):
1941 lctl.set_security(self.conf_name, "deny_sec", flavor)
1942 lctl.newdev("mds", self.conf_name, self.conf_uuid,
1943 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
1944 self.conf_name, self.mountfsoptions,
1945 master_name, self.obdtype))
# Per-client mount/unmount logs for each client of this target.
1949 if not self.client_uuids:
1952 for uuid in self.client_uuids:
1953 log("recording client:", uuid)
1954 client_uuid = generate_client_uuid(self.name)
1955 client = VOSC(self.db.lookup(uuid), client_uuid,
1956 self.target.getName(), self.name)
1958 lctl.clear_log(self.name, self.target.getName())
1959 lctl.record(self.name, self.target.getName())
1961 lctl.mount_option(self.target.getName(), client.get_name(), "")
1965 lctl.clear_log(self.name, self.target.getName() + '-clean')
1966 lctl.record(self.name, self.target.getName() + '-clean')
1968 lctl.del_mount_option(self.target.getName())
1976 # record logs for each client
1978 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1980 config_options = CONFIG_FILE
# Re-run this script in --record mode for every node that mounts
# this filesystem (mountpoint/echoclient profile entries).
1982 for node_db in self.db.lookup_class('node'):
1983 client_name = node_db.getName()
1984 for prof_uuid in node_db.get_refs('profile'):
1985 prof_db = node_db.lookup(prof_uuid)
1986 # refactor this into a funtion to test "clientness"
1988 for ref_class, ref_uuid in prof_db.get_all_refs():
1989 if ref_class in ('mountpoint','echoclient'):
1990 debug("recording", client_name)
1991 old_noexec = config.noexec
# propagate -n (noexec) to the recursive invocation
1993 noexec_opt = ('', '-n')
1994 ret, out = run (sys.argv[0],
1995 noexec_opt[old_noexec == 1],
1996 " -v --record --nomod",
1997 "--record_log", client_name,
1998 "--record_device", self.name,
1999 "--node", client_name,
2002 for s in out: log("record> ", string.strip(s))
2003 ret, out = run (sys.argv[0],
2004 noexec_opt[old_noexec == 1],
2005 "--cleanup -v --record --nomod",
2006 "--record_log", client_name + "-clean",
2007 "--record_device", self.name,
2008 "--node", client_name,
2011 for s in out: log("record> ", string.strip(s))
2012 config.noexec = old_noexec
# (start) replay the recorded config log; OSTs also start the shared OSS.
2016 lctl.start(self.name, self.conf_name)
2017 except CommandError, e:
2019 if self.target.get_class() == 'ost':
2020 if not is_prepared('OSS'):
2022 lctl.start(self.name, 'OSS')
2023 except CommandError, e:
# (cleanup) detach the confobd and release the backing device.
2027 if is_prepared(self.name):
2029 lctl.cleanup(self.name, self.uuid, 0, 0)
2030 clean_dev(self.devpath, self.fstype,
2031 self.backfstype, self.backdevpath)
2032 except CommandError, e:
2033 log(self.module_name, "cleanup failed: ", self.name)
2036 Module.cleanup(self)
# MDSDEV: the metadata server device.  Delegates on-disk config handling
# to a CONFDEV and optionally fronts an LMV master.
# NOTE(review): this listing has gaps (missing originals 2053, 2057-59,
# 2063, 2065, 2068, 2071-77, several `def`/`try:`/`else:` lines, etc.).
2038 class MDSDEV(Module):
2039 def __init__(self,db):
2040 Module.__init__(self, 'MDSDEV', db)
2041 self.devpath = self.db.get_val('devpath','')
# NOTE(review): reads 'devpath' (not 'backdevpath') — same pattern as
# CONFDEV; verify against the DTD before changing.
2042 self.backdevpath = self.db.get_val('devpath','')
2043 self.size = self.db.get_val_int('devsize', 0)
2044 self.journal_size = self.db.get_val_int('journalsize', 0)
2045 self.fstype = self.db.get_val('fstype', '')
2046 self.backfstype = self.db.get_val('backfstype', '')
2047 self.nspath = self.db.get_val('nspath', '')
2048 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2049 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2050 self.obdtype = self.db.get_val('obdtype', '')
2051 self.root_squash = self.db.get_val('root_squash', '')
2052 self.no_root_squash = self.db.get_val('no_root_squash', '')
2054 target_uuid = self.db.get_first_ref('target')
2055 self.target = self.db.lookup(target_uuid)
2056 self.name = self.target.getName()
2060 lmv_uuid = self.db.get_first_ref('lmv')
2061 if lmv_uuid != None:
2062 self.lmv = self.db.lookup(lmv_uuid)
# Active/failover resolution: only the active target instance runs here.
2064 active_uuid = get_active_target(self.target)
2066 panic("No target device found:", target_uuid)
2067 if active_uuid == self.uuid:
2069 group = self.target.get_val('group')
2070 if config.group and config.group != group:
2075 self.uuid = target_uuid
# Master LMV client when this MDS is part of an LMV.
2078 if self.lmv != None:
2079 client_uuid = self.name + "_lmv_UUID"
2080 self.master = LMV(self.lmv, client_uuid,
2081 self.name, self.name)
2083 self.confobd = CONFDEV(self.db, self.name,
2084 target_uuid, self.uuid)
# Kernel modules for a full MDS stack, including fsfilt glue for the
# (possibly layered smfs) filesystem and optional snapshot support.
2086 def add_module(self, manager):
2088 manager.add_lustre_module('mdc', 'mdc')
2089 manager.add_lustre_module('osc', 'osc')
2090 manager.add_lustre_module('ost', 'ost')
2091 manager.add_lustre_module('lov', 'lov')
2092 manager.add_lustre_module('mds', 'mds')
2094 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2095 manager.add_lustre_module(self.fstype, self.fstype)
2098 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2100 # if fstype is smfs, then we should also take care about backing
2102 if self.fstype == 'smfs':
2103 manager.add_lustre_module(self.backfstype, self.backfstype)
2104 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2106 for option in string.split(self.mountfsoptions, ','):
2107 if option == 'snap':
2108 if not self.fstype == 'smfs':
2109 panic("mountoptions has 'snap', but fstype is not smfs.")
2110 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2111 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2114 if self.master != None:
2115 self.master.add_module(manager)
2117 # add CONFOBD modules
2118 if self.confobd != None:
2119 self.confobd.add_module(manager)
# Record config logs: prepare the confobd, write, then release it.
2121 def write_conf(self):
2122 if is_prepared(self.name):
2125 debug(self.uuid, "not active")
2128 self.confobd.prepare()
2129 self.confobd.write_conf()
2130 self.confobd.cleanup()
# (prepare) bring up confobd, master LMV, MDT device, lsd upcall and
# root-squash settings.
2133 if is_prepared(self.name):
2136 debug(self.uuid, "not active")
2140 self.confobd.prepare()
2142 self.confobd.write_conf()
2145 if self.master != None:
2146 self.master.prepare()
2148 if not config.record:
2149 self.confobd.start()
2151 if not is_prepared('MDT'):
2152 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
# In a dev tree, point the lsd cache upcall at the local helper binary.
2154 if development_mode():
2155 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2156 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2157 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2158 print "MDS Warning: failed to set lsd cache upcall"
2160 run("echo ", upcall, " > ", procentry)
# CLI root-squash settings fall back to the DB values.
2162 if config.root_squash == None:
2163 config.root_squash = self.root_squash
2164 if config.no_root_squash == None:
2165 config.no_root_squash = self.no_root_squash
2166 if config.root_squash:
2167 if config.no_root_squash:
2168 nsnid = config.no_root_squash
2171 lctl.root_squash(self.name, config.root_squash, nsnid)
# True while any 'mds' device is still listed by lctl.
2173 def msd_remaining(self):
2174 out = lctl.device_list()
2176 if string.split(s)[2] in ('mds',):
2179 def safe_to_clean(self):
# Modules may only be unloaded once no mds devices remain.
2182 def safe_to_clean_modules(self):
2183 return not self.msd_remaining()
# (cleanup) tear down this MDS, its master LMV, the shared MDT device
# (last one out), and finally the confobd.
2187 debug(self.uuid, "not active")
2190 if is_prepared(self.name):
2192 lctl.cleanup(self.name, self.uuid, config.force,
2194 except CommandError, e:
2195 log(self.module_name, "cleanup failed: ", self.name)
2198 Module.cleanup(self)
2200 if self.master != None:
2201 self.master.cleanup()
2202 if not self.msd_remaining() and is_prepared('MDT'):
2204 lctl.cleanup("MDT", "MDT_UUID", config.force,
2206 except CommandError, e:
2207 print "cleanup failed: ", self.name
2212 self.confobd.cleanup()
2214 def correct_level(self, level, op=None):
2215 #if self.master != None:
# OSD: an object storage device (obdfilter or obdecho) plus the shared
# OSS service device.  Mirrors MDSDEV's structure, delegating on-disk
# config to a CONFDEV.
# NOTE(review): the enclosing `class` header (original ~2219) and many
# lines (2239, 2241, 2243-47, 2249, 2251, 2254-60, `def prepare`,
# `def cleanup`, `try:`/`else:` lines) are missing from this listing.
2220 def __init__(self, db):
2221 Module.__init__(self, 'OSD', db)
2222 self.osdtype = self.db.get_val('osdtype')
2223 self.devpath = self.db.get_val('devpath', '')
# NOTE(review): reads 'devpath' again, same pattern as CONFDEV/MDSDEV.
2224 self.backdevpath = self.db.get_val('devpath', '')
2225 self.size = self.db.get_val_int('devsize', 0)
2226 self.journal_size = self.db.get_val_int('journalsize', 0)
2227 self.inode_size = self.db.get_val_int('inodesize', 0)
2228 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2229 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2230 self.fstype = self.db.get_val('fstype', '')
2231 self.backfstype = self.db.get_val('backfstype', '')
2232 self.nspath = self.db.get_val('nspath', '')
2233 target_uuid = self.db.get_first_ref('target')
2234 ost = self.db.lookup(target_uuid)
2235 self.name = ost.getName()
2236 self.format = self.db.get_val('autoformat', 'yes')
2237 if ost.get_val('failover', 0):
2238 self.failover_ost = 'f'
2240 self.failover_ost = 'n'
# deny_sec: CLI list is appended to (or replaces) the DB list.
2242 self.deny_sec = self.db.get_val('deny_sec', '')
2244 if config.ost_deny_sec:
2246 self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
2248 self.deny_sec = config.ost_deny_sec
# Active/failover resolution, as in MDSDEV.
2250 active_uuid = get_active_target(ost)
2252 panic("No target device found:", target_uuid)
2253 if active_uuid == self.uuid:
2255 group = ost.get_val('group')
2256 if config.group and config.group != group:
2261 self.uuid = target_uuid
2262 self.confobd = CONFDEV(self.db, self.name,
2263 target_uuid, self.uuid)
2265 def add_module(self, manager):
2268 manager.add_lustre_module('ost', 'ost')
2270 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2271 manager.add_lustre_module(self.fstype, self.fstype)
2274 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2276 if self.fstype == 'smfs':
2277 manager.add_lustre_module(self.backfstype, self.backfstype)
2278 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
# NOTE(review): this iterates the option STRING character by character,
# so `option == 'snap'` can never be true.  MDSDEV.add_module uses
# string.split(self.mountfsoptions, ',') — this looks like a real bug.
2280 for option in self.mountfsoptions:
2281 if option == 'snap':
2282 if not self.fstype == 'smfs':
2283 panic("mountoptions with snap, but fstype is not smfs\n")
2284 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2285 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2287 manager.add_lustre_module(self.osdtype, self.osdtype)
2289 # add CONFOBD modules
2290 if self.confobd != None:
2291 self.confobd.add_module(manager)
# (prepare) obdecho is created directly; real OSDs go through confobd,
# then the shared OSS device, then security flavors.
2294 if is_prepared(self.name):
2297 debug(self.uuid, "not active")
2302 if self.osdtype == 'obdecho':
2303 self.info(self.osdtype)
2304 lctl.newdev("obdecho", self.name, self.uuid)
2305 if not is_prepared('OSS'):
2306 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2308 self.confobd.prepare()
2310 self.confobd.write_conf()
2311 if not config.record:
2312 self.confobd.start()
2315 for flavor in string.split(self.deny_sec, ','):
2316 lctl.set_security(self.name, "deny_sec", flavor)
# Record config logs via confobd (no-op for obdecho).
2318 def write_conf(self):
2319 if is_prepared(self.name):
2322 debug(self.uuid, "not active")
2326 if self.osdtype != 'obdecho':
2327 self.confobd.prepare()
2328 self.confobd.write_conf()
2329 if not config.write_conf:
2330 self.confobd.start()
2331 self.confobd.cleanup()
# True while any obdfilter/obdecho device is still listed by lctl.
2333 def osd_remaining(self):
2334 out = lctl.device_list()
2336 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2339 def safe_to_clean(self):
2342 def safe_to_clean_modules(self):
2343 return not self.osd_remaining()
# (cleanup) this OSD, the shared OSS (last one out), then confobd.
2347 debug(self.uuid, "not active")
2350 if is_prepared(self.name):
2353 lctl.cleanup(self.name, self.uuid, config.force,
2355 except CommandError, e:
2356 log(self.module_name, "cleanup failed: ", self.name)
2359 if not self.osd_remaining() and is_prepared('OSS'):
2361 lctl.cleanup("OSS", "OSS_UUID", config.force,
2363 except CommandError, e:
2364 print "cleanup failed: ", self.name
2368 if self.osdtype != 'obdecho':
2370 self.confobd.cleanup()
2372 def correct_level(self, level, op=None):
2375 # Generic client module, used by OSC and MDC
# Connects a client device to its (active) target, handles routed
# connections, failover/backup targets, and deactivation.
# NOTE(review): this listing has gaps (missing originals 2384-85,
# 2389-92, 2395, 2398, 2400, `try:`/`else:` lines etc.).
2376 class Client(Module):
2377 def __init__(self, tgtdb, uuid, module, fs_name,
2378 self_name=None, module_dir=None):
2379 self.target_name = tgtdb.getName()
2380 self.target_uuid = tgtdb.getUUID()
2381 self.module_dir = module_dir
2382 self.backup_targets = []
2383 self.module = module
2386 self.tgt_dev_uuid = get_active_target(tgtdb)
2387 if not self.tgt_dev_uuid:
2388 panic("No target device found for target(1):", self.target_name)
2393 self.module = module
2394 self.module_name = string.upper(module)
# Default device name encodes module, host, target and fs; an explicit
# self_name overrides it.
2396 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2397 self.target_name, fs_name)
2399 self.name = self_name
2401 self.lookup_server(self.tgt_dev_uuid)
2402 self.lookup_backup_targets()
2403 self.fs_name = fs_name
2404 if not self.module_dir:
2405 self.module_dir = module
2407 def add_module(self, manager):
2408 manager.add_lustre_module(self.module_dir, self.module)
2410 def lookup_server(self, srv_uuid):
2411 """ Lookup a server's network information """
2412 self._server_nets = get_ost_net(self.db, srv_uuid)
2413 if len(self._server_nets) == 0:
2414 panic ("Unable to find a server for:", srv_uuid)
2419 def get_servers(self):
2420 return self._server_nets
# Collect failover device uuids: other osd/mdsdev profile entries that
# serve the same target as ours.
2422 def lookup_backup_targets(self):
2423 """ Lookup alternative network information """
2424 prof_list = toplustreDB.get_refs('profile')
2425 for prof_uuid in prof_list:
2426 prof_db = toplustreDB.lookup(prof_uuid)
2428 panic("profile:", prof_uuid, "not found.")
2429 for ref_class, ref_uuid in prof_db.get_all_refs():
2430 if ref_class in ('osd', 'mdsdev'):
2431 devdb = toplustreDB.lookup(ref_uuid)
2432 uuid = devdb.get_first_ref('target')
2433 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2434 self.backup_targets.append(ref_uuid)
# (prepare) connect to the primary server (direct or via routes), create
# the client device, then add connections for every backup target.
2436 def prepare(self, ignore_connect_failure = 0):
2437 self.info(self.target_uuid)
2438 if not config.record and is_prepared(self.name):
2441 srv = choose_local_server(self.get_servers())
2445 routes = find_route(self.get_servers())
2446 if len(routes) == 0:
2447 panic ("no route to", self.target_uuid)
2448 for (srv, r) in routes:
2449 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2450 except CommandError, e:
2451 if not ignore_connect_failure:
# An inactive target gets its device created in the "inactive" state.
2455 if self.target_uuid in config.inactive and self.permits_inactive():
2456 debug("%s inactive" % self.target_uuid)
2457 inactive_p = "inactive"
2459 debug("%s active" % self.target_uuid)
2461 lctl.newdev(self.module, self.name, self.uuid,
2462 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2464 for tgt_dev_uuid in self.backup_targets:
2465 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2466 if len(this_nets) == 0:
2467 panic ("Unable to find a server for:", tgt_dev_uuid)
2468 srv = choose_local_server(this_nets)
2472 routes = find_route(this_nets);
2473 if len(routes) == 0:
2474 panic("no route to", tgt_dev_uuid)
2475 for (srv, r) in routes:
# BUG(review): 'r[0]. srv.nid_uuid' uses a period where a comma is
# needed — compare line 2449 above ('r[0], srv.nid_uuid').  As written
# this is attribute access on r[0] and drops an argument; it would
# raise at runtime on the routed-backup path.
2476 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2478 lctl.add_conn(self.name, srv.nid_uuid);
# (cleanup) tear down device, disconnect primary and backup servers,
# and delete any routes that were added.
2481 if is_prepared(self.name):
2482 Module.cleanup(self)
2484 srv = choose_local_server(self.get_servers())
2486 lctl.disconnect(srv)
2488 for (srv, r) in find_route(self.get_servers()):
2489 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2490 except CommandError, e:
2491 log(self.module_name, "cleanup failed: ", self.name)
2495 for tgt_dev_uuid in self.backup_targets:
2496 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2497 srv = choose_local_server(this_net)
2499 lctl.disconnect(srv)
2501 for (srv, r) in find_route(this_net):
# BUG(review): same period-for-comma defect as line 2476.
2502 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2504 def correct_level(self, level, op=None):
# Mark the client device inactive; failure is logged, not fatal.
2507 def deactivate(self):
2509 lctl.deactivate(self.name)
2510 except CommandError, e:
2511 log(self.module_name, "deactivate failed: ", self.name)
# Thin Client subclasses: MDC (metadata client) and OSC (object storage
# client).  NOTE(review): the `class MDC(Client):` / `class OSC(Client):`
# headers and the `permits_inactive` return lines are missing from this
# listing (originals 2515, 2520-22, 2527).
2516 def __init__(self, db, uuid, fs_name):
2517 Client.__init__(self, db, uuid, 'mdc', fs_name)
2519 def permits_inactive(self):
2523 def __init__(self, db, uuid, fs_name):
2524 Client.__init__(self, db, uuid, 'osc', fs_name)
2526 def permits_inactive(self):
# CMOBD: cache-management OBD pairing a "master" and a "cache" client
# (each an LOV, MDC or LMV depending on the underlying obd class).
# NOTE(review): gaps in this listing (missing originals 2536, 2538,
# 2540, 2542, 2544-47, `def prepare`/`def cleanup` lines, 2581-88, etc.).
2529 class CMOBD(Module):
2530 def __init__(self, db):
2531 Module.__init__(self, 'CMOBD', db)
2532 self.name = self.db.getName();
2533 self.uuid = generate_client_uuid(self.name)
2534 self.master_uuid = self.db.get_first_ref('masterobd')
2535 self.cache_uuid = self.db.get_first_ref('cacheobd')
2537 master_obd = self.db.lookup(self.master_uuid)
2539 panic('master obd not found:', self.master_uuid)
2541 cache_obd = self.db.lookup(self.cache_uuid)
2543 panic('cache obd not found:', self.cache_uuid)
2548 master_class = master_obd.get_class()
2549 cache_class = cache_obd.get_class()
# Instantiate the master-side client by obd class.
2551 if master_class == 'ost' or master_class == 'lov':
2552 client_uuid = "%s_lov_master_UUID" % (self.name)
2553 self.master = LOV(master_obd, client_uuid, self.name);
2554 elif master_class == 'mds':
2555 self.master = get_mdc(db, self.name, self.master_uuid)
2556 elif master_class == 'lmv':
2557 #tmp fix: cobd and cmobd will use same uuid, so use const name here
2558 client_uuid = "%s_lmv_master_UUID" % "master"
2559 self.master = LMV(master_obd, client_uuid, self.name);
2561 panic("unknown master obd class '%s'" %(master_class))
# Instantiate the cache-side client by obd class.
2563 if cache_class == 'ost' or cache_class == 'lov':
2564 client_uuid = "%s_lov_cache_UUID" % (self.name)
2565 self.cache = LOV(cache_obd, client_uuid, self.name);
2566 elif cache_class == 'mds':
2567 self.cache = get_mdc(db, self.name, self.cache_uuid)
2568 elif cache_class == 'lmv':
2569 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2570 self.cache = LMV(cache_obd, client_uuid, self.name);
2572 panic("unknown cache obd class '%s'" %(cache_class))
# (prepare) bring up the master, then create the cmobd device.
2575 self.master.prepare()
2576 if not config.record and is_prepared(self.name):
2578 self.info(self.master_uuid, self.cache_uuid)
2579 lctl.newdev("cmobd", self.name, self.uuid,
2580 setup ="%s %s" %(self.master.uuid,
2589 def get_master_name(self):
2590 return self.master.name
2592 def get_cache_name(self):
2593 return self.cache.name
# (cleanup) tear down the device then the master client.
# NOTE(review): unlike COBD below, the cache client is never cleaned up
# (and add_module loads only the master's modules) — verify intentional.
2596 if is_prepared(self.name):
2597 Module.cleanup(self)
2599 self.master.cleanup()
2601 def add_module(self, manager):
2602 manager.add_lustre_module('cmobd', 'cmobd')
2603 self.master.add_module(manager)
2605 def correct_level(self, level, op=None):
# COBD: caching OBD pairing a master and a cache client, structured like
# CMOBD but cleaning up and preparing both sides.
# NOTE(review): the `class COBD(Module):` header (original ~2608) and
# several lines (2615, 2617, 2619, 2621, 2623-26, 2652-58, `def prepare`/
# `def cleanup` lines) are missing from this listing.  Note also that the
# constructor takes `name` but mostly passes `self.name` for LMV members
# while passing `name` for LOV/MDC members — inconsistency inherited
# from upstream; left untouched.
2609 def __init__(self, db, uuid, name):
2610 Module.__init__(self, 'COBD', db)
2611 self.name = self.db.getName();
2612 self.uuid = generate_client_uuid(self.name)
2613 self.master_uuid = self.db.get_first_ref('masterobd')
2614 self.cache_uuid = self.db.get_first_ref('cacheobd')
2616 master_obd = self.db.lookup(self.master_uuid)
2618 panic('master obd not found:', self.master_uuid)
2620 cache_obd = self.db.lookup(self.cache_uuid)
2622 panic('cache obd not found:', self.cache_uuid)
2627 master_class = master_obd.get_class()
2628 cache_class = cache_obd.get_class()
# Master-side client by obd class.
2630 if master_class == 'ost' or master_class == 'lov':
2631 client_uuid = "%s_lov_master_UUID" % (self.name)
2632 self.master = LOV(master_obd, client_uuid, name);
2633 elif master_class == 'mds':
2634 self.master = get_mdc(db, name, self.master_uuid)
2635 elif master_class == 'lmv':
2636 #tmp fix: cobd and cmobd will use same uuid, so use const name here
2637 client_uuid = "%s_lmv_master_UUID" % "master"
2638 self.master = LMV(master_obd, client_uuid, self.name);
2640 panic("unknown master obd class '%s'" %(master_class))
# Cache-side client by obd class.
2642 if cache_class == 'ost' or cache_class == 'lov':
2643 client_uuid = "%s_lov_cache_UUID" % (self.name)
2644 self.cache = LOV(cache_obd, client_uuid, name);
2645 elif cache_class == 'mds':
2646 self.cache = get_mdc(db, name, self.cache_uuid)
2647 elif cache_class == 'lmv':
2648 client_uuid = "%s_lmv_cache_UUID" % "cache"
2649 self.cache = LMV(cache_obd, client_uuid, self.name);
2651 panic("unknown cache obd class '%s'" %(cache_class))
2659 def get_master_name(self):
2660 return self.master.name
2662 def get_cache_name(self):
2663 return self.cache.name
# (prepare) bring up both sides, then create the cobd device.
2666 self.master.prepare()
2667 self.cache.prepare()
2668 if not config.record and is_prepared(self.name):
2670 self.info(self.master_uuid, self.cache_uuid)
2671 lctl.newdev("cobd", self.name, self.uuid,
2672 setup ="%s %s" %(self.master.name,
# (cleanup) device, then both member clients.
2676 if is_prepared(self.name):
2677 Module.cleanup(self)
2678 self.master.cleanup()
2679 self.cache.cleanup()
2681 def add_module(self, manager):
2682 manager.add_lustre_module('cobd', 'cobd')
2683 self.master.add_module(manager)
2685 # virtual interface for OSC and LOV
# VOSC delegates everything to the concrete client picked from the obd
# class: LOV, COBD or plain OSC.
# NOTE(review): the `class VOSC(Module):` header (original ~2686) and
# the `def get_uuid`/`def get_name`/prepare/cleanup lines (2694-95,
# 2697-2710) are missing from this listing.
2687 def __init__(self, db, client_uuid, name, name_override = None):
2688 Module.__init__(self, 'VOSC', db)
2689 if db.get_class() == 'lov':
2690 self.osc = LOV(db, client_uuid, name, name_override)
2692 elif db.get_class() == 'cobd':
2693 self.osc = COBD(db, client_uuid, name)
2696 self.osc = OSC(db, client_uuid, name)
2700 return self.osc.get_uuid()
2703 return self.osc.get_name()
2711 def add_module(self, manager):
2712 self.osc.add_module(manager)
2714 def correct_level(self, level, op=None):
2715 return self.osc.correct_level(level, op)
2717 # virtual interface for MDC and LMV
# VMDC is the metadata twin of VOSC: delegates to LMV, COBD or MDC.
# NOTE(review): the `class VMDC(Module):` header (original ~2718) and
# the accessor `def` lines plus prepare/cleanup bodies (2727-39) are
# missing from this listing.
2719 def __init__(self, db, client_uuid, name, name_override = None):
2720 Module.__init__(self, 'VMDC', db)
2721 if db.get_class() == 'lmv':
2722 self.mdc = LMV(db, client_uuid, name, name_override)
2723 elif db.get_class() == 'cobd':
2724 self.mdc = COBD(db, client_uuid, name)
2726 self.mdc = MDC(db, client_uuid, name)
2729 return self.mdc.uuid
2732 return self.mdc.name
2740 def add_module(self, manager):
2741 self.mdc.add_module(manager)
2743 def correct_level(self, level, op=None):
2744 return self.mdc.correct_level(level, op)
# ECHO_CLIENT: test client wired to an obd via a VOSC; used by obdecho
# for I/O testing.
# NOTE(review): gaps in this listing (missing `def prepare`/`def cleanup`
# lines and bodies, originals 2753-54, 2756-57, 2760, 2763-64, 2767-68).
2746 class ECHO_CLIENT(Module):
2747 def __init__(self,db):
2748 Module.__init__(self, 'ECHO_CLIENT', db)
2749 self.obd_uuid = self.db.get_first_ref('obd')
2750 obd = self.db.lookup(self.obd_uuid)
2751 self.uuid = generate_client_uuid(self.name)
2752 self.osc = VOSC(obd, self.uuid, self.name)
# (prepare) bring up the underlying osc then the echo_client device.
2755 if not config.record and is_prepared(self.name):
2758 self.osc.prepare() # XXX This is so cheating. -p
2759 self.info(self.obd_uuid)
2761 lctl.newdev("echo_client", self.name, self.uuid,
2762 setup = self.osc.get_name())
2765 if is_prepared(self.name):
2766 Module.cleanup(self)
2769 def add_module(self, manager):
2770 self.osc.add_module(manager)
2771 manager.add_lustre_module('obdecho', 'obdecho')
2773 def correct_level(self, level, op=None):
def generate_client_uuid(name):
    """Generate a pseudo-random client UUID of at most 36 characters.

    The UUID embeds (up to the first 19 characters of) `name` between
    three random 20-bit hex fields, so it is both unique-ish and
    recognizable in device listings.

    Note: uses `random`, not a cryptographic source — fine for device
    naming, never for security tokens.
    """
    # Fix: the extracted listing lost the `name` value of this %-format
    # (original line 2778).  The format has four conversions
    # ('%05x', '%.19s', '%05x', '%05x'), so with only three values it
    # would raise "not enough arguments for format string"; `name` is
    # the only possible '%.19s' argument.
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    # 5+1+19+1+5+5 = 36 worst case; truncate defensively anyway.
    return client_uuid[:36]
# Mountpoint: a client mount of the filesystem.  Builds VOSC/VMDC client
# stacks and drives the actual `mount -t lustre_lite` / `umount` commands.
# NOTE(review): gaps in this listing (missing originals 2795, 2798, 2800,
# 2802, 2804, 2806, 2808, 2810, `def prepare`/`def cleanup` lines,
# 2813-14, 2817-22, 2827-28, 2844-47, etc.).
2783 class Mountpoint(Module):
2784 def __init__(self,db):
2785 Module.__init__(self, 'MTPT', db)
2786 self.path = self.db.get_val('path')
2787 self.clientoptions = self.db.get_val('clientoptions', '')
2788 self.fs_uuid = self.db.get_first_ref('filesystem')
2789 fs = self.db.lookup(self.fs_uuid)
# Prefer an LMV as the metadata target, fall back to a single MDS.
2790 self.mds_uuid = fs.get_first_ref('lmv')
2791 if not self.mds_uuid:
2792 self.mds_uuid = fs.get_first_ref('mds')
2793 self.obd_uuid = fs.get_first_ref('obd')
2794 client_uuid = generate_client_uuid(self.name)
# Security flavors default to 'null'; CLI values override.
2796 self.oss_sec = self.db.get_val('oss_sec','null')
2797 self.mds_sec = self.db.get_val('mds_sec','null')
2799 self.mds_sec = config.mds_sec
2801 self.oss_sec = config.oss_sec
2803 ost = self.db.lookup(self.obd_uuid)
2805 panic("no ost: ", self.obd_uuid)
2807 mds = self.db.lookup(self.mds_uuid)
2809 panic("no mds: ", self.mds_uuid)
2811 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2812 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
# (prepare) skip if already mounted; when recording/dumping, emit the
# mount_option config record instead of actually mounting.
2815 if not config.record and fs_is_mounted(self.path):
2816 log(self.path, "already mounted.")
2823 self.info(self.path, self.mds_uuid, self.obd_uuid)
2824 if config.record or config.lctl_dump:
2825 lctl.mount_option(local_node_name, self.vosc.get_name(),
2826 self.vmdc.get_name())
# Merge CLI client options with DB options (comma-separated).
2829 if config.clientoptions:
2830 if self.clientoptions:
2831 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2833 self.clientoptions = config.clientoptions
2834 if self.clientoptions:
2835 self.clientoptions = ',' + self.clientoptions
2836 # Linux kernel will deal with async and not pass it to ll_fill_super,
2837 # so replace it with Lustre async
2838 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
2840 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2841 (self.vosc.get_name(), self.vmdc.get_name(), self.mds_sec,
2842 self.oss_sec, self.clientoptions, config.config, self.path)
2843 run("mkdir", self.path)
2848 panic("mount failed:", self.path, ":", string.join(val))
# (cleanup) undo the mount_option record or unmount for real
# (-f forced first when config.force, judging by the two umount calls).
2851 self.info(self.path, self.mds_uuid,self.obd_uuid)
2853 if config.record or config.lctl_dump:
2854 lctl.del_mount_option(local_node_name)
2856 if fs_is_mounted(self.path):
2858 (rc, out) = run("umount", "-f", self.path)
2860 (rc, out) = run("umount", self.path)
2862 raise CommandError('umount', out, rc)
2864 if fs_is_mounted(self.path):
2865 panic("fs is still mounted:", self.path)
2870 def add_module(self, manager):
2871 self.vosc.add_module(manager)
2872 self.vmdc.add_module(manager)
2873 manager.add_lustre_module('llite', 'llite')
2875 def correct_level(self, level, op=None):
2878 # ============================================================
2879 # misc query functions
# Collect the Network objects for the node hosting the given OSD, so a
# client can pick a server connection. Elided lines hide the srv_list
# initialization and return.
2881 def get_ost_net(self, osd_uuid):
2885 osd = self.lookup(osd_uuid)
2886 node_uuid = osd.get_first_ref('node')
2887 node = self.lookup(node_uuid)
2889 panic("unable to find node for osd_uuid:", osd_uuid,
# NOTE(review): 'node_uuid_' looks like a typo for 'node_uuid' -- this
# panic path would raise NameError as written; confirm and fix.
2890 " node_ref:", node_uuid_)
2891 for net_uuid in node.get_networks():
2892 db = node.lookup(net_uuid)
2893 srv_list.append(Network(db))
2896 # the order of initialization is based on level.
# Map a service class to its startup level (network < routetbl < ldlm <
# osd/cobd < mdsdev < lmv < mountpoint/echoclient < cmobd), clamped by
# --minlevel/--maxlevel. NOTE(review): the `ret = N` assignment lines
# are elided from this chunk.
2897 def getServiceLevel(self):
2898 type = self.get_class()
2900 if type in ('network',):
2902 elif type in ('routetbl',):
2904 elif type in ('ldlm',):
2906 elif type in ('osd', 'cobd'):
2908 elif type in ('mdsdev',):
2910 elif type in ('lmv',):
2912 elif type in ('mountpoint', 'echoclient'):
2914 elif type in ('cmobd',):
2917 panic("Unknown type: ", type)
# Services outside the configured level window are filtered out
# (returned level presumably zeroed; the line is elided).
2919 if ret < config.minlevel or ret > config.maxlevel:
2924 # return list of services in a profile. list is a list of tuples
2925 # [(level, db_object),]
# Build the [(level, service_db), ...] list for every reference in this
# profile; unknown references are fatal.
2926 def getServices(self):
2928 for ref_class, ref_uuid in self.get_all_refs():
2929 servdb = self.lookup(ref_uuid)
2931 level = getServiceLevel(servdb)
2933 list.append((level, servdb))
2935 panic('service not found: ' + ref_uuid)
2941 ############################################################
2943 # FIXME: clean this mess up!
2945 # OSC is no longer in the xml, so we have to fake it.
2946 # this is getting ugly and begging for another refactoring
# OSC entries are not present in the XML config, so fabricate one from
# the OST's db entry (return statement elided in this chunk).
2947 def get_osc(ost_db, uuid, fs_name):
2948 osc = OSC(ost_db, uuid, fs_name)
# Fabricate an MDC for the given MDS uuid (return statement elided).
# NOTE(review): stray trailing semicolon on the lookup line -- harmless
# but unidiomatic.
2951 def get_mdc(db, fs_name, mds_uuid):
2952 mds_db = db.lookup(mds_uuid);
2954 error("no mds:", mds_uuid)
2955 mdc = MDC(mds_db, mds_uuid, fs_name)
2958 ############################################################
2959 # routing ("rooting")
2961 # list of (nettype, cluster_id, nid)
# Record every (nettype, cluster_id, nid) this node belongs to in the
# global local_clusters list, and register an acceptor handler for each
# listening port. NOTE(review): the Network() construction and the
# port>0 guard lines appear elided.
2964 def find_local_clusters(node_db):
2965 global local_clusters
2966 for netuuid in node_db.get_networks():
2967 net = node_db.lookup(netuuid)
2969 debug("add_local", netuuid)
2970 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2972 if not acceptors.has_key(srv.port):
2973 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2975 # This node is a gateway.
# Whether this node is configured as a gateway (body elided in this
# chunk; presumably returns the is_router global -- confirm).
2977 def node_is_router():
2980 # If there are any routers found in the config, then this will be true
2981 # and all nodes will load kptlrouter.
def node_needs_router():
    """True when this node must load kptlrouter.

    That is the case either when some node in the config is a router
    (needs_router set while scanning routes) or when this node itself
    is one (is_router).
    """
    if needs_router:
        return needs_router
    return is_router
2986 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2987 # Currently, these local routes are only added to kptlrouter route
2988 # table if they are needed to connect to a specific server. This
2989 # should be changed so all available routes are loaded, and the
2990 # ptlrouter can make all the decisions.
# Scan all router nodes and collect, into the global local_routes list,
# the routes reachable through gateways that share a network type and
# cluster id with this node. NOTE(review): the docstring continuation,
# the `for router in list:` line, and the needs_router update appear
# elided from this chunk.
2993 def find_local_routes(lustre):
2994 """ Scan the lustre config looking for routers . Build list of
2996 global local_routes, needs_router
2998 list = lustre.lookup_class('node')
3000 if router.get_val_int('router', 0):
3002 for (local_type, local_cluster_id, local_nid) in local_clusters:
3004 for netuuid in router.get_networks():
3005 db = router.lookup(netuuid)
3006 if (local_type == db.get_val('nettype') and
3007 local_cluster_id == db.get_val('clusterid')):
3008 gw = db.get_val('nid')
3011 debug("find_local_routes: gw is", gw)
3012 for route in router.get_local_routes(local_type, gw):
3013 local_routes.append(route)
3014 debug("find_local_routes:", local_routes)
# Return the first server in srv_list on a cluster this node belongs to
# (the `return srv` line is elided in this chunk).
3017 def choose_local_server(srv_list):
3018 for srv in srv_list:
3019 if local_cluster(srv.net_type, srv.cluster_id):
# Membership test against the global local_clusters list of
# (nettype, cluster_id, nid) tuples (return lines elided).
3022 def local_cluster(net_type, cluster_id):
3023 for cluster in local_clusters:
3024 if net_type == cluster[0] and cluster_id == cluster[1]:
# Like local_cluster(), but also matches the specific nid
# (return lines elided in this chunk).
3028 def local_interface(net_type, cluster_id, nid):
3029 for cluster in local_clusters:
3030 if (net_type == cluster[0] and cluster_id == cluster[1]
3031 and nid == cluster[2]):
# Match each server in srv_list against local_routes entries
# (nettype, gw, tgt_cluster_id, lo, hi); collects (srv, route) pairs
# whose nid falls in [lo, hi] for the right cluster. Result init and
# return lines are elided from this chunk.
3035 def find_route(srv_list):
3037 frm_type = local_clusters[0][0]
3038 for srv in srv_list:
3039 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
3040 to_type = srv.net_type
3042 cluster_id = srv.cluster_id
3043 debug ('looking for route to', to_type, to)
3044 for r in local_routes:
3045 debug("find_route: ", r)
3046 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
3047 result.append((srv, r))
# Resolve the active device uuid for a failover target: an explicit
# --select mapping wins, otherwise the 'active' reference from the
# config (return line elided).
3050 def get_active_target(db):
3051 target_uuid = db.getUUID()
3052 target_name = db.getName()
3053 node_name = get_select(target_name)
3055 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
3057 tgt_dev_uuid = db.get_first_ref('active')
# Find the Network whose nid_uuid matches (Network construction and
# return lines elided in this chunk).
3060 def get_server_by_nid_uuid(db, nid_uuid):
3061 for n in db.lookup_class("network"):
3063 if net.nid_uuid == nid_uuid:
3067 ############################################################
# Service factory body (the def header, presumably `def newService(db):`,
# is elided): dispatch on the db class to the matching Module subclass.
# Several constructor branches (network, routetbl, cmobd, mdsdev,
# mountpoint, echoclient, and the return) are also elided.
3071 type = db.get_class()
3072 debug('Service:', type, db.getName(), db.getUUID())
3077 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3078 elif type == 'network':
3080 elif type == 'routetbl':
3084 elif type == 'cobd':
3085 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3086 elif type == 'cmobd':
3088 elif type == 'mdsdev':
3090 elif type == 'mountpoint':
3092 elif type == 'echoclient':
3097 panic ("unknown service type:", type)
3101 # Prepare the system to run lustre using a particular profile
3102 # in the configuration.
3103 # * load the modules
3104 # * setup networking for the current node
3105 # * make sure partitions are in place and prepared
3106 # * initialize devices with lctl
3107 # Levels are important, and need to be enforced.
# Apply `operation` (doSetup/doCleanup/...) to the services of each
# profile uuid in prof_list; missing profiles are fatal. The call to
# operation(services) is elided from this chunk.
3108 def for_each_profile(db, prof_list, operation):
3109 for prof_uuid in prof_list:
3110 prof_db = db.lookup(prof_uuid)
3112 panic("profile:", prof_uuid, "not found.")
3113 services = getServices(prof_db)
# Resolve the OSC for an update record. With a lov in hand, use its
# uuid/fs name directly; otherwise walk the raw DOM to find which
# filesystem references the record's lov uuid and which mountpoint
# references that filesystem. NOTE(review): elided lines hide the
# if/else scaffolding and loop bodies; code kept byte-identical.
3116 def magic_get_osc(db, rec, lov):
3118 lov_uuid = lov.get_uuid()
3119 lov_name = lov.osc.fs_name
3121 lov_uuid = rec.getAttribute('lov_uuidref')
3122 # FIXME: better way to find the mountpoint?
3123 filesystems = db.root_node.getElementsByTagName('filesystem')
3125 for fs in filesystems:
3126 ref = fs.getElementsByTagName('obd_ref')
3127 if ref[0].getAttribute('uuidref') == lov_uuid:
3128 fsuuid = fs.getAttribute('uuid')
3132 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
3134 mtpts = db.root_node.getElementsByTagName('mountpoint')
3137 ref = fs.getElementsByTagName('filesystem_ref')
3138 if ref[0].getAttribute('uuidref') == fsuuid:
3139 lov_name = fs.getAttribute('name')
3143 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3145 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3147 ost_uuid = rec.getAttribute('ost_uuidref')
3148 obd = db.lookup(ost_uuid)
3151 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3153 osc = get_osc(obd, lov_uuid, lov_name)
# NOTE(review): 'obd_uuid' here is undefined in this scope as shown --
# likely should be 'ost_uuid'; confirm against full source.
3155 panic('osc not found:', obd_uuid)
3158 # write logs for update records. sadly, logs of all types -- and updates in
3159 # particular -- are something of an afterthought. lconf needs to be rewritten
3160 # with these as core concepts. so this is a pretty big hack.
# Replay one <update> element's child records (add / deactivate /
# delete) against the LOV via lctl. NOTE(review): many scaffolding
# lines (try:, continue, else:) are elided; code kept byte-identical.
3161 def process_update_record(db, update, lov):
3162 for rec in update.childNodes:
3163 if rec.nodeType != rec.ELEMENT_NODE:
3166 log("found "+rec.nodeName+" record in update version " +
3167 str(update.getAttribute('version')))
# Every record kind carries the same four required attributes.
3169 lov_uuid = rec.getAttribute('lov_uuidref')
3170 ost_uuid = rec.getAttribute('ost_uuidref')
3171 index = rec.getAttribute('index')
3172 gen = rec.getAttribute('generation')
3174 if not lov_uuid or not ost_uuid or not index or not gen:
3175 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3178 tmplov = db.lookup(lov_uuid)
3180 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3181 lov_name = tmplov.getName()
3183 lov_name = lov.osc.name
3185 # ------------------------------------------------------------- add
3186 if rec.nodeName == 'add':
3188 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3191 osc = magic_get_osc(db, rec, lov)
3194 # Only ignore connect failures with --force, which
3195 # isn't implemented here yet.
3196 osc.prepare(ignore_connect_failure=0)
3197 except CommandError, e:
3198 print "Error preparing OSC %s\n" % osc.uuid
3201 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3203 # ------------------------------------------------------ deactivate
3204 elif rec.nodeName == 'deactivate':
3208 osc = magic_get_osc(db, rec, lov)
3212 except CommandError, e:
3213 print "Error deactivating OSC %s\n" % osc.uuid
3216 # ---------------------------------------------------------- delete
3217 elif rec.nodeName == 'delete':
3221 osc = magic_get_osc(db, rec, lov)
3227 except CommandError, e:
3228 print "Error cleaning up OSC %s\n" % osc.uuid
3231 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
# Write one config record log per non-empty <update> element, named
# "<log_name>-<version>", replaying the records into it via lctl
# record/end_record (the loop header and end_record call are elided).
3233 def process_updates(db, log_device, log_name, lov = None):
3234 updates = db.root_node.getElementsByTagName('update')
3236 if not u.childNodes:
3237 log("ignoring empty update record (version " +
3238 str(u.getAttribute('version')) + ")")
3241 version = u.getAttribute('version')
3242 real_name = "%s-%s" % (log_name, version)
3243 lctl.clear_log(log_device, real_name)
3244 lctl.record(log_device, real_name)
3246 process_update_record(db, u, lov)
# --write_conf pass: only MDS and OSD services write their config
# (the loop header and the write_conf invocation are elided).
3250 def doWriteconf(services):
3254 if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
3255 n = newService(s[1])
# Instantiate each service, re-sort by corrected level, then prepare()
# in ascending order (sort/prepare lines elided from this chunk).
3259 def doSetup(services):
3264 n = newService(s[1])
3266 slist.append((n.level, n))
3269 nl = n[1].correct_level(n[0])
3270 nlist.append((nl, n[1]))
# Collect module requirements from every service, then load them all
# in one pass through the global module manager.
3275 def doLoadModules(services):
3279 # adding all needed modules from all services
3281 n = newService(s[1])
3282 n.add_module(mod_manager)
3284 # loading all registered modules
3285 mod_manager.load_modules()
# Mirror of doLoadModules for teardown: only services that report it is
# safe to clean their modules contribute to the unload set.
3287 def doUnloadModules(services):
3291 # adding all needed modules from all services
3293 n = newService(s[1])
3294 if n.safe_to_clean_modules():
3295 n.add_module(mod_manager)
3297 # unloading all registered modules
3298 mod_manager.cleanup_modules()
# Instantiate each service, re-sort by corrected level, then cleanup()
# in descending order for services that are safe to clean (sort and
# cleanup call lines elided from this chunk).
3300 def doCleanup(services):
3306 n = newService(s[1])
3308 slist.append((n.level, n))
3311 nl = n[1].correct_level(n[0])
3312 nlist.append((nl, n[1]))
3317 if n[1].safe_to_clean():
# Top-level per-host driver: find this host's node entry, read its
# tunables, then run the requested mode (--write_conf, --recover,
# --cleanup, or normal two-step load-modules + setup).
# NOTE(review): gaps in the embedded numbering indicate elided lines
# (loop headers, else branches, gdb sleep, etc.); code kept
# byte-identical.
3322 def doHost(lustreDB, hosts):
3323 global is_router, local_node_name
3326 node_db = lustreDB.lookup_name(h, 'node')
3330 panic('No host entry found.')
3332 local_node_name = node_db.get_val('name', 0)
3333 is_router = node_db.get_val_int('router', 0)
3334 lustre_upcall = node_db.get_val('lustreUpcall', '')
3335 portals_upcall = node_db.get_val('portalsUpcall', '')
3336 timeout = node_db.get_val_int('timeout', 0)
3337 ptldebug = node_db.get_val('ptldebug', '')
3338 subsystem = node_db.get_val('subsystem', '')
3340 find_local_clusters(node_db)
3342 find_local_routes(lustreDB)
3344 # Two step process: (1) load modules, (2) setup lustre
3345 # if not cleaning, load modules first.
3346 prof_list = node_db.get_refs('profile')
3348 if config.write_conf:
3349 for_each_profile(node_db, prof_list, doLoadModules)
3351 for_each_profile(node_db, prof_list, doWriteconf)
3352 for_each_profile(node_db, prof_list, doUnloadModules)
3355 elif config.recover:
3356 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3357 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3358 "--client_uuid <UUID> --conn_uuid <UUID>")
3359 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3361 elif config.cleanup:
3363 # the command line can override this value
3365 # ugly hack, only need to run lctl commands for --dump
3366 if config.lctl_dump or config.record:
3367 for_each_profile(node_db, prof_list, doCleanup)
# Full cleanup path: apply tunables first, then cleanup and unload.
3370 sys_set_timeout(timeout)
3371 sys_set_ptldebug(ptldebug)
3372 sys_set_subsystem(subsystem)
3373 sys_set_lustre_upcall(lustre_upcall)
3374 sys_set_portals_upcall(portals_upcall)
3376 for_each_profile(node_db, prof_list, doCleanup)
3377 for_each_profile(node_db, prof_list, doUnloadModules)
3381 # ugly hack, only need to run lctl commands for --dump
3382 if config.lctl_dump or config.record:
3383 sys_set_timeout(timeout)
3384 sys_set_lustre_upcall(lustre_upcall)
3385 for_each_profile(node_db, prof_list, doSetup)
3389 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3390 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3392 for_each_profile(node_db, prof_list, doLoadModules)
3394 sys_set_debug_path()
3395 sys_set_ptldebug(ptldebug)
3396 sys_set_subsystem(subsystem)
3397 script = config.gdb_script
3398 run(lctl.lctl, ' modules >', script)
3400 log ("The GDB module script is in", script)
3401 # pause, so user has time to break and
3404 sys_set_timeout(timeout)
3405 sys_set_lustre_upcall(lustre_upcall)
3406 sys_set_portals_upcall(portals_upcall)
3408 for_each_profile(node_db, prof_list, doSetup)
# --recover: find the active replacement for a failed target, tear down
# the old connection (best-effort), connect to the new server, and ask
# lctl to restart recovery for the client. try: lines are elided.
3411 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3412 tgt = lustreDB.lookup(tgt_uuid)
3414 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3415 new_uuid = get_active_target(tgt)
3417 raise Lustre.LconfError("doRecovery: no active target found for: " +
3419 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3421 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3423 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3425 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# Disconnect failures are logged, not fatal -- the old server may
# already be gone.
3428 lctl.disconnect(oldnet)
3429 except CommandError, e:
3430 log("recover: disconnect", nid_uuid, "failed: ")
3435 except CommandError, e:
3436 log("recover: connect failed")
3439 lctl.recover(client_uuid, net.nid_uuid)
# Derive config.lustre and config.portals module search paths. In
# development mode they come from the objdir relative to the lconf
# binary; otherwise explicit --lustre/--portals values are normalized.
3442 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3443 base = os.path.dirname(cmd)
3444 if development_mode():
3445 if not config.lustre:
3446 debug('using objdir module paths')
3447 config.lustre = (os.path.join(base, ".."))
3448 # normalize the portals dir, using command line arg if set
3450 portals_dir = config.portals
3451 dir = os.path.join(config.lustre, portals_dir)
3452 config.portals = dir
3453 debug('config.portals', config.portals)
3454 elif config.lustre and config.portals:
3456 # if --lustre and --portals, normalize portals
3457 # can ignore PORTALS_DIR here, since it is probably useless here
3458 config.portals = os.path.join(config.lustre, config.portals)
3459 debug('config.portals B', config.portals)
# Write val to /proc/sys/<path> (noexec guard, write, and close lines
# are elided from this chunk).
3461 def sysctl(path, val):
3462 debug("+ sysctl", path, val)
3466 fp = open(os.path.join('/proc/sys', path), 'w')
# Point the portals debug-dump path at the configured debug_path.
3473 def sys_set_debug_path():
3474 sysctl('portals/debug_path', config.debug_path)
# Set the lustre upcall script; --lustre_upcall (or the generic
# --upcall, via the elided elif branch) overrides the node config value.
3476 def sys_set_lustre_upcall(upcall):
3477 # the command overrides the value in the node config
3478 if config.lustre_upcall:
3479 upcall = config.lustre_upcall
3481 upcall = config.upcall
3483 lctl.set_lustre_upcall(upcall)
# Set the portals upcall via sysctl; --portals_upcall (or the generic
# --upcall, via the elided elif branch) overrides the node config value.
3485 def sys_set_portals_upcall(upcall):
3486 # the command overrides the value in the node config
3487 if config.portals_upcall:
3488 upcall = config.portals_upcall
3490 upcall = config.upcall
3492 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Apply the recovery timeout through lctl.

    A positive --timeout from the command line takes precedence over
    the node-config value; missing or non-positive values are ignored.
    """
    override = config.timeout
    if override and override > 0:
        timeout = override
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
# On 2.6 kernels, raise vm/min_free_kbytes based on MemTotal so skb
# allocation under heavy reads does not exhaust RAM; optionally force
# single-socket socknal mode. NOTE(review): the line-splitting loop and
# the else branch setting minfree for large-memory boxes are elided.
3501 def sys_tweak_socknal ():
3502 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3503 if sys_get_branch() == '2.6':
3504 fp = open('/proc/meminfo')
3505 lines = fp.readlines()
3510 if a[0] == 'MemTotal:':
3512 debug("memtotal" + memtotal)
3513 if int(memtotal) < 262144:
3514 minfree = int(memtotal) / 16
3517 debug("+ minfree ", minfree)
3518 sysctl("vm/min_free_kbytes", minfree)
3519 if config.single_socket:
3520 sysctl("socknal/typed", 0)
# Tune Quadrics Elan interrupt punt loops by writing 1 into whichever
# of the known procfs knobs exists (the `for p in procfiles:` loop
# header is elided from this chunk).
3522 def sys_optimize_elan ():
3523 procfiles = ["/proc/elan/config/eventint_punt_loops",
3524 "/proc/qsnet/elan3/config/eventint_punt_loops",
3525 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3527 if os.access(p, os.W_OK):
3528 run ("echo 1 > " + p)
# Evaluate the symbolic debug expression against ptldebug_names and
# write the 32-bit mask to portals/debug; --ptldebug overrides the
# node value (guard/try/panic lines elided).
3530 def sys_set_ptldebug(ptldebug):
3532 ptldebug = config.ptldebug
3535 val = eval(ptldebug, ptldebug_names)
3536 val = "0x%x" % (val & 0xffffffffL)
3537 sysctl('portals/debug', val)
3538 except NameError, e:
# Same pattern as sys_set_ptldebug, for the subsystem debug mask
# (guard/try/panic lines elided).
3541 def sys_set_subsystem(subsystem):
3542 if config.subsystem:
3543 subsystem = config.subsystem
3546 val = eval(subsystem, subsystem_names)
3547 val = "0x%x" % (val & 0xffffffffL)
3548 sysctl('portals/subsystem_debug', val)
3549 except NameError, e:
# Raise a /proc/sys/net/core/*mem_max value to at least `max`
# (the read of the current value and the early-return when it is
# already large enough are elided from this chunk).
3552 def sys_set_netmem_max(path, max):
3553 debug("setting", path, "to at least", max)
3561 fp = open(path, 'w')
3562 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes if missing."""
    for dev, minor in (('/dev/portals', 240), ('/dev/obd', 241)):
        if not os.access(dev, os.R_OK):
            run('mknod %s c 10 %d' % (dev, minor))
3571 # Add dir to the global PATH, if not already there.
# (The early `return` after the membership test is elided.)
3572 def add_to_path(new_dir):
3573 syspath = string.split(os.environ['PATH'], ':')
3574 if new_dir in syspath:
3576 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
# Default debug dump location; prefers the ramdisk /r prefix when it
# exists (return lines elided from this chunk).
3578 def default_debug_path():
3579 path = '/tmp/lustre-log'
3580 if os.path.isdir('/r'):
# Default gdb module script location; prefers the /r prefix when it
# exists (the else-branch return is elided from this chunk).
3585 def default_gdb_script():
3586 script = '/tmp/ogdb'
3587 if os.path.isdir('/r'):
3588 return '/r' + script
3592 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3593 # ensure basic elements are in the system path
# (The per-directory add_to_path call in the loop body is elided.)
3594 def sanitise_path():
3595 for dir in DEFAULT_PATH:
3598 # global hack for the --select handling
# Populate the global tgt_select map from repeated --select arguments
# (the outer loop over args and the per-list loop header are elided).
3600 def init_select(args):
3601 # args = [service=nodeA,service2=nodeB service3=nodeC]
3604 list = string.split(arg, ',')
3606 srv, node = string.split(entry, '=')
3607 tgt_select[srv] = node
# Look up the node selected for a service via --select (the
# fallthrough return for unselected services is elided).
3609 def get_select(srv):
3610 if tgt_select.has_key(srv):
3611 return tgt_select[srv]
# Option-kind shortcuts from the Lustre options parser, followed by the
# lconf command-line option table (the `lconf_options = [` opening line
# is elided from this chunk; several entries are also missing their
# FLAG/PARAM/INTPARAM trailing elements due to elision).
3615 FLAG = Lustre.Options.FLAG
3616 PARAM = Lustre.Options.PARAM
3617 INTPARAM = Lustre.Options.INTPARAM
3618 PARAMLIST = Lustre.Options.PARAMLIST
3620 ('verbose,v', "Print system commands as they are run"),
3621 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3622 ('config', "Cluster config name used for LDAP query", PARAM),
3623 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3624 ('node', "Load config for <nodename>", PARAM),
3625 ('sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
3626 ('mds_sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
3627 ('oss_sec',"security flavor <null|krb5i|krb5p> between this client with ost", PARAM),
3628 ('mds_mds_sec',"security flavor <null|krb5i|krb5p> between this mds with other mds", PARAM),
3629 ('mds_oss_sec',"security flavor <null|krb5i|krb5p> between this mds with ost", PARAM),
3630 ('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
3631 ('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
3632 ('cleanup,d', "Cleans up config. (Shutdown)"),
3633 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3635 ('single_socket', "socknal option: only use one socket instead of bundle",
3637 ('failover',"""Used to shut down without saving state.
3638 This will allow this node to "give up" a service to a
3639 another node for failover purposes. This will not
3640 be a clean shutdown.""",
3642 ('gdb', """Prints message after creating gdb module script
3643 and sleeps for 5 seconds."""),
3644 ('noexec,n', """Prints the commands and steps that will be run for a
3645 config without executing them. This can used to check if a
3646 config file is doing what it should be doing"""),
3647 ('nomod', "Skip load/unload module step."),
3648 ('nosetup', "Skip device setup/cleanup step."),
3649 ('reformat', "Reformat all devices (without question)"),
3650 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3651 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3652 ('clientoptions', "Additional options for Lustre", PARAM),
3653 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3655 ('write_conf', "Save all the client config information on mds."),
3656 ('record', "Write config information on mds."),
3657 ('record_log', "Name of config record log.", PARAM),
3658 ('record_device', "MDS device name that will record the config commands",
3660 ('root_squash', "MDS squash root to appointed uid",
3662 ('no_root_squash', "Don't squash root for appointed nid",
3664 ('minlevel', "Minimum level of services to configure/cleanup",
3666 ('maxlevel', """Maximum level of services to configure/cleanup
3667 Levels are aproximatly like:
3672 70 - mountpoint, echo_client, osc, mdc, lov""",
3674 ('lustre', """Base directory of lustre sources. This parameter will
3675 cause lconf to load modules from a source tree.""", PARAM),
3676 ('portals', """Portals source directory. If this is a relative path,
3677 then it is assumed to be relative to lustre. """, PARAM),
3678 ('timeout', "Set recovery timeout", INTPARAM),
3679 ('upcall', "Set both portals and lustre upcall script", PARAM),
3680 ('lustre_upcall', "Set lustre upcall script", PARAM),
3681 ('portals_upcall', "Set portals upcall script", PARAM),
3682 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3683 ('ptldebug', "Set the portals debug level", PARAM),
3684 ('subsystem', "Set the portals debug subsystem", PARAM),
3685 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3686 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3687 # Client recovery options
3688 ('recover', "Recover a device"),
3689 ('group', "The group of devices to configure or cleanup", PARAM),
3690 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3691 ('client_uuid', "The failed client (required for recovery)", PARAM),
3692 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3694 ('inactive', """The name of an inactive service, to be ignored during
3695 mounting (currently OST-only). Can be repeated.""",
# main() body (the `def main():` header is elided from this chunk):
# parse options, seed the PRNG from /dev/urandom, load the config from
# a file / URL / LDAP, verify its version, and drive doHost() for this
# node. NOTE(review): many try:/else:/exit lines are elided; code kept
# byte-identical.
3700 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3702 # in the upcall this is set to SIG_IGN
3703 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3705 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3707 config, args = cl.parse(sys.argv[1:])
3708 except Lustre.OptionError, e:
3712 setupModulePath(sys.argv[0])
3714 host = socket.gethostname()
3716 # the PRNG is normally seeded with time(), which is not so good for starting
3717 # time-synchronized clusters
3718 input = open('/dev/urandom', 'r')
3720 print 'Unable to open /dev/urandom!'
3722 seed = input.read(32)
3728 init_select(config.select)
3731 # allow config to be fetched via HTTP, but only with python2
3732 if sys.version[0] != '1' and args[0].startswith('http://'):
3735 config_file = urllib2.urlopen(args[0])
3736 except (urllib2.URLError, socket.error), err:
3737 if hasattr(err, 'args'):
3739 print "Could not access '%s': %s" %(args[0], err)
3741 elif not os.access(args[0], os.R_OK):
3742 print 'File not found or readable:', args[0]
3746 config_file = open(args[0], 'r')
3748 dom = xml.dom.minidom.parse(config_file)
3750 panic("%s does not appear to be a config file." % (args[0]))
3751 sys.exit(1) # make sure to die here, even in debug mode.
3753 CONFIG_FILE = args[0]
3754 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
# Default the cluster config name from the file name, minus '.xml'.
3755 if not config.config:
3756 config.config = os.path.basename(args[0])# use full path?
3757 if config.config[-4:] == '.xml':
3758 config.config = config.config[:-4]
3759 elif config.ldapurl:
3760 if not config.config:
3761 panic("--ldapurl requires --config name")
3762 dn = "config=%s,fs=lustre" % (config.config)
3763 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3764 elif config.ptldebug or config.subsystem:
3765 sys_set_ptldebug(None)
3766 sys_set_subsystem(None)
3769 print 'Missing config file or ldap URL.'
3770 print 'see lconf --help for command summary'
3773 toplustreDB = lustreDB
3775 ver = lustreDB.get_version()
3777 panic("No version found in config data, please recreate.")
3778 if ver != Lustre.CONFIG_VERSION:
3779 panic("Config version", ver, "does not match lconf version",
3780 Lustre.CONFIG_VERSION)
3784 node_list.append(config.node)
3787 node_list.append(host)
3788 node_list.append('localhost')
3790 debug("configuring for host: ", node_list)
# Per-host suffixes keep dumps from different nodes apart.
3793 config.debug_path = config.debug_path + '-' + host
3794 config.gdb_script = config.gdb_script + '-' + host
3796 lctl = LCTLInterface('lctl')
3798 if config.lctl_dump:
3799 lctl.use_save_file(config.lctl_dump)
3802 if not (config.record_device and config.record_log):
3803 panic("When recording, both --record_log and --record_device must be specified.")
3804 lctl.clear_log(config.record_device, config.record_log)
3805 lctl.record(config.record_device, config.record_log)
3807 # init module manager
3808 mod_manager = kmod_manager(config.lustre, config.portals)
3810 doHost(lustreDB, node_list)
3812 if not config.record:
3817 process_updates(lustreDB, config.record_device, config.record_log)
# Script entry point: run main() (the call line is elided), translating
# configuration and command errors into a non-zero exit; cleanup errors
# collected along the way become the exit status.
3819 if __name__ == "__main__":
3822 except Lustre.LconfError, e:
3824 # traceback.print_exc(file=sys.stdout)
3826 except CommandError, e:
3830 if first_cleanup_error:
3831 sys.exit(first_cleanup_error)