# Copyright (C) 2002-2003 Cluster File Systems, Inc.
# Authors: Robert Read <rread@clusterfs.com>
#          Mike Shaver <shaver@clusterfs.com>
# This file is part of Lustre, http://www.lustre.org.
#
# Lustre is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
#
# Lustre is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Lustre; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# lconf - lustre configuration tool
#
# lconf is the main driver script for starting and stopping
# lustre filesystem services.
#
# Based in part on the XML obdctl modifications done by Brian Behlendorf
import sys, getopt, types
import string, os, stat, popen2, socket, time, random, fcntl, select
import re, exceptions, signal, traceback
import xml.dom.minidom

if sys.version[0] == '1':
    from FCNTL import F_GETFL, F_SETFL
else:
    from fcntl import F_GETFL, F_SETFL

PYMOD_DIR = "/usr/lib/lustre/python"
def development_mode():
    base = os.path.dirname(sys.argv[0])
    if os.access(base+"/Makefile", os.R_OK):
        return 1
    return 0

if development_mode():
    sys.path.append('../utils')
else:
    sys.path.append(PYMOD_DIR)
DEFAULT_TCPBUF = 8388608

# Maximum number of devices to search for.
# (the /dev/loop* nodes need to be created beforehand)
MAX_LOOP_DEVICES = 256
PORTALS_DIR = '../portals'

# Needed to call lconf --record
# Please keep these in sync with the values in portals/kp30.h
ptldebug_names = {
    "warning" :   (1 << 10),
    "portals" :   (1 << 14),
    "dlmtrace" :  (1 << 16),
    "rpctrace" :  (1 << 20),
    "vfstrace" :  (1 << 21),
    "console" :   (1 << 25),
    }

subsystem_names = {
    "undefined" : (1 << 0),
    "portals" :   (1 << 10),
    "pinger" :    (1 << 12),
    "filter" :    (1 << 13),
    "ptlrouter" : (1 << 18),
    "confobd" :   (1 << 22),
    }
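# Example (sketch): these mask values OR together before being applied to
# the kernel debug settings, e.g. with the names above
#   ptldebug_names["warning"] | ptldebug_names["dlmtrace"]   # == 0x10400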
first_cleanup_error = 0
def cleanup_error(rc):
    global first_cleanup_error
    if not first_cleanup_error:
        first_cleanup_error = rc
# ============================================================
# debugging and error funcs
def fixme(msg = "this feature"):
    raise Lustre.LconfError, msg + ' not implemented yet.'

def panic(*args):
    msg = string.join(map(str,args))
    if not config.noexec:
        raise Lustre.LconfError(msg)
    else:
        print "! " + msg

def log(*args):
    msg = string.join(map(str,args))
    print msg

def logall(msgs):
    for s in msgs:
        print string.strip(s)

def debug(*args):
    if config.verbose:
        msg = string.join(map(str,args))
        print msg

# ack, python's builtin int() does not support '0x123' syntax.
# eval can do it, although what a hack!
def my_int(s):
    try:
        if s[0:2] == '0x':
            return eval(s, {}, {})
        else:
            return int(s)
    except SyntaxError, e:
        raise ValueError("not a number")
    except TypeError, e:
        raise ValueError("not a number")
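# Example: my_int('0x123') == 291 and my_int('123') == 123; a string that
# is not a number in either form raises ValueError.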
# ============================================================
# locally defined exceptions
class CommandError (exceptions.Exception):
    def __init__(self, cmd_name, cmd_err, rc=None):
        self.cmd_name = cmd_name
        self.cmd_err = cmd_err
        self.rc = rc

    def dump(self):
        if type(self.cmd_err) == types.StringType:
            if self.rc:
                print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
            else:
                print "! %s: %s" % (self.cmd_name, self.cmd_err)
        elif type(self.cmd_err) == types.ListType:
            if self.rc:
                print "! %s (error %d):" % (self.cmd_name, self.rc)
            else:
                print "! %s:" % (self.cmd_name)
            for s in self.cmd_err:
                print "> %s" %(string.strip(s))
        else:
            print self.cmd_err
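# Example (the pattern used throughout this file): callers catch
# CommandError, report it via dump(), and remember the first error code:
#   try:
#       lctl.newdev("obdecho", name, uuid)
#   except CommandError, e:
#       e.dump()
#       cleanup_error(e.rc)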
# ============================================================
# handle daemons, like the acceptor
class DaemonHandler:
    """ Manage starting and stopping a daemon. Assumes daemon manages
    its own pid file. """

    def __init__(self, cmd):
        self.command = cmd
        self.path = ""

    def start(self):
        if self.running():
            log(self.command, "already running.")
        if not self.path:
            self.path = find_prog(self.command)
        if not self.path:
            panic(self.command, "not found.")
        ret, out = runcmd(self.path +' '+ self.command_line())
        if ret:
            raise CommandError(self.path, out, ret)

    def stop(self):
        if self.running():
            pid = self.read_pidfile()
            try:
                if pid != 1:
                    log ("killing process", pid)
                    os.kill(pid, 15)
                else:
                    log("was unable to find pid of " + self.command)
                #time.sleep(1) # let daemon die
            except OSError, e:
                log("unable to kill", self.command, e)
            if self.running():
                log("unable to kill", self.command)

    def running(self):
        pid = self.read_pidfile()
        if pid:
            try:
                if pid != 1:
                    os.kill(pid, 0)
                else:
                    log("was unable to find pid of " + self.command)
            except OSError:
                self.clean_pidfile()
            else:
                return 1
        return 0

    def read_pidfile(self):
        try:
            fp = open(self.pidfile(), 'r')
            pid = int(fp.read())
            fp.close()
            return pid
        except IOError:
            return 0

    def clean_pidfile(self):
        """ Remove a stale pidfile """
        log("removing stale pidfile:", self.pidfile())
        try:
            os.unlink(self.pidfile())
        except OSError, e:
            log(self.pidfile(), e)
class AcceptorHandler(DaemonHandler):
    def __init__(self, port, net_type):
        DaemonHandler.__init__(self, "acceptor")
        self.port = port
        self.flags = ''

    def pidfile(self):
        return "/var/run/%s-%d.pid" % (self.command, self.port)

    def command_line(self):
        return string.join(map(str,(self.flags, self.port)))
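# Example: an AcceptorHandler for port 988 tracks /var/run/acceptor-988.pid
# and is launched as "acceptor 988" (plus any flags set on the handler).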
# start the acceptors
def run_acceptors():
    if config.lctl_dump or config.record:
        return
    for port in acceptors.keys():
        daemon = acceptors[port]
        if not daemon.running():
            daemon.start()

def run_one_acceptor(port):
    if config.lctl_dump or config.record:
        return
    if acceptors.has_key(port):
        daemon = acceptors[port]
        if not daemon.running():
            daemon.start()
    else:
        panic("run_one_acceptor: No acceptor defined for port:", port)

def stop_acceptor(port):
    if acceptors.has_key(port):
        daemon = acceptors[port]
        if daemon.running():
            daemon.stop()
# ============================================================
# handle lctl interface
class LCTLInterface:
    """
    Manage communication with lctl
    """

    def __init__(self, cmd):
        """
        Initialize class by finding the lctl binary.
        """
        self.lctl = find_prog(cmd)
        self.save_file = ''
        self.record_device = ''
        if not self.lctl:
            if config.noexec:
                debug('! lctl not found')
                self.lctl = 'lctl'
            else:
                raise CommandError('lctl', "unable to find lctl binary.")

    def use_save_file(self, file):
        self.save_file = file

    def record(self, dev_name, logname):
        log("Recording log", logname, "on", dev_name)
        self.record_device = dev_name
        self.record_log = logname

    def end_record(self):
        log("End recording log", self.record_log, "on", self.record_device)
        self.record_device = None
        self.record_log = None

    def set_nonblock(self, fd):
        fl = fcntl.fcntl(fd, F_GETFL)
        fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
    def run(self, cmds):
        """
        run lctl
        the cmds are written to stdin of lctl
        lctl doesn't return errors when run in script mode, so
        stderr is checked
        should modify command line to accept multiple commands, or
        create complex command line options
        """
        cmd_line = self.lctl
        if self.save_file:
            cmds = '\n  dump ' + self.save_file + '\n' + cmds
        elif self.record_device:
            cmds = """
    device $%s
    record %s
    %s""" % (self.record_device, self.record_log, cmds)

        debug("+", cmd_line, cmds)
        if config.noexec: return (0, [])

        child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
        child.tochild.write(cmds + "\n")
        child.tochild.close()
        # print "LCTL:", cmds

        # From "Python Cookbook" from O'Reilly
        outfile = child.fromchild
        outfd = outfile.fileno()
        self.set_nonblock(outfd)
        errfile = child.childerr
        errfd = errfile.fileno()
        self.set_nonblock(errfd)

        outdata = errdata = ''
        outeof = erreof = 0
        while 1:
            ready = select.select([outfd,errfd],[],[]) # Wait for input
            if outfd in ready[0]:
                outchunk = outfile.read()
                if outchunk == '': outeof = 1
                outdata = outdata + outchunk
            if errfd in ready[0]:
                errchunk = errfile.read()
                if errchunk == '': erreof = 1
                errdata = errdata + errchunk
            if outeof and erreof: break
        # end of "borrowed" code

        ret = child.wait()
        if os.WIFEXITED(ret):
            rc = os.WEXITSTATUS(ret)
        else:
            rc = 0
        if rc or len(errdata):
            raise CommandError(self.lctl, errdata, rc)
        return rc, outdata
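    # Note on the select() loop above: stdout and stderr are drained
    # concurrently, so a child that fills one pipe cannot deadlock against
    # a parent blocked reading the other (the standard Cookbook recipe).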
    def runcmd(self, *args):
        """
        run lctl using the command line
        """
        cmd = string.join(map(str,args))
        debug("+", self.lctl, cmd)
        rc, out = run(self.lctl, cmd)
        if rc:
            raise CommandError(self.lctl, out, rc)
        return rc, out

    def clear_log(self, dev, log):
        """ clear an existing log """
        cmds =  """
  device $%s
  probe
  clear_log %s
  quit """ % (dev, log)
        self.run(cmds)
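    # Example (sketch): clear_log("conf_mds1", "mds1-conf") feeds lctl a
    # script on stdin roughly like:
    #   device $conf_mds1
    #   probe
    #   clear_log mds1-conf
    #   quit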
    def root_squash(self, name, uid, nid):
        cmds = """
  device $%s
  root_squash %s %s
  quit""" % (name, uid, nid)
        self.run(cmds)

    def network(self, net, nid):
        """ set mine one's nid """
        cmds =  """
  network %s
  mynid %s
  quit """ % (net, nid)
        self.run(cmds)

    def add_interface(self, net, ip, netmask = ""):
        """ add an interface """
        cmds = """
  network %s
  add_interface %s %s
  quit """ % (net, ip, netmask)
        self.run(cmds)

    # delete an interface
    def del_interface(self, net, ip):
        """ delete an interface """
        cmds = """
  network %s
  del_interface %s
  quit """ % (net, ip)
        self.run(cmds)

    # create a new connection
    def add_uuid(self, net_type, uuid, nid):
        cmds = "\n  add_uuid %s %s %s" %(uuid, nid, net_type)
        self.run(cmds)
    def add_peer(self, net_type, nid, hostaddr, port):
        if net_type in ('tcp','openib','ra') and not config.lctl_dump:
            cmds =  """
  network %s
  add_peer %s %s %d
  quit""" % (net_type,
             nid, hostaddr, port )
            self.run(cmds)
        elif net_type in ('iib',) and not config.lctl_dump:
            cmds =  """
  network %s
  add_peer %s
  quit""" % (net_type,
             nid )
            self.run(cmds)
        elif net_type in ('vib',) and not config.lctl_dump:
            cmds =  """
  network %s
  add_peer %s %s
  quit""" % (net_type,
             nid, hostaddr )
            self.run(cmds)

    def connect(self, srv):
        self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
        if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
            if srv.hostaddr[0]:
                hostaddr = string.split(srv.hostaddr[0], '/')[0]
            self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)

    # recover a device
    def recover(self, dev_name, new_conn):
        cmds = """
    device $%s
    recover %s""" %(dev_name, new_conn)
        self.run(cmds)
    # add a route to a range
    def add_route(self, net, gw, lo, hi):
        cmds =  """
  network %s
  add_route %s %s %s
  quit  """ % (net, gw, lo, hi)
        try:
            self.run(cmds)
        except CommandError, e:
            log ("ignore: ")
            e.dump()

    def del_route(self, net, gw, lo, hi):
        cmds =  """
  ignore_errors
  network %s
  del_route %s %s %s
  quit """ % (net, gw, lo, hi)
        self.run(cmds)

    # add a route to a host
    def add_route_host(self, net, uuid, gw, tgt):
        self.add_uuid(net, uuid, tgt)
        cmds =  """
  network %s
  add_route %s %s
  quit """ % (net, gw, tgt)
        try:
            self.run(cmds)
        except CommandError, e:
            log ("ignore: ")
            e.dump()

    # delete a route to a host
    def del_route_host(self, net, uuid, gw, tgt):
        self.del_uuid(uuid)
        cmds =  """
  ignore_errors
  network %s
  del_route %s %s
  quit """ % (net, gw, tgt)
        self.run(cmds)

    def del_peer(self, net_type, nid, hostaddr):
        if net_type in ('tcp',) and not config.lctl_dump:
            cmds =  """
  ignore_errors
  network %s
  del_peer %s %s single_share
  quit""" % (net_type,
             nid, hostaddr)
            self.run(cmds)
        elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
            cmds =  """
  ignore_errors
  network %s
  del_peer %s single_share
  quit""" % (net_type,
             nid)
            self.run(cmds)

    # disconnect one connection
    def disconnect(self, srv):
        self.del_uuid(srv.nid_uuid)
        if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
            if srv.hostaddr[0]:
                hostaddr = string.split(srv.hostaddr[0], '/')[0]
            self.del_peer(srv.net_type, srv.nid, hostaddr)
    def del_uuid(self, uuid):
        cmds =  """
  ignore_errors
  del_uuid %s
  quit""" % (uuid,)
        self.run(cmds)

    def disconnectAll(self, net):
        cmds =  """
  ignore_errors
  network %s
  disconnect
  quit""" % (net)
        self.run(cmds)

    def attach(self, type, name, uuid):
        cmds = """
  attach %s %s %s
  quit""" % (type, name, uuid)
        self.run(cmds)

    def detach(self, name):
        cmds = """
  cfg_device %s
  detach
  quit""" % (name)
        self.run(cmds)

    def set_security(self, name, key, value):
        cmds = """
  cfg_device %s
  set_security %s %s
  quit""" % (name, key, value)
        self.run(cmds)

    def setup(self, name, setup = ""):
        cmds = """
  cfg_device %s
  setup %s
  quit""" % (name, setup)
        self.run(cmds)

    def add_conn(self, name, conn_uuid):
        cmds = """
  cfg_device %s
  add_conn %s
  quit""" % (name, conn_uuid)
        self.run(cmds)

    def start(self, name, conf_name):
        cmds = """
  device $%s
  start %s
  quit""" % (name, conf_name)
        self.run(cmds)
    # create a new device with lctl
    def newdev(self, type, name, uuid, setup = ""):
        self.attach(type, name, uuid);
        try:
            self.setup(name, setup)
        except CommandError, e:
            self.cleanup(name, uuid, 0)
            raise e

    # cleanup a device
    def cleanup(self, name, uuid, force, failover = 0):
        if failover: force = 1
        cmds = """
  ignore_errors
  cfg_device $%s
  cleanup %s %s
  detach
  quit""" % (name, ('', 'force')[force],
             ('', 'failover')[failover])
        self.run(cmds)
    # create an lov
    def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
                  stripe_sz, stripe_off, pattern, devlist = None):
        cmds = """
  attach lov %s %s
  lov_setup %s %d %d %d %s %s
  quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
             pattern, devlist)
        self.run(cmds)

    # add an OBD to a LOV
    def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
        cmds = """
  lov_modify_tgts add %s %s %s %s
  quit""" % (name, obd_uuid, index, gen)
        self.run(cmds)

    # create an lmv
    def lmv_setup(self, name, uuid, desc_uuid, devlist):
        cmds = """
  attach lmv %s %s
  lmv_setup %s %s
  quit""" % (name, uuid, desc_uuid, devlist)
        self.run(cmds)

    # delete an OBD from a LOV
    def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
        cmds = """
  lov_modify_tgts del %s %s %s %s
  quit""" % (name, obd_uuid, index, gen)
        self.run(cmds)

    # deactivate an OBD
    def deactivate(self, name):
        cmds = """
  device $%s
  deactivate
  quit""" % (name)
        self.run(cmds)

    # dump the log file
    def dump(self, dump_file):
        cmds = """
  debug_kernel %s 1
  quit""" % (dump_file)
        self.run(cmds)
    # get list of devices
    def device_list(self):
        devices = '/proc/fs/lustre/devices'
        ret = []
        if os.access(devices, os.R_OK):
            try:
                fp = open(devices, 'r')
                ret = fp.readlines()
                fp.close()
            except IOError, e:
                log(e)
        return ret

    # get lustre version
    def lustre_version(self):
        rc, out = self.runcmd('version')
        return out

    # dump mount options
    def mount_option(self, profile, osc, mdc):
        cmds = """
  mount_option %s %s %s
  quit""" % (profile, osc, mdc)
        self.run(cmds)

    # delete mount options
    def del_mount_option(self, profile):
        cmds = """
  del_mount_option %s
  quit""" % (profile,)
        self.run(cmds)

    def set_timeout(self, timeout):
        cmds = """
  set_timeout %s
  quit""" % (timeout,)
        self.run(cmds)

    def set_lustre_upcall(self, upcall):
        cmds = """
  set_lustre_upcall %s
  quit""" % (upcall,)
        self.run(cmds)
# ============================================================
# Various system-level functions
# (ideally moved to their own module)

# Run a command and return the output and status.
# stderr is captured along with stdout (could use popen3
# to keep it separate if necessary)
def runcmd(cmd):
    debug ("+", cmd)
    if config.noexec: return (0, [])
    f = os.popen(cmd + ' 2>&1')
    out = f.readlines()
    ret = f.close()
    if ret:
        ret = ret >> 8
    else:
        ret = 0
    return (ret, out)

def run(*args):
    cmd = string.join(map(str,args))
    return runcmd(cmd)

# Run a command in the background.
def run_daemon(*args):
    cmd = string.join(map(str,args))
    debug ("+", cmd)
    if config.noexec: return 0
    f = os.popen(cmd + ' 2>&1')
    ret = f.close()
    if ret:
        ret = ret >> 8
    else:
        ret = 0
    return ret
# Determine full path to use for an external command
# searches dirname(argv[0]) first, then PATH
def find_prog(cmd):
    syspath = string.split(os.environ['PATH'], ':')
    cmdpath = os.path.dirname(sys.argv[0])
    syspath.insert(0, cmdpath);
    if config.portals:
        syspath.insert(0, os.path.join(config.portals, 'utils/'))
    for d in syspath:
        prog = os.path.join(d,cmd)
        if os.access(prog, os.X_OK):
            return prog
    return ''

# Recursively look for file starting at base dir
def do_find_file(base, mod):
    fullname = os.path.join(base, mod)
    if os.access(fullname, os.R_OK):
        return fullname
    for d in os.listdir(base):
        dir = os.path.join(base,d)
        if os.path.isdir(dir):
            module = do_find_file(dir, mod)
            if module:
                return module
# is the path a block device?
def is_block(path):
    s = ()
    try:
        s = os.stat(path)
    except OSError:
        return 0
    return stat.S_ISBLK(s[stat.ST_MODE])

# find the journal device from mkfs options
def jdev(opts):
    if opts == None or opts == '':
        return ''
    x = string.split(opts)
    i = 0
    while i < len(x) - 1:
        if x[i] == '-J' and x[i+1].startswith('device='):
            str = x[i+1][7:]
            return str
        i = i + 1
    return ''
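# Example: jdev("-J device=/dev/sdb1 -b 4096") returns '/dev/sdb1', while
# options with no external journal device return ''.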
# build fs according to type
def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
    block_cnt = ''
    jopt = ''
    iopt = ''
    if devsize:
        if devsize < 8000:
            panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
                  (dev, devsize))
        # devsize is in 1k, and fs block count is in 4k
        block_cnt = devsize/4

    if fstype in ('ext3', 'extN', 'ldiskfs'):
        # ext3 journal size is in megabytes
        # but don't set jsize if mkfsoptions indicates a separate journal device
        if jsize == 0 and jdev(mkfsoptions) == '':
            if devsize == 0:
                if not is_block(dev):
                    ret, out = runcmd("ls -l %s" %dev)
                    devsize = int(string.split(out[0])[4]) / 1024
                else:
                    # sfdisk works for symlink, hardlink, and realdev
                    ret, out = runcmd("sfdisk -s %s" %dev)
                    if not ret:
                        devsize = int(out[0])
                    else:
                        # sfdisk -s will fail for too large block device,
                        # then, read the size of partition from /proc/partitions

                        # get the realpath of the device
                        # it may be the real device, such as /dev/hda7
                        # or the hardlink created via mknod for a device
                        if 'realpath' in dir(os.path):
                            real_dev = os.path.realpath(dev)
                        else:
                            real_dev = dev
                            link_count = 0
                            while os.path.islink(real_dev) and (link_count < 20):
                                link_count = link_count + 1
                                dev_link = os.readlink(real_dev)
                                if os.path.isabs(dev_link):
                                    real_dev = dev_link
                                else:
                                    real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
                            if link_count > 19:
                                panic("Encountered too many symbolic links resolving block device:", dev)

                        # get the major and minor number of the realpath via ls
                        # it seems python(os.stat) does not return
                        # the st_rdev member of the stat structure
                        ret, out = runcmd("ls -l %s" %real_dev)
                        major = string.split(string.split(out[0])[4], ",")[0]
                        minor = string.split(out[0])[5]

                        # get the devsize from /proc/partitions with the major and minor number
                        ret, out = runcmd("cat /proc/partitions")
                        for line in out:
                            if len(string.split(line)) > 2:
                                if (string.split(line)[0] == major and
                                    string.split(line)[1] == minor):
                                    devsize = int(string.split(line)[2])
                                    break

            if devsize > 1024 * 1024:
                jsize = ((devsize / 102400) * 4)
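                # e.g. a 2GB device (devsize == 2097152 1k-blocks) gets
                # jsize = (2097152 / 102400) * 4 = 80 (MB of journal)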
        if jsize: jopt = "-J size=%d" %(jsize,)
        if isize: iopt = "-I %d" %(isize,)
        mkfs = 'mkfs.ext2 -j -b 4096 '
        if not isblock or config.force:
            mkfs = mkfs + ' -F '
        if jdev(mkfsoptions) != '':
            jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
            if config.force:
                jmkfs = jmkfs + '-F '
            jmkfs = jmkfs + jdev(mkfsoptions)
            (ret, out) = run (jmkfs)
            if ret:
                panic("Unable to format journal device:", jdev(mkfsoptions), string.join(out))
    elif fstype == 'reiserfs':
        # reiserfs journal size is in blocks
        if jsize: jopt = "--journal_size %d" %(jsize,)
        mkfs = 'mkreiserfs -ff'
    else:
        panic('unsupported fs type: ', fstype)

    if config.mkfsoptions != None:
        mkfs = mkfs + ' ' + config.mkfsoptions
    if mkfsoptions != None:
        mkfs = mkfs + ' ' + mkfsoptions
    (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
    if ret:
        panic("Unable to build fs:", dev, string.join(out))
    # enable hash tree indexing on the filesystem
    if fstype in ('ext3', 'extN', 'ldiskfs'):
        htree = 'echo "feature FEATURE_C5" | debugfs -w'
        (ret, out) = run (htree, dev)
        if ret:
            panic("Unable to enable htree:", dev)
# some systems use /dev/loopN, some /dev/loop/N
def loop_base():
    loop = '/dev/loop'
    if not os.access(loop + str(0), os.R_OK):
        loop = loop + '/'
        if not os.access(loop + str(0), os.R_OK):
            panic ("can't access loop devices")
    return loop
# find loop device assigned to the file
def find_assigned_loop(file):
    loop = loop_base()
    for n in xrange(0, MAX_LOOP_DEVICES):
        dev = loop + str(n)
        if os.access(dev, os.R_OK):
            (stat, out) = run('losetup', dev)
            if out and stat == 0:
                m = re.search(r'\((.*)\)', out[0])
                if m and file == m.group(1):
                    return dev
    return ''
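# Example: with 2.4-era util-linux, `losetup /dev/loop0` prints a line like
#   /dev/loop0: [0302]:12345 (/tmp/lustre-ost1)
# so the regexp above recovers the backing file from the parentheses.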
# find free loop device
def find_free_loop(file):
    loop = loop_base()

    # find next free loop
    for n in xrange(0, MAX_LOOP_DEVICES):
        dev = loop + str(n)
        if os.access(dev, os.R_OK):
            (stat, out) = run('losetup', dev)
            if stat:
                return dev
    return ''
# create file if necessary and assign the first free loop device
def init_loop(file, size, fstype, journal_size, inode_size,
              mkfsoptions, reformat, autoformat, backfstype, backfile):
    if fstype == 'smfs':
        realfile = backfile
        realfstype = backfstype
        if is_block(backfile):
            if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
                mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
            return backfile
    else:
        realfile = file
        realfstype = fstype

    dev = find_assigned_loop(realfile)
    if dev:
        print 'WARNING: file', realfile, 'already mapped to', dev
        return dev

    if reformat or not os.access(realfile, os.R_OK | os.W_OK):
        (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
        if ret:
            panic("Unable to create backing store:", realfile)
        mkfs(realfile, size, realfstype, journal_size, inode_size,
             mkfsoptions, isblock=0)

    dev = find_free_loop(realfile)
    if dev:
        print "attach " + realfile + " <-> " + dev
        run('losetup', dev, realfile)
        return dev

    print "out of loop devices"
    return ''
# undo loop assignment
def clean_loop(dev, fstype, backfstype, backdev):
    if fstype == 'smfs':
        realfile = backdev
    else:
        realfile = dev
    if not is_block(realfile):
        dev = find_assigned_loop(realfile)
        if dev:
            print "detach " + dev + " <-> " + realfile
            ret, out = run('losetup -d', dev)
            if ret:
                log('unable to clean loop device', dev, 'for file', realfile)
                logall(out)

# finalizes passed device
def clean_dev(dev, fstype, backfstype, backdev):
    if fstype == 'smfs' or not is_block(dev):
        clean_loop(dev, fstype, backfstype, backdev)
# determine if dev is formatted as a <fstype> filesystem
def need_format(fstype, dev):
    # FIXME don't know how to implement this
    return 0

# initialize a block device if needed
def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
              inode_size, mkfsoptions, backfstype, backdev):
    if config.noexec:
        return dev
    if fstype == 'smfs' or not is_block(dev):
        dev = init_loop(dev, size, fstype, journal_size, inode_size,
                        mkfsoptions, reformat, autoformat, backfstype, backdev)
    elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
        mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
             isblock=is_block(dev))
#    else:
#        panic("device:", dev,
#              "not prepared, and autoformat is not set.\n",
#              "Rerun with --reformat option to format ALL filesystems")
    return dev
def if2addr(iface):
    """lookup IP address for an interface"""
    rc, out = run("/sbin/ifconfig", iface)
    if rc or not out:
        return None
    addr = string.split(out[1])[1]
    ip = string.split(addr, ':')[1]
    return ip
def def_mount_options(fstype, target):
    """returns default mount options for passed fstype and target (mds, ost)"""
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        return mountfsoptions
    return ""
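# Example: def_mount_options('ext3', 'ost') returns
# "errors=remount-ro,asyncdel" on a 2.4 kernel and plain
# "errors=remount-ro" elsewhere.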
def sys_get_elan_position_file():
    procfiles = ["/proc/elan/device0/position",
                 "/proc/qsnet/elan4/device0/position",
                 "/proc/qsnet/elan3/device0/position"]
    for p in procfiles:
        if os.access(p, os.R_OK):
            return p
    return ""
def sys_get_local_nid(net_type, wildcard, cluster_id):
    """Return the local nid."""
    local = ""
    if sys_get_elan_position_file():
        local = sys_get_local_address('elan', '*', cluster_id)
    else:
        local = sys_get_local_address(net_type, wildcard, cluster_id)
    return local

def sys_get_local_address(net_type, wildcard, cluster_id):
    """Return the local address for the network type."""
    local = ""
    if net_type in ('tcp','openib','iib','vib','ra'):
        if wildcard:
            iface, star = string.split(wildcard, ':')
            local = if2addr(iface)
            if not local:
                panic ("unable to determine ip for:", wildcard)
        else:
            host = socket.gethostname()
            local = socket.gethostbyname(host)
    elif net_type == 'elan':
        # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
        f = sys_get_elan_position_file()
        if not f:
            panic ("unable to determine local Elan ID")
        try:
            fp = open(f, 'r')
            lines = fp.readlines()
            fp.close()
            for l in lines:
                a = string.split(l)
                if a[0] == 'NodeId':
                    elan_id = a[1]
                    break
            try:
                nid = my_int(cluster_id) + my_int(elan_id)
                local = "%d" % (nid)
            except ValueError, e:
                local = elan_id
        except IOError, e:
            log(e)
    elif net_type == 'lo':
        fixme("automatic local address for loopback")
    elif net_type == 'gm':
        fixme("automatic local address for GM")
    return local
def sys_get_branch():
    """Returns kernel release"""
    try:
        fp = open('/proc/sys/kernel/osrelease')
        lines = fp.readlines()
        fp.close()
        for l in lines:
            version = string.split(l)
            a = string.split(version[0], '.')
            return a[0] + '.' + a[1]
    except IOError, e:
        log(e)
    return ""
# XXX: instead of device_list, ask for $name and see what we get
def is_prepared(name):
    """Return true if a device exists for the name"""
    if config.lctl_dump:
        return 0
    if (config.noexec or config.record) and config.cleanup:
        return 1
    try:
        # expect this format:
        # 1 UP ldlm ldlm ldlm_UUID 2
        out = lctl.device_list()
        for s in out:
            if name == string.split(s)[3]:
                return 1
    except CommandError, e:
        e.dump()
    return 0

def net_is_prepared():
    """If any device exists, then assume that all networking
       has been configured"""
    out = lctl.device_list()
    return len(out) > 0

def fs_is_mounted(path):
    """Return true if path is a mounted lustre filesystem"""
    try:
        fp = open('/proc/mounts')
        lines = fp.readlines()
        fp.close()
        for l in lines:
            a = string.split(l)
            if a[1] == path and a[2] == 'lustre_lite':
                return 1
    except IOError, e:
        log(e)
    return 0
def kmod_find(src_dir, dev_dir, modname):
    modbase = src_dir +'/'+ dev_dir +'/'+ modname
    for modext in '.ko', '.o':
        module = modbase + modext
        try:
            if os.access(module, os.R_OK):
                return module
        except OSError:
            pass
    return None

def kmod_info(modname):
    """Returns reference count for passed module name."""
    try:
        fp = open('/proc/modules')
        lines = fp.readlines()
        fp.close()

        # please forgive my tired fingers for this one
        ret = filter(lambda word, mod = modname: word[0] == mod,
                     map(lambda line: string.split(line), lines))
        if not ret:
            return ''
        return ret[0]
    except Exception, e:
        return 0
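# Example: a /proc/modules line such as
#   ksocknal 84000 3
# splits into ['ksocknal', '84000', '3'], so kmod_info('ksocknal') returns
# that word list and element [2] is the reference count used by kmod below.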
class kmod:
    """Presents kernel module"""
    def __init__(self, src_dir, dev_dir, name):
        self.src_dir = src_dir
        self.dev_dir = dev_dir
        self.name = name

    # FIXME we ignore the failure of loading gss module, because we might
    # not need it at all.
    def load(self):
        """Loads module."""
        log ('loading module:', self.name, 'srcdir',
             self.src_dir, 'devdir', self.dev_dir)
        if self.src_dir:
            module = kmod_find(self.src_dir, self.dev_dir,
                               self.name)
            if not module and self.name != 'ptlrpcs_gss':
                panic('module not found:', self.name)
            (rc, out) = run('/sbin/insmod', module)
            if rc:
                if self.name == 'ptlrpcs_gss':
                    print "Warning: gss security not supported!"
                else:
                    raise CommandError('insmod', out, rc)
        else:
            (rc, out) = run('/sbin/modprobe', self.name)
            if rc:
                if self.name == 'ptlrpcs_gss':
                    print "Warning: gss security not supported!"
                else:
                    raise CommandError('modprobe', out, rc)

    def cleanup(self):
        """Unloads module."""
        log('unloading module:', self.name)
        (rc, out) = run('/sbin/rmmod', self.name)
        if rc:
            log('unable to unload module:', self.name +
                "(" + self.refcount() + ")")
            logall(out)

    def info(self):
        """Returns module info if any."""
        return kmod_info(self.name)

    def loaded(self):
        """Returns 1 if module is loaded. Otherwise 0 is returned."""
        if self.info():
            return 1
        return 0

    def refcount(self):
        """Returns module refcount."""
        info = self.info()
        if not info:
            return ''
        return info[2]

    def used(self):
        """Returns 1 if module is used, otherwise 0 is returned."""
        info = self.info()
        if not info:
            return 0
        if len(info) > 3:
            users = info[3]
            if users and users != '(unused)' and users != '-':
                return 1
        return 0

    def busy(self):
        """Returns 1 if module is busy, otherwise 0 is returned."""
        if self.loaded() and (self.used() or self.refcount() != '0'):
            return 1
        return 0
class kmod_manager:
    """Manage kernel modules"""
    def __init__(self, lustre_dir, portals_dir):
        self.lustre_dir = lustre_dir
        self.portals_dir = portals_dir
        self.kmodule_list = []

    def find_module(self, modname):
        """Find module by module name"""
        for mod in self.kmodule_list:
            if mod.name == modname:
                return mod
        return ''

    def add_portals_module(self, dev_dir, modname):
        """Append a module to list of modules to load."""
        mod = self.find_module(modname)
        if not mod:
            mod = kmod(self.portals_dir, dev_dir, modname)
            self.kmodule_list.append(mod)

    def add_lustre_module(self, dev_dir, modname):
        """Append a module to list of modules to load."""
        mod = self.find_module(modname)
        if not mod:
            mod = kmod(self.lustre_dir, dev_dir, modname)
            self.kmodule_list.append(mod)

    def load_modules(self):
        """Load all the modules in the list in the order they appear."""
        for mod in self.kmodule_list:
            if mod.loaded() and not config.noexec:
                continue
            mod.load()

    def cleanup_modules(self):
        """Unload the modules in the list in reverse order."""
        rev = self.kmodule_list
        rev.reverse()
        for mod in rev:
            if (not mod.loaded() or mod.busy()) and not config.noexec:
                continue
            # debug hack
            if mod.name == 'portals' and config.dump:
                lctl.dump(config.dump)
            mod.cleanup()
# ============================================================
# Classes to prepare and cleanup the various objects

class Module:
    """ Base class for the rest of the modules. The default cleanup method is
    defined here, as well as some utility funcs.
    """
    def __init__(self, module_name, db):
        self.db = db
        self.module_name = module_name
        self.name = self.db.getName()
        self.uuid = self.db.getUUID()

    def info(self, *args):
        msg = string.join(map(str,args))
        print self.module_name + ":", self.name, self.uuid, msg

    def cleanup(self):
        """ default cleanup, used for most modules """
        self.info()
        try:
            lctl.cleanup(self.name, self.uuid, config.force)
        except CommandError, e:
            log(self.module_name, "cleanup failed: ", self.name)
            e.dump()
            cleanup_error(e.rc)

    def add_module(self, manager):
        """Adds all needed modules in the order they appear."""
        return

    def safe_to_clean(self):
        return 1

    def safe_to_clean_modules(self):
        return self.safe_to_clean()
class Network(Module):
    def __init__(self,db):
        Module.__init__(self, 'NETWORK', db)
        self.net_type = self.db.get_val('nettype')
        self.nid = self.db.get_val('nid', '*')
        self.cluster_id = self.db.get_val('clusterid', "0")
        self.port = self.db.get_val_int('port', 0)

        if '*' in self.nid:
            self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
            if not self.nid:
                panic("unable to set nid for", self.net_type, self.nid, self.cluster_id)
            self.generic_nid = 1
            debug("nid:", self.nid)
        else:
            self.generic_nid = 0

        self.nid_uuid = self.nid_to_uuid(self.nid)
        self.hostaddr = self.db.get_hostaddr()
        if len(self.hostaddr) == 0:
            self.hostaddr.append(self.nid)
        if '*' in self.hostaddr[0]:
            self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
            if not self.hostaddr[0]:
                panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
            debug("hostaddr:", self.hostaddr[0])
    def add_module(self, manager):
        manager.add_portals_module("libcfs", 'libcfs')
        manager.add_portals_module("portals", 'portals')

        if node_needs_router():
            manager.add_portals_module("router", 'kptlrouter')
        if self.net_type == 'tcp':
            manager.add_portals_module("knals/socknal", 'ksocknal')
        if self.net_type == 'elan':
            manager.add_portals_module("knals/qswnal", 'kqswnal')
        if self.net_type == 'gm':
            manager.add_portals_module("knals/gmnal", 'kgmnal')
        if self.net_type == 'openib':
            manager.add_portals_module("knals/openibnal", 'kopenibnal')
        if self.net_type == 'iib':
            manager.add_portals_module("knals/iibnal", 'kiibnal')
        if self.net_type == 'vib':
            manager.add_portals_module("knals/vibnal", 'kvibnal')
        if self.net_type == 'lo':
            manager.add_portals_module("knals/lonal", 'klonal')
        if self.net_type == 'ra':
            manager.add_portals_module("knals/ranal", 'kranal')

    def nid_to_uuid(self, nid):
        return "NID_%s_UUID" %(nid,)
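    # Example: nid_to_uuid("192.168.0.10") returns "NID_192.168.0.10_UUID",
    # the uuid this nid is registered under via lctl add_uuid in connect().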
    def prepare(self):
        if not config.record and net_is_prepared():
            return
        self.info(self.net_type, self.nid, self.port)
        if not (config.record and self.generic_nid):
            lctl.network(self.net_type, self.nid)
        if self.net_type == 'tcp':
            sys_tweak_socknal()
            for hostaddr in self.db.get_hostaddr():
                ip = string.split(hostaddr, '/')[0]
                if len(string.split(hostaddr, '/')) == 2:
                    netmask = string.split(hostaddr, '/')[1]
                else:
                    netmask = ""
                lctl.add_interface(self.net_type, ip, netmask)
        if self.net_type == 'elan':
            sys_optimize_elan()
        if self.port and node_is_router():
            run_one_acceptor(self.port)
            self.connect_peer_gateways()
    def connect_peer_gateways(self):
        for router in self.db.lookup_class('node'):
            if router.get_val_int('router', 0):
                for netuuid in router.get_networks():
                    net = self.db.lookup(netuuid)
                    gw = Network(net)
                    if (gw.cluster_id == self.cluster_id and
                        gw.net_type == self.net_type):
                        if gw.nid != self.nid:
                            lctl.connect(gw)

    def disconnect_peer_gateways(self):
        for router in self.db.lookup_class('node'):
            if router.get_val_int('router', 0):
                for netuuid in router.get_networks():
                    net = self.db.lookup(netuuid)
                    gw = Network(net)
                    if (gw.cluster_id == self.cluster_id and
                        gw.net_type == self.net_type):
                        if gw.nid != self.nid:
                            try:
                                lctl.disconnect(gw)
                            except CommandError, e:
                                print "disconnect failed: ", self.name
                                e.dump()
                                cleanup_error(e.rc)

    def safe_to_clean(self):
        return not net_is_prepared()

    def cleanup(self):
        self.info(self.net_type, self.nid, self.port)
        if self.port:
            stop_acceptor(self.port)
        if node_is_router():
            self.disconnect_peer_gateways()
        if self.net_type == 'tcp':
            for hostaddr in self.db.get_hostaddr():
                ip = string.split(hostaddr, '/')[0]
                lctl.del_interface(self.net_type, ip)

    def correct_level(self, level, op=None):
        return level
class RouteTable(Module):
    def __init__(self,db):
        Module.__init__(self, 'ROUTES', db)

    def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
                         lo, hi):
        # only setup connections for tcp, openib, iib, vib and ra NALs
        srvdb = None
        if not net_type in ('tcp','openib','iib','vib','ra'):
            return None

        # connect to target if route is to single node and this node is the gw
        if lo == hi and local_interface(net_type, gw_cluster_id, gw):
            if not local_cluster(net_type, tgt_cluster_id):
                panic("target", lo, " not on the local cluster")
            srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
        # connect to gateway if this node is not the gw
        elif (local_cluster(net_type, gw_cluster_id)
              and not local_interface(net_type, gw_cluster_id, gw)):
            srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
        else:
            return None

        if not srvdb:
            panic("no server for nid", lo)
            return None

        return Network(srvdb)

    def prepare(self):
        if not config.record and net_is_prepared():
            return
        self.info()
        for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
            lctl.add_route(net_type, gw, lo, hi)
            srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
            if srv:
                lctl.connect(srv)

    def safe_to_clean(self):
        return not net_is_prepared()

    def cleanup(self):
        if net_is_prepared():
            # the network is still being used, don't clean it up
            return
        for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
            srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
            if srv:
                try:
                    lctl.disconnect(srv)
                except CommandError, e:
                    print "disconnect failed: ", self.name
                    e.dump()
                    cleanup_error(e.rc)
            try:
                lctl.del_route(net_type, gw, lo, hi)
            except CommandError, e:
                print "del_route failed: ", self.name
                e.dump()
                cleanup_error(e.rc)
class Management(Module):
    def __init__(self, db):
        Module.__init__(self, 'MGMT', db)

    def add_module(self, manager):
        manager.add_lustre_module('lvfs', 'lvfs')
        manager.add_lustre_module('obdclass', 'obdclass')
        manager.add_lustre_module('ptlrpc', 'ptlrpc')
        manager.add_lustre_module('mgmt', 'mgmt_svc')

    def prepare(self):
        if not config.record and is_prepared(self.name):
            return
        self.info()
        lctl.newdev("mgmt", self.name, self.uuid)

    def safe_to_clean(self):
        return 1

    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)

    def correct_level(self, level, op=None):
        return level
# This is only needed to load the modules; the LDLM device
# is now created automatically.
class LDLM(Module):
    def __init__(self,db):
        Module.__init__(self, 'LDLM', db)

    def add_module(self, manager):
        manager.add_lustre_module('lvfs', 'lvfs')
        manager.add_lustre_module('obdclass', 'obdclass')
        manager.add_lustre_module('sec', 'ptlrpcs')
        manager.add_lustre_module('ptlrpc', 'ptlrpc')
        manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')

    def prepare(self):
        return

    def cleanup(self):
        return

    def correct_level(self, level, op=None):
        return level
class LOV(Module):
    def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
        Module.__init__(self, 'LOV', db)
        if name_override != None:
            self.name = "lov_%s" % name_override
        self.mds_uuid = self.db.get_first_ref('mds')
        self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
        self.stripe_off = self.db.get_val_int('stripeoffset', 0)
        self.pattern = self.db.get_val_int('stripepattern', 0)
        self.devlist = self.db.get_lov_tgts('lov_tgt')
        self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
        self.osclist = []
        self.obdlist = []
        self.desc_uuid = self.uuid
        self.uuid = generate_client_uuid(self.name)
        self.fs_name = fs_name
        if config_only:
            self.config_only = 1
            return
        self.config_only = None
        mds = self.db.lookup(self.mds_uuid)
        self.mds_name = mds.getName()
        for (obd_uuid, index, gen, active) in self.devlist:
            if obd_uuid == '':
                continue
            self.obdlist.append(obd_uuid)
            obd = self.db.lookup(obd_uuid)
            osc = get_osc(obd, self.uuid, fs_name)
            if osc:
                self.osclist.append((osc, index, gen, active))
            else:
                panic('osc not found:', obd_uuid)

    def prepare(self):
        if not config.record and is_prepared(self.name):
            return
        self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                  self.stripe_off, self.pattern, self.devlist,
                  self.mds_name)
        lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
                       self.stripe_sz, self.stripe_off, self.pattern,
                       string.join(self.obdlist))
        for (osc, index, gen, active) in self.osclist:
            target_uuid = osc.target_uuid
            try:
                # Only ignore connect failures with --force, which
                # isn't implemented here yet.
                osc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing OSC %s\n" % osc.uuid
                raise e
            lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)

    def cleanup(self):
        for (osc, index, gen, active) in self.osclist:
            target_uuid = osc.target_uuid
            osc.cleanup()
        if is_prepared(self.name):
            Module.cleanup(self)
        if self.config_only:
            panic("Can't clean up config_only LOV ", self.name)

    def add_module(self, manager):
        if self.config_only:
            panic("Can't load modules for config_only LOV ", self.name)
        for (osc, index, gen, active) in self.osclist:
            osc.add_module(manager)
            break
        manager.add_lustre_module('lov', 'lov')

    def correct_level(self, level, op=None):
        return level
class LMV(Module):
    def __init__(self, db, uuid, fs_name, name_override = None):
        Module.__init__(self, 'LMV', db)
        if name_override != None:
            self.name = "lmv_%s" % name_override

        self.devlist = self.db.get_lmv_tgts('lmv_tgt')
        if self.devlist == None:
            self.devlist = self.db.get_refs('mds')

        self.mdclist = []
        self.desc_uuid = self.uuid
        self.uuid = uuid
        self.fs_name = fs_name
        for mds_uuid in self.devlist:
            mds = self.db.lookup(mds_uuid)
            if not mds:
                panic("MDS not found!")
            mdc = MDC(mds, self.uuid, fs_name)
            if mdc:
                self.mdclist.append(mdc)
            else:
                panic('mdc not found:', mds_uuid)

    def prepare(self):
        if is_prepared(self.name):
            return

        self.info()
        for mdc in self.mdclist:
            try:
                # Only ignore connect failures with --force, which
                # isn't implemented here yet.
                mdc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing LMV %s\n" % mdc.uuid
                raise e

        lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
                       string.join(self.devlist))

    def cleanup(self):
        for mdc in self.mdclist:
            mdc.cleanup()
        if is_prepared(self.name):
            Module.cleanup(self)

    def add_module(self, manager):
        for mdc in self.mdclist:
            mdc.add_module(manager)
            break
        manager.add_lustre_module('lmv', 'lmv')

    def correct_level(self, level, op=None):
        return level
class CONFDEV(Module):
    def __init__(self, db, name, target_uuid, uuid):
        Module.__init__(self, 'CONFDEV', db)
        self.devpath = self.db.get_val('devpath','')
        self.backdevpath = self.db.get_val('devpath','')
        self.size = self.db.get_val_int('devsize', 0)
        self.journal_size = self.db.get_val_int('journalsize', 0)
        self.fstype = self.db.get_val('fstype', '')
        self.backfstype = self.db.get_val('backfstype', '')
        self.mkfsoptions = self.db.get_val('mkfsoptions', '')
        self.mountfsoptions = self.db.get_val('mountfsoptions', '')
        self.target = self.db.lookup(target_uuid)
        self.name = "conf_%s" % self.target.getName()
        self.client_uuids = self.target.get_refs('client')
        self.obdtype = self.db.get_val('obdtype', '')

        self.mds_sec = self.db.get_val('mds_sec', '')
        self.oss_sec = self.db.get_val('oss_sec', '')
        self.deny_sec = self.db.get_val('deny_sec', '')

        if config.mds_mds_sec:
            self.mds_sec = config.mds_mds_sec
        if config.mds_oss_sec:
            self.oss_sec = config.mds_oss_sec
        if config.mds_deny_sec:
            if self.deny_sec:
                self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
            else:
                self.deny_sec = config.mds_deny_sec

        if self.obdtype == None:
            self.obdtype = 'dumb'

        self.conf_name = name
        self.conf_uuid = uuid
        self.realdev = self.devpath

        self.lmv = None
        self.master = None

        lmv_uuid = self.db.get_first_ref('lmv')
        if lmv_uuid != None:
            self.lmv = self.db.lookup(lmv_uuid)
            if self.lmv != None:
                self.client_uuids = self.lmv.get_refs('client')

        if self.target.get_class() == 'mds':
            if self.target.get_val('failover', 0):
                self.failover_mds = 'f'
            else:
                self.failover_mds = 'n'
            self.format = self.db.get_val('autoformat', "no")
        else:
            self.format = self.db.get_val('autoformat', "yes")
            self.osdtype = self.db.get_val('osdtype')
            ost = self.db.lookup(target_uuid)
            if ost.get_val('failover', 0):
                self.failover_ost = 'f'
            else:
                self.failover_ost = 'n'

        self.inode_size = self.get_inode_size()

        if self.lmv != None:
            client_uuid = self.name + "_lmv_UUID"
            self.master = LMV(self.lmv, client_uuid,
                              self.conf_name, self.conf_name)

    def get_inode_size(self):
        inode_size = self.db.get_val_int('inodesize', 0)
        if inode_size == 0 and self.target.get_class() == 'mds':
            # default inode size for case when neither LOV nor
            # LMV is accessible.
            inode_size = 256

            # find the LOV for this MDS
            lovconfig_uuid = self.target.get_first_ref('lovconfig')
            if lovconfig_uuid or self.lmv != None:
                if self.lmv != None:
                    lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
                    lovconfig = self.lmv.lookup(lovconfig_uuid)
                    lov_uuid = lovconfig.get_first_ref('lov')
                    if lov_uuid == None:
                        panic(self.target.getName() + ": No LOV found for lovconfig ",
                              lovconfig.name)
                else:
                    lovconfig = self.target.lookup(lovconfig_uuid)
                    lov_uuid = lovconfig.get_first_ref('lov')
                    if lov_uuid == None:
                        panic(self.target.getName() + ": No LOV found for lovconfig ",
                              lovconfig.name)
                    if self.lmv != None:
                        lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
                        lovconfig = self.lmv.lookup(lovconfig_uuid)
                        lov_uuid = lovconfig.get_first_ref('lov')

                lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
                          config_only = 1)

                # default stripe count controls default inode_size
                if lov.stripe_cnt > 0:
                    stripe_count = lov.stripe_cnt
                else:
                    stripe_count = len(lov.devlist)
                if stripe_count > 77:
                    inode_size = 4096
                elif stripe_count > 35:
                    inode_size = 2048
                elif stripe_count > 13:
                    inode_size = 1024
                elif stripe_count > 3:
                    inode_size = 512
                else:
                    inode_size = 256

        return inode_size
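    # Example: wider default striping needs a larger MDS inode to hold the
    # striping EA; with the thresholds above, a default stripe_count of 40
    # falls in the "> 35" bucket and selects a 2048-byte inode.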
    def get_mount_options(self, blkdev):
        options = def_mount_options(self.fstype,
                                    self.target.get_class())

        if config.mountfsoptions:
            if options:
                options = "%s,%s" %(options, config.mountfsoptions)
            else:
                options = config.mountfsoptions
            if self.mountfsoptions:
                options = "%s,%s" %(options, self.mountfsoptions)
        else:
            if self.mountfsoptions:
                if options:
                    options = "%s,%s" %(options, self.mountfsoptions)
                else:
                    options = self.mountfsoptions

        if self.fstype == 'smfs':
            if options:
                options = "%s,type=%s,dev=%s" %(options, self.backfstype,
                                                blkdev)
            else:
                options = "type=%s,dev=%s" %(self.backfstype,
                                             blkdev)

        if self.target.get_class() == 'mds':
            if options:
                options = "%s,acl,user_xattr,iopen_nopriv" %(options)
            else:
                options = "iopen_nopriv"

        return options
    def prepare(self):
        if is_prepared(self.name):
            return

        blkdev = block_dev(self.devpath, self.size, self.fstype,
                           config.reformat, self.format, self.journal_size,
                           self.inode_size, self.mkfsoptions, self.backfstype,
                           self.backdevpath)

        if self.fstype == 'smfs':
            realdev = self.fstype
        else:
            realdev = blkdev

        mountfsoptions = self.get_mount_options(blkdev)

        self.info(self.target.get_class(), realdev, mountfsoptions,
                  self.fstype, self.size, self.format)

        lctl.newdev("confobd", self.name, self.uuid,
                    setup ="%s %s %s" %(realdev, self.fstype,
                                        mountfsoptions))

        self.mountfsoptions = mountfsoptions
        self.realdev = realdev

    def add_module(self, manager):
        manager.add_lustre_module('obdclass', 'confobd')
    def write_conf(self):
        if self.target.get_class() == 'ost':
            lctl.clear_log(self.name, self.target.getName() + '-conf')
            lctl.record(self.name, self.target.getName() + '-conf')
            lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
                        setup ="%s %s %s %s" %(self.realdev, self.fstype,
                                               self.failover_ost,
                                               self.mountfsoptions))
            lctl.end_record()
            lctl.clear_log(self.name, 'OSS-conf')
            lctl.record(self.name, 'OSS-conf')
            lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
            lctl.end_record()

        if self.target.get_class() == 'mds':
            if self.master != None:
                master_name = self.master.name
            else:
                master_name = 'dumb'

            lctl.clear_log(self.name, self.target.getName() + '-conf')
            lctl.record(self.name, self.target.getName() + '-conf')
            lctl.attach("mds", self.conf_name, self.conf_uuid)
            if self.mds_sec:
                lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
            if self.oss_sec:
                lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
            if self.deny_sec:
                for flavor in string.split(self.deny_sec, ','):
                    lctl.set_security(self.conf_name, "deny_sec", flavor)
            lctl.newdev("mds", self.conf_name, self.conf_uuid,
                        setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
                                                     self.conf_name, self.mountfsoptions,
                                                     master_name, self.obdtype))
            lctl.end_record()

        if not self.client_uuids:
            return 0

        for uuid in self.client_uuids:
            log("recording client:", uuid)
            client_uuid = generate_client_uuid(self.name)
            client = VOSC(self.db.lookup(uuid), client_uuid,
                          self.target.getName(), self.name)
            lctl.clear_log(self.name, self.target.getName())
            lctl.record(self.name, self.target.getName())
            client.prepare()
            lctl.mount_option(self.target.getName(), client.get_name(), "")
            lctl.end_record()

            lctl.clear_log(self.name, self.target.getName() + '-clean')
            lctl.record(self.name, self.target.getName() + '-clean')
            client.cleanup()
            lctl.del_mount_option(self.target.getName())
            lctl.end_record()

        # record logs for each client
        if config.ldapurl:
            config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
        else:
            config_options = CONFIG_FILE

        for node_db in self.db.lookup_class('node'):
            client_name = node_db.getName()
            for prof_uuid in node_db.get_refs('profile'):
                prof_db = node_db.lookup(prof_uuid)
                # refactor this into a function to test "clientness"
                # of a node.
                for ref_class, ref_uuid in prof_db.get_all_refs():
                    if ref_class in ('mountpoint','echoclient'):
                        debug("recording", client_name)
                        old_noexec = config.noexec
                        config.noexec = 0
                        noexec_opt = ('', '-n')
                        ret, out = run (sys.argv[0],
                                        noexec_opt[old_noexec == 1],
                                        " -v --record --nomod",
                                        "--record_log", client_name,
                                        "--record_device", self.name,
                                        "--node", client_name,
                                        config_options)
                        for s in out: log("record> ", string.strip(s))
                        ret, out = run (sys.argv[0],
                                        noexec_opt[old_noexec == 1],
                                        "--cleanup -v --record --nomod",
                                        "--record_log", client_name + "-clean",
                                        "--record_device", self.name,
                                        "--node", client_name,
                                        config_options)
                        for s in out: log("record> ", string.strip(s))
                        config.noexec = old_noexec
    def start(self):
        try:
            lctl.start(self.name, self.conf_name)
        except CommandError, e:
            raise e
        if self.target.get_class() == 'ost':
            if not is_prepared('OSS'):
                try:
                    lctl.start(self.name, 'OSS')
                except CommandError, e:
                    raise e

    def cleanup(self):
        if is_prepared(self.name):
            try:
                lctl.cleanup(self.name, self.uuid, 0, 0)
                clean_dev(self.devpath, self.fstype,
                          self.backfstype, self.backdevpath)
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
                e.dump()
                cleanup_error(e.rc)
                Module.cleanup(self)
class MDSDEV(Module):
    def __init__(self,db):
        Module.__init__(self, 'MDSDEV', db)
        self.devpath = self.db.get_val('devpath','')
        self.backdevpath = self.db.get_val('devpath','')
        self.size = self.db.get_val_int('devsize', 0)
        self.journal_size = self.db.get_val_int('journalsize', 0)
        self.fstype = self.db.get_val('fstype', '')
        self.backfstype = self.db.get_val('backfstype', '')
        self.nspath = self.db.get_val('nspath', '')
        self.mkfsoptions = self.db.get_val('mkfsoptions', '')
        self.mountfsoptions = self.db.get_val('mountfsoptions', '')
        self.obdtype = self.db.get_val('obdtype', '')
        self.root_squash = self.db.get_val('root_squash', '')
        self.no_root_squash = self.db.get_val('no_root_squash', '')

        target_uuid = self.db.get_first_ref('target')
        self.target = self.db.lookup(target_uuid)
        self.name = self.target.getName()
        self.master = None
        self.lmv = None

        lmv_uuid = self.db.get_first_ref('lmv')
        if lmv_uuid != None:
            self.lmv = self.db.lookup(lmv_uuid)

        active_uuid = get_active_target(self.target)
        if not active_uuid:
            panic("No target device found:", target_uuid)
        if active_uuid == self.uuid:
            self.active = 1
            group = self.target.get_val('group')
            if config.group and config.group != group:
                self.active = 0
        else:
            self.active = 0

        self.uuid = target_uuid

        # setup LMV
        if self.lmv != None:
            client_uuid = self.name + "_lmv_UUID"
            self.master = LMV(self.lmv, client_uuid,
                              self.name, self.name)

        self.confobd = CONFDEV(self.db, self.name,
                               target_uuid, self.uuid)
    def add_module(self, manager):
        if self.active:
            manager.add_lustre_module('mdc', 'mdc')
            manager.add_lustre_module('osc', 'osc')
            manager.add_lustre_module('ost', 'ost')
            manager.add_lustre_module('lov', 'lov')
            manager.add_lustre_module('mds', 'mds')

            if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
                manager.add_lustre_module(self.fstype, self.fstype)

            if self.fstype:
                manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))

            # if fstype is smfs, then we should also take care about backing
            # store fs.
            if self.fstype == 'smfs':
                manager.add_lustre_module(self.backfstype, self.backfstype)
                manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))

            for option in string.split(self.mountfsoptions, ','):
                if option == 'snap':
                    if not self.fstype == 'smfs':
                        panic("mountoptions has 'snap', but fstype is not smfs.")
                    manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
                    manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))

        # add LMV modules
        if self.master != None:
            self.master.add_module(manager)

        # add CONFOBD modules
        if self.confobd != None:
            self.confobd.add_module(manager)
    def write_conf(self):
        if is_prepared(self.name):
            return
        if not self.active:
            debug(self.uuid, "not active")
            return
        run_acceptors()
        self.confobd.prepare()
        self.confobd.write_conf()
        self.confobd.cleanup()

    def prepare(self):
        if is_prepared(self.name):
            return
        if not self.active:
            debug(self.uuid, "not active")
            return
        run_acceptors()

        self.confobd.prepare()
        if config.reformat:
            self.confobd.write_conf()

        # prepare LMV
        if self.master != None:
            self.master.prepare()

        if not config.record:
            self.confobd.start()

        if not is_prepared('MDT'):
            lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")

        if development_mode():
            procentry = "/proc/fs/lustre/mds/lsd_upcall"
            upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
            if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
                print "MDS Warning: failed to set lsd cache upcall"
            else:
                run("echo ", upcall, " > ", procentry)

        if config.root_squash == None:
            config.root_squash = self.root_squash
        if config.no_root_squash == None:
            config.no_root_squash = self.no_root_squash
        if config.root_squash:
            if config.no_root_squash:
                nsnid = config.no_root_squash
            else:
                nsnid = "0"
            lctl.root_squash(self.name, config.root_squash, nsnid)
    def msd_remaining(self):
        out = lctl.device_list()
        for s in out:
            if string.split(s)[2] in ('mds',):
                return 1

    def safe_to_clean(self):
        return self.active

    def safe_to_clean_modules(self):
        return not self.msd_remaining()

    def cleanup(self):
        if not self.active:
            debug(self.uuid, "not active")
            return
        self.info()
        if is_prepared(self.name):
            try:
                lctl.cleanup(self.name, self.uuid, config.force,
                             config.failover)
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
                e.dump()
                cleanup_error(e.rc)
                Module.cleanup(self)

        if self.master != None:
            self.master.cleanup()

        if not self.msd_remaining() and is_prepared('MDT'):
            try:
                lctl.cleanup("MDT", "MDT_UUID", config.force,
                             config.failover)
            except CommandError, e:
                print "cleanup failed: ", self.name
                e.dump()
                cleanup_error(e.rc)

        if self.confobd:
            self.confobd.cleanup()

    def correct_level(self, level, op=None):
        #if self.master != None:
        #    level = level + 2
        return level
class OSD(Module):
    def __init__(self, db):
        Module.__init__(self, 'OSD', db)
        self.osdtype = self.db.get_val('osdtype')
        self.devpath = self.db.get_val('devpath', '')
        self.backdevpath = self.db.get_val('devpath', '')
        self.size = self.db.get_val_int('devsize', 0)
        self.journal_size = self.db.get_val_int('journalsize', 0)
        self.inode_size = self.db.get_val_int('inodesize', 0)
        self.mkfsoptions = self.db.get_val('mkfsoptions', '')
        self.mountfsoptions = self.db.get_val('mountfsoptions', '')
        self.fstype = self.db.get_val('fstype', '')
        self.backfstype = self.db.get_val('backfstype', '')
        self.nspath = self.db.get_val('nspath', '')
        target_uuid = self.db.get_first_ref('target')
        ost = self.db.lookup(target_uuid)
        self.name = ost.getName()
        self.format = self.db.get_val('autoformat', 'yes')
        if ost.get_val('failover', 0):
            self.failover_ost = 'f'
        else:
            self.failover_ost = 'n'

        self.deny_sec = self.db.get_val('deny_sec', '')

        if config.ost_deny_sec:
            if self.deny_sec:
                self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
            else:
                self.deny_sec = config.ost_deny_sec

        active_uuid = get_active_target(ost)
        if not active_uuid:
            panic("No target device found:", target_uuid)
        if active_uuid == self.uuid:
            self.active = 1
            group = ost.get_val('group')
            if config.group and config.group != group:
                self.active = 0
        else:
            self.active = 0

        self.uuid = target_uuid
        self.confobd = CONFDEV(self.db, self.name,
                               target_uuid, self.uuid)
    def add_module(self, manager):
        if self.active:
            manager.add_lustre_module('ost', 'ost')

            if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
                manager.add_lustre_module(self.fstype, self.fstype)

            if self.fstype:
                manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))

            if self.fstype == 'smfs':
                manager.add_lustre_module(self.backfstype, self.backfstype)
                manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))

            for option in string.split(self.mountfsoptions, ','):
                if option == 'snap':
                    if not self.fstype == 'smfs':
                        panic("mountoptions with snap, but fstype is not smfs\n")
                    manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
                    manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))

            manager.add_lustre_module(self.osdtype, self.osdtype)

            # add CONFOBD modules
            if self.confobd != None:
                self.confobd.add_module(manager)
    def prepare(self):
        if is_prepared(self.name):
            return
        if not self.active:
            debug(self.uuid, "not active")
            return

        run_acceptors()

        if self.osdtype == 'obdecho':
            self.info(self.osdtype)
            lctl.newdev("obdecho", self.name, self.uuid)
            if not is_prepared('OSS'):
                lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
        else:
            self.confobd.prepare()
            if config.reformat:
                self.confobd.write_conf()
            if not config.record:
                self.confobd.start()

        if self.deny_sec:
            for flavor in string.split(self.deny_sec, ','):
                lctl.set_security(self.name, "deny_sec", flavor)

    def write_conf(self):
        if is_prepared(self.name):
            return
        if not self.active:
            debug(self.uuid, "not active")
            return

        run_acceptors()
        if self.osdtype != 'obdecho':
            self.confobd.prepare()
            self.confobd.write_conf()
            if not config.write_conf:
                self.confobd.start()
            self.confobd.cleanup()
    def osd_remaining(self):
        out = lctl.device_list()
        for s in out:
            if string.split(s)[2] in ('obdfilter', 'obdecho'):
                return 1

    def safe_to_clean(self):
        return self.active

    def safe_to_clean_modules(self):
        return not self.osd_remaining()

    def cleanup(self):
        if not self.active:
            debug(self.uuid, "not active")
            return

        if is_prepared(self.name):
            self.info()
            try:
                lctl.cleanup(self.name, self.uuid, config.force,
                             config.failover)
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
                e.dump()
                cleanup_error(e.rc)

        if not self.osd_remaining() and is_prepared('OSS'):
            try:
                lctl.cleanup("OSS", "OSS_UUID", config.force,
                             config.failover)
            except CommandError, e:
                print "cleanup failed: ", self.name
                e.dump()
                cleanup_error(e.rc)

        if self.osdtype != 'obdecho':
            if self.confobd:
                self.confobd.cleanup()

    def correct_level(self, level, op=None):
        return level
# Generic client module, used by OSC and MDC
class Client(Module):
    def __init__(self, tgtdb, uuid, module, fs_name,
                 self_name=None, module_dir=None):
        self.target_name = tgtdb.getName()
        self.target_uuid = tgtdb.getUUID()
        self.module_dir = module_dir
        self.backup_targets = []
        self.module = module
        self.db = tgtdb

        self.tgt_dev_uuid = get_active_target(tgtdb)
        if not self.tgt_dev_uuid:
            panic("No target device found for target(1):", self.target_name)

        self.module = module
        self.module_name = string.upper(module)
        if not self_name:
            self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
                                         self.target_name, fs_name)
        else:
            self.name = self_name
        self.uuid = uuid
        self.lookup_server(self.tgt_dev_uuid)
        self.lookup_backup_targets()
        self.fs_name = fs_name
        if not self.module_dir:
            self.module_dir = module
    def add_module(self, manager):
        manager.add_lustre_module(self.module_dir, self.module)

    def lookup_server(self, srv_uuid):
        """ Lookup a server's network information """
        self._server_nets = get_ost_net(self.db, srv_uuid)
        if len(self._server_nets) == 0:
            panic ("Unable to find a server for:", srv_uuid)

    def get_servers(self):
        return self._server_nets

    def lookup_backup_targets(self):
        """ Lookup alternative network information """
        prof_list = toplustreDB.get_refs('profile')
        for prof_uuid in prof_list:
            prof_db = toplustreDB.lookup(prof_uuid)
            if not prof_db:
                panic("profile:", prof_uuid, "not found.")
            for ref_class, ref_uuid in prof_db.get_all_refs():
                if ref_class in ('osd', 'mdsdev'):
                    devdb = toplustreDB.lookup(ref_uuid)
                    uuid = devdb.get_first_ref('target')
                    if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
                        self.backup_targets.append(ref_uuid)
    def prepare(self, ignore_connect_failure = 0):
        self.info(self.target_uuid)
        if not config.record and is_prepared(self.name):
            return
        try:
            srv = choose_local_server(self.get_servers())
            if srv:
                lctl.connect(srv)
            else:
                routes = find_route(self.get_servers())
                if len(routes) == 0:
                    panic ("no route to", self.target_uuid)
                for (srv, r) in routes:
                    lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
        except CommandError, e:
            if not ignore_connect_failure:
                raise e

        if self.target_uuid in config.inactive and self.permits_inactive():
            debug("%s inactive" % self.target_uuid)
            inactive_p = "inactive"
        else:
            debug("%s active" % self.target_uuid)
            inactive_p = ""
        lctl.newdev(self.module, self.name, self.uuid,
                    setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
                                         inactive_p))
        for tgt_dev_uuid in self.backup_targets:
            this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
            if len(this_nets) == 0:
                panic ("Unable to find a server for:", tgt_dev_uuid)
            srv = choose_local_server(this_nets)
            if srv:
                lctl.connect(srv)
            else:
                routes = find_route(this_nets);
                if len(routes) == 0:
                    panic("no route to", tgt_dev_uuid)
                for (srv, r) in routes:
                    lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
            if srv:
                lctl.add_conn(self.name, srv.nid_uuid);
    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)
            try:
                srv = choose_local_server(self.get_servers())
                if srv:
                    lctl.disconnect(srv)
                else:
                    for (srv, r) in find_route(self.get_servers()):
                        lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
                e.dump()
                cleanup_error(e.rc)

            for tgt_dev_uuid in self.backup_targets:
                this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
                srv = choose_local_server(this_net)
                if srv:
                    lctl.disconnect(srv)
                else:
                    for (srv, r) in find_route(this_net):
                        lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])

    def correct_level(self, level, op=None):
        return level

    def deactivate(self):
        try:
            lctl.deactivate(self.name)
        except CommandError, e:
            log(self.module_name, "deactivate failed: ", self.name)
            e.dump()
            cleanup_error(e.rc)
class MDC(Client):
    def __init__(self, db, uuid, fs_name):
        Client.__init__(self, db, uuid, 'mdc', fs_name)

    def permits_inactive(self):
        return 0

class OSC(Client):
    def __init__(self, db, uuid, fs_name):
        Client.__init__(self, db, uuid, 'osc', fs_name)

    def permits_inactive(self):
        return 1
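
# A quick sketch of how --inactive interacts with these hooks (the UUID is
# made up for illustration): a target listed on the command line, e.g.
#
#   lconf --inactive OST_localhost_UUID config.xml
#
# is only honoured when permits_inactive() returns true, so OSCs can be
# started inactive while MDCs never are; Client.prepare() above then adds
# the "inactive" token to the lctl newdev setup string for that OSC.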
class CMOBD(Module):
    def __init__(self, db):
        Module.__init__(self, 'CMOBD', db)
        self.name = self.db.getName()
        self.uuid = generate_client_uuid(self.name)
        self.master_uuid = self.db.get_first_ref('masterobd')
        self.cache_uuid = self.db.get_first_ref('cacheobd')

        master_obd = self.db.lookup(self.master_uuid)
        if not master_obd:
            panic('master obd not found:', self.master_uuid)

        cache_obd = self.db.lookup(self.cache_uuid)
        if not cache_obd:
            panic('cache obd not found:', self.cache_uuid)

        self.master = None
        self.cache = None

        master_class = master_obd.get_class()
        cache_class = cache_obd.get_class()

        if master_class == 'ost' or master_class == 'lov':
            client_uuid = "%s_lov_master_UUID" % (self.name)
            self.master = LOV(master_obd, client_uuid, self.name)
        elif master_class == 'mds':
            self.master = get_mdc(db, self.name, self.master_uuid)
        elif master_class == 'lmv':
            # tmp fix: cobd and cmobd will use the same uuid, so use a
            # constant name here
            client_uuid = "%s_lmv_master_UUID" % "master"
            self.master = LMV(master_obd, client_uuid, self.name)
        else:
            panic("unknown master obd class '%s'" %(master_class))

        if cache_class == 'ost' or cache_class == 'lov':
            client_uuid = "%s_lov_cache_UUID" % (self.name)
            self.cache = LOV(cache_obd, client_uuid, self.name)
        elif cache_class == 'mds':
            self.cache = get_mdc(db, self.name, self.cache_uuid)
        elif cache_class == 'lmv':
            client_uuid = "%s_lmv_cache_UUID" % (self.name)
            self.cache = LMV(cache_obd, client_uuid, self.name)
        else:
            panic("unknown cache obd class '%s'" %(cache_class))
    def prepare(self):
        self.master.prepare()
        if not config.record and is_prepared(self.name):
            return
        self.info(self.master_uuid, self.cache_uuid)
        lctl.newdev("cmobd", self.name, self.uuid,
                    setup ="%s %s" %(self.master.uuid,
                                     self.cache.uuid))

    def get_master_name(self):
        return self.master.name

    def get_cache_name(self):
        return self.cache.name

    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)
        self.master.cleanup()

    def add_module(self, manager):
        manager.add_lustre_module('smfs', 'smfs')
        manager.add_lustre_module('cmobd', 'cmobd')
        self.master.add_module(manager)

    def correct_level(self, level, op=None):
        return level
class COBD(Module):
    def __init__(self, db, uuid, name):
        Module.__init__(self, 'COBD', db)
        self.name = self.db.getName()
        self.uuid = generate_client_uuid(self.name)
        self.master_uuid = self.db.get_first_ref('masterobd')
        self.cache_uuid = self.db.get_first_ref('cacheobd')

        master_obd = self.db.lookup(self.master_uuid)
        if not master_obd:
            panic('master obd not found:', self.master_uuid)

        cache_obd = self.db.lookup(self.cache_uuid)
        if not cache_obd:
            panic('cache obd not found:', self.cache_uuid)

        self.master = None
        self.cache = None

        master_class = master_obd.get_class()
        cache_class = cache_obd.get_class()

        if master_class == 'ost' or master_class == 'lov':
            client_uuid = "%s_lov_master_UUID" % (self.name)
            self.master = LOV(master_obd, client_uuid, name)
        elif master_class == 'mds':
            self.master = get_mdc(db, name, self.master_uuid)
        elif master_class == 'lmv':
            # tmp fix: cobd and cmobd will use the same uuid, so use a
            # constant name here
            client_uuid = "%s_lmv_master_UUID" % "master"
            self.master = LMV(master_obd, client_uuid, self.name)
        else:
            panic("unknown master obd class '%s'" %(master_class))

        if cache_class == 'ost' or cache_class == 'lov':
            client_uuid = "%s_lov_cache_UUID" % (self.name)
            self.cache = LOV(cache_obd, client_uuid, name)
        elif cache_class == 'mds':
            self.cache = get_mdc(db, name, self.cache_uuid)
        elif cache_class == 'lmv':
            client_uuid = "%s_lmv_cache_UUID" % "cache"
            self.cache = LMV(cache_obd, client_uuid, self.name)
        else:
            panic("unknown cache obd class '%s'" %(cache_class))
    def get_master_name(self):
        return self.master.name

    def get_cache_name(self):
        return self.cache.name

    def prepare(self):
        if not config.record and is_prepared(self.name):
            return
        self.master.prepare()
        self.cache.prepare()
        self.info(self.master_uuid, self.cache_uuid)
        lctl.newdev("cobd", self.name, self.uuid,
                    setup ="%s %s" %(self.master.name,
                                     self.cache.name))

    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)
        self.master.cleanup()
        self.cache.cleanup()

    def add_module(self, manager):
        manager.add_lustre_module('cobd', 'cobd')
        self.master.add_module(manager)
# virtual interface for OSC and LOV
class VOSC(Module):
    def __init__(self, db, client_uuid, name, name_override = None):
        Module.__init__(self, 'VOSC', db)
        if db.get_class() == 'lov':
            self.osc = LOV(db, client_uuid, name, name_override)
        elif db.get_class() == 'cobd':
            self.osc = COBD(db, client_uuid, name)
        else:
            self.osc = OSC(db, client_uuid, name)

    def get_uuid(self):
        return self.osc.get_uuid()

    def get_name(self):
        return self.osc.get_name()

    def prepare(self):
        self.osc.prepare()

    def cleanup(self):
        self.osc.cleanup()

    def add_module(self, manager):
        self.osc.add_module(manager)

    def correct_level(self, level, op=None):
        return self.osc.correct_level(level, op)
# virtual interface for MDC and LMV
class VMDC(Module):
    def __init__(self, db, client_uuid, name, name_override = None):
        Module.__init__(self, 'VMDC', db)
        if db.get_class() == 'lmv':
            self.mdc = LMV(db, client_uuid, name, name_override)
        elif db.get_class() == 'cobd':
            self.mdc = COBD(db, client_uuid, name)
        else:
            self.mdc = MDC(db, client_uuid, name)

    def get_uuid(self):
        return self.mdc.uuid

    def get_name(self):
        return self.mdc.name

    def prepare(self):
        self.mdc.prepare()

    def cleanup(self):
        self.mdc.cleanup()

    def add_module(self, manager):
        self.mdc.add_module(manager)

    def correct_level(self, level, op=None):
        return self.mdc.correct_level(level, op)
class ECHO_CLIENT(Module):
    def __init__(self, db):
        Module.__init__(self, 'ECHO_CLIENT', db)
        self.obd_uuid = self.db.get_first_ref('obd')
        obd = self.db.lookup(self.obd_uuid)
        self.uuid = generate_client_uuid(self.name)
        self.osc = VOSC(obd, self.uuid, self.name)

    def prepare(self):
        if not config.record and is_prepared(self.name):
            return
        self.osc.prepare() # XXX This is so cheating. -p
        self.info(self.obd_uuid)

        lctl.newdev("echo_client", self.name, self.uuid,
                    setup = self.osc.get_name())

    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)
        self.osc.cleanup()

    def add_module(self, manager):
        self.osc.add_module(manager)
        manager.add_lustre_module('obdecho', 'obdecho')

    def correct_level(self, level, op=None):
        return level
def generate_client_uuid(name):
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
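
# Shape of the UUID generated above (digits are random, shown only to
# illustrate the format): for name = 'mtpt_client' the pattern
# '%05x_%.19s_%05x%05x' yields something like
#
#   '035a1_mtpt_client_0b2c40d9ef'
#
# The name portion is truncated to 19 characters and the result is clamped
# to 36 characters, the conventional UUID string length.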
class Mountpoint(Module):
    def __init__(self, db):
        Module.__init__(self, 'MTPT', db)
        self.path = self.db.get_val('path')
        self.clientoptions = self.db.get_val('clientoptions', '')
        self.fs_uuid = self.db.get_first_ref('filesystem')
        fs = self.db.lookup(self.fs_uuid)
        self.mds_uuid = fs.get_first_ref('lmv')
        if not self.mds_uuid:
            self.mds_uuid = fs.get_first_ref('mds')
        self.obd_uuid = fs.get_first_ref('obd')
        client_uuid = generate_client_uuid(self.name)

        self.oss_sec = self.db.get_val('oss_sec', 'null')
        self.mds_sec = self.db.get_val('mds_sec', 'null')
        if config.mds_sec:
            self.mds_sec = config.mds_sec
        if config.oss_sec:
            self.oss_sec = config.oss_sec

        ost = self.db.lookup(self.obd_uuid)
        if not ost:
            panic("no ost: ", self.obd_uuid)

        mds = self.db.lookup(self.mds_uuid)
        if not mds:
            panic("no mds: ", self.mds_uuid)

        self.vosc = VOSC(ost, client_uuid, self.name, self.name)
        self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
    def prepare(self):
        if not config.record and fs_is_mounted(self.path):
            log(self.path, "already mounted.")
            return
        self.vosc.prepare()
        self.vmdc.prepare()

        self.info(self.path, self.mds_uuid, self.obd_uuid)
        if config.record or config.lctl_dump:
            lctl.mount_option(local_node_name, self.vosc.get_name(),
                              self.vmdc.get_name())
            return

        if config.clientoptions:
            if self.clientoptions:
                self.clientoptions = self.clientoptions + ',' + config.clientoptions
            else:
                self.clientoptions = config.clientoptions
        if self.clientoptions:
            self.clientoptions = ',' + self.clientoptions
            # the Linux kernel handles 'async' itself and will not pass it on
            # to ll_fill_super, so replace it with the Lustre-specific 'lasync'
            self.clientoptions = string.replace(self.clientoptions, "async",
                                                "lasync")

        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
              (self.vosc.get_name(), self.vmdc.get_name(), self.mds_sec,
               self.oss_sec, self.clientoptions, config.config, self.path)
        run("mkdir", self.path)
        ret, val = run(cmd)
        if ret:
            self.vmdc.cleanup()
            self.vosc.cleanup()
            panic("mount failed:", self.path, ":", string.join(val))
    def cleanup(self):
        self.info(self.path, self.mds_uuid, self.obd_uuid)

        if config.record or config.lctl_dump:
            lctl.del_mount_option(local_node_name)
        else:
            if fs_is_mounted(self.path):
                if config.force:
                    (rc, out) = run("umount", "-f", self.path)
                else:
                    (rc, out) = run("umount", self.path)
                if rc:
                    raise CommandError('umount', out, rc)

            if fs_is_mounted(self.path):
                panic("fs is still mounted:", self.path)

        self.vmdc.cleanup()
        self.vosc.cleanup()
    def add_module(self, manager):
        self.vosc.add_module(manager)
        self.vmdc.add_module(manager)
        manager.add_lustre_module('llite', 'llite')

    def correct_level(self, level, op=None):
        return level
# ============================================================
# misc query functions

def get_ost_net(self, osd_uuid):
    srv_list = []
    if not osd_uuid:
        return srv_list
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    if not node:
        panic("unable to find node for osd_uuid:", osd_uuid,
              " node_ref:", node_uuid)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    return srv_list
# the order of initialization is based on level.
def getServiceLevel(self):
    type = self.get_class()
    ret = 0
    if type in ('network',):
        ret = 5
    elif type in ('routetbl',):
        ret = 6
    elif type in ('ldlm',):
        ret = 20
    elif type in ('osd', 'cobd'):
        ret = 30
    elif type in ('mdsdev',):
        ret = 40
    elif type in ('lmv',):
        ret = 45
    elif type in ('mountpoint', 'echoclient'):
        ret = 70
    elif type in ('cmobd',):
        ret = 80
    else:
        panic("Unknown type: ", type)

    if ret < config.minlevel or ret > config.maxlevel:
        ret = 0
    return ret
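
# Worked example of the clamping above, using the levels assigned in this
# function: with --minlevel 10 --maxlevel 50, a 'network' service (level 5)
# and a 'mountpoint' (level 70) both fall outside the window and come back
# as level 0, so getServices() below drops them, while an 'osd' (level 30)
# is kept.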
#
# return list of services in a profile. list is a list of tuples
# [(level, db_object),]
def getServices(self):
    list = []
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        if servdb:
            level = getServiceLevel(servdb)
            if level > 0:
                list.append((level, servdb))
        else:
            panic('service not found: ' + ref_uuid)
    list.sort()
    return list
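
# Because the level is the first tuple element, list.sort() orders services
# by start-up level; a hypothetical profile with a network, an osd and a
# mountpoint comes back as
#
#   [(5, <network db>), (30, <osd db>), (70, <mountpoint db>)]
#
# which is exactly the order doSetup() wants to prepare them in.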
############################################################
# FIXME: clean this mess up!
#
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
def get_osc(ost_db, uuid, fs_name):
    osc = OSC(ost_db, uuid, fs_name)
    return osc

def get_mdc(db, fs_name, mds_uuid):
    mds_db = db.lookup(mds_uuid)
    if not mds_db:
        error("no mds:", mds_uuid)
    mdc = MDC(mds_db, mds_uuid, fs_name)
    return mdc
############################################################
# routing ("rooting")

# list of (nettype, cluster_id, nid)
local_clusters = []

def find_local_clusters(node_db):
    global local_clusters
    for netuuid in node_db.get_networks():
        net = node_db.lookup(netuuid)
        srv = Network(net)
        debug("add_local", netuuid)
        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
        if srv.port > 0:
            if not acceptors.has_key(srv.port):
                acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
# This node is a gateway.
is_router = 0
def node_is_router():
    return is_router

# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
needs_router = 0
def node_needs_router():
    return needs_router or is_router
# list of (nettype, gw, tgt_cluster_id, lo, hi)
# Currently, these local routes are only added to the kptlrouter route
# table if they are needed to connect to a specific server.  This
# should be changed so all available routes are loaded, and the
# ptlrouter can make all the decisions.
local_routes = []

def find_local_routes(lustre):
    """ Scan the lustre config looking for routers.  Build a list of
    routes. """
    global local_routes, needs_router
    local_routes = []
    list = lustre.lookup_class('node')
    for router in list:
        if router.get_val_int('router', 0):
            needs_router = 1
            for (local_type, local_cluster_id, local_nid) in local_clusters:
                gw = None
                for netuuid in router.get_networks():
                    db = router.lookup(netuuid)
                    if (local_type == db.get_val('nettype') and
                        local_cluster_id == db.get_val('clusterid')):
                        gw = db.get_val('nid')
                        break
                if gw:
                    debug("find_local_routes: gw is", gw)
                    for route in router.get_local_routes(local_type, gw):
                        local_routes.append(route)
    debug("find_local_routes:", local_routes)
def choose_local_server(srv_list):
    for srv in srv_list:
        if local_cluster(srv.net_type, srv.cluster_id):
            return srv
    return None

def local_cluster(net_type, cluster_id):
    for cluster in local_clusters:
        if net_type == cluster[0] and cluster_id == cluster[1]:
            return 1
    return 0

def local_interface(net_type, cluster_id, nid):
    for cluster in local_clusters:
        if (net_type == cluster[0] and cluster_id == cluster[1]
            and nid == cluster[2]):
            return 1
    return 0
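
# Sketch with made-up values: if local_clusters contains
# ('tcp', 0, '192.168.0.10'), then
#
#   local_cluster('tcp', 0)                    -> 1
#   local_interface('tcp', 0, '192.168.0.10') -> 1
#   local_interface('tcp', 0, '192.168.0.11') -> 0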
def find_route(srv_list):
    result = []
    frm_type = local_clusters[0][0]
    for srv in srv_list:
        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
        to_type = srv.net_type
        to = srv.nid
        cluster_id = srv.cluster_id
        debug('looking for route to', to_type, to)
        for r in local_routes:
            debug("find_route: ", r)
            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                result.append((srv, r))
    return result
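
# Route-matching sketch (tuple values invented for illustration): with
# local_routes = [('elan', 5, 1, 10, 20)], a server with cluster_id 1 and
# nid 15 falls inside the [lo, hi] range, so find_route() returns
# [(srv, ('elan', 5, 1, 10, 20))]; Client.prepare() above then passes r[0],
# r[1] and r[3] (nettype, gateway, lo) to lctl.add_route_host().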
def get_active_target(db):
    target_uuid = db.getUUID()
    target_name = db.getName()
    node_name = get_select(target_name)
    if node_name:
        tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
    else:
        tgt_dev_uuid = db.get_first_ref('active')
    return tgt_dev_uuid
def get_server_by_nid_uuid(db, nid_uuid):
    for n in db.lookup_class("network"):
        net = Network(n)
        if net.nid_uuid == nid_uuid:
            return net
    return None
############################################################
# lconf level logic
# Start a service.
def newService(db):
    type = db.get_class()
    debug('Service:', type, db.getName(), db.getUUID())
    n = None
    if type == 'ldlm':
        n = LDLM(db)
    elif type == 'lov':
        n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
    elif type == 'network':
        n = Network(db)
    elif type == 'routetbl':
        n = RouteTable(db)
    elif type == 'osd':
        n = OSD(db)
    elif type == 'cobd':
        n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID",
                 "YOU_SHOULD_NEVER_SEE_THIS_NAME")
    elif type == 'cmobd':
        n = CMOBD(db)
    elif type == 'mdsdev':
        n = MDSDEV(db)
    elif type == 'mountpoint':
        n = Mountpoint(db)
    elif type == 'echoclient':
        n = ECHO_CLIENT(db)
    elif type == 'lmv':
        n = LMV(db)
    else:
        panic("unknown service type:", type)
    return n
#
# Prepare the system to run lustre using a particular profile
# in the configuration.
#  * load the modules
#  * setup networking for the current node
#  * make sure partitions are in place and prepared
#  * initialize devices with lctl
# Levels are important, and need to be enforced.
def for_each_profile(db, prof_list, operation):
    for prof_uuid in prof_list:
        prof_db = db.lookup(prof_uuid)
        if not prof_db:
            panic("profile:", prof_uuid, "not found.")
        services = getServices(prof_db)
        operation(services)
def magic_get_osc(db, rec, lov):
    if lov:
        lov_uuid = lov.get_uuid()
        lov_name = lov.osc.fs_name
    else:
        lov_uuid = rec.getAttribute('lov_uuidref')
        # FIXME: better way to find the mountpoint?
        filesystems = db.root_node.getElementsByTagName('filesystem')
        fsuuid = None
        for fs in filesystems:
            ref = fs.getElementsByTagName('obd_ref')
            if ref[0].getAttribute('uuidref') == lov_uuid:
                fsuuid = fs.getAttribute('uuid')
                break

        if not fsuuid:
            panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")

        mtpts = db.root_node.getElementsByTagName('mountpoint')
        lov_name = None
        for fs in mtpts:
            ref = fs.getElementsByTagName('filesystem_ref')
            if ref[0].getAttribute('uuidref') == fsuuid:
                lov_name = fs.getAttribute('name')
                break

        if not lov_name:
            panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")

    print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name

    ost_uuid = rec.getAttribute('ost_uuidref')
    obd = db.lookup(ost_uuid)

    if not obd:
        panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")

    osc = get_osc(obd, lov_uuid, lov_name)
    if not osc:
        panic('osc not found:', ost_uuid)
    return osc
# write logs for update records.  sadly, logs of all types -- and updates in
# particular -- are something of an afterthought.  lconf needs rewriting with
# these as core concepts.  so this is a pretty big hack.
def process_update_record(db, update, lov):
    for rec in update.childNodes:
        if rec.nodeType != rec.ELEMENT_NODE:
            continue

        log("found " + rec.nodeName + " record in update version " +
            str(update.getAttribute('version')))

        lov_uuid = rec.getAttribute('lov_uuidref')
        ost_uuid = rec.getAttribute('ost_uuidref')
        index = rec.getAttribute('index')
        gen = rec.getAttribute('generation')

        if not lov_uuid or not ost_uuid or not index or not gen:
            panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")

        if not lov:
            tmplov = db.lookup(lov_uuid)
            if not tmplov:
                panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
            lov_name = tmplov.getName()
        else:
            lov_name = lov.osc.name

        # ------------------------------------------------------------- add
        if rec.nodeName == 'add':
            if config.cleanup:
                lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
                continue

            osc = magic_get_osc(db, rec, lov)

            try:
                # Only ignore connect failures with --force, which
                # isn't implemented here yet.
                osc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing OSC %s\n" % osc.uuid
                raise e

            lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)

        # ------------------------------------------------------ deactivate
        elif rec.nodeName == 'deactivate':
            if config.cleanup:
                continue

            osc = magic_get_osc(db, rec, lov)

            try:
                osc.deactivate()
            except CommandError, e:
                print "Error deactivating OSC %s\n" % osc.uuid
                raise e

        # ---------------------------------------------------------- delete
        elif rec.nodeName == 'delete':
            if config.cleanup:
                continue

            osc = magic_get_osc(db, rec, lov)

            try:
                config.cleanup = 1
                osc.cleanup()
                config.cleanup = 0
            except CommandError, e:
                print "Error cleaning up OSC %s\n" % osc.uuid
                raise e

            lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
def process_updates(db, log_device, log_name, lov = None):
    updates = db.root_node.getElementsByTagName('update')
    for u in updates:
        if not u.childNodes:
            log("ignoring empty update record (version " +
                str(u.getAttribute('version')) + ")")
            continue

        version = u.getAttribute('version')
        real_name = "%s-%s" % (log_name, version)
        lctl.clear_log(log_device, real_name)
        lctl.record(log_device, real_name)

        process_update_record(db, u, lov)

        lctl.end_record()
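
# Each update version gets its own config log: for log_name 'client' and
# update versions 2 and 3, this records logs named 'client-2' and
# 'client-3' on log_device, so updates can be replayed in order later.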
def doWriteconf(services):
    if config.nosetup:
        return
    for s in services:
        if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
            n = newService(s[1])
            n.write_conf()

def doSetup(services):
    if config.nosetup:
        return
    slist = []
    for s in services:
        n = newService(s[1])
        if n:
            slist.append((n.level, n))
    nlist = []
    for n in slist:
        nl = n[1].correct_level(n[0])
        nlist.append((nl, n[1]))
    nlist.sort()
    for n in nlist:
        n[1].prepare()
def doLoadModules(services):
    if config.nomod:
        return

    # adding all needed modules from all services
    for s in services:
        n = newService(s[1])
        n.add_module(mod_manager)

    # loading all registered modules
    mod_manager.load_modules()

def doUnloadModules(services):
    if config.nomod:
        return

    # adding all needed modules from all services
    for s in services:
        n = newService(s[1])
        if n.safe_to_clean_modules():
            n.add_module(mod_manager)

    # unloading all registered modules
    mod_manager.cleanup_modules()
def doCleanup(services):
    if config.nosetup:
        return
    slist = []
    for s in services:
        n = newService(s[1])
        if n:
            slist.append((n.level, n))
    nlist = []
    for n in slist:
        nl = n[1].correct_level(n[0])
        nlist.append((nl, n[1]))
    nlist.sort()
    nlist.reverse()
    for n in nlist:
        if n[1].safe_to_clean():
            n[1].cleanup()
def doHost(lustreDB, hosts):
    global is_router, local_node_name
    node_db = None
    for h in hosts:
        node_db = lustreDB.lookup_name(h, 'node')
        if node_db:
            break
    if not node_db:
        panic('No host entry found.')

    local_node_name = node_db.get_val('name', 0)
    is_router = node_db.get_val_int('router', 0)
    lustre_upcall = node_db.get_val('lustreUpcall', '')
    portals_upcall = node_db.get_val('portalsUpcall', '')
    timeout = node_db.get_val_int('timeout', 0)
    ptldebug = node_db.get_val('ptldebug', '')
    subsystem = node_db.get_val('subsystem', '')

    find_local_clusters(node_db)
    if not is_router:
        find_local_routes(lustreDB)
    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    prof_list = node_db.get_refs('profile')

    if config.write_conf:
        for_each_profile(node_db, prof_list, doLoadModules)
        sys_make_devices()
        for_each_profile(node_db, prof_list, doWriteconf)
        for_each_profile(node_db, prof_list, doUnloadModules)

    elif config.recover:
        if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
            raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
                                     "--client_uuid <UUID> --conn_uuid <UUID>")
        doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
                   config.conn_uuid)

    elif config.cleanup:
        if config.force:
            # the command line can override this value
            timeout = 5

        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            for_each_profile(node_db, prof_list, doCleanup)
            return

        sys_set_timeout(timeout)
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)

        for_each_profile(node_db, prof_list, doCleanup)
        for_each_profile(node_db, prof_list, doUnloadModules)

    else:
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            sys_set_timeout(timeout)
            sys_set_lustre_upcall(lustre_upcall)
            for_each_profile(node_db, prof_list, doSetup)
            return

        sys_make_devices()
        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
        sys_tweak_socknal()
        sys_optimize_elan()

        for_each_profile(node_db, prof_list, doLoadModules)

        sys_set_debug_path()
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        script = config.gdb_script
        run(lctl.lctl, ' modules >', script)
        if config.gdb:
            log("The GDB module script is in", script)
            # pause, so user has time to break and
            # load the script
            time.sleep(5)
        sys_set_timeout(timeout)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)

        for_each_profile(node_db, prof_list, doSetup)
def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
    tgt = lustreDB.lookup(tgt_uuid)
    if not tgt:
        raise Lustre.LconfError("doRecovery: " + tgt_uuid + " not found.")
    new_uuid = get_active_target(tgt)
    if not new_uuid:
        raise Lustre.LconfError("doRecovery: no active target found for: " +
                                tgt_uuid)
    net = choose_local_server(get_ost_net(lustreDB, new_uuid))
    if not net:
        raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)

    log("Reconnecting", tgt_uuid, " to ", net.nid_uuid)

    oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
    if oldnet:
        try:
            lctl.disconnect(oldnet)
        except CommandError, e:
            log("recover: disconnect", nid_uuid, "failed: ")
            e.dump()

    try:
        lctl.connect(net)
    except CommandError, e:
        log("recover: connect failed")
        e.dump()

    lctl.recover(client_uuid, net.nid_uuid)
def setupModulePath(cmd, portals_dir = PORTALS_DIR):
    base = os.path.dirname(cmd)
    if development_mode():
        if not config.lustre:
            debug('using objdir module paths')
            config.lustre = (os.path.join(base, ".."))
        # normalize the portals dir, using command line arg if set
        if config.portals:
            portals_dir = config.portals
        dir = os.path.join(config.lustre, portals_dir)
        config.portals = dir
        debug('config.portals', config.portals)
    elif config.lustre and config.portals:
        # if --lustre and --portals, normalize portals
        # we can ignore PORTALS_DIR here, since it is probably useless here
        config.portals = os.path.join(config.lustre, config.portals)
        debug('config.portals B', config.portals)
def sysctl(path, val):
    debug("+ sysctl", path, val)
    if config.noexec:
        return
    try:
        fp = open(os.path.join('/proc/sys', path), 'w')
        fp.write(str(val))
        fp.close()
    except IOError, e:
        panic(str(e))
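
# Usage sketch: sysctl('portals/debug', '0xffffffff') writes the value to
# /proc/sys/portals/debug, while under --noexec only the intended write is
# printed; this is what keeps the sys_set_* helpers below safe to dry-run.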
def sys_set_debug_path():
    sysctl('portals/debug_path', config.debug_path)

def sys_set_lustre_upcall(upcall):
    # the command line overrides the value in the node config
    if config.lustre_upcall:
        upcall = config.lustre_upcall
    elif config.upcall:
        upcall = config.upcall
    if upcall:
        lctl.set_lustre_upcall(upcall)

def sys_set_portals_upcall(upcall):
    # the command line overrides the value in the node config
    if config.portals_upcall:
        upcall = config.portals_upcall
    elif config.upcall:
        upcall = config.upcall
    if upcall:
        sysctl('portals/upcall', upcall)

def sys_set_timeout(timeout):
    # the command line overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    if timeout != None and timeout > 0:
        lctl.set_timeout(timeout)
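
# Precedence sketch for the three helpers above: a value given on the
# command line always wins over the node config, e.g.
#
#   lconf --timeout 100 config.xml
#
# sets the recovery timeout to 100 even if the node entry says 0, while a
# plain 'lconf config.xml' falls back to the node config value.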
def sys_tweak_socknal():
    # reserve at least 8MB, or we run out of RAM in skb_alloc under read
    if sys_get_branch() == '2.6':
        fp = open('/proc/meminfo')
        lines = fp.readlines()
        fp.close()
        memtotal = 131072
        for l in lines:
            a = string.split(l)
            if a[0] == 'MemTotal:':
                memtotal = a[1]
                debug("memtotal" + memtotal)
        if int(memtotal) < 262144:
            minfree = int(memtotal) / 16
        else:
            minfree = 32768
        debug("+ minfree ", minfree)
        sysctl("vm/min_free_kbytes", minfree)
    if config.single_socket:
        sysctl("socknal/typed", 0)
def sys_optimize_elan():
    procfiles = ["/proc/elan/config/eventint_punt_loops",
                 "/proc/qsnet/elan3/config/eventint_punt_loops",
                 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
    for p in procfiles:
        if os.access(p, os.W_OK):
            run("echo 1 > " + p)
def sys_set_ptldebug(ptldebug):
    if config.ptldebug:
        ptldebug = config.ptldebug
    if ptldebug:
        try:
            val = eval(ptldebug, ptldebug_names)
            val = "0x%x" % (val & 0xffffffffL)
            sysctl('portals/debug', val)
        except NameError, e:
            panic(str(e))

def sys_set_subsystem(subsystem):
    if config.subsystem:
        subsystem = config.subsystem
    if subsystem:
        try:
            val = eval(subsystem, subsystem_names)
            val = "0x%x" % (val & 0xffffffffL)
            sysctl('portals/subsystem_debug', val)
        except NameError, e:
            panic(str(e))
def sys_set_netmem_max(path, max):
    debug("setting", path, "to at least", max)
    if config.noexec:
        return
    fp = open(path)
    line = fp.readline()
    fp.close()
    cur = int(line)
    if max > cur:
        fp = open(path, 'w')
        fp.write('%d\n' %(max))
        fp.close()
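
# Example: sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
# rewrites the file only when the current value is smaller than the
# requested maximum, so a larger limit already set by the administrator is
# left alone.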
def sys_make_devices():
    if not os.access('/dev/portals', os.R_OK):
        run('mknod /dev/portals c 10 240')
    if not os.access('/dev/obd', os.R_OK):
        run('mknod /dev/obd c 10 241')
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    syspath = string.split(os.environ['PATH'], ':')
    if new_dir in syspath:
        return
    os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
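
# Example (path is hypothetical): after add_to_path('/usr/lustre/utils'),
# os.environ['PATH'] ends with ':/usr/lustre/utils' unless it was already
# present, so later shell commands launched by run() can find the lustre
# utilities without absolute paths.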
def default_debug_path():
    path = '/tmp/lustre-log'
    if os.path.isdir('/r'):
        return '/r' + path
    else:
        return path

def default_gdb_script():
    script = '/tmp/ogdb'
    if os.path.isdir('/r'):
        return '/r' + script
    else:
        return script
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    for dir in DEFAULT_PATH:
        add_to_path(dir)
# global hack for the --select handling
tgt_select = {}
def init_select(args):
    # args = [service=nodeA,service2=nodeB service3=nodeC]
    global tgt_select
    for arg in args:
        list = string.split(arg, ',')
        for entry in list:
            srv, node = string.split(entry, '=')
            tgt_select[srv] = node

def get_select(srv):
    if tgt_select.has_key(srv):
        return tgt_select[srv]
    return None
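
# Usage sketch (service and node names are hypothetical):
#
#   init_select(['ost1=nodeB,mds1=nodeA'])
#   get_select('ost1')   ->  'nodeB'
#   get_select('ost2')   ->  None
#
# get_active_target() consults this map to pick the failover device row for
# a service that was --select'ed onto a particular node.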
FLAG = Lustre.Options.FLAG
PARAM = Lustre.Options.PARAM
INTPARAM = Lustre.Options.INTPARAM
PARAMLIST = Lustre.Options.PARAMLIST
lconf_options = [
    ('verbose,v', "Print system commands as they are run"),
    ('ldapurl', "LDAP server URL, e.g. ldap://localhost", PARAM),
    ('config', "Cluster config name used for LDAP query", PARAM),
    ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
    ('node', "Load config for <nodename>", PARAM),
    ('sec', "security flavor <null|krb5i|krb5p> between this client and the mds", PARAM),
    ('mds_sec', "security flavor <null|krb5i|krb5p> between this client and the mds", PARAM),
    ('oss_sec', "security flavor <null|krb5i|krb5p> between this client and the ost", PARAM),
    ('mds_mds_sec', "security flavor <null|krb5i|krb5p> between this mds and other mds", PARAM),
    ('mds_oss_sec', "security flavor <null|krb5i|krb5p> between this mds and the ost", PARAM),
    ('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
    ('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
    ('cleanup,d', "Cleans up config. (Shutdown)"),
    ('force,f', "Forced unmounting and/or obd detach during cleanup",
     FLAG, 0),
    ('single_socket', "socknal option: only use one socket instead of bundle",
     FLAG, 0),
    ('failover', """Used to shut down without saving state.
                    This will allow this node to "give up" a service to
                    another node for failover purposes. This will not
                    be a clean shutdown.""",
     FLAG, 0),
    ('gdb', """Prints message after creating gdb module script
               and sleeps for 5 seconds."""),
    ('noexec,n', """Prints the commands and steps that will be run for a
                    config without executing them. This can be used to check
                    whether a config file is doing what it should be doing"""),
    ('nomod', "Skip load/unload module step."),
    ('nosetup', "Skip device setup/cleanup step."),
    ('reformat', "Reformat all devices (without question)"),
    ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
    ('mountfsoptions', "Additional options for mount fs command line", PARAM),
    ('clientoptions', "Additional options for Lustre", PARAM),
    ('dump', "Dump the kernel debug log to file before portals is unloaded",
     PARAM),
    ('write_conf', "Save all the client config information on mds."),
    ('record', "Write config information on mds."),
    ('record_log', "Name of config record log.", PARAM),
    ('record_device', "MDS device name that will record the config commands",
     PARAM),
    ('root_squash', "MDS squash root to appointed uid",
     PARAM),
    ('no_root_squash', "Don't squash root for appointed nid",
     PARAM),
    ('minlevel', "Minimum level of services to configure/cleanup",
     INTPARAM, 0),
    ('maxlevel', """Maximum level of services to configure/cleanup
                    Levels are approximately like:
                             5 - network
                            20 - ldlm
                            30 - osd, cobd
                            40 - mdsdev
                            70 - mountpoint, echo_client, osc, mdc, lov""",
     INTPARAM, 100),
    ('lustre', """Base directory of lustre sources. This parameter will
                  cause lconf to load modules from a source tree.""", PARAM),
    ('portals', """Portals source directory.  If this is a relative path,
                   then it is assumed to be relative to lustre. """, PARAM),
    ('timeout', "Set recovery timeout", INTPARAM),
    ('upcall', "Set both portals and lustre upcall script", PARAM),
    ('lustre_upcall', "Set lustre upcall script", PARAM),
    ('portals_upcall', "Set portals upcall script", PARAM),
    ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
    ('ptldebug', "Set the portals debug level", PARAM),
    ('subsystem', "Set the portals debug subsystem", PARAM),
    ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
    ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
    # Client recovery options
    ('recover', "Recover a device"),
    ('group', "The group of devices to configure or cleanup", PARAM),
    ('tgt_uuid', "The failed target (required for recovery)", PARAM),
    ('client_uuid', "The failed client (required for recovery)", PARAM),
    ('conn_uuid', "The failed connection (required for recovery)", PARAM),

    ('inactive', """The name of an inactive service, to be ignored during
                    mounting (currently OST-only). Can be repeated.""",
     PARAMLIST),
    ]
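
# A few representative invocations (file and node names are arbitrary):
#
#   lconf --reformat --node server1 config.xml
#   lconf --cleanup --force --node server1 config.xml
#   lconf --record --record_device /dev/sda1 --record_log client config.xml
#
# The parsed options land on the global 'config' object, which the rest of
# the script consults as config.node, config.cleanup, config.record, etc.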
def main():
    global lctl, config, toplustreDB, CONFIG_FILE, mod_manager

    # in the upcall this is set to SIG_IGN
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    cl = Lustre.Options("lconf", "config.xml", lconf_options)
    try:
        config, args = cl.parse(sys.argv[1:])
    except Lustre.OptionError, e:
        print e
        sys.exit(1)

    setupModulePath(sys.argv[0])

    host = socket.gethostname()

    # the PRNG is normally seeded with time(), which is not so good for starting
    # time-synchronized clusters
    input = open('/dev/urandom', 'r')
    if not input:
        print 'Unable to open /dev/urandom!'
        sys.exit(1)
    seed = input.read(32)
    input.close()
    random.seed(seed)

    sanitise_path()

    init_select(config.select)

    lustreDB = None
    if len(args) > 0:
        # allow config to be fetched via HTTP, but only with python2
        if sys.version[0] != '1' and args[0].startswith('http://'):
            import urllib2
            try:
                config_file = urllib2.urlopen(args[0])
            except (urllib2.URLError, socket.error), err:
                if hasattr(err, 'args'):
                    err = err.args[-1]
                print "Could not access '%s': %s" %(args[0], err)
                sys.exit(1)
        elif not os.access(args[0], os.R_OK):
            print 'File not found or readable:', args[0]
            sys.exit(1)
        else:
            config_file = open(args[0], 'r')
        try:
            dom = xml.dom.minidom.parse(config_file)
        except Exception:
            panic("%s does not appear to be a config file." % (args[0]))
            sys.exit(1) # make sure to die here, even in debug mode.

        CONFIG_FILE = args[0]
        lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
        if not config.config:
            config.config = os.path.basename(args[0])# use full path?
            if config.config[-4:] == '.xml':
                config.config = config.config[:-4]
    elif config.ldapurl:
        if not config.config:
            panic("--ldapurl requires --config name")
        dn = "config=%s,fs=lustre" % (config.config)
        lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
    elif config.ptldebug or config.subsystem:
        sys_set_ptldebug(None)
        sys_set_subsystem(None)
        sys.exit(0)
    else:
        print 'Missing config file or ldap URL.'
        print 'see lconf --help for command summary'
        sys.exit(1)

    toplustreDB = lustreDB

    ver = lustreDB.get_version()
    if not ver:
        panic("No version found in config data, please recreate.")
    if ver != Lustre.CONFIG_VERSION:
        panic("Config version", ver, "does not match lconf version",
              Lustre.CONFIG_VERSION)

    node_list = []
    if config.node:
        node_list.append(config.node)
    else:
        if len(host) > 0:
            node_list.append(host)
        node_list.append('localhost')

    debug("configuring for host: ", node_list)

    if len(host) > 0:
        config.debug_path = config.debug_path + '-' + host
        config.gdb_script = config.gdb_script + '-' + host

    lctl = LCTLInterface('lctl')

    if config.lctl_dump:
        lctl.use_save_file(config.lctl_dump)

    if config.record:
        if not (config.record_device and config.record_log):
            panic("When recording, both --record_log and --record_device must be specified.")
        lctl.clear_log(config.record_device, config.record_log)
        lctl.record(config.record_device, config.record_log)

    # init module manager
    mod_manager = kmod_manager(config.lustre, config.portals)

    doHost(lustreDB, node_list)

    if not config.record:
        return

    lctl.end_record()

    process_updates(lustreDB, config.record_device, config.record_log)
if __name__ == "__main__":
    try:
        main()
    except Lustre.LconfError, e:
        print e
#        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
    except CommandError, e:
        e.dump()
        sys.exit(e.rc)

    if first_cleanup_error:
        sys.exit(first_cleanup_error)