3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
128 first_cleanup_error = 0
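# remember only the first cleanup error; later failures must not overwrite it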
129 def cleanup_error(rc):
130 global first_cleanup_error
131 if not first_cleanup_error:
132 first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
137 def fixme(msg = "this feature"):
138 raise Lustre.LconfError, msg + ' not implemented yet.'
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
173 # ============================================================
174 # locally defined exceptions
175 class CommandError (exceptions.Exception):
176 def __init__(self, cmd_name, cmd_err, rc=None):
177 self.cmd_name = cmd_name
178 self.cmd_err = cmd_err
183 if type(self.cmd_err) == types.StringType:
185 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187 print "! %s: %s" % (self.cmd_name, self.cmd_err)
188 elif type(self.cmd_err) == types.ListType:
190 print "! %s (error %d):" % (self.cmd_name, self.rc)
192 print "! %s:" % (self.cmd_name)
193 for s in self.cmd_err:
194 print "> %s" %(string.strip(s))
199 # ============================================================
200 # handle daemons, like the acceptor
202 """ Manage starting and stopping a daemon. Assumes daemon manages
203 its own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
229 #time.sleep(1) # let daemon die
231 log("unable to kill", self.command, e)
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
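# read the pid recorded in this daemon's pid file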
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
278 def command_line(self):
279 return string.join(map(str,(self.flags, self.port)))
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
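# start the acceptor registered for a single port, unless it is already running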
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize the class by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
330 def use_save_file(self, file):
331 self.save_file = file
333 def record(self, dev_name, logname):
334 log("Recording log", logname, "on", dev_name)
335 self.record_device = dev_name
336 self.record_log = logname
338 def end_record(self):
339 log("End recording log", self.record_log, "on", self.record_device)
340 self.record_device = None
341 self.record_log = None
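# put a file descriptor into non-blocking mode so reads from the lctl child never block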
343 def set_nonblock(self, fd):
344 fl = fcntl.fcntl(fd, F_GETFL)
345 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From the "Python Cookbook" (O'Reilly)
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
405 def runcmd(self, *args):
407 run lctl using the command line
409 cmd = string.join(map(str,args))
410 debug("+", self.lctl, cmd)
411 rc, out = run(self.lctl, cmd)
413 raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # delete a route to a host
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
632 self.attach(type, name, uuid);
634 self.setup(name, setup)
635 except CommandError, e:
636 self.cleanup(name, uuid, 0)
640 def cleanup(self, name, uuid, force, failover = 0):
641 if failover: force = 1
647 quit""" % (name, ('', 'force')[force],
648 ('', 'failover')[failover])
652 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
653 stripe_sz, stripe_off, pattern, devlist = None):
656 lov_setup %s %d %d %d %s %s
657 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
661 # add an OBD to a LOV
662 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
664 lov_modify_tgts add %s %s %s %s
665 quit""" % (name, obd_uuid, index, gen)
669 def lmv_setup(self, name, uuid, desc_uuid, devlist):
673 quit""" % (name, uuid, desc_uuid, devlist)
676 # delete an OBD from a LOV
677 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
679 lov_modify_tgts del %s %s %s %s
680 quit""" % (name, obd_uuid, index, gen)
684 def deactivate(self, name):
692 def dump(self, dump_file):
695 quit""" % (dump_file)
698 # get list of devices
699 def device_list(self):
700 devices = '/proc/fs/lustre/devices'
702 if os.access(devices, os.R_OK):
704 fp = open(devices, 'r')
712 def lustre_version(self):
713 rc, out = self.runcmd('version')
717 def mount_option(self, profile, osc, mdc):
719 mount_option %s %s %s
720 quit""" % (profile, osc, mdc)
723 # delete mount options
724 def del_mount_option(self, profile):
730 def set_timeout(self, timeout):
736 def set_lustre_upcall(self, upcall):
741 # ============================================================
742 # Various system-level functions
743 # (ideally moved to their own module)
745 # Run a command and return the output and status.
746 # stderr is merged into stdout (2>&1); popen3 could be used to
747 # capture it separately if necessary
750 if config.noexec: return (0, [])
751 f = os.popen(cmd + ' 2>&1')
761 cmd = string.join(map(str,args))
764 # Run a command in the background.
765 def run_daemon(*args):
766 cmd = string.join(map(str,args))
768 if config.noexec: return 0
769 f = os.popen(cmd + ' 2>&1')
777 # Determine full path to use for an external command
778 # searches dirname(argv[0]) first, then PATH
780 syspath = string.split(os.environ['PATH'], ':')
781 cmdpath = os.path.dirname(sys.argv[0])
782 syspath.insert(0, cmdpath);
784 syspath.insert(0, os.path.join(config.portals, 'utils/'))
786 prog = os.path.join(d,cmd)
787 if os.access(prog, os.X_OK):
791 # Recursively look for file starting at base dir
792 def do_find_file(base, mod):
793 fullname = os.path.join(base, mod)
794 if os.access(fullname, os.R_OK):
796 for d in os.listdir(base):
797 dir = os.path.join(base,d)
798 if os.path.isdir(dir):
799 module = do_find_file(dir, mod)
803 # is the path a block device?
810 return stat.S_ISBLK(s[stat.ST_MODE])
812 # find the journal device from mkfs options
818 while i < len(x) - 1:
819 if x[i] == '-J' and x[i+1].startswith('device='):
825 # build fs according to type
827 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
833 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
835 # devsize is in 1k, and fs block count is in 4k
836 block_cnt = devsize/4
838 if fstype in ('ext3', 'extN', 'ldiskfs'):
839 # ext3 journal size is in megabytes
840 # but don't set jsize if mkfsoptions indicates a separate journal device
841 if jsize == 0 and jdev(mkfsoptions) == '':
843 if not is_block(dev):
844 ret, out = runcmd("ls -l %s" %dev)
845 devsize = int(string.split(out[0])[4]) / 1024
847 # sfdisk works for symlink, hardlink, and realdev
848 ret, out = runcmd("sfdisk -s %s" %dev)
850 devsize = int(out[0])
852 # sfdisk -s will fail for a block device that is too large,
853 # so read the partition size from /proc/partitions instead
855 # get the realpath of the device
856 # it may be the real device, such as /dev/hda7
857 # or the hardlink created via mknod for a device
858 if 'realpath' in dir(os.path):
859 real_dev = os.path.realpath(dev)
863 while os.path.islink(real_dev) and (link_count < 20):
864 link_count = link_count + 1
865 dev_link = os.readlink(real_dev)
866 if os.path.isabs(dev_link):
869 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
871 panic("Encountered too many symbolic links resolving block device:", dev)
873 # get the major and minor number of the realpath via ls
874 # it seems python(os.stat) does not return
875 # the st_rdev member of the stat structure
876 ret, out = runcmd("ls -l %s" %real_dev)
877 major = string.split(string.split(out[0])[4], ",")[0]
878 minor = string.split(out[0])[5]
880 # get the devsize from /proc/partitions with the major and minor number
881 ret, out = runcmd("cat /proc/partitions")
884 if string.split(line)[0] == major and string.split(line)[1] == minor:
885 devsize = int(string.split(line)[2])
888 if devsize > 1024 * 1024:
889 jsize = ((devsize / 102400) * 4)
892 if jsize: jopt = "-J size=%d" %(jsize,)
893 if isize: iopt = "-I %d" %(isize,)
894 mkfs = 'mkfs.ext2 -j -b 4096 '
895 if not isblock or config.force:
897 if jdev(mkfsoptions) != '':
898 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
900 jmkfs = jmkfs + '-F '
901 jmkfs = jmkfs + jdev(mkfsoptions)
902 (ret, out) = run (jmkfs)
904 panic("Unable to format journal device:", jdev(mkfsoptions), string.join(out))
905 elif fstype == 'reiserfs':
906 # reiserfs journal size is in blocks
907 if jsize: jopt = "--journal_size %d" %(jsize,)
908 mkfs = 'mkreiserfs -ff'
910 panic('unsupported fs type: ', fstype)
912 if config.mkfsoptions != None:
913 mkfs = mkfs + ' ' + config.mkfsoptions
914 if mkfsoptions != None:
915 mkfs = mkfs + ' ' + mkfsoptions
916 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
918 panic("Unable to build fs:", dev, string.join(out))
919 # enable hash tree indexing on the fs
920 if fstype in ('ext3', 'extN', 'ldiskfs'):
921 htree = 'echo "feature FEATURE_C5" | debugfs -w'
922 (ret, out) = run (htree, dev)
924 panic("Unable to enable htree:", dev)
926 # some systems use /dev/loopN, some /dev/loop/N
930 if not os.access(loop + str(0), os.R_OK):
932 if not os.access(loop + str(0), os.R_OK):
933 panic ("can't access loop devices")
936 # find loop device assigned to the file
937 def find_assigned_loop(file):
939 for n in xrange(0, MAX_LOOP_DEVICES):
941 if os.access(dev, os.R_OK):
942 (stat, out) = run('losetup', dev)
943 if out and stat == 0:
944 m = re.search(r'\((.*)\)', out[0])
945 if m and file == m.group(1):
949 # find free loop device
950 def find_free_loop(file):
953 # find next free loop
954 for n in xrange(0, MAX_LOOP_DEVICES):
956 if os.access(dev, os.R_OK):
957 (stat, out) = run('losetup', dev)
962 # create file if necessary and assign the first free loop device
963 def init_loop(file, size, fstype, journal_size, inode_size,
964 mkfsoptions, reformat, autoformat, backfstype, backfile):
967 realfstype = backfstype
968 if is_block(backfile):
969 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
970 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
976 dev = find_assigned_loop(realfile)
978 print 'WARNING: file', realfile, 'already mapped to', dev
981 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
982 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
984 panic("Unable to create backing store:", realfile)
985 mkfs(realfile, size, realfstype, journal_size, inode_size,
986 mkfsoptions, isblock=0)
988 dev = find_free_loop(realfile)
990 print "attach " + realfile + " <-> " + dev
991 run('losetup', dev, realfile)
994 print "out of loop devices"
997 # undo loop assignment
998 def clean_loop(dev, fstype, backfstype, backdev):
1003 if not is_block(realfile):
1004 dev = find_assigned_loop(realfile)
1006 print "detach " + dev + " <-> " + realfile
1007 ret, out = run('losetup -d', dev)
1009 log('unable to clean loop device', dev, 'for file', realfile)
1012 # finalizes passed device
1013 def clean_dev(dev, fstype, backfstype, backdev):
1014 if fstype == 'smfs' or not is_block(dev):
1015 clean_loop(dev, fstype, backfstype, backdev)
1017 # determine if dev is formatted as a <fstype> filesystem
1018 def need_format(fstype, dev):
1019 # FIXME don't know how to implement this
1022 # initialize a block device if needed
1023 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1024 inode_size, mkfsoptions, backfstype, backdev):
1028 if fstype == 'smfs' or not is_block(dev):
1029 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1030 mkfsoptions, reformat, autoformat, backfstype, backdev)
1031 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1032 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1035 # panic("device:", dev,
1036 # "not prepared, and autoformat is not set.\n",
1037 # "Rerun with --reformat option to format ALL filesystems")
1042 """lookup IP address for an interface"""
1043 rc, out = run("/sbin/ifconfig", iface)
1046 addr = string.split(out[1])[1]
1047 ip = string.split(addr, ':')[1]
1050 def def_mount_options(fstype, target):
1051 """returns default mount options for passed fstype and target (mds, ost)"""
1052 if fstype == 'ext3' or fstype == 'ldiskfs':
1053 mountfsoptions = "errors=remount-ro"
1054 if target == 'ost' and sys_get_branch() == '2.4':
1055 mountfsoptions = "%s,asyncdel" % (mountfsoptions)
1056 if target == 'ost' and sys_get_branch() == '2.6':
1057 mountfsoptions = "%s,extents,mballoc" % (mountfsoptions)
1058 return mountfsoptions
1061 def sys_get_elan_position_file():
1062 procfiles = ["/proc/elan/device0/position",
1063 "/proc/qsnet/elan4/device0/position",
1064 "/proc/qsnet/elan3/device0/position"]
1066 if os.access(p, os.R_OK):
1070 def sys_get_local_nid(net_type, wildcard, cluster_id):
1071 """Return the local nid."""
1073 if sys_get_elan_position_file():
1074 local = sys_get_local_address('elan', '*', cluster_id)
1076 local = sys_get_local_address(net_type, wildcard, cluster_id)
1079 def sys_get_local_address(net_type, wildcard, cluster_id):
1080 """Return the local address for the network type."""
1082 if net_type in ('tcp','openib','iib','vib','ra'):
1084 iface, star = string.split(wildcard, ':')
1085 local = if2addr(iface)
1087 panic ("unable to determine ip for:", wildcard)
1089 host = socket.gethostname()
1090 local = socket.gethostbyname(host)
1091 elif net_type == 'elan':
1092 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1093 f = sys_get_elan_position_file()
1095 panic ("unable to determine local Elan ID")
1098 lines = fp.readlines()
1102 if a[0] == 'NodeId':
1106 nid = my_int(cluster_id) + my_int(elan_id)
1107 local = "%d" % (nid)
1108 except ValueError, e:
1112 elif net_type == 'lo':
1113 fixme("automatic local address for loopback")
1114 elif net_type == 'gm':
1115 fixme("automatic local address for GM")
1119 def sys_get_branch():
1120 """Returns kernel release"""
1122 fp = open('/proc/sys/kernel/osrelease')
1123 lines = fp.readlines()
1127 version = string.split(l)
1128 a = string.split(version[0], '.')
1129 return a[0] + '.' + a[1]
1134 # XXX: instead of device_list, ask for $name and see what we get
1135 def is_prepared(name):
1136 """Return true if a device exists for the name"""
1137 if config.lctl_dump:
1139 if (config.noexec or config.record) and config.cleanup:
1142 # expect this format:
1143 # 1 UP ldlm ldlm ldlm_UUID 2
1144 out = lctl.device_list()
1146 if name == string.split(s)[3]:
1148 except CommandError, e:
1152 def net_is_prepared():
1153 """If any device exists, then assume that all networking
1154 has been configured"""
1155 out = lctl.device_list()
1158 def fs_is_mounted(path):
1159 """Return true if path is a mounted lustre filesystem"""
1161 fp = open('/proc/mounts')
1162 lines = fp.readlines()
1166 if a[1] == path and a[2] == 'lustre_lite':
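# locate a kernel module binary (.ko or .o) under the given source tree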
1172 def kmod_find(src_dir, dev_dir, modname):
1173 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1174 for modext in '.ko', '.o':
1175 module = modbase + modext
1177 if os.access(module, os.R_OK):
1183 def kmod_info(modname):
1184 """Returns reference count for passed module name."""
1186 fp = open('/proc/modules')
1187 lines = fp.readlines()
1190 # please forgive my tired fingers for this one
1191 ret = filter(lambda word, mod = modname: word[0] == mod,
1192 map(lambda line: string.split(line), lines))
1196 except Exception, e:
1200 """Represents a kernel module."""
1201 def __init__(self, src_dir, dev_dir, name):
1202 self.src_dir = src_dir
1203 self.dev_dir = dev_dir
1206 # FIXME we ignore failure to load the gss module, because we might
1207 # not need it at all.
1210 log ('loading module:', self.name, 'srcdir',
1211 self.src_dir, 'devdir', self.dev_dir)
1213 module = kmod_find(self.src_dir, self.dev_dir,
1215 if not module and self.name != 'ptlrpcs_gss':
1216 panic('module not found:', self.name)
1217 (rc, out) = run('/sbin/insmod', module)
1219 if self.name == 'ptlrpcs_gss':
1220 print "Warning: gss security not supported!"
1222 raise CommandError('insmod', out, rc)
1224 (rc, out) = run('/sbin/modprobe', self.name)
1226 if self.name == 'ptlrpcs_gss':
1227 print "Warning: gss security not supported!"
1229 raise CommandError('modprobe', out, rc)
1233 log('unloading module:', self.name)
1234 (rc, out) = run('/sbin/rmmod', self.name)
1236 log('unable to unload module:', self.name +
1237 "(" + self.refcount() + ")")
1241 """Returns module info if any."""
1242 return kmod_info(self.name)
1245 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1252 """Returns module refcount."""
1259 """Returns 1 if module is used, otherwise 0 is returned."""
1265 if users and users != '(unused)' and users != '-':
1273 """Returns 1 if module is busy, otherwise 0 is returned."""
1274 if self.loaded() and (self.used() or self.refcount() != '0'):
1280 """Manage kernel modules"""
1281 def __init__(self, lustre_dir, portals_dir):
1282 self.lustre_dir = lustre_dir
1283 self.portals_dir = portals_dir
1284 self.kmodule_list = []
1286 def find_module(self, modname):
1287 """Find module by module name"""
1288 for mod in self.kmodule_list:
1289 if mod.name == modname:
1293 def add_portals_module(self, dev_dir, modname):
1294 """Append a module to list of modules to load."""
1296 mod = self.find_module(modname)
1298 mod = kmod(self.portals_dir, dev_dir, modname)
1299 self.kmodule_list.append(mod)
1301 def add_lustre_module(self, dev_dir, modname):
1302 """Append a module to list of modules to load."""
1304 mod = self.find_module(modname)
1306 mod = kmod(self.lustre_dir, dev_dir, modname)
1307 self.kmodule_list.append(mod)
1309 def load_modules(self):
1310 """Load all the modules in the list in the order they appear."""
1311 for mod in self.kmodule_list:
1312 if mod.loaded() and not config.noexec:
1316 def cleanup_modules(self):
1317 """Unload the modules in the list in reverse order."""
1318 rev = self.kmodule_list
1321 if (not mod.loaded() or mod.busy()) and not config.noexec:
1324 if mod.name == 'portals' and config.dump:
1325 lctl.dump(config.dump)
1328 # ============================================================
1329 # Classes to prepare and cleanup the various objects
1332 """ Base class for the rest of the modules. The default cleanup method is
1333 defined here, as well as some utility funcs.
1335 def __init__(self, module_name, db):
1337 self.module_name = module_name
1338 self.name = self.db.getName()
1339 self.uuid = self.db.getUUID()
1343 def info(self, *args):
1344 msg = string.join(map(str,args))
1345 print self.module_name + ":", self.name, self.uuid, msg
1348 """ default cleanup, used for most modules """
1351 lctl.cleanup(self.name, self.uuid, config.force)
1352 except CommandError, e:
1353 log(self.module_name, "cleanup failed: ", self.name)
1357 def add_module(self, manager):
1358 """Adds all needed modules in the order they appear."""
1361 def safe_to_clean(self):
1364 def safe_to_clean_modules(self):
1365 return self.safe_to_clean()
1367 class Network(Module):
1368 def __init__(self,db):
1369 Module.__init__(self, 'NETWORK', db)
1370 self.net_type = self.db.get_val('nettype')
1371 self.nid = self.db.get_val('nid', '*')
1372 self.cluster_id = self.db.get_val('clusterid', "0")
1373 self.port = self.db.get_val_int('port', 0)
1376 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1378 panic("unable to set nid for", self.net_type, self.nid, self.cluster_id)
1379 self.generic_nid = 1
1380 debug("nid:", self.nid)
1382 self.generic_nid = 0
1384 self.nid_uuid = self.nid_to_uuid(self.nid)
1385 self.hostaddr = self.db.get_hostaddr()
1386 if len(self.hostaddr) == 0:
1387 self.hostaddr.append(self.nid)
1388 if '*' in self.hostaddr[0]:
1389 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1390 if not self.hostaddr[0]:
1391 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1392 debug("hostaddr:", self.hostaddr[0])
1394 def add_module(self, manager):
1395 manager.add_portals_module("libcfs", 'libcfs')
1396 manager.add_portals_module("portals", 'portals')
1398 if node_needs_router():
1399 manager.add_portals_module("router", 'kptlrouter')
1400 if self.net_type == 'tcp':
1401 manager.add_portals_module("knals/socknal", 'ksocknal')
1402 if self.net_type == 'elan':
1403 manager.add_portals_module("knals/qswnal", 'kqswnal')
1404 if self.net_type == 'gm':
1405 manager.add_portals_module("knals/gmnal", 'kgmnal')
1406 if self.net_type == 'openib':
1407 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1408 if self.net_type == 'iib':
1409 manager.add_portals_module("knals/iibnal", 'kiibnal')
1410 if self.net_type == 'vib':
1411 manager.add_portals_module("knals/vibnal", 'kvibnal')
1412 if self.net_type == 'lo':
1413 manager.add_portals_module("knals/lonal", 'klonal')
1414 if self.net_type == 'ra':
1415 manager.add_portals_module("knals/ranal", 'kranal')
1417 def nid_to_uuid(self, nid):
1418 return "NID_%s_UUID" %(nid,)
1421 if not config.record and net_is_prepared():
1423 self.info(self.net_type, self.nid, self.port)
1424 if not (config.record and self.generic_nid):
1425 lctl.network(self.net_type, self.nid)
1426 if self.net_type == 'tcp':
1428 for hostaddr in self.db.get_hostaddr():
1429 ip = string.split(hostaddr, '/')[0]
1430 if len(string.split(hostaddr, '/')) == 2:
1431 netmask = string.split(hostaddr, '/')[1]
1434 lctl.add_interface(self.net_type, ip, netmask)
1435 if self.net_type == 'elan':
1437 if self.port and node_is_router():
1438 run_one_acceptor(self.port)
1439 self.connect_peer_gateways()
1441 def connect_peer_gateways(self):
1442 for router in self.db.lookup_class('node'):
1443 if router.get_val_int('router', 0):
1444 for netuuid in router.get_networks():
1445 net = self.db.lookup(netuuid)
1447 if (gw.cluster_id == self.cluster_id and
1448 gw.net_type == self.net_type):
1449 if gw.nid != self.nid:
1452 def disconnect_peer_gateways(self):
1453 for router in self.db.lookup_class('node'):
1454 if router.get_val_int('router', 0):
1455 for netuuid in router.get_networks():
1456 net = self.db.lookup(netuuid)
1458 if (gw.cluster_id == self.cluster_id and
1459 gw.net_type == self.net_type):
1460 if gw.nid != self.nid:
1463 except CommandError, e:
1464 print "disconnect failed: ", self.name
1468 def safe_to_clean(self):
1469 return not net_is_prepared()
1472 self.info(self.net_type, self.nid, self.port)
1474 stop_acceptor(self.port)
1475 if node_is_router():
1476 self.disconnect_peer_gateways()
1477 if self.net_type == 'tcp':
1478 for hostaddr in self.db.get_hostaddr():
1479 ip = string.split(hostaddr, '/')[0]
1480 lctl.del_interface(self.net_type, ip)
1482 def correct_level(self, level, op=None):
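# sets up portals routes from the configuration and connects to the route gateways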
1485 class RouteTable(Module):
1486 def __init__(self,db):
1487 Module.__init__(self, 'ROUTES', db)
1489 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1491 # only setup connections for tcp, openib, iib, vib and ra NALs
1493 if not net_type in ('tcp','openib','iib','vib','ra'):
1496 # connect to target if route is to single node and this node is the gw
1497 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1498 if not local_cluster(net_type, tgt_cluster_id):
1499 panic("target", lo, " not on the local cluster")
1500 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1501 # connect to gateway if this node is not the gw
1502 elif (local_cluster(net_type, gw_cluster_id)
1503 and not local_interface(net_type, gw_cluster_id, gw)):
1504 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1509 panic("no server for nid", lo)
1512 return Network(srvdb)
1515 if not config.record and net_is_prepared():
1518 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1519 lctl.add_route(net_type, gw, lo, hi)
1520 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1524 def safe_to_clean(self):
1525 return not net_is_prepared()
1528 if net_is_prepared():
1529 # the network is still being used, don't clean it up
1531 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1532 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1535 lctl.disconnect(srv)
1536 except CommandError, e:
1537 print "disconnect failed: ", self.name
1542 lctl.del_route(net_type, gw, lo, hi)
1543 except CommandError, e:
1544 print "del_route failed: ", self.name
1548 class Management(Module):
1549 def __init__(self, db):
1550 Module.__init__(self, 'MGMT', db)
1552 def add_module(self, manager):
1553 manager.add_lustre_module('lvfs', 'lvfs')
1554 manager.add_lustre_module('obdclass', 'obdclass')
1555 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1556 manager.add_lustre_module('mgmt', 'mgmt_svc')
1559 if not config.record and is_prepared(self.name):
1562 lctl.newdev("mgmt", self.name, self.uuid)
1564 def safe_to_clean(self):
1568 if is_prepared(self.name):
1569 Module.cleanup(self)
1571 def correct_level(self, level, op=None):
1574 # This is only needed to load the modules; the LDLM device
1575 # is now created automatically.
1577 def __init__(self,db):
1578 Module.__init__(self, 'LDLM', db)
1580 def add_module(self, manager):
1581 manager.add_lustre_module('lvfs', 'lvfs')
1582 manager.add_lustre_module('obdclass', 'obdclass')
1583 manager.add_lustre_module('sec', 'ptlrpcs')
1584 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1585 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1593 def correct_level(self, level, op=None):
1597 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1598 Module.__init__(self, 'LOV', db)
1599 if name_override != None:
1600 self.name = "lov_%s" % name_override
1601 self.mds_uuid = self.db.get_first_ref('mds')
1602 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1603 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1604 self.pattern = self.db.get_val_int('stripepattern', 0)
1605 self.devlist = self.db.get_lov_tgts('lov_tgt')
1606 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1609 self.desc_uuid = self.uuid
1610 self.uuid = generate_client_uuid(self.name)
1611 self.fs_name = fs_name
1613 self.config_only = 1
1615 self.config_only = None
1616 mds = self.db.lookup(self.mds_uuid)
1617 self.mds_name = mds.getName()
1618 for (obd_uuid, index, gen, active) in self.devlist:
1621 self.obdlist.append(obd_uuid)
1622 obd = self.db.lookup(obd_uuid)
1623 osc = get_osc(obd, self.uuid, fs_name)
1625 self.osclist.append((osc, index, gen, active))
1627 panic('osc not found:', obd_uuid)
1633 if not config.record and is_prepared(self.name):
1635 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1636 self.stripe_off, self.pattern, self.devlist,
1638 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1639 self.stripe_sz, self.stripe_off, self.pattern,
1640 string.join(self.obdlist))
1641 for (osc, index, gen, active) in self.osclist:
1642 target_uuid = osc.target_uuid
1644 # Only ignore connect failures with --force, which
1645 # isn't implemented here yet.
1647 osc.prepare(ignore_connect_failure=0)
1648 except CommandError, e:
1649 print "Error preparing OSC %s\n" % osc.uuid
1651 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1654 for (osc, index, gen, active) in self.osclist:
1655 target_uuid = osc.target_uuid
1657 if is_prepared(self.name):
1658 Module.cleanup(self)
1659 if self.config_only:
1660 panic("Can't clean up config_only LOV ", self.name)
1662 def add_module(self, manager):
1663 if self.config_only:
1664 panic("Can't load modules for config_only LOV ", self.name)
1665 for (osc, index, gen, active) in self.osclist:
1666 osc.add_module(manager)
1668 manager.add_lustre_module('lov', 'lov')
1670 def correct_level(self, level, op=None):
1674 def __init__(self, db, uuid, fs_name, name_override = None):
1675 Module.__init__(self, 'LMV', db)
1676 if name_override != None:
1677 self.name = "lmv_%s" % name_override
1679 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1680 if self.devlist == None:
1681 self.devlist = self.db.get_refs('mds')
1684 self.desc_uuid = self.uuid
1686 self.fs_name = fs_name
1687 for mds_uuid in self.devlist:
1688 mds = self.db.lookup(mds_uuid)
1690 panic("MDS not found!")
1691 mdc = MDC(mds, self.uuid, fs_name)
1693 self.mdclist.append(mdc)
1695 panic('mdc not found:', mds_uuid)
1698 if is_prepared(self.name):
1702 for mdc in self.mdclist:
1704 # Only ignore connect failures with --force, which
1705 # isn't implemented here yet.
1706 mdc.prepare(ignore_connect_failure=0)
1707 except CommandError, e:
1708 print "Error preparing LMV %s\n" % mdc.uuid
1711 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1712 string.join(self.devlist))
1715 for mdc in self.mdclist:
1717 if is_prepared(self.name):
1718 Module.cleanup(self)
1720 def add_module(self, manager):
1721 for mdc in self.mdclist:
1722 mdc.add_module(manager)
1724 manager.add_lustre_module('lmv', 'lmv')
1726 def correct_level(self, level, op=None):
1729 class CONFDEV(Module):
1730 def __init__(self, db, name, target_uuid, uuid):
1731 Module.__init__(self, 'CONFDEV', db)
1732 self.devpath = self.db.get_val('devpath','')
1733 self.backdevpath = self.db.get_val('devpath','')
1734 self.size = self.db.get_val_int('devsize', 0)
1735 self.journal_size = self.db.get_val_int('journalsize', 0)
1736 self.fstype = self.db.get_val('fstype', '')
1737 self.backfstype = self.db.get_val('backfstype', '')
1738 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1739 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1740 self.target = self.db.lookup(target_uuid)
1741 self.name = "conf_%s" % self.target.getName()
1742 self.client_uuids = self.target.get_refs('client')
1743 self.obdtype = self.db.get_val('obdtype', '')
1745 self.mds_sec = self.db.get_val('mds_sec', '')
1746 self.oss_sec = self.db.get_val('oss_sec', '')
1747 self.deny_sec = self.db.get_val('deny_sec', '')
1749 if config.mds_mds_sec:
1750 self.mds_sec = config.mds_mds_sec
1751 if config.mds_oss_sec:
1752 self.oss_sec = config.mds_oss_sec
1753 if config.mds_deny_sec:
1755 self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
1757 self.deny_sec = config.mds_deny_sec
1759 if self.obdtype == None:
1760 self.obdtype = 'dumb'
1762 self.conf_name = name
1763 self.conf_uuid = uuid
1764 self.realdev = self.devpath
1769 lmv_uuid = self.db.get_first_ref('lmv')
1770 if lmv_uuid != None:
1771 self.lmv = self.db.lookup(lmv_uuid)
1772 if self.lmv != None:
1773 self.client_uuids = self.lmv.get_refs('client')
1775 if self.target.get_class() == 'mds':
1776 if self.target.get_val('failover', 0):
1777 self.failover_mds = 'f'
1779 self.failover_mds = 'n'
1780 self.format = self.db.get_val('autoformat', "no")
1782 self.format = self.db.get_val('autoformat', "yes")
1783 self.osdtype = self.db.get_val('osdtype')
1784 ost = self.db.lookup(target_uuid)
1785 if ost.get_val('failover', 0):
1786 self.failover_ost = 'f'
1788 self.failover_ost = 'n'
1790 self.inode_size = self.get_inode_size()
1792 if self.lmv != None:
1793 client_uuid = self.name + "_lmv_UUID"
1794 self.master = LMV(self.lmv, client_uuid,
1795 self.conf_name, self.conf_name)
1797 def get_inode_size(self):
1798 inode_size = self.db.get_val_int('inodesize', 0)
1799 if inode_size == 0 and self.target.get_class() == 'mds':
1801 # default inode size for the case when neither LOV nor
1802 # LMV is accessible.
1803 self.inode_size = 256
1805 # find the LOV for this MDS
1806 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1807 if lovconfig_uuid or self.lmv != None:
1808 if self.lmv != None:
1809 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1810 lovconfig = self.lmv.lookup(lovconfig_uuid)
1811 lov_uuid = lovconfig.get_first_ref('lov')
1812 if lov_uuid == None:
1813 panic(self.target.getName() + ": No LOV found for lovconfig ",
1816 lovconfig = self.target.lookup(lovconfig_uuid)
1817 lov_uuid = lovconfig.get_first_ref('lov')
1818 if lov_uuid == None:
1819 panic(self.target.getName() + ": No LOV found for lovconfig ",
1821 if self.lmv != None:
1822 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1823 lovconfig = self.lmv.lookup(lovconfig_uuid)
1824 lov_uuid = lovconfig.get_first_ref('lov')
1826 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1829 # default stripe count controls default inode_size
1830 if lov.stripe_cnt > 0:
1831 stripe_count = lov.stripe_cnt
1833 stripe_count = len(lov.devlist)
1834 if stripe_count > 77:
1836 elif stripe_count > 35:
1838 elif stripe_count > 13:
1840 #elif stripe_count > 3:
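# assemble the mount options: fstype defaults plus command-line and per-device overrides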
1847 def get_mount_options(self, blkdev):
1848 options = def_mount_options(self.fstype,
1849 self.target.get_class())
1851 if config.mountfsoptions:
1853 options = "%s,%s" %(options, config.mountfsoptions)
1855 options = config.mountfsoptions
1856 if self.mountfsoptions:
1857 options = "%s,%s" %(options, self.mountfsoptions)
1859 if self.mountfsoptions:
1861 options = "%s,%s" %(options, self.mountfsoptions)
1863 options = self.mountfsoptions
1865 if self.fstype == 'smfs':
1867 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1870 options = "type=%s,dev=%s" %(self.backfstype,
1873 if self.target.get_class() == 'mds':
1875 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1877 options = "iopen_nopriv"
1882 if is_prepared(self.name):
1885 blkdev = block_dev(self.devpath, self.size, self.fstype,
1886 config.reformat, self.format, self.journal_size,
1887 self.inode_size, self.mkfsoptions, self.backfstype,
1890 if self.fstype == 'smfs':
1895 mountfsoptions = self.get_mount_options(blkdev)
1897 self.info(self.target.get_class(), realdev, mountfsoptions,
1898 self.fstype, self.size, self.format)
1900 lctl.newdev("confobd", self.name, self.uuid,
1901 setup ="%s %s %s" %(realdev, self.fstype,
1904 self.mountfsoptions = mountfsoptions
1905 self.realdev = realdev
1907 def add_module(self, manager):
1908 manager.add_lustre_module('obdclass', 'confobd')
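# record the target's configuration logs (server setup, client mount options, cleanup) on the confobd device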
1910 def write_conf(self):
1911 if self.target.get_class() == 'ost':
1913 lctl.clear_log(self.name, self.target.getName() + '-conf')
1914 lctl.record(self.name, self.target.getName() + '-conf')
1915 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1916 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1918 self.mountfsoptions))
1920 lctl.clear_log(self.name, 'OSS-conf')
1921 lctl.record(self.name, 'OSS-conf')
1922 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
1927 if self.target.get_class() == 'mds':
1928 if self.master != None:
1929 master_name = self.master.name
1931 master_name = 'dumb'
1934 lctl.clear_log(self.name, self.target.getName() + '-conf')
1935 lctl.record(self.name, self.target.getName() + '-conf')
1936 lctl.attach("mds", self.conf_name, self.conf_uuid)
1938 lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
1940 lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
1942 for flavor in string.split(self.deny_sec, ','):
1943 lctl.set_security(self.conf_name, "deny_sec", flavor)
1944 lctl.newdev("mds", self.conf_name, self.conf_uuid,
1945 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
1946 self.conf_name, self.mountfsoptions,
1947 master_name, self.obdtype))
1951 if not self.client_uuids:
1954 for uuid in self.client_uuids:
1955 log("recording client:", uuid)
1956 client_uuid = generate_client_uuid(self.name)
1957 client = VOSC(self.db.lookup(uuid), client_uuid,
1958 self.target.getName(), self.name)
1960 lctl.clear_log(self.name, self.target.getName())
1961 lctl.record(self.name, self.target.getName())
1963 lctl.mount_option(self.target.getName(), client.get_name(), "")
1967 lctl.clear_log(self.name, self.target.getName() + '-clean')
1968 lctl.record(self.name, self.target.getName() + '-clean')
1970 lctl.del_mount_option(self.target.getName())
1978 # record logs for each client
1980 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1982 config_options = CONFIG_FILE
1984 for node_db in self.db.lookup_class('node'):
1985 client_name = node_db.getName()
1986 for prof_uuid in node_db.get_refs('profile'):
1987 prof_db = node_db.lookup(prof_uuid)
1988 # refactor this into a function to test "clientness"
1990 for ref_class, ref_uuid in prof_db.get_all_refs():
1991 if ref_class in ('mountpoint','echoclient'):
1992 debug("recording", client_name)
1993 old_noexec = config.noexec
1995 noexec_opt = ('', '-n')
1996 ret, out = run (sys.argv[0],
1997 noexec_opt[old_noexec == 1],
1998 " -v --record --nomod",
1999 "--record_log", client_name,
2000 "--record_device", self.name,
2001 "--node", client_name,
2004 for s in out: log("record> ", string.strip(s))
2005 ret, out = run (sys.argv[0],
2006 noexec_opt[old_noexec == 1],
2007 "--cleanup -v --record --nomod",
2008 "--record_log", client_name + "-clean",
2009 "--record_device", self.name,
2010 "--node", client_name,
2013 for s in out: log("record> ", string.strip(s))
2014 config.noexec = old_noexec
2018 lctl.start(self.name, self.conf_name)
2019 except CommandError, e:
2021 if self.target.get_class() == 'ost':
2022 if not is_prepared('OSS'):
2024 lctl.start(self.name, 'OSS')
2025 except CommandError, e:
2029 if is_prepared(self.name):
2031 lctl.cleanup(self.name, self.uuid, 0, 0)
2032 clean_dev(self.devpath, self.fstype,
2033 self.backfstype, self.backdevpath)
2034 except CommandError, e:
2035 log(self.module_name, "cleanup failed: ", self.name)
2038 Module.cleanup(self)
2040 class MDSDEV(Module):
2041 def __init__(self,db):
2042 Module.__init__(self, 'MDSDEV', db)
2043 self.devpath = self.db.get_val('devpath','')
2044 self.backdevpath = self.db.get_val('devpath','')
2045 self.size = self.db.get_val_int('devsize', 0)
2046 self.journal_size = self.db.get_val_int('journalsize', 0)
2047 self.fstype = self.db.get_val('fstype', '')
2048 self.backfstype = self.db.get_val('backfstype', '')
2049 self.nspath = self.db.get_val('nspath', '')
2050 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2051 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2052 self.obdtype = self.db.get_val('obdtype', '')
2053 self.root_squash = self.db.get_val('root_squash', '')
2054 self.no_root_squash = self.db.get_val('no_root_squash', '')
2056 target_uuid = self.db.get_first_ref('target')
2057 self.target = self.db.lookup(target_uuid)
2058 self.name = self.target.getName()
2062 lmv_uuid = self.db.get_first_ref('lmv')
2063 if lmv_uuid != None:
2064 self.lmv = self.db.lookup(lmv_uuid)
2066 active_uuid = get_active_target(self.target)
2068 panic("No target device found:", target_uuid)
2069 if active_uuid == self.uuid:
2071 group = self.target.get_val('group')
2072 if config.group and config.group != group:
2077 self.uuid = target_uuid
2080 if self.lmv != None:
2081 client_uuid = self.name + "_lmv_UUID"
2082 self.master = LMV(self.lmv, client_uuid,
2083 self.name, self.name)
2085 self.confobd = CONFDEV(self.db, self.name,
2086 target_uuid, self.uuid)
2088 def add_module(self, manager):
2090 manager.add_lustre_module('mdc', 'mdc')
2091 manager.add_lustre_module('osc', 'osc')
2092 manager.add_lustre_module('ost', 'ost')
2093 manager.add_lustre_module('lov', 'lov')
2094 manager.add_lustre_module('mds', 'mds')
2096 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2097 manager.add_lustre_module(self.fstype, self.fstype)
2100 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2102 # if fstype is smfs, then we should also load modules for the backing store fs
2104 if self.fstype == 'smfs':
2105 manager.add_lustre_module(self.backfstype, self.backfstype)
2106 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2108 for option in string.split(self.mountfsoptions, ','):
2109 if option == 'snap':
2110 if not self.fstype == 'smfs':
2111 panic("mountoptions has 'snap', but fstype is not smfs.")
2112 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2113 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2116 if self.master != None:
2117 self.master.add_module(manager)
2119 # add CONFOBD modules
2120 if self.confobd != None:
2121 self.confobd.add_module(manager)
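# generate the MDS configuration log via the confobd, then release it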
2123 def write_conf(self):
2124 if is_prepared(self.name):
2127 debug(self.uuid, "not active")
2130 self.confobd.prepare()
2131 self.confobd.write_conf()
2132 self.confobd.cleanup()
2135 if is_prepared(self.name):
2138 debug(self.uuid, "not active")
2142 self.confobd.prepare()
2144 self.confobd.write_conf()
2147 if self.master != None:
2148 self.master.prepare()
2150 if not config.record:
2151 self.confobd.start()
2153 if not is_prepared('MDT'):
2154 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
2156 if development_mode():
2157 # set lsd upcall path
2158 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2159 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2160 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2161 print "MDS Warning: failed to set lsd cache upcall"
2163 run("echo ", upcall, " > ", procentry)
2164 # set lacl upcall path
2165 procentry = "/proc/fs/lustre/mds/lacl_upcall"
2166 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lacl_upcall")
2167 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2168 print "MDS Warning: failed to set remote acl upcall"
2170 run("echo ", upcall, " > ", procentry)
2172 if config.root_squash == None:
2173 config.root_squash = self.root_squash
2174 if config.no_root_squash == None:
2175 config.no_root_squash = self.no_root_squash
2176 if config.root_squash:
2177 if config.no_root_squash:
2178 nsnid = config.no_root_squash
2181 lctl.root_squash(self.name, config.root_squash, nsnid)
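# true if any mds devices are still listed by lctl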
2183 def msd_remaining(self):
2184 out = lctl.device_list()
2186 if string.split(s)[2] in ('mds',):
2189 def safe_to_clean(self):
2192 def safe_to_clean_modules(self):
2193 return not self.msd_remaining()
2197 debug(self.uuid, "not active")
2200 if is_prepared(self.name):
2202 lctl.cleanup(self.name, self.uuid, config.force,
2204 except CommandError, e:
2205 log(self.module_name, "cleanup failed: ", self.name)
2208 Module.cleanup(self)
2210 if self.master != None:
2211 self.master.cleanup()
2212 if not self.msd_remaining() and is_prepared('MDT'):
2214 lctl.cleanup("MDT", "MDT_UUID", config.force,
2216 except CommandError, e:
2217 print "cleanup failed: ", self.name
2222 self.confobd.cleanup()
2224 def correct_level(self, level, op=None):
2225 #if self.master != None:
2230 def __init__(self, db):
2231 Module.__init__(self, 'OSD', db)
2232 self.osdtype = self.db.get_val('osdtype')
2233 self.devpath = self.db.get_val('devpath', '')
2234 self.backdevpath = self.db.get_val('devpath', '')
2235 self.size = self.db.get_val_int('devsize', 0)
2236 self.journal_size = self.db.get_val_int('journalsize', 0)
2237 self.inode_size = self.db.get_val_int('inodesize', 0)
2238 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2239 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2240 self.fstype = self.db.get_val('fstype', '')
2241 self.backfstype = self.db.get_val('backfstype', '')
2242 self.nspath = self.db.get_val('nspath', '')
2243 target_uuid = self.db.get_first_ref('target')
2244 ost = self.db.lookup(target_uuid)
2245 self.name = ost.getName()
2246 self.format = self.db.get_val('autoformat', 'yes')
2247 if ost.get_val('failover', 0):
2248 self.failover_ost = 'f'
2250 self.failover_ost = 'n'
2252 self.deny_sec = self.db.get_val('deny_sec', '')
2254 if config.ost_deny_sec:
2256 self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
2258 self.deny_sec = config.ost_deny_sec
2260 active_uuid = get_active_target(ost)
2262 panic("No target device found:", target_uuid)
2263 if active_uuid == self.uuid:
2265 group = ost.get_val('group')
2266 if config.group and config.group != group:
2271 self.uuid = target_uuid
2272 self.confobd = CONFDEV(self.db, self.name,
2273 target_uuid, self.uuid)
2275 def add_module(self, manager):
2278 manager.add_lustre_module('ost', 'ost')
2280 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2281 manager.add_lustre_module(self.fstype, self.fstype)
2284 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2286 if self.fstype == 'smfs':
2287 manager.add_lustre_module(self.backfstype, self.backfstype)
2288 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2290 for option in string.split(self.mountfsoptions, ','):
2291 if option == 'snap':
2292 if not self.fstype == 'smfs':
2293 panic("mountoptions with snap, but fstype is not smfs\n")
2294 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2295 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2297 manager.add_lustre_module(self.osdtype, self.osdtype)
2299 # add CONFOBD modules
2300 if self.confobd != None:
2301 self.confobd.add_module(manager)
2304 if is_prepared(self.name):
2307 debug(self.uuid, "not active")
2312 if self.osdtype == 'obdecho':
2313 self.info(self.osdtype)
2314 lctl.newdev("obdecho", self.name, self.uuid)
2315 if not is_prepared('OSS'):
2316 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2318 self.confobd.prepare()
2320 self.confobd.write_conf()
2321 if not config.record:
2322 self.confobd.start()
2325 for flavor in string.split(self.deny_sec, ','):
2326 lctl.set_security(self.name, "deny_sec", flavor)
2328 def write_conf(self):
2329 if is_prepared(self.name):
2332 debug(self.uuid, "not active")
2336 if self.osdtype != 'obdecho':
2337 self.confobd.prepare()
2338 self.confobd.write_conf()
2339 if not config.write_conf:
2340 self.confobd.start()
2341 self.confobd.cleanup()
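# true if any obdfilter or obdecho devices are still listed by lctl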
2343 def osd_remaining(self):
2344 out = lctl.device_list()
2346 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2349 def safe_to_clean(self):
2352 def safe_to_clean_modules(self):
2353 return not self.osd_remaining()
2357 debug(self.uuid, "not active")
2360 if is_prepared(self.name):
2363 lctl.cleanup(self.name, self.uuid, config.force,
2365 except CommandError, e:
2366 log(self.module_name, "cleanup failed: ", self.name)
2369 if not self.osd_remaining() and is_prepared('OSS'):
2371 lctl.cleanup("OSS", "OSS_UUID", config.force,
2373 except CommandError, e:
2374 print "cleanup failed: ", self.name
2378 if self.osdtype != 'obdecho':
2380 self.confobd.cleanup()
2382 def correct_level(self, level, op=None):
2385 # Generic client module, used by OSC and MDC
2386 class Client(Module):
2387 def __init__(self, tgtdb, uuid, module, fs_name,
2388 self_name=None, module_dir=None):
2389 self.target_name = tgtdb.getName()
2390 self.target_uuid = tgtdb.getUUID()
2391 self.module_dir = module_dir
2392 self.backup_targets = []
2393 self.module = module
2396 self.tgt_dev_uuid = get_active_target(tgtdb)
2397 if not self.tgt_dev_uuid:
2398 panic("No target device found for target(1):", self.target_name)
2403 self.module = module
2404 self.module_name = string.upper(module)
2406 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2407 self.target_name, fs_name)
2409 self.name = self_name
2411 self.lookup_server(self.tgt_dev_uuid)
2412 self.lookup_backup_targets()
2413 self.fs_name = fs_name
2414 if not self.module_dir:
2415 self.module_dir = module
2417 def add_module(self, manager):
2418 manager.add_lustre_module(self.module_dir, self.module)
2420 def lookup_server(self, srv_uuid):
2421 """ Lookup a server's network information """
2422 self._server_nets = get_ost_net(self.db, srv_uuid)
2423 if len(self._server_nets) == 0:
2424 panic ("Unable to find a server for:", srv_uuid)
2429 def get_servers(self):
2430 return self._server_nets
2432 def lookup_backup_targets(self):
2433 """ Lookup alternative network information """
2434 prof_list = toplustreDB.get_refs('profile')
2435 for prof_uuid in prof_list:
2436 prof_db = toplustreDB.lookup(prof_uuid)
2438 panic("profile:", prof_uuid, "not found.")
2439 for ref_class, ref_uuid in prof_db.get_all_refs():
2440 if ref_class in ('osd', 'mdsdev'):
2441 devdb = toplustreDB.lookup(ref_uuid)
2442 uuid = devdb.get_first_ref('target')
2443 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2444 self.backup_targets.append(ref_uuid)
2446 def prepare(self, ignore_connect_failure = 0):
2447 self.info(self.target_uuid)
2448 if not config.record and is_prepared(self.name):
2451 srv = choose_local_server(self.get_servers())
2455 routes = find_route(self.get_servers())
2456 if len(routes) == 0:
2457 panic ("no route to", self.target_uuid)
2458 for (srv, r) in routes:
2459 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2460 except CommandError, e:
2461 if not ignore_connect_failure:
2465 if self.target_uuid in config.inactive and self.permits_inactive():
2466 debug("%s inactive" % self.target_uuid)
2467 inactive_p = "inactive"
2469 debug("%s active" % self.target_uuid)
2471 lctl.newdev(self.module, self.name, self.uuid,
2472 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2474 for tgt_dev_uuid in self.backup_targets:
2475 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2476 if len(this_nets) == 0:
2477 panic ("Unable to find a server for:", tgt_dev_uuid)
2478 srv = choose_local_server(this_nets)
2482 routes = find_route(this_nets);
2483 if len(routes) == 0:
2484 panic("no route to", tgt_dev_uuid)
2485 for (srv, r) in routes:
2486 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2488 lctl.add_conn(self.name, srv.nid_uuid);
2491 if is_prepared(self.name):
2492 Module.cleanup(self)
2494 srv = choose_local_server(self.get_servers())
2496 lctl.disconnect(srv)
2498 for (srv, r) in find_route(self.get_servers()):
2499 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2500 except CommandError, e:
2501 log(self.module_name, "cleanup failed: ", self.name)
2505 for tgt_dev_uuid in self.backup_targets:
2506 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2507 srv = choose_local_server(this_net)
2509 lctl.disconnect(srv)
2511 for (srv, r) in find_route(this_net):
2512 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2514 def correct_level(self, level, op=None):
2517 def deactivate(self):
2519 lctl.deactivate(self.name)
2520 except CommandError, e:
2521 log(self.module_name, "deactivate failed: ", self.name)
2525 class MDC(Client):
2526 def __init__(self, db, uuid, fs_name):
2527 Client.__init__(self, db, uuid, 'mdc', fs_name)
2529 def permits_inactive(self):
2532 class OSC(Client):
2533 def __init__(self, db, uuid, fs_name):
2534 Client.__init__(self, db, uuid, 'osc', fs_name)
2536 def permits_inactive(self):
2539 class CMOBD(Module):
2540 def __init__(self, db):
2541 Module.__init__(self, 'CMOBD', db)
2542 self.name = self.db.getName();
2543 self.uuid = generate_client_uuid(self.name)
2544 self.master_uuid = self.db.get_first_ref('masterobd')
2545 self.cache_uuid = self.db.get_first_ref('cacheobd')
2547 master_obd = self.db.lookup(self.master_uuid)
2549 panic('master obd not found:', self.master_uuid)
2551 cache_obd = self.db.lookup(self.cache_uuid)
2553 panic('cache obd not found:', self.cache_uuid)
2558 master_class = master_obd.get_class()
2559 cache_class = cache_obd.get_class()
2561 if master_class == 'ost' or master_class == 'lov':
2562 client_uuid = "%s_lov_master_UUID" % (self.name)
2563 self.master = LOV(master_obd, client_uuid, self.name,
2564 "master_%s" % (self.name));
2565 elif master_class == 'mds':
2566 self.master = get_mdc(db, self.name, self.master_uuid)
2567 elif master_class == 'lmv':
2568 client_uuid = "%s_lmv_master_UUID" % (self.name)
2569 self.master = LMV(master_obd, client_uuid, self.name,
2570 "master_%s" % (self.name));
2572 panic("unknown master obd class '%s'" %(master_class))
2574 if cache_class == 'ost' or cache_class == 'lov':
2575 client_uuid = "%s_lov_cache_UUID" % (self.name)
2576 self.cache = LOV(cache_obd, client_uuid, self.name,
2577 "cache_%s" % (self.name));
2578 elif cache_class == 'mds':
2579 self.cache = get_mdc(db, self.name, self.cache_uuid)
2580 elif cache_class == 'lmv':
2581 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2582 self.cache = LMV(cache_obd, client_uuid, self.name,
2583 "cache_%s" % (self.name));
2585 panic("unknown cache obd class '%s'" %(cache_class))
2588 self.master.prepare()
2589 if not config.record and is_prepared(self.name):
2591 self.info(self.master_uuid, self.cache_uuid)
2592 lctl.newdev("cmobd", self.name, self.uuid,
2593 setup ="%s %s" %(self.master.uuid,
2602 def get_master_name(self):
2603 return self.master.name
2605 def get_cache_name(self):
2606 return self.cache.name
2609 if is_prepared(self.name):
2610 Module.cleanup(self)
2612 self.master.cleanup()
2614 def add_module(self, manager):
2615 manager.add_lustre_module('smfs', 'smfs')
2616 manager.add_lustre_module('cmobd', 'cmobd')
2617 self.master.add_module(manager)
2619 def correct_level(self, level, op=None):
2623 def __init__(self, db, uuid, name):
2624 Module.__init__(self, 'COBD', db)
2625 self.name = self.db.getName();
2626 self.uuid = generate_client_uuid(self.name)
2627 self.master_uuid = self.db.get_first_ref('masterobd')
2628 self.cache_uuid = self.db.get_first_ref('cacheobd')
2630 master_obd = self.db.lookup(self.master_uuid)
2632 panic('master obd not found:', self.master_uuid)
2634 cache_obd = self.db.lookup(self.cache_uuid)
2636 panic('cache obd not found:', self.cache_uuid)
2641 master_class = master_obd.get_class()
2642 cache_class = cache_obd.get_class()
2644 if master_class == 'ost' or master_class == 'lov':
2645 client_uuid = "%s_lov_master_UUID" % (self.name)
2646 self.master = LOV(master_obd, client_uuid, name,
2647 "master_%s" % (self.name));
2648 elif master_class == 'mds':
2649 self.master = get_mdc(db, name, self.master_uuid)
2650 elif master_class == 'lmv':
2651 client_uuid = "%s_lmv_master_UUID" % (self.name)
2652 self.master = LMV(master_obd, client_uuid, self.name,
2653 "master_%s" % (self.name));
2655 panic("unknown master obd class '%s'" %(master_class))
2657 if cache_class == 'ost' or cache_class == 'lov':
2658 client_uuid = "%s_lov_cache_UUID" % (self.name)
2659 self.cache = LOV(cache_obd, client_uuid, name,
2660 "cache_%s" % (self.name));
2661 elif cache_class == 'mds':
2662 self.cache = get_mdc(db, name, self.cache_uuid)
2663 elif cache_class == 'lmv':
2664 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2665 self.cache = LMV(cache_obd, client_uuid, self.name,
2666 "cache_%s" % (self.name));
2668 panic("unknown cache obd class '%s'" %(cache_class))
2676 def get_master_name(self):
2677 return self.master.name
2679 def get_cache_name(self):
2680 return self.cache.name
2683 if not config.record and is_prepared(self.name):
2685 self.master.prepare()
2686 self.cache.prepare()
2687 self.info(self.master_uuid, self.cache_uuid)
2688 lctl.newdev("cobd", self.name, self.uuid,
2689 setup ="%s %s" %(self.master.name,
2693 if is_prepared(self.name):
2694 Module.cleanup(self)
2695 self.master.cleanup()
2696 self.cache.cleanup()
2698 def add_module(self, manager):
2699 manager.add_lustre_module('cobd', 'cobd')
2700 self.master.add_module(manager)
2702 # virtual interface for OSC and LOV
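# VOSC hides whether the underlying object storage client is a plain OSC,
# an LOV or a COBD; callers only use the common prepare/cleanup/add_module
# interface.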
2703 class VOSC(Module):
2704 def __init__(self, db, client_uuid, name, name_override = None):
2705 Module.__init__(self, 'VOSC', db)
2706 if db.get_class() == 'lov':
2707 self.osc = LOV(db, client_uuid, name, name_override)
2709 elif db.get_class() == 'cobd':
2710 self.osc = COBD(db, client_uuid, name)
2713 self.osc = OSC(db, client_uuid, name)
2717 return self.osc.get_uuid()
2720 return self.osc.get_name()
2728 def add_module(self, manager):
2729 self.osc.add_module(manager)
2731 def correct_level(self, level, op=None):
2732 return self.osc.correct_level(level, op)
2734 # virtual interface for MDC and LMV
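# VMDC plays the same role for metadata: it wraps an MDC, an LMV or a COBD
# behind a single interface.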
2735 class VMDC(Module):
2736 def __init__(self, db, client_uuid, name, name_override = None):
2737 Module.__init__(self, 'VMDC', db)
2738 if db.get_class() == 'lmv':
2739 self.mdc = LMV(db, client_uuid, name, name_override)
2740 elif db.get_class() == 'cobd':
2741 self.mdc = COBD(db, client_uuid, name)
2743 self.mdc = MDC(db, client_uuid, name)
2746 return self.mdc.uuid
2749 return self.mdc.name
2757 def add_module(self, manager):
2758 self.mdc.add_module(manager)
2760 def correct_level(self, level, op=None):
2761 return self.mdc.correct_level(level, op)
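# ECHO_CLIENT layers an echo_client device on top of a VOSC; it is used for
# exercising the object storage path without a real filesystem.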
2763 class ECHO_CLIENT(Module):
2764 def __init__(self,db):
2765 Module.__init__(self, 'ECHO_CLIENT', db)
2766 self.obd_uuid = self.db.get_first_ref('obd')
2767 obd = self.db.lookup(self.obd_uuid)
2768 self.uuid = generate_client_uuid(self.name)
2769 self.osc = VOSC(obd, self.uuid, self.name)
2772 if not config.record and is_prepared(self.name):
2775 self.osc.prepare() # XXX This is so cheating. -p
2776 self.info(self.obd_uuid)
2778 lctl.newdev("echo_client", self.name, self.uuid,
2779 setup = self.osc.get_name())
2782 if is_prepared(self.name):
2783 Module.cleanup(self)
2786 def add_module(self, manager):
2787 self.osc.add_module(manager)
2788 manager.add_lustre_module('obdecho', 'obdecho')
2790 def correct_level(self, level, op=None):
2793 def generate_client_uuid(name):
2794 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2796 int(random.random() * 1048576),
2797 int(random.random() * 1048576))
2798 return client_uuid[:36]
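# Mountpoint describes a client mount: it builds a VOSC/VMDC pair from the
# filesystem's obd and mds (or lmv) references and mounts lustre_lite at
# 'path' with the matching osc=/mdc= and security options.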
2800 class Mountpoint(Module):
2801 def __init__(self,db):
2802 Module.__init__(self, 'MTPT', db)
2803 self.path = self.db.get_val('path')
2804 self.clientoptions = self.db.get_val('clientoptions', '')
2805 self.fs_uuid = self.db.get_first_ref('filesystem')
2806 fs = self.db.lookup(self.fs_uuid)
2807 self.mds_uuid = fs.get_first_ref('lmv')
2808 if not self.mds_uuid:
2809 self.mds_uuid = fs.get_first_ref('mds')
2810 self.obd_uuid = fs.get_first_ref('obd')
2811 client_uuid = generate_client_uuid(self.name)
2813 self.oss_sec = self.db.get_val('oss_sec','null')
2814 self.mds_sec = self.db.get_val('mds_sec','null')
2816 self.mds_sec = config.mds_sec
2818 self.oss_sec = config.oss_sec
2820 ost = self.db.lookup(self.obd_uuid)
2822 panic("no ost: ", self.obd_uuid)
2824 mds = self.db.lookup(self.mds_uuid)
2826 panic("no mds: ", self.mds_uuid)
2828 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2829 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
2832 if not config.record and fs_is_mounted(self.path):
2833 log(self.path, "already mounted.")
2840 self.info(self.path, self.mds_uuid, self.obd_uuid)
2841 if config.record or config.lctl_dump:
2842 lctl.mount_option(local_node_name, self.vosc.get_name(),
2843 self.vmdc.get_name())
2846 if config.clientoptions:
2847 if self.clientoptions:
2848 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2850 self.clientoptions = config.clientoptions
2851 if self.clientoptions:
2852 self.clientoptions = ',' + self.clientoptions
2853 # The Linux kernel handles 'async' itself and does not pass it to ll_fill_super,
2854 # so replace it with the Lustre-specific 'lasync' option
2855 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
2857 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2858 (self.vosc.get_name(), self.vmdc.get_name(), self.mds_sec,
2859 self.oss_sec, self.clientoptions, config.config, self.path)
2860 run("mkdir", self.path)
2865 panic("mount failed:", self.path, ":", string.join(val))
2868 self.info(self.path, self.mds_uuid,self.obd_uuid)
2870 if config.record or config.lctl_dump:
2871 lctl.del_mount_option(local_node_name)
2873 if fs_is_mounted(self.path):
2875 (rc, out) = run("umount", "-f", self.path)
2877 (rc, out) = run("umount", self.path)
2879 raise CommandError('umount', out, rc)
2881 if fs_is_mounted(self.path):
2882 panic("fs is still mounted:", self.path)
2887 def add_module(self, manager):
2888 self.vosc.add_module(manager)
2889 self.vmdc.add_module(manager)
2890 manager.add_lustre_module('llite', 'llite')
2892 def correct_level(self, level, op=None):
2895 # ============================================================
2896 # misc query functions
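# get_ost_net(db, osd_uuid): return the list of Network objects configured
# on the node that hosts the given target device.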
2898 def get_ost_net(self, osd_uuid):
2902 osd = self.lookup(osd_uuid)
2903 node_uuid = osd.get_first_ref('node')
2904 node = self.lookup(node_uuid)
2906 panic("unable to find node for osd_uuid:", osd_uuid,
2907 " node_ref:", node_uuid_)
2908 for net_uuid in node.get_networks():
2909 db = node.lookup(net_uuid)
2910 srv_list.append(Network(db))
2913 # the order of initialization is based on level.
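# Roughly: network < routetbl < ldlm < osd < mdsdev < lmv < cmobd/cobd
# < mountpoint/echoclient (see also the --maxlevel help text below).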
2914 def getServiceLevel(self):
2915 type = self.get_class()
2917 if type in ('network',):
2919 elif type in ('routetbl',):
2921 elif type in ('ldlm',):
2923 elif type in ('osd',):
2925 elif type in ('mdsdev',):
2927 elif type in ('lmv',):
2929 elif type in ('cmobd', 'cobd',):
2931 elif type in ('mountpoint', 'echoclient'):
2934 panic("Unknown type: ", type)
2936 if ret < config.minlevel or ret > config.maxlevel:
2941 # return list of services in a profile. list is a list of tuples
2942 # [(level, db_object),]
2943 def getServices(self):
2945 for ref_class, ref_uuid in self.get_all_refs():
2946 servdb = self.lookup(ref_uuid)
2948 level = getServiceLevel(servdb)
2950 list.append((level, servdb))
2952 panic('service not found: ' + ref_uuid)
2958 ############################################################
2960 # FIXME: clean this mess up!
2962 # OSC is no longer in the xml, so we have to fake it.
2963 # this is getting ugly and begging for another refactoring
2964 def get_osc(ost_db, uuid, fs_name):
2965 osc = OSC(ost_db, uuid, fs_name)
2968 def get_mdc(db, fs_name, mds_uuid):
2969 mds_db = db.lookup(mds_uuid);
2971 error("no mds:", mds_uuid)
2972 mdc = MDC(mds_db, mds_uuid, fs_name)
2975 ############################################################
2976 # routing ("rooting")
2978 # list of (nettype, cluster_id, nid)
2981 def find_local_clusters(node_db):
2982 global local_clusters
2983 for netuuid in node_db.get_networks():
2984 net = node_db.lookup(netuuid)
2986 debug("add_local", netuuid)
2987 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2989 if not acceptors.has_key(srv.port):
2990 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2992 # This node is a gateway.
2994 def node_is_router():
2997 # If there are any routers found in the config, then this will be true
2998 # and all nodes will load kptlrouter.
3000 def node_needs_router():
3001 return needs_router or is_router
3003 # list of (nettype, gw, tgt_cluster_id, lo, hi)
3004 # Currently, these local routes are only added to kptlrouter route
3005 # table if they are needed to connect to a specific server. This
3006 # should be changed so all available routes are loaded, and the
3007 # ptlrouter can make all the decisions.
3010 def find_local_routes(lustre):
3011 """ Scan the lustre config looking for routers . Build list of
3013 global local_routes, needs_router
3015 list = lustre.lookup_class('node')
3017 if router.get_val_int('router', 0):
3019 for (local_type, local_cluster_id, local_nid) in local_clusters:
3021 for netuuid in router.get_networks():
3022 db = router.lookup(netuuid)
3023 if (local_type == db.get_val('nettype') and
3024 local_cluster_id == db.get_val('clusterid')):
3025 gw = db.get_val('nid')
3028 debug("find_local_routes: gw is", gw)
3029 for route in router.get_local_routes(local_type, gw):
3030 local_routes.append(route)
3031 debug("find_local_routes:", local_routes)
3034 def choose_local_server(srv_list):
3035 for srv in srv_list:
3036 if local_cluster(srv.net_type, srv.cluster_id):
3039 def local_cluster(net_type, cluster_id):
3040 for cluster in local_clusters:
3041 if net_type == cluster[0] and cluster_id == cluster[1]:
3045 def local_interface(net_type, cluster_id, nid):
3046 for cluster in local_clusters:
3047 if (net_type == cluster[0] and cluster_id == cluster[1]
3048 and nid == cluster[2]):
3052 def find_route(srv_list):
3054 frm_type = local_clusters[0][0]
3055 for srv in srv_list:
3056 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
3057 to_type = srv.net_type
3059 cluster_id = srv.cluster_id
3060 debug ('looking for route to', to_type, to)
3061 for r in local_routes:
3062 debug("find_route: ", r)
3063 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
3064 result.append((srv, r))
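# get_active_target(): honour a --select override for this target if one
# was given, otherwise fall back to the 'active' reference in the config.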
3067 def get_active_target(db):
3068 target_uuid = db.getUUID()
3069 target_name = db.getName()
3070 node_name = get_select(target_name)
3072 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
3074 tgt_dev_uuid = db.get_first_ref('active')
3077 def get_server_by_nid_uuid(db, nid_uuid):
3078 for n in db.lookup_class("network"):
3080 if net.nid_uuid == nid_uuid:
3084 ############################################################
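# newService(db): map a config record's class ('network', 'osd', 'mdsdev',
# 'lov', ...) to the corresponding Module subclass; used by the do* helpers
# further below.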
3088 type = db.get_class()
3089 debug('Service:', type, db.getName(), db.getUUID())
3094 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3095 elif type == 'network':
3097 elif type == 'routetbl':
3101 elif type == 'cobd':
3102 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3103 elif type == 'cmobd':
3105 elif type == 'mdsdev':
3107 elif type == 'mountpoint':
3109 elif type == 'echoclient':
3114 panic ("unknown service type:", type)
3118 # Prepare the system to run lustre using a particular profile
3119 # in the configuration.
3120 # * load the modules
3121 # * setup networking for the current node
3122 # * make sure partitions are in place and prepared
3123 # * initialize devices with lctl
3124 # Level ordering is important and needs to be enforced.
3125 def for_each_profile(db, prof_list, operation):
3126 for prof_uuid in prof_list:
3127 prof_db = db.lookup(prof_uuid)
3129 panic("profile:", prof_uuid, "not found.")
3130 services = getServices(prof_db)
3133 def magic_get_osc(db, rec, lov):
3135 lov_uuid = lov.get_uuid()
3136 lov_name = lov.osc.fs_name
3138 lov_uuid = rec.getAttribute('lov_uuidref')
3139 # FIXME: better way to find the mountpoint?
3140 filesystems = db.root_node.getElementsByTagName('filesystem')
3142 for fs in filesystems:
3143 ref = fs.getElementsByTagName('obd_ref')
3144 if ref[0].getAttribute('uuidref') == lov_uuid:
3145 fsuuid = fs.getAttribute('uuid')
3149 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
3151 mtpts = db.root_node.getElementsByTagName('mountpoint')
3154 ref = fs.getElementsByTagName('filesystem_ref')
3155 if ref[0].getAttribute('uuidref') == fsuuid:
3156 lov_name = fs.getAttribute('name')
3160 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3162 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3164 ost_uuid = rec.getAttribute('ost_uuidref')
3165 obd = db.lookup(ost_uuid)
3168 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3170 osc = get_osc(obd, lov_uuid, lov_name)
3172 panic('osc not found:', ost_uuid)
3175 # write logs for update records. sadly, logs of all types -- and updates in
3176 # particular -- are something of an afterthought. lconf needs to be rewritten with
3177 # these as core concepts. so this is a pretty big hack.
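# Roughly, as inferred from the parsing below, an update record looks like:
#   <update version="N">
#     <add lov_uuidref="..." ost_uuidref="..." index="..." generation="..."/>
#   </update>
# with 'deactivate' and 'delete' elements carrying the same attributes.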
3178 def process_update_record(db, update, lov):
3179 for rec in update.childNodes:
3180 if rec.nodeType != rec.ELEMENT_NODE:
3183 log("found "+rec.nodeName+" record in update version " +
3184 str(update.getAttribute('version')))
3186 lov_uuid = rec.getAttribute('lov_uuidref')
3187 ost_uuid = rec.getAttribute('ost_uuidref')
3188 index = rec.getAttribute('index')
3189 gen = rec.getAttribute('generation')
3191 if not lov_uuid or not ost_uuid or not index or not gen:
3192 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3195 tmplov = db.lookup(lov_uuid)
3197 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3198 lov_name = tmplov.getName()
3200 lov_name = lov.osc.name
3202 # ------------------------------------------------------------- add
3203 if rec.nodeName == 'add':
3205 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3208 osc = magic_get_osc(db, rec, lov)
3211 # Only ignore connect failures with --force, which
3212 # isn't implemented here yet.
3213 osc.prepare(ignore_connect_failure=0)
3214 except CommandError, e:
3215 print "Error preparing OSC %s\n" % osc.uuid
3218 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3220 # ------------------------------------------------------ deactivate
3221 elif rec.nodeName == 'deactivate':
3225 osc = magic_get_osc(db, rec, lov)
3229 except CommandError, e:
3230 print "Error deactivating OSC %s\n" % osc.uuid
3233 # ---------------------------------------------------------- delete
3234 elif rec.nodeName == 'delete':
3238 osc = magic_get_osc(db, rec, lov)
3244 except CommandError, e:
3245 print "Error cleaning up OSC %s\n" % osc.uuid
3248 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
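# process_updates(): for every <update> element in the config, replay its
# records into a per-version config log named "<log_name>-<version>" using
# lctl clear_log/record.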
3250 def process_updates(db, log_device, log_name, lov = None):
3251 updates = db.root_node.getElementsByTagName('update')
3253 if not u.childNodes:
3254 log("ignoring empty update record (version " +
3255 str(u.getAttribute('version')) + ")")
3258 version = u.getAttribute('version')
3259 real_name = "%s-%s" % (log_name, version)
3260 lctl.clear_log(log_device, real_name)
3261 lctl.record(log_device, real_name)
3263 process_update_record(db, u, lov)
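# The do* helpers below are run once per profile by for_each_profile(): each
# instantiates its services with newService() and then writes configuration,
# sets up, loads/unloads modules, or cleans up, ordered by service level.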
3267 def doWriteconf(services):
3271 if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
3272 n = newService(s[1])
3276 def doSetup(services):
3281 n = newService(s[1])
3283 slist.append((n.level, n))
3286 nl = n[1].correct_level(n[0])
3287 nlist.append((nl, n[1]))
3292 def doLoadModules(services):
3296 # adding all needed modules from all services
3298 n = newService(s[1])
3299 n.add_module(mod_manager)
3301 # loading all registered modules
3302 mod_manager.load_modules()
3304 def doUnloadModules(services):
3308 # adding all needed modules from all services
3310 n = newService(s[1])
3311 if n.safe_to_clean_modules():
3312 n.add_module(mod_manager)
3314 # unloading all registered modules
3315 mod_manager.cleanup_modules()
3317 def doCleanup(services):
3323 n = newService(s[1])
3325 slist.append((n.level, n))
3328 nl = n[1].correct_level(n[0])
3329 nlist.append((nl, n[1]))
3334 if n[1].safe_to_clean():
3339 def doHost(lustreDB, hosts):
3340 global is_router, local_node_name
3343 node_db = lustreDB.lookup_name(h, 'node')
3347 panic('No host entry found.')
3349 local_node_name = node_db.get_val('name', 0)
3350 is_router = node_db.get_val_int('router', 0)
3351 lustre_upcall = node_db.get_val('lustreUpcall', '')
3352 portals_upcall = node_db.get_val('portalsUpcall', '')
3353 timeout = node_db.get_val_int('timeout', 0)
3354 ptldebug = node_db.get_val('ptldebug', '')
3355 subsystem = node_db.get_val('subsystem', '')
3357 find_local_clusters(node_db)
3359 find_local_routes(lustreDB)
3361 # Two step process: (1) load modules, (2) setup lustre
3362 # if not cleaning, load modules first.
3363 prof_list = node_db.get_refs('profile')
3365 if config.write_conf:
3366 for_each_profile(node_db, prof_list, doLoadModules)
3368 for_each_profile(node_db, prof_list, doWriteconf)
3369 for_each_profile(node_db, prof_list, doUnloadModules)
3372 elif config.recover:
3373 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3374 raise Lustre.LconfError( "--recover requires --tgt_uuid <UUID> " +
3375 "--client_uuid <UUID> --conn_uuid <UUID>")
3376 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3378 elif config.cleanup:
3380 # the command line can override this value
3382 # ugly hack, only need to run lctl commands for --dump
3383 if config.lctl_dump or config.record:
3384 for_each_profile(node_db, prof_list, doCleanup)
3387 sys_set_timeout(timeout)
3388 sys_set_ptldebug(ptldebug)
3389 sys_set_subsystem(subsystem)
3390 sys_set_lustre_upcall(lustre_upcall)
3391 sys_set_portals_upcall(portals_upcall)
3393 for_each_profile(node_db, prof_list, doCleanup)
3394 for_each_profile(node_db, prof_list, doUnloadModules)
3398 # ugly hack, only need to run lctl commands for --dump
3399 if config.lctl_dump or config.record:
3400 sys_set_timeout(timeout)
3401 sys_set_lustre_upcall(lustre_upcall)
3402 for_each_profile(node_db, prof_list, doSetup)
3406 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3407 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3409 for_each_profile(node_db, prof_list, doLoadModules)
3411 sys_set_debug_path()
3412 sys_set_ptldebug(ptldebug)
3413 sys_set_subsystem(subsystem)
3414 script = config.gdb_script
3415 run(lctl.lctl, ' modules >', script)
3417 log ("The GDB module script is in", script)
3418 # pause, so user has time to break and
3421 sys_set_timeout(timeout)
3422 sys_set_lustre_upcall(lustre_upcall)
3423 sys_set_portals_upcall(portals_upcall)
3425 for_each_profile(node_db, prof_list, doSetup)
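# doRecovery(): look up the failed target's currently active server, drop the
# old connection, connect to the new nid and ask the client to recover.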
3428 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3429 tgt = lustreDB.lookup(tgt_uuid)
3431 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3432 new_uuid = get_active_target(tgt)
3434 raise Lustre.LconfError("doRecovery: no active target found for: " +
3436 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3438 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3440 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3442 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
3445 lctl.disconnect(oldnet)
3446 except CommandError, e:
3447 log("recover: disconnect", nid_uuid, "failed: ")
3452 except CommandError, e:
3453 log("recover: connect failed")
3456 lctl.recover(client_uuid, net.nid_uuid)
3459 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3460 base = os.path.dirname(cmd)
3461 if development_mode():
3462 if not config.lustre:
3463 debug('using objdir module paths')
3464 config.lustre = (os.path.join(base, ".."))
3465 # normalize the portals dir, using command line arg if set
3467 portals_dir = config.portals
3468 dir = os.path.join(config.lustre, portals_dir)
3469 config.portals = dir
3470 debug('config.portals', config.portals)
3471 elif config.lustre and config.portals:
3473 # if --lustre and --portals, normalize portals
3474 # can ignore PORTALS_DIR here, since it is probably useless in this case
3475 config.portals = os.path.join(config.lustre, config.portals)
3476 debug('config.portals B', config.portals)
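# sysctl(path, val): write val to the corresponding file under /proc/sys,
# e.g. sysctl('portals/debug', '0xffffffff') writes /proc/sys/portals/debug.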
3478 def sysctl(path, val):
3479 debug("+ sysctl", path, val)
3483 fp = open(os.path.join('/proc/sys', path), 'w')
3490 def sys_set_debug_path():
3491 sysctl('portals/debug_path', config.debug_path)
3493 def sys_set_lustre_upcall(upcall):
3494 # the command overrides the value in the node config
3495 if config.lustre_upcall:
3496 upcall = config.lustre_upcall
3498 upcall = config.upcall
3500 lctl.set_lustre_upcall(upcall)
3502 def sys_set_portals_upcall(upcall):
3503 # the command overrides the value in the node config
3504 if config.portals_upcall:
3505 upcall = config.portals_upcall
3507 upcall = config.upcall
3509 sysctl('portals/upcall', upcall)
3511 def sys_set_timeout(timeout):
3512 # the command overrides the value in the node config
3513 if config.timeout and config.timeout > 0:
3514 timeout = config.timeout
3515 if timeout != None and timeout > 0:
3516 lctl.set_timeout(timeout)
3518 def sys_tweak_socknal ():
3519 # reserve at least 8MB, or we run out of RAM in skb_alloc under read load
3520 if sys_get_branch() == '2.6':
3521 fp = open('/proc/meminfo')
3522 lines = fp.readlines()
3527 if a[0] == 'MemTotal:':
3529 debug("memtotal" + memtotal)
3530 if int(memtotal) < 262144:
3531 minfree = int(memtotal) / 16
3534 debug("+ minfree ", minfree)
3535 sysctl("vm/min_free_kbytes", minfree)
3536 if config.single_socket:
3537 sysctl("socknal/typed", 0)
3539 def sys_optimize_elan ():
3540 procfiles = ["/proc/elan/config/eventint_punt_loops",
3541 "/proc/qsnet/elan3/config/eventint_punt_loops",
3542 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3544 if os.access(p, os.W_OK):
3545 run ("echo 1 > " + p)
3547 def sys_set_ptldebug(ptldebug):
3549 ptldebug = config.ptldebug
3552 val = eval(ptldebug, ptldebug_names)
3553 val = "0x%x" % (val & 0xffffffffL)
3554 sysctl('portals/debug', val)
3555 except NameError, e:
3558 def sys_set_subsystem(subsystem):
3559 if config.subsystem:
3560 subsystem = config.subsystem
3563 val = eval(subsystem, subsystem_names)
3564 val = "0x%x" % (val & 0xffffffffL)
3565 sysctl('portals/subsystem_debug', val)
3566 except NameError, e:
3569 def sys_set_netmem_max(path, max):
3570 debug("setting", path, "to at least", max)
3578 fp = open(path, 'w')
3579 fp.write('%d\n' %(max))
3582 def sys_make_devices():
3583 if not os.access('/dev/portals', os.R_OK):
3584 run('mknod /dev/portals c 10 240')
3585 if not os.access('/dev/obd', os.R_OK):
3586 run('mknod /dev/obd c 10 241')
3588 # Add dir to the global PATH, if not already there.
3589 def add_to_path(new_dir):
3590 syspath = string.split(os.environ['PATH'], ':')
3591 if new_dir in syspath:
3593 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
3595 def default_debug_path():
3596 path = '/tmp/lustre-log'
3597 if os.path.isdir('/r'):
3602 def default_gdb_script():
3603 script = '/tmp/ogdb'
3604 if os.path.isdir('/r'):
3605 return '/r' + script
3609 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3610 # ensure basic elements are in the system path
3611 def sanitise_path():
3612 for dir in DEFAULT_PATH:
3615 # global hack for the --select handling
3617 def init_select(args):
3618 # args = [service=nodeA,service2=nodeB service3=nodeC]
3621 list = string.split(arg, ',')
3623 srv, node = string.split(entry, '=')
3624 tgt_select[srv] = node
3626 def get_select(srv):
3627 if tgt_select.has_key(srv):
3628 return tgt_select[srv]
3632 FLAG = Lustre.Options.FLAG
3633 PARAM = Lustre.Options.PARAM
3634 INTPARAM = Lustre.Options.INTPARAM
3635 PARAMLIST = Lustre.Options.PARAMLIST
3637 ('verbose,v', "Print system commands as they are run"),
3638 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3639 ('config', "Cluster config name used for LDAP query", PARAM),
3640 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3641 ('node', "Load config for <nodename>", PARAM),
3642 ('sec',"security flavor <null|krb5i|krb5p> between this client and the mds", PARAM),
3643 ('mds_sec',"security flavor <null|krb5i|krb5p> between this client and the mds", PARAM),
3644 ('oss_sec',"security flavor <null|krb5i|krb5p> between this client and the ost", PARAM),
3645 ('mds_mds_sec',"security flavor <null|krb5i|krb5p> between this mds and other mds nodes", PARAM),
3646 ('mds_oss_sec',"security flavor <null|krb5i|krb5p> between this mds and the ost", PARAM),
3647 ('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
3648 ('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
3649 ('cleanup,d', "Cleans up config. (Shutdown)"),
3650 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3652 ('single_socket', "socknal option: only use one socket instead of bundle",
3654 ('failover',"""Used to shut down without saving state.
3655 This will allow this node to "give up" a service to
3656 another node for failover purposes. This will not
3657 be a clean shutdown.""",
3659 ('gdb', """Prints message after creating gdb module script
3660 and sleeps for 5 seconds."""),
3661 ('noexec,n', """Prints the commands and steps that will be run for a
3662 config without executing them. This can be used to check if a
3663 config file is doing what it should be doing"""),
3664 ('nomod', "Skip load/unload module step."),
3665 ('nosetup', "Skip device setup/cleanup step."),
3666 ('reformat', "Reformat all devices (without question)"),
3667 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3668 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3669 ('clientoptions', "Additional mount options for the Lustre client", PARAM),
3670 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3672 ('write_conf', "Save all the client config information on mds."),
3673 ('record', "Write config information on mds."),
3674 ('record_log', "Name of config record log.", PARAM),
3675 ('record_device', "MDS device name that will record the config commands",
3677 ('root_squash', "Have the MDS squash root to the given uid",
3679 ('no_root_squash', "Don't squash root for the given nid",
3681 ('minlevel', "Minimum level of services to configure/cleanup",
3683 ('maxlevel', """Maximum level of services to configure/cleanup
3684 Levels are approximately like:
3689 70 - mountpoint, echo_client, osc, mdc, lov""",
3691 ('lustre', """Base directory of lustre sources. This parameter will
3692 cause lconf to load modules from a source tree.""", PARAM),
3693 ('portals', """Portals source directory. If this is a relative path,
3694 then it is assumed to be relative to lustre. """, PARAM),
3695 ('timeout', "Set recovery timeout", INTPARAM),
3696 ('upcall', "Set both portals and lustre upcall script", PARAM),
3697 ('lustre_upcall', "Set lustre upcall script", PARAM),
3698 ('portals_upcall', "Set portals upcall script", PARAM),
3699 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3700 ('ptldebug', "Set the portals debug level", PARAM),
3701 ('subsystem', "Set the portals debug subsystem", PARAM),
3702 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3703 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3704 # Client recovery options
3705 ('recover', "Recover a device"),
3706 ('group', "The group of devices to configure or cleanup", PARAM),
3707 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3708 ('client_uuid', "The failed client (required for recovery)", PARAM),
3709 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3711 ('inactive', """The name of an inactive service, to be ignored during
3712 mounting (currently OST-only). Can be repeated.""",
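# main(): parse the command line, load the cluster config from XML, LDAP or
# an HTTP URL, then configure (or clean up) this node's profiles via doHost()
# and, when recording, replay any update records with process_updates().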
3716 def main():
3717 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3719 # in the upcall this is set to SIG_IGN
3720 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3722 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3724 config, args = cl.parse(sys.argv[1:])
3725 except Lustre.OptionError, e:
3729 setupModulePath(sys.argv[0])
3731 host = socket.gethostname()
3733 # the PRNG is normally seeded with time(), which is not so good for starting
3734 # time-synchronized clusters
3735 input = open('/dev/urandom', 'r')
3737 print 'Unable to open /dev/urandom!'
3739 seed = input.read(32)
3745 init_select(config.select)
3748 # allow config to be fetched via HTTP, but only with python2
3749 if sys.version[0] != '1' and args[0].startswith('http://'):
3752 config_file = urllib2.urlopen(args[0])
3753 except (urllib2.URLError, socket.error), err:
3754 if hasattr(err, 'args'):
3756 print "Could not access '%s': %s" %(args[0], err)
3758 elif not os.access(args[0], os.R_OK):
3759 print 'File not found or readable:', args[0]
3763 config_file = open(args[0], 'r')
3765 dom = xml.dom.minidom.parse(config_file)
3767 panic("%s does not appear to be a config file." % (args[0]))
3768 sys.exit(1) # make sure to die here, even in debug mode.
3770 CONFIG_FILE = args[0]
3771 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3772 if not config.config:
3773 config.config = os.path.basename(args[0]) # use full path?
3774 if config.config[-4:] == '.xml':
3775 config.config = config.config[:-4]
3776 elif config.ldapurl:
3777 if not config.config:
3778 panic("--ldapurl requires --config name")
3779 dn = "config=%s,fs=lustre" % (config.config)
3780 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3781 elif config.ptldebug or config.subsystem:
3782 sys_set_ptldebug(None)
3783 sys_set_subsystem(None)
3786 print 'Missing config file or ldap URL.'
3787 print 'see lconf --help for command summary'
3790 toplustreDB = lustreDB
3792 ver = lustreDB.get_version()
3794 panic("No version found in config data, please recreate.")
3795 if ver != Lustre.CONFIG_VERSION:
3796 panic("Config version", ver, "does not match lconf version",
3797 Lustre.CONFIG_VERSION)
3801 node_list.append(config.node)
3804 node_list.append(host)
3805 node_list.append('localhost')
3807 debug("configuring for host: ", node_list)
3810 config.debug_path = config.debug_path + '-' + host
3811 config.gdb_script = config.gdb_script + '-' + host
3813 lctl = LCTLInterface('lctl')
3815 if config.lctl_dump:
3816 lctl.use_save_file(config.lctl_dump)
3819 if not (config.record_device and config.record_log):
3820 panic("When recording, both --record_log and --record_device must be specified.")
3821 lctl.clear_log(config.record_device, config.record_log)
3822 lctl.record(config.record_device, config.record_log)
3824 # init module manager
3825 mod_manager = kmod_manager(config.lustre, config.portals)
3827 doHost(lustreDB, node_list)
3829 if not config.record:
3834 process_updates(lustreDB, config.record_device, config.record_log)
3836 if __name__ == "__main__":
3839 except Lustre.LconfError, e:
3841 # traceback.print_exc(file=sys.stdout)
3843 except CommandError, e:
3847 if first_cleanup_error:
3848 sys.exit(first_cleanup_error)