3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
35 else:
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
128 first_cleanup_error = 0
129 def cleanup_error(rc):
130 global first_cleanup_error
131 if not first_cleanup_error:
132 first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
137 def fixme(msg = "this feature"):
138 raise Lustre.LconfError, msg + ' not implemented yet.'
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
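# For example, on Python 2 the same hex-or-decimal parsing can be done
# without eval by letting the conversion guess the base:
#     string.atoi(s, 0)    # base 0 auto-detects a leading '0x'
# (a possible alternative; the eval form above is what this script uses)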
173 # ============================================================
174 # locally defined exceptions
175 class CommandError (exceptions.Exception):
176 def __init__(self, cmd_name, cmd_err, rc=None):
177 self.cmd_name = cmd_name
178 self.cmd_err = cmd_err
183 if type(self.cmd_err) == types.StringType:
185 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187 print "! %s: %s" % (self.cmd_name, self.cmd_err)
188 elif type(self.cmd_err) == types.ListType:
190 print "! %s (error %d):" % (self.cmd_name, self.rc)
192 print "! %s:" % (self.cmd_name)
193 for s in self.cmd_err:
194 print "> %s" %(string.strip(s))
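# Callers throughout this script typically wrap lctl and shell invocations in
# try/except CommandError and then report the failure using the printing
# logic above (cmd_name, rc and the captured error lines).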
199 # ============================================================
200 # handle daemons, like the acceptor
202 """ Manage starting and stopping a daemon. Assumes daemon manages
203 its own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
229 #time.sleep(1) # let daemon die
231 log("unable to kill", self.command, e)
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
278 def command_line(self):
279 return string.join(map(str,(self.flags, self.port)))
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
330 def use_save_file(self, file):
331 self.save_file = file
333 def record(self, dev_name, logname):
334 log("Recording log", logname, "on", dev_name)
335 self.record_device = dev_name
336 self.record_log = logname
338 def end_record(self):
339 log("End recording log", self.record_log, "on", self.record_device)
340 self.record_device = None
341 self.record_log = None
343 def set_nonblock(self, fd):
344 fl = fcntl.fcntl(fd, F_GETFL)
345 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From the "Python Cookbook" (O'Reilly)
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
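# Note: the select()-driven, non-blocking reads above drain stdout and
# stderr of the child concurrently; reading the two pipes one after the
# other with blocking I/O could deadlock once either pipe buffer fills up.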
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
405 def runcmd(self, *args):
407 run lctl using the command line
409 cmd = string.join(map(str,args))
410 debug("+", self.lctl, cmd)
411 rc, out = run(self.lctl, cmd)
413 raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # delete a route to a host
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
632 self.attach(type, name, uuid);
634 self.setup(name, setup)
635 except CommandError, e:
636 self.cleanup(name, uuid, 0)
640 def cleanup(self, name, uuid, force, failover = 0):
641 if failover: force = 1
647 quit""" % (name, ('', 'force')[force],
648 ('', 'failover')[failover])
652 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
653 stripe_sz, stripe_off, pattern, devlist = None):
656 lov_setup %s %d %d %d %s %s
657 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
661 # add an OBD to a LOV
662 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
664 lov_modify_tgts add %s %s %s %s
665 quit""" % (name, obd_uuid, index, gen)
669 def lmv_setup(self, name, uuid, desc_uuid, devlist):
673 quit""" % (name, uuid, desc_uuid, devlist)
676 # delete an OBD from a LOV
677 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
679 lov_modify_tgts del %s %s %s %s
680 quit""" % (name, obd_uuid, index, gen)
684 def deactivate(self, name):
692 def dump(self, dump_file):
695 quit""" % (dump_file)
698 # get list of devices
699 def device_list(self):
700 devices = '/proc/fs/lustre/devices'
702 if os.access(devices, os.R_OK):
704 fp = open(devices, 'r')
712 def lustre_version(self):
713 rc, out = self.runcmd('version')
717 def mount_option(self, profile, osc, mdc, gkc):
719 mount_option %s %s %s %s
720 quit""" % (profile, osc, mdc, gkc)
723 # delete mount options
724 def del_mount_option(self, profile):
730 def set_timeout(self, timeout):
736 def set_lustre_upcall(self, upcall):
741 # ============================================================
742 # Various system-level functions
743 # (ideally moved to their own module)
745 # Run a command and return the output and status.
746 # stderr is merged into stdout via 2>&1; popen3 could be used to
747 # capture it separately if necessary
750 if config.noexec: return (0, [])
751 f = os.popen(cmd + ' 2>&1')
761 cmd = string.join(map(str,args))
764 # Run a command in the background.
765 def run_daemon(*args):
766 cmd = string.join(map(str,args))
768 if config.noexec: return 0
769 f = os.popen(cmd + ' 2>&1')
777 # Determine full path to use for an external command
778 # searches dirname(argv[0]) first, then PATH
780 syspath = string.split(os.environ['PATH'], ':')
781 cmdpath = os.path.dirname(sys.argv[0])
782 syspath.insert(0, cmdpath);
784 syspath.insert(0, os.path.join(config.portals, 'utils/'))
786 prog = os.path.join(d,cmd)
787 if os.access(prog, os.X_OK):
791 # Recursively look for file starting at base dir
792 def do_find_file(base, mod):
793 fullname = os.path.join(base, mod)
794 if os.access(fullname, os.R_OK):
796 for d in os.listdir(base):
797 dir = os.path.join(base,d)
798 if os.path.isdir(dir):
799 module = do_find_file(dir, mod)
803 # is the path a block device?
810 return stat.S_ISBLK(s[stat.ST_MODE])
812 # find the journal device from mkfs options
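# e.g. mkfsoptions of "-J device=/dev/sdb1" should yield "/dev/sdb1";
# an empty string means no separate journal device was requested.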
818 while i < len(x) - 1:
819 if x[i] == '-J' and x[i+1].startswith('device='):
825 # build fs according to type
827 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
833 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
835 # devsize is in 1k, and fs block count is in 4k
836 block_cnt = devsize/4
838 if fstype in ('ext3', 'extN', 'ldiskfs'):
839 # ext3 journal size is in megabytes
840 # but don't set jsize if mkfsoptions indicates a separate journal device
841 if jsize == 0 and jdev(mkfsoptions) == '':
843 if not is_block(dev):
844 ret, out = runcmd("ls -l %s" %dev)
845 devsize = int(string.split(out[0])[4]) / 1024
847 # sfdisk works for symlink, hardlink, and realdev
848 ret, out = runcmd("sfdisk -s %s" %dev)
850 devsize = int(out[0])
852 # sfdisk -s will fail for a block device that is too large;
853 # in that case, read the size of the partition from /proc/partitions
855 # get the realpath of the device
856 # it may be the real device, such as /dev/hda7
857 # or the hardlink created via mknod for a device
858 if 'realpath' in dir(os.path):
859 real_dev = os.path.realpath(dev)
863 while os.path.islink(real_dev) and (link_count < 20):
864 link_count = link_count + 1
865 dev_link = os.readlink(real_dev)
866 if os.path.isabs(dev_link):
869 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
871 panic("Entountered too many symbolic links resolving block device:", dev)
873 # get the major and minor number of the realpath via ls
874 # it seems Python's os.stat() does not return
875 # the st_rdev member of the stat structure
876 ret, out = runcmd("ls -l %s" %real_dev)
877 major = string.split(string.split(out[0])[4], ",")[0]
878 minor = string.split(out[0])[5]
880 # get the devsize from /proc/partitions with the major and minor number
881 ret, out = runcmd("cat /proc/partitions")
884 if string.split(line)[0] == major and string.split(line)[1] == minor:
885 devsize = int(string.split(line)[2])
888 if devsize > 1024 * 1024:
889 jsize = ((devsize / 102400) * 4)
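# worked example: a 4GB device has devsize 4194304 (in 1k blocks), so
# 4194304 / 102400 = 40 and jsize = 40 * 4 = 160MB, i.e. roughly 4MB of
# journal per 100MB of device (only applied to devices over 1GB).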
892 if jsize: jopt = "-J size=%d" %(jsize,)
893 if isize: iopt = "-I %d" %(isize,)
894 mkfs = 'mkfs.ext2 -j -b 4096 '
895 if not isblock or config.force:
897 if jdev(mkfsoptions) != '':
898 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
900 jmkfs = jmkfs + '-F '
901 jmkfs = jmkfs + jdev(mkfsoptions)
902 (ret, out) = run (jmkfs)
904 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
905 elif fstype == 'reiserfs':
906 # reiserfs journal size is in blocks
907 if jsize: jopt = "--journal_size %d" %(jsize,)
908 mkfs = 'mkreiserfs -ff'
910 panic('unsupported fs type: ', fstype)
912 if config.mkfsoptions != None:
913 mkfs = mkfs + ' ' + config.mkfsoptions
914 if mkfsoptions != None:
915 mkfs = mkfs + ' ' + mkfsoptions
916 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
918 panic("Unable to build fs:", dev, string.join(out))
919 # enable hash tree indexing on the filesystem
920 if fstype in ('ext3', 'extN', 'ldiskfs'):
921 htree = 'echo "feature FEATURE_C5" | debugfs -w'
922 (ret, out) = run (htree, dev)
924 panic("Unable to enable htree:", dev)
926 # some systems use /dev/loopN, some /dev/loop/N
930 if not os.access(loop + str(0), os.R_OK):
932 if not os.access(loop + str(0), os.R_OK):
933 panic ("can't access loop devices")
936 # find loop device assigned to the file
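# losetup prints something like "/dev/loop0: [0302]:1234567 (/tmp/ost1-file)"
# for an assigned device; the regex below extracts the backing file path
# from the parentheses and compares it against the requested file.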
937 def find_assigned_loop(file):
939 for n in xrange(0, MAX_LOOP_DEVICES):
941 if os.access(dev, os.R_OK):
942 (stat, out) = run('losetup', dev)
943 if out and stat == 0:
944 m = re.search(r'\((.*)\)', out[0])
945 if m and file == m.group(1):
949 # find free loop device
950 def find_free_loop(file):
953 # find next free loop
954 for n in xrange(0, MAX_LOOP_DEVICES):
956 if os.access(dev, os.R_OK):
957 (stat, out) = run('losetup', dev)
962 # create file if necessary and assign the first free loop device
963 def init_loop(file, size, fstype, journal_size, inode_size,
964 mkfsoptions, reformat, autoformat, backfstype, backfile):
967 realfstype = backfstype
968 if is_block(backfile):
969 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
970 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
976 dev = find_assigned_loop(realfile)
978 print 'WARNING: file', realfile, 'already mapped to', dev
981 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
982 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
984 panic("Unable to create backing store:", realfile)
985 mkfs(realfile, size, realfstype, journal_size, inode_size,
986 mkfsoptions, isblock=0)
988 dev = find_free_loop(realfile)
990 print "attach " + realfile + " <-> " + dev
991 run('losetup', dev, realfile)
994 print "out of loop devices"
997 # undo loop assignment
998 def clean_loop(dev, fstype, backfstype, backdev):
1003 if not is_block(realfile):
1004 dev = find_assigned_loop(realfile)
1006 print "detach " + dev + " <-> " + realfile
1007 ret, out = run('losetup -d', dev)
1009 log('unable to clean loop device', dev, 'for file', realfile)
1012 # finalizes the passed device
1013 def clean_dev(dev, fstype, backfstype, backdev):
1014 if fstype == 'smfs' or not is_block(dev):
1015 clean_loop(dev, fstype, backfstype, backdev)
1017 # determine if dev is formatted as a <fstype> filesystem
1018 def need_format(fstype, dev):
1019 # FIXME don't know how to implement this
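# One possible sketch (assuming ext2/ext3-family backends and that tune2fs
# is available): probe for an existing superblock and only report "needs
# format" when none is found, e.g.
#     if fstype in ('ext3', 'extN', 'ldiskfs'):
#         ret, out = runcmd("tune2fs -l %s" % dev)
#         return ret != 0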
1022 # initialize a block device if needed
1023 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1024 inode_size, mkfsoptions, backfstype, backdev):
1028 if fstype == 'smfs' or not is_block(dev):
1029 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1030 mkfsoptions, reformat, autoformat, backfstype, backdev)
1031 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1032 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1035 # panic("device:", dev,
1036 # "not prepared, and autoformat is not set.\n",
1037 # "Rerun with --reformat option to format ALL filesystems")
1042 """lookup IP address for an interface"""
1043 rc, out = run("/sbin/ifconfig", iface)
1046 addr = string.split(out[1])[1]
1047 ip = string.split(addr, ':')[1]
1050 def def_mount_options(fstype, target):
1051 """returns deafult mount options for passed fstype and target (mds, ost)"""
1052 if fstype == 'ext3' or fstype == 'ldiskfs':
1053 mountfsoptions = "errors=remount-ro"
1054 if target == 'ost' and sys_get_branch() == '2.4':
1055 mountfsoptions = "%s,asyncdel" % (mountfsoptions)
1056 if target == 'ost' and sys_get_branch() == '2.6':
1057 mountfsoptions = "%s,extents,mballoc" % (mountfsoptions)
1058 return mountfsoptions
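# e.g. for fstype 'ext3' and target 'ost' on a 2.6 kernel this returns
# "errors=remount-ro,extents,mballoc"; on a 2.4 kernel it returns
# "errors=remount-ro,asyncdel".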
1061 def sys_get_elan_position_file():
1062 procfiles = ["/proc/elan/device0/position",
1063 "/proc/qsnet/elan4/device0/position",
1064 "/proc/qsnet/elan3/device0/position"]
1066 if os.access(p, os.R_OK):
1070 def sys_get_local_nid(net_type, wildcard, cluster_id):
1071 """Return the local nid."""
1073 if sys_get_elan_position_file():
1074 local = sys_get_local_address('elan', '*', cluster_id)
1076 local = sys_get_local_address(net_type, wildcard, cluster_id)
1079 def sys_get_local_address(net_type, wildcard, cluster_id):
1080 """Return the local address for the network type."""
1082 if net_type in ('tcp','openib','iib','vib','ra'):
1084 iface, star = string.split(wildcard, ':')
1085 local = if2addr(iface)
1087 panic ("unable to determine ip for:", wildcard)
1089 host = socket.gethostname()
1090 local = socket.gethostbyname(host)
1091 elif net_type == 'elan':
1092 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1093 f = sys_get_elan_position_file()
1095 panic ("unable to determine local Elan ID")
1098 lines = fp.readlines()
1102 if a[0] == 'NodeId':
1106 nid = my_int(cluster_id) + my_int(elan_id)
1107 local = "%d" % (nid)
1108 except ValueError, e:
1112 elif net_type == 'lo':
1113 fixme("automatic local address for loopback")
1114 elif net_type == 'gm':
1115 fixme("automatic local address for GM")
1119 def sys_get_branch():
1120 """Returns kernel release"""
1122 fp = open('/proc/sys/kernel/osrelease')
1123 lines = fp.readlines()
1127 version = string.split(l)
1128 a = string.split(version[0], '.')
1129 return a[0] + '.' + a[1]
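# e.g. an osrelease line of "2.6.9-55.EL_lustre" (illustrative) yields "2.6"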
1134 # XXX: instead of device_list, ask for $name and see what we get
1135 def is_prepared(name):
1136 """Return true if a device exists for the name"""
1137 if config.lctl_dump:
1139 if (config.noexec or config.record) and config.cleanup:
1142 # expect this format:
1143 # 1 UP ldlm ldlm ldlm_UUID 2
1144 out = lctl.device_list()
1146 if name == string.split(s)[3]:
1148 except CommandError, e:
1152 def net_is_prepared():
1153 """If the any device exists, then assume that all networking
1154 has been configured"""
1155 out = lctl.device_list()
1158 def fs_is_mounted(path):
1159 """Return true if path is a mounted lustre filesystem"""
1161 fp = open('/proc/mounts')
1162 lines = fp.readlines()
1166 if a[1] == path and a[2] == 'lustre_lite':
1172 def kmod_find(src_dir, dev_dir, modname):
1173 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1174 for modext in '.ko', '.o':
1175 module = modbase + modext
1177 if os.access(module, os.R_OK):
1183 def kmod_info(modname):
1184 """Returns reference count for passed module name."""
1186 fp = open('/proc/modules')
1187 lines = fp.readlines()
1190 # please forgive my tired fingers for this one
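# i.e. split every /proc/modules line into words and keep only the entry
# whose first word matches modname; that entry carries the module's size,
# use count and the list of modules using it.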
1191 ret = filter(lambda word, mod = modname: word[0] == mod,
1192 map(lambda line: string.split(line), lines))
1196 except Exception, e:
1200 """Presents kernel module"""
1201 def __init__(self, src_dir, dev_dir, name):
1202 self.src_dir = src_dir
1203 self.dev_dir = dev_dir
1206 # FIXME we ignore failure to load the gss module, because we might
1207 # not need it at all.
1210 log ('loading module:', self.name, 'srcdir',
1211 self.src_dir, 'devdir', self.dev_dir)
1213 module = kmod_find(self.src_dir, self.dev_dir,
1215 if not module and self.name != 'ptlrpcs_gss':
1216 panic('module not found:', self.name)
1217 (rc, out) = run('/sbin/insmod', module)
1219 if self.name == 'ptlrpcs_gss':
1220 print "Warning: gss security not supported!"
1222 raise CommandError('insmod', out, rc)
1224 (rc, out) = run('/sbin/modprobe', self.name)
1226 if self.name == 'ptlrpcs_gss':
1227 print "Warning: gss security not supported!"
1229 raise CommandError('modprobe', out, rc)
1233 log('unloading module:', self.name)
1234 (rc, out) = run('/sbin/rmmod', self.name)
1236 log('unable to unload module:', self.name +
1237 "(" + self.refcount() + ")")
1241 """Returns module info if any."""
1242 return kmod_info(self.name)
1245 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1252 """Returns module refcount."""
1259 """Returns 1 if module is used, otherwise 0 is returned."""
1265 if users and users != '(unused)' and users != '-':
1273 """Returns 1 if module is busy, otherwise 0 is returned."""
1274 if self.loaded() and (self.used() or self.refcount() != '0'):
1280 """Manage kernel modules"""
1281 def __init__(self, lustre_dir, portals_dir):
1282 self.lustre_dir = lustre_dir
1283 self.portals_dir = portals_dir
1284 self.kmodule_list = []
1286 def find_module(self, modname):
1287 """Find module by module name"""
1288 for mod in self.kmodule_list:
1289 if mod.name == modname:
1293 def add_portals_module(self, dev_dir, modname):
1294 """Append a module to list of modules to load."""
1296 mod = self.find_module(modname)
1298 mod = kmod(self.portals_dir, dev_dir, modname)
1299 self.kmodule_list.append(mod)
1301 def add_lustre_module(self, dev_dir, modname):
1302 """Append a module to list of modules to load."""
1304 mod = self.find_module(modname)
1306 mod = kmod(self.lustre_dir, dev_dir, modname)
1307 self.kmodule_list.append(mod)
1309 def load_modules(self):
1310 """Load all the modules in the list in the order they appear."""
1311 for mod in self.kmodule_list:
1312 if mod.loaded() and not config.noexec:
1316 def cleanup_modules(self):
1317 """Unload the modules in the list in reverse order."""
1318 rev = self.kmodule_list
1321 if (not mod.loaded() or mod.busy()) and not config.noexec:
1324 if mod.name == 'portals' and config.dump:
1325 lctl.dump(config.dump)
1328 # ============================================================
1329 # Classes to prepare and cleanup the various objects
1332 """ Base class for the rest of the modules. The default cleanup method is
1333 defined here, as well as some utility funcs.
1335 def __init__(self, module_name, db):
1337 self.module_name = module_name
1338 self.name = self.db.getName()
1339 self.uuid = self.db.getUUID()
1343 def info(self, *args):
1344 msg = string.join(map(str,args))
1345 print self.module_name + ":", self.name, self.uuid, msg
1348 """ default cleanup, used for most modules """
1351 lctl.cleanup(self.name, self.uuid, config.force)
1352 except CommandError, e:
1353 log(self.module_name, "cleanup failed: ", self.name)
1357 def add_module(self, manager):
1358 """Adds all needed modules in the order they appear."""
1361 def safe_to_clean(self):
1364 def safe_to_clean_modules(self):
1365 return self.safe_to_clean()
1367 class Network(Module):
1368 def __init__(self,db):
1369 Module.__init__(self, 'NETWORK', db)
1370 self.net_type = self.db.get_val('nettype')
1371 self.nid = self.db.get_val('nid', '*')
1372 self.cluster_id = self.db.get_val('clusterid', "0")
1373 self.port = self.db.get_val_int('port', 0)
1376 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1378 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1379 self.generic_nid = 1
1380 debug("nid:", self.nid)
1382 self.generic_nid = 0
1384 self.nid_uuid = self.nid_to_uuid(self.nid)
1385 self.hostaddr = self.db.get_hostaddr()
1386 if len(self.hostaddr) == 0:
1387 self.hostaddr.append(self.nid)
1388 if '*' in self.hostaddr[0]:
1389 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1390 if not self.hostaddr[0]:
1391 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1392 debug("hostaddr:", self.hostaddr[0])
1394 def add_module(self, manager):
1395 manager.add_portals_module("libcfs", 'libcfs')
1396 manager.add_portals_module("portals", 'portals')
1398 if node_needs_router():
1399 manager.add_portals_module("router", 'kptlrouter')
1400 if self.net_type == 'tcp':
1401 manager.add_portals_module("knals/socknal", 'ksocknal')
1402 if self.net_type == 'elan':
1403 manager.add_portals_module("knals/qswnal", 'kqswnal')
1404 if self.net_type == 'gm':
1405 manager.add_portals_module("knals/gmnal", 'kgmnal')
1406 if self.net_type == 'openib':
1407 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1408 if self.net_type == 'iib':
1409 manager.add_portals_module("knals/iibnal", 'kiibnal')
1410 if self.net_type == 'vib':
1411 manager.add_portals_module("knals/vibnal", 'kvibnal')
1412 if self.net_type == 'lo':
1413 manager.add_portals_module("knals/lonal", 'klonal')
1414 if self.net_type == 'ra':
1415 manager.add_portals_module("knals/ranal", 'kranal')
1417 def nid_to_uuid(self, nid):
1418 return "NID_%s_UUID" %(nid,)
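# e.g. nid_to_uuid("192.168.16.1") returns "NID_192.168.16.1_UUID"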
1421 if not config.record and net_is_prepared():
1423 self.info(self.net_type, self.nid, self.port)
1424 if not (config.record and self.generic_nid):
1425 lctl.network(self.net_type, self.nid)
1426 if self.net_type == 'tcp':
1428 for hostaddr in self.db.get_hostaddr():
1429 ip = string.split(hostaddr, '/')[0]
1430 if len(string.split(hostaddr, '/')) == 2:
1431 netmask = string.split(hostaddr, '/')[1]
1434 lctl.add_interface(self.net_type, ip, netmask)
1435 if self.net_type == 'elan':
1437 if self.port and node_is_router():
1438 run_one_acceptor(self.port)
1439 self.connect_peer_gateways()
1441 def connect_peer_gateways(self):
1442 for router in self.db.lookup_class('node'):
1443 if router.get_val_int('router', 0):
1444 for netuuid in router.get_networks():
1445 net = self.db.lookup(netuuid)
1447 if (gw.cluster_id == self.cluster_id and
1448 gw.net_type == self.net_type):
1449 if gw.nid != self.nid:
1452 def disconnect_peer_gateways(self):
1453 for router in self.db.lookup_class('node'):
1454 if router.get_val_int('router', 0):
1455 for netuuid in router.get_networks():
1456 net = self.db.lookup(netuuid)
1458 if (gw.cluster_id == self.cluster_id and
1459 gw.net_type == self.net_type):
1460 if gw.nid != self.nid:
1463 except CommandError, e:
1464 print "disconnect failed: ", self.name
1468 def safe_to_clean(self):
1469 return not net_is_prepared()
1472 self.info(self.net_type, self.nid, self.port)
1474 stop_acceptor(self.port)
1475 if node_is_router():
1476 self.disconnect_peer_gateways()
1477 if self.net_type == 'tcp':
1478 for hostaddr in self.db.get_hostaddr():
1479 ip = string.split(hostaddr, '/')[0]
1480 lctl.del_interface(self.net_type, ip)
1482 def correct_level(self, level, op=None):
1485 class RouteTable(Module):
1486 def __init__(self,db):
1487 Module.__init__(self, 'ROUTES', db)
1489 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1491 # only setup connections for tcp, openib, iib, vib and ra NALs
1493 if not net_type in ('tcp','openib','iib','vib','ra'):
1496 # connect to target if route is to single node and this node is the gw
1497 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1498 if not local_cluster(net_type, tgt_cluster_id):
1499 panic("target", lo, " not on the local cluster")
1500 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1501 # connect to gateway if this node is not the gw
1502 elif (local_cluster(net_type, gw_cluster_id)
1503 and not local_interface(net_type, gw_cluster_id, gw)):
1504 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1509 panic("no server for nid", lo)
1512 return Network(srvdb)
1515 if not config.record and net_is_prepared():
1518 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1519 lctl.add_route(net_type, gw, lo, hi)
1520 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1524 def safe_to_clean(self):
1525 return not net_is_prepared()
1528 if net_is_prepared():
1529 # the network is still being used, don't clean it up
1531 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1532 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1535 lctl.disconnect(srv)
1536 except CommandError, e:
1537 print "disconnect failed: ", self.name
1542 lctl.del_route(net_type, gw, lo, hi)
1543 except CommandError, e:
1544 print "del_route failed: ", self.name
1548 class Management(Module):
1549 def __init__(self, db):
1550 Module.__init__(self, 'MGMT', db)
1552 def add_module(self, manager):
1553 manager.add_lustre_module('lvfs', 'lvfs')
1554 manager.add_lustre_module('obdclass', 'obdclass')
1555 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1556 manager.add_lustre_module('mgmt', 'mgmt_svc')
1559 if not config.record and is_prepared(self.name):
1562 lctl.newdev("mgmt", self.name, self.uuid)
1564 def safe_to_clean(self):
1568 if is_prepared(self.name):
1569 Module.cleanup(self)
1571 def correct_level(self, level, op=None):
1574 # This is only needed to load the modules; the LDLM device
1575 # is now created automatically.
1577 def __init__(self,db):
1578 Module.__init__(self, 'LDLM', db)
1580 def add_module(self, manager):
1581 manager.add_lustre_module('lvfs', 'lvfs')
1582 manager.add_lustre_module('obdclass', 'obdclass')
1583 manager.add_lustre_module('sec', 'ptlrpcs')
1584 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1585 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1593 def correct_level(self, level, op=None):
1597 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1598 Module.__init__(self, 'LOV', db)
1599 if name_override != None:
1600 self.name = "lov_%s" % name_override
1601 self.mds_uuid = self.db.get_first_ref('mds')
1602 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1603 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1604 self.pattern = self.db.get_val_int('stripepattern', 0)
1605 self.devlist = self.db.get_lov_tgts('lov_tgt')
1606 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1609 self.desc_uuid = self.uuid
1610 self.uuid = generate_client_uuid(self.name)
1611 self.fs_name = fs_name
1613 self.config_only = 1
1615 self.config_only = None
1616 mds = self.db.lookup(self.mds_uuid)
1617 self.mds_name = mds.getName()
1618 for (obd_uuid, index, gen, active) in self.devlist:
1621 self.obdlist.append(obd_uuid)
1622 obd = self.db.lookup(obd_uuid)
1623 osc = get_osc(obd, self.uuid, fs_name)
1625 self.osclist.append((osc, index, gen, active))
1627 panic('osc not found:', obd_uuid)
1636 if not config.record and is_prepared(self.name):
1638 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1639 self.stripe_off, self.pattern, self.devlist,
1641 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1642 self.stripe_sz, self.stripe_off, self.pattern,
1643 string.join(self.obdlist))
1644 for (osc, index, gen, active) in self.osclist:
1645 target_uuid = osc.target_uuid
1647 # Only ignore connect failures with --force, which
1648 # isn't implemented here yet.
1650 osc.prepare(ignore_connect_failure=0)
1651 except CommandError, e:
1652 print "Error preparing OSC %s\n" % osc.uuid
1654 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1657 for (osc, index, gen, active) in self.osclist:
1658 target_uuid = osc.target_uuid
1660 if is_prepared(self.name):
1661 Module.cleanup(self)
1662 if self.config_only:
1663 panic("Can't clean up config_only LOV ", self.name)
1665 def add_module(self, manager):
1666 if self.config_only:
1667 panic("Can't load modules for config_only LOV ", self.name)
1668 for (osc, index, gen, active) in self.osclist:
1669 osc.add_module(manager)
1671 manager.add_lustre_module('lov', 'lov')
1673 def correct_level(self, level, op=None):
1677 def __init__(self, db, uuid, fs_name, name_override = None):
1678 Module.__init__(self, 'LMV', db)
1679 if name_override != None:
1680 self.name = "lmv_%s" % name_override
1682 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1683 if self.devlist == None:
1684 self.devlist = self.db.get_refs('mds')
1687 self.desc_uuid = self.uuid
1689 self.fs_name = fs_name
1690 for mds_uuid in self.devlist:
1691 mds = self.db.lookup(mds_uuid)
1693 panic("MDS not found!")
1694 mdc = MDC(mds, self.uuid, fs_name)
1696 self.mdclist.append(mdc)
1698 panic('mdc not found:', mds_uuid)
1701 if is_prepared(self.name):
1705 for mdc in self.mdclist:
1707 # Only ignore connect failures with --force, which
1708 # isn't implemented here yet.
1709 mdc.prepare(ignore_connect_failure=0)
1710 except CommandError, e:
1711 print "Error preparing LMV %s\n" % mdc.uuid
1714 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1715 string.join(self.devlist))
1718 for mdc in self.mdclist:
1720 if is_prepared(self.name):
1721 Module.cleanup(self)
1723 def add_module(self, manager):
1724 for mdc in self.mdclist:
1725 mdc.add_module(manager)
1727 manager.add_lustre_module('lmv', 'lmv')
1729 def correct_level(self, level, op=None):
1733 def __init__(self,db):
1734 Module.__init__(self, 'GKD', db)
1735 target_uuid = self.db.get_first_ref('target')
1736 self.target = self.db.lookup(target_uuid)
1737 self.name = self.target.getName()
1739 active_uuid = get_active_target(self.target)
1741 panic("No target device found:", target_uuid)
1742 if active_uuid == self.uuid:
1747 self.uuid = target_uuid
1750 if is_prepared(self.name):
1753 debug(self.uuid, "not active")
1757 lctl.newdev("gks", self.name, self.uuid, setup ="")
1758 if not is_prepared('GKT'):
1759 lctl.newdev("gkt", 'GKT', 'GKT_UUID', setup ="")
1763 debug(self.uuid, "not active")
1766 if is_prepared(self.name):
1768 lctl.cleanup(self.name, self.uuid, config.force,
1770 except CommandError, e:
1771 log(self.module_name, "cleanup failed: ", self.name)
1774 Module.cleanup(self)
1775 if is_prepared('GKT'):
1777 lctl.cleanup("GKT", "GKT_UUID", config.force,
1779 except CommandError, e:
1780 print "cleanup failed: ", self.name
1784 def add_module(self, manager):
1786 manager.add_lustre_module('sec/gks', 'gks')
1787 manager.add_lustre_module('sec/gks', 'gkc')
1789 def correct_level(self, level, op=None):
1792 class CONFDEV(Module):
1793 def __init__(self, db, name, target_uuid, uuid):
1794 Module.__init__(self, 'CONFDEV', db)
1795 self.devpath = self.db.get_val('devpath','')
1796 self.backdevpath = self.db.get_val('devpath','')
1797 self.size = self.db.get_val_int('devsize', 0)
1798 self.journal_size = self.db.get_val_int('journalsize', 0)
1799 self.fstype = self.db.get_val('fstype', '')
1800 self.backfstype = self.db.get_val('backfstype', '')
1801 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1802 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1803 self.target = self.db.lookup(target_uuid)
1804 self.name = "conf_%s" % self.target.getName()
1805 self.client_uuids = self.target.get_refs('client')
1806 self.fs_uuid = self.db.get_first_ref('filesystem')
1807 self.obdtype = self.db.get_val('obdtype', '')
1809 self.mds_sec = self.db.get_val('mds_sec', '')
1810 self.oss_sec = self.db.get_val('oss_sec', '')
1811 self.deny_sec = self.db.get_val('deny_sec', '')
1813 if config.mds_mds_sec:
1814 self.mds_sec = config.mds_mds_sec
1815 if config.mds_oss_sec:
1816 self.oss_sec = config.mds_oss_sec
1817 if config.mds_deny_sec:
1819 self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
1821 self.deny_sec = config.mds_deny_sec
1823 if self.obdtype == None:
1824 self.obdtype = 'dumb'
1826 self.conf_name = name
1827 self.conf_uuid = uuid
1828 self.realdev = self.devpath
1833 lmv_uuid = self.db.get_first_ref('lmv')
1834 if lmv_uuid != None:
1835 self.lmv = self.db.lookup(lmv_uuid)
1836 if self.lmv != None:
1837 self.client_uuids = self.lmv.get_refs('client')
1839 if self.target.get_class() == 'mds':
1840 if self.target.get_val('failover', 0):
1841 self.failover_mds = 'f'
1843 self.failover_mds = 'n'
1844 self.format = self.db.get_val('autoformat', "no")
1846 self.format = self.db.get_val('autoformat', "yes")
1847 self.osdtype = self.db.get_val('osdtype')
1848 ost = self.db.lookup(target_uuid)
1849 if ost.get_val('failover', 0):
1850 self.failover_ost = 'f'
1852 self.failover_ost = 'n'
1854 self.inode_size = self.get_inode_size()
1856 if self.lmv != None:
1857 client_uuid = self.name + "_lmv_UUID"
1858 self.master = LMV(self.lmv, client_uuid,
1859 self.conf_name, self.conf_name)
1861 def get_inode_size(self):
1862 inode_size = self.db.get_val_int('inodesize', 0)
1863 if inode_size == 0 and self.target.get_class() == 'mds':
1865 # default inode size for the case when neither LOV nor
1866 # LMV is accessible.
1867 self.inode_size = 256
1869 # find the LOV for this MDS
1870 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1871 if lovconfig_uuid or self.lmv != None:
1872 if self.lmv != None:
1873 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1874 lovconfig = self.lmv.lookup(lovconfig_uuid)
1875 lov_uuid = lovconfig.get_first_ref('lov')
1876 if lov_uuid == None:
1877 panic(self.target.getName() + ": No LOV found for lovconfig ",
1880 lovconfig = self.target.lookup(lovconfig_uuid)
1881 lov_uuid = lovconfig.get_first_ref('lov')
1882 if lov_uuid == None:
1883 panic(self.target.getName() + ": No LOV found for lovconfig ",
1885 if self.lmv != None:
1886 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1887 lovconfig = self.lmv.lookup(lovconfig_uuid)
1888 lov_uuid = lovconfig.get_first_ref('lov')
1890 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1893 # default stripe count controls default inode_size
1894 if lov.stripe_cnt > 0:
1895 stripe_count = lov.stripe_cnt
1897 stripe_count = len(lov.devlist)
1898 if stripe_count > 77:
1900 elif stripe_count > 35:
1902 elif stripe_count > 13:
1904 #elif stripe_count > 3:
1911 def get_mount_options(self, blkdev):
1912 options = def_mount_options(self.fstype,
1913 self.target.get_class())
1915 if config.mountfsoptions:
1917 options = "%s,%s" %(options, config.mountfsoptions)
1919 options = config.mountfsoptions
1920 if self.mountfsoptions:
1921 options = "%s,%s" %(options, self.mountfsoptions)
1923 if self.mountfsoptions:
1925 options = "%s,%s" %(options, self.mountfsoptions)
1927 options = self.mountfsoptions
1929 if self.fstype == 'smfs':
1931 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1934 options = "type=%s,dev=%s" %(self.backfstype,
1937 if self.target.get_class() == 'mds':
1939 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1941 options = "iopen_nopriv"
1946 if is_prepared(self.name):
1949 blkdev = block_dev(self.devpath, self.size, self.fstype,
1950 config.reformat, self.format, self.journal_size,
1951 self.inode_size, self.mkfsoptions, self.backfstype,
1954 if self.fstype == 'smfs':
1959 mountfsoptions = self.get_mount_options(blkdev)
1961 self.info(self.target.get_class(), realdev, mountfsoptions,
1962 self.fstype, self.size, self.format)
1964 lctl.newdev("confobd", self.name, self.uuid,
1965 setup ="%s %s %s" %(realdev, self.fstype,
1968 self.mountfsoptions = mountfsoptions
1969 self.realdev = realdev
1971 def add_module(self, manager):
1972 manager.add_lustre_module('obdclass', 'confobd')
1974 # this method checks whether the current OBD belongs to the same FS as the
1975 # passed mount point uuid. If not, do not write the mountpoint and echo client
1976 # to the log; it is not needed, but takes a very long time (WB test case)
1978 def belong_to_fs(self, mtpt_uuid):
1979 mtpt = self.db.lookup(mtpt_uuid)
1980 fs_uuid = mtpt.get_first_ref('filesystem')
1982 if not self.fs_uuid or self.fs_uuid == "" or fs_uuid == self.fs_uuid:
1987 def write_conf(self):
1988 if self.target.get_class() == 'ost':
1990 lctl.clear_log(self.name, self.target.getName() + '-conf')
1991 lctl.record(self.name, self.target.getName() + '-conf')
1992 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1993 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1995 self.mountfsoptions))
1997 lctl.clear_log(self.name, 'OSS-conf')
1998 lctl.record(self.name, 'OSS-conf')
1999 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2004 if self.target.get_class() == 'mds':
2005 if self.master != None:
2006 master_name = self.master.name
2008 master_name = 'dumb'
2011 lctl.clear_log(self.name, self.target.getName() + '-conf')
2012 lctl.record(self.name, self.target.getName() + '-conf')
2013 lctl.attach("mds", self.conf_name, self.conf_uuid)
2015 lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
2017 lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
2019 for flavor in string.split(self.deny_sec, ','):
2020 lctl.set_security(self.conf_name, "deny_sec", flavor)
2021 lctl.newdev("mds", self.conf_name, self.conf_uuid,
2022 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
2023 self.conf_name, self.mountfsoptions,
2024 master_name, self.obdtype))
2028 if not self.client_uuids:
2031 for uuid in self.client_uuids:
2032 log("recording client:", uuid)
2033 client_uuid = generate_client_uuid(self.name)
2034 client = VOSC(self.db.lookup(uuid), client_uuid,
2035 self.target.getName(), self.name)
2037 lctl.clear_log(self.name, self.target.getName())
2038 lctl.record(self.name, self.target.getName())
2040 lctl.mount_option(self.target.getName(), client.get_name(), "", "")
2044 lctl.clear_log(self.name, self.target.getName() + '-clean')
2045 lctl.record(self.name, self.target.getName() + '-clean')
2047 lctl.del_mount_option(self.target.getName())
2055 # record logs for each client
2057 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
2059 config_options = CONFIG_FILE
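# for every client node profile below, lconf re-invokes itself with
# --record so that the per-client setup and cleanup logs end up on this
# config device.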
2061 for node_db in self.db.lookup_class('node'):
2062 client_name = node_db.getName()
2063 for prof_uuid in node_db.get_refs('profile'):
2064 prof_db = node_db.lookup(prof_uuid)
2065 # refactor this into a function to test "clientness"
2067 for ref_class, ref_uuid in prof_db.get_all_refs():
2068 if ref_class in ('mountpoint','echoclient') and self.belong_to_fs(ref_uuid):
2069 debug("recording:", client_name)
2070 log("recording mountpoint:", ref_uuid)
2071 old_noexec = config.noexec
2073 noexec_opt = ('', '-n')
2074 ret, out = run (sys.argv[0],
2075 noexec_opt[old_noexec == 1],
2076 " -v --record --nomod",
2077 "--record_log", client_name,
2078 "--record_device", self.name,
2079 "--node", client_name,
2082 for s in out: log("record> ", string.strip(s))
2083 ret, out = run (sys.argv[0],
2084 noexec_opt[old_noexec == 1],
2085 "--cleanup -v --record --nomod",
2086 "--record_log", client_name + "-clean",
2087 "--record_device", self.name,
2088 "--node", client_name,
2091 for s in out: log("record> ", string.strip(s))
2092 config.noexec = old_noexec
2096 lctl.start(self.name, self.conf_name)
2097 except CommandError, e:
2099 if self.target.get_class() == 'ost':
2100 if not is_prepared('OSS'):
2102 lctl.start(self.name, 'OSS')
2103 except CommandError, e:
2107 if is_prepared(self.name):
2109 lctl.cleanup(self.name, self.uuid, 0, 0)
2110 clean_dev(self.devpath, self.fstype,
2111 self.backfstype, self.backdevpath)
2112 except CommandError, e:
2113 log(self.module_name, "cleanup failed: ", self.name)
2116 Module.cleanup(self)
2118 class MDSDEV(Module):
2119 def __init__(self,db):
2120 Module.__init__(self, 'MDSDEV', db)
2121 self.devpath = self.db.get_val('devpath','')
2122 self.backdevpath = self.db.get_val('devpath','')
2123 self.size = self.db.get_val_int('devsize', 0)
2124 self.journal_size = self.db.get_val_int('journalsize', 0)
2125 self.fstype = self.db.get_val('fstype', '')
2126 self.backfstype = self.db.get_val('backfstype', '')
2127 self.nspath = self.db.get_val('nspath', '')
2128 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2129 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2130 self.obdtype = self.db.get_val('obdtype', '')
2131 self.root_squash = self.db.get_val('root_squash', '')
2132 self.no_root_squash = self.db.get_val('no_root_squash', '')
2134 target_uuid = self.db.get_first_ref('target')
2135 self.target = self.db.lookup(target_uuid)
2136 self.name = self.target.getName()
2140 lmv_uuid = self.db.get_first_ref('lmv')
2141 if lmv_uuid != None:
2142 self.lmv = self.db.lookup(lmv_uuid)
2144 active_uuid = get_active_target(self.target)
2146 panic("No target device found:", target_uuid)
2147 if active_uuid == self.uuid:
2149 group = self.target.get_val('group')
2150 if config.group and config.group != group:
2155 self.uuid = target_uuid
2158 if self.lmv != None:
2159 client_uuid = self.name + "_lmv_UUID"
2160 self.master = LMV(self.lmv, client_uuid,
2161 self.name, self.name)
2163 self.confobd = CONFDEV(self.db, self.name,
2164 target_uuid, self.uuid)
2166 def add_module(self, manager):
2168 manager.add_lustre_module('mdc', 'mdc')
2169 manager.add_lustre_module('osc', 'osc')
2170 manager.add_lustre_module('ost', 'ost')
2171 manager.add_lustre_module('lov', 'lov')
2172 manager.add_lustre_module('mds', 'mds')
2174 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2175 manager.add_lustre_module(self.fstype, self.fstype)
2178 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2180 # if fstype is smfs, then we should also take care of the backing
2182 if self.fstype == 'smfs':
2183 manager.add_lustre_module(self.backfstype, self.backfstype)
2184 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2186 for option in string.split(self.mountfsoptions, ','):
2187 if option == 'snap':
2188 if not self.fstype == 'smfs':
2189 panic("mountoptions has 'snap', but fstype is not smfs.")
2190 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2191 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2194 if self.master != None:
2195 self.master.add_module(manager)
2197 # add CONFDEV modules
2198 if self.confobd != None:
2199 self.confobd.add_module(manager)
2201 def write_conf(self):
2202 if is_prepared(self.name):
2205 debug(self.uuid, "not active")
2208 self.confobd.prepare()
2209 self.confobd.write_conf()
2210 self.confobd.cleanup()
2213 if is_prepared(self.name):
2216 debug(self.uuid, "not active")
2220 self.confobd.prepare()
2222 self.confobd.write_conf()
2225 if self.master != None:
2226 self.master.prepare()
2228 if not config.record:
2229 self.confobd.start()
2231 if not is_prepared('MDT'):
2232 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
2234 if development_mode():
2235 # set lsd upcall path
2236 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2237 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2238 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2239 print "MDS Warning: failed to set lsd cache upcall"
2241 run("echo ", upcall, " > ", procentry)
2242 # set lacl upcall path
2243 procentry = "/proc/fs/lustre/mds/lacl_upcall"
2244 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lacl_upcall")
2245 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2246 print "MDS Warning: failed to set remote acl upcall"
2248 run("echo ", upcall, " > ", procentry)
2250 if config.root_squash == None:
2251 config.root_squash = self.root_squash
2252 if config.no_root_squash == None:
2253 config.no_root_squash = self.no_root_squash
2254 if config.root_squash:
2255 if config.no_root_squash:
2256 nsnid = config.no_root_squash
2259 lctl.root_squash(self.name, config.root_squash, nsnid)
2261 def msd_remaining(self):
2262 out = lctl.device_list()
2264 if string.split(s)[2] in ('mds',):
2267 def safe_to_clean(self):
2270 def safe_to_clean_modules(self):
2271 return not self.msd_remaining()
2275 debug(self.uuid, "not active")
2278 if is_prepared(self.name):
2280 lctl.cleanup(self.name, self.uuid, config.force,
2282 except CommandError, e:
2283 log(self.module_name, "cleanup failed: ", self.name)
2286 Module.cleanup(self)
2288 if self.master != None:
2289 self.master.cleanup()
2290 if not self.msd_remaining() and is_prepared('MDT'):
2292 lctl.cleanup("MDT", "MDT_UUID", config.force,
2294 except CommandError, e:
2295 print "cleanup failed: ", self.name
2300 self.confobd.cleanup()
2302 def correct_level(self, level, op=None):
2303 #if self.master != None:
2308 def __init__(self, db):
2309 Module.__init__(self, 'OSD', db)
2310 self.osdtype = self.db.get_val('osdtype')
2311 self.devpath = self.db.get_val('devpath', '')
2312 self.backdevpath = self.db.get_val('devpath', '')
2313 self.size = self.db.get_val_int('devsize', 0)
2314 self.journal_size = self.db.get_val_int('journalsize', 0)
2315 self.inode_size = self.db.get_val_int('inodesize', 0)
2316 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2317 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2318 self.fstype = self.db.get_val('fstype', '')
2319 self.backfstype = self.db.get_val('backfstype', '')
2320 self.nspath = self.db.get_val('nspath', '')
2321 target_uuid = self.db.get_first_ref('target')
2322 ost = self.db.lookup(target_uuid)
2323 self.name = ost.getName()
2324 self.format = self.db.get_val('autoformat', 'yes')
2325 if ost.get_val('failover', 0):
2326 self.failover_ost = 'f'
2328 self.failover_ost = 'n'
2330 self.deny_sec = self.db.get_val('deny_sec', '')
2332 if config.ost_deny_sec:
2334 self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
2336 self.deny_sec = config.ost_deny_sec
2338 active_uuid = get_active_target(ost)
2340 panic("No target device found:", target_uuid)
2341 if active_uuid == self.uuid:
2343 group = ost.get_val('group')
2344 if config.group and config.group != group:
2349 self.uuid = target_uuid
2350 self.confobd = CONFDEV(self.db, self.name,
2351 target_uuid, self.uuid)
2353 def add_module(self, manager):
2356 manager.add_lustre_module('ost', 'ost')
2358 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2359 manager.add_lustre_module(self.fstype, self.fstype)
2362 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2364 if self.fstype == 'smfs':
2365 manager.add_lustre_module(self.backfstype, self.backfstype)
2366 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2368 for option in string.split(self.mountfsoptions, ','):
2369 if option == 'snap':
2370 if not self.fstype == 'smfs':
2371 panic("mountoptions with snap, but fstype is not smfs\n")
2372 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2373 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2375 manager.add_lustre_module(self.osdtype, self.osdtype)
2377 # add CONFDEV modules
2378 if self.confobd != None:
2379 self.confobd.add_module(manager)
2382 if is_prepared(self.name):
2385 debug(self.uuid, "not active")
2390 if self.osdtype == 'obdecho':
2391 self.info(self.osdtype)
2392 lctl.newdev("obdecho", self.name, self.uuid)
2393 if not is_prepared('OSS'):
2394 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2396 self.confobd.prepare()
2398 self.confobd.write_conf()
2399 if not config.record:
2400 self.confobd.start()
2403 for flavor in string.split(self.deny_sec, ','):
2404 lctl.set_security(self.name, "deny_sec", flavor)
2406 def write_conf(self):
2407 if is_prepared(self.name):
2410 debug(self.uuid, "not active")
2414 if self.osdtype != 'obdecho':
2415 self.confobd.prepare()
2416 self.confobd.write_conf()
2417 if not config.write_conf:
2418 self.confobd.start()
2419 self.confobd.cleanup()
2421 def osd_remaining(self):
2422 out = lctl.device_list()
2424 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2427 def safe_to_clean(self):
2430 def safe_to_clean_modules(self):
2431 return not self.osd_remaining()
2435 debug(self.uuid, "not active")
2438 if is_prepared(self.name):
2441 lctl.cleanup(self.name, self.uuid, config.force,
2443 except CommandError, e:
2444 log(self.module_name, "cleanup failed: ", self.name)
2447 if not self.osd_remaining() and is_prepared('OSS'):
2449 lctl.cleanup("OSS", "OSS_UUID", config.force,
2451 except CommandError, e:
2452 print "cleanup failed: ", self.name
2456 if self.osdtype != 'obdecho':
2458 self.confobd.cleanup()
2460 def correct_level(self, level, op=None):
2463 # Generic client module, used by OSC and MDC
2464 class Client(Module):
2465 def __init__(self, tgtdb, uuid, module, fs_name,
2466 self_name=None, module_dir=None):
2467 self.target_name = tgtdb.getName()
2468 self.target_uuid = tgtdb.getUUID()
2469 self.module_dir = module_dir
2470 self.backup_targets = []
2471 self.module = module
2474 self.tgt_dev_uuid = get_active_target(tgtdb)
2475 if not self.tgt_dev_uuid:
2476 panic("No target device found for target(1):", self.target_name)
2481 self.module = module
2482 self.module_name = string.upper(module)
2484 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2485 self.target_name, fs_name)
2487 self.name = self_name
2489 self.lookup_server(self.tgt_dev_uuid)
2490 self.lookup_backup_targets()
2491 self.fs_name = fs_name
2492 if not self.module_dir:
2493 self.module_dir = module
2495 def add_module(self, manager):
2496 manager.add_lustre_module(self.module_dir, self.module)
2498 def lookup_server(self, srv_uuid):
2499 """ Lookup a server's network information """
2500 self._server_nets = get_ost_net(self.db, srv_uuid)
2501 if len(self._server_nets) == 0:
2502 panic ("Unable to find a server for:", srv_uuid)
2507 def get_servers(self):
2508 return self._server_nets
2510 def lookup_backup_targets(self):
2511 """ Lookup alternative network information """
2512 prof_list = toplustreDB.get_refs('profile')
2513 for prof_uuid in prof_list:
2514 prof_db = toplustreDB.lookup(prof_uuid)
2516 panic("profile:", prof_uuid, "not found.")
2517 for ref_class, ref_uuid in prof_db.get_all_refs():
2518 if ref_class in ('osd', 'mdsdev'):
2519 devdb = toplustreDB.lookup(ref_uuid)
2520 uuid = devdb.get_first_ref('target')
2521 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2522 self.backup_targets.append(ref_uuid)
2524 def prepare(self, ignore_connect_failure = 0):
2525 self.info(self.target_uuid)
2526 if not config.record and is_prepared(self.name):
2529 srv = choose_local_server(self.get_servers())
2533 routes = find_route(self.get_servers())
2534 if len(routes) == 0:
2535 panic ("no route to", self.target_uuid)
2536 for (srv, r) in routes:
2537 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2538 except CommandError, e:
2539 if not ignore_connect_failure:
2543 if self.target_uuid in config.inactive and self.permits_inactive():
2544 debug("%s inactive" % self.target_uuid)
2545 inactive_p = "inactive"
2547 debug("%s active" % self.target_uuid)
2549 lctl.newdev(self.module, self.name, self.uuid,
2550 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2552 for tgt_dev_uuid in self.backup_targets:
2553 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2554 if len(this_nets) == 0:
2555 panic ("Unable to find a server for:", tgt_dev_uuid)
2556 srv = choose_local_server(this_nets)
2560 routes = find_route(this_nets);
2561 if len(routes) == 0:
2562 panic("no route to", tgt_dev_uuid)
2563 for (srv, r) in routes:
2564 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2566 lctl.add_conn(self.name, srv.nid_uuid);
2569 if is_prepared(self.name):
2570 Module.cleanup(self)
2572 srv = choose_local_server(self.get_servers())
2574 lctl.disconnect(srv)
2576 for (srv, r) in find_route(self.get_servers()):
2577 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2578 except CommandError, e:
2579 log(self.module_name, "cleanup failed: ", self.name)
2583 for tgt_dev_uuid in self.backup_targets:
2584 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2585 srv = choose_local_server(this_net)
2587 lctl.disconnect(srv)
2589 for (srv, r) in find_route(this_net):
2590 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2592 def correct_level(self, level, op=None):
2595 def deactivate(self):
2597 lctl.deactivate(self.name)
2598 except CommandError, e:
2599 log(self.module_name, "deactivate failed: ", self.name)
2604 def __init__(self, db, uuid, fs_name):
2605 Client.__init__(self, db, uuid, 'gkc', fs_name)
2607 def permits_inactive(self):
2611 def __init__(self, db, uuid, fs_name):
2612 Client.__init__(self, db, uuid, 'mdc', fs_name)
2614 def permits_inactive(self):
2618 def __init__(self, db, uuid, fs_name):
2619 Client.__init__(self, db, uuid, 'osc', fs_name)
2621 def permits_inactive(self):
2624 class CMOBD(Module):
2625 def __init__(self, db):
2626 Module.__init__(self, 'CMOBD', db)
2627 self.name = self.db.getName();
2628 self.uuid = generate_client_uuid(self.name)
2629 self.master_uuid = self.db.get_first_ref('masterobd')
2630 self.cache_uuid = self.db.get_first_ref('cacheobd')
2632 master_obd = self.db.lookup(self.master_uuid)
2634 panic('master obd not found:', self.master_uuid)
2636 cache_obd = self.db.lookup(self.cache_uuid)
2638 panic('cache obd not found:', self.cache_uuid)
2643 master_class = master_obd.get_class()
2644 cache_class = cache_obd.get_class()
2646 if master_class == 'lov':
2647 client_uuid = "%s_lov_master_UUID" % (self.name)
2648 self.master = LOV(master_obd, client_uuid, self.name,
2649 "master_%s" % (self.name));
2650 elif master_class == 'ost':
2651 client_uuid = "%s_ost_master_UUID" % (self.name)
2652 self.master = get_osc(master_obd, client_uuid, self.master_uuid)
2653 elif master_class == 'mds':
2654 client_uuid = "%s_mds_master_UUID" % (self.name)
2655 self.master = get_mdc(master_obd, client_uuid, self.master_uuid)
2656 elif master_class == 'lmv':
2657 client_uuid = "%s_lmv_master_UUID" % (self.name)
2658 self.master = LMV(master_obd, client_uuid, self.name,
2659 "master_%s" % (self.name));
2661 panic("unknown master obd class '%s'" %(master_class))
2663 if cache_class == 'ost':
2664 self.cache = get_osc(cache_obd, cache_obd.getUUID(),
2666 elif cache_class == 'mds':
2667 self.cache = get_mdc(cache_obd, cache_obd.getUUID(),
2670 panic("invalid cache obd class '%s'" %(cache_class))
2673 if not config.record and is_prepared(self.name):
2675 self.info(self.master_uuid, self.cache_uuid)
2676 self.master.prepare()
2677 lctl.newdev("cmobd", self.name, self.uuid,
2678 setup ="%s %s" %(self.master.uuid,
2687 def get_master_name(self):
2688 return self.master.name
2690 def get_cache_name(self):
2691 return self.cache.name
2694 if is_prepared(self.name):
2695 Module.cleanup(self)
2697 self.master.cleanup()
2699 def add_module(self, manager):
2700 manager.add_lustre_module('smfs', 'smfs')
2701 manager.add_lustre_module('cmobd', 'cmobd')
2702 self.master.add_module(manager)
2704 def correct_level(self, level, op=None):
2708 def __init__(self, db, uuid, name):
2709 Module.__init__(self, 'COBD', db)
2710 self.name = self.db.getName();
2711 self.uuid = generate_client_uuid(self.name)
2712 self.master_uuid = self.db.get_first_ref('masterobd')
2713 self.cache_uuid = self.db.get_first_ref('cacheobd')
2715 master_obd = self.db.lookup(self.master_uuid)
2717 panic('master obd not found:', self.master_uuid)
2719 cache_obd = self.db.lookup(self.cache_uuid)
2721 panic('cache obd not found:', self.cache_uuid)
2726 master_class = master_obd.get_class()
2727 cache_class = cache_obd.get_class()
2729 if master_class == 'ost' or master_class == 'lov':
2730 client_uuid = "%s_lov_master_UUID" % (self.name)
2731 self.master = LOV(master_obd, client_uuid, name,
2732 "master_%s" % (self.name));
2733 elif master_class == 'mds':
2734 self.master = get_mdc(db, name, self.master_uuid)
2735 elif master_class == 'lmv':
2736 client_uuid = "%s_lmv_master_UUID" % (self.name)
2737 self.master = LMV(master_obd, client_uuid, self.name,
2738 "master_%s" % (self.name));
2740 panic("unknown master obd class '%s'" %(master_class))
2742 if cache_class == 'ost' or cache_class == 'lov':
2743 client_uuid = "%s_lov_cache_UUID" % (self.name)
2744 self.cache = LOV(cache_obd, client_uuid, name,
2745 "cache_%s" % (self.name));
2746 elif cache_class == 'mds':
2747 self.cache = get_mdc(db, name, self.cache_uuid)
2748 elif cache_class == 'lmv':
2749 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2750 self.cache = LMV(cache_obd, client_uuid, self.name,
2751 "cache_%s" % (self.name));
2753 panic("unknown cache obd class '%s'" %(cache_class))
2761 def get_master_name(self):
2762 return self.master.name
2764 def get_cache_name(self):
2765 return self.cache.name
2768 if not config.record and is_prepared(self.name):
2770 self.master.prepare()
2771 self.cache.prepare()
2772 self.info(self.master_uuid, self.cache_uuid)
2773 lctl.newdev("cobd", self.name, self.uuid,
2774 setup ="%s %s" %(self.master.name,
2778 if is_prepared(self.name):
2779 Module.cleanup(self)
2780 self.master.cleanup()
2781 self.cache.cleanup()
2783 def add_module(self, manager):
2784 manager.add_lustre_module('cobd', 'cobd')
2785 self.master.add_module(manager)
2787 # virtual interface for OSC and LOV
2789 def __init__(self, db, client_uuid, name, name_override = None):
2790 Module.__init__(self, 'VOSC', db)
2791 if db.get_class() == 'lov':
2792 self.osc = LOV(db, client_uuid, name, name_override)
2794 elif db.get_class() == 'cobd':
2795 self.osc = COBD(db, client_uuid, name)
2798 self.osc = OSC(db, client_uuid, name)
2802 return self.osc.get_uuid()
2805 return self.osc.get_name()
2813 def add_module(self, manager):
2814 self.osc.add_module(manager)
2816 def correct_level(self, level, op=None):
2817 return self.osc.correct_level(level, op)
2819 # virtual interface for MDC and LMV
2821 def __init__(self, db, client_uuid, name, name_override = None):
2822 Module.__init__(self, 'VMDC', db)
2823 if db.get_class() == 'lmv':
2824 self.mdc = LMV(db, client_uuid, name, name_override)
2825 elif db.get_class() == 'cobd':
2826 self.mdc = COBD(db, client_uuid, name)
2828 self.mdc = MDC(db, client_uuid, name)
2831 return self.mdc.uuid
2834 return self.mdc.name
2842 def add_module(self, manager):
2843 self.mdc.add_module(manager)
2845 def correct_level(self, level, op=None):
2846 return self.mdc.correct_level(level, op)
2848 class ECHO_CLIENT(Module):
2849 def __init__(self,db):
2850 Module.__init__(self, 'ECHO_CLIENT', db)
2851 self.obd_uuid = self.db.get_first_ref('obd')
2852 obd = self.db.lookup(self.obd_uuid)
2853 self.uuid = generate_client_uuid(self.name)
2854 self.osc = VOSC(obd, self.uuid, self.name)
2857 if not config.record and is_prepared(self.name):
2860 self.osc.prepare() # XXX This is so cheating. -p
2861 self.info(self.obd_uuid)
2863 lctl.newdev("echo_client", self.name, self.uuid,
2864 setup = self.osc.get_name())
2867 if is_prepared(self.name):
2868 Module.cleanup(self)
2871 def add_module(self, manager):
2872 self.osc.add_module(manager)
2873 manager.add_lustre_module('obdecho', 'obdecho')
2875 def correct_level(self, level, op=None):
2878 def generate_client_uuid(name):
2879 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2880 name,
2881 int(random.random() * 1048576),
2882 int(random.random() * 1048576))
2883 return client_uuid[:36]
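# Illustrative only (values invented for this note): a name of "client1"
# yields something of the form "1a2b3_client1_4c5d67e8f9", i.e. a 5-digit hex
# field, the name truncated to 19 characters, two more 5-digit hex fields,
# with the whole string clipped to 36 characters.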
2885 class Mountpoint(Module):
2886 def __init__(self, db):
2887 Module.__init__(self, 'MTPT', db)
2888 self.path = self.db.get_val('path')
2889 self.clientoptions = self.db.get_val('clientoptions', '')
2890 self.fs_uuid = self.db.get_first_ref('filesystem')
2891 fs = self.db.lookup(self.fs_uuid)
2892 self.mds_uuid = fs.get_first_ref('lmv')
2893 if not self.mds_uuid:
2894 self.mds_uuid = fs.get_first_ref('mds')
2895 self.obd_uuid = fs.get_first_ref('obd')
2896 self.gks_uuid = fs.get_first_ref('gks')
2897 client_uuid = generate_client_uuid(self.name)
2899 self.oss_sec = self.db.get_val('oss_sec','null')
2900 self.mds_sec = self.db.get_val('mds_sec','null')
2902 self.mds_sec = config.mds_sec
2904 self.oss_sec = config.oss_sec
2906 self.oss_sec = self.db.get_val('oss_sec','null')
2907 self.mds_sec = self.db.get_val('mds_sec','null')
2909 self.mds_sec = config.mds_sec
2911 self.oss_sec = config.oss_sec
2913 ost = self.db.lookup(self.obd_uuid)
2915 panic("no ost: ", self.obd_uuid)
2917 mds = self.db.lookup(self.mds_uuid)
2919 panic("no mds: ", self.mds_uuid)
2921 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2922 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
2925 self.gkc = get_gkc(db, client_uuid, self.name, self.gks_uuid)
2928 if not config.record and fs_is_mounted(self.path):
2929 log(self.path, "already mounted.")
2938 self.info(self.path, self.mds_uuid, self.obd_uuid)
2939 if config.record or config.lctl_dump:
2941 lctl.mount_option(local_node_name, self.vosc.get_name(),
2942 self.vmdc.get_name(), self.gkc.get_name())
2944 lctl.mount_option(local_node_name, self.vosc.get_name(),
2945 self.vmdc.get_name(), "")
2948 if config.clientoptions:
2949 if self.clientoptions:
2950 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2952 self.clientoptions = config.clientoptions
2953 if self.clientoptions:
2954 self.clientoptions = ',' + self.clientoptions
2955 # Linux kernel will deal with async and not pass it to ll_fill_super,
2956 # so replace it with Lustre async
2957 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
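# For example (illustrative), clientoptions of ",async,ro" becomes
# ",lasync,ro" before being appended to the mount options below.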
2960 gkc_name = self.gkc.get_name();
2963 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,gkc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2964 (self.vosc.get_name(), self.vmdc.get_name(), gkc_name, self.mds_sec,
2965 self.oss_sec, self.clientoptions, config.config, self.path)
2966 log("mount -t lustre_lite -o osc=%s,mdc=%s,gkc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2967 (self.vosc.get_name(), self.vmdc.get_name(), gkc_name, self.mds_sec,
2968 self.oss_sec, self.clientoptions, config.config, self.path))
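# Sketch of the resulting command (all names here are hypothetical):
#   mount -t lustre_lite -o osc=lov1_MNT_client,mdc=MDC_host_mds1_MNT,gkc=GKC_host,mds_sec=null,oss_sec=null mycluster /mnt/lustre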
2969 run("mkdir", self.path)
2974 panic("mount failed:", self.path, ":", string.join(val))
2977 self.info(self.path, self.mds_uuid,self.obd_uuid)
2979 if config.record or config.lctl_dump:
2980 lctl.del_mount_option(local_node_name)
2982 if fs_is_mounted(self.path):
2984 (rc, out) = run("umount", "-f", self.path)
2986 (rc, out) = run("umount", self.path)
2988 raise CommandError('umount', out, rc)
2990 if fs_is_mounted(self.path):
2991 panic("fs is still mounted:", self.path)
2998 def add_module(self, manager):
2999 self.vosc.add_module(manager)
3000 self.vmdc.add_module(manager)
3001 manager.add_lustre_module('llite', 'llite')
3003 manager.add_lustre_module('sec/gks', 'gkc')
3005 def correct_level(self, level, op=None):
3008 # ============================================================
3009 # misc query functions
3011 def get_ost_net(self, osd_uuid):
3015 osd = self.lookup(osd_uuid)
3016 node_uuid = osd.get_first_ref('node')
3017 node = self.lookup(node_uuid)
3019 panic("unable to find node for osd_uuid:", osd_uuid,
3020 " node_ref:", node_uuid_)
3021 for net_uuid in node.get_networks():
3022 db = node.lookup(net_uuid)
3023 srv_list.append(Network(db))
3026 # the order of initialization is based on level.
3027 def getServiceLevel(self):
3028 type = self.get_class()
3030 if type in ('network',):
3032 elif type in ('routetbl',):
3034 elif type in ('ldlm',):
3036 elif type in ('osd',):
3038 elif type in ('mdsdev',):
3040 elif type in ('lmv', 'cobd',):
3042 elif type in ('gkd',):
3044 elif type in ('cmobd', 'cobd',):
3046 elif type in ('mountpoint', 'echoclient'):
3049 panic("Unknown type: ", type)
3051 if ret < config.minlevel or ret > config.maxlevel:
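# Rough ordering implied by the chain above (numeric levels are not shown
# here): network < routetbl < ldlm < osd < mdsdev < lmv/cobd < gkd < cmobd
# < mountpoint/echoclient.  Services outside the [--minlevel, --maxlevel]
# range are not configured or cleaned up.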
3056 # return list of services in a profile. list is a list of tuples
3057 # [(level, db_object),]
3058 def getServices(self):
3060 for ref_class, ref_uuid in self.get_all_refs():
3061 servdb = self.lookup(ref_uuid)
3063 level = getServiceLevel(servdb)
3065 list.append((level, servdb))
3067 panic('service not found: ' + ref_uuid)
3073 ############################################################
3075 # FIXME: clean this mess up!
3077 # OSC is no longer in the xml, so we have to fake it.
3078 # this is getting ugly and begging for another refactoring
3079 def get_osc(ost_db, uuid, fs_name):
3080 osc = OSC(ost_db, uuid, fs_name)
3083 def get_mdc(db, fs_name, mds_uuid):
3084 mds_db = db.lookup(mds_uuid);
3086 error("no mds:", mds_uuid)
3087 mdc = MDC(mds_db, mds_uuid, fs_name)
3090 def get_gkc(db, uuid, fs_name, gks_uuid):
3091 gks_db = db.lookup(gks_uuid);
3093 error("no gks:", gks_uuid)
3094 gkc = GKC(gks_db, uuid, fs_name)
3097 ############################################################
3098 # routing ("rooting")
3100 # list of (nettype, cluster_id, nid)
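# Illustrative only: a node with a single tcp interface would contribute an
# entry such as ('tcp', 0, <its nid>) to local_clusters.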
3103 def find_local_clusters(node_db):
3104 global local_clusters
3105 for netuuid in node_db.get_networks():
3106 net = node_db.lookup(netuuid)
3108 debug("add_local", netuuid)
3109 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
3111 if not acceptors.has_key(srv.port):
3112 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
3114 # This node is a gateway.
3116 def node_is_router():
3119 # If there are any routers found in the config, then this will be true
3120 # and all nodes will load kptlrouter.
3122 def node_needs_router():
3123 return needs_router or is_router
3125 # list of (nettype, gw, tgt_cluster_id, lo, hi)
3126 # Currently, these local routes are only added to kptlrouter route
3127 # table if they are needed to connect to a specific server. This
3128 # should be changed so all available routes are loaded, and the
3129 # ptlrouter can make all the decisions.
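# Illustrative only: an entry such as ('tcp', <gateway nid>, 1, <lo nid>,
# <hi nid>) means nids in the range [lo, hi] of target cluster 1 are reached
# over tcp via that gateway.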
3132 def find_local_routes(lustre):
3133 """ Scan the lustre config looking for routers . Build list of
3135 global local_routes, needs_router
3137 list = lustre.lookup_class('node')
3139 if router.get_val_int('router', 0):
3141 for (local_type, local_cluster_id, local_nid) in local_clusters:
3143 for netuuid in router.get_networks():
3144 db = router.lookup(netuuid)
3145 if (local_type == db.get_val('nettype') and
3146 local_cluster_id == db.get_val('clusterid')):
3147 gw = db.get_val('nid')
3150 debug("find_local_routes: gw is", gw)
3151 for route in router.get_local_routes(local_type, gw):
3152 local_routes.append(route)
3153 debug("find_local_routes:", local_routes)
3156 def choose_local_server(srv_list):
3157 for srv in srv_list:
3158 if local_cluster(srv.net_type, srv.cluster_id):
3161 def local_cluster(net_type, cluster_id):
3162 for cluster in local_clusters:
3163 if net_type == cluster[0] and cluster_id == cluster[1]:
3167 def local_interface(net_type, cluster_id, nid):
3168 for cluster in local_clusters:
3169 if (net_type == cluster[0] and cluster_id == cluster[1]
3170 and nid == cluster[2]):
3174 def find_route(srv_list):
3176 frm_type = local_clusters[0][0]
3177 for srv in srv_list:
3178 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
3179 to_type = srv.net_type
3181 cluster_id = srv.cluster_id
3182 debug ('looking for route to', to_type, to)
3183 for r in local_routes:
3184 debug("find_route: ", r)
3185 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
3186 result.append((srv, r))
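# The result is a list of (server, route) pairs; Client.prepare() and
# cleanup() above unpack the route tuple and hand its fields to
# lctl.add_route_host() / lctl.del_route_host().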
3189 def get_active_target(db):
3190 target_uuid = db.getUUID()
3191 target_name = db.getName()
3192 node_name = get_select(target_name)
3194 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
3196 tgt_dev_uuid = db.get_first_ref('active')
3199 def get_server_by_nid_uuid(db, nid_uuid):
3200 for n in db.lookup_class("network"):
3202 if net.nid_uuid == nid_uuid:
3206 ############################################################
3210 type = db.get_class()
3211 debug('Service:', type, db.getName(), db.getUUID())
3216 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3217 elif type == 'network':
3219 elif type == 'routetbl':
3223 elif type == 'cobd':
3224 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3225 elif type == 'cmobd':
3227 elif type == 'mdsdev':
3229 elif type == 'mountpoint':
3231 elif type == 'echoclient':
3238 panic ("unknown service type:", type)
3242 # Prepare the system to run lustre using a particular profile
3243 # in the configuration.
3244 # * load the modules
3245 # * setup networking for the current node
3246 # * make sure partitions are in place and prepared
3247 # * initialize devices with lctl
3248 # Levels are important and need to be enforced.
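# The "operation" callback passed in is one of the do* helpers defined later
# in this file (doWriteconf, doSetup, doLoadModules, doUnloadModules,
# doCleanup).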
3249 def for_each_profile(db, prof_list, operation):
3250 for prof_uuid in prof_list:
3251 prof_db = db.lookup(prof_uuid)
3253 panic("profile:", prof_uuid, "not found.")
3254 services = getServices(prof_db)
3257 def magic_get_osc(db, rec, lov):
3259 lov_uuid = lov.get_uuid()
3260 lov_name = lov.osc.fs_name
3262 lov_uuid = rec.getAttribute('lov_uuidref')
3263 # FIXME: better way to find the mountpoint?
3264 filesystems = db.root_node.getElementsByTagName('filesystem')
3266 for fs in filesystems:
3267 ref = fs.getElementsByTagName('obd_ref')
3268 if ref[0].getAttribute('uuidref') == lov_uuid:
3269 fsuuid = fs.getAttribute('uuid')
3273 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
3275 mtpts = db.root_node.getElementsByTagName('mountpoint')
3278 ref = fs.getElementsByTagName('filesystem_ref')
3279 if ref[0].getAttribute('uuidref') == fsuuid:
3280 lov_name = fs.getAttribute('name')
3284 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3286 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3288 ost_uuid = rec.getAttribute('ost_uuidref')
3289 obd = db.lookup(ost_uuid)
3292 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3294 osc = get_osc(obd, lov_uuid, lov_name)
3296 panic('osc not found:', ost_uuid)
3299 # write logs for update records. sadly, logs of all types -- and updates in
3300 # particular -- are something of an afterthought. lconf needs to be rewritten with
3301 # these as core concepts. so this is a pretty big hack.
3302 def process_update_record(db, update, lov):
3303 for rec in update.childNodes:
3304 if rec.nodeType != rec.ELEMENT_NODE:
3307 log("found "+rec.nodeName+" record in update version " +
3308 str(update.getAttribute('version')))
3310 lov_uuid = rec.getAttribute('lov_uuidref')
3311 ost_uuid = rec.getAttribute('ost_uuidref')
3312 index = rec.getAttribute('index')
3313 gen = rec.getAttribute('generation')
3315 if not lov_uuid or not ost_uuid or not index or not gen:
3316 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3319 tmplov = db.lookup(lov_uuid)
3321 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3322 lov_name = tmplov.getName()
3324 lov_name = lov.osc.name
3326 # ------------------------------------------------------------- add
3327 if rec.nodeName == 'add':
3329 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3332 osc = magic_get_osc(db, rec, lov)
3335 # Only ignore connect failures with --force, which
3336 # isn't implemented here yet.
3337 osc.prepare(ignore_connect_failure=0)
3338 except CommandError, e:
3339 print "Error preparing OSC %s\n" % osc.uuid
3342 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3344 # ------------------------------------------------------ deactivate
3345 elif rec.nodeName == 'deactivate':
3349 osc = magic_get_osc(db, rec, lov)
3353 except CommandError, e:
3354 print "Error deactivating OSC %s\n" % osc.uuid
3357 # ---------------------------------------------------------- delete
3358 elif rec.nodeName == 'delete':
3362 osc = magic_get_osc(db, rec, lov)
3368 except CommandError, e:
3369 print "Error cleaning up OSC %s\n" % osc.uuid
3372 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3374 def process_updates(db, log_device, log_name, lov = None):
3375 updates = db.root_node.getElementsByTagName('update')
3377 if not u.childNodes:
3378 log("ignoring empty update record (version " +
3379 str(u.getAttribute('version')) + ")")
3382 version = u.getAttribute('version')
3383 real_name = "%s-%s" % (log_name, version)
3384 lctl.clear_log(log_device, real_name)
3385 lctl.record(log_device, real_name)
3387 process_update_record(db, u, lov)
3391 def doWriteconf(services):
3395 if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
3396 n = newService(s[1])
3400 def doSetup(services):
3405 n = newService(s[1])
3407 slist.append((n.level, n))
3410 nl = n[1].correct_level(n[0])
3411 nlist.append((nl, n[1]))
3416 def doLoadModules(services):
3420 # adding all needed modules from all services
3422 n = newService(s[1])
3423 n.add_module(mod_manager)
3425 # loading all registered modules
3426 mod_manager.load_modules()
3428 def doUnloadModules(services):
3432 # adding all needed modules from all services
3434 n = newService(s[1])
3435 if n.safe_to_clean_modules():
3436 n.add_module(mod_manager)
3438 # unloading all registered modules
3439 mod_manager.cleanup_modules()
3441 def doCleanup(services):
3447 n = newService(s[1])
3449 slist.append((n.level, n))
3452 nl = n[1].correct_level(n[0])
3453 nlist.append((nl, n[1]))
3458 if n[1].safe_to_clean():
3463 def doHost(lustreDB, hosts):
3464 global is_router, local_node_name
3467 node_db = lustreDB.lookup_name(h, 'node')
3471 panic('No host entry found.')
3473 local_node_name = node_db.get_val('name', 0)
3474 is_router = node_db.get_val_int('router', 0)
3475 lustre_upcall = node_db.get_val('lustreUpcall', '')
3476 portals_upcall = node_db.get_val('portalsUpcall', '')
3477 timeout = node_db.get_val_int('timeout', 0)
3478 ptldebug = node_db.get_val('ptldebug', '')
3479 subsystem = node_db.get_val('subsystem', '')
3481 find_local_clusters(node_db)
3483 find_local_routes(lustreDB)
3485 # Two step process: (1) load modules, (2) setup lustre
3486 # if not cleaning, load modules first.
3487 prof_list = node_db.get_refs('profile')
3489 if config.write_conf:
3490 for_each_profile(node_db, prof_list, doLoadModules)
3492 for_each_profile(node_db, prof_list, doWriteconf)
3493 for_each_profile(node_db, prof_list, doUnloadModules)
3496 elif config.recover:
3497 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3498 raise Lustre.LconfError( "--recover requires --tgt_uuid <UUID> " +
3499 "--client_uuid <UUID> --conn_uuid <UUID>")
3500 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3502 elif config.cleanup:
3504 # the command line can override this value
3506 # ugly hack, only need to run lctl commands for --dump
3507 if config.lctl_dump or config.record:
3508 for_each_profile(node_db, prof_list, doCleanup)
3511 sys_set_timeout(timeout)
3512 sys_set_ptldebug(ptldebug)
3513 sys_set_subsystem(subsystem)
3514 sys_set_lustre_upcall(lustre_upcall)
3515 sys_set_portals_upcall(portals_upcall)
3517 for_each_profile(node_db, prof_list, doCleanup)
3518 for_each_profile(node_db, prof_list, doUnloadModules)
3522 # ugly hack, only need to run lctl commands for --dump
3523 if config.lctl_dump or config.record:
3524 sys_set_timeout(timeout)
3525 sys_set_lustre_upcall(lustre_upcall)
3526 for_each_profile(node_db, prof_list, doSetup)
3530 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3531 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3533 for_each_profile(node_db, prof_list, doLoadModules)
3535 sys_set_debug_path()
3536 sys_set_ptldebug(ptldebug)
3537 sys_set_subsystem(subsystem)
3538 script = config.gdb_script
3539 run(lctl.lctl, ' modules >', script)
3541 log ("The GDB module script is in", script)
3542 # pause, so user has time to break and
3545 sys_set_timeout(timeout)
3546 sys_set_lustre_upcall(lustre_upcall)
3547 sys_set_portals_upcall(portals_upcall)
3549 for_each_profile(node_db, prof_list, doSetup)
3552 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3553 tgt = lustreDB.lookup(tgt_uuid)
3555 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3556 new_uuid = get_active_target(tgt)
3558 raise Lustre.LconfError("doRecovery: no active target found for: " +
3560 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3562 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3564 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3566 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
3569 lctl.disconnect(oldnet)
3570 except CommandError, e:
3571 log("recover: disconnect", nid_uuid, "failed: ")
3576 except CommandError, e:
3577 log("recover: connect failed")
3580 lctl.recover(client_uuid, net.nid_uuid)
3583 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3584 base = os.path.dirname(cmd)
3585 if development_mode():
3586 if not config.lustre:
3587 debug('using objdir module paths')
3588 config.lustre = (os.path.join(base, ".."))
3589 # normalize the portals dir, using command line arg if set
3591 portals_dir = config.portals
3592 dir = os.path.join(config.lustre, portals_dir)
3593 config.portals = dir
3594 debug('config.portals', config.portals)
3595 elif config.lustre and config.portals:
3597 # if --lustre and --portals, normalize portals
3598 # can ignore PORTALS_DIR here, since it is probably useless here
3599 config.portals = os.path.join(config.lustre, config.portals)
3600 debug('config.portals B', config.portals)
3602 def sysctl(path, val):
3603 debug("+ sysctl", path, val)
3607 fp = open(os.path.join('/proc/sys', path), 'w')
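# Illustrative usage (value invented for this note):
#   sysctl('portals/debug_path', '/tmp/lustre-log')
# writes the value into /proc/sys/portals/debug_path.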
3613 def sys_set_debug_path():
3614 sysctl('portals/debug_path', config.debug_path)
3616 def sys_set_lustre_upcall(upcall):
3617 # the command overrides the value in the node config
3618 if config.lustre_upcall:
3619 upcall = config.lustre_upcall
3621 upcall = config.upcall
3623 lctl.set_lustre_upcall(upcall)
3625 def sys_set_portals_upcall(upcall):
3626 # the command overrides the value in the node config
3627 if config.portals_upcall:
3628 upcall = config.portals_upcall
3630 upcall = config.upcall
3632 sysctl('portals/upcall', upcall)
3634 def sys_set_timeout(timeout):
3635 # the command overrides the value in the node config
3636 if config.timeout and config.timeout > 0:
3637 timeout = config.timeout
3638 if timeout != None and timeout > 0:
3639 lctl.set_timeout(timeout)
3641 def sys_tweak_socknal ():
3642 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3643 if sys_get_branch() == '2.6':
3644 fp = open('/proc/meminfo')
3645 lines = fp.readlines()
3650 if a[0] == 'MemTotal:':
3652 debug("memtotal" + memtotal)
3653 if int(memtotal) < 262144:
3654 minfree = int(memtotal) / 16
3657 debug("+ minfree ", minfree)
3658 sysctl("vm/min_free_kbytes", minfree)
3659 if config.single_socket:
3660 sysctl("socknal/typed", 0)
3662 def sys_optimize_elan ():
3663 procfiles = ["/proc/elan/config/eventint_punt_loops",
3664 "/proc/qsnet/elan3/config/eventint_punt_loops",
3665 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3667 if os.access(p, os.W_OK):
3668 run ("echo 1 > " + p)
3670 def sys_set_ptldebug(ptldebug):
3672 ptldebug = config.ptldebug
3675 val = eval(ptldebug, ptldebug_names)
3676 val = "0x%x" % (val & 0xffffffffL)
3677 sysctl('portals/debug', val)
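# Illustrative only: a ptldebug string such as "warning" is eval'd against
# the ptldebug_names table defined earlier and written here as a hex bitmask.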
3678 except NameError, e:
3681 def sys_set_subsystem(subsystem):
3682 if config.subsystem:
3683 subsystem = config.subsystem
3686 val = eval(subsystem, subsystem_names)
3687 val = "0x%x" % (val & 0xffffffffL)
3688 sysctl('portals/subsystem_debug', val)
3689 except NameError, e:
3692 def sys_set_netmem_max(path, max):
3693 debug("setting", path, "to at least", max)
3701 fp = open(path, 'w')
3702 fp.write('%d\n' %(max))
3705 def sys_make_devices():
3706 if not os.access('/dev/portals', os.R_OK):
3707 run('mknod /dev/portals c 10 240')
3708 if not os.access('/dev/obd', os.R_OK):
3709 run('mknod /dev/obd c 10 241')
3711 # Add dir to the global PATH, if not already there.
3712 def add_to_path(new_dir):
3713 syspath = string.split(os.environ['PATH'], ':')
3714 if new_dir in syspath:
3716 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
3718 def default_debug_path():
3719 path = '/tmp/lustre-log'
3720 if os.path.isdir('/r'):
3725 def default_gdb_script():
3726 script = '/tmp/ogdb'
3727 if os.path.isdir('/r'):
3728 return '/r' + script
3732 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3733 # ensure basic elements are in the system path
3734 def sanitise_path():
3735 for dir in DEFAULT_PATH:
3738 # global hack for the --select handling
3740 def init_select(args):
3741 # args = [service=nodeA,service2=nodeB service3=nodeC]
3744 list = string.split(arg, ',')
3746 srv, node = string.split(entry, '=')
3747 tgt_select[srv] = node
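# e.g. (illustrative) "--select mds1=nodeA,ost1=nodeB" leaves
# tgt_select == {'mds1': 'nodeA', 'ost1': 'nodeB'}.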
3749 def get_select(srv):
3750 if tgt_select.has_key(srv):
3751 return tgt_select[srv]
3755 FLAG = Lustre.Options.FLAG
3756 PARAM = Lustre.Options.PARAM
3757 INTPARAM = Lustre.Options.INTPARAM
3758 PARAMLIST = Lustre.Options.PARAMLIST
3760 ('verbose,v', "Print system commands as they are run"),
3761 ('ldapurl',"LDAP server URL, e.g. ldap://localhost", PARAM),
3762 ('config', "Cluster config name used for LDAP query", PARAM),
3763 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3764 ('node', "Load config for <nodename>", PARAM),
3765 ('sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
3766 ('mds_sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
3767 ('oss_sec',"security flavor <null|krb5i|krb5p> between this client with ost", PARAM),
3768 ('mds_mds_sec',"security flavor <null|krb5i|krb5p> between this mds with other mds", PARAM),
3769 ('mds_oss_sec',"security flavor <null|krb5i|krb5p> between this mds with ost", PARAM),
3770 ('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
3771 ('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
3772 ('cleanup,d', "Cleans up config. (Shutdown)"),
3773 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3775 ('single_socket', "socknal option: only use one socket instead of bundle",
3777 ('failover',"""Used to shut down without saving state.
3778 This will allow this node to "give up" a service to
3779 another node for failover purposes. This will not
3780 be a clean shutdown.""",
3782 ('gdb', """Prints message after creating gdb module script
3783 and sleeps for 5 seconds."""),
3784 ('noexec,n', """Prints the commands and steps that will be run for a
3785 config without executing them. This can be used to check if a
3786 config file is doing what it should be doing"""),
3787 ('nomod', "Skip load/unload module step."),
3788 ('nosetup', "Skip device setup/cleanup step."),
3789 ('reformat', "Reformat all devices (without question)"),
3790 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3791 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3792 ('clientoptions', "Additional options for Lustre", PARAM),
3793 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3795 ('write_conf', "Save all the client config information on mds."),
3796 ('record', "Write config information on mds."),
3797 ('record_log', "Name of config record log.", PARAM),
3798 ('record_device', "MDS device name that will record the config commands",
3800 ('root_squash', "MDS squash root to appointed uid",
3802 ('no_root_squash', "Don't squash root for appointed nid",
3804 ('minlevel', "Minimum level of services to configure/cleanup",
3806 ('maxlevel', """Maximum level of services to configure/cleanup
3807 Levels are approximately like:
3812 70 - mountpoint, echo_client, osc, mdc, lov""",
3814 ('lustre', """Base directory of lustre sources. This parameter will
3815 cause lconf to load modules from a source tree.""", PARAM),
3816 ('portals', """Portals source directory. If this is a relative path,
3817 then it is assumed to be relative to lustre. """, PARAM),
3818 ('timeout', "Set recovery timeout", INTPARAM),
3819 ('upcall', "Set both portals and lustre upcall script", PARAM),
3820 ('lustre_upcall', "Set lustre upcall script", PARAM),
3821 ('portals_upcall', "Set portals upcall script", PARAM),
3822 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3823 ('ptldebug', "Set the portals debug level", PARAM),
3824 ('subsystem', "Set the portals debug subsystem", PARAM),
3825 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3826 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3827 # Client recovery options
3828 ('recover', "Recover a device"),
3829 ('group', "The group of devices to configure or cleanup", PARAM),
3830 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3831 ('client_uuid', "The failed client (required for recovery)", PARAM),
3832 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3834 ('inactive', """The name of an inactive service, to be ignored during
3835 mounting (currently OST-only). Can be repeated.""",
3840 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3842 # in the upcall this is set to SIG_IGN
3843 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3845 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3847 config, args = cl.parse(sys.argv[1:])
3848 except Lustre.OptionError, e:
3852 setupModulePath(sys.argv[0])
3854 host = socket.gethostname()
3856 # the PRNG is normally seeded with time(), which is not so good for starting
3857 # time-synchronized clusters
3858 input = open('/dev/urandom', 'r')
3860 print 'Unable to open /dev/urandom!'
3862 seed = input.read(32)
3868 init_select(config.select)
3871 # allow config to be fetched via HTTP, but only with python2
3872 if sys.version[0] != '1' and args[0].startswith('http://'):
3875 config_file = urllib2.urlopen(args[0])
3876 except (urllib2.URLError, socket.error), err:
3877 if hasattr(err, 'args'):
3879 print "Could not access '%s': %s" %(args[0], err)
3881 elif not os.access(args[0], os.R_OK):
3882 print 'File not found or readable:', args[0]
3886 config_file = open(args[0], 'r')
3888 dom = xml.dom.minidom.parse(config_file)
3890 panic("%s does not appear to be a config file." % (args[0]))
3891 sys.exit(1) # make sure to die here, even in debug mode.
3893 CONFIG_FILE = args[0]
3894 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3895 if not config.config:
3896 config.config = os.path.basename(args[0]) # use full path?
3897 if config.config[-4:] == '.xml':
3898 config.config = config.config[:-4]
3899 elif config.ldapurl:
3900 if not config.config:
3901 panic("--ldapurl requires --config name")
3902 dn = "config=%s,fs=lustre" % (config.config)
3903 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3904 elif config.ptldebug or config.subsystem:
3905 sys_set_ptldebug(None)
3906 sys_set_subsystem(None)
3909 print 'Missing config file or ldap URL.'
3910 print 'see lconf --help for command summary'
3913 toplustreDB = lustreDB
3915 ver = lustreDB.get_version()
3917 panic("No version found in config data, please recreate.")
3918 if ver != Lustre.CONFIG_VERSION:
3919 panic("Config version", ver, "does not match lconf version",
3920 Lustre.CONFIG_VERSION)
3924 node_list.append(config.node)
3927 node_list.append(host)
3928 node_list.append('localhost')
3930 debug("configuring for host: ", node_list)
3933 config.debug_path = config.debug_path + '-' + host
3934 config.gdb_script = config.gdb_script + '-' + host
3936 lctl = LCTLInterface('lctl')
3938 if config.lctl_dump:
3939 lctl.use_save_file(config.lctl_dump)
3942 if not (config.record_device and config.record_log):
3943 panic("When recording, both --record_log and --record_device must be specified.")
3944 lctl.clear_log(config.record_device, config.record_log)
3945 lctl.record(config.record_device, config.record_log)
3947 # init module manager
3948 mod_manager = kmod_manager(config.lustre, config.portals)
3950 doHost(lustreDB, node_list)
3952 if not config.record:
3957 process_updates(lustreDB, config.record_device, config.record_log)
3959 if __name__ == "__main__":
3962 except Lustre.LconfError, e:
3964 # traceback.print_exc(file=sys.stdout)
3966 except CommandError, e:
3970 if first_cleanup_error:
3971 sys.exit(first_cleanup_error)