3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
first_cleanup_error = 0

def cleanup_error(rc):
    """Latch the exit status of the first failing cleanup step.

    Once a non-zero status has been recorded, later calls are no-ops,
    so the error finally reported is the earliest one seen.
    """
    global first_cleanup_error
    if not first_cleanup_error:
        first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort via LconfError, naming an unimplemented code path."""
    # Use the call form of raise for consistency with the other
    # LconfError raises in this file (the old comma form is deprecated).
    raise Lustre.LconfError(msg + ' not implemented yet.')
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
173 # ============================================================
174 # locally defined exceptions
175 class CommandError (exceptions.Exception):
176 def __init__(self, cmd_name, cmd_err, rc=None):
177 self.cmd_name = cmd_name
178 self.cmd_err = cmd_err
183 if type(self.cmd_err) == types.StringType:
185 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187 print "! %s: %s" % (self.cmd_name, self.cmd_err)
188 elif type(self.cmd_err) == types.ListType:
190 print "! %s (error %d):" % (self.cmd_name, self.rc)
192 print "! %s:" % (self.cmd_name)
193 for s in self.cmd_err:
194 print "> %s" %(string.strip(s))
199 # ============================================================
200 # handle daemons, like the acceptor
202 """ Manage starting and stopping a daemon. Assumes daemon manages
its own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
229 #time.sleep(1) # let daemon die
231 log("unable to kill", self.command, e)
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
def command_line(self):
    """Return the acceptor's argument string: "<flags> <port>"."""
    # %s formatting applies str() to each value, exactly like
    # string.join(map(str, ...)) with the default space separator.
    return "%s %s" % (self.flags, self.port)
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize close by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    """Divert subsequent lctl command batches into *file* as a dump
    rather than executing them."""
    self.save_file = file
def record(self, dev_name, logname):
    """Begin recording lctl commands into config log *logname* on
    device *dev_name*."""
    log("Recording log", logname, "on", dev_name)
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Finish recording: announce completion, then clear the
    recording state so later commands execute normally."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Switch *fd* into non-blocking mode so reads return immediately
    instead of stalling while draining lctl's output."""
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From "Python Cookbook" from O'Reilly
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
405 def runcmd(self, *args):
407 run lctl using the command line
409 cmd = string.join(map(str,args))
410 debug("+", self.lctl, cmd)
411 rc, out = run(self.lctl, cmd)
413 raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # add a route to a range
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
632 self.attach(type, name, uuid);
634 self.setup(name, setup)
635 except CommandError, e:
636 self.cleanup(name, uuid, 0)
640 def cleanup(self, name, uuid, force, failover = 0):
641 if failover: force = 1
647 quit""" % (name, ('', 'force')[force],
648 ('', 'failover')[failover])
652 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
653 stripe_sz, stripe_off, pattern, devlist = None):
656 lov_setup %s %d %d %d %s %s
657 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
661 # add an OBD to a LOV
662 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
664 lov_modify_tgts add %s %s %s %s
665 quit""" % (name, obd_uuid, index, gen)
669 def lmv_setup(self, name, uuid, desc_uuid, devlist):
673 quit""" % (name, uuid, desc_uuid, devlist)
676 # delete an OBD from a LOV
677 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
679 lov_modify_tgts del %s %s %s %s
680 quit""" % (name, obd_uuid, index, gen)
684 def deactivate(self, name):
692 def dump(self, dump_file):
695 quit""" % (dump_file)
698 # get list of devices
699 def device_list(self):
700 devices = '/proc/fs/lustre/devices'
702 if os.access(devices, os.R_OK):
704 fp = open(devices, 'r')
712 def lustre_version(self):
713 rc, out = self.runcmd('version')
717 def mount_option(self, profile, osc, mdc, gkc):
719 mount_option %s %s %s %s
720 quit""" % (profile, osc, mdc, gkc)
723 # delete mount options
724 def del_mount_option(self, profile):
730 def set_timeout(self, timeout):
736 def set_lustre_upcall(self, upcall):
741 # ============================================================
742 # Various system-level functions
743 # (ideally moved to their own module)
745 # Run a command and return the output and status.
746 # stderr is sent to /dev/null, could use popen3 to
747 # save it if necessary
750 if config.noexec: return (0, [])
751 f = os.popen(cmd + ' 2>&1')
761 cmd = string.join(map(str,args))
764 # Run a command in the background.
765 def run_daemon(*args):
766 cmd = string.join(map(str,args))
768 if config.noexec: return 0
769 f = os.popen(cmd + ' 2>&1')
777 # Determine full path to use for an external command
778 # searches dirname(argv[0]) first, then PATH
780 syspath = string.split(os.environ['PATH'], ':')
781 cmdpath = os.path.dirname(sys.argv[0])
782 syspath.insert(0, cmdpath);
784 syspath.insert(0, os.path.join(config.portals, 'utils/'))
786 prog = os.path.join(d,cmd)
787 if os.access(prog, os.X_OK):
791 # Recursively look for file starting at base dir
792 def do_find_file(base, mod):
793 fullname = os.path.join(base, mod)
794 if os.access(fullname, os.R_OK):
796 for d in os.listdir(base):
797 dir = os.path.join(base,d)
798 if os.path.isdir(dir):
799 module = do_find_file(dir, mod)
803 # is the path a block device?
810 return stat.S_ISBLK(s[stat.ST_MODE])
812 # find the journal device from mkfs options
818 while i < len(x) - 1:
819 if x[i] == '-J' and x[i+1].startswith('device='):
825 # build fs according to type
827 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
833 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
835 # devsize is in 1k, and fs block count is in 4k
836 block_cnt = devsize/4
838 if fstype in ('ext3', 'extN', 'ldiskfs'):
839 # ext3 journal size is in megabytes
840 # but don't set jsize if mkfsoptions indicates a separate journal device
841 if jsize == 0 and jdev(mkfsoptions) == '':
843 if not is_block(dev):
844 ret, out = runcmd("ls -l %s" %dev)
845 devsize = int(string.split(out[0])[4]) / 1024
847 # sfdisk works for symlink, hardlink, and realdev
848 ret, out = runcmd("sfdisk -s %s" %dev)
850 devsize = int(out[0])
852 # sfdisk -s will fail for too large block device,
853 # then, read the size of partition from /proc/partitions
855 # get the realpath of the device
856 # it may be the real device, such as /dev/hda7
857 # or the hardlink created via mknod for a device
858 if 'realpath' in dir(os.path):
859 real_dev = os.path.realpath(dev)
863 while os.path.islink(real_dev) and (link_count < 20):
864 link_count = link_count + 1
865 dev_link = os.readlink(real_dev)
866 if os.path.isabs(dev_link):
869 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
871 panic("Entountered too many symbolic links resolving block device:", dev)
873 # get the major and minor number of the realpath via ls
874 # it seems python(os.stat) does not return
875 # the st_rdev member of the stat structure
876 ret, out = runcmd("ls -l %s" %real_dev)
877 major = string.split(string.split(out[0])[4], ",")[0]
878 minor = string.split(out[0])[5]
880 # get the devsize from /proc/partitions with the major and minor number
881 ret, out = runcmd("cat /proc/partitions")
884 if string.split(line)[0] == major and string.split(line)[1] == minor:
885 devsize = int(string.split(line)[2])
888 if devsize > 1024 * 1024:
889 jsize = ((devsize / 102400) * 4)
892 if jsize: jopt = "-J size=%d" %(jsize,)
893 if isize: iopt = "-I %d" %(isize,)
894 mkfs = 'mkfs.ext2 -j -b 4096 '
895 if not isblock or config.force:
897 if jdev(mkfsoptions) != '':
898 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
900 jmkfs = jmkfs + '-F '
901 jmkfs = jmkfs + jdev(mkfsoptions)
902 (ret, out) = run (jmkfs)
904 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
905 elif fstype == 'reiserfs':
906 # reiserfs journal size is in blocks
907 if jsize: jopt = "--journal_size %d" %(jsize,)
908 mkfs = 'mkreiserfs -ff'
910 panic('unsupported fs type: ', fstype)
912 if config.mkfsoptions != None:
913 mkfs = mkfs + ' ' + config.mkfsoptions
914 if mkfsoptions != None:
915 mkfs = mkfs + ' ' + mkfsoptions
916 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
918 panic("Unable to build fs:", dev, string.join(out))
919 # enable hash tree indexing on fsswe
920 if fstype in ('ext3', 'extN', 'ldiskfs'):
921 htree = 'echo "feature FEATURE_C5" | debugfs -w'
922 (ret, out) = run (htree, dev)
924 panic("Unable to enable htree:", dev)
926 # some systems use /dev/loopN, some /dev/loop/N
930 if not os.access(loop + str(0), os.R_OK):
932 if not os.access(loop + str(0), os.R_OK):
933 panic ("can't access loop devices")
936 # find loop device assigned to the file
937 def find_assigned_loop(file):
939 for n in xrange(0, MAX_LOOP_DEVICES):
941 if os.access(dev, os.R_OK):
942 (stat, out) = run('losetup', dev)
943 if out and stat == 0:
944 m = re.search(r'\((.*)\)', out[0])
945 if m and file == m.group(1):
949 # find free loop device
950 def find_free_loop(file):
953 # find next free loop
954 for n in xrange(0, MAX_LOOP_DEVICES):
956 if os.access(dev, os.R_OK):
957 (stat, out) = run('losetup', dev)
962 # create file if necessary and assign the first free loop device
963 def init_loop(file, size, fstype, journal_size, inode_size,
964 mkfsoptions, reformat, autoformat, backfstype, backfile):
967 realfstype = backfstype
968 if is_block(backfile):
969 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
970 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
976 dev = find_assigned_loop(realfile)
978 print 'WARNING: file', realfile, 'already mapped to', dev
981 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
982 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
984 panic("Unable to create backing store:", realfile)
985 mkfs(realfile, size, realfstype, journal_size, inode_size,
986 mkfsoptions, isblock=0)
988 dev = find_free_loop(realfile)
990 print "attach " + realfile + " <-> " + dev
991 run('losetup', dev, realfile)
994 print "out of loop devices"
997 # undo loop assignment
998 def clean_loop(dev, fstype, backfstype, backdev):
1003 if not is_block(realfile):
1004 dev = find_assigned_loop(realfile)
1006 print "detach " + dev + " <-> " + realfile
1007 ret, out = run('losetup -d', dev)
1009 log('unable to clean loop device', dev, 'for file', realfile)
# finalizes passed device
def clean_dev(dev, fstype, backfstype, backdev):
    """Release any loop device backing *dev*.

    A real (non-smfs) block device needs no teardown, so return early
    in that case; is_block() is only consulted for non-smfs devices,
    preserving the original short-circuit order.
    """
    if fstype != 'smfs' and is_block(dev):
        return
    clean_loop(dev, fstype, backfstype, backdev)
1017 # determine if dev is formatted as a <fstype> filesystem
def need_format(fstype, dev):
    """Return true if *dev* must be formatted as *fstype* before use.

    FIXME: detection is not actually implemented.  Conservatively
    report that no format is needed, so existing data is never
    clobbered unless the user explicitly passes --reformat.
    """
    return 0
1022 # initialize a block device if needed
1023 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1024 inode_size, mkfsoptions, backfstype, backdev):
1028 if fstype == 'smfs' or not is_block(dev):
1029 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1030 mkfsoptions, reformat, autoformat, backfstype, backdev)
1031 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1032 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1035 # panic("device:", dev,
1036 # "not prepared, and autoformat is not set.\n",
1037 # "Rerun with --reformat option to format ALL filesystems")
1042 """lookup IP address for an interface"""
1043 rc, out = run("/sbin/ifconfig", iface)
1046 addr = string.split(out[1])[1]
1047 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for *fstype* on the given *target*
    ('mds' or 'ost').

    ext3/ldiskfs targets always remount read-only on errors; OSTs add
    kernel-branch specific options (asyncdel on 2.4, extents/mballoc
    on 2.6).  Any other fstype gets no extra options.
    """
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        if target == 'ost' and sys_get_branch() == '2.6':
            mountfsoptions = "%s,extents,mballoc" % (mountfsoptions)
        return mountfsoptions
    # Unknown fstype: return no options rather than falling off the
    # end and referencing an unassigned local (NameError).
    return ""
1061 def sys_get_elan_position_file():
1062 procfiles = ["/proc/elan/device0/position",
1063 "/proc/qsnet/elan4/device0/position",
1064 "/proc/qsnet/elan3/device0/position"]
1066 if os.access(p, os.R_OK):
1070 def sys_get_local_nid(net_type, wildcard, cluster_id):
1071 """Return the local nid."""
1073 if sys_get_elan_position_file():
1074 local = sys_get_local_address('elan', '*', cluster_id)
1076 local = sys_get_local_address(net_type, wildcard, cluster_id)
1079 def sys_get_local_address(net_type, wildcard, cluster_id):
1080 """Return the local address for the network type."""
1082 if net_type in ('tcp','openib','iib','vib','ra'):
1084 iface, star = string.split(wildcard, ':')
1085 local = if2addr(iface)
1087 panic ("unable to determine ip for:", wildcard)
1089 host = socket.gethostname()
1090 local = socket.gethostbyname(host)
1091 elif net_type == 'elan':
1092 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1093 f = sys_get_elan_position_file()
1095 panic ("unable to determine local Elan ID")
1098 lines = fp.readlines()
1102 if a[0] == 'NodeId':
1106 nid = my_int(cluster_id) + my_int(elan_id)
1107 local = "%d" % (nid)
1108 except ValueError, e:
1112 elif net_type == 'lo':
1113 fixme("automatic local address for loopback")
1114 elif net_type == 'gm':
1115 fixme("automatic local address for GM")
1119 def sys_get_branch():
1120 """Returns kernel release"""
1122 fp = open('/proc/sys/kernel/osrelease')
1123 lines = fp.readlines()
1127 version = string.split(l)
1128 a = string.split(version[0], '.')
1129 return a[0] + '.' + a[1]
1134 # XXX: instead of device_list, ask for $name and see what we get
1135 def is_prepared(name):
1136 """Return true if a device exists for the name"""
1137 if config.lctl_dump:
1139 if (config.noexec or config.record) and config.cleanup:
1142 # expect this format:
1143 # 1 UP ldlm ldlm ldlm_UUID 2
1144 out = lctl.device_list()
1146 if name == string.split(s)[3]:
1148 except CommandError, e:
1152 def net_is_prepared():
1153 """If the any device exists, then assume that all networking
1154 has been configured"""
1155 out = lctl.device_list()
1158 def fs_is_mounted(path):
1159 """Return true if path is a mounted lustre filesystem"""
1161 fp = open('/proc/mounts')
1162 lines = fp.readlines()
1166 if a[1] == path and a[2] == 'lustre_lite':
1172 def kmod_find(src_dir, dev_dir, modname):
1173 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1174 for modext in '.ko', '.o':
1175 module = modbase + modext
1177 if os.access(module, os.R_OK):
1183 def kmod_info(modname):
1184 """Returns reference count for passed module name."""
1186 fp = open('/proc/modules')
1187 lines = fp.readlines()
1190 # please forgive my tired fingers for this one
1191 ret = filter(lambda word, mod = modname: word[0] == mod,
1192 map(lambda line: string.split(line), lines))
1196 except Exception, e:
1200 """Presents kernel module"""
1201 def __init__(self, src_dir, dev_dir, name):
1202 self.src_dir = src_dir
1203 self.dev_dir = dev_dir
# FIXME we ignore a failure to load the gss module, because we
# might not need it at all.
1210 log ('loading module:', self.name, 'srcdir',
1211 self.src_dir, 'devdir', self.dev_dir)
1213 module = kmod_find(self.src_dir, self.dev_dir,
1215 if not module and self.name != 'ptlrpcs_gss':
1216 panic('module not found:', self.name)
1217 (rc, out) = run('/sbin/insmod', module)
1219 if self.name == 'ptlrpcs_gss':
1220 print "Warning: not support gss security!"
1222 raise CommandError('insmod', out, rc)
1224 (rc, out) = run('/sbin/modprobe', self.name)
1226 if self.name == 'ptlrpcs_gss':
1227 print "Warning: not support gss security!"
1229 raise CommandError('modprobe', out, rc)
1233 log('unloading module:', self.name)
1234 (rc, out) = run('/sbin/rmmod', self.name)
1236 log('unable to unload module:', self.name +
1237 "(" + self.refcount() + ")")
1241 """Returns module info if any."""
1242 return kmod_info(self.name)
1245 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1252 """Returns module refcount."""
1259 """Returns 1 if module is used, otherwise 0 is returned."""
1265 if users and users != '(unused)' and users != '-':
1273 """Returns 1 if module is busy, otherwise 0 is returned."""
1274 if self.loaded() and (self.used() or self.refcount() != '0'):
1280 """Manage kernel modules"""
def __init__(self, lustre_dir, portals_dir):
    """Remember the lustre and portals source roots and start with an
    empty list of kernel modules to manage."""
    self.lustre_dir = lustre_dir
    self.portals_dir = portals_dir
    self.kmodule_list = []
1286 def find_module(self, modname):
1287 """Find module by module name"""
1288 for mod in self.kmodule_list:
1289 if mod.name == modname:
1293 def add_portals_module(self, dev_dir, modname):
1294 """Append a module to list of modules to load."""
1296 mod = self.find_module(modname)
1298 mod = kmod(self.portals_dir, dev_dir, modname)
1299 self.kmodule_list.append(mod)
1301 def add_lustre_module(self, dev_dir, modname):
1302 """Append a module to list of modules to load."""
1304 mod = self.find_module(modname)
1306 mod = kmod(self.lustre_dir, dev_dir, modname)
1307 self.kmodule_list.append(mod)
1309 def load_modules(self):
1310 """Load all the modules in the list in the order they appear."""
1311 for mod in self.kmodule_list:
1312 if mod.loaded() and not config.noexec:
1316 def cleanup_modules(self):
1317 """Unload the modules in the list in reverse order."""
1318 rev = self.kmodule_list
1321 if (not mod.loaded() or mod.busy()) and not config.noexec:
1324 if mod.name == 'portals' and config.dump:
1325 lctl.dump(config.dump)
1328 # ============================================================
1329 # Classes to prepare and cleanup the various objects
1332 """ Base class for the rest of the modules. The default cleanup method is
defined here, as well as some utility funcs.
1335 def __init__(self, module_name, db):
1337 self.module_name = module_name
1338 self.name = self.db.getName()
1339 self.uuid = self.db.getUUID()
1343 def info(self, *args):
1344 msg = string.join(map(str,args))
1345 print self.module_name + ":", self.name, self.uuid, msg
1348 """ default cleanup, used for most modules """
1351 lctl.cleanup(self.name, self.uuid, config.force)
1352 except CommandError, e:
1353 log(self.module_name, "cleanup failed: ", self.name)
1357 def add_module(self, manager):
1358 """Adds all needed modules in the order they appear."""
1361 def safe_to_clean(self):
1364 def safe_to_clean_modules(self):
1365 return self.safe_to_clean()
1367 class Network(Module):
1368 def __init__(self,db):
1369 Module.__init__(self, 'NETWORK', db)
1370 self.net_type = self.db.get_val('nettype')
1371 self.nid = self.db.get_val('nid', '*')
1372 self.cluster_id = self.db.get_val('clusterid', "0")
1373 self.port = self.db.get_val_int('port', 0)
1376 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1378 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1379 self.generic_nid = 1
1380 debug("nid:", self.nid)
1382 self.generic_nid = 0
1384 self.nid_uuid = self.nid_to_uuid(self.nid)
1385 self.hostaddr = self.db.get_hostaddr()
1386 if len(self.hostaddr) == 0:
1387 self.hostaddr.append(self.nid)
1388 if '*' in self.hostaddr[0]:
1389 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1390 if not self.hostaddr[0]:
1391 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1392 debug("hostaddr:", self.hostaddr[0])
1394 def add_module(self, manager):
1395 manager.add_portals_module("libcfs", 'libcfs')
1396 manager.add_portals_module("portals", 'portals')
1398 if node_needs_router():
1399 manager.add_portals_module("router", 'kptlrouter')
1400 if self.net_type == 'tcp':
1401 manager.add_portals_module("knals/socknal", 'ksocknal')
1402 if self.net_type == 'elan':
1403 manager.add_portals_module("knals/qswnal", 'kqswnal')
1404 if self.net_type == 'gm':
1405 manager.add_portals_module("knals/gmnal", 'kgmnal')
1406 if self.net_type == 'openib':
1407 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1408 if self.net_type == 'iib':
1409 manager.add_portals_module("knals/iibnal", 'kiibnal')
1410 if self.net_type == 'vib':
1411 self.add_portals_module("knals/vibnal", 'kvibnal')
1412 if self.net_type == 'lo':
1413 manager.add_portals_module("knals/lonal", 'klonal')
1414 if self.net_type == 'ra':
1415 manager.add_portals_module("knals/ranal", 'kranal')
def nid_to_uuid(self, nid):
    """Derive the canonical UUID lconf assigns to a network id."""
    return "NID_" + str(nid) + "_UUID"
1421 if not config.record and net_is_prepared():
1423 self.info(self.net_type, self.nid, self.port)
1424 if not (config.record and self.generic_nid):
1425 lctl.network(self.net_type, self.nid)
1426 if self.net_type == 'tcp':
1428 for hostaddr in self.db.get_hostaddr():
1429 ip = string.split(hostaddr, '/')[0]
1430 if len(string.split(hostaddr, '/')) == 2:
1431 netmask = string.split(hostaddr, '/')[1]
1434 lctl.add_interface(self.net_type, ip, netmask)
1435 if self.net_type == 'elan':
1437 if self.port and node_is_router():
1438 run_one_acceptor(self.port)
1439 self.connect_peer_gateways()
1441 def connect_peer_gateways(self):
1442 for router in self.db.lookup_class('node'):
1443 if router.get_val_int('router', 0):
1444 for netuuid in router.get_networks():
1445 net = self.db.lookup(netuuid)
1447 if (gw.cluster_id == self.cluster_id and
1448 gw.net_type == self.net_type):
1449 if gw.nid != self.nid:
1452 def disconnect_peer_gateways(self):
1453 for router in self.db.lookup_class('node'):
1454 if router.get_val_int('router', 0):
1455 for netuuid in router.get_networks():
1456 net = self.db.lookup(netuuid)
1458 if (gw.cluster_id == self.cluster_id and
1459 gw.net_type == self.net_type):
1460 if gw.nid != self.nid:
1463 except CommandError, e:
1464 print "disconnect failed: ", self.name
1468 def safe_to_clean(self):
1469 return not net_is_prepared()
1472 self.info(self.net_type, self.nid, self.port)
1474 stop_acceptor(self.port)
1475 if node_is_router():
1476 self.disconnect_peer_gateways()
1477 if self.net_type == 'tcp':
1478 for hostaddr in self.db.get_hostaddr():
1479 ip = string.split(hostaddr, '/')[0]
1480 lctl.del_interface(self.net_type, ip)
1482 def correct_level(self, level, op=None):
1485 class RouteTable(Module):
1486 def __init__(self,db):
1487 Module.__init__(self, 'ROUTES', db)
1489 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1491 # only setup connections for tcp, openib, and iib NALs
1493 if not net_type in ('tcp','openib','iib','vib','ra'):
1496 # connect to target if route is to single node and this node is the gw
1497 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1498 if not local_cluster(net_type, tgt_cluster_id):
1499 panic("target", lo, " not on the local cluster")
1500 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1501 # connect to gateway if this node is not the gw
1502 elif (local_cluster(net_type, gw_cluster_id)
1503 and not local_interface(net_type, gw_cluster_id, gw)):
1504 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1509 panic("no server for nid", lo)
1512 return Network(srvdb)
1515 if not config.record and net_is_prepared():
1518 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1519 lctl.add_route(net_type, gw, lo, hi)
1520 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1524 def safe_to_clean(self):
1525 return not net_is_prepared()
1528 if net_is_prepared():
1529 # the network is still being used, don't clean it up
1531 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1532 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1535 lctl.disconnect(srv)
1536 except CommandError, e:
1537 print "disconnect failed: ", self.name
1542 lctl.del_route(net_type, gw, lo, hi)
1543 except CommandError, e:
1544 print "del_route failed: ", self.name
# Management service: loads prerequisite modules and creates the "mgmt"
# device. NOTE(review): listing is gapped — some method bodies are partial.
1548 class Management(Module):
1549 def __init__(self, db):
1550 Module.__init__(self, 'MGMT', db)
# Register the kernel modules the management service depends on, in load order.
1552 def add_module(self, manager):
1553 manager.add_lustre_module('lvfs', 'lvfs')
1554 manager.add_lustre_module('obdclass', 'obdclass')
1555 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1556 manager.add_lustre_module('mgmt', 'mgmt_svc')
# prepare() fragment: skip when already prepared (unless recording).
1559 if not config.record and is_prepared(self.name):
1562 lctl.newdev("mgmt", self.name, self.uuid)
1564 def safe_to_clean(self):
# cleanup() fragment: only clean a device that was actually prepared.
1568 if is_prepared(self.name):
1569 Module.cleanup(self)
1571 def correct_level(self, level, op=None):
1574 # This is only needed to load the modules; the LDLM device
1575 # is now created automatically.
# (Enclosing LDLM class statement is not visible in this gapped listing.)
1577 def __init__(self,db):
1578 Module.__init__(self, 'LDLM', db)
# Register base stack modules plus the security (ptlrpcs/GSS) modules.
1580 def add_module(self, manager):
1581 manager.add_lustre_module('lvfs', 'lvfs')
1582 manager.add_lustre_module('obdclass', 'obdclass')
1583 manager.add_lustre_module('sec', 'ptlrpcs')
1584 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1585 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1593 def correct_level(self, level, op=None):
# --- LOV (logical object volume) client module (class statement not visible).
# Aggregates a set of OSC targets; in config_only mode it exists purely to
# describe the LOV in config logs. NOTE(review): listing is gapped.
1597 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1598 Module.__init__(self, 'LOV', db)
1599 if name_override != None:
1600 self.name = "lov_%s" % name_override
# Striping parameters read from the config DB (defaults: 1MB stripes,
# offset 0, pattern 0, stripe count = number of targets).
1601 self.mds_uuid = self.db.get_first_ref('mds')
1602 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1603 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1604 self.pattern = self.db.get_val_int('stripepattern', 0)
1605 self.devlist = self.db.get_lov_tgts('lov_tgt')
1606 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
# The device keeps the config uuid as descriptor uuid and gets a fresh
# client uuid of its own.
1609 self.desc_uuid = self.uuid
1610 self.uuid = generate_client_uuid(self.name)
1611 self.fs_name = fs_name
1613 self.config_only = 1
1615 self.config_only = None
1616 mds = self.db.lookup(self.mds_uuid)
1617 self.mds_name = mds.getName()
# Build one OSC per target; devlist entries are (obd_uuid, index, gen, active).
1618 for (obd_uuid, index, gen, active) in self.devlist:
1621 self.obdlist.append(obd_uuid)
1622 obd = self.db.lookup(obd_uuid)
1623 osc = get_osc(obd, self.uuid, fs_name)
1625 self.osclist.append((osc, index, gen, active))
1627 panic('osc not found:', obd_uuid)
# prepare() fragment: set up the LOV device, then prepare and attach each OSC.
1633 if not config.record and is_prepared(self.name):
1635 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1636 self.stripe_off, self.pattern, self.devlist,
1638 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1639 self.stripe_sz, self.stripe_off, self.pattern,
1640 string.join(self.obdlist))
1641 for (osc, index, gen, active) in self.osclist:
1642 target_uuid = osc.target_uuid
1644 # Only ignore connect failures with --force, which
1645 # isn't implemented here yet.
1647 osc.prepare(ignore_connect_failure=0)
1648 except CommandError, e:
1649 print "Error preparing OSC %s\n" % osc.uuid
1651 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
# cleanup() fragment: a config_only LOV was never instantiated, so cleaning
# it is an error.
1654 for (osc, index, gen, active) in self.osclist:
1655 target_uuid = osc.target_uuid
1657 if is_prepared(self.name):
1658 Module.cleanup(self)
1659 if self.config_only:
1660 panic("Can't clean up config_only LOV ", self.name)
1662 def add_module(self, manager):
1663 if self.config_only:
1664 panic("Can't load modules for config_only LOV ", self.name)
1665 for (osc, index, gen, active) in self.osclist:
1666 osc.add_module(manager)
1668 manager.add_lustre_module('lov', 'lov')
1670 def correct_level(self, level, op=None):
# --- LMV (logical metadata volume) client module (class statement not
# visible). Aggregates a set of MDC targets, mirroring what LOV does for
# OSCs. NOTE(review): listing is gapped.
1674 def __init__(self, db, uuid, fs_name, name_override = None):
1675 Module.__init__(self, 'LMV', db)
1676 if name_override != None:
1677 self.name = "lmv_%s" % name_override
# Target list: prefer explicit lmv_tgt entries, fall back to 'mds' refs.
1679 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1680 if self.devlist == None:
1681 self.devlist = self.db.get_refs('mds')
1684 self.desc_uuid = self.uuid
1686 self.fs_name = fs_name
# Build one MDC client per MDS target.
1687 for mds_uuid in self.devlist:
1688 mds = self.db.lookup(mds_uuid)
1690 panic("MDS not found!")
1691 mdc = MDC(mds, self.uuid, fs_name)
1693 self.mdclist.append(mdc)
1695 panic('mdc not found:', mds_uuid)
# prepare() fragment: prepare each MDC, then set up the LMV device itself.
1698 if is_prepared(self.name):
1702 for mdc in self.mdclist:
1704 # Only ignore connect failures with --force, which
1705 # isn't implemented here yet.
1706 mdc.prepare(ignore_connect_failure=0)
1707 except CommandError, e:
1708 print "Error preparing LMV %s\n" % mdc.uuid
1711 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1712 string.join(self.devlist))
# cleanup() fragment.
1715 for mdc in self.mdclist:
1717 if is_prepared(self.name):
1718 Module.cleanup(self)
1720 def add_module(self, manager):
1721 for mdc in self.mdclist:
1722 mdc.add_module(manager)
1724 manager.add_lustre_module('lmv', 'lmv')
1726 def correct_level(self, level, op=None):
# --- GKS/GKT key-management service module (class statement not visible).
# Creates the "gks" device plus a shared "GKT" device when active on this
# node. NOTE(review): listing is gapped.
1730 def __init__(self,db):
1731 Module.__init__(self, 'GKD', db)
1732 target_uuid = self.db.get_first_ref('target')
1733 self.target = self.db.lookup(target_uuid)
1734 self.name = self.target.getName()
1736 active_uuid = get_active_target(self.target)
1738 panic("No target device found:", target_uuid)
1739 if active_uuid == self.uuid:
1744 self.uuid = target_uuid
# prepare() fragment: only the active node instantiates the devices.
1746 if is_prepared(self.name):
1749 debug(self.uuid, "not active")
1753 lctl.newdev("gks", self.name, self.uuid, setup ="")
1754 if not is_prepared('GKT'):
1755 lctl.newdev("gkt", 'GKT', 'GKT_UUID', setup ="")
# cleanup() fragment: tear down the gks device, then the shared GKT device;
# failures are logged but not fatal.
1759 debug(self.uuid, "not active")
1762 if is_prepared(self.name):
1764 lctl.cleanup(self.name, self.uuid, config.force,
1766 except CommandError, e:
1767 log(self.module_name, "cleanup failed: ", self.name)
1770 Module.cleanup(self)
1771 if is_prepared('GKT'):
1773 lctl.cleanup("GKT", "GKT_UUID", config.force,
1775 except CommandError, e:
1776 print "cleanup failed: ", self.name
1780 def add_module(self, manager):
1782 manager.add_lustre_module('sec/gks', 'gks')
1783 manager.add_lustre_module('sec/gks', 'gkc')
1785 def correct_level(self, level, op=None):
# CONFDEV: the configuration obd ("confobd") sitting on the MDS/OST backing
# device. It formats/mounts the device, records the target's configuration
# llogs (server setup, per-client mount logs), and starts the target from
# those logs. NOTE(review): listing is gapped — several lines are missing.
1789 class CONFDEV(Module):
1790 def __init__(self, db, name, target_uuid, uuid):
1791 Module.__init__(self, 'CONFDEV', db)
# Backing-device and filesystem parameters from the config DB.
1792 self.devpath = self.db.get_val('devpath','')
1793 self.backdevpath = self.db.get_val('devpath','')
1794 self.size = self.db.get_val_int('devsize', 0)
1795 self.journal_size = self.db.get_val_int('journalsize', 0)
1796 self.fstype = self.db.get_val('fstype', '')
1797 self.backfstype = self.db.get_val('backfstype', '')
1798 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1799 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1800 self.target = self.db.lookup(target_uuid)
1801 self.name = "conf_%s" % self.target.getName()
1802 self.client_uuids = self.target.get_refs('client')
1803 self.obdtype = self.db.get_val('obdtype', '')
# Security flavors; command-line settings override the config DB, and
# --mds_deny_sec values are appended to any DB-supplied deny list.
1805 self.mds_sec = self.db.get_val('mds_sec', '')
1806 self.oss_sec = self.db.get_val('oss_sec', '')
1807 self.deny_sec = self.db.get_val('deny_sec', '')
1809 if config.mds_mds_sec:
1810 self.mds_sec = config.mds_mds_sec
1811 if config.mds_oss_sec:
1812 self.oss_sec = config.mds_oss_sec
1813 if config.mds_deny_sec:
1815 self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
1817 self.deny_sec = config.mds_deny_sec
1819 if self.obdtype == None:
1820 self.obdtype = 'dumb'
1822 self.conf_name = name
1823 self.conf_uuid = uuid
1824 self.realdev = self.devpath
# If an LMV is configured, its client list supersedes the target's own.
1829 lmv_uuid = self.db.get_first_ref('lmv')
1830 if lmv_uuid != None:
1831 self.lmv = self.db.lookup(lmv_uuid)
1832 if self.lmv != None:
1833 self.client_uuids = self.lmv.get_refs('client')
# MDS targets default to autoformat "no"; the other branch defaults "yes".
1835 if self.target.get_class() == 'mds':
1836 if self.target.get_val('failover', 0):
1837 self.failover_mds = 'f'
1839 self.failover_mds = 'n'
1840 self.format = self.db.get_val('autoformat', "no")
1842 self.format = self.db.get_val('autoformat', "yes")
1843 self.osdtype = self.db.get_val('osdtype')
1844 ost = self.db.lookup(target_uuid)
1845 if ost.get_val('failover', 0):
1846 self.failover_ost = 'f'
1848 self.failover_ost = 'n'
1850 self.inode_size = self.get_inode_size()
1852 if self.lmv != None:
1853 client_uuid = self.name + "_lmv_UUID"
1854 self.master = LMV(self.lmv, client_uuid,
1855 self.conf_name, self.conf_name)
# Derive the MDS inode size from the LOV stripe count when not explicitly
# configured: wider stripes need bigger inodes to hold the striping EA.
1857 def get_inode_size(self):
1858 inode_size = self.db.get_val_int('inodesize', 0)
1859 if inode_size == 0 and self.target.get_class() == 'mds':
1861 # default inode size for case when neither LOV either
1862 # LMV is accessible.
1863 self.inode_size = 256
1865 # find the LOV for this MDS
1866 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1867 if lovconfig_uuid or self.lmv != None:
1868 if self.lmv != None:
1869 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1870 lovconfig = self.lmv.lookup(lovconfig_uuid)
1871 lov_uuid = lovconfig.get_first_ref('lov')
1872 if lov_uuid == None:
1873 panic(self.target.getName() + ": No LOV found for lovconfig ",
1876 lovconfig = self.target.lookup(lovconfig_uuid)
1877 lov_uuid = lovconfig.get_first_ref('lov')
1878 if lov_uuid == None:
1879 panic(self.target.getName() + ": No LOV found for lovconfig ",
1881 if self.lmv != None:
1882 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1883 lovconfig = self.lmv.lookup(lovconfig_uuid)
1884 lov_uuid = lovconfig.get_first_ref('lov')
1886 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1889 # default stripe count controls default inode_size
1890 if lov.stripe_cnt > 0:
1891 stripe_count = lov.stripe_cnt
1893 stripe_count = len(lov.devlist)
# Inode-size thresholds by stripe count (selected sizes are on the
# missing lines of this gapped listing).
1894 if stripe_count > 77:
1896 elif stripe_count > 35:
1898 elif stripe_count > 13:
1900 #elif stripe_count > 3:
# Build the mount option string: defaults for the fstype, then global
# --mountfsoptions, then per-device options, plus smfs backing-store and
# MDS-specific (acl/xattr/iopen_nopriv) options.
1907 def get_mount_options(self, blkdev):
1908 options = def_mount_options(self.fstype,
1909 self.target.get_class())
1911 if config.mountfsoptions:
1913 options = "%s,%s" %(options, config.mountfsoptions)
1915 options = config.mountfsoptions
1916 if self.mountfsoptions:
1917 options = "%s,%s" %(options, self.mountfsoptions)
1919 if self.mountfsoptions:
1921 options = "%s,%s" %(options, self.mountfsoptions)
1923 options = self.mountfsoptions
1925 if self.fstype == 'smfs':
1927 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1930 options = "type=%s,dev=%s" %(self.backfstype,
1933 if self.target.get_class() == 'mds':
1935 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1937 options = "iopen_nopriv"
# prepare() fragment: format/attach the block device and create the
# confobd on it.
1942 if is_prepared(self.name):
1945 blkdev = block_dev(self.devpath, self.size, self.fstype,
1946 config.reformat, self.format, self.journal_size,
1947 self.inode_size, self.mkfsoptions, self.backfstype,
1950 if self.fstype == 'smfs':
1955 mountfsoptions = self.get_mount_options(blkdev)
1957 self.info(self.target.get_class(), realdev, mountfsoptions,
1958 self.fstype, self.size, self.format)
1960 lctl.newdev("confobd", self.name, self.uuid,
1961 setup ="%s %s %s" %(realdev, self.fstype,
1964 self.mountfsoptions = mountfsoptions
1965 self.realdev = realdev
1967 def add_module(self, manager):
1968 manager.add_lustre_module('obdclass', 'confobd')
# Record all configuration llogs for this target: the server '-conf' log,
# per-client mount logs, their '-clean' counterparts, and (via recursive
# lconf --record invocations) logs for every client node.
1970 def write_conf(self):
1971 if self.target.get_class() == 'ost':
1973 lctl.clear_log(self.name, self.target.getName() + '-conf')
1974 lctl.record(self.name, self.target.getName() + '-conf')
1975 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1976 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1978 self.mountfsoptions))
1980 lctl.clear_log(self.name, 'OSS-conf')
1981 lctl.record(self.name, 'OSS-conf')
1982 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
1987 if self.target.get_class() == 'mds':
1988 if self.master != None:
1989 master_name = self.master.name
1991 master_name = 'dumb'
1994 lctl.clear_log(self.name, self.target.getName() + '-conf')
1995 lctl.record(self.name, self.target.getName() + '-conf')
1996 lctl.attach("mds", self.conf_name, self.conf_uuid)
1998 lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
2000 lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
2002 for flavor in string.split(self.deny_sec, ','):
2003 lctl.set_security(self.conf_name, "deny_sec", flavor)
2004 lctl.newdev("mds", self.conf_name, self.conf_uuid,
2005 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
2006 self.conf_name, self.mountfsoptions,
2007 master_name, self.obdtype))
2011 if not self.client_uuids:
# One mount log (and one '-clean' log) per client uuid of this target.
2014 for uuid in self.client_uuids:
2015 log("recording client:", uuid)
2016 client_uuid = generate_client_uuid(self.name)
2017 client = VOSC(self.db.lookup(uuid), client_uuid,
2018 self.target.getName(), self.name)
2020 lctl.clear_log(self.name, self.target.getName())
2021 lctl.record(self.name, self.target.getName())
2023 lctl.mount_option(self.target.getName(), client.get_name(), "", "")
2027 lctl.clear_log(self.name, self.target.getName() + '-clean')
2028 lctl.record(self.name, self.target.getName() + '-clean')
2030 lctl.del_mount_option(self.target.getName())
2038 # record logs for each client
2040 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
2042 config_options = CONFIG_FILE
2044 for node_db in self.db.lookup_class('node'):
2045 client_name = node_db.getName()
2046 for prof_uuid in node_db.get_refs('profile'):
2047 prof_db = node_db.lookup(prof_uuid)
2048 # refactor this into a function to test "clientness"
2050 for ref_class, ref_uuid in prof_db.get_all_refs():
2051 if ref_class in ('mountpoint','echoclient'):
2052 debug("recording", client_name)
# Re-invoke this script with --record to capture the client's setup and
# cleanup command streams into llogs on this device; preserve and restore
# the global noexec flag around the recursive runs.
2053 old_noexec = config.noexec
2055 noexec_opt = ('', '-n')
2056 ret, out = run (sys.argv[0],
2057 noexec_opt[old_noexec == 1],
2058 " -v --record --nomod",
2059 "--record_log", client_name,
2060 "--record_device", self.name,
2061 "--node", client_name,
2064 for s in out: log("record> ", string.strip(s))
2065 ret, out = run (sys.argv[0],
2066 noexec_opt[old_noexec == 1],
2067 "--cleanup -v --record --nomod",
2068 "--record_log", client_name + "-clean",
2069 "--record_device", self.name,
2070 "--node", client_name,
2073 for s in out: log("record> ", string.strip(s))
2074 config.noexec = old_noexec
# start() fragment: replay the recorded config log; for OSTs also start OSS.
2078 lctl.start(self.name, self.conf_name)
2079 except CommandError, e:
2081 if self.target.get_class() == 'ost':
2082 if not is_prepared('OSS'):
2084 lctl.start(self.name, 'OSS')
2085 except CommandError, e:
# cleanup() fragment: tear down the confobd and release the backing device.
2089 if is_prepared(self.name):
2091 lctl.cleanup(self.name, self.uuid, 0, 0)
2092 clean_dev(self.devpath, self.fstype,
2093 self.backfstype, self.backdevpath)
2094 except CommandError, e:
2095 log(self.module_name, "cleanup failed: ", self.name)
2098 Module.cleanup(self)
# MDSDEV: the metadata server device. Wraps a CONFDEV for the backing
# device/config logs, an optional LMV master, and creates the MDT service.
# NOTE(review): listing is gapped — several lines are missing.
2100 class MDSDEV(Module):
2101 def __init__(self,db):
2102 Module.__init__(self, 'MDSDEV', db)
# Backing-device parameters from the config DB.
2103 self.devpath = self.db.get_val('devpath','')
2104 self.backdevpath = self.db.get_val('devpath','')
2105 self.size = self.db.get_val_int('devsize', 0)
2106 self.journal_size = self.db.get_val_int('journalsize', 0)
2107 self.fstype = self.db.get_val('fstype', '')
2108 self.backfstype = self.db.get_val('backfstype', '')
2109 self.nspath = self.db.get_val('nspath', '')
2110 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2111 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2112 self.obdtype = self.db.get_val('obdtype', '')
2113 self.root_squash = self.db.get_val('root_squash', '')
2114 self.no_root_squash = self.db.get_val('no_root_squash', '')
2116 target_uuid = self.db.get_first_ref('target')
2117 self.target = self.db.lookup(target_uuid)
2118 self.name = self.target.getName()
2122 lmv_uuid = self.db.get_first_ref('lmv')
2123 if lmv_uuid != None:
2124 self.lmv = self.db.lookup(lmv_uuid)
2126 active_uuid = get_active_target(self.target)
2128 panic("No target device found:", target_uuid)
2129 if active_uuid == self.uuid:
2131 group = self.target.get_val('group')
2132 if config.group and config.group != group:
2137 self.uuid = target_uuid
# When part of an LMV, act as a master and delegate to an LMV client.
2140 if self.lmv != None:
2141 client_uuid = self.name + "_lmv_UUID"
2142 self.master = LMV(self.lmv, client_uuid,
2143 self.name, self.name)
2145 self.confobd = CONFDEV(self.db, self.name,
2146 target_uuid, self.uuid)
# Register MDS-stack modules plus the fsfilt glue for the (backing)
# filesystem type, including snapshot support when 'snap' is mounted.
2148 def add_module(self, manager):
2150 manager.add_lustre_module('mdc', 'mdc')
2151 manager.add_lustre_module('osc', 'osc')
2152 manager.add_lustre_module('ost', 'ost')
2153 manager.add_lustre_module('lov', 'lov')
2154 manager.add_lustre_module('mds', 'mds')
2156 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2157 manager.add_lustre_module(self.fstype, self.fstype)
2160 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2162 # if fstype is smfs, then we should also take care about backing
2164 if self.fstype == 'smfs':
2165 manager.add_lustre_module(self.backfstype, self.backfstype)
2166 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2168 for option in string.split(self.mountfsoptions, ','):
2169 if option == 'snap':
2170 if not self.fstype == 'smfs':
2171 panic("mountoptions has 'snap', but fstype is not smfs.")
2172 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2173 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2176 if self.master != None:
2177 self.master.add_module(manager)
2179 # add CONFOBD modules
2180 if self.confobd != None:
2181 self.confobd.add_module(manager)
# Record the MDS configuration logs via the confobd, then release it.
2183 def write_conf(self):
2184 if is_prepared(self.name):
2187 debug(self.uuid, "not active")
2190 self.confobd.prepare()
2191 self.confobd.write_conf()
2192 self.confobd.cleanup()
# prepare() fragment: bring up confobd (and LMV master), start the MDS from
# its config log, create the shared MDT service, then wire up the lsd/lacl
# upcalls (development mode) and root-squash settings.
2195 if is_prepared(self.name):
2198 debug(self.uuid, "not active")
2202 self.confobd.prepare()
2204 self.confobd.write_conf()
2207 if self.master != None:
2208 self.master.prepare()
2210 if not config.record:
2211 self.confobd.start()
2213 if not is_prepared('MDT'):
2214 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
2216 if development_mode():
2217 # set lsd upcall path
2218 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2219 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2220 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2221 print "MDS Warning: failed to set lsd cache upcall"
2223 run("echo ", upcall, " > ", procentry)
2224 # set lacl upcall path
2225 procentry = "/proc/fs/lustre/mds/lacl_upcall"
2226 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lacl_upcall")
2227 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2228 print "MDS Warning: failed to set remote acl upcall"
2230 run("echo ", upcall, " > ", procentry)
# Command-line root-squash settings default to the config DB values.
2232 if config.root_squash == None:
2233 config.root_squash = self.root_squash
2234 if config.no_root_squash == None:
2235 config.no_root_squash = self.no_root_squash
2236 if config.root_squash:
2237 if config.no_root_squash:
2238 nsnid = config.no_root_squash
2241 lctl.root_squash(self.name, config.root_squash, nsnid)
# True while any 'mds' device is still listed by lctl.
2243 def msd_remaining(self):
2244 out = lctl.device_list()
2246 if string.split(s)[2] in ('mds',):
2249 def safe_to_clean(self):
# MDS modules may only be unloaded once no mds devices remain.
2252 def safe_to_clean_modules(self):
2253 return not self.msd_remaining()
# cleanup() fragment: tear down the MDS device, the LMV master, the shared
# MDT (when last mds is gone), and finally the confobd.
2257 debug(self.uuid, "not active")
2260 if is_prepared(self.name):
2262 lctl.cleanup(self.name, self.uuid, config.force,
2264 except CommandError, e:
2265 log(self.module_name, "cleanup failed: ", self.name)
2268 Module.cleanup(self)
2270 if self.master != None:
2271 self.master.cleanup()
2272 if not self.msd_remaining() and is_prepared('MDT'):
2274 lctl.cleanup("MDT", "MDT_UUID", config.force,
2276 except CommandError, e:
2277 print "cleanup failed: ", self.name
2282 self.confobd.cleanup()
2284 def correct_level(self, level, op=None):
2285 #if self.master != None:
# --- OSD (object storage device, i.e. OST backend) module (class statement
# not visible in this gapped listing). Wraps a CONFDEV unless the osdtype is
# the in-memory 'obdecho' test device.
2290 def __init__(self, db):
2291 Module.__init__(self, 'OSD', db)
2292 self.osdtype = self.db.get_val('osdtype')
# Backing-device parameters from the config DB.
2293 self.devpath = self.db.get_val('devpath', '')
2294 self.backdevpath = self.db.get_val('devpath', '')
2295 self.size = self.db.get_val_int('devsize', 0)
2296 self.journal_size = self.db.get_val_int('journalsize', 0)
2297 self.inode_size = self.db.get_val_int('inodesize', 0)
2298 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2299 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2300 self.fstype = self.db.get_val('fstype', '')
2301 self.backfstype = self.db.get_val('backfstype', '')
2302 self.nspath = self.db.get_val('nspath', '')
2303 target_uuid = self.db.get_first_ref('target')
2304 ost = self.db.lookup(target_uuid)
2305 self.name = ost.getName()
2306 self.format = self.db.get_val('autoformat', 'yes')
2307 if ost.get_val('failover', 0):
2308 self.failover_ost = 'f'
2310 self.failover_ost = 'n'
# --ost_deny_sec flavors are appended to any DB-supplied deny list.
2312 self.deny_sec = self.db.get_val('deny_sec', '')
2314 if config.ost_deny_sec:
2316 self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
2318 self.deny_sec = config.ost_deny_sec
2320 active_uuid = get_active_target(ost)
2322 panic("No target device found:", target_uuid)
2323 if active_uuid == self.uuid:
2325 group = ost.get_val('group')
2326 if config.group and config.group != group:
2331 self.uuid = target_uuid
2332 self.confobd = CONFDEV(self.db, self.name,
2333 target_uuid, self.uuid)
# Register the OST service module, the osdtype module, and confobd modules.
# (fsfilt module loading is handled by CONFDEV — the commented block below
# was superseded.)
2335 def add_module(self, manager):
2338 manager.add_lustre_module('ost', 'ost')
2340 #if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2341 # manager.add_lustre_module(self.fstype, self.fstype)
2344 # manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2346 #if self.fstype == 'smfs':
2347 # manager.add_lustre_module(self.backfstype, self.backfstype)
2348 # manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2350 #for option in self.mountfsoptions:
2351 # if option == 'snap':
2352 # if not self.fstype == 'smfs':
2353 # panic("mountoptions with snap, but fstype is not smfs\n")
2354 # manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2355 # manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2357 manager.add_lustre_module(self.osdtype, self.osdtype)
2359 # add CONFOBD modules
2360 if self.confobd != None:
2361 self.confobd.add_module(manager)
# prepare() fragment: obdecho gets a bare device; real OSDs go through the
# confobd (prepare, record config, start), then apply deny_sec flavors.
2364 if is_prepared(self.name):
2367 debug(self.uuid, "not active")
2372 if self.osdtype == 'obdecho':
2373 self.info(self.osdtype)
2374 lctl.newdev("obdecho", self.name, self.uuid)
2375 if not is_prepared('OSS'):
2376 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2378 self.confobd.prepare()
2380 self.confobd.write_conf()
2381 if not config.record:
2382 self.confobd.start()
2385 for flavor in string.split(self.deny_sec, ','):
2386 lctl.set_security(self.name, "deny_sec", flavor)
# Record the OST configuration logs via the confobd, then release it.
2388 def write_conf(self):
2389 if is_prepared(self.name):
2392 debug(self.uuid, "not active")
2396 if self.osdtype != 'obdecho':
2397 self.confobd.prepare()
2398 self.confobd.write_conf()
2399 if not config.write_conf:
2400 self.confobd.start()
2401 self.confobd.cleanup()
# True while any obdfilter/obdecho device is still listed by lctl.
2403 def osd_remaining(self):
2404 out = lctl.device_list()
2406 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2409 def safe_to_clean(self):
# OSD modules may only be unloaded once no OSD devices remain.
2412 def safe_to_clean_modules(self):
2413 return not self.osd_remaining()
# cleanup() fragment: tear down the OSD, the shared OSS (when last OSD is
# gone), then the confobd; failures are logged but not fatal.
2417 debug(self.uuid, "not active")
2420 if is_prepared(self.name):
2423 lctl.cleanup(self.name, self.uuid, config.force,
2425 except CommandError, e:
2426 log(self.module_name, "cleanup failed: ", self.name)
2429 if not self.osd_remaining() and is_prepared('OSS'):
2431 lctl.cleanup("OSS", "OSS_UUID", config.force,
2433 except CommandError, e:
2434 print "cleanup failed: ", self.name
2438 if self.osdtype != 'obdecho':
2440 self.confobd.cleanup()
2442 def correct_level(self, level, op=None):
2445 # Generic client module, used by OSC and MDC
2446 class Client(Module):
2447 def __init__(self, tgtdb, uuid, module, fs_name,
2448 self_name=None, module_dir=None):
2449 self.target_name = tgtdb.getName()
2450 self.target_uuid = tgtdb.getUUID()
2451 self.module_dir = module_dir
2452 self.backup_targets = []
2453 self.module = module
2456 self.tgt_dev_uuid = get_active_target(tgtdb)
2457 if not self.tgt_dev_uuid:
2458 panic("No target device found for target(1):", self.target_name)
2463 self.module = module
2464 self.module_name = string.upper(module)
2466 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2467 self.target_name, fs_name)
2469 self.name = self_name
2471 self.lookup_server(self.tgt_dev_uuid)
2472 self.lookup_backup_targets()
2473 self.fs_name = fs_name
2474 if not self.module_dir:
2475 self.module_dir = module
2477 def add_module(self, manager):
2478 manager.add_lustre_module(self.module_dir, self.module)
2480 def lookup_server(self, srv_uuid):
2481 """ Lookup a server's network information """
2482 self._server_nets = get_ost_net(self.db, srv_uuid)
2483 if len(self._server_nets) == 0:
2484 panic ("Unable to find a server for:", srv_uuid)
2489 def get_servers(self):
2490 return self._server_nets
2492 def lookup_backup_targets(self):
2493 """ Lookup alternative network information """
2494 prof_list = toplustreDB.get_refs('profile')
2495 for prof_uuid in prof_list:
2496 prof_db = toplustreDB.lookup(prof_uuid)
2498 panic("profile:", prof_uuid, "not found.")
2499 for ref_class, ref_uuid in prof_db.get_all_refs():
2500 if ref_class in ('osd', 'mdsdev'):
2501 devdb = toplustreDB.lookup(ref_uuid)
2502 uuid = devdb.get_first_ref('target')
2503 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2504 self.backup_targets.append(ref_uuid)
2506 def prepare(self, ignore_connect_failure = 0):
2507 self.info(self.target_uuid)
2508 if not config.record and is_prepared(self.name):
2511 srv = choose_local_server(self.get_servers())
2515 routes = find_route(self.get_servers())
2516 if len(routes) == 0:
2517 panic ("no route to", self.target_uuid)
2518 for (srv, r) in routes:
2519 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2520 except CommandError, e:
2521 if not ignore_connect_failure:
2525 if self.target_uuid in config.inactive and self.permits_inactive():
2526 debug("%s inactive" % self.target_uuid)
2527 inactive_p = "inactive"
2529 debug("%s active" % self.target_uuid)
2531 lctl.newdev(self.module, self.name, self.uuid,
2532 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2534 for tgt_dev_uuid in self.backup_targets:
2535 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2536 if len(this_nets) == 0:
2537 panic ("Unable to find a server for:", tgt_dev_uuid)
2538 srv = choose_local_server(this_nets)
2542 routes = find_route(this_nets);
2543 if len(routes) == 0:
2544 panic("no route to", tgt_dev_uuid)
2545 for (srv, r) in routes:
2546 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2548 lctl.add_conn(self.name, srv.nid_uuid);
2551 if is_prepared(self.name):
2552 Module.cleanup(self)
2554 srv = choose_local_server(self.get_servers())
2556 lctl.disconnect(srv)
2558 for (srv, r) in find_route(self.get_servers()):
2559 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2560 except CommandError, e:
2561 log(self.module_name, "cleanup failed: ", self.name)
2565 for tgt_dev_uuid in self.backup_targets:
2566 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2567 srv = choose_local_server(this_net)
2569 lctl.disconnect(srv)
2571 for (srv, r) in find_route(this_net):
2572 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2574 def correct_level(self, level, op=None):
2577 def deactivate(self):
2579 lctl.deactivate(self.name)
2580 except CommandError, e:
2581 log(self.module_name, "deactivate failed: ", self.name)
# --- Thin Client subclasses (their class statements are not visible in this
# gapped listing): each binds the generic Client to one module name and
# declares (on missing lines) whether it may start inactive.
2586 def __init__(self, db, uuid, fs_name):
2587 Client.__init__(self, db, uuid, 'gkc', fs_name)
2589 def permits_inactive(self):
# MDC client.
2593 def __init__(self, db, uuid, fs_name):
2594 Client.__init__(self, db, uuid, 'mdc', fs_name)
2596 def permits_inactive(self):
# OSC client.
2600 def __init__(self, db, uuid, fs_name):
2601 Client.__init__(self, db, uuid, 'osc', fs_name)
2603 def permits_inactive(self):
# CMOBD: cache-miss obd pairing a master device with a cache device; the
# master/cache may each be a LOV, an MDC, or an LMV depending on class.
# NOTE(review): listing is gapped — several lines are missing.
2606 class CMOBD(Module):
2607 def __init__(self, db):
2608 Module.__init__(self, 'CMOBD', db)
2609 self.name = self.db.getName();
2610 self.uuid = generate_client_uuid(self.name)
2611 self.master_uuid = self.db.get_first_ref('masterobd')
2612 self.cache_uuid = self.db.get_first_ref('cacheobd')
2614 master_obd = self.db.lookup(self.master_uuid)
2616 panic('master obd not found:', self.master_uuid)
2618 cache_obd = self.db.lookup(self.cache_uuid)
2620 panic('cache obd not found:', self.cache_uuid)
# Instantiate the master client by the master obd's class.
2625 master_class = master_obd.get_class()
2626 cache_class = cache_obd.get_class()
2628 if master_class == 'ost' or master_class == 'lov':
2629 client_uuid = "%s_lov_master_UUID" % (self.name)
2630 self.master = LOV(master_obd, client_uuid, self.name,
2631 "master_%s" % (self.name));
2632 elif master_class == 'mds':
2633 self.master = get_mdc(db, self.name, self.master_uuid)
2634 elif master_class == 'lmv':
2635 client_uuid = "%s_lmv_master_UUID" % (self.name)
2636 self.master = LMV(master_obd, client_uuid, self.name,
2637 "master_%s" % (self.name));
2639 panic("unknown master obd class '%s'" %(master_class))
# Instantiate the cache client by the cache obd's class.
2641 if cache_class == 'ost' or cache_class == 'lov':
2642 client_uuid = "%s_lov_cache_UUID" % (self.name)
2643 self.cache = LOV(cache_obd, client_uuid, self.name,
2644 "cache_%s" % (self.name));
2645 elif cache_class == 'mds':
2646 self.cache = get_mdc(db, self.name, self.cache_uuid)
2647 elif cache_class == 'lmv':
2648 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2649 self.cache = LMV(cache_obd, client_uuid, self.name,
2650 "cache_%s" % (self.name));
2652 panic("unknown cache obd class '%s'" %(cache_class))
# prepare() fragment: bring up the master, then create the cmobd device.
2655 self.master.prepare()
2656 if not config.record and is_prepared(self.name):
2658 self.info(self.master_uuid, self.cache_uuid)
2659 lctl.newdev("cmobd", self.name, self.uuid,
2660 setup ="%s %s" %(self.master.uuid,
2669 def get_master_name(self):
2670 return self.master.name
2672 def get_cache_name(self):
2673 return self.cache.name
# cleanup() fragment.
2676 if is_prepared(self.name):
2677 Module.cleanup(self)
2679 self.master.cleanup()
2681 def add_module(self, manager):
2682 manager.add_lustre_module('smfs', 'smfs')
2683 manager.add_lustre_module('cmobd', 'cmobd')
2684 self.master.add_module(manager)
2686 def correct_level(self, level, op=None):
# --- COBD: caching obd (class statement not visible in this gapped
# listing). Like CMOBD, pairs a master and a cache device; note that here
# LOV/MDC children are built with the caller-supplied `name`, not self.name.
2690 def __init__(self, db, uuid, name):
2691 Module.__init__(self, 'COBD', db)
2692 self.name = self.db.getName();
2693 self.uuid = generate_client_uuid(self.name)
2694 self.master_uuid = self.db.get_first_ref('masterobd')
2695 self.cache_uuid = self.db.get_first_ref('cacheobd')
2697 master_obd = self.db.lookup(self.master_uuid)
2699 panic('master obd not found:', self.master_uuid)
2701 cache_obd = self.db.lookup(self.cache_uuid)
2703 panic('cache obd not found:', self.cache_uuid)
# Instantiate the master client by the master obd's class.
2708 master_class = master_obd.get_class()
2709 cache_class = cache_obd.get_class()
2711 if master_class == 'ost' or master_class == 'lov':
2712 client_uuid = "%s_lov_master_UUID" % (self.name)
2713 self.master = LOV(master_obd, client_uuid, name,
2714 "master_%s" % (self.name));
2715 elif master_class == 'mds':
2716 self.master = get_mdc(db, name, self.master_uuid)
2717 elif master_class == 'lmv':
2718 client_uuid = "%s_lmv_master_UUID" % (self.name)
2719 self.master = LMV(master_obd, client_uuid, self.name,
2720 "master_%s" % (self.name));
2722 panic("unknown master obd class '%s'" %(master_class))
# Instantiate the cache client by the cache obd's class.
2724 if cache_class == 'ost' or cache_class == 'lov':
2725 client_uuid = "%s_lov_cache_UUID" % (self.name)
2726 self.cache = LOV(cache_obd, client_uuid, name,
2727 "cache_%s" % (self.name));
2728 elif cache_class == 'mds':
2729 self.cache = get_mdc(db, name, self.cache_uuid)
2730 elif cache_class == 'lmv':
2731 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2732 self.cache = LMV(cache_obd, client_uuid, self.name,
2733 "cache_%s" % (self.name));
2735 panic("unknown cache obd class '%s'" %(cache_class))
2743 def get_master_name(self):
2744 return self.master.name
2746 def get_cache_name(self):
2747 return self.cache.name
# prepare() fragment: bring up master and cache, then the cobd device.
2750 if not config.record and is_prepared(self.name):
2752 self.master.prepare()
2753 self.cache.prepare()
2754 self.info(self.master_uuid, self.cache_uuid)
2755 lctl.newdev("cobd", self.name, self.uuid,
2756 setup ="%s %s" %(self.master.name,
# cleanup() fragment: device first, then both children.
2760 if is_prepared(self.name):
2761 Module.cleanup(self)
2762 self.master.cleanup()
2763 self.cache.cleanup()
2765 def add_module(self, manager):
2766 manager.add_lustre_module('cobd', 'cobd')
2767 self.master.add_module(manager)
2769 # virtual interface for OSC and LOV
# (Class statement not visible in this gapped listing.) Dispatches to a
# LOV, COBD, or OSC delegate depending on the config object's class.
2771 def __init__(self, db, client_uuid, name, name_override = None):
2772 Module.__init__(self, 'VOSC', db)
2773 if db.get_class() == 'lov':
2774 self.osc = LOV(db, client_uuid, name, name_override)
2776 elif db.get_class() == 'cobd':
2777 self.osc = COBD(db, client_uuid, name)
2780 self.osc = OSC(db, client_uuid, name)
# Accessor fragments delegate to the wrapped client.
2784 return self.osc.get_uuid()
2787 return self.osc.get_name()
2795 def add_module(self, manager):
2796 self.osc.add_module(manager)
2798 def correct_level(self, level, op=None):
2799 return self.osc.correct_level(level, op)
2801 # virtual interface for MDC and LMV
# (Class statement not visible in this gapped listing.) Dispatches to an
# LMV, COBD, or MDC delegate depending on the config object's class.
2803 def __init__(self, db, client_uuid, name, name_override = None):
2804 Module.__init__(self, 'VMDC', db)
2805 if db.get_class() == 'lmv':
2806 self.mdc = LMV(db, client_uuid, name, name_override)
2807 elif db.get_class() == 'cobd':
2808 self.mdc = COBD(db, client_uuid, name)
2810 self.mdc = MDC(db, client_uuid, name)
# Accessor fragments delegate to the wrapped client.
2813 return self.mdc.uuid
2816 return self.mdc.name
2824 def add_module(self, manager):
2825 self.mdc.add_module(manager)
2827 def correct_level(self, level, op=None):
2828 return self.mdc.correct_level(level, op)
class ECHO_CLIENT(Module):
    """Client for the obdecho test device; wraps its OBD via VOSC."""
    def __init__(self,db):
        Module.__init__(self, 'ECHO_CLIENT', db)
        self.obd_uuid = self.db.get_first_ref('obd')
        obd = self.db.lookup(self.obd_uuid)
        self.uuid = generate_client_uuid(self.name)
        self.osc = VOSC(obd, self.uuid, self.name)
    # prepare (fragment): set up the wrapped OSC, then the echo device.
    if not config.record and is_prepared(self.name):
        self.osc.prepare() # XXX This is so cheating. -p
        self.info(self.obd_uuid)
        lctl.newdev("echo_client", self.name, self.uuid,
                    setup = self.osc.get_name())
    # cleanup (fragment)
    if is_prepared(self.name):
        Module.cleanup(self)

    def add_module(self, manager):
        # obdecho is needed in addition to the OSC module stack.
        self.osc.add_module(manager)
        manager.add_lustre_module('obdecho', 'obdecho')

    def correct_level(self, level, op=None):
def generate_client_uuid(name):
    """Build a pseudo-random client UUID embedding (at most 19 chars of)
    the client name, truncated to the 36-character UUID limit.

    FIX: the 'name' argument for the %.19s slot was missing from the
    format tuple, which would raise TypeError at runtime.
    """
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
class Mountpoint(Module):
    """A client filesystem mountpoint: glues a VOSC + VMDC (and optional
    GKC) together and mounts lustre_lite at self.path."""
    def __init__(self,db):
        Module.__init__(self, 'MTPT', db)
        self.path = self.db.get_val('path')
        self.clientoptions = self.db.get_val('clientoptions', '')
        self.fs_uuid = self.db.get_first_ref('filesystem')
        fs = self.db.lookup(self.fs_uuid)
        # Prefer an LMV (clustered MDS) reference; fall back to plain MDS.
        self.mds_uuid = fs.get_first_ref('lmv')
        if not self.mds_uuid:
            self.mds_uuid = fs.get_first_ref('mds')
        self.obd_uuid = fs.get_first_ref('obd')
        self.gks_uuid =fs.get_first_ref('gks')
        client_uuid = generate_client_uuid(self.name)
        # Security flavors come from the config (default 'null'), with
        # the command line overriding.  NOTE(review): this read/override
        # sequence appears twice - looks redundant, confirm intent.
        self.oss_sec = self.db.get_val('oss_sec','null')
        self.mds_sec = self.db.get_val('mds_sec','null')
        self.mds_sec = config.mds_sec
        self.oss_sec = config.oss_sec
        self.oss_sec = self.db.get_val('oss_sec','null')
        self.mds_sec = self.db.get_val('mds_sec','null')
        self.mds_sec = config.mds_sec
        self.oss_sec = config.oss_sec
        ost = self.db.lookup(self.obd_uuid)
        panic("no ost: ", self.obd_uuid)
        mds = self.db.lookup(self.mds_uuid)
        panic("no mds: ", self.mds_uuid)
        self.vosc = VOSC(ost, client_uuid, self.name, self.name)
        self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
        self.gkc = get_gkc(db, client_uuid, self.name, self.gks_uuid)
    # prepare (fragment): register mount options, then run mount(8).
    if not config.record and fs_is_mounted(self.path):
        log(self.path, "already mounted.")
        self.info(self.path, self.mds_uuid, self.obd_uuid)
        if config.record or config.lctl_dump:
            lctl.mount_option(local_node_name, self.vosc.get_name(),
                              self.vmdc.get_name(), self.gkc.get_name())
            lctl.mount_option(local_node_name, self.vosc.get_name(),
                              self.vmdc.get_name(), "")
        # Merge command-line client options with those from the config.
        if config.clientoptions:
            if self.clientoptions:
                self.clientoptions = self.clientoptions + ',' + config.clientoptions
                self.clientoptions = config.clientoptions
        if self.clientoptions:
            self.clientoptions = ',' + self.clientoptions
            # Linux kernel will deal with async and not pass it to ll_fill_super,
            # so replace it with Lustre async
            self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
        gkc_name = self.gkc.get_name();
        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,gkc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
              (self.vosc.get_name(), self.vmdc.get_name(), gkc_name, self.mds_sec,
               self.oss_sec, self.clientoptions, config.config, self.path)
        run("mkdir", self.path)
        panic("mount failed:", self.path, ":", string.join(val))
    # cleanup (fragment): unmount (forced with --force) and tear down.
    self.info(self.path, self.mds_uuid,self.obd_uuid)
    if config.record or config.lctl_dump:
        lctl.del_mount_option(local_node_name)
    if fs_is_mounted(self.path):
        (rc, out) = run("umount", "-f", self.path)
        (rc, out) = run("umount", self.path)
        raise CommandError('umount', out, rc)
    if fs_is_mounted(self.path):
        panic("fs is still mounted:", self.path)

    def add_module(self, manager):
        # The client needs llite plus the osc/mdc stacks; gkc for gss.
        self.vosc.add_module(manager)
        self.vmdc.add_module(manager)
        manager.add_lustre_module('llite', 'llite')
        manager.add_lustre_module('sec/gks', 'gkc')
    def correct_level(self, level, op=None):
2985 # ============================================================
2986 # misc query functions
def get_ost_net(self, osd_uuid):
    """Return the list of Network objects for the node hosting osd_uuid.

    'self' is a lustre DB object.  Returns [] for an empty osd_uuid.
    FIX: the panic call referenced the undefined name 'node_uuid_'
    (trailing underscore), which would itself raise NameError.
    """
    srv_list = []
    if not osd_uuid:
        return srv_list
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    if not node:
        panic("unable to find node for osd_uuid:", osd_uuid,
              " node_ref:", node_uuid)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    return srv_list
# the order of initialization is based on level.
def getServiceLevel(self):
    """Map a config class to its startup level (networking first,
    services next, mountpoints/echo clients last)."""
    type = self.get_class()
    if type in ('network',):
    elif type in ('routetbl',):
    elif type in ('ldlm',):
    elif type in ('osd',):
    elif type in ('mdsdev',):
    elif type in ('lmv', 'cobd',):
    elif type in ('gkd',):
    # NOTE(review): 'cobd' also appears in the ('lmv', 'cobd') branch
    # above, so it can never match here - confirm which level is intended.
    elif type in ('cmobd', 'cobd',):
    elif type in ('mountpoint', 'echoclient'):
    panic("Unknown type: ", type)
    # Clamp to the user-requested [minlevel, maxlevel] window.
    if ret < config.minlevel or ret > config.maxlevel:

# return list of services in a profile. list is a list of tuples
# [(level, db_object),]
def getServices(self):
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        level = getServiceLevel(servdb)
        list.append((level, servdb))
    panic('service not found: ' + ref_uuid)
3050 ############################################################
3052 # FIXME: clean this mess up!
3054 # OSC is no longer in the xml, so we have to fake it.
3055 # this is getting ugly and begging for another refactoring
def get_osc(ost_db, uuid, fs_name):
    """Build and return an OSC module object for the given OST config.

    FIX: the constructed object was never returned, so callers
    (e.g. magic_get_osc) received None.
    """
    osc = OSC(ost_db, uuid, fs_name)
    return osc
def get_mdc(db, fs_name, mds_uuid):
    """Look up the MDS config and return an MDC client object for it.

    FIX: guard the lookup failure and return the constructed MDC
    (callers assign the result).
    """
    mds_db = db.lookup(mds_uuid);
    if not mds_db:
        error("no mds:", mds_uuid)
    mdc = MDC(mds_db, mds_uuid, fs_name)
    return mdc
def get_gkc(db, uuid, fs_name, gks_uuid):
    """Look up the GKS config and return a GKC client object for it.

    FIX: guard the lookup failure and return the constructed GKC
    (callers assign the result).
    """
    gks_db = db.lookup(gks_uuid);
    if not gks_db:
        error("no gks:", gks_uuid)
    gkc = GKC(gks_db, uuid, fs_name)
    return gkc
3074 ############################################################
3075 # routing ("rooting")
3077 # list of (nettype, cluster_id, nid)
def find_local_clusters(node_db):
    """Record (nettype, cluster_id, nid) for every network on this node
    and create an acceptor handler for each server port seen."""
    global local_clusters
    for netuuid in node_db.get_networks():
        net = node_db.lookup(netuuid)
        debug("add_local", netuuid)
        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
        if not acceptors.has_key(srv.port):
            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)

# This node is a gateway.
def node_is_router():

# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
def node_needs_router():
    return needs_router or is_router
3102 # list of (nettype, gw, tgt_cluster_id, lo, hi)
3103 # Currently, these local routes are only added to kptlrouter route
3104 # table if they are needed to connect to a specific server. This
3105 # should be changed so all available routes are loaded, and the
3106 # ptlrouter can make all the decisions.
def find_local_routes(lustre):
    """ Scan the lustre config looking for routers . Build list of
    global local_routes, needs_router
    list = lustre.lookup_class('node')
    if router.get_val_int('router', 0):
        for (local_type, local_cluster_id, local_nid) in local_clusters:
            for netuuid in router.get_networks():
                db = router.lookup(netuuid)
                # A router network on our own net type and cluster id is
                # usable as a gateway for this node.
                if (local_type == db.get_val('nettype') and
                    local_cluster_id == db.get_val('clusterid')):
                    gw = db.get_val('nid')
                    debug("find_local_routes: gw is", gw)
                    for route in router.get_local_routes(local_type, gw):
                        local_routes.append(route)
    debug("find_local_routes:", local_routes)
def choose_local_server(srv_list):
    """Return the first server reachable on a local cluster, else None.

    FIX: the matching server was never returned, so callers
    (e.g. doRecovery) always saw None.
    """
    for srv in srv_list:
        if local_cluster(srv.net_type, srv.cluster_id):
            return srv
def local_cluster(net_type, cluster_id):
    """Return 1 if (net_type, cluster_id) matches a local cluster, else 0.

    FIX: restore the missing return values; without them the function
    always returned None (falsy) even on a match.
    """
    for cluster in local_clusters:
        if net_type == cluster[0] and cluster_id == cluster[1]:
            return 1
    return 0
def local_interface(net_type, cluster_id, nid):
    """Return 1 if the exact (net_type, cluster_id, nid) triple is one of
    this node's local interfaces, else 0.

    FIX: restore the missing return values (same defect as local_cluster).
    """
    for cluster in local_clusters:
        if (net_type == cluster[0] and cluster_id == cluster[1]
            and nid == cluster[2]):
            return 1
    return 0
def find_route(srv_list):
    """Collect (srv, route) pairs for servers reachable via a local
    gateway route."""
    frm_type = local_clusters[0][0]
    for srv in srv_list:
        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
        to_type = srv.net_type
        cluster_id = srv.cluster_id
        debug ('looking for route to', to_type, to)
        for r in local_routes:
            debug("find_route: ", r)
            # r = (nettype, gw, tgt_cluster_id, lo, hi): match when the
            # target nid falls within [lo, hi] on the right cluster.
            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                result.append((srv, r))

def get_active_target(db):
    # --select on the command line can pin a target to a node;
    # otherwise the 'active' reference from the config is used.
    target_uuid = db.getUUID()
    target_name = db.getName()
    node_name = get_select(target_name)
    tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
    tgt_dev_uuid = db.get_first_ref('active')

def get_server_by_nid_uuid(db, nid_uuid):
    """Find the Network object whose nid_uuid matches."""
    for n in db.lookup_class("network"):
        if net.nid_uuid == nid_uuid:
3183 ############################################################
# newService (fragment): factory mapping a config class name to the
# matching Module subclass instance.
type = db.get_class()
debug('Service:', type, db.getName(), db.getUUID())
n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
elif type == 'network':
elif type == 'routetbl':
elif type == 'cobd':
    n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
elif type == 'cmobd':
elif type == 'mdsdev':
elif type == 'mountpoint':
elif type == 'echoclient':
panic ("unknown service type:", type)
3219 # Prepare the system to run lustre using a particular profile
3220 # in a the configuration.
3221 # * load & the modules
3222 # * setup networking for the current node
3223 # * make sure partitions are in place and prepared
3224 # * initialize devices with lctl
3225 # Levels is important, and needs to be enforced.
def for_each_profile(db, prof_list, operation):
    """Look up each profile uuid and apply operation to its services."""
    for prof_uuid in prof_list:
        prof_db = db.lookup(prof_uuid)
        panic("profile:", prof_uuid, "not found.")
        services = getServices(prof_db)
def magic_get_osc(db, rec, lov):
    """Resolve the OSC referenced by an update record.

    When a live lov object is supplied its uuid / fs name are used
    directly; otherwise the names are dug out of the raw XML tree."""
    lov_uuid = lov.get_uuid()
    lov_name = lov.osc.fs_name
    lov_uuid = rec.getAttribute('lov_uuidref')
    # FIXME: better way to find the mountpoint?
    filesystems = db.root_node.getElementsByTagName('filesystem')
    for fs in filesystems:
        ref = fs.getElementsByTagName('obd_ref')
        if ref[0].getAttribute('uuidref') == lov_uuid:
            fsuuid = fs.getAttribute('uuid')
    panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
    mtpts = db.root_node.getElementsByTagName('mountpoint')
    ref = fs.getElementsByTagName('filesystem_ref')
    if ref[0].getAttribute('uuidref') == fsuuid:
        lov_name = fs.getAttribute('name')
    panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
    print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
    ost_uuid = rec.getAttribute('ost_uuidref')
    obd = db.lookup(ost_uuid)
    panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
    osc = get_osc(obd, lov_uuid, lov_name)
    # NOTE(review): 'obd_uuid' is undefined in this function, so this
    # panic would itself raise NameError - should likely be 'ost_uuid'.
    panic('osc not found:', obd_uuid)
3276 # write logs for update records. sadly, logs of all types -- and updates in
3277 # particular -- are something of an afterthought. lconf needs rewritten with
3278 # these as core concepts. so this is a pretty big hack.
def process_update_record(db, update, lov):
    """Replay one 'update' XML element: each add/deactivate/delete child
    record adjusts the OSC membership of a LOV."""
    for rec in update.childNodes:
        if rec.nodeType != rec.ELEMENT_NODE:
        log("found "+rec.nodeName+" record in update version " +
            str(update.getAttribute('version')))
        lov_uuid = rec.getAttribute('lov_uuidref')
        ost_uuid = rec.getAttribute('ost_uuidref')
        index = rec.getAttribute('index')
        gen = rec.getAttribute('generation')
        if not lov_uuid or not ost_uuid or not index or not gen:
            panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
        tmplov = db.lookup(lov_uuid)
        panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
        lov_name = tmplov.getName()
        lov_name = lov.osc.name
        # ------------------------------------------------------------- add
        if rec.nodeName == 'add':
            lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
            osc = magic_get_osc(db, rec, lov)
            # Only ignore connect failures with --force, which
            # isn't implemented here yet.
            osc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing OSC %s\n" % osc.uuid
            lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
        # ------------------------------------------------------ deactivate
        elif rec.nodeName == 'deactivate':
            osc = magic_get_osc(db, rec, lov)
            except CommandError, e:
                print "Error deactivating OSC %s\n" % osc.uuid
        # ---------------------------------------------------------- delete
        elif rec.nodeName == 'delete':
            osc = magic_get_osc(db, rec, lov)
            except CommandError, e:
                print "Error cleaning up OSC %s\n" % osc.uuid
            lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
def process_updates(db, log_device, log_name, lov = None):
    """Record one config log per update version, named <log_name>-<ver>."""
    updates = db.root_node.getElementsByTagName('update')
    if not u.childNodes:
        log("ignoring empty update record (version " +
            str(u.getAttribute('version')) + ")")
    version = u.getAttribute('version')
    real_name = "%s-%s" % (log_name, version)
    lctl.clear_log(log_device, real_name)
    lctl.record(log_device, real_name)
    process_update_record(db, u, lov)
def doWriteconf(services):
    # Only mdsdev and osd services take part in --write_conf.
    if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
        n = newService(s[1])

def doSetup(services):
    # Instantiate services, order them by corrected level, then prepare.
    n = newService(s[1])
    slist.append((n.level, n))
    nl = n[1].correct_level(n[0])
    nlist.append((nl, n[1]))

def doLoadModules(services):
    # adding all needed modules from all services
    n = newService(s[1])
    n.add_module(mod_manager)
    # loading all registered modules
    mod_manager.load_modules()

def doUnloadModules(services):
    # adding all needed modules from all services
    n = newService(s[1])
    if n.safe_to_clean_modules():
        n.add_module(mod_manager)
    # unloading all registered modules
    mod_manager.cleanup_modules()

def doCleanup(services):
    # Tear services down; ordering mirrors doSetup.
    n = newService(s[1])
    slist.append((n.level, n))
    nl = n[1].correct_level(n[0])
    nlist.append((nl, n[1]))
    if n[1].safe_to_clean():
def doHost(lustreDB, hosts):
    """Configure (or clean up) lustre for the first matching host entry."""
    global is_router, local_node_name
    node_db = lustreDB.lookup_name(h, 'node')
    panic('No host entry found.')

    local_node_name = node_db.get_val('name', 0)
    is_router = node_db.get_val_int('router', 0)
    lustre_upcall = node_db.get_val('lustreUpcall', '')
    portals_upcall = node_db.get_val('portalsUpcall', '')
    timeout = node_db.get_val_int('timeout', 0)
    ptldebug = node_db.get_val('ptldebug', '')
    subsystem = node_db.get_val('subsystem', '')

    find_local_clusters(node_db)
    find_local_routes(lustreDB)

    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    prof_list = node_db.get_refs('profile')

    if config.write_conf:
        for_each_profile(node_db, prof_list, doLoadModules)
        for_each_profile(node_db, prof_list, doWriteconf)
        for_each_profile(node_db, prof_list, doUnloadModules)
    elif config.recover:
        if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
            raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
                                     "--client_uuid <UUID> --conn_uuid <UUID>")
        doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
    elif config.cleanup:
        # the command line can override this value
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            for_each_profile(node_db, prof_list, doCleanup)
        sys_set_timeout(timeout)
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)
        for_each_profile(node_db, prof_list, doCleanup)
        for_each_profile(node_db, prof_list, doUnloadModules)
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            sys_set_timeout(timeout)
            sys_set_lustre_upcall(lustre_upcall)
            for_each_profile(node_db, prof_list, doSetup)
        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
        for_each_profile(node_db, prof_list, doLoadModules)
        sys_set_debug_path()
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        script = config.gdb_script
        run(lctl.lctl, ' modules >', script)
        log ("The GDB module script is in", script)
        # pause, so user has time to break and
        sys_set_timeout(timeout)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)
        for_each_profile(node_db, prof_list, doSetup)
def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
    """Fail a client over to the currently-active target: disconnect the
    old nid and reconnect to the newly chosen one."""
    tgt = lustreDB.lookup(tgt_uuid)
    raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
    new_uuid = get_active_target(tgt)
    raise Lustre.LconfError("doRecovery: no active target found for: " +
    net = choose_local_server(get_ost_net(lustreDB, new_uuid))
    raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
    log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
    oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
    # Disconnect failures are logged but not fatal; the reconnect is
    # still attempted.
    lctl.disconnect(oldnet)
    except CommandError, e:
        log("recover: disconnect", nid_uuid, "failed: ")
    except CommandError, e:
        log("recover: connect failed")
    lctl.recover(client_uuid, net.nid_uuid)
def setupModulePath(cmd, portals_dir = PORTALS_DIR):
    """Derive config.lustre / config.portals module search paths from the
    lconf location (development mode) or the command-line options."""
    base = os.path.dirname(cmd)
    if development_mode():
        if not config.lustre:
            debug('using objdir module paths')
            config.lustre = (os.path.join(base, ".."))
        # normalize the portals dir, using command line arg if set
        portals_dir = config.portals
        dir = os.path.join(config.lustre, portals_dir)
        config.portals = dir
        debug('config.portals', config.portals)
    elif config.lustre and config.portals:
        # if --lustre and --portals, normalize portals
        # can ignore PORTALS_DIR here, since it is probably useless here
        config.portals = os.path.join(config.lustre, config.portals)
        debug('config.portals B', config.portals)
def sysctl(path, val):
    """Set a kernel tunable: write val to /proc/sys/<path>."""
    debug("+ sysctl", path, val)
    fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals crash-dump path at the configured location."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
def sys_set_lustre_upcall(upcall):
    """Install the lustre upcall script via lctl."""
    # the command overrides the value in the node config
    if config.lustre_upcall:
        upcall = config.lustre_upcall
    upcall = config.upcall
    lctl.set_lustre_upcall(upcall)

def sys_set_portals_upcall(upcall):
    """Install the portals upcall script via /proc/sys."""
    # the command overrides the value in the node config
    if config.portals_upcall:
        upcall = config.portals_upcall
    upcall = config.upcall
    sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout through lctl.

    The --timeout command-line option overrides the node-config value;
    a missing or non-positive timeout is ignored.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # Idiom fix: compare against None with 'is not', not '!='.
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
def sys_tweak_socknal ():
    # reserve at least 8MB, or we run out of RAM in skb_alloc under read
    if sys_get_branch() == '2.6':
        fp = open('/proc/meminfo')
        lines = fp.readlines()
        if a[0] == 'MemTotal:':
            debug("memtotal" + memtotal)
        # On boxes under 256MB, leave 1/16 of RAM free for skb allocation.
        if int(memtotal) < 262144:
            minfree = int(memtotal) / 16
        debug("+ minfree ", minfree)
        sysctl("vm/min_free_kbytes", minfree)
    if config.single_socket:
        sysctl("socknal/typed", 0)
def sys_optimize_elan ():
    """Reduce Elan interrupt punt loops on any qsnet/elan proc file that
    exists and is writable.

    FIX: restore the loop over procfiles; 'p' was otherwise unbound.
    """
    procfiles = ["/proc/elan/config/eventint_punt_loops",
                 "/proc/qsnet/elan3/config/eventint_punt_loops",
                 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
    for p in procfiles:
        if os.access(p, os.W_OK):
            run ("echo 1 > " + p)
def sys_set_ptldebug(ptldebug):
    """Program the portals debug mask; --ptldebug overrides the config.
    Symbolic names are resolved through ptldebug_names."""
    ptldebug = config.ptldebug
    val = eval(ptldebug, ptldebug_names)
    val = "0x%x" % (val & 0xffffffffL)
    sysctl('portals/debug', val)
    except NameError, e:
    sysctl('portals/debug', 0xffffffffL)

def sys_set_subsystem(subsystem):
    """Program the portals subsystem debug mask; --subsystem overrides."""
    if config.subsystem:
        subsystem = config.subsystem
    val = eval(subsystem, subsystem_names)
    val = "0x%x" % (val & 0xffffffffL)
    sysctl('portals/subsystem_debug', val)
    except NameError, e:

def sys_set_netmem_max(path, max):
    """Ensure a net.core rmem/wmem_max tunable is at least max."""
    debug("setting", path, "to at least", max)
    fp = open(path, 'w')
    fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes when absent."""
    wanted = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for node, mknod_cmd in wanted:
        if not os.access(node, os.R_OK):
            run(mknod_cmd)
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    """Append new_dir to os.environ['PATH'] unless already present.

    FIX: restore the early return for the already-present case so the
    directory is not appended twice; use the str.split method instead
    of the dated string.split function (identical behavior).
    """
    syspath = os.environ['PATH'].split(':')
    if new_dir in syspath:
        return
    os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
def default_debug_path():
    """Return the debug dump path, preferring the /r root when present.

    FIX: restore the missing return statements (same pattern as
    default_gdb_script); the function otherwise returned None.
    """
    path = '/tmp/lustre-log'
    if os.path.isdir('/r'):
        return '/r' + path
    else:
        return path
def default_gdb_script():
    """Return the gdb script path, preferring the /r root when present.

    FIX: restore the missing else/return branch; without it the
    non-/r case returned None.
    """
    script = '/tmp/ogdb'
    if os.path.isdir('/r'):
        return '/r' + script
    else:
        return script
# Minimal directories every run needs on PATH.
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    for dir in DEFAULT_PATH:

# global hack for the --select handling
def init_select(args):
    # args = [service=nodeA,service2=nodeB service3=nodeC]
    list = string.split(arg, ',')
    srv, node = string.split(entry, '=')
    tgt_select[srv] = node

def get_select(srv):
    """Return the node pinned for srv by --select, if any."""
    if tgt_select.has_key(srv):
        return tgt_select[srv]
# Short aliases for the option-type constants used in the table below.
FLAG = Lustre.Options.FLAG
PARAM = Lustre.Options.PARAM
INTPARAM = Lustre.Options.INTPARAM
PARAMLIST = Lustre.Options.PARAMLIST
# lconf command-line option table (fragment):
# entries are (name[,short], help[, type[, default]]).
('verbose,v', "Print system commands as they are run"),
('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
('config', "Cluster config name used for LDAP query", PARAM),
('select', "service=nodeA,service2=nodeB ", PARAMLIST),
('node', "Load config for <nodename>", PARAM),
('sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
('mds_sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
('oss_sec',"security flavor <null|krb5i|krb5p> between this client with ost", PARAM),
('mds_mds_sec',"security flavor <null|krb5i|krb5p> between this mds with other mds", PARAM),
('mds_oss_sec',"security flavor <null|krb5i|krb5p> between this mds with ost", PARAM),
('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
('cleanup,d', "Cleans up config. (Shutdown)"),
('force,f', "Forced unmounting and/or obd detach during cleanup",
('single_socket', "socknal option: only use one socket instead of bundle",
('failover',"""Used to shut down without saving state.
This will allow this node to "give up" a service to a
another node for failover purposes. This will not
be a clean shutdown.""",
('gdb', """Prints message after creating gdb module script
and sleeps for 5 seconds."""),
('noexec,n', """Prints the commands and steps that will be run for a
config without executing them. This can used to check if a
config file is doing what it should be doing"""),
('nomod', "Skip load/unload module step."),
('nosetup', "Skip device setup/cleanup step."),
('reformat', "Reformat all devices (without question)"),
('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
('mountfsoptions', "Additional options for mount fs command line", PARAM),
('clientoptions', "Additional options for Lustre", PARAM),
('dump', "Dump the kernel debug log to file before portals is unloaded",
('write_conf', "Save all the client config information on mds."),
('record', "Write config information on mds."),
('record_log', "Name of config record log.", PARAM),
('record_device', "MDS device name that will record the config commands",
('root_squash', "MDS squash root to appointed uid",
('no_root_squash', "Don't squash root for appointed nid",
('minlevel', "Minimum level of services to configure/cleanup",
('maxlevel', """Maximum level of services to configure/cleanup
Levels are aproximatly like:
70 - mountpoint, echo_client, osc, mdc, lov""",
('lustre', """Base directory of lustre sources. This parameter will
cause lconf to load modules from a source tree.""", PARAM),
('portals', """Portals source directory. If this is a relative path,
then it is assumed to be relative to lustre. """, PARAM),
('timeout', "Set recovery timeout", INTPARAM),
('upcall', "Set both portals and lustre upcall script", PARAM),
('lustre_upcall', "Set lustre upcall script", PARAM),
('portals_upcall', "Set portals upcall script", PARAM),
('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
('ptldebug', "Set the portals debug level", PARAM),
('subsystem', "Set the portals debug subsystem", PARAM),
('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
# Client recovery options
('recover', "Recover a device"),
('group', "The group of devices to configure or cleanup", PARAM),
('tgt_uuid', "The failed target (required for recovery)", PARAM),
('client_uuid', "The failed client (required for recovery)", PARAM),
('conn_uuid', "The failed connection (required for recovery)", PARAM),
('inactive', """The name of an inactive service, to be ignored during
mounting (currently OST-only). Can be repeated.""",
# main (fragment): parse options, load the XML or LDAP config, and
# drive doHost for this node.
global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
# in the upcall this is set to SIG_IGN
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
cl = Lustre.Options("lconf", "config.xml", lconf_options)
config, args = cl.parse(sys.argv[1:])
except Lustre.OptionError, e:
setupModulePath(sys.argv[0])
host = socket.gethostname()
# the PRNG is normally seeded with time(), which is not so good for starting
# time-synchronized clusters
input = open('/dev/urandom', 'r')
print 'Unable to open /dev/urandom!'
seed = input.read(32)
init_select(config.select)
# allow config to be fetched via HTTP, but only with python2
if sys.version[0] != '1' and args[0].startswith('http://'):
    config_file = urllib2.urlopen(args[0])
    except (urllib2.URLError, socket.error), err:
        if hasattr(err, 'args'):
        print "Could not access '%s': %s" %(args[0], err)
elif not os.access(args[0], os.R_OK):
    print 'File not found or readable:', args[0]
config_file = open(args[0], 'r')
dom = xml.dom.minidom.parse(config_file)
panic("%s does not appear to be a config file." % (args[0]))
sys.exit(1) # make sure to die here, even in debug mode.
CONFIG_FILE = args[0]
lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
if not config.config:
    config.config = os.path.basename(args[0])# use full path?
    if config.config[-4:] == '.xml':
        config.config = config.config[:-4]
elif config.ldapurl:
    if not config.config:
        panic("--ldapurl requires --config name")
    dn = "config=%s,fs=lustre" % (config.config)
    lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
elif config.ptldebug or config.subsystem:
    sys_set_ptldebug(None)
    sys_set_subsystem(None)
print 'Missing config file or ldap URL.'
print 'see lconf --help for command summary'
toplustreDB = lustreDB
# Refuse configs written by a different lconf version.
ver = lustreDB.get_version()
panic("No version found in config data, please recreate.")
if ver != Lustre.CONFIG_VERSION:
    panic("Config version", ver, "does not match lconf version",
          Lustre.CONFIG_VERSION)
node_list.append(config.node)
node_list.append(host)
node_list.append('localhost')
debug("configuring for host: ", node_list)
config.debug_path = config.debug_path + '-' + host
config.gdb_script = config.gdb_script + '-' + host
lctl = LCTLInterface('lctl')
if config.lctl_dump:
    lctl.use_save_file(config.lctl_dump)
if not (config.record_device and config.record_log):
    panic("When recording, both --record_log and --record_device must be specified.")
lctl.clear_log(config.record_device, config.record_log)
lctl.record(config.record_device, config.record_log)
# init module manager
mod_manager = kmod_manager(config.lustre, config.portals)
doHost(lustreDB, node_list)
if not config.record:
process_updates(lustreDB, config.record_device, config.record_log)
if __name__ == "__main__":
    except Lustre.LconfError, e:
        # traceback.print_exc(file=sys.stdout)
    except CommandError, e:
        # Propagate the first cleanup failure as the exit status.
        if first_cleanup_error:
            sys.exit(first_cleanup_error)