3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
first_cleanup_error = 0

def cleanup_error(rc):
    """Remember the first cleanup failure code; later failures are ignored."""
    global first_cleanup_error
    if first_cleanup_error:
        return
    first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort by raising LconfError for a not-yet-implemented feature."""
    text = msg + ' not implemented yet.'
    raise Lustre.LconfError(text)
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
173 # ============================================================
174 # locally defined exceptions
175 class CommandError (exceptions.Exception):
176 def __init__(self, cmd_name, cmd_err, rc=None):
177 self.cmd_name = cmd_name
178 self.cmd_err = cmd_err
183 if type(self.cmd_err) == types.StringType:
185 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187 print "! %s: %s" % (self.cmd_name, self.cmd_err)
188 elif type(self.cmd_err) == types.ListType:
190 print "! %s (error %d):" % (self.cmd_name, self.rc)
192 print "! %s:" % (self.cmd_name)
193 for s in self.cmd_err:
194 print "> %s" %(string.strip(s))
199 # ============================================================
200 # handle daemons, like the acceptor
""" Manage starting and stopping a daemon. Assumes the daemon manages
its own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
230 log("unable to kill", self.command, e)
231 time.sleep(5) # let daemon die
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
278 def command_line(self):
279 return string.join(map(str,(self.flags, self.port)))
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize close by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    # Remember the dump file; when set, run() prepends a "dump <file>"
    # command so lctl writes the command stream there.
    self.save_file = file
def record(self, dev_name, logname):
    """Start recording a config log named 'logname' on device 'dev_name'."""
    log("Recording log", logname, "on", dev_name)
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Finish recording: announce it and clear the recording state."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Put file descriptor 'fd' into non-blocking mode."""
    flags = fcntl.fcntl(fd, F_GETFL)
    flags = flags | os.O_NDELAY
    fcntl.fcntl(fd, F_SETFL, flags)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From "Python Cookbook" from O'Reilly
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
405 def runcmd(self, *args):
407 run lctl using the command line
409 cmd = string.join(map(str,args))
410 debug("+", self.lctl, cmd)
411 rc, out = run(self.lctl, cmd)
413 raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # add a route to a range
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
632 self.attach(type, name, uuid);
634 self.setup(name, setup)
635 except CommandError, e:
636 self.cleanup(name, uuid, 0)
640 def cleanup(self, name, uuid, force, failover = 0):
641 if failover: force = 1
647 quit""" % (name, ('', 'force')[force],
648 ('', 'failover')[failover])
652 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
653 stripe_sz, stripe_off, pattern):
656 lov_setup %s %d %d %d %s
657 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, pattern)
660 # add an OSC to a LOV
661 def lov_add_osc(self, name, ost_uuid, index, gen):
663 lov_modify_tgts add %s %s %s %s
664 quit""" % (name, ost_uuid, index, gen)
668 def lmv_setup(self, name, uuid, desc_uuid, devlist):
672 quit""" % (name, uuid, desc_uuid, devlist)
675 # delete an OSC from a LOV
676 def lov_del_osc(self, name, ost_uuid, index, gen):
678 lov_modify_tgts del %s %s %s %s
679 quit""" % (name, ost_uuid, index, gen)
683 def deactivate(self, name):
691 def dump(self, dump_file):
694 quit""" % (dump_file)
697 # get list of devices
698 def device_list(self):
699 devices = '/proc/fs/lustre/devices'
701 if os.access(devices, os.R_OK):
703 fp = open(devices, 'r')
711 def lustre_version(self):
712 rc, out = self.runcmd('version')
716 def mount_option(self, profile, osc, mdc, gkc):
718 mount_option %s %s %s %s
719 quit""" % (profile, osc, mdc, gkc)
722 # delete mount options
723 def del_mount_option(self, profile):
729 def set_timeout(self, timeout):
735 def set_lustre_upcall(self, upcall):
740 # ============================================================
741 # Various system-level functions
742 # (ideally moved to their own module)
744 # Run a command and return the output and status.
745 # stderr is sent to /dev/null, could use popen3 to
746 # save it if necessary
749 if config.noexec: return (0, [])
750 f = os.popen(cmd + ' 2>&1')
760 cmd = string.join(map(str,args))
763 # Run a command in the background.
764 def run_daemon(*args):
765 cmd = string.join(map(str,args))
767 if config.noexec: return 0
768 f = os.popen(cmd + ' 2>&1')
776 # Determine full path to use for an external command
777 # searches dirname(argv[0]) first, then PATH
779 syspath = string.split(os.environ['PATH'], ':')
780 cmdpath = os.path.dirname(sys.argv[0])
781 syspath.insert(0, cmdpath);
783 syspath.insert(0, os.path.join(config.portals, 'utils/'))
785 prog = os.path.join(d,cmd)
786 if os.access(prog, os.X_OK):
790 # Recursively look for file starting at base dir
791 def do_find_file(base, mod):
792 fullname = os.path.join(base, mod)
793 if os.access(fullname, os.R_OK):
795 for d in os.listdir(base):
796 dir = os.path.join(base,d)
797 if os.path.isdir(dir):
798 module = do_find_file(dir, mod)
802 # is the path a block device?
809 return stat.S_ISBLK(s[stat.ST_MODE])
811 # find the journal device from mkfs options
817 while i < len(x) - 1:
818 if x[i] == '-J' and x[i+1].startswith('device='):
824 # build fs according to type
826 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
832 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
834 # devsize is in 1k, and fs block count is in 4k
835 block_cnt = devsize/4
837 if fstype in ('ext3', 'extN', 'ldiskfs'):
838 # ext3 journal size is in megabytes
839 # but don't set jsize if mkfsoptions indicates a separate journal device
840 if jsize == 0 and jdev(mkfsoptions) == '':
842 if not is_block(dev):
843 ret, out = runcmd("ls -l %s" %dev)
844 devsize = int(string.split(out[0])[4]) / 1024
846 # sfdisk works for symlink, hardlink, and realdev
847 ret, out = runcmd("sfdisk -s %s" %dev)
849 devsize = int(out[0])
851 # sfdisk -s will fail for too large block device,
852 # then, read the size of partition from /proc/partitions
854 # get the realpath of the device
855 # it may be the real device, such as /dev/hda7
856 # or the hardlink created via mknod for a device
857 if 'realpath' in dir(os.path):
858 real_dev = os.path.realpath(dev)
862 while os.path.islink(real_dev) and (link_count < 20):
863 link_count = link_count + 1
864 dev_link = os.readlink(real_dev)
865 if os.path.isabs(dev_link):
868 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
870 panic("Entountered too many symbolic links resolving block device:", dev)
872 # get the major and minor number of the realpath via ls
873 # it seems python(os.stat) does not return
874 # the st_rdev member of the stat structure
875 ret, out = runcmd("ls -l %s" %real_dev)
876 major = string.split(string.split(out[0])[4], ",")[0]
877 minor = string.split(out[0])[5]
879 # get the devsize from /proc/partitions with the major and minor number
880 ret, out = runcmd("cat /proc/partitions")
883 if string.split(line)[0] == major and string.split(line)[1] == minor:
884 devsize = int(string.split(line)[2])
887 if devsize > 1024 * 1024:
888 jsize = ((devsize / 102400) * 4)
891 if jsize: jopt = "-J size=%d" %(jsize,)
892 if isize: iopt = "-I %d" %(isize,)
893 mkfs = 'mkfs.ext2 -j -b 4096 '
894 if not isblock or config.force:
896 if jdev(mkfsoptions) != '':
897 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
899 jmkfs = jmkfs + '-F '
900 jmkfs = jmkfs + jdev(mkfsoptions)
901 (ret, out) = run (jmkfs)
903 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
904 elif fstype == 'reiserfs':
905 # reiserfs journal size is in blocks
906 if jsize: jopt = "--journal_size %d" %(jsize,)
907 mkfs = 'mkreiserfs -ff'
909 panic('unsupported fs type: ', fstype)
911 if config.mkfsoptions != None:
912 mkfs = mkfs + ' ' + config.mkfsoptions
913 if mkfsoptions != None:
914 mkfs = mkfs + ' ' + mkfsoptions
915 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
917 panic("Unable to build fs:", dev, string.join(out))
918 # enable hash tree indexing on fsswe
919 if fstype in ('ext3', 'extN', 'ldiskfs'):
920 htree = 'echo "feature FEATURE_C5" | debugfs -w'
921 (ret, out) = run (htree, dev)
923 panic("Unable to enable htree:", dev)
925 # some systems use /dev/loopN, some /dev/loop/N
929 if not os.access(loop + str(0), os.R_OK):
931 if not os.access(loop + str(0), os.R_OK):
932 panic ("can't access loop devices")
935 # find loop device assigned to the file
936 def find_assigned_loop(file):
938 for n in xrange(0, MAX_LOOP_DEVICES):
940 if os.access(dev, os.R_OK):
941 (stat, out) = run('losetup', dev)
942 if out and stat == 0:
943 m = re.search(r'\((.*)\)', out[0])
944 if m and file == m.group(1):
948 # find free loop device
949 def find_free_loop(file):
952 # find next free loop
953 for n in xrange(0, MAX_LOOP_DEVICES):
955 if os.access(dev, os.R_OK):
956 (stat, out) = run('losetup', dev)
961 # create file if necessary and assign the first free loop device
962 def init_loop(file, size, fstype, journal_size, inode_size,
963 mkfsoptions, reformat, autoformat, backfstype, backfile):
966 realfstype = backfstype
967 if is_block(backfile):
968 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
969 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
975 dev = find_assigned_loop(realfile)
977 print 'WARNING: file', realfile, 'already mapped to', dev
980 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
981 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
983 panic("Unable to create backing store:", realfile)
984 mkfs(realfile, size, realfstype, journal_size, inode_size,
985 mkfsoptions, isblock=0)
987 dev = find_free_loop(realfile)
989 print "attach " + realfile + " <-> " + dev
990 run('losetup', dev, realfile)
993 print "out of loop devices"
996 # undo loop assignment
997 def clean_loop(dev, fstype, backfstype, backdev):
1002 if not is_block(realfile):
1003 dev = find_assigned_loop(realfile)
1005 print "detach " + dev + " <-> " + realfile
1006 ret, out = run('losetup -d', dev)
1008 log('unable to clean loop device', dev, 'for file', realfile)
# finalizes the passed device
1012 def clean_dev(dev, fstype, backfstype, backdev):
1013 if fstype == 'smfs' or not is_block(dev):
1014 clean_loop(dev, fstype, backfstype, backdev)
1016 # determine if dev is formatted as a <fstype> filesystem
1017 def need_format(fstype, dev):
1018 # FIXME don't know how to implement this
1021 # initialize a block device if needed
1022 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1023 inode_size, mkfsoptions, backfstype, backdev):
1027 if fstype == 'smfs' or not is_block(dev):
1028 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1029 mkfsoptions, reformat, autoformat, backfstype, backdev)
1030 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1031 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1034 # panic("device:", dev,
1035 # "not prepared, and autoformat is not set.\n",
1036 # "Rerun with --reformat option to format ALL filesystems")
1041 """lookup IP address for an interface"""
1042 rc, out = run("/sbin/ifconfig", iface)
1045 addr = string.split(out[1])[1]
1046 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return the default mount options for the given fstype and target
    type ('mds' or 'ost'); None for fstypes with no defaults."""
    if fstype not in ('ext3', 'ldiskfs'):
        return None
    opts = "errors=remount-ro"
    if target == 'ost':
        # 2.4 kernels get async deletes; 2.6 kernels get extents/mballoc.
        if sys_get_branch() == '2.4':
            opts = "%s,asyncdel" % (opts)
        if sys_get_branch() == '2.6':
            opts = "%s,extents,mballoc" % (opts)
    return opts
1060 def sys_get_elan_position_file():
1061 procfiles = ["/proc/elan/device0/position",
1062 "/proc/qsnet/elan4/device0/position",
1063 "/proc/qsnet/elan3/device0/position"]
1065 if os.access(p, os.R_OK):
1069 def sys_get_local_nid(net_type, wildcard, cluster_id):
1070 """Return the local nid."""
1072 if sys_get_elan_position_file():
1073 local = sys_get_local_address('elan', '*', cluster_id)
1075 local = sys_get_local_address(net_type, wildcard, cluster_id)
1078 def sys_get_local_address(net_type, wildcard, cluster_id):
1079 """Return the local address for the network type."""
1081 if net_type in ('tcp','openib','iib','vib','ra'):
1083 iface, star = string.split(wildcard, ':')
1084 local = if2addr(iface)
1086 panic ("unable to determine ip for:", wildcard)
1088 host = socket.gethostname()
1089 local = socket.gethostbyname(host)
1090 elif net_type == 'elan':
1091 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1092 f = sys_get_elan_position_file()
1094 panic ("unable to determine local Elan ID")
1097 lines = fp.readlines()
1101 if a[0] == 'NodeId':
1105 nid = my_int(cluster_id) + my_int(elan_id)
1106 local = "%d" % (nid)
1107 except ValueError, e:
1111 elif net_type == 'lo':
1112 fixme("automatic local address for loopback")
1113 elif net_type == 'gm':
1114 fixme("automatic local address for GM")
1118 def sys_get_branch():
1119 """Returns kernel release"""
1121 fp = open('/proc/sys/kernel/osrelease')
1122 lines = fp.readlines()
1126 version = string.split(l)
1127 a = string.split(version[0], '.')
1128 return a[0] + '.' + a[1]
1133 # XXX: instead of device_list, ask for $name and see what we get
1134 def is_prepared(name):
1135 """Return true if a device exists for the name"""
1136 if config.lctl_dump:
1138 if (config.noexec or config.record) and config.cleanup:
1141 # expect this format:
1142 # 1 UP ldlm ldlm ldlm_UUID 2
1143 out = lctl.device_list()
1145 if name == string.split(s)[3]:
1147 except CommandError, e:
1151 def net_is_prepared():
1152 """If the any device exists, then assume that all networking
1153 has been configured"""
1154 out = lctl.device_list()
1157 def fs_is_mounted(path):
1158 """Return true if path is a mounted lustre filesystem"""
1160 fp = open('/proc/mounts')
1161 lines = fp.readlines()
1165 if a[1] == path and a[2] == 'lustre_lite':
1171 def kmod_find(src_dir, dev_dir, modname):
1172 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1173 for modext in '.ko', '.o':
1174 module = modbase + modext
1176 if os.access(module, os.R_OK):
1182 def kmod_info(modname):
1183 """Returns reference count for passed module name."""
1185 fp = open('/proc/modules')
1186 lines = fp.readlines()
1189 # please forgive my tired fingers for this one
1190 ret = filter(lambda word, mod = modname: word[0] == mod,
1191 map(lambda line: string.split(line), lines))
1195 except Exception, e:
1199 """Presents kernel module"""
1200 def __init__(self, src_dir, dev_dir, name):
1201 self.src_dir = src_dir
1202 self.dev_dir = dev_dir
1205 # FIXME we ignore the failure of loading gss module, because we might
1206 # don't need it at all.
1209 log ('loading module:', self.name, 'srcdir',
1210 self.src_dir, 'devdir', self.dev_dir)
1212 module = kmod_find(self.src_dir, self.dev_dir,
1214 if not module and self.name != 'ptlrpcs_gss':
1215 panic('module not found:', self.name)
1216 (rc, out) = run('/sbin/insmod', module)
1218 if self.name == 'ptlrpcs_gss':
1219 print "Warning: not support gss security!"
1221 raise CommandError('insmod', out, rc)
1223 (rc, out) = run('/sbin/modprobe', self.name)
1225 if self.name == 'ptlrpcs_gss':
1226 print "Warning: not support gss security!"
1228 raise CommandError('modprobe', out, rc)
1232 log('unloading module:', self.name)
1233 (rc, out) = run('/sbin/rmmod', self.name)
1235 log('unable to unload module:', self.name +
1236 "(" + self.refcount() + ")")
1240 """Returns module info if any."""
1241 return kmod_info(self.name)
1244 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1251 """Returns module refcount."""
1258 """Returns 1 if module is used, otherwise 0 is returned."""
1264 if users and users != '(unused)' and users != '-':
1272 """Returns 1 if module is busy, otherwise 0 is returned."""
1273 if self.loaded() and (self.used() or self.refcount() != '0'):
1279 """Manage kernel modules"""
def __init__(self, lustre_dir, portals_dir):
    # Source-tree roots; add_lustre_module()/add_portals_module() use
    # them to locate module binaries (.ko/.o) under dev_dir.
    self.lustre_dir = lustre_dir
    self.portals_dir = portals_dir
    # Modules to load, kept in load order (unloaded in reverse).
    self.kmodule_list = []
1285 def find_module(self, modname):
1286 """Find module by module name"""
1287 for mod in self.kmodule_list:
1288 if mod.name == modname:
1292 def add_portals_module(self, dev_dir, modname):
1293 """Append a module to list of modules to load."""
1295 mod = self.find_module(modname)
1297 mod = kmod(self.portals_dir, dev_dir, modname)
1298 self.kmodule_list.append(mod)
1300 def add_lustre_module(self, dev_dir, modname):
1301 """Append a module to list of modules to load."""
1303 mod = self.find_module(modname)
1305 mod = kmod(self.lustre_dir, dev_dir, modname)
1306 self.kmodule_list.append(mod)
1308 def load_modules(self):
1309 """Load all the modules in the list in the order they appear."""
1310 for mod in self.kmodule_list:
1311 if mod.loaded() and not config.noexec:
1315 def cleanup_modules(self):
1316 """Unload the modules in the list in reverse order."""
1317 rev = self.kmodule_list
1320 if (not mod.loaded() or mod.busy()) and not config.noexec:
1323 if mod.name == 'portals' and config.dump:
1324 lctl.dump(config.dump)
1327 # ============================================================
1328 # Classes to prepare and cleanup the various objects
1331 """ Base class for the rest of the modules. The default cleanup method is
1332 defined here, as well as some utilitiy funcs.
1334 def __init__(self, module_name, db):
1336 self.module_name = module_name
1337 self.name = self.db.getName()
1338 self.uuid = self.db.getUUID()
1342 def info(self, *args):
1343 msg = string.join(map(str,args))
1344 print self.module_name + ":", self.name, self.uuid, msg
1347 """ default cleanup, used for most modules """
1350 lctl.cleanup(self.name, self.uuid, config.force)
1351 except CommandError, e:
1352 log(self.module_name, "cleanup failed: ", self.name)
1356 def add_module(self, manager):
1357 """Adds all needed modules in the order they appear."""
1360 def safe_to_clean(self):
1363 def safe_to_clean_modules(self):
1364 return self.safe_to_clean()
1366 class Network(Module):
1367 def __init__(self,db):
1368 Module.__init__(self, 'NETWORK', db)
1369 self.net_type = self.db.get_val('nettype')
1370 self.nid = self.db.get_val('nid', '*')
1371 self.cluster_id = self.db.get_val('clusterid', "0")
1372 self.port = self.db.get_val_int('port', 0)
1375 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1377 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1378 self.generic_nid = 1
1379 debug("nid:", self.nid)
1381 self.generic_nid = 0
1383 self.nid_uuid = self.nid_to_uuid(self.nid)
1384 self.hostaddr = self.db.get_hostaddr()
1385 if len(self.hostaddr) == 0:
1386 self.hostaddr.append(self.nid)
1387 if '*' in self.hostaddr[0]:
1388 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1389 if not self.hostaddr[0]:
1390 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1391 debug("hostaddr:", self.hostaddr[0])
1393 def add_module(self, manager):
1394 manager.add_portals_module("libcfs", 'libcfs')
1395 manager.add_portals_module("portals", 'portals')
1397 if node_needs_router():
1398 manager.add_portals_module("router", 'kptlrouter')
1399 if self.net_type == 'tcp':
1400 manager.add_portals_module("knals/socknal", 'ksocknal')
1401 if self.net_type == 'elan':
1402 manager.add_portals_module("knals/qswnal", 'kqswnal')
1403 if self.net_type == 'gm':
1404 manager.add_portals_module("knals/gmnal", 'kgmnal')
1405 if self.net_type == 'openib':
1406 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1407 if self.net_type == 'iib':
1408 manager.add_portals_module("knals/iibnal", 'kiibnal')
1409 if self.net_type == 'vib':
1410 self.add_portals_module("knals/vibnal", 'kvibnal')
1411 if self.net_type == 'lo':
1412 manager.add_portals_module("knals/lonal", 'klonal')
1413 if self.net_type == 'ra':
1414 manager.add_portals_module("knals/ranal", 'kranal')
def nid_to_uuid(self, nid):
    """Build the canonical UUID string for network id 'nid'."""
    return "NID_" + str(nid) + "_UUID"
1420 if not config.record and net_is_prepared():
1422 self.info(self.net_type, self.nid, self.port)
1423 if not (config.record and self.generic_nid):
1424 lctl.network(self.net_type, self.nid)
1425 if self.net_type == 'tcp':
1427 for hostaddr in self.db.get_hostaddr():
1428 ip = string.split(hostaddr, '/')[0]
1429 if len(string.split(hostaddr, '/')) == 2:
1430 netmask = string.split(hostaddr, '/')[1]
1433 lctl.add_interface(self.net_type, ip, netmask)
1434 if self.net_type == 'elan':
1436 if self.port and node_is_router():
1437 run_one_acceptor(self.port)
1438 self.connect_peer_gateways()
1440 def connect_peer_gateways(self):
1441 for router in self.db.lookup_class('node'):
1442 if router.get_val_int('router', 0):
1443 for netuuid in router.get_networks():
1444 net = self.db.lookup(netuuid)
1446 if (gw.cluster_id == self.cluster_id and
1447 gw.net_type == self.net_type):
1448 if gw.nid != self.nid:
1451 def disconnect_peer_gateways(self):
1452 for router in self.db.lookup_class('node'):
1453 if router.get_val_int('router', 0):
1454 for netuuid in router.get_networks():
1455 net = self.db.lookup(netuuid)
1457 if (gw.cluster_id == self.cluster_id and
1458 gw.net_type == self.net_type):
1459 if gw.nid != self.nid:
1462 except CommandError, e:
1463 print "disconnect failed: ", self.name
1467 def safe_to_clean(self):
1468 return not net_is_prepared()
1471 self.info(self.net_type, self.nid, self.port)
1473 stop_acceptor(self.port)
1474 if node_is_router():
1475 self.disconnect_peer_gateways()
1476 if self.net_type == 'tcp':
1477 for hostaddr in self.db.get_hostaddr():
1478 ip = string.split(hostaddr, '/')[0]
1479 lctl.del_interface(self.net_type, ip)
1481 def correct_level(self, level, op=None):
1484 class RouteTable(Module):
1485 def __init__(self,db):
1486 Module.__init__(self, 'ROUTES', db)
1488 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1490 # only setup connections for tcp, openib, and iib NALs
1492 if not net_type in ('tcp','openib','iib','vib','ra'):
1495 # connect to target if route is to single node and this node is the gw
1496 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1497 if not local_cluster(net_type, tgt_cluster_id):
1498 panic("target", lo, " not on the local cluster")
1499 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1500 # connect to gateway if this node is not the gw
1501 elif (local_cluster(net_type, gw_cluster_id)
1502 and not local_interface(net_type, gw_cluster_id, gw)):
1503 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1508 panic("no server for nid", lo)
1511 return Network(srvdb)
1514 if not config.record and net_is_prepared():
1517 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1518 lctl.add_route(net_type, gw, lo, hi)
1519 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1523 def safe_to_clean(self):
1524 return not net_is_prepared()
1527 if net_is_prepared():
1528 # the network is still being used, don't clean it up
1530 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1531 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1534 lctl.disconnect(srv)
1535 except CommandError, e:
1536 print "disconnect failed: ", self.name
1541 lctl.del_route(net_type, gw, lo, hi)
1542 except CommandError, e:
1543 print "del_route failed: ", self.name
# Management: the (obsolete) Lustre management service device ("mgmt").
# Loads lvfs/obdclass/ptlrpc plus the mgmt_svc module and creates/cleans the
# device via lctl. (Several body lines are elided from this listing.)
1547 class Management(Module):
1548 def __init__(self, db):
1549 Module.__init__(self, 'MGMT', db)
1551 def add_module(self, manager):
1552 manager.add_lustre_module('lvfs', 'lvfs')
1553 manager.add_lustre_module('obdclass', 'obdclass')
1554 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1555 manager.add_lustre_module('mgmt', 'mgmt_svc')
# prepare (def line elided): no-op if already prepared outside record mode.
1558 if not config.record and is_prepared(self.name):
1561 lctl.newdev("mgmt", self.name, self.uuid)
1563 def safe_to_clean(self):
# cleanup (def line elided): only tear down if the device exists.
1567 if is_prepared(self.name):
1568 Module.cleanup(self)
1570 def correct_level(self, level, op=None):
# --- LDLM class interior (class header elided from this listing). ---
1573 # This is only needed to load the modules; the LDLM device
1574 # is now created automatically.
1576 def __init__(self,db):
1577 Module.__init__(self, 'LDLM', db)
# add_module: core stack plus the security (ptlrpcs/GSS) modules.
1579 def add_module(self, manager):
1580 manager.add_lustre_module('lvfs', 'lvfs')
1581 manager.add_lustre_module('obdclass', 'obdclass')
1582 manager.add_lustre_module('sec', 'ptlrpcs')
1583 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1584 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1592 def correct_level(self, level, op=None):
# --- LOV class interior (class header elided). A logical object volume:
# aggregates OSC targets with striping parameters read from the config DB. ---
1596 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1597 Module.__init__(self, 'LOV', db)
1598 if name_override != None:
1599 self.name = "lov_%s" % name_override
1600 self.mds_uuid = self.db.get_first_ref('mds')
1601 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1602 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1603 self.pattern = self.db.get_val_int('stripepattern', 0)
1604 self.devlist = self.db.get_lov_tgts('lov_tgt')
1605 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1608 self.desc_uuid = self.uuid
1609 self.uuid = generate_client_uuid(self.name)
1610 self.fs_name = fs_name
# config_only LOVs exist purely to record configuration; they are never
# prepared or cleaned up on this node.
1612 self.config_only = 1
1614 self.config_only = None
1615 mds = self.db.lookup(self.mds_uuid)
1616 self.mds_name = mds.getName()
# Build one OSC per LOV target; (obd_uuid, index, gen, active) tuples.
1617 for (obd_uuid, index, gen, active) in self.devlist:
1620 self.obdlist.append(obd_uuid)
1621 obd = self.db.lookup(obd_uuid)
1622 osc = get_osc(obd, self.uuid, fs_name)
1624 self.osclist.append((osc, index, gen, active))
1626 panic('osc not found:', obd_uuid)
# prepare (def line elided): set up the LOV device then attach each OSC.
1635 if not config.record and is_prepared(self.name):
1637 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1638 self.stripe_off, self.pattern, self.devlist,
1640 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1641 self.stripe_sz, self.stripe_off, self.pattern)
1642 for (osc, index, gen, active) in self.osclist:
1643 target_uuid = osc.target_uuid
1645 # Only ignore connect failures with --force, which
1646 # isn't implemented here yet.
1648 osc.prepare(ignore_connect_failure=0)
1649 except CommandError, e:
1650 print "Error preparing OSC %s\n" % osc.uuid
1652 lctl.lov_add_osc(self.name, target_uuid, index, gen)
# cleanup (def line elided): config_only LOVs must never be cleaned here.
1655 for (osc, index, gen, active) in self.osclist:
1656 target_uuid = osc.target_uuid
1658 if is_prepared(self.name):
1659 Module.cleanup(self)
1660 if self.config_only:
1661 panic("Can't clean up config_only LOV ", self.name)
1663 def add_module(self, manager):
1664 if self.config_only:
1665 panic("Can't load modules for config_only LOV ", self.name)
1666 for (osc, index, gen, active) in self.osclist:
1667 osc.add_module(manager)
1669 manager.add_lustre_module('lov', 'lov')
1671 def correct_level(self, level, op=None):
# --- LMV class interior (class header elided). Logical metadata volume:
# aggregates MDC clients over the MDS targets listed in the config DB. ---
1675 def __init__(self, db, uuid, fs_name, name_override = None):
1676 Module.__init__(self, 'LMV', db)
1677 if name_override != None:
1678 self.name = "lmv_%s" % name_override
1680 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1681 if self.devlist == None:
1682 self.devlist = self.db.get_refs('mds')
1685 self.desc_uuid = self.uuid
1687 self.fs_name = fs_name
1688 for mds_uuid in self.devlist:
1689 mds = self.db.lookup(mds_uuid)
1691 panic("MDS not found!")
1692 mdc = MDC(mds, self.uuid, fs_name)
1694 self.mdclist.append(mdc)
1696 panic('mdc not found:', mds_uuid)
# prepare (def line elided).
# NOTE(review): every sibling prepare() guards with
# "if not config.record and is_prepared(...)"; the missing "not" here looks
# like a bug — in record mode an already-prepared LMV would be skipped and a
# live one re-prepared. Confirm against the full source before changing.
1699 if config.record and is_prepared(self.name):
1703 for mdc in self.mdclist:
1705 # Only ignore connect failures with --force, which
1706 # isn't implemented here yet.
1707 mdc.prepare(ignore_connect_failure=0)
1708 except CommandError, e:
1709 print "Error preparing LMV %s\n" % mdc.uuid
1712 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1713 string.join(self.devlist))
# cleanup (def line elided): clean each MDC, then the LMV device itself.
1716 for mdc in self.mdclist:
1718 if is_prepared(self.name):
1719 Module.cleanup(self)
1721 def add_module(self, manager):
1722 for mdc in self.mdclist:
1723 mdc.add_module(manager)
1725 manager.add_lustre_module('lmv', 'lmv')
1727 def correct_level(self, level, op=None):
# --- GKD class interior (class header elided). GSS key service device:
# creates the "gks" device plus a shared "GKT" target device. ---
1731 def __init__(self,db):
1732 Module.__init__(self, 'GKD', db)
1733 target_uuid = self.db.get_first_ref('target')
1734 self.target = self.db.lookup(target_uuid)
1735 self.name = self.target.getName()
1737 active_uuid = get_active_target(self.target)
1739 panic("No target device found:", target_uuid)
# Active/failover selection: only the active node runs the service.
1740 if active_uuid == self.uuid:
1745 self.uuid = target_uuid
# prepare (def line elided): inactive nodes only log and return.
1748 if is_prepared(self.name):
1751 debug(self.uuid, "not active")
1755 lctl.newdev("gks", self.name, self.uuid, setup ="")
1756 if not is_prepared('GKT'):
1757 lctl.newdev("gkt", 'GKT', 'GKT_UUID', setup ="")
# cleanup (def line elided): tear down the gks device, then GKT if present.
1761 debug(self.uuid, "not active")
1764 if is_prepared(self.name):
1766 lctl.cleanup(self.name, self.uuid, config.force,
1768 except CommandError, e:
1769 log(self.module_name, "cleanup failed: ", self.name)
1772 Module.cleanup(self)
1773 if is_prepared('GKT'):
1775 lctl.cleanup("GKT", "GKT_UUID", config.force,
1777 except CommandError, e:
1778 print "cleanup failed: ", self.name
1782 def add_module(self, manager):
1784 manager.add_lustre_module('sec/gks', 'gks')
1785 manager.add_lustre_module('sec/gks', 'gkc')
1787 def correct_level(self, level, op=None):
# CONFDEV: configuration device wrapper for an MDS or OST target. It mounts
# the target's backing device as a "confobd" and records the target's startup
# and cleanup llogs on it (write_conf). Many lines are elided from this
# listing; embedded numbers are original-file line numbers.
1790 class CONFDEV(Module):
1791 def __init__(self, db, name, target_uuid, uuid):
1792 Module.__init__(self, 'CONFDEV', db)
# Backing-device and filesystem parameters from the config DB.
1793 self.devpath = self.db.get_val('devpath','')
1794 self.backdevpath = self.db.get_val('devpath','')
1795 self.size = self.db.get_val_int('devsize', 0)
1796 self.journal_size = self.db.get_val_int('journalsize', 0)
1797 self.fstype = self.db.get_val('fstype', '')
1798 self.backfstype = self.db.get_val('backfstype', '')
1799 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1800 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1801 self.target = self.db.lookup(target_uuid)
1802 self.name = "conf_%s" % self.target.getName()
1803 self.client_uuids = self.target.get_refs('client')
1804 self.fs_uuid = self.db.get_first_ref('filesystem')
1805 self.obdtype = self.db.get_val('obdtype', '')
# Security flavors; command-line options override the DB values.
1807 self.mds_sec = self.db.get_val('mds_sec', '')
1808 self.oss_sec = self.db.get_val('oss_sec', '')
1809 self.deny_sec = self.db.get_val('deny_sec', '')
1811 if config.mds_mds_sec:
1812 self.mds_sec = config.mds_mds_sec
1813 if config.mds_oss_sec:
1814 self.oss_sec = config.mds_oss_sec
1815 if config.mds_deny_sec:
1817 self.deny_sec = "%s,%s" %(self.deny_sec, config.mds_deny_sec)
1819 self.deny_sec = config.mds_deny_sec
1821 if self.obdtype == None:
1822 self.obdtype = 'dumb'
1824 self.conf_name = name
1825 self.conf_uuid = uuid
1826 self.realdev = self.devpath
# Prefer the LMV's client list over the target's own, when an LMV exists.
1831 lmv_uuid = self.db.get_first_ref('lmv')
1832 if lmv_uuid != None:
1833 self.lmv = self.db.lookup(lmv_uuid)
1834 if self.lmv != None:
1835 self.client_uuids = self.lmv.get_refs('client')
# MDS targets default autoformat to "no"; the other branch defaults "yes".
1837 if self.target.get_class() == 'mds':
1838 if self.target.get_val('failover', 0):
1839 self.failover_mds = 'f'
1841 self.failover_mds = 'n'
1842 self.format = self.db.get_val('autoformat', "no")
1844 self.format = self.db.get_val('autoformat', "yes")
1845 self.osdtype = self.db.get_val('osdtype')
1846 ost = self.db.lookup(target_uuid)
1847 if ost.get_val('failover', 0):
1848 self.failover_ost = 'f'
1850 self.failover_ost = 'n'
1852 self.inode_size = self.get_inode_size()
1854 if self.lmv != None:
1855 client_uuid = self.name + "_lmv_UUID"
1856 self.master = LMV(self.lmv, client_uuid,
1857 self.conf_name, self.conf_name)
# Choose the MDS inode size: explicit 'inodesize' wins; otherwise derive it
# from the stripe count of the LOV attached to this MDS (wider stripes need
# bigger inodes to hold the striping EA).
1859 def get_inode_size(self):
1860 inode_size = self.db.get_val_int('inodesize', 0)
1861 if inode_size == 0 and self.target.get_class() == 'mds':
1863 # default inode size for case when neither LOV either
1864 # LMV is accessible.
1865 self.inode_size = 256
1867 # find the LOV for this MDS
1868 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1869 if lovconfig_uuid or self.lmv != None:
1870 if self.lmv != None:
1871 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1872 lovconfig = self.lmv.lookup(lovconfig_uuid)
1873 lov_uuid = lovconfig.get_first_ref('lov')
1874 if lov_uuid == None:
1875 panic(self.target.getName() + ": No LOV found for lovconfig ",
1878 lovconfig = self.target.lookup(lovconfig_uuid)
1879 lov_uuid = lovconfig.get_first_ref('lov')
1880 if lov_uuid == None:
1881 panic(self.target.getName() + ": No LOV found for lovconfig ",
1883 if self.lmv != None:
1884 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1885 lovconfig = self.lmv.lookup(lovconfig_uuid)
1886 lov_uuid = lovconfig.get_first_ref('lov')
1888 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1891 # default stripe count controls default inode_size
1892 if lov.stripe_cnt > 0:
1893 stripe_count = lov.stripe_cnt
1895 stripe_count = len(lov.devlist)
# Thresholds map stripe count to an inode size (values on elided lines).
1896 if stripe_count > 77:
1898 elif stripe_count > 35:
1900 elif stripe_count > 13:
1902 #elif stripe_count > 3:
# Build the mount option string: defaults for the fstype, then the
# command-line override, then per-device options, plus smfs back-device
# options and MDS-specific ACL/xattr options.
1909 def get_mount_options(self, blkdev):
1910 options = def_mount_options(self.fstype,
1911 self.target.get_class())
1913 if config.mountfsoptions:
1915 options = "%s,%s" %(options, config.mountfsoptions)
1917 options = config.mountfsoptions
1918 if self.mountfsoptions:
1919 options = "%s,%s" %(options, self.mountfsoptions)
1921 if self.mountfsoptions:
1923 options = "%s,%s" %(options, self.mountfsoptions)
1925 options = self.mountfsoptions
1927 if self.fstype == 'smfs':
1929 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1932 options = "type=%s,dev=%s" %(self.backfstype,
1935 if self.target.get_class() == 'mds':
1937 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1939 options = "iopen_nopriv"
# prepare (def line elided): format/attach the block device and create the
# confobd device on top of it.
1944 if is_prepared(self.name):
1947 blkdev = block_dev(self.devpath, self.size, self.fstype,
1948 config.reformat, self.format, self.journal_size,
1949 self.inode_size, self.mkfsoptions, self.backfstype,
1952 if self.fstype == 'smfs':
1957 mountfsoptions = self.get_mount_options(blkdev)
1959 self.info(self.target.get_class(), realdev, mountfsoptions,
1960 self.fstype, self.size, self.format)
1962 lctl.newdev("confobd", self.name, self.uuid,
1963 setup ="%s %s %s" %(realdev, self.fstype,
1966 self.mountfsoptions = mountfsoptions
1967 self.realdev = realdev
1969 def add_module(self, manager):
1970 manager.add_lustre_module('obdclass', 'confobd')
1972 # this method checks if current OBD belong to the same FS as passed
1973 # mount point uuid. If not - do not write mountpoint and echo client
1974 # to log, it is not needed, but take damn long time (WB test case)
1976 def belong_to_fs(self, mtpt_uuid):
1977 mtpt = self.db.lookup(mtpt_uuid)
1978 fs_uuid = mtpt.get_first_ref('filesystem')
1980 if not self.fs_uuid or self.fs_uuid == "" or fs_uuid == self.fs_uuid:
# write_conf: record the target's configuration llogs ("<name>-conf",
# per-client mount logs and "-clean" counterparts) on the confobd.
1985 def write_conf(self):
1986 if self.target.get_class() == 'ost':
1988 lctl.clear_log(self.name, self.target.getName() + '-conf')
1989 lctl.record(self.name, self.target.getName() + '-conf')
1990 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1991 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1993 self.mountfsoptions))
1995 lctl.clear_log(self.name, 'OSS-conf')
1996 lctl.record(self.name, 'OSS-conf')
1997 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2002 if self.target.get_class() == 'mds':
2003 if self.master != None:
2004 master_name = self.master.name
2006 master_name = 'dumb'
2009 lctl.clear_log(self.name, self.target.getName() + '-conf')
2010 lctl.record(self.name, self.target.getName() + '-conf')
2011 lctl.attach("mds", self.conf_name, self.conf_uuid)
2013 lctl.set_security(self.conf_name, "mds_sec", self.mds_sec)
2015 lctl.set_security(self.conf_name, "oss_sec", self.oss_sec)
2017 for flavor in string.split(self.deny_sec, ','):
2018 lctl.set_security(self.conf_name, "deny_sec", flavor)
2019 lctl.newdev("mds", self.conf_name, self.conf_uuid,
2020 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
2021 self.conf_name, self.mountfsoptions,
2022 master_name, self.obdtype))
# Record a mount-option log pair (setup + clean) for each direct client.
2026 if not self.client_uuids:
2029 for uuid in self.client_uuids:
2030 log("recording client:", uuid)
2031 client_uuid = generate_client_uuid(self.name)
2032 client = VOSC(self.db.lookup(uuid), client_uuid,
2033 self.target.getName(), self.name)
2035 lctl.clear_log(self.name, self.target.getName())
2036 lctl.record(self.name, self.target.getName())
2038 lctl.mount_option(self.target.getName(), client.get_name(), "", "")
2040 process_updates(self.db, self.name, self.target.getName(), client)
2042 lctl.clear_log(self.name, self.target.getName() + '-clean')
2043 lctl.record(self.name, self.target.getName() + '-clean')
2045 lctl.del_mount_option(self.target.getName())
2053 # record logs for each client
2055 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
2057 config_options = CONFIG_FILE
# Re-invoke lconf itself in --record mode for every client node profile
# that mounts this filesystem, capturing both setup and cleanup logs.
2059 for node_db in self.db.lookup_class('node'):
2060 client_name = node_db.getName()
2061 for prof_uuid in node_db.get_refs('profile'):
2062 prof_db = node_db.lookup(prof_uuid)
2063 # refactor this into a funtion to test "clientness"
2065 for ref_class, ref_uuid in prof_db.get_all_refs():
2066 if ref_class in ('mountpoint','echoclient') and self.belong_to_fs(ref_uuid):
2067 debug("recording:", client_name)
2068 log("recording mountpoint:", ref_uuid)
2069 old_noexec = config.noexec
2071 noexec_opt = ('', '-n')
2072 ret, out = run (sys.argv[0],
2073 noexec_opt[old_noexec == 1],
2074 " -v --record --nomod",
2075 "--record_log", client_name,
2076 "--record_device", self.name,
2077 "--node", client_name,
2080 for s in out: log("record> ", string.strip(s))
2081 ret, out = run (sys.argv[0],
2082 noexec_opt[old_noexec == 1],
2083 "--cleanup -v --record --nomod",
2084 "--record_log", client_name + "-clean",
2085 "--record_device", self.name,
2086 "--node", client_name,
2089 for s in out: log("record> ", string.strip(s))
2090 config.noexec = old_noexec
# start (def line elided): replay the recorded config log; for an OST also
# start the shared OSS device.
2094 lctl.start(self.name, self.conf_name)
2095 except CommandError, e:
2097 if self.target.get_class() == 'ost':
2098 if not is_prepared('OSS'):
2100 lctl.start(self.name, 'OSS')
2101 except CommandError, e:
# cleanup (def line elided): detach the confobd and release the device.
2105 if is_prepared(self.name):
2107 lctl.cleanup(self.name, self.uuid, 0, 0)
2108 clean_dev(self.devpath, self.fstype,
2109 self.backfstype, self.backdevpath)
2110 except CommandError, e:
2111 log(self.module_name, "cleanup failed: ", self.name)
2114 Module.cleanup(self)
# MDSDEV: metadata server device. Delegates the actual device setup and llog
# recording to its CONFDEV; handles module loading, failover-active checks,
# upcall configuration and root-squash. (Lines are elided from this listing;
# embedded numbers are original-file line numbers.)
2116 class MDSDEV(Module):
2117 def __init__(self,db):
2118 Module.__init__(self, 'MDSDEV', db)
2119 self.devpath = self.db.get_val('devpath','')
2120 self.backdevpath = self.db.get_val('devpath','')
2121 self.size = self.db.get_val_int('devsize', 0)
2122 self.journal_size = self.db.get_val_int('journalsize', 0)
2123 self.fstype = self.db.get_val('fstype', '')
2124 self.backfstype = self.db.get_val('backfstype', '')
2125 self.nspath = self.db.get_val('nspath', '')
2126 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2127 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2128 self.obdtype = self.db.get_val('obdtype', '')
2129 self.root_squash = self.db.get_val('root_squash', '')
2130 self.no_root_squash = self.db.get_val('no_root_squash', '')
2132 target_uuid = self.db.get_first_ref('target')
2133 self.target = self.db.lookup(target_uuid)
2134 self.name = self.target.getName()
2138 lmv_uuid = self.db.get_first_ref('lmv')
2139 if lmv_uuid != None:
2140 self.lmv = self.db.lookup(lmv_uuid)
# Failover: only the active target UUID runs the service on this node.
2142 active_uuid = get_active_target(self.target)
2144 panic("No target device found:", target_uuid)
2145 if active_uuid == self.uuid:
2147 group = self.target.get_val('group')
2148 if config.group and config.group != group:
2153 self.uuid = target_uuid
# When part of an LMV, create the master LMV client for this MDS.
2156 if self.lmv != None:
2157 client_uuid = self.name + "_lmv_UUID"
2158 self.master = LMV(self.lmv, client_uuid,
2159 self.name, self.name)
2161 self.confobd = CONFDEV(self.db, self.name,
2162 target_uuid, self.uuid)
2164 def add_module(self, manager):
2166 manager.add_lustre_module('mdc', 'mdc')
2167 manager.add_lustre_module('osc', 'osc')
2168 manager.add_lustre_module('ost', 'ost')
2169 manager.add_lustre_module('lov', 'lov')
2170 manager.add_lustre_module('mds', 'mds')
2172 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2173 manager.add_lustre_module(self.fstype, self.fstype)
2176 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2178 # if fstype is smfs, then we should also take care about backing
2180 if self.fstype == 'smfs':
2181 manager.add_lustre_module(self.backfstype, self.backfstype)
2182 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2184 for option in string.split(self.mountfsoptions, ','):
2185 if option == 'snap':
2186 if not self.fstype == 'smfs':
2187 panic("mountoptions has 'snap', but fstype is not smfs.")
2188 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2189 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2192 if self.master != None:
2193 self.master.add_module(manager)
2195 # add CONFDEV modules
2196 if self.confobd != None:
2197 self.confobd.add_module(manager)
# write_conf: prepare the confobd, write the llogs, then release it.
2199 def write_conf(self):
2200 if config.write_conf:
2202 debug(self.uuid, "not active")
2204 self.confobd.write_conf()
2206 if is_prepared(self.name):
2209 debug(self.uuid, "not active")
2212 self.confobd.prepare()
2213 self.confobd.write_conf()
2214 self.confobd.cleanup()
# prepare (def line elided): bring up confobd, LMV master, MDT device,
# development-mode upcalls, and root-squash settings.
2217 if is_prepared(self.name):
2220 debug(self.uuid, "not active")
2224 self.confobd.prepare()
2226 self.confobd.write_conf()
2229 if self.master != None:
2230 self.master.prepare()
2232 if not config.record:
2233 self.confobd.start()
2235 if not is_prepared('MDT'):
2236 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
2238 if development_mode():
2239 # set lsd upcall path
2240 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2241 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2242 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2243 print "MDS Warning: failed to set lsd cache upcall"
2245 run("echo ", upcall, " > ", procentry)
2246 # set lacl upcall path
2247 procentry = "/proc/fs/lustre/mds/lacl_upcall"
2248 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lacl_upcall")
2249 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2250 print "MDS Warning: failed to set remote acl upcall"
2252 run("echo ", upcall, " > ", procentry)
# Command-line root-squash settings default to the DB values.
2254 if config.root_squash == None:
2255 config.root_squash = self.root_squash
2256 if config.no_root_squash == None:
2257 config.no_root_squash = self.no_root_squash
2258 if config.root_squash:
2259 if config.no_root_squash:
2260 nsnid = config.no_root_squash
2263 lctl.root_squash(self.name, config.root_squash, nsnid)
# msd_remaining: true if any 'mds' device is still listed by lctl.
2265 def msd_remaining(self):
2266 out = lctl.device_list()
2268 if string.split(s)[2] in ('mds',):
2271 def safe_to_clean(self):
2274 def safe_to_clean_modules(self):
2275 return not self.msd_remaining()
# cleanup (def line elided): tear down this MDS, its LMV master, the shared
# MDT device (when no MDSes remain) and finally the confobd.
2279 debug(self.uuid, "not active")
2282 if is_prepared(self.name):
2284 lctl.cleanup(self.name, self.uuid, config.force,
2286 except CommandError, e:
2287 log(self.module_name, "cleanup failed: ", self.name)
2290 Module.cleanup(self)
2292 if self.master != None:
2293 self.master.cleanup()
2294 if not self.msd_remaining() and is_prepared('MDT'):
2296 lctl.cleanup("MDT", "MDT_UUID", config.force,
2298 except CommandError, e:
2299 print "cleanup failed: ", self.name
2304 self.confobd.cleanup()
2306 def correct_level(self, level, op=None):
2307 #if self.master != None:
# --- OSD class interior (class header elided). Object storage device (OST
# backend); like MDSDEV it delegates device setup to a CONFDEV, except for
# the loopback 'obdecho' osdtype which needs no backing device. ---
2312 def __init__(self, db):
2313 Module.__init__(self, 'OSD', db)
2314 self.osdtype = self.db.get_val('osdtype')
2315 self.devpath = self.db.get_val('devpath', '')
2316 self.backdevpath = self.db.get_val('devpath', '')
2317 self.size = self.db.get_val_int('devsize', 0)
2318 self.journal_size = self.db.get_val_int('journalsize', 0)
2319 self.inode_size = self.db.get_val_int('inodesize', 0)
2320 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2321 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2322 self.fstype = self.db.get_val('fstype', '')
2323 self.backfstype = self.db.get_val('backfstype', '')
2324 self.nspath = self.db.get_val('nspath', '')
2325 target_uuid = self.db.get_first_ref('target')
2326 ost = self.db.lookup(target_uuid)
2327 self.name = ost.getName()
2328 self.format = self.db.get_val('autoformat', 'yes')
2329 if ost.get_val('failover', 0):
2330 self.failover_ost = 'f'
2332 self.failover_ost = 'n'
2334 self.deny_sec = self.db.get_val('deny_sec', '')
2336 if config.ost_deny_sec:
2338 self.deny_sec = "%s,%s" %(self.deny_sec, config.ost_deny_sec)
2340 self.deny_sec = config.ost_deny_sec
# Failover: only the active target UUID runs the service on this node.
2342 active_uuid = get_active_target(ost)
2344 panic("No target device found:", target_uuid)
2345 if active_uuid == self.uuid:
2347 group = ost.get_val('group')
2348 if config.group and config.group != group:
2353 self.uuid = target_uuid
2354 self.confobd = CONFDEV(self.db, self.name,
2355 target_uuid, self.uuid)
2357 def add_module(self, manager):
2360 manager.add_lustre_module('ost', 'ost')
2362 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2363 manager.add_lustre_module(self.fstype, self.fstype)
2366 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2368 if self.fstype == 'smfs':
2369 manager.add_lustre_module(self.backfstype, self.backfstype)
2370 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
# BUG(review): this iterates the CHARACTERS of the mountfsoptions string, so
# option == 'snap' can never match; MDSDEV.add_module (orig line 2184) uses
# string.split(self.mountfsoptions, ',') for the same test. Confirm and fix
# in the full source.
2372 for option in self.mountfsoptions:
2373 if option == 'snap':
2374 if not self.fstype == 'smfs':
2375 panic("mountoptions with snap, but fstype is not smfs\n")
2376 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2377 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2379 manager.add_lustre_module(self.osdtype, self.osdtype)
2381 # add CONFDEV modules
2382 if self.confobd != None:
2383 self.confobd.add_module(manager)
# prepare (def line elided): obdecho OSDs are created directly; real OSDs go
# through the confobd, then apply any deny_sec flavors.
2386 if is_prepared(self.name):
2389 debug(self.uuid, "not active")
2394 if self.osdtype == 'obdecho':
2395 self.info(self.osdtype)
2396 lctl.newdev("obdecho", self.name, self.uuid)
2397 if not is_prepared('OSS'):
2398 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2400 self.confobd.prepare()
2402 self.confobd.write_conf()
2403 if not config.record:
2404 self.confobd.start()
2407 for flavor in string.split(self.deny_sec, ','):
2408 lctl.set_security(self.name, "deny_sec", flavor)
2410 def write_conf(self):
2411 if is_prepared(self.name):
2414 debug(self.uuid, "not active")
2418 if self.osdtype != 'obdecho':
2419 self.confobd.prepare()
2420 self.confobd.write_conf()
2421 if not config.write_conf:
2422 self.confobd.start()
2423 self.confobd.cleanup()
# osd_remaining: true if any obdfilter/obdecho device is still listed.
2425 def osd_remaining(self):
2426 out = lctl.device_list()
2428 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2431 def safe_to_clean(self):
2434 def safe_to_clean_modules(self):
2435 return not self.osd_remaining()
# cleanup (def line elided): remove this OSD, then the shared OSS device
# when no OSDs remain, then the confobd.
2439 debug(self.uuid, "not active")
2442 if is_prepared(self.name):
2445 lctl.cleanup(self.name, self.uuid, config.force,
2447 except CommandError, e:
2448 log(self.module_name, "cleanup failed: ", self.name)
2451 if not self.osd_remaining() and is_prepared('OSS'):
2453 lctl.cleanup("OSS", "OSS_UUID", config.force,
2455 except CommandError, e:
2456 print "cleanup failed: ", self.name
2460 if self.osdtype != 'obdecho':
2462 self.confobd.cleanup()
2464 def correct_level(self, level, op=None):
2467 # Generic client module, used by OSC and MDC
# Connects this node to a remote target: resolves the active target device,
# collects the server's networks (and any failover backups), adds routes when
# no local server is reachable, and creates the client device via lctl.
# (Lines are elided from this listing.)
2468 class Client(Module):
2469 def __init__(self, tgtdb, uuid, module, fs_name,
2470 self_name=None, module_dir=None):
2471 self.target_name = tgtdb.getName()
2472 self.target_uuid = tgtdb.getUUID()
2473 self.module_dir = module_dir
2474 self.backup_targets = []
2475 self.module = module
2478 self.module = module
2479 self.module_name = string.upper(module)
2480 self.fs_name = fs_name
# Default device name encodes module, hostname, target and fs.
2482 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2483 self.target_name, fs_name)
2485 self.name = self_name
2486 if not self.module_dir:
2487 self.module_dir = module
2489 self.tgt_dev_uuid = get_active_target(tgtdb)
2490 if not self.tgt_dev_uuid:
2491 panic("No target device found for target(1):", self.target_name)
2495 self.lookup_server(tgtdb, self.tgt_dev_uuid)
2496 self.lookup_backup_targets()
2498 def add_module(self, manager):
2499 manager.add_lustre_module(self.module_dir, self.module)
2501 def lookup_server(self, db, srv_uuid):
2502 """ Lookup a server's network information """
2503 self._server_nets = get_ost_net(db, srv_uuid)
2504 if len(self._server_nets) == 0:
2505 panic ("Unable to find a server for:", srv_uuid)
2510 def get_servers(self):
2511 return self._server_nets
2513 def lookup_backup_targets(self):
2514 """ Lookup alternative network information """
2515 prof_list = toplustreDB.get_refs('profile')
2516 for prof_uuid in prof_list:
2517 prof_db = toplustreDB.lookup(prof_uuid)
2519 panic("profile:", prof_uuid, "not found.")
2520 for ref_class, ref_uuid in prof_db.get_all_refs():
2521 if ref_class in ('osd', 'mdsdev'):
2522 devdb = toplustreDB.lookup(ref_uuid)
2523 uuid = devdb.get_first_ref('target')
# Any other device serving the same target is a failover backup.
2524 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2525 self.backup_targets.append(ref_uuid)
2527 def prepare(self, ignore_connect_failure = 0):
2528 self.info(self.target_uuid)
2529 if not config.record and is_prepared(self.name):
2532 srv = choose_local_server(self.get_servers())
# No local server: route to the target instead.
2536 routes = find_route(self.get_servers())
2537 if len(routes) == 0:
2538 panic ("no route to", self.target_uuid)
2539 for (srv, r) in routes:
2540 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2541 except CommandError, e:
2542 if not ignore_connect_failure:
# Targets listed in --inactive start deactivated (when permitted).
2546 if self.target_uuid in config.inactive and self.permits_inactive():
2547 debug("%s inactive" % self.target_uuid)
2548 inactive_p = "inactive"
2550 debug("%s active" % self.target_uuid)
2552 lctl.newdev(self.module, self.name, self.uuid,
2553 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
# Add failover connections for each backup target device.
2555 for tgt_dev_uuid in self.backup_targets:
2556 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2557 if len(this_nets) == 0:
2558 panic ("Unable to find a server for:", tgt_dev_uuid)
2559 srv = choose_local_server(this_nets)
2563 routes = find_route(this_nets);
2564 if len(routes) == 0:
2565 panic("no route to", tgt_dev_uuid)
2566 for (srv, r) in routes:
# BUG(review): "r[0]. srv.nid_uuid" — the '.' should be a ',' (compare the
# correct call at original line 2540); as written it does attribute lookup
# on r[0] and passes one argument too few.
2567 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2569 lctl.add_conn(self.name, srv.nid_uuid);
# cleanup (def line elided): remove device, connections and routes;
# best-effort, logging CommandError instead of raising.
2572 if is_prepared(self.name):
2573 Module.cleanup(self)
2575 srv = choose_local_server(self.get_servers())
2577 lctl.disconnect(srv)
2579 for (srv, r) in find_route(self.get_servers()):
2580 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2581 except CommandError, e:
2582 log(self.module_name, "cleanup failed: ", self.name)
2586 for tgt_dev_uuid in self.backup_targets:
2587 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2588 srv = choose_local_server(this_net)
2590 lctl.disconnect(srv)
2592 for (srv, r) in find_route(this_net):
# BUG(review): same '.'-for-',' typo as original line 2567.
2593 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2595 def correct_level(self, level, op=None):
2598 def deactivate(self):
2600 lctl.deactivate(self.name)
2601 except CommandError, e:
2602 log(self.module_name, "deactivate failed: ", self.name)
# --- Interiors of the three thin Client subclasses (class headers elided):
# GKC (GSS key client), MDC (metadata client) and OSC (object storage
# client). Each just picks its kernel module name; permits_inactive bodies
# are on elided lines. ---
2607 def __init__(self, db, uuid, fs_name):
2608 Client.__init__(self, db, uuid, 'gkc', fs_name)
2610 def permits_inactive(self):
2614 def __init__(self, db, uuid, fs_name):
2615 Client.__init__(self, db, uuid, 'mdc', fs_name)
2617 def permits_inactive(self):
2621 def __init__(self, db, uuid, fs_name):
2622 Client.__init__(self, db, uuid, 'osc', fs_name)
2624 def permits_inactive(self):
# CMOBD: cache-manager OBD — pairs a master OBD (lov/ost/mds/lmv) with a
# cache OBD (ost/mds) and creates a "cmobd" device over them.
# (Lines are elided from this listing.)
2627 class CMOBD(Module):
2628 def __init__(self, db):
2629 Module.__init__(self, 'CMOBD', db)
2630 self.name = self.db.getName();
2631 self.uuid = generate_client_uuid(self.name)
2632 self.master_uuid = self.db.get_first_ref('masterobd')
2633 self.cache_uuid = self.db.get_first_ref('cacheobd')
2635 master_obd = self.db.lookup(self.master_uuid)
2637 panic('master obd not found:', self.master_uuid)
2639 cache_obd = self.db.lookup(self.cache_uuid)
2641 panic('cache obd not found:', self.cache_uuid)
2646 master_class = master_obd.get_class()
2647 cache_class = cache_obd.get_class()
# Instantiate the right client wrapper for the master OBD's class.
2649 if master_class == 'lov':
2650 client_uuid = "%s_lov_master_UUID" % (self.name)
2651 self.master = LOV(master_obd, client_uuid, self.name,
2652 "master_%s" % (self.name));
2653 elif master_class == 'ost':
2654 client_uuid = "%s_ost_master_UUID" % (self.name)
2655 self.master = get_osc(master_obd, client_uuid, self.master_uuid)
2656 elif master_class == 'mds':
2657 client_uuid = "%s_mds_master_UUID" % (self.name)
2658 self.master = get_mdc(master_obd, client_uuid, self.master_uuid)
2659 elif master_class == 'lmv':
2660 client_uuid = "%s_lmv_master_UUID" % (self.name)
2661 self.master = LMV(master_obd, client_uuid, self.name,
2662 "master_%s" % (self.name));
2664 panic("unknown master obd class '%s'" %(master_class))
2666 if cache_class == 'ost':
2667 self.cache = get_osc(cache_obd, cache_obd.getUUID(),
2669 elif cache_class == 'mds':
2670 self.cache = get_mdc(cache_obd, cache_obd.getUUID(),
2673 panic("invalid cache obd class '%s'" %(cache_class))
# prepare (def line elided): prepare the master then create the cmobd.
2676 if not config.record and is_prepared(self.name):
2678 self.info(self.master_uuid, self.cache_uuid)
2679 self.master.prepare()
2680 lctl.newdev("cmobd", self.name, self.uuid,
2681 setup ="%s %s" %(self.master.uuid,
2690 def get_master_name(self):
2691 return self.master.name
2693 def get_cache_name(self):
2694 return self.cache.name
# cleanup (def line elided): remove the cmobd, then the master client.
2697 if is_prepared(self.name):
2698 Module.cleanup(self)
2700 self.master.cleanup()
2702 def add_module(self, manager):
2703 manager.add_lustre_module('smfs', 'smfs')
2704 manager.add_lustre_module('cmobd', 'cmobd')
2705 self.master.add_module(manager)
2707 def correct_level(self, level, op=None):
# --- COBD class interior (class header elided). Caching OBD: stacks a
# master client over a cache client (each may be LOV/MDC/LMV depending on
# the referenced OBD class) and creates a "cobd" device over the pair. ---
2711 def __init__(self, db, uuid, name):
2712 Module.__init__(self, 'COBD', db)
2713 self.name = self.db.getName();
2714 self.uuid = generate_client_uuid(self.name)
2715 self.master_uuid = self.db.get_first_ref('masterobd')
2716 self.cache_uuid = self.db.get_first_ref('cacheobd')
2718 master_obd = self.db.lookup(self.master_uuid)
2720 panic('master obd not found:', self.master_uuid)
2722 cache_obd = self.db.lookup(self.cache_uuid)
2724 panic('cache obd not found:', self.cache_uuid)
2729 master_class = master_obd.get_class()
2730 cache_class = cache_obd.get_class()
2732 if master_class == 'ost' or master_class == 'lov':
2733 client_uuid = "%s_lov_master_UUID" % (self.name)
2734 self.master = LOV(master_obd, client_uuid, name,
2735 "master_%s" % (self.name));
2736 elif master_class == 'mds':
2737 self.master = get_mdc(db, name, self.master_uuid)
2738 elif master_class == 'lmv':
2739 client_uuid = "%s_lmv_master_UUID" % (self.name)
2740 self.master = LMV(master_obd, client_uuid, self.name,
2741 "master_%s" % (self.name));
2743 panic("unknown master obd class '%s'" %(master_class))
2745 if cache_class == 'ost' or cache_class == 'lov':
2746 client_uuid = "%s_lov_cache_UUID" % (self.name)
2747 self.cache = LOV(cache_obd, client_uuid, name,
2748 "cache_%s" % (self.name));
2749 elif cache_class == 'mds':
2750 self.cache = get_mdc(db, name, self.cache_uuid)
2751 elif cache_class == 'lmv':
2752 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2753 self.cache = LMV(cache_obd, client_uuid, self.name,
2754 "cache_%s" % (self.name));
2756 panic("unknown cache obd class '%s'" %(cache_class))
2764 def get_master_name(self):
2765 return self.master.name
2767 def get_cache_name(self):
2768 return self.cache.name
# prepare (def line elided): prepare both sides, then create the cobd.
2771 if not config.record and is_prepared(self.name):
2773 self.master.prepare()
2774 self.cache.prepare()
2775 self.info(self.master_uuid, self.cache_uuid)
2776 lctl.newdev("cobd", self.name, self.uuid,
2777 setup ="%s %s" %(self.master.name,
# cleanup (def line elided): remove the cobd, then both clients.
2781 if is_prepared(self.name):
2782 Module.cleanup(self)
2783 self.master.cleanup()
2784 self.cache.cleanup()
2786 def add_module(self, manager):
2787 manager.add_lustre_module('cobd', 'cobd')
2788 self.master.add_module(manager)
2790 # virtual interface for OSC and LOV
# --- VOSC class interior (class header elided): wraps a single OSC, a LOV
# or a COBD behind one object-storage-client interface, dispatching on the
# config DB class of the wrapped device. ---
2792 def __init__(self, db, client_uuid, name, name_override = None):
2793 Module.__init__(self, 'VOSC', db)
2794 if db.get_class() == 'lov':
2795 self.osc = LOV(db, client_uuid, name, name_override)
2797 elif db.get_class() == 'cobd':
2798 self.osc = COBD(db, client_uuid, name)
2801 self.osc = OSC(db, client_uuid, name)
# Accessors simply delegate to the wrapped client (def lines elided).
2805 return self.osc.get_uuid()
2808 return self.osc.get_name()
2816 def add_module(self, manager):
2817 self.osc.add_module(manager)
2819 def correct_level(self, level, op=None):
2820 return self.osc.correct_level(level, op)
2822 # virtual interface for MDC and LMV
2824 def __init__(self, db, client_uuid, name, name_override = None):
2825 Module.__init__(self, 'VMDC', db)
2826 if db.get_class() == 'lmv':
2827 self.mdc = LMV(db, client_uuid, name, name_override)
2828 elif db.get_class() == 'cobd':
2829 self.mdc = COBD(db, client_uuid, name)
2831 self.mdc = MDC(db, client_uuid, name)
2834 return self.mdc.uuid
2837 return self.mdc.name
2845 def add_module(self, manager):
2846 self.mdc.add_module(manager)
2848 def correct_level(self, level, op=None):
2849 return self.mdc.correct_level(level, op)
class ECHO_CLIENT(Module):
    """obdecho test client stacked on a VOSC (LOV/COBD/OSC) device."""
    def __init__(self, db):
        Module.__init__(self, 'ECHO_CLIENT', db)
        self.obd_uuid = self.db.get_first_ref('obd')
        obd = self.db.lookup(self.obd_uuid)
        self.uuid = generate_client_uuid(self.name)
        self.osc = VOSC(obd, self.uuid, self.name)

    def prepare(self):
        if not config.record and is_prepared(self.name):
            return
        run_acceptors = None  # NOTE(review): line 2862 elided in this excerpt
        self.osc.prepare() # XXX This is so cheating. -p
        self.info(self.obd_uuid)
        lctl.newdev("echo_client", self.name, self.uuid,
                    setup = self.osc.get_name())

    def cleanup(self):
        if is_prepared(self.name):
            Module.cleanup(self)
        # NOTE(review): the osc teardown line was elided here; reconstructed.
        self.osc.cleanup()

    def add_module(self, manager):
        self.osc.add_module(manager)
        manager.add_lustre_module('obdecho', 'obdecho')

    def correct_level(self, level, op=None):
        # NOTE(review): body elided in this excerpt; standard modules return
        # the level unchanged — confirm against upstream lconf.
        return level
def generate_client_uuid(name):
    """Build a pseudo-random client UUID embedding up to 19 chars of `name`,
    truncated to the 36-character UUID limit."""
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
class Mountpoint(Module):
    """Client mountpoint: assembles the VOSC/VMDC (and optional GKC) stacks
    and mounts the lustre filesystem at a path."""
    def __init__(self, db):
        Module.__init__(self, 'MTPT', db)
        self.path = self.db.get_val('path')
        self.clientoptions = self.db.get_val('clientoptions', '')
        self.fs_uuid = self.db.get_first_ref('filesystem')
        fs = self.db.lookup(self.fs_uuid)
        self.mds_uuid = fs.get_first_ref('lmv')
        if not self.mds_uuid:
            # no LMV configured; fall back to a plain MDS
            self.mds_uuid = fs.get_first_ref('mds')
        self.obd_uuid = fs.get_first_ref('obd')
        self.gks_uuid = fs.get_first_ref('gks')
        client_uuid = generate_client_uuid(self.name)

        # security flavors: command-line values override the config values.
        # NOTE(review): the excerpt contained this override sequence twice
        # back to back; the redundant duplicate was dropped.
        self.oss_sec = self.db.get_val('oss_sec','null')
        self.mds_sec = self.db.get_val('mds_sec','null')
        if config.mds_sec:
            self.mds_sec = config.mds_sec
        if config.oss_sec:
            self.oss_sec = config.oss_sec

        ost = self.db.lookup(self.obd_uuid)
        if not ost:
            panic("no ost: ", self.obd_uuid)
        mds = self.db.lookup(self.mds_uuid)
        if not mds:
            panic("no mds: ", self.mds_uuid)
        self.vosc = VOSC(ost, client_uuid, self.name, self.name)
        self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
        if self.gks_uuid:
            # NOTE(review): guard reconstructed — the gkc stack only exists
            # when the filesystem references a gks service.
            self.gkc = get_gkc(db, client_uuid, self.name, self.gks_uuid)
2931 if not config.record and fs_is_mounted(self.path):
2932 log(self.path, "already mounted.")
2941 self.info(self.path, self.mds_uuid, self.obd_uuid)
2942 if config.record or config.lctl_dump:
2944 lctl.mount_option(local_node_name, self.vosc.get_name(),
2945 self.vmdc.get_name(), self.gkc.get_name())
2947 lctl.mount_option(local_node_name, self.vosc.get_name(),
2948 self.vmdc.get_name(), "")
2951 if config.clientoptions:
2952 if self.clientoptions:
2953 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2955 self.clientoptions = config.clientoptions
2956 if self.clientoptions:
2957 self.clientoptions = ',' + self.clientoptions
2958 # Linux kernel will deal with async and not pass it to ll_fill_super,
2959 # so replace it with Lustre async
2960 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
2963 gkc_name = self.gkc.get_name();
2966 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,gkc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2967 (self.vosc.get_name(), self.vmdc.get_name(), gkc_name, self.mds_sec,
2968 self.oss_sec, self.clientoptions, config.config, self.path)
2969 log("mount -t lustre_lite -o osc=%s,mdc=%s,gkc=%s,mds_sec=%s,oss_sec=%s%s %s %s" % \
2970 (self.vosc.get_name(), self.vmdc.get_name(), gkc_name, self.mds_sec,
2971 self.oss_sec, self.clientoptions, config.config, self.path))
2972 run("mkdir", self.path)
2977 panic("mount failed:", self.path, ":", string.join(val))
2980 self.info(self.path, self.mds_uuid,self.obd_uuid)
2982 if config.record or config.lctl_dump:
2983 lctl.del_mount_option(local_node_name)
2985 if fs_is_mounted(self.path):
2987 (rc, out) = run("umount", "-f", self.path)
2989 (rc, out) = run("umount", self.path)
2991 raise CommandError('umount', out, rc)
2993 if fs_is_mounted(self.path):
2994 panic("fs is still mounted:", self.path)
3001 def add_module(self, manager):
3002 self.vosc.add_module(manager)
3003 self.vmdc.add_module(manager)
3004 manager.add_lustre_module('llite', 'llite')
3006 manager.add_lustre_module('sec/gks', 'gkc')
3008 def correct_level(self, level, op=None):
# ============================================================
# misc query functions

def get_ost_net(self, osd_uuid):
    """Return the list of Network objects of the node backing `osd_uuid`.

    `self` is a lustre DB handle (free function despite the parameter name).
    Returns an empty list when osd_uuid is empty.
    """
    srv_list = []
    if not osd_uuid:
        return srv_list
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    if not node:
        panic("unable to find node for osd_uuid:", osd_uuid,
              " node_ref:", node_uuid)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    return srv_list
# the order of initialization is based on level.
def getServiceLevel(self):
    # Map a service's class to its configuration ordering level (lower is
    # configured first).
    # NOTE(review): every 'ret = <N>' assignment under the branches below,
    # plus the trailing 'else:'/'ret = 0'/'return ret' lines, were elided
    # from this excerpt — the numeric levels cannot be recovered here and
    # the code as shown is incomplete.
    type = self.get_class()
    if type in ('network',):
    elif type in ('routetbl',):
    elif type in ('ldlm',):
    elif type in ('osd',):
    elif type in ('mdsdev',):
    elif type in ('lmv', 'cobd',):
    elif type in ('gkd',):
    elif type in ('cmobd', 'cobd',):
        # NOTE(review): 'cobd' here is dead — the earlier ('lmv', 'cobd')
        # branch matches first; confirm which level cobd should receive.
    elif type in ('mountpoint', 'echoclient'):
        panic("Unknown type: ", type)
    # clamp to the user-selected level window
    if ret < config.minlevel or ret > config.maxlevel:
# return list of services in a profile. list is a list of tuples
# [(level, db_object),]
def getServices(self):
    """Collect (level, service_db) tuples for each service a profile references."""
    list = []
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        if servdb:
            level = getServiceLevel(servdb)
            # NOTE(review): one line was elided between the level lookup and
            # the append — possibly an 'if level > 0:' filter; confirm.
            if level > 0:
                list.append((level, servdb))
        else:
            panic('service not found: ' + ref_uuid)
    return list
3076 ############################################################
3078 # FIXME: clean this mess up!
3080 # OSC is no longer in the xml, so we have to fake it.
3081 # this is getting ugly and begging for another refactoring
def get_osc(db, ost_uuid, fs_name):
    """Fabricate an OSC module object for an OST (OSCs are not in the XML)."""
    return OSC(db, ost_uuid, fs_name)
def get_mdc(db, fs_name, mds_uuid):
    """Fabricate an MDC for the given MDS; reports via error() when missing."""
    mds_db = db.lookup(mds_uuid);
    if not mds_db:
        error("no mds:", mds_uuid)
    return MDC(mds_db, mds_uuid, fs_name)
def get_gkc(db, uuid, fs_name, gks_uuid):
    """Fabricate a GKC client for the given GKS; reports via error() when missing."""
    gks_db = db.lookup(gks_uuid);
    if not gks_db:
        error("no gks:", gks_uuid)
    return GKC(gks_db, uuid, fs_name)
3100 ############################################################
3101 # routing ("rooting")
3103 # list of (nettype, cluster_id, nid)
def find_local_clusters(node_db):
    """Record (net_type, cluster_id, nid) for every network on this node and
    set up an acceptor handler for each distinct port that needs one."""
    global local_clusters
    for netuuid in node_db.get_networks():
        net = node_db.lookup(netuuid)
        # NOTE(review): construction of the Network wrapper and the port
        # guard were elided in this excerpt; reconstructed.
        srv = Network(net)
        debug("add_local", netuuid)
        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
        if srv.port > 0:
            if not acceptors.has_key(srv.port):
                acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
# This node is a gateway.
is_router = 0
def node_is_router():
    """True when this node is configured as a portals router/gateway."""
    # NOTE(review): the module-level 'is_router = 0' default and the return
    # line were elided in this excerpt; reconstructed (doHost declares
    # 'global is_router').
    return is_router
# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
needs_router = 0
def node_needs_router():
    """True when kptlrouter must be loaded: a router exists in the config,
    or this very node is a router."""
    return needs_router or is_router
3128 # list of (nettype, gw, tgt_cluster_id, lo, hi)
3129 # Currently, these local routes are only added to kptlrouter route
3130 # table if they are needed to connect to a specific server. This
3131 # should be changed so all available routes are loaded, and the
3132 # ptlrouter can make all the decisions.
def find_local_routes(lustre):
    """ Scan the lustre config looking for routers .  Build list of
    routes. """
    # NOTE(review): reconstructed from a mangled excerpt (initialization,
    # gateway-found bookkeeping and loop nesting were elided); confirm
    # against upstream lconf.
    global local_routes, needs_router
    local_routes = []
    list = lustre.lookup_class('node')
    for router in list:
        if router.get_val_int('router', 0):
            needs_router = 1
            for (local_type, local_cluster_id, local_nid) in local_clusters:
                gw = None
                for netuuid in router.get_networks():
                    db = router.lookup(netuuid)
                    if (local_type == db.get_val('nettype') and
                        local_cluster_id == db.get_val('clusterid')):
                        gw = db.get_val('nid')
                        break
                if gw:
                    debug("find_local_routes: gw is", gw)
                    for route in router.get_local_routes(local_type, gw):
                        local_routes.append(route)
    debug("find_local_routes:", local_routes)
def choose_local_server(srv_list):
    """Return the first server reachable on one of this node's clusters,
    or None when none is local."""
    for srv in srv_list:
        if local_cluster(srv.net_type, srv.cluster_id):
            return srv
    return None
def local_cluster(net_type, cluster_id):
    """1 when (net_type, cluster_id) matches a cluster this node is on, else 0."""
    for cluster in local_clusters:
        if net_type == cluster[0] and cluster_id == cluster[1]:
            return 1
    return 0
def local_interface(net_type, cluster_id, nid):
    """1 when this node owns the exact (net_type, cluster_id, nid) interface."""
    for cluster in local_clusters:
        if (net_type == cluster[0] and cluster_id == cluster[1]
            and nid == cluster[2]):
            return 1
    return 0
def find_route(srv_list):
    """Return [(srv, route)] pairs for servers reachable through local routes.

    A route tuple is (nettype, gw, tgt_cluster_id, lo, hi); a server matches
    when its nid falls in [lo, hi] and its cluster id equals tgt_cluster_id.
    """
    result = []
    frm_type = local_clusters[0][0]
    for srv in srv_list:
        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
        to_type = srv.net_type
        to = srv.nid
        cluster_id = srv.cluster_id
        debug ('looking for route to', to_type, to)
        for r in local_routes:
            debug("find_route: ", r)
            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                result.append((srv, r))
    return result
def get_active_target(db):
    """Resolve the device UUID to use for a target, honoring --select
    overrides; otherwise fall back to the target's 'active' reference."""
    target_uuid = db.getUUID()
    target_name = db.getName()
    node_name = get_select(target_name)
    if node_name:
        tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
    else:
        tgt_dev_uuid = db.get_first_ref('active')
    return tgt_dev_uuid
def get_server_by_nid_uuid(db, nid_uuid):
    """Find the Network whose nid_uuid matches, or None."""
    for n in db.lookup_class("network"):
        net = Network(n)
        if net.nid_uuid == nid_uuid:
            return net
    return None
############################################################
# lconf level logic
# Start a service.
def newService(db):
    """Instantiate the Module subclass matching the service class of `db`."""
    # NOTE(review): reconstructed — the def line and several constructor
    # lines were elided in this excerpt; confirm argument lists against
    # upstream lconf.
    type = db.get_class()
    debug('Service:', type, db.getName(), db.getUUID())
    n = None
    if type == 'ldlm':
        n = LDLM(db)
    elif type == 'lov':
        n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
    elif type == 'network':
        n = Network(db)
    elif type == 'routetbl':
        n = RouteTable(db)
    elif type == 'osd':
        n = OSD(db)
    elif type == 'cobd':
        n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
    elif type == 'cmobd':
        n = CMOBD(db)
    elif type == 'mdsdev':
        n = MDSDEV(db)
    elif type == 'mountpoint':
        n = Mountpoint(db)
    elif type == 'echoclient':
        n = ECHO_CLIENT(db)
    elif type == 'lmv':
        n = LMV(db)
    elif type == 'gkd':
        n = GKD(db)
    else:
        panic ("unknown service type:", type)
    return n
3245 # Prepare the system to run lustre using a particular profile
3246 # in a the configuration.
3247 # * load & the modules
3248 # * setup networking for the current node
3249 # * make sure partitions are in place and prepared
3250 # * initialize devices with lctl
3251 # Levels is important, and needs to be enforced.
def for_each_profile(db, prof_list, operation):
    """Apply `operation` (doSetup/doCleanup/...) to the service list of every
    profile uuid in prof_list."""
    for prof_uuid in prof_list:
        prof_db = db.lookup(prof_uuid)
        if not prof_db:
            panic("profile:", prof_uuid, "not found.")
        services = getServices(prof_db)
        operation(services)
def get_fs_name(db, rec, tag, uuid):
    """Map a uuid referenced by an update record to its mountpoint name.

    First find the filesystem whose <tag> element references `uuid`, then
    find the mountpoint referencing that filesystem; panics on either miss.
    """
    # FIXME: better way to find the mountpoint?
    filesystems = db.root_node.getElementsByTagName('filesystem')
    fsuuid = None
    for fs in filesystems:
        ref = fs.getElementsByTagName(tag)
        if ref[0].getAttribute('uuidref') == uuid:
            fsuuid = fs.getAttribute('uuid')
            break
    if not fsuuid:
        panic("malformed xml: uuid '" + uuid + "' referenced in '" + \
              rec.nodeName + "' record is not used by any filesystems.")
    mtpts = db.root_node.getElementsByTagName('mountpoint')
    fs_name = None
    # use a distinct loop variable (the original shadowed 'fs' here)
    for mtpt in mtpts:
        ref = mtpt.getElementsByTagName('filesystem_ref')
        if ref[0].getAttribute('uuidref') == fsuuid:
            fs_name = mtpt.getAttribute('name')
            break
    if not fs_name:
        panic("malformed xml: '" + rec.nodeName + \
              "' record references uuid '" + uuid + \
              "', which references filesystem uuid '" + fsuuid + \
              "', which does not reference a mountpoint.")
    return fs_name
3290 def magic_get_osc(db, rec, lov):
3292 lov_uuid = lov.get_uuid()
3293 fs_name = lov.osc.fs_name
3294 lov_name = lov.osc.name
3296 lov_uuid = rec.getAttribute('lov_uuidref')
3297 fs_name = get_fs_name(db, rec, 'obd_ref', lov_uuid)
3298 lov_name = "lov_" + fs_name
3300 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3302 ost_uuid = rec.getAttribute('ost_uuidref')
3304 if rec.nodeName == 'lov_delete':
3306 # Use the update as a subtree in case a new OST is created with the
3307 # same name as the one that we deleted or other info about the OSS
3308 # has changed since the delete.
3309 # XXX - Not sure if this is the way this is supposed to be done.
3311 info = rec.parentNode.getElementsByTagName('info')
3313 print "delete record missing info !"
3314 tgtdb = Lustre.LustreDB_XML(info[0], info[0])
3318 obd = tgtdb.lookup(ost_uuid)
3320 panic("malformed xml: '" + rec.nodeName + \
3321 "' record references ost uuid '" + ost_uuid + \
3322 "' which cannot be found.")
3323 osc = get_osc(obd, lov_uuid, fs_name)
3325 panic('osc not found:', obd_uuid)
3326 return lov_name, lov_uuid, osc
3328 # write logs for update records. sadly, logs of all types -- and updates in
3329 # particular -- are something of an afterthought. lconf needs rewritten with
3330 # these as core concepts. so this is a pretty big hack.
3331 def process_update_record(db, update, lov):
3332 for rec in update.childNodes:
3333 if rec.nodeType != rec.ELEMENT_NODE:
3336 if rec.nodeName == 'info':
3339 log("found " + rec.nodeName + " record in update version " +
3340 str(update.getAttribute('version')))
3342 if rec.nodeName != 'lov_add' and rec.nodeName != 'lov_delete' and \
3343 rec.nodeName != 'lov_deactivate':
3344 panic("unrecognized update record type '" + rec.nodeName + "'.")
3346 lov_uuid = rec.getAttribute('lov_uuidref')
3347 ost_uuid = rec.getAttribute('ost_uuidref')
3348 index = rec.getAttribute('index')
3349 gen = rec.getAttribute('generation')
3351 if not lov_uuid or not ost_uuid or not index or not gen:
3352 panic("malformed xml: '" + rec.nodeName + "' record requires lov_uuid, ost_uuid, index, and generation.")
3354 lov_name, lov_uuid, osc = magic_get_osc(db, rec, lov)
3356 # ------------------------------------------------------------- add
3357 if rec.nodeName == 'lov_add':
3359 # Only ignore connect failures with --force, which
3360 # isn't implemented here yet.
3361 osc.prepare(ignore_connect_failure=0)
3362 except CommandError, e:
3363 print "Error preparing OSC %s\n" % osc.uuid
3366 lctl.lov_add_osc(lov_name, ost_uuid, index, gen)
3368 # ------------------------------------------------------ deactivate
3369 elif rec.nodeName == 'lov_deactivate':
3372 except CommandError, e:
3373 print "Error deactivating OSC %s\n" % osc.uuid
3376 # ---------------------------------------------------------- delete
3377 elif rec.nodeName == 'lov_delete':
3378 lctl.lov_del_osc(lov_name, ost_uuid, index, gen)
3384 except CommandError, e:
3385 print "Error cleaning up OSC %s\n" % osc.uuid
def process_updates(db, log_device, log_name, lov = None):
    """Record one config log per non-empty <update> element, named
    '<log_name>-<version>', replaying the update into it."""
    if not config.write_conf and not config.record:
        return
    updates = db.root_node.getElementsByTagName('update')
    for u in updates:
        if not u.childNodes:
            log("ignoring empty update record (version " +
                str(u.getAttribute('version')) + ")")
            continue
        version = u.getAttribute('version')
        real_name = "%s-%s" % (log_name, version)
        lctl.clear_log(log_device, real_name)
        lctl.record(log_device, real_name)
        process_update_record(db, u, lov)
        # NOTE(review): the record-closing call was elided in this excerpt;
        # reconstructed to match the record() above.
        lctl.end_record()
def doWriteconf(services):
    """Write the config logs for every mdsdev/osd service in the profile."""
    # NOTE(review): the loop header, write_conf() call and cleanup tail were
    # elided in this excerpt; reconstructed — confirm against upstream lconf.
    for s in services:
        if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
            n = newService(s[1])
            n.write_conf()
            if not config.nosetup:
                n.cleanup()
def doSetup(services):
    """Prepare services in ascending (corrected) level order."""
    if config.nosetup:
        return
    slist = []
    for s in services:
        n = newService(s[1])
        if n:
            slist.append((n.level, n))
    nlist = []
    for n in slist:
        nl = n[1].correct_level(n[0])
        nlist.append((nl, n[1]))
    nlist.sort()
    for n in nlist:
        n[1].prepare()
def doLoadModules(services):
    """Collect the module requirements of every service and load them."""
    if config.nomod:
        return
    # adding all needed modules from all services
    for s in services:
        n = newService(s[1])
        n.add_module(mod_manager)
    # loading all registered modules
    mod_manager.load_modules()
def doUnloadModules(services):
    """Collect modules of services safe to clean and unload them all."""
    if config.nomod:
        return
    # adding all needed modules from all services
    for s in services:
        n = newService(s[1])
        if n.safe_to_clean_modules():
            n.add_module(mod_manager)
    # unloading all registered modules
    mod_manager.cleanup_modules()
def doCleanup(services):
    """Clean up services in descending (corrected) level order."""
    if config.nosetup:
        return
    slist = []
    for s in services:
        n = newService(s[1])
        if n:
            slist.append((n.level, n))
    nlist = []
    for n in slist:
        nl = n[1].correct_level(n[0])
        nlist.append((nl, n[1]))
    nlist.sort()
    nlist.reverse()
    for n in nlist:
        if n[1].safe_to_clean():
            n[1].cleanup()
def doHost(lustreDB, hosts):
    """Configure, clean up or recover this node according to config flags.

    Resolves the first matching node entry from `hosts`, applies node-level
    tunables, then runs the profile operations in two phases
    (modules, then device setup/cleanup).
    """
    # NOTE(review): reconstructed from a mangled excerpt (host-selection
    # loop, else branches and gdb pause were elided); confirm against
    # upstream lconf.
    global is_router, local_node_name
    node_db = None
    for h in hosts:
        node_db = lustreDB.lookup_name(h, 'node')
        if node_db:
            break
    if not node_db:
        panic('No host entry found.')

    local_node_name = node_db.get_val('name', 0)
    is_router = node_db.get_val_int('router', 0)
    lustre_upcall = node_db.get_val('lustreUpcall', '')
    portals_upcall = node_db.get_val('portalsUpcall', '')
    timeout = node_db.get_val_int('timeout', 0)
    ptldebug = node_db.get_val('ptldebug', '')
    subsystem = node_db.get_val('subsystem', '')

    find_local_clusters(node_db)
    if not is_router:
        find_local_routes(lustreDB)

    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    prof_list = node_db.get_refs('profile')

    if config.write_conf:
        for_each_profile(node_db, prof_list, doLoadModules)
        sys_make_devices()
        for_each_profile(node_db, prof_list, doWriteconf)
        for_each_profile(node_db, prof_list, doUnloadModules)
        lustreDB.close()
    elif config.recover:
        if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
            raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
                                     "--client_uuid <UUID> --conn_uuid <UUID>")
        doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
                   config.conn_uuid)
    elif config.cleanup:
        # the command line can override this value
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            for_each_profile(node_db, prof_list, doCleanup)
        else:
            sys_set_timeout(timeout)
            sys_set_ptldebug(ptldebug)
            sys_set_subsystem(subsystem)
            sys_set_lustre_upcall(lustre_upcall)
            sys_set_portals_upcall(portals_upcall)
            for_each_profile(node_db, prof_list, doCleanup)
            for_each_profile(node_db, prof_list, doUnloadModules)
            lustreDB.close()
    else:
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            sys_set_timeout(timeout)
            sys_set_lustre_upcall(lustre_upcall)
            for_each_profile(node_db, prof_list, doSetup)
        else:
            sys_make_devices()
            sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
            sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
            for_each_profile(node_db, prof_list, doLoadModules)
            sys_set_debug_path()
            sys_set_ptldebug(ptldebug)
            sys_set_subsystem(subsystem)
            script = config.gdb_script
            run(lctl.lctl, ' modules >', script)
            if config.gdb:
                log ("The GDB module script is in", script)
                # pause, so user has time to break and
                # load the module script
                time.sleep(5)
            sys_set_timeout(timeout)
            sys_set_lustre_upcall(lustre_upcall)
            sys_set_portals_upcall(portals_upcall)
            for_each_profile(node_db, prof_list, doSetup)
3570 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3571 tgt = lustreDB.lookup(tgt_uuid)
3573 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3574 new_uuid = get_active_target(tgt)
3576 raise Lustre.LconfError("doRecovery: no active target found for: " +
3578 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3580 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3582 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3584 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
3587 lctl.disconnect(oldnet)
3588 except CommandError, e:
3589 log("recover: disconnect", nid_uuid, "failed: ")
3594 except CommandError, e:
3595 log("recover: connect failed")
3598 lctl.recover(client_uuid, net.nid_uuid)
def setupModulePath(cmd, portals_dir = PORTALS_DIR):
    """Normalize config.lustre and config.portals so modules are loaded from
    the source tree in development mode, or from --lustre/--portals paths."""
    base = os.path.dirname(cmd)
    if development_mode():
        if not config.lustre:
            debug('using objdir module paths')
            config.lustre = (os.path.join(base, ".."))
        # normalize the portals dir, using command line arg if set
        if config.portals:
            portals_dir = config.portals
        dir = os.path.join(config.lustre, portals_dir)
        config.portals = dir
        debug('config.portals', config.portals)
    elif config.lustre and config.portals:
        # production mode
        # if --lustre and --portals, normalize portals
        # can ignore PORTALS_DIR here, since it is probably useless here
        config.portals = os.path.join(config.lustre, config.portals)
        debug('config.portals B', config.portals)
3620 def sysctl(path, val):
3621 debug("+ sysctl", path, val)
3625 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the kernel debug-log dump path at config.debug_path."""
    sysctl('portals/debug_path', config.debug_path)
def sys_set_lustre_upcall(upcall):
    """Install the lustre upcall script; --lustre_upcall (or --upcall)
    overrides the value from the node config."""
    # the command overrides the value in the node config
    if config.lustre_upcall:
        upcall = config.lustre_upcall
    elif config.upcall:
        upcall = config.upcall
    if upcall:
        lctl.set_lustre_upcall(upcall)
def sys_set_portals_upcall(upcall):
    """Install the portals upcall script; --portals_upcall (or --upcall)
    overrides the value from the node config."""
    # the command overrides the value in the node config
    if config.portals_upcall:
        upcall = config.portals_upcall
    elif config.upcall:
        upcall = config.upcall
    if upcall:
        sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout; --timeout overrides the config value."""
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    if timeout != None and timeout > 0:
        lctl.set_timeout(timeout)
def sys_tweak_socknal ():
    """Bump vm.min_free_kbytes on 2.6 kernels and optionally force socknal
    to a single (untyped) socket."""
    # reserve at least 8MB, or we run out of RAM in skb_alloc under read
    # NOTE(review): reconstructed — the meminfo parsing/else lines were
    # elided in this excerpt; confirm thresholds against upstream lconf.
    if sys_get_branch() == '2.6':
        fp = open('/proc/meminfo')
        lines = fp.readlines()
        fp.close()
        memtotal = 131072
        for l in lines:
            a = string.split(l)
            if a[0] == 'MemTotal:':
                memtotal = a[1]
                debug("memtotal" + memtotal)
        if int(memtotal) < 262144:
            minfree = int(memtotal) / 16
        else:
            minfree = 32768
        debug("+ minfree ", minfree)
        sysctl("vm/min_free_kbytes", minfree)
    if config.single_socket:
        sysctl("socknal/typed", 0)
def sys_optimize_elan ():
    """Enable event-interrupt punt loops on whichever elan/qsnet proc files exist."""
    procfiles = ["/proc/elan/config/eventint_punt_loops",
                 "/proc/qsnet/elan3/config/eventint_punt_loops",
                 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
    for p in procfiles:
        if os.access(p, os.W_OK):
            run ("echo 1 > " + p)
3688 def sys_set_ptldebug(ptldebug):
3690 ptldebug = config.ptldebug
3693 val = eval(ptldebug, ptldebug_names)
3694 val = "0x%x" % (val & 0xffffffffL)
3695 sysctl('portals/debug', val)
3696 except NameError, e:
3699 def sys_set_subsystem(subsystem):
3700 if config.subsystem:
3701 subsystem = config.subsystem
3704 val = eval(subsystem, subsystem_names)
3705 val = "0x%x" % (val & 0xffffffffL)
3706 sysctl('portals/subsystem_debug', val)
3707 except NameError, e:
def sys_set_netmem_max(path, max):
    """Raise a /proc/sys/net buffer limit to at least `max` (never lower it)."""
    # NOTE(review): the read-current-value portion was elided in this
    # excerpt; reconstructed.
    debug("setting", path, "to at least", max)
    if config.noexec:
        return
    fp = open(path)
    str = fp.readline()
    fp.close()
    cur = int(string.strip(str))
    if max > cur:
        fp = open(path, 'w')
        fp.write('%d\n' %(max))
        fp.close()
def sys_make_devices():
    """Create the /dev/portals and /dev/obd char devices when missing."""
    if not os.access('/dev/portals', os.R_OK):
        run('mknod /dev/portals c 10 240')
    if not os.access('/dev/obd', os.R_OK):
        run('mknod /dev/obd c 10 241')
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    """Append new_dir to os.environ['PATH'] unless already present."""
    syspath = string.split(os.environ['PATH'], ':')
    if new_dir in syspath:
        return
    os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
def default_debug_path():
    """Default debug-dump path, preferring the /r ramdisk root when present."""
    path = '/tmp/lustre-log'
    if os.path.isdir('/r'):
        return '/r' + path
    else:
        return path
def default_gdb_script():
    """Default gdb module-script path, preferring the /r ramdisk root."""
    script = '/tmp/ogdb'
    if os.path.isdir('/r'):
        return '/r' + script
    else:
        return script
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    """Make sure the standard system bin/sbin dirs are on PATH."""
    for dir in DEFAULT_PATH:
        add_to_path(dir)
# global hack for the --select handling
tgt_select = {}
def init_select(args):
    """Parse --select arguments into the tgt_select map.

    args = [service=nodeA,service2=nodeB service3=nodeC]
    """
    global tgt_select
    for arg in args:
        list = string.split(arg, ',')
        for entry in list:
            srv, node = string.split(entry, '=')
            tgt_select[srv] = node
def get_select(srv):
    """Return the node selected for service `srv`, or None when unselected."""
    if tgt_select.has_key(srv):
        return tgt_select[srv]
    return None
# Command-line option table consumed by Lustre.Options.
# NOTE(review): the enclosing 'lconf_options = [' line and several FLAG /
# default fields were elided from this excerpt and have been reconstructed;
# confirm against upstream lconf.
FLAG = Lustre.Options.FLAG
PARAM = Lustre.Options.PARAM
INTPARAM = Lustre.Options.INTPARAM
PARAMLIST = Lustre.Options.PARAMLIST
lconf_options = [
    ('verbose,v', "Print system commands as they are run"),
    ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
    ('config', "Cluster config name used for LDAP query", PARAM),
    ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
    ('node', "Load config for <nodename>", PARAM),
    ('sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
    ('mds_sec',"security flavor <null|krb5i|krb5p> between this client with mds", PARAM),
    ('oss_sec',"security flavor <null|krb5i|krb5p> between this client with ost", PARAM),
    ('mds_mds_sec',"security flavor <null|krb5i|krb5p> between this mds with other mds", PARAM),
    ('mds_oss_sec',"security flavor <null|krb5i|krb5p> between this mds with ost", PARAM),
    ('mds_deny_sec', "security flavor <null|krb5i|krb5p> denied by this mds", PARAM),
    ('ost_deny_sec', "security flavor <null|krb5i|krb5p> denied by this ost", PARAM),
    ('cleanup,d', "Cleans up config. (Shutdown)"),
    ('force,f', "Forced unmounting and/or obd detach during cleanup",
     FLAG, 0),
    ('single_socket', "socknal option: only use one socket instead of bundle",
     FLAG, 0),
    ('failover',"""Used to shut down without saving state.
                   This will allow this node to "give up" a service to a
                   another node for failover purposes. This will not
                   be a clean shutdown.""",
     FLAG, 0),
    ('gdb', """Prints message after creating gdb module script
                    and sleeps for 5 seconds."""),
    ('noexec,n', """Prints the commands and steps that will be run for a
                config without executing them. This can used to check if a
                config file is doing what it should be doing"""),
    ('nomod', "Skip load/unload module step."),
    ('nosetup', "Skip device setup/cleanup step."),
    ('reformat', "Reformat all devices (without question)"),
    ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
    ('mountfsoptions', "Additional options for mount fs command line", PARAM),
    ('clientoptions', "Additional options for Lustre", PARAM),
    ('dump', "Dump the kernel debug log to file before portals is unloaded",
     PARAM),
    ('write_conf', "Save all the client config information on mds."),
    ('record', "Write config information on mds."),
    ('record_log', "Name of config record log.", PARAM),
    ('record_device', "MDS device name that will record the config commands",
     PARAM),
    ('root_squash', "MDS squash root to appointed uid",
     PARAM),
    ('no_root_squash', "Don't squash root for appointed nid",
     PARAM),
    ('minlevel', "Minimum level of services to configure/cleanup",
     INTPARAM, 0),
    ('maxlevel', """Maximum level of services to configure/cleanup
                    Levels are aproximatly like:
                            70 - mountpoint, echo_client, osc, mdc, lov""",
     INTPARAM, 100),
    ('lustre', """Base directory of lustre sources. This parameter will
                  cause lconf to load modules from a source tree.""", PARAM),
    ('portals', """Portals source directory.  If this is a relative path,
                   then it is assumed to be relative to lustre. """, PARAM),
    ('timeout', "Set recovery timeout", INTPARAM),
    ('upcall', "Set both portals and lustre upcall script", PARAM),
    ('lustre_upcall', "Set lustre upcall script", PARAM),
    ('portals_upcall', "Set portals upcall script", PARAM),
    ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
    ('ptldebug', "Set the portals debug level",  PARAM),
    ('subsystem', "Set the portals debug subsystem",  PARAM),
    ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
    ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
    # Client recovery options
    ('recover', "Recover a device"),
    ('group', "The group of devices to configure or cleanup", PARAM),
    ('tgt_uuid', "The failed target (required for recovery)", PARAM),
    ('client_uuid', "The failed client (required for recovery)", PARAM),
    ('conn_uuid', "The failed connection (required for recovery)", PARAM),
    ('inactive', """The name of an inactive service, to be ignored during
                    mounting (currently OST-only). Can be repeated.""",
     PARAMLIST),
    ]
3858 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3860 # in the upcall this is set to SIG_IGN
3861 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3863 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3865 config, args = cl.parse(sys.argv[1:])
3866 except Lustre.OptionError, e:
3870 setupModulePath(sys.argv[0])
3872 host = socket.gethostname()
3874 # the PRNG is normally seeded with time(), which is not so good for starting
3875 # time-synchronized clusters
3876 input = open('/dev/urandom', 'r')
3878 print 'Unable to open /dev/urandom!'
3880 seed = input.read(32)
3886 init_select(config.select)
3889 # allow config to be fetched via HTTP, but only with python2
3890 if sys.version[0] != '1' and args[0].startswith('http://'):
3893 config_file = urllib2.urlopen(args[0])
3894 except (urllib2.URLError, socket.error), err:
3895 if hasattr(err, 'args'):
3897 print "Could not access '%s': %s" %(args[0], err)
3899 elif not os.access(args[0], os.R_OK):
3900 print 'File not found or readable:', args[0]
3904 config_file = open(args[0], 'r')
3906 dom = xml.dom.minidom.parse(config_file)
3908 panic("%s does not appear to be a config file." % (args[0]))
3909 sys.exit(1) # make sure to die here, even in debug mode.
3911 CONFIG_FILE = args[0]
3912 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3913 if not config.config:
3914 config.config = os.path.basename(args[0])# use full path?
3915 if config.config[-4:] == '.xml':
3916 config.config = config.config[:-4]
3917 elif config.ldapurl:
3918 if not config.config:
3919 panic("--ldapurl requires --config name")
3920 dn = "config=%s,fs=lustre" % (config.config)
3921 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3922 elif config.ptldebug or config.subsystem:
3923 sys_set_ptldebug(None)
3924 sys_set_subsystem(None)
3927 print 'Missing config file or ldap URL.'
3928 print 'see lconf --help for command summary'
3931 toplustreDB = lustreDB
3933 ver = lustreDB.get_version()
3935 panic("No version found in config data, please recreate.")
3936 if ver != Lustre.CONFIG_VERSION:
3937 panic("Config version", ver, "does not match lconf version",
3938 Lustre.CONFIG_VERSION)
3942 node_list.append(config.node)
3945 node_list.append(host)
3946 node_list.append('localhost')
3948 debug("configuring for host: ", node_list)
3951 config.debug_path = config.debug_path + '-' + host
3952 config.gdb_script = config.gdb_script + '-' + host
3954 lctl = LCTLInterface('lctl')
3956 if config.lctl_dump:
3957 lctl.use_save_file(config.lctl_dump)
3960 if not (config.record_device and config.record_log):
3961 panic("When recording, both --record_log and --record_device must be specified.")
3962 lctl.clear_log(config.record_device, config.record_log)
3963 lctl.record(config.record_device, config.record_log)
3965 # init module manager
3966 mod_manager = kmod_manager(config.lustre, config.portals)
3968 doHost(lustreDB, node_list)
3972 process_updates(lustreDB, config.record_device, config.record_log)
3976 if __name__ == "__main__":
3979 except Lustre.LconfError, e:
3981 # traceback.print_exc(file=sys.stdout)
3983 except CommandError, e:
3987 if first_cleanup_error:
3988 sys.exit(first_cleanup_error)