3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
# development_mode(): detect whether lconf is running out of a source tree
# by looking for a Makefile next to the executable (sys.argv[0]).
# NOTE(review): sampled listing — the return statements (original lines
# 43-44, presumably "return 1"/"return 0") are not visible here; confirm
# against the full file.
40 def development_mode():
41     base = os.path.dirname(sys.argv[0])
42     if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
93 "console" : (1 << 25),
99 "undefined" : (1 << 0),
109 "portals" : (1 << 10),
111 "pinger" : (1 << 12),
112 "filter" : (1 << 13),
117 "ptlrouter" : (1 << 18),
121 "confobd" : (1 << 22),
# Holds the return code of the first cleanup step that failed (0 = none yet).
first_cleanup_error = 0

def cleanup_error(rc):
    """Remember the first non-zero cleanup failure code.

    Subsequent failures are ignored, so the earliest error is the one
    ultimately reported to the caller.
    """
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
134 # ============================================================
135 # debugging and error funcs
# fixme(): raise a Lustre.LconfError for a feature that is not implemented.
# Uses Python 2 "raise Class, arg" syntax; Lustre is a project module
# imported elsewhere in this file.
137 def fixme(msg = "this feature"):
138     raise Lustre.LconfError, msg + ' not implemented yet.'
141 msg = string.join(map(str,args))
142 if not config.noexec:
143 raise Lustre.LconfError(msg)
148 msg = string.join(map(str,args))
153 print string.strip(s)
157 msg = string.join(map(str,args))
160 # ack, python's builtin int() does not support '0x123' syntax.
161 # eval can do it, although what a hack!
165 return eval(s, {}, {})
168 except SyntaxError, e:
169 raise ValueError("not a number")
171 raise ValueError("not a number")
173 # ============================================================
174 # locally defined exceptions
# CommandError: exception raised when an external command (lctl, mkfs,
# insmod, ...) fails.  Carries the command name, its error output (either a
# single string or a list of output lines), and the numeric return code.
# NOTE(review): sampled listing — original lines 179-182, 184, 186, 189 and
# 191 are absent here (presumably "self.rc = rc", the dump() method header,
# and the if/else lines keyed on self.rc); verify against the full file.
175 class CommandError (exceptions.Exception):
176     def __init__(self, cmd_name, cmd_err, rc=None):
177         self.cmd_name = cmd_name
178         self.cmd_err = cmd_err
# Error output was a plain string: print one line, with the rc if known.
183         if type(self.cmd_err) == types.StringType:
185             print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
187             print "! %s: %s" % (self.cmd_name, self.cmd_err)
# Error output was a list of lines: print a header, then each line prefixed
# with "> ", stripped of surrounding whitespace.
188         elif type(self.cmd_err) == types.ListType:
190             print "! %s (error %d):" % (self.cmd_name, self.rc)
192             print "! %s:" % (self.cmd_name)
193         for s in self.cmd_err:
194             print "> %s" %(string.strip(s))
199 # ============================================================
200 # handle daemons, like the acceptor
202 """ Manage starting and stopping a daemon. Assumes daemon manages
203 it's own pid file. """
205 def __init__(self, cmd):
211 log(self.command, "already running.")
213 self.path = find_prog(self.command)
215 panic(self.command, "not found.")
216 ret, out = runcmd(self.path +' '+ self.command_line())
218 raise CommandError(self.path, out, ret)
222 pid = self.read_pidfile()
225 log ("killing process", pid)
228 log("was unable to find pid of " + self.command)
229 #time.sleep(1) # let daemon die
231 log("unable to kill", self.command, e)
233 log("unable to kill", self.command)
236 pid = self.read_pidfile()
242 log("was unable to find pid of " + self.command)
249 def read_pidfile(self):
251 fp = open(self.pidfile(), 'r')
261 def clean_pidfile(self):
262 """ Remove a stale pidfile """
263 log("removing stale pidfile:", self.pidfile())
265 os.unlink(self.pidfile())
267 log(self.pidfile(), e)
269 class AcceptorHandler(DaemonHandler):
270 def __init__(self, port, net_type):
271 DaemonHandler.__init__(self, "acceptor")
276 return "/var/run/%s-%d.pid" % (self.command, self.port)
278 def command_line(self):
279 return string.join(map(str,(self.flags, self.port)))
283 # start the acceptors
285 if config.lctl_dump or config.record:
287 for port in acceptors.keys():
288 daemon = acceptors[port]
289 if not daemon.running():
292 def run_one_acceptor(port):
293 if config.lctl_dump or config.record:
295 if acceptors.has_key(port):
296 daemon = acceptors[port]
297 if not daemon.running():
300 panic("run_one_acceptor: No acceptor defined for port:", port)
302 def stop_acceptor(port):
303 if acceptors.has_key(port):
304 daemon = acceptors[port]
309 # ============================================================
310 # handle lctl interface
313 Manage communication with lctl
316 def __init__(self, cmd):
318 Initialize close by finding the lctl binary.
320 self.lctl = find_prog(cmd)
322 self.record_device = ''
325 debug('! lctl not found')
328 raise CommandError('lctl', "unable to find lctl binary.")
# use_save_file(): remember the path that subsequent lctl invocations should
# dump their commands to (used by the --lctl_dump path in run()).
330 def use_save_file(self, file):
331     self.save_file = file

# record(): begin recording lctl commands into configuration log `logname`
# on device `dev_name` (used by lconf --record); run() consults
# self.record_device to wrap commands in record/endrecord.
333 def record(self, dev_name, logname):
334         log("Recording log", logname, "on", dev_name)
335         self.record_device = dev_name
336         self.record_log = logname

# end_record(): stop recording and clear the record state set by record().
338 def end_record(self):
339         log("End recording log", self.record_log, "on", self.record_device)
340         self.record_device = None
341         self.record_log = None
def set_nonblock(self, fd):
    """Switch file descriptor `fd` into non-blocking I/O mode."""
    flags = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, flags | os.O_NDELAY)
350 the cmds are written to stdin of lctl
351 lctl doesn't return errors when run in script mode, so
353 should modify command line to accept multiple commands, or
354 create complex command line options
358 cmds = '\n dump ' + self.save_file + '\n' + cmds
359 elif self.record_device:
363 %s""" % (self.record_device, self.record_log, cmds)
365 debug("+", cmd_line, cmds)
366 if config.noexec: return (0, [])
368 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
369 child.tochild.write(cmds + "\n")
370 child.tochild.close()
371 # print "LCTL:", cmds
373 # From "Python Cookbook" from O'Reilly
374 outfile = child.fromchild
375 outfd = outfile.fileno()
376 self.set_nonblock(outfd)
377 errfile = child.childerr
378 errfd = errfile.fileno()
379 self.set_nonblock(errfd)
381 outdata = errdata = ''
384 ready = select.select([outfd,errfd],[],[]) # Wait for input
385 if outfd in ready[0]:
386 outchunk = outfile.read()
387 if outchunk == '': outeof = 1
388 outdata = outdata + outchunk
389 if errfd in ready[0]:
390 errchunk = errfile.read()
391 if errchunk == '': erreof = 1
392 errdata = errdata + errchunk
393 if outeof and erreof: break
394 # end of "borrowed" code
397 if os.WIFEXITED(ret):
398 rc = os.WEXITSTATUS(ret)
401 if rc or len(errdata):
402 raise CommandError(self.lctl, errdata, rc)
# runcmd(): run lctl with the arguments on its command line (as opposed to
# run(), which feeds a command script to lctl's stdin).  Raises CommandError
# on failure.
# NOTE(review): sampled listing — original lines 406/408 (docstring quotes),
# 412 (presumably "if rc:") and the trailing "return rc, out" are not
# visible here; verify against the full file.
405     def runcmd(self, *args):
407         run lctl using the command line
409         cmd = string.join(map(str,args))
410         debug("+", self.lctl, cmd)
411         rc, out = run(self.lctl, cmd)
413         raise CommandError(self.lctl, out, rc)
416 def clear_log(self, dev, log):
417 """ clear an existing log """
422 quit """ % (dev, log)
425 def root_squash(self, name, uid, nid):
429 quit""" % (name, uid, nid)
432 def network(self, net, nid):
437 quit """ % (net, nid)
441 def add_interface(self, net, ip, netmask = ""):
442 """ add an interface """
446 quit """ % (net, ip, netmask)
449 # delete an interface
450 def del_interface(self, net, ip):
451 """ delete an interface """
458 # create a new connection
459 def add_uuid(self, net_type, uuid, nid):
460 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
463 def add_peer(self, net_type, nid, hostaddr, port):
464 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
469 nid, hostaddr, port )
471 elif net_type in ('iib',) and not config.lctl_dump:
478 elif net_type in ('vib',) and not config.lctl_dump:
486 def connect(self, srv):
487 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
488 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
490 hostaddr = string.split(srv.hostaddr[0], '/')[0]
491 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
494 def recover(self, dev_name, new_conn):
497 recover %s""" %(dev_name, new_conn)
500 # add a route to a range
501 def add_route(self, net, gw, lo, hi):
509 except CommandError, e:
513 def del_route(self, net, gw, lo, hi):
518 quit """ % (net, gw, lo, hi)
521 # add a route to a host
522 def add_route_host(self, net, uuid, gw, tgt):
523 self.add_uuid(net, uuid, tgt)
531 except CommandError, e:
535 # add a route to a range
536 def del_route_host(self, net, uuid, gw, tgt):
542 quit """ % (net, gw, tgt)
546 def del_peer(self, net_type, nid, hostaddr):
547 if net_type in ('tcp',) and not config.lctl_dump:
551 del_peer %s %s single_share
555 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
559 del_peer %s single_share
564 # disconnect one connection
565 def disconnect(self, srv):
566 self.del_uuid(srv.nid_uuid)
567 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
569 hostaddr = string.split(srv.hostaddr[0], '/')[0]
570 self.del_peer(srv.net_type, srv.nid, hostaddr)
572 def del_uuid(self, uuid):
580 def disconnectAll(self, net):
588 def attach(self, type, name, uuid):
591 quit""" % (type, name, uuid)
594 def detach(self, name):
601 def set_security(self, name, key, value):
605 quit""" % (name, key, value)
608 def setup(self, name, setup = ""):
612 quit""" % (name, setup)
615 def add_conn(self, name, conn_uuid):
619 quit""" % (name, conn_uuid)
622 def start(self, name, conf_name):
626 quit""" % (name, conf_name)
629 # create a new device with lctl
630 def newdev(self, type, name, uuid, setup = ""):
631 self.attach(type, name, uuid);
633 self.setup(name, setup)
634 except CommandError, e:
635 self.cleanup(name, uuid, 0)
639 def cleanup(self, name, uuid, force, failover = 0):
640 if failover: force = 1
646 quit""" % (name, ('', 'force')[force],
647 ('', 'failover')[failover])
651 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
652 stripe_sz, stripe_off, pattern, devlist = None):
655 lov_setup %s %d %d %d %s %s
656 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
660 # add an OBD to a LOV
661 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
663 lov_modify_tgts add %s %s %s %s
664 quit""" % (name, obd_uuid, index, gen)
668 def lmv_setup(self, name, uuid, desc_uuid, devlist):
672 quit""" % (name, uuid, desc_uuid, devlist)
675 # delete an OBD from a LOV
676 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
678 lov_modify_tgts del %s %s %s %s
679 quit""" % (name, obd_uuid, index, gen)
683 def deactivate(self, name):
691 def dump(self, dump_file):
694 quit""" % (dump_file)
697 # get list of devices
698 def device_list(self):
699 devices = '/proc/fs/lustre/devices'
701 if os.access(devices, os.R_OK):
703 fp = open(devices, 'r')
711 def lustre_version(self):
712 rc, out = self.runcmd('version')
716 def mount_option(self, profile, osc, mdc):
718 mount_option %s %s %s
719 quit""" % (profile, osc, mdc)
722 # delete mount options
723 def del_mount_option(self, profile):
729 def set_timeout(self, timeout):
735 def set_lustre_upcall(self, upcall):
740 # ============================================================
741 # Various system-level functions
742 # (ideally moved to their own module)
744 # Run a command and return the output and status.
745 # stderr is sent to /dev/null, could use popen3 to
746 # save it if necessary
749 if config.noexec: return (0, [])
750 f = os.popen(cmd + ' 2>&1')
760 cmd = string.join(map(str,args))
763 # Run a command in the background.
764 def run_daemon(*args):
765 cmd = string.join(map(str,args))
767 if config.noexec: return 0
768 f = os.popen(cmd + ' 2>&1')
776 # Determine full path to use for an external command
777 # searches dirname(argv[0]) first, then PATH
779 syspath = string.split(os.environ['PATH'], ':')
780 cmdpath = os.path.dirname(sys.argv[0])
781 syspath.insert(0, cmdpath);
783 syspath.insert(0, os.path.join(config.portals, 'utils/'))
785 prog = os.path.join(d,cmd)
786 if os.access(prog, os.X_OK):
790 # Recursively look for file starting at base dir
791 def do_find_file(base, mod):
792 fullname = os.path.join(base, mod)
793 if os.access(fullname, os.R_OK):
795 for d in os.listdir(base):
796 dir = os.path.join(base,d)
797 if os.path.isdir(dir):
798 module = do_find_file(dir, mod)
802 # is the path a block device?
809 return stat.S_ISBLK(s[stat.ST_MODE])
811 # find the journal device from mkfs options
817 while i < len(x) - 1:
818 if x[i] == '-J' and x[i+1].startswith('device='):
824 # build fs according to type
826 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
832 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
834 # devsize is in 1k, and fs block count is in 4k
835 block_cnt = devsize/4
837 if fstype in ('ext3', 'extN', 'ldiskfs'):
838 # ext3 journal size is in megabytes
839 # but don't set jsize if mkfsoptions indicates a separate journal device
840 if jsize == 0 and jdev(mkfsoptions) == '':
842 if not is_block(dev):
843 ret, out = runcmd("ls -l %s" %dev)
844 devsize = int(string.split(out[0])[4]) / 1024
846 # sfdisk works for symlink, hardlink, and realdev
847 ret, out = runcmd("sfdisk -s %s" %dev)
849 devsize = int(out[0])
851 # sfdisk -s will fail for too large block device,
852 # then, read the size of partition from /proc/partitions
854 # get the realpath of the device
855 # it may be the real device, such as /dev/hda7
856 # or the hardlink created via mknod for a device
857 if 'realpath' in dir(os.path):
858 real_dev = os.path.realpath(dev)
862 while os.path.islink(real_dev) and (link_count < 20):
863 link_count = link_count + 1
864 dev_link = os.readlink(real_dev)
865 if os.path.isabs(dev_link):
868 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
870 panic("Entountered too many symbolic links resolving block device:", dev)
872 # get the major and minor number of the realpath via ls
873 # it seems python(os.stat) does not return
874 # the st_rdev member of the stat structure
875 ret, out = runcmd("ls -l %s" %real_dev)
876 major = string.split(string.split(out[0])[4], ",")[0]
877 minor = string.split(out[0])[5]
879 # get the devsize from /proc/partitions with the major and minor number
880 ret, out = runcmd("cat /proc/partitions")
883 if string.split(line)[0] == major and string.split(line)[1] == minor:
884 devsize = int(string.split(line)[2])
887 if devsize > 1024 * 1024:
888 jsize = ((devsize / 102400) * 4)
891 if jsize: jopt = "-J size=%d" %(jsize,)
892 if isize: iopt = "-I %d" %(isize,)
893 mkfs = 'mkfs.ext2 -j -b 4096 '
894 if not isblock or config.force:
896 if jdev(mkfsoptions) != '':
897 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
899 jmkfs = jmkfs + '-F '
900 jmkfs = jmkfs + jdev(mkfsoptions)
901 (ret, out) = run (jmkfs)
903 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
904 elif fstype == 'reiserfs':
905 # reiserfs journal size is in blocks
906 if jsize: jopt = "--journal_size %d" %(jsize,)
907 mkfs = 'mkreiserfs -ff'
909 panic('unsupported fs type: ', fstype)
911 if config.mkfsoptions != None:
912 mkfs = mkfs + ' ' + config.mkfsoptions
913 if mkfsoptions != None:
914 mkfs = mkfs + ' ' + mkfsoptions
915 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
917 panic("Unable to build fs:", dev, string.join(out))
918 # enable hash tree indexing on fsswe
919 if fstype in ('ext3', 'extN', 'ldiskfs'):
920 htree = 'echo "feature FEATURE_C5" | debugfs -w'
921 (ret, out) = run (htree, dev)
923 panic("Unable to enable htree:", dev)
925 # some systems use /dev/loopN, some /dev/loop/N
929 if not os.access(loop + str(0), os.R_OK):
931 if not os.access(loop + str(0), os.R_OK):
932 panic ("can't access loop devices")
935 # find loop device assigned to the file
936 def find_assigned_loop(file):
938 for n in xrange(0, MAX_LOOP_DEVICES):
940 if os.access(dev, os.R_OK):
941 (stat, out) = run('losetup', dev)
942 if out and stat == 0:
943 m = re.search(r'\((.*)\)', out[0])
944 if m and file == m.group(1):
948 # find free loop device
949 def find_free_loop(file):
952 # find next free loop
953 for n in xrange(0, MAX_LOOP_DEVICES):
955 if os.access(dev, os.R_OK):
956 (stat, out) = run('losetup', dev)
961 # create file if necessary and assign the first free loop device
962 def init_loop(file, size, fstype, journal_size, inode_size,
963 mkfsoptions, reformat, autoformat, backfstype, backfile):
966 realfstype = backfstype
967 if is_block(backfile):
968 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
969 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
975 dev = find_assigned_loop(realfile)
977 print 'WARNING: file', realfile, 'already mapped to', dev
980 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
981 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
983 panic("Unable to create backing store:", realfile)
984 mkfs(realfile, size, realfstype, journal_size, inode_size,
985 mkfsoptions, isblock=0)
987 dev = find_free_loop(realfile)
989 print "attach " + realfile + " <-> " + dev
990 run('losetup', dev, realfile)
993 print "out of loop devices"
996 # undo loop assignment
997 def clean_loop(dev, fstype, backfstype, backdev):
1002 if not is_block(realfile):
1003 dev = find_assigned_loop(realfile)
1005 print "detach " + dev + " <-> " + realfile
1006 ret, out = run('losetup -d', dev)
1008 log('unable to clean loop device', dev, 'for file', realfile)
1011 # finilizes passed device
1012 def clean_dev(dev, fstype, backfstype, backdev):
1013 if fstype == 'smfs' or not is_block(dev):
1014 clean_loop(dev, fstype, backfstype, backdev)
1016 # determine if dev is formatted as a <fstype> filesystem
1017 def need_format(fstype, dev):
1018 # FIXME don't know how to implement this
1021 # initialize a block device if needed
1022 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1023 inode_size, mkfsoptions, backfstype, backdev):
1027 if fstype == 'smfs' or not is_block(dev):
1028 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1029 mkfsoptions, reformat, autoformat, backfstype, backdev)
1030 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1031 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1034 # panic("device:", dev,
1035 # "not prepared, and autoformat is not set.\n",
1036 # "Rerun with --reformat option to format ALL filesystems")
1041 """lookup IP address for an interface"""
1042 rc, out = run("/sbin/ifconfig", iface)
1045 addr = string.split(out[1])[1]
1046 ip = string.split(addr, ':')[1]
# def_mount_options(): compute the default mount options for a backing
# filesystem of `fstype` serving a given `target` role ('mds' or 'ost').
1049 def def_mount_options(fstype, target):
1050     """Return default mount options for the passed fstype and target (mds, ost)."""
1051 if fstype == 'ext3' or fstype == 'ldiskfs':
1052     mountfsoptions = "errors=remount-ro"
# asyncdel is only valid for OSTs on 2.4-series kernels (sys_get_branch()
# returns the "major.minor" of /proc/sys/kernel/osrelease).
1053     if target == 'ost' and sys_get_branch() == '2.4':
1054         mountfsoptions = "%s,asyncdel" % (mountfsoptions)
1055     return mountfsoptions
1058 def sys_get_elan_position_file():
1059 procfiles = ["/proc/elan/device0/position",
1060 "/proc/qsnet/elan4/device0/position",
1061 "/proc/qsnet/elan3/device0/position"]
1063 if os.access(p, os.R_OK):
1067 def sys_get_local_nid(net_type, wildcard, cluster_id):
1068 """Return the local nid."""
1070 if sys_get_elan_position_file():
1071 local = sys_get_local_address('elan', '*', cluster_id)
1073 local = sys_get_local_address(net_type, wildcard, cluster_id)
1076 def sys_get_local_address(net_type, wildcard, cluster_id):
1077 """Return the local address for the network type."""
1079 if net_type in ('tcp','openib','iib','vib','ra'):
1081 iface, star = string.split(wildcard, ':')
1082 local = if2addr(iface)
1084 panic ("unable to determine ip for:", wildcard)
1086 host = socket.gethostname()
1087 local = socket.gethostbyname(host)
1088 elif net_type == 'elan':
1089 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1090 f = sys_get_elan_position_file()
1092 panic ("unable to determine local Elan ID")
1095 lines = fp.readlines()
1099 if a[0] == 'NodeId':
1103 nid = my_int(cluster_id) + my_int(elan_id)
1104 local = "%d" % (nid)
1105 except ValueError, e:
1109 elif net_type == 'lo':
1110 fixme("automatic local address for loopback")
1111 elif net_type == 'gm':
1112 fixme("automatic local address for GM")
1116 def sys_get_branch():
1117 """Returns kernel release"""
1119 fp = open('/proc/sys/kernel/osrelease')
1120 lines = fp.readlines()
1124 version = string.split(l)
1125 a = string.split(version[0], '.')
1126 return a[0] + '.' + a[1]
1131 # XXX: instead of device_list, ask for $name and see what we get
1132 def is_prepared(name):
1133 """Return true if a device exists for the name"""
1134 if config.lctl_dump:
1136 if (config.noexec or config.record) and config.cleanup:
1139 # expect this format:
1140 # 1 UP ldlm ldlm ldlm_UUID 2
1141 out = lctl.device_list()
1143 if name == string.split(s)[3]:
1145 except CommandError, e:
1149 def net_is_prepared():
1150 """If the any device exists, then assume that all networking
1151 has been configured"""
1152 out = lctl.device_list()
1155 def fs_is_mounted(path):
1156 """Return true if path is a mounted lustre filesystem"""
1158 fp = open('/proc/mounts')
1159 lines = fp.readlines()
1163 if a[1] == path and a[2] == 'lustre_lite':
1169 def kmod_find(src_dir, dev_dir, modname):
1170 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1171 for modext in '.ko', '.o':
1172 module = modbase + modext
1174 if os.access(module, os.R_OK):
1180 def kmod_info(modname):
1181 """Returns reference count for passed module name."""
1183 fp = open('/proc/modules')
1184 lines = fp.readlines()
1187 # please forgive my tired fingers for this one
1188 ret = filter(lambda word, mod = modname: word[0] == mod,
1189 map(lambda line: string.split(line), lines))
1193 except Exception, e:
1197 """Presents kernel module"""
1198 def __init__(self, src_dir, dev_dir, name):
1199 self.src_dir = src_dir
1200 self.dev_dir = dev_dir
1203 # FIXME we ignore the failure of loading gss module, because we might
1204 # don't need it at all.
1207 log ('loading module:', self.name, 'srcdir',
1208 self.src_dir, 'devdir', self.dev_dir)
1210 module = kmod_find(self.src_dir, self.dev_dir,
1212 if not module and self.name != 'ptlrpcs_gss':
1213 panic('module not found:', self.name)
1214 (rc, out) = run('/sbin/insmod', module)
1216 if self.name == 'ptlrpcs_gss':
1217 print "Warning: not support gss security!"
1219 raise CommandError('insmod', out, rc)
1221 (rc, out) = run('/sbin/modprobe', self.name)
1223 if self.name == 'ptlrpcs_gss':
1224 print "Warning: not support gss security!"
1226 raise CommandError('modprobe', out, rc)
1230 log('unloading module:', self.name)
1231 (rc, out) = run('/sbin/rmmod', self.name)
1233 log('unable to unload module:', self.name +
1234 "(" + self.refcount() + ")")
1238 """Returns module info if any."""
1239 return kmod_info(self.name)
1242 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1249 """Returns module refcount."""
1256 """Returns 1 if module is used, otherwise 0 is returned."""
1262 if users and users != '(unused)' and users != '-':
1270 """Returns 1 if module is busy, otherwise 0 is returned."""
1271 if self.loaded() and (self.used() or self.refcount() != '0'):
1277 """Manage kernel modules"""
# kmod_manager.__init__(): record the lustre and portals source directories
# used to locate module binaries, and start with an empty ordered list of
# kmod objects to load/unload.
1278     def __init__(self, lustre_dir, portals_dir):
1279         self.lustre_dir = lustre_dir
1280         self.portals_dir = portals_dir
1281         self.kmodule_list = []
1283 def find_module(self, modname):
1284 """Find module by module name"""
1285 for mod in self.kmodule_list:
1286 if mod.name == modname:
1290 def add_portals_module(self, dev_dir, modname):
1291 """Append a module to list of modules to load."""
1293 mod = self.find_module(modname)
1295 mod = kmod(self.portals_dir, dev_dir, modname)
1296 self.kmodule_list.append(mod)
1298 def add_lustre_module(self, dev_dir, modname):
1299 """Append a module to list of modules to load."""
1301 mod = self.find_module(modname)
1303 mod = kmod(self.lustre_dir, dev_dir, modname)
1304 self.kmodule_list.append(mod)
1306 def load_modules(self):
1307 """Load all the modules in the list in the order they appear."""
1308 for mod in self.kmodule_list:
1309 if mod.loaded() and not config.noexec:
1313 def cleanup_modules(self):
1314 """Unload the modules in the list in reverse order."""
1315 rev = self.kmodule_list
1318 if (not mod.loaded() or mod.busy()) and not config.noexec:
1321 if mod.name == 'portals' and config.dump:
1322 lctl.dump(config.dump)
1325 # ============================================================
1326 # Classes to prepare and cleanup the various objects
1329 """ Base class for the rest of the modules. The default cleanup method is
1330 defined here, as well as some utilitiy funcs.
1332 def __init__(self, module_name, db):
1334 self.module_name = module_name
1335 self.name = self.db.getName()
1336 self.uuid = self.db.getUUID()
# info(): print a progress line tagged with this module's type, name and
# uuid, followed by the space-joined stringified arguments.
1340     def info(self, *args):
1341         msg = string.join(map(str,args))
1342         print self.module_name + ":", self.name, self.uuid, msg
1345 """ default cleanup, used for most modules """
1348 lctl.cleanup(self.name, self.uuid, config.force)
1349 except CommandError, e:
1350 log(self.module_name, "cleanup failed: ", self.name)
1354 def add_module(self, manager):
1355 """Adds all needed modules in the order they appear."""
1358 def safe_to_clean(self):
1361 def safe_to_clean_modules(self):
1362 return self.safe_to_clean()
1364 class Network(Module):
1365 def __init__(self,db):
1366 Module.__init__(self, 'NETWORK', db)
1367 self.net_type = self.db.get_val('nettype')
1368 self.nid = self.db.get_val('nid', '*')
1369 self.cluster_id = self.db.get_val('clusterid', "0")
1370 self.port = self.db.get_val_int('port', 0)
1373 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1375 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1376 self.generic_nid = 1
1377 debug("nid:", self.nid)
1379 self.generic_nid = 0
1381 self.nid_uuid = self.nid_to_uuid(self.nid)
1382 self.hostaddr = self.db.get_hostaddr()
1383 if len(self.hostaddr) == 0:
1384 self.hostaddr.append(self.nid)
1385 if '*' in self.hostaddr[0]:
1386 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1387 if not self.hostaddr[0]:
1388 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1389 debug("hostaddr:", self.hostaddr[0])
def add_module(self, manager):
    """Queue the portals kernel modules this network needs on `manager`.

    Always queues libcfs and portals, adds the ptl router when this node
    routes, then the NAL (network abstraction layer) module matching
    self.net_type.
    """
    manager.add_portals_module("libcfs", 'libcfs')
    manager.add_portals_module("portals", 'portals')
    if node_needs_router():
        manager.add_portals_module("router", 'kptlrouter')
    if self.net_type == 'tcp':
        manager.add_portals_module("knals/socknal", 'ksocknal')
    if self.net_type == 'elan':
        manager.add_portals_module("knals/qswnal", 'kqswnal')
    if self.net_type == 'gm':
        manager.add_portals_module("knals/gmnal", 'kgmnal')
    if self.net_type == 'openib':
        manager.add_portals_module("knals/openibnal", 'kopenibnal')
    if self.net_type == 'iib':
        manager.add_portals_module("knals/iibnal", 'kiibnal')
    if self.net_type == 'vib':
        # BUG FIX: original read self.add_portals_module(...), but Network
        # defines no such method -- every sibling branch queues the module
        # on the passed kmod_manager, so 'vib' must as well.
        manager.add_portals_module("knals/vibnal", 'kvibnal')
    if self.net_type == 'lo':
        manager.add_portals_module("knals/lonal", 'klonal')
    if self.net_type == 'ra':
        manager.add_portals_module("knals/ranal", 'kranal')
def nid_to_uuid(self, nid):
    """Derive the connection UUID string for a nid: "NID_<nid>_UUID"."""
    return "NID_%s_UUID" % (nid,)
1418 if not config.record and net_is_prepared():
1420 self.info(self.net_type, self.nid, self.port)
1421 if not (config.record and self.generic_nid):
1422 lctl.network(self.net_type, self.nid)
1423 if self.net_type == 'tcp':
1425 for hostaddr in self.db.get_hostaddr():
1426 ip = string.split(hostaddr, '/')[0]
1427 if len(string.split(hostaddr, '/')) == 2:
1428 netmask = string.split(hostaddr, '/')[1]
1431 lctl.add_interface(self.net_type, ip, netmask)
1432 if self.net_type == 'elan':
1434 if self.port and node_is_router():
1435 run_one_acceptor(self.port)
1436 self.connect_peer_gateways()
1438 def connect_peer_gateways(self):
1439 for router in self.db.lookup_class('node'):
1440 if router.get_val_int('router', 0):
1441 for netuuid in router.get_networks():
1442 net = self.db.lookup(netuuid)
1444 if (gw.cluster_id == self.cluster_id and
1445 gw.net_type == self.net_type):
1446 if gw.nid != self.nid:
1449 def disconnect_peer_gateways(self):
1450 for router in self.db.lookup_class('node'):
1451 if router.get_val_int('router', 0):
1452 for netuuid in router.get_networks():
1453 net = self.db.lookup(netuuid)
1455 if (gw.cluster_id == self.cluster_id and
1456 gw.net_type == self.net_type):
1457 if gw.nid != self.nid:
1460 except CommandError, e:
1461 print "disconnect failed: ", self.name
1465 def safe_to_clean(self):
1466 return not net_is_prepared()
1469 self.info(self.net_type, self.nid, self.port)
1471 stop_acceptor(self.port)
1472 if node_is_router():
1473 self.disconnect_peer_gateways()
1474 if self.net_type == 'tcp':
1475 for hostaddr in self.db.get_hostaddr():
1476 ip = string.split(hostaddr, '/')[0]
1477 lctl.del_interface(self.net_type, ip)
1479 def correct_level(self, level, op=None):
1482 class RouteTable(Module):
1483 def __init__(self,db):
1484 Module.__init__(self, 'ROUTES', db)
1486 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1488 # only setup connections for tcp, openib, and iib NALs
1490 if not net_type in ('tcp','openib','iib','vib','ra'):
1493 # connect to target if route is to single node and this node is the gw
1494 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1495 if not local_cluster(net_type, tgt_cluster_id):
1496 panic("target", lo, " not on the local cluster")
1497 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1498 # connect to gateway if this node is not the gw
1499 elif (local_cluster(net_type, gw_cluster_id)
1500 and not local_interface(net_type, gw_cluster_id, gw)):
1501 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1506 panic("no server for nid", lo)
1509 return Network(srvdb)
1512 if not config.record and net_is_prepared():
1515 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1516 lctl.add_route(net_type, gw, lo, hi)
1517 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1521 def safe_to_clean(self):
1522 return not net_is_prepared()
1525 if net_is_prepared():
1526 # the network is still being used, don't clean it up
1528 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1529 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1532 lctl.disconnect(srv)
1533 except CommandError, e:
1534 print "disconnect failed: ", self.name
1539 lctl.del_route(net_type, gw, lo, hi)
1540 except CommandError, e:
1541 print "del_route failed: ", self.name
# Management (MGMT) service module: loads the lvfs/obdclass/ptlrpc stack
# plus the mgmt_svc kernel module and creates the "mgmt" obd device.
# NOTE(review): several method bodies are truncated in this dump
# (safe_to_clean, correct_level) — kept byte-identical.
1545 class Management(Module):
1546 def __init__(self, db):
1547 Module.__init__(self, 'MGMT', db)
1549 def add_module(self, manager):
1550 manager.add_lustre_module('lvfs', 'lvfs')
1551 manager.add_lustre_module('obdclass', 'obdclass')
1552 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1553 manager.add_lustre_module('mgmt', 'mgmt_svc')
# prepare(): skip if the device already exists (unless recording a config log).
1556 if not config.record and is_prepared(self.name):
1559 lctl.newdev("mgmt", self.name, self.uuid)
1561 def safe_to_clean(self):
# cleanup(): only tear down a device that was actually set up.
1565 if is_prepared(self.name):
1566 Module.cleanup(self)
1568 def correct_level(self, level, op=None):
1571 # This is only needed to load the modules; the LDLM device
1572 # is now created automatically.
# LDLM module methods (the `class LDLM` header itself is missing from this
# dump).  Per the comment above, this exists only to load modules; the LDLM
# device is created automatically by the kernel.
1574 def __init__(self,db):
1575 Module.__init__(self, 'LDLM', db)
1577 def add_module(self, manager):
1578 manager.add_lustre_module('lvfs', 'lvfs')
1579 manager.add_lustre_module('obdclass', 'obdclass')
# ptlrpcs (security) must be loaded before ptlrpc, gss support after.
1580 manager.add_lustre_module('sec', 'ptlrpcs')
1581 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1582 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1590 def correct_level(self, level, op=None):
# LOV client module: aggregates the OSC clients for each LOV target.
# config_only LOVs are placeholders used while writing config logs and
# must never be started or cleaned up directly.
1594 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1595 Module.__init__(self, 'LOV', db)
1596 if name_override != None:
1597 self.name = "lov_%s" % name_override
1598 self.mds_uuid = self.db.get_first_ref('mds')
# Striping parameters with their historical defaults (1MB stripes).
1599 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1600 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1601 self.pattern = self.db.get_val_int('stripepattern', 0)
1602 self.devlist = self.db.get_lov_tgts('lov_tgt')
1603 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
# The LOV descriptor keeps the original uuid; the device gets a fresh
# per-client uuid so multiple mounts do not collide.
1606 self.desc_uuid = self.uuid
1607 self.uuid = generate_client_uuid(self.name)
1608 self.fs_name = fs_name
1610 self.config_only = 1
1612 self.config_only = None
1613 mds = self.db.lookup(self.mds_uuid)
1614 self.mds_name = mds.getName()
# Build one OSC per target tuple (uuid, index, generation, active flag).
1615 for (obd_uuid, index, gen, active) in self.devlist:
1618 self.obdlist.append(obd_uuid)
1619 obd = self.db.lookup(obd_uuid)
1620 osc = get_osc(obd, self.uuid, fs_name)
1622 self.osclist.append((osc, index, gen, active))
1624 panic('osc not found:', obd_uuid)
# LOV prepare/cleanup/add_module (the prepare `def` line is absent from
# this dump).  prepare: create the lov device, then prepare each OSC and
# attach it to the LOV at its (index, generation) slot.
1630 if not config.record and is_prepared(self.name):
1632 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1633 self.stripe_off, self.pattern, self.devlist,
1635 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1636 self.stripe_sz, self.stripe_off, self.pattern,
1637 string.join(self.obdlist))
1638 for (osc, index, gen, active) in self.osclist:
1639 target_uuid = osc.target_uuid
1641 # Only ignore connect failures with --force, which
1642 # isn't implemented here yet.
1644 osc.prepare(ignore_connect_failure=0)
1645 except CommandError, e:
1646 print "Error preparing OSC %s\n" % osc.uuid
1648 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
# cleanup(): OSC teardown loop (body truncated in this dump).
1651 for (osc, index, gen, active) in self.osclist:
1652 target_uuid = osc.target_uuid
1654 if is_prepared(self.name):
1655 Module.cleanup(self)
1656 if self.config_only:
1657 panic("Can't clean up config_only LOV ", self.name)
1659 def add_module(self, manager):
1660 if self.config_only:
1661 panic("Can't load modules for config_only LOV ", self.name)
1662 for (osc, index, gen, active) in self.osclist:
1663 osc.add_module(manager)
1665 manager.add_lustre_module('lov', 'lov')
1667 def correct_level(self, level, op=None):
# LMV client module: the metadata analogue of LOV, aggregating one MDC
# per MDS target (the `class LMV` header is missing from this dump).
1671 def __init__(self, db, uuid, fs_name, name_override = None):
1672 Module.__init__(self, 'LMV', db)
1673 if name_override != None:
1674 self.name = "lmv_%s" % name_override
# Prefer explicit lmv_tgt entries; fall back to the mds references.
1676 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1677 if self.devlist == None:
1678 self.devlist = self.db.get_refs('mds')
1681 self.desc_uuid = self.uuid
1683 self.fs_name = fs_name
1684 for mds_uuid in self.devlist:
1685 mds = self.db.lookup(mds_uuid)
1687 panic("MDS not found!")
1688 mdc = MDC(mds, self.uuid, fs_name)
1690 self.mdclist.append(mdc)
1692 panic('mdc not found:', mds_uuid)
# prepare(): set up each MDC, then create the lmv device itself.
1695 if is_prepared(self.name):
1699 for mdc in self.mdclist:
1701 # Only ignore connect failures with --force, which
1702 # isn't implemented here yet.
1703 mdc.prepare(ignore_connect_failure=0)
1704 except CommandError, e:
1705 print "Error preparing LMV %s\n" % mdc.uuid
1708 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1709 string.join(self.devlist))
# cleanup(): per-MDC teardown (loop body truncated in this dump).
1712 for mdc in self.mdclist:
1714 if is_prepared(self.name):
1715 Module.cleanup(self)
1717 def add_module(self, manager):
1718 for mdc in self.mdclist:
1719 mdc.add_module(manager)
1721 manager.add_lustre_module('lmv', 'lmv')
1723 def correct_level(self, level, op=None):
# CONFDEV: a "confobd" device wrapped around a target's block device.  It
# mounts the target filesystem, writes the per-target lctl config logs
# (server setup, client setup, cleanup logs), and replays them at start.
1726 class CONFDEV(Module):
1727 def __init__(self, db, name, target_uuid, uuid):
1728 Module.__init__(self, 'CONFDEV', db)
1729 self.devpath = self.db.get_val('devpath','')
# NOTE(review): backdevpath is read from 'devpath', not 'backdevpath' —
# looks like a copy/paste slip; confirm against the XML schema before fixing.
1730 self.backdevpath = self.db.get_val('devpath','')
1731 self.size = self.db.get_val_int('devsize', 0)
1732 self.journal_size = self.db.get_val_int('journalsize', 0)
1733 self.fstype = self.db.get_val('fstype', '')
1734 self.backfstype = self.db.get_val('backfstype', '')
1735 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1736 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1737 self.target = self.db.lookup(target_uuid)
1738 self.name = "conf_%s" % self.target.getName()
1739 self.client_uuids = self.target.get_refs('client')
1740 self.obdtype = self.db.get_val('obdtype', '')
1742 if self.obdtype == None:
1743 self.obdtype = 'dumb'
1745 self.conf_name = name
1746 self.conf_uuid = uuid
1747 self.realdev = self.devpath
# If an LMV is configured, its client list supersedes the target's own.
1752 lmv_uuid = self.db.get_first_ref('lmv')
1753 if lmv_uuid != None:
1754 self.lmv = self.db.lookup(lmv_uuid)
1755 if self.lmv != None:
1756 self.client_uuids = self.lmv.get_refs('client')
# MDS targets default autoformat to "no"; OST branch defaults to "yes".
1758 if self.target.get_class() == 'mds':
1759 if self.target.get_val('failover', 0):
1760 self.failover_mds = 'f'
1762 self.failover_mds = 'n'
1763 self.format = self.db.get_val('autoformat', "no")
1765 self.format = self.db.get_val('autoformat', "yes")
1766 self.osdtype = self.db.get_val('osdtype')
1767 ost = self.db.lookup(target_uuid)
1768 if ost.get_val('failover', 0):
1769 self.failover_ost = 'f'
1771 self.failover_ost = 'n'
1773 self.inode_size = self.get_inode_size()
1775 if self.lmv != None:
1776 client_uuid = self.name + "_lmv_UUID"
1777 self.master = LMV(self.lmv, client_uuid,
1778 self.conf_name, self.conf_name)
# Pick an MDS inode size large enough to hold the striping EA for the
# widest stripe count the attached LOV can produce.
1780 def get_inode_size(self):
1781 inode_size = self.db.get_val_int('inodesize', 0)
1782 if inode_size == 0 and self.target.get_class() == 'mds':
1784 # default inode size for case when neither LOV either
1785 # LMV is accessible.
1786 self.inode_size = 256
1788 # find the LOV for this MDS
1789 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1790 if lovconfig_uuid or self.lmv != None:
1791 if self.lmv != None:
1792 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1793 lovconfig = self.lmv.lookup(lovconfig_uuid)
1794 lov_uuid = lovconfig.get_first_ref('lov')
1795 if lov_uuid == None:
1796 panic(self.target.getName() + ": No LOV found for lovconfig ",
1799 lovconfig = self.target.lookup(lovconfig_uuid)
1800 lov_uuid = lovconfig.get_first_ref('lov')
1801 if lov_uuid == None:
1802 panic(self.target.getName() + ": No LOV found for lovconfig ",
1804 if self.lmv != None:
1805 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1806 lovconfig = self.lmv.lookup(lovconfig_uuid)
1807 lov_uuid = lovconfig.get_first_ref('lov')
1809 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1812 # default stripe count controls default inode_size
1813 if lov.stripe_cnt > 0:
1814 stripe_count = lov.stripe_cnt
1816 stripe_count = len(lov.devlist)
# Thresholds map stripe width to ext3 inode sizes (assignments for each
# tier are missing from this dump).
1817 if stripe_count > 77:
1819 elif stripe_count > 35:
1821 elif stripe_count > 13:
1823 elif stripe_count > 3:
# Compose the mount options: global --mountfsoptions, per-device options,
# smfs backing-store options, and MDS-specific extended-attribute flags.
1830 def get_mount_options(self, blkdev):
1831 options = def_mount_options(self.fstype,
1832 self.target.get_class())
1834 if config.mountfsoptions:
1836 options = "%s,%s" %(options, config.mountfsoptions)
1838 options = config.mountfsoptions
1839 if self.mountfsoptions:
1840 options = "%s,%s" %(options, self.mountfsoptions)
1842 if self.mountfsoptions:
1844 options = "%s,%s" %(options, self.mountfsoptions)
1846 options = self.mountfsoptions
1848 if self.fstype == 'smfs':
1850 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1853 options = "type=%s,dev=%s" %(self.backfstype,
1856 if self.target.get_class() == 'mds':
1858 options = "%s,acl,user_xattr,iopen_nopriv" %(options)
1860 options = "iopen_nopriv"
# prepare() fragment (its `def` line is absent from this dump): format /
# locate the block device and create the confobd device on it.
1865 if is_prepared(self.name):
1868 blkdev = block_dev(self.devpath, self.size, self.fstype,
1869 config.reformat, self.format, self.journal_size,
1870 self.inode_size, self.mkfsoptions, self.backfstype,
1873 if self.fstype == 'smfs':
1878 mountfsoptions = self.get_mount_options(blkdev)
1880 self.info(self.target.get_class(), realdev, mountfsoptions,
1881 self.fstype, self.size, self.format)
1883 lctl.newdev("confobd", self.name, self.uuid,
1884 setup ="%s %s %s" %(realdev, self.fstype,
# Remember the resolved values for write_conf()/start().
1887 self.mountfsoptions = mountfsoptions
1888 self.realdev = realdev
1890 def add_module(self, manager):
1891 manager.add_lustre_module('obdclass', 'confobd')
# Record the '<target>-conf' setup log, the OSS/MDS service log, and the
# per-client mount logs onto the target device via lctl record mode.
1893 def write_conf(self):
1894 if self.target.get_class() == 'ost':
1896 lctl.clear_log(self.name, self.target.getName() + '-conf')
1897 lctl.record(self.name, self.target.getName() + '-conf')
1898 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1899 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1901 self.mountfsoptions))
1903 lctl.clear_log(self.name, 'OSS-conf')
1904 lctl.record(self.name, 'OSS-conf')
1905 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
1910 if self.target.get_class() == 'mds':
1911 if self.master != None:
1912 master_name = self.master.name
1914 master_name = 'dumb'
1917 lctl.clear_log(self.name, self.target.getName() + '-conf')
1918 lctl.record(self.name, self.target.getName() + '-conf')
1919 lctl.newdev("mds", self.conf_name, self.conf_uuid,
1920 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
1921 self.conf_name, self.mountfsoptions,
1922 master_name, self.obdtype))
1926 if not self.client_uuids:
1929 for uuid in self.client_uuids:
1930 log("recording client:", uuid)
1931 client_uuid = generate_client_uuid(self.name)
1932 client = VOSC(self.db.lookup(uuid), client_uuid,
1933 self.target.getName(), self.name)
1935 lctl.clear_log(self.name, self.target.getName())
1936 lctl.record(self.name, self.target.getName())
1938 lctl.mount_option(self.target.getName(), client.get_name(), "")
1942 lctl.clear_log(self.name, self.target.getName() + '-clean')
1943 lctl.record(self.name, self.target.getName() + '-clean')
1945 lctl.del_mount_option(self.target.getName())
1953 # record logs for each client
1955 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1957 config_options = CONFIG_FILE
1959 for node_db in self.db.lookup_class('node'):
1960 client_name = node_db.getName()
1961 for prof_uuid in node_db.get_refs('profile'):
1962 prof_db = node_db.lookup(prof_uuid)
1963 # refactor this into a function to test "clientness"
1965 for ref_class, ref_uuid in prof_db.get_all_refs():
1966 if ref_class in ('mountpoint','echoclient'):
1967 debug("recording", client_name)
1968 old_noexec = config.noexec
# Re-invoke lconf itself (sys.argv[0]) in --record mode to write each
# client's setup and cleanup logs onto this device.
1970 noexec_opt = ('', '-n')
1971 ret, out = run (sys.argv[0],
1972 noexec_opt[old_noexec == 1],
1973 " -v --record --nomod",
1974 "--record_log", client_name,
1975 "--record_device", self.name,
1976 "--node", client_name,
1979 for s in out: log("record> ", string.strip(s))
1980 ret, out = run (sys.argv[0],
1981 noexec_opt[old_noexec == 1],
1982 "--cleanup -v --record --nomod",
1983 "--record_log", client_name + "-clean",
1984 "--record_device", self.name,
1985 "--node", client_name,
1988 for s in out: log("record> ", string.strip(s))
1989 config.noexec = old_noexec
# start(): replay the recorded config log; for OSTs also start the
# shared OSS service log if it is not already up.
1993 lctl.start(self.name, self.conf_name)
1994 except CommandError, e:
1996 if self.target.get_class() == 'ost':
1997 if not is_prepared('OSS'):
1999 lctl.start(self.name, 'OSS')
2000 except CommandError, e:
# cleanup(): best-effort teardown of the confobd and its device.
2004 if is_prepared(self.name):
2006 lctl.cleanup(self.name, self.uuid, 0, 0)
2007 clean_dev(self.devpath, self.fstype,
2008 self.backfstype, self.backdevpath)
2009 except CommandError, e:
2010 log(self.module_name, "cleanup failed: ", self.name)
2013 Module.cleanup(self)
# MDSDEV: the MDS server device.  Delegates on-disk setup and config-log
# writing to an embedded CONFDEV; handles failover/active-target logic,
# security flavors, root-squash, and the shared MDT service device.
2015 class MDSDEV(Module):
2016 def __init__(self,db):
2017 Module.__init__(self, 'MDSDEV', db)
2018 self.devpath = self.db.get_val('devpath','')
# NOTE(review): backdevpath reads 'devpath' (same pattern as CONFDEV) —
# possibly a copy/paste slip; confirm against the XML schema.
2019 self.backdevpath = self.db.get_val('devpath','')
2020 self.size = self.db.get_val_int('devsize', 0)
2021 self.journal_size = self.db.get_val_int('journalsize', 0)
2022 self.fstype = self.db.get_val('fstype', '')
2023 self.backfstype = self.db.get_val('backfstype', '')
2024 self.nspath = self.db.get_val('nspath', '')
2025 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2026 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2027 self.obdtype = self.db.get_val('obdtype', '')
2028 self.root_squash = self.db.get_val('root_squash', '')
2029 self.no_root_squash = self.db.get_val('no_root_squash', '')
2031 target_uuid = self.db.get_first_ref('target')
2032 self.target = self.db.lookup(target_uuid)
2033 self.name = self.target.getName()
2037 lmv_uuid = self.db.get_first_ref('lmv')
2038 if lmv_uuid != None:
2039 self.lmv = self.db.lookup(lmv_uuid)
# Failover: only the active target instance runs; inactive instances are
# skipped (branch bodies truncated in this dump).
2041 active_uuid = get_active_target(self.target)
2043 panic("No target device found:", target_uuid)
2044 if active_uuid == self.uuid:
2046 group = self.target.get_val('group')
2047 if config.group and config.group != group:
2052 self.uuid = target_uuid
2055 if self.lmv != None:
2056 client_uuid = self.name + "_lmv_UUID"
2057 self.master = LMV(self.lmv, client_uuid,
2058 self.name, self.name)
2060 self.confobd = CONFDEV(self.db, self.name,
2061 target_uuid, self.uuid)
2063 def add_module(self, manager):
2065 manager.add_lustre_module('mdc', 'mdc')
2066 manager.add_lustre_module('osc', 'osc')
2067 manager.add_lustre_module('ost', 'ost')
2068 manager.add_lustre_module('lov', 'lov')
2069 manager.add_lustre_module('mds', 'mds')
2071 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2072 manager.add_lustre_module(self.fstype, self.fstype)
2075 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2077 # if fstype is smfs, then we should also take care about backing
2079 if self.fstype == 'smfs':
2080 manager.add_lustre_module(self.backfstype, self.backfstype)
2081 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
# 'snap' in the mount options requires smfs plus the snapshot fsfilt.
2083 for option in string.split(self.mountfsoptions, ','):
2084 if option == 'snap':
2085 if not self.fstype == 'smfs':
2086 panic("mountoptions has 'snap', but fstype is not smfs.")
2087 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2088 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2091 if self.master != None:
2092 self.master.add_module(manager)
2094 # add CONFOBD modules
2095 if self.confobd != None:
2096 self.confobd.add_module(manager)
# write_conf(): spin up the confobd just long enough to write the logs.
2098 def write_conf(self):
2099 if is_prepared(self.name):
2102 debug(self.uuid, "not active")
2105 self.confobd.prepare()
2106 self.confobd.write_conf()
2107 self.confobd.cleanup()
# prepare() fragment (its `def` line is absent from this dump).
2110 if is_prepared(self.name):
2113 debug(self.uuid, "not active")
2117 self.confobd.prepare()
2119 self.confobd.write_conf()
2122 if self.master != None:
2123 self.master.prepare()
# Attach briefly so the security flavors can be set before the config
# log is replayed.
2125 lctl.attach("mds", self.name, self.uuid)
2126 if config.mds_mds_sec:
2127 lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec)
2128 if config.mds_ost_sec:
2129 lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec)
2130 lctl.detach(self.name)
2132 if not config.record:
2133 self.confobd.start()
2135 if not is_prepared('MDT'):
2136 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
# In development mode, install the lsd (lustre security descriptor)
# upcall from the build tree into /proc.
2138 if development_mode():
2139 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2140 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2141 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2142 print "MDS Warning: failed to set lsd cache upcall"
2144 run("echo ", upcall, " > ", procentry)
# Command-line root_squash settings override the per-device config.
2146 if config.root_squash == None:
2147 config.root_squash = self.root_squash
2148 if config.no_root_squash == None:
2149 config.no_root_squash = self.no_root_squash
2150 if config.root_squash:
2151 if config.no_root_squash:
2152 nsnid = config.no_root_squash
2155 lctl.root_squash(self.name, config.root_squash, nsnid)
# True while any 'mds' device is still registered with lctl.
2157 def msd_remaining(self):
2158 out = lctl.device_list()
2160 if string.split(s)[2] in ('mds',):
2163 def safe_to_clean(self):
2166 def safe_to_clean_modules(self):
2167 return not self.msd_remaining()
# cleanup() fragment: tear down this MDS, its LMV master, the shared MDT
# device once no MDSes remain, and finally the confobd.
2171 debug(self.uuid, "not active")
2174 if is_prepared(self.name):
2176 lctl.cleanup(self.name, self.uuid, config.force,
2178 except CommandError, e:
2179 log(self.module_name, "cleanup failed: ", self.name)
2182 Module.cleanup(self)
2184 if self.master != None:
2185 self.master.cleanup()
2186 if not self.msd_remaining() and is_prepared('MDT'):
2188 lctl.cleanup("MDT", "MDT_UUID", config.force,
2190 except CommandError, e:
2191 print "cleanup failed: ", self.name
2196 self.confobd.cleanup()
2198 def correct_level(self, level, op=None):
2199 #if self.master != None:
# OSD (OST server device) constructor; mirrors MDSDEV but for object
# storage targets (the enclosing `class OSD` header is missing from this
# dump).
2204 def __init__(self, db):
2205 Module.__init__(self, 'OSD', db)
2206 self.osdtype = self.db.get_val('osdtype')
2207 self.devpath = self.db.get_val('devpath', '')
# NOTE(review): backdevpath reads 'devpath' — same suspicious pattern as
# CONFDEV/MDSDEV; confirm before changing.
2208 self.backdevpath = self.db.get_val('devpath', '')
2209 self.size = self.db.get_val_int('devsize', 0)
2210 self.journal_size = self.db.get_val_int('journalsize', 0)
2211 self.inode_size = self.db.get_val_int('inodesize', 0)
2212 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2213 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2214 self.fstype = self.db.get_val('fstype', '')
2215 self.backfstype = self.db.get_val('backfstype', '')
2216 self.nspath = self.db.get_val('nspath', '')
2217 target_uuid = self.db.get_first_ref('target')
2218 ost = self.db.lookup(target_uuid)
2219 self.name = ost.getName()
2220 self.format = self.db.get_val('autoformat', 'yes')
2221 if ost.get_val('failover', 0):
2222 self.failover_ost = 'f'
2224 self.failover_ost = 'n'
# Failover active-target selection (branch bodies truncated in this dump).
2226 active_uuid = get_active_target(ost)
2228 panic("No target device found:", target_uuid)
2229 if active_uuid == self.uuid:
2231 group = ost.get_val('group')
2232 if config.group and config.group != group:
2237 self.uuid = target_uuid
2238 self.confobd = CONFDEV(self.db, self.name,
2239 target_uuid, self.uuid)
# OSD module loading, prepare/write_conf, and cleanup.
2241 def add_module(self, manager):
2244 manager.add_lustre_module('ost', 'ost')
2246 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2247 manager.add_lustre_module(self.fstype, self.fstype)
2250 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2252 if self.fstype == 'smfs':
2253 manager.add_lustre_module(self.backfstype, self.backfstype)
2254 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
# BUG(review): this iterates the *characters* of the mountfsoptions
# string, so option == 'snap' can never match.  MDSDEV.add_module uses
# string.split(self.mountfsoptions, ',') for the same check — this
# should almost certainly do the same.
2256 for option in self.mountfsoptions:
2257 if option == 'snap':
2258 if not self.fstype == 'smfs':
2259 panic("mountoptions with snap, but fstype is not smfs\n")
2260 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2261 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2263 manager.add_lustre_module(self.osdtype, self.osdtype)
2265 # add CONFOBD modules
2266 if self.confobd != None:
2267 self.confobd.add_module(manager)
# prepare() fragment (`def` line absent from this dump): obdecho OSDs are
# created directly; real OSDs go through the confobd + config log.
2270 if is_prepared(self.name):
2273 debug(self.uuid, "not active")
2277 if self.osdtype == 'obdecho':
2278 self.info(self.osdtype)
2279 lctl.newdev("obdecho", self.name, self.uuid)
2280 if not is_prepared('OSS'):
2281 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2283 self.confobd.prepare()
2285 self.confobd.write_conf()
2286 if not config.record:
2287 self.confobd.start()
2289 def write_conf(self):
2290 if is_prepared(self.name):
2293 debug(self.uuid, "not active")
2297 if self.osdtype != 'obdecho':
2298 self.confobd.prepare()
2299 self.confobd.write_conf()
2300 if not config.write_conf:
2301 self.confobd.start()
2302 self.confobd.cleanup()
# True while any obdfilter/obdecho device is still registered with lctl.
2304 def osd_remaining(self):
2305 out = lctl.device_list()
2307 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2310 def safe_to_clean(self):
2313 def safe_to_clean_modules(self):
2314 return not self.osd_remaining()
# cleanup() fragment: tear down this OSD, the shared OSS device once no
# OSDs remain, then the confobd.
2318 debug(self.uuid, "not active")
2321 if is_prepared(self.name):
2324 lctl.cleanup(self.name, self.uuid, config.force,
2326 except CommandError, e:
2327 log(self.module_name, "cleanup failed: ", self.name)
2330 if not self.osd_remaining() and is_prepared('OSS'):
2332 lctl.cleanup("OSS", "OSS_UUID", config.force,
2334 except CommandError, e:
2335 print "cleanup failed: ", self.name
2339 if self.osdtype != 'obdecho':
2341 self.confobd.cleanup()
2343 def correct_level(self, level, op=None):
2346 # Generic client module, used by OSC and MDC
# Handles server lookup, routed connections, failover backup targets,
# and the client obd device lifecycle.
2347 class Client(Module):
2348 def __init__(self, tgtdb, uuid, module, fs_name,
2349 self_name=None, module_dir=None):
2350 self.target_name = tgtdb.getName()
2351 self.target_uuid = tgtdb.getUUID()
2352 self.module_dir = module_dir
2353 self.backup_targets = []
2354 self.module = module
2357 self.tgt_dev_uuid = get_active_target(tgtdb)
2358 if not self.tgt_dev_uuid:
2359 panic("No target device found for target(1):", self.target_name)
2364 self.module = module
2365 self.module_name = string.upper(module)
# Default device name encodes module, host, target, and filesystem so
# several mounts on one node do not collide.
2367 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2368 self.target_name, fs_name)
2370 self.name = self_name
2372 self.lookup_server(self.tgt_dev_uuid)
2373 self.lookup_backup_targets()
2374 self.fs_name = fs_name
2375 if not self.module_dir:
2376 self.module_dir = module
2378 def add_module(self, manager):
2379 manager.add_lustre_module(self.module_dir, self.module)
2381 def lookup_server(self, srv_uuid):
2382 """ Lookup a server's network information """
2383 self._server_nets = get_ost_net(self.db, srv_uuid)
2384 if len(self._server_nets) == 0:
2385 panic ("Unable to find a server for:", srv_uuid)
2390 def get_servers(self):
2391 return self._server_nets
2393 def lookup_backup_targets(self):
2394 """ Lookup alternative network information """
2395 prof_list = toplustreDB.get_refs('profile')
2396 for prof_uuid in prof_list:
2397 prof_db = toplustreDB.lookup(prof_uuid)
2399 panic("profile:", prof_uuid, "not found.")
2400 for ref_class, ref_uuid in prof_db.get_all_refs():
2401 if ref_class in ('osd', 'mdsdev'):
2402 devdb = toplustreDB.lookup(ref_uuid)
2403 uuid = devdb.get_first_ref('target')
# A backup target serves the same target uuid from a different device.
2404 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2405 self.backup_targets.append(ref_uuid)
2407 def prepare(self, ignore_connect_failure = 0):
2408 self.info(self.target_uuid)
2409 if not config.record and is_prepared(self.name):
2412 srv = choose_local_server(self.get_servers())
# No local server: set up a route to reach the target instead.
2416 routes = find_route(self.get_servers())
2417 if len(routes) == 0:
2418 panic ("no route to", self.target_uuid)
2419 for (srv, r) in routes:
2420 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2421 except CommandError, e:
2422 if not ignore_connect_failure:
2426 if self.target_uuid in config.inactive and self.permits_inactive():
2427 debug("%s inactive" % self.target_uuid)
2428 inactive_p = "inactive"
2430 debug("%s active" % self.target_uuid)
2432 lctl.newdev(self.module, self.name, self.uuid,
2433 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
# Register each failover backup target as an additional connection.
2435 for tgt_dev_uuid in self.backup_targets:
2436 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2437 if len(this_nets) == 0:
2438 panic ("Unable to find a server for:", tgt_dev_uuid)
2439 srv = choose_local_server(this_nets)
2443 routes = find_route(this_nets);
2444 if len(routes) == 0:
2445 panic("no route to", tgt_dev_uuid)
2446 for (srv, r) in routes:
# BUG(review): 'r[0]. srv.nid_uuid' parses as attribute access on r[0]
# and drops an argument — should be 'r[0], srv.nid_uuid' like the
# identical call at line 2420.  Same typo again at line 2473.
2447 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2449 lctl.add_conn(self.name, srv.nid_uuid);
# cleanup() fragment: disconnect primary and backup servers, removing any
# routes that were added; failures are logged, not fatal.
2452 if is_prepared(self.name):
2453 Module.cleanup(self)
2455 srv = choose_local_server(self.get_servers())
2457 lctl.disconnect(srv)
2459 for (srv, r) in find_route(self.get_servers()):
2460 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2461 except CommandError, e:
2462 log(self.module_name, "cleanup failed: ", self.name)
2466 for tgt_dev_uuid in self.backup_targets:
2467 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2468 srv = choose_local_server(this_net)
2470 lctl.disconnect(srv)
2472 for (srv, r) in find_route(this_net):
# BUG(review): same '.' for ',' typo as line 2447.
2473 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2475 def correct_level(self, level, op=None):
# Best-effort deactivation of this client device.
2478 def deactivate(self):
2480 lctl.deactivate(self.name)
2481 except CommandError, e:
2482 log(self.module_name, "deactivate failed: ", self.name)
# MDC: metadata client, a thin specialization of Client (the `class MDC`
# header is missing from this dump; permits_inactive's body is truncated).
2487 def __init__(self, db, uuid, fs_name):
2488 Client.__init__(self, db, uuid, 'mdc', fs_name)
2490 def permits_inactive(self):
# OSC: object storage client, a thin specialization of Client (the
# `class OSC` header is missing from this dump).
2494 def __init__(self, db, uuid, fs_name):
2495 Client.__init__(self, db, uuid, 'osc', fs_name)
2497 def permits_inactive(self):
# CMOBD: cache-miss obd pairing a "master" obd with a "cache" obd, each of
# which may be a LOV/OSC stack, an MDC, or an LMV depending on its class.
2500 class CMOBD(Module):
2501 def __init__(self, db):
2502 Module.__init__(self, 'CMOBD', db)
2503 self.name = self.db.getName();
2504 self.uuid = generate_client_uuid(self.name)
2505 self.master_uuid = self.db.get_first_ref('masterobd')
2506 self.cache_uuid = self.db.get_first_ref('cacheobd')
2508 master_obd = self.db.lookup(self.master_uuid)
2510 panic('master obd not found:', self.master_uuid)
2512 cache_obd = self.db.lookup(self.cache_uuid)
2514 panic('cache obd not found:', self.cache_uuid)
2519 master_class = master_obd.get_class()
2520 cache_class = cache_obd.get_class()
# Instantiate the right client wrapper for the master side.
2522 if master_class == 'ost' or master_class == 'lov':
2523 client_uuid = "%s_lov_master_UUID" % (self.name)
2524 self.master = LOV(master_obd, client_uuid, self.name);
2525 elif master_class == 'mds':
2526 self.master = get_mdc(db, self.name, self.master_uuid)
2527 elif master_class == 'lmv':
2528 #tmp fix: cobd and cmobd will use same uuid, so use const name here
2529 client_uuid = "%s_lmv_master_UUID" % "master"
2530 self.master = LMV(master_obd, client_uuid, self.name);
2532 panic("unknown master obd class '%s'" %(master_class))
# And for the cache side.
2534 if cache_class == 'ost' or cache_class == 'lov':
2535 client_uuid = "%s_lov_cache_UUID" % (self.name)
2536 self.cache = LOV(cache_obd, client_uuid, self.name);
2537 elif cache_class == 'mds':
2538 self.cache = get_mdc(db, self.name, self.cache_uuid)
2539 elif cache_class == 'lmv':
2540 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2541 self.cache = LMV(cache_obd, client_uuid, self.name);
2543 panic("unknown cache obd class '%s'" %(cache_class))
# prepare() fragment (`def` line absent): set up the master stack, then
# create the cmobd device referencing master and cache uuids.
2546 self.master.prepare()
2547 if not config.record and is_prepared(self.name):
2549 self.info(self.master_uuid, self.cache_uuid)
2550 lctl.newdev("cmobd", self.name, self.uuid,
2551 setup ="%s %s" %(self.master.uuid,
2560 def get_master_name(self):
2561 return self.master.name
2563 def get_cache_name(self):
2564 return self.cache.name
# cleanup() fragment.
2567 if is_prepared(self.name):
2568 Module.cleanup(self)
2570 self.master.cleanup()
2572 def add_module(self, manager):
2573 manager.add_lustre_module('cmobd', 'cmobd')
2574 self.master.add_module(manager)
2576 def correct_level(self, level, op=None):
# COBD: caching obd; structurally parallel to CMOBD above but builds a
# "cobd" device and sets up *both* master and cache stacks (the enclosing
# `class COBD` header is missing from this dump).
2580 def __init__(self, db, uuid, name):
2581 Module.__init__(self, 'COBD', db)
2582 self.name = self.db.getName();
2583 self.uuid = generate_client_uuid(self.name)
2584 self.master_uuid = self.db.get_first_ref('masterobd')
2585 self.cache_uuid = self.db.get_first_ref('cacheobd')
2587 master_obd = self.db.lookup(self.master_uuid)
2589 panic('master obd not found:', self.master_uuid)
2591 cache_obd = self.db.lookup(self.cache_uuid)
2593 panic('cache obd not found:', self.cache_uuid)
2598 master_class = master_obd.get_class()
2599 cache_class = cache_obd.get_class()
2601 if master_class == 'ost' or master_class == 'lov':
2602 client_uuid = "%s_lov_master_UUID" % (self.name)
2603 self.master = LOV(master_obd, client_uuid, name);
2604 elif master_class == 'mds':
2605 self.master = get_mdc(db, name, self.master_uuid)
2606 elif master_class == 'lmv':
2607 #tmp fix: cobd and cmobd will use same uuid, so use const name here
2608 client_uuid = "%s_lmv_master_UUID" % "master"
2609 self.master = LMV(master_obd, client_uuid, self.name);
2611 panic("unknown master obd class '%s'" %(master_class))
2613 if cache_class == 'ost' or cache_class == 'lov':
2614 client_uuid = "%s_lov_cache_UUID" % (self.name)
2615 self.cache = LOV(cache_obd, client_uuid, name);
2616 elif cache_class == 'mds':
2617 self.cache = get_mdc(db, name, self.cache_uuid)
2618 elif cache_class == 'lmv':
2619 client_uuid = "%s_lmv_cache_UUID" % "cache"
2620 self.cache = LMV(cache_obd, client_uuid, self.name);
2622 panic("unknown cache obd class '%s'" %(cache_class))
2630 def get_master_name(self):
2631 return self.master.name
2633 def get_cache_name(self):
2634 return self.cache.name
# prepare() fragment (`def` line absent): note the cobd setup string uses
# device *names*, whereas CMOBD's setup used uuids.
2637 self.master.prepare()
2638 self.cache.prepare()
2639 if not config.record and is_prepared(self.name):
2641 self.info(self.master_uuid, self.cache_uuid)
2642 lctl.newdev("cobd", self.name, self.uuid,
2643 setup ="%s %s" %(self.master.name,
# cleanup() fragment.
2647 if is_prepared(self.name):
2648 Module.cleanup(self)
2649 self.master.cleanup()
2650 self.cache.cleanup()
2652 def add_module(self, manager):
2653 manager.add_lustre_module('cobd', 'cobd')
2654 self.master.add_module(manager)
2656 # virtual interface for OSC and LOV
# VOSC: polymorphic wrapper that delegates to a LOV, COBD, or plain OSC
# depending on the class of the underlying config entry (the enclosing
# `class VOSC` header is missing from this dump).
2658 def __init__(self, db, client_uuid, name, name_override = None):
2659 Module.__init__(self, 'VOSC', db)
2660 if db.get_class() == 'lov':
2661 self.osc = LOV(db, client_uuid, name, name_override)
2663 elif db.get_class() == 'cobd':
2664 self.osc = COBD(db, client_uuid, name)
2667 self.osc = OSC(db, client_uuid, name)
# Delegating accessors (their `def` lines are truncated in this dump).
2671 return self.osc.get_uuid()
2674 return self.osc.get_name()
2682 def add_module(self, manager):
2683 self.osc.add_module(manager)
2685 def correct_level(self, level, op=None):
2686 return self.osc.correct_level(level, op)
2688 # virtual interface for MDC and LMV
# VMDC: metadata twin of VOSC — wraps an LMV, COBD, or plain MDC (the
# enclosing `class VMDC` header is missing from this dump).
2690 def __init__(self, db, client_uuid, name, name_override = None):
2691 Module.__init__(self, 'VMDC', db)
2692 if db.get_class() == 'lmv':
2693 self.mdc = LMV(db, client_uuid, name, name_override)
2694 elif db.get_class() == 'cobd':
2695 self.mdc = COBD(db, client_uuid, name)
2697 self.mdc = MDC(db, client_uuid, name)
# Delegating accessors (`def` lines truncated); note these return the
# attributes directly rather than calling get_uuid()/get_name().
2700 return self.mdc.uuid
2703 return self.mdc.name
2711 def add_module(self, manager):
2712 self.mdc.add_module(manager)
2714 def correct_level(self, level, op=None):
2715 return self.mdc.correct_level(level, op)
# ECHO_CLIENT: test client that drives an obdecho (or any VOSC-wrapped)
# device for I/O benchmarking.
2717 class ECHO_CLIENT(Module):
2718 def __init__(self,db):
2719 Module.__init__(self, 'ECHO_CLIENT', db)
2720 self.obd_uuid = self.db.get_first_ref('obd')
2721 obd = self.db.lookup(self.obd_uuid)
2722 self.uuid = generate_client_uuid(self.name)
2723 self.osc = VOSC(obd, self.uuid, self.name)
# prepare() fragment (`def` line absent from this dump).
2726 if not config.record and is_prepared(self.name):
2729 self.osc.prepare() # XXX This is so cheating. -p
2730 self.info(self.obd_uuid)
2732 lctl.newdev("echo_client", self.name, self.uuid,
2733 setup = self.osc.get_name())
# cleanup() fragment.
2736 if is_prepared(self.name):
2737 Module.cleanup(self)
2740 def add_module(self, manager):
2741 self.osc.add_module(manager)
2742 manager.add_lustre_module('obdecho', 'obdecho')
2744 def correct_level(self, level, op=None):
def generate_client_uuid(name):
    """Return a pseudo-random client uuid derived from *name*.

    Layout is '<5 hex>_<name truncated to 19 chars>_<5 hex><5 hex>',
    clamped to the 36-character uuid limit.  random.random() is used, so
    the result is not cryptographically unique -- it only needs to be
    unlikely to collide between mounts on the same node.
    """
    # BUG FIX: the 'name' argument was missing from the substitution
    # tuple, so the four-field format string raised a TypeError
    # ("not enough arguments for format string") at runtime.
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
# Mountpoint: a client mount of the filesystem.  Builds the VOSC/VMDC
# client stacks and issues the lustre_lite mount (or records the
# equivalent mount_option entries when writing config logs).
2754 class Mountpoint(Module):
2755 def __init__(self,db):
2756 Module.__init__(self, 'MTPT', db)
2757 self.path = self.db.get_val('path')
2758 self.clientoptions = self.db.get_val('clientoptions', '')
2759 self.fs_uuid = self.db.get_first_ref('filesystem')
2760 fs = self.db.lookup(self.fs_uuid)
# Prefer an LMV for metadata; fall back to a single MDS.
2761 self.mds_uuid = fs.get_first_ref('lmv')
2762 if not self.mds_uuid:
2763 self.mds_uuid = fs.get_first_ref('mds')
2764 self.obd_uuid = fs.get_first_ref('obd')
2765 client_uuid = generate_client_uuid(self.name)
2767 ost = self.db.lookup(self.obd_uuid)
2769 panic("no ost: ", self.obd_uuid)
2771 mds = self.db.lookup(self.mds_uuid)
2773 panic("no mds: ", self.mds_uuid)
2775 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2776 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
# prepare() fragment (`def` line absent from this dump).
2779 if not config.record and fs_is_mounted(self.path):
2780 log(self.path, "already mounted.")
2787 self.info(self.path, self.mds_uuid, self.obd_uuid)
2788 if config.record or config.lctl_dump:
2789 lctl.mount_option(local_node_name, self.vosc.get_name(),
2790 self.vmdc.get_name())
# Merge command-line client options with the configured ones.
2793 if config.clientoptions:
2794 if self.clientoptions:
2795 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2797 self.clientoptions = config.clientoptions
2798 if self.clientoptions:
2799 self.clientoptions = ',' + self.clientoptions
2800 # Linux kernel will deal with async and not pass it to ll_fill_super,
2801 # so replace it with Lustre async
2802 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
2806 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,sec=%s%s %s %s" % \
2807 (self.vosc.get_name(), self.vmdc.get_name(), config.sec,
2808 self.clientoptions, config.config, self.path)
2809 run("mkdir", self.path)
2814 panic("mount failed:", self.path, ":", string.join(val))
# cleanup() fragment: umount (forcing with --force), then tear down the
# client stacks.
2817 self.info(self.path, self.mds_uuid,self.obd_uuid)
2819 if config.record or config.lctl_dump:
2820 lctl.del_mount_option(local_node_name)
2822 if fs_is_mounted(self.path):
2824 (rc, out) = run("umount", "-f", self.path)
2826 (rc, out) = run("umount", self.path)
2828 raise CommandError('umount', out, rc)
2830 if fs_is_mounted(self.path):
2831 panic("fs is still mounted:", self.path)
2836 def add_module(self, manager):
2837 self.vosc.add_module(manager)
2838 self.vmdc.add_module(manager)
2839 manager.add_lustre_module('llite', 'llite')
2841 def correct_level(self, level, op=None):
2844 # ============================================================
2845 # misc query functions
def get_ost_net(self, osd_uuid):
    """Return the list of Network objects for the node hosting osd_uuid.

    Returns an empty list when osd_uuid is empty/None; panics when the
    OSD's node reference cannot be resolved.
    """
    srv_list = []
    if not osd_uuid:
        return srv_list
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    if not node:
        # fix: was "node_uuid_" (undefined name), which raised a
        # NameError on this error path instead of the intended panic
        panic("unable to find node for osd_uuid:", osd_uuid,
              " node_ref:", node_uuid)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    return srv_list
# the order of initialization is based on level.
def getServiceLevel(self):
    # Map a service class to its numeric startup level; levels outside
    # [config.minlevel, config.maxlevel] are filtered (forced to 0).
    # NOTE(review): the "ret = <level>" line of each branch and the
    # final return are elided from this listing.
    type = self.get_class()
    if type in ('network',):
    elif type in ('routetbl',):
    elif type in ('ldlm',):
    elif type in ('osd', 'cobd'):
    elif type in ('mdsdev',):
    elif type in ('lmv',):
    elif type in ('mountpoint', 'echoclient'):
    elif type in ('cmobd',):
        # (the "else:" line is elided from this listing)
        panic("Unknown type: ", type)
    if ret < config.minlevel or ret > config.maxlevel:
# return list of services in a profile. list is a list of tuples
# [(level, db_object),]
def getServices(self):
    # (list initialization elided from this listing)
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        # (the "if servdb:" guard is elided from this listing)
        level = getServiceLevel(servdb)
        # services at level 0 are filtered out (guard elided)
        list.append((level, servdb))
        # (the "else:" line is elided from this listing)
        panic('service not found: ' + ref_uuid)
2907 ############################################################
2909 # FIXME: clean this mess up!
2911 # OSC is no longer in the xml, so we have to fake it.
2912 # this is getting ugly and begging for another refactoring
def get_osc(ost_db, uuid, fs_name):
    # Wrap an OST database entry in a faked-up OSC client object.
    osc = OSC(ost_db, uuid, fs_name)
    # (the "return osc" line is elided from this listing)
def get_mdc(db, fs_name, mds_uuid):
    # Build an MDC client object for the given MDS uuid.
    mds_db = db.lookup(mds_uuid);
    # (the "if not mds_db:" guard is elided from this listing)
    error("no mds:", mds_uuid)
    mdc = MDC(mds_db, mds_uuid, fs_name)
    # (the "return mdc" line is elided from this listing)
2924 ############################################################
2925 # routing ("rooting")
2927 # list of (nettype, cluster_id, nid)
def find_local_clusters(node_db):
    # Populate the global local_clusters list with one
    # (nettype, cluster_id, nid) tuple per network on this node, and
    # register an AcceptorHandler for each distinct listening port.
    global local_clusters
    for netuuid in node_db.get_networks():
        net = node_db.lookup(netuuid)
        # (the "srv = Network(net)" line is elided from this listing)
        debug("add_local", netuuid)
        local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
        # (the port > 0 guard is elided from this listing)
        if not acceptors.has_key(srv.port):
            acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
# This node is a gateway.
def node_is_router():
    # (body elided in this listing; returns the global is_router flag)
# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
def node_needs_router():
    """True when the portals router must be loaded: either some local
    service needs a route through a gateway, or this node is a router
    itself."""
    if needs_router:
        return needs_router
    return is_router
# list of (nettype, gw, tgt_cluster_id, lo, hi)
# Currently, these local routes are only added to kptlrouter route
# table if they are needed to connect to a specific server.  This
# should be changed so all available routes are loaded, and the
# ptlrouter can make all the decisions.
def find_local_routes(lustre):
    """Scan the lustre config looking for routers reachable from one of
    this node's local clusters; collect their routes in the global
    local_routes list."""
    global local_routes, needs_router
    # (local_routes initialization elided from this listing)
    list = lustre.lookup_class('node')
    # (the "for router in list:" line is elided from this listing)
    if router.get_val_int('router', 0):
        # (needs_router = 1 elided from this listing)
        for (local_type, local_cluster_id, local_nid) in local_clusters:
            # (gw reset elided from this listing)
            for netuuid in router.get_networks():
                db = router.lookup(netuuid)
                # a gateway must sit on one of our local clusters
                if (local_type == db.get_val('nettype') and
                    local_cluster_id == db.get_val('clusterid')):
                    gw = db.get_val('nid')
                    # (break elided from this listing)
            # (the "if gw:" guard is elided from this listing)
            debug("find_local_routes: gw is", gw)
            for route in router.get_local_routes(local_type, gw):
                local_routes.append(route)
    debug("find_local_routes:", local_routes)
def choose_local_server(srv_list):
    # Return the first server that sits on one of our local clusters
    # (the "return srv" line is elided from this listing).
    for srv in srv_list:
        if local_cluster(srv.net_type, srv.cluster_id):
def local_cluster(net_type, cluster_id):
    # True (1) when (net_type, cluster_id) matches one of this node's
    # local clusters; the return lines are elided from this listing.
    for cluster in local_clusters:
        if net_type == cluster[0] and cluster_id == cluster[1]:
def local_interface(net_type, cluster_id, nid):
    # True (1) when this exact (net_type, cluster_id, nid) triple is one
    # of our local interfaces; the return lines are elided from this
    # listing.
    for cluster in local_clusters:
        if (net_type == cluster[0] and cluster_id == cluster[1]
            and nid == cluster[2]):
def find_route(srv_list):
    # Return [(srv, route)] pairs for servers reachable via a local
    # route: route tuples are (nettype, gw, tgt_cluster_id, lo, hi) and
    # match when lo <= target nid <= hi on the right cluster.
    # (result initialization, "to = srv.nid" and the final return are
    # elided from this listing.)
    frm_type = local_clusters[0][0]
    for srv in srv_list:
        debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
        to_type = srv.net_type
        cluster_id = srv.cluster_id
        debug ('looking for route to', to_type, to)
        for r in local_routes:
            debug("find_route: ", r)
            if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
                result.append((srv, r))
def get_active_target(db):
    # Resolve the currently-active device uuid for a failover target:
    # a node chosen with --select wins, otherwise the 'active' ref.
    # (The branch/return lines are elided from this listing.)
    target_uuid = db.getUUID()
    target_name = db.getName()
    node_name = get_select(target_name)
    tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
    tgt_dev_uuid = db.get_first_ref('active')
def get_server_by_nid_uuid(db, nid_uuid):
    # Find the Network whose nid_uuid matches; the Network construction
    # and return lines are elided from this listing.
    for n in db.lookup_class("network"):
        if net.nid_uuid == nid_uuid:
3033 ############################################################
# newService(db) factory body -- its "def newService(db):" header is
# elided from this listing.  Maps a config db class to the matching
# service object; the "n = <Class>(db)" line of most branches is elided.
type = db.get_class()
debug('Service:', type, db.getName(), db.getUUID())
n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
elif type == 'network':
elif type == 'routetbl':
elif type == 'cobd':
    n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
elif type == 'cmobd':
elif type == 'mdsdev':
elif type == 'mountpoint':
elif type == 'echoclient':
    # (the "else:" line is elided from this listing)
    panic ("unknown service type:", type)
# Prepare the system to run lustre using a particular profile
# in a the configuration.
#  * load & the modules
#  * setup networking for the current node
#  * make sure partitions are in place and prepared
#  * initialize devices with lctl
# Levels is important, and needs to be enforced.
def for_each_profile(db, prof_list, operation):
    # Apply `operation` (doSetup, doCleanup, ...) to the service list of
    # each profile; the operation(services) call is elided from this
    # listing.
    for prof_uuid in prof_list:
        prof_db = db.lookup(prof_uuid)
        # (the "if not prof_db:" guard is elided from this listing)
        panic("profile:", prof_uuid, "not found.")
        services = getServices(prof_db)
def magic_get_osc(db, rec, lov):
    # Resolve the OSC referenced by an 'add' update record.  When a lov
    # object is supplied its uuid/fs name are used directly; otherwise
    # they are dug out of the raw XML via filesystem and mountpoint
    # cross-references.  (Several if/else and loop lines are elided.)
    lov_uuid = lov.get_uuid()
    lov_name = lov.osc.fs_name
    lov_uuid = rec.getAttribute('lov_uuidref')
    # FIXME: better way to find the mountpoint?
    filesystems = db.root_node.getElementsByTagName('filesystem')
    for fs in filesystems:
        ref = fs.getElementsByTagName('obd_ref')
        if ref[0].getAttribute('uuidref') == lov_uuid:
            fsuuid = fs.getAttribute('uuid')
    # (the "if not fsuuid:" guard is elided from this listing)
    panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
    mtpts = db.root_node.getElementsByTagName('mountpoint')
    # (the "for fs in mtpts:" line is elided from this listing)
    ref = fs.getElementsByTagName('filesystem_ref')
    if ref[0].getAttribute('uuidref') == fsuuid:
        lov_name = fs.getAttribute('name')
    # (the "if not lov_name:" guard is elided from this listing)
    panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
    print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
    ost_uuid = rec.getAttribute('ost_uuidref')
    obd = db.lookup(ost_uuid)
    # (the "if not obd:" guard is elided from this listing)
    panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
    osc = get_osc(obd, lov_uuid, lov_name)
    # NOTE(review): 'obd_uuid' below looks like a typo for 'ost_uuid' --
    # it would raise NameError if this panic path were reached; confirm.
    panic('osc not found:', obd_uuid)
# write logs for update records. sadly, logs of all types -- and updates in
# particular -- are something of an afterthought. lconf needs rewritten with
# these as core concepts. so this is a pretty big hack.
def process_update_record(db, update, lov):
    # Replay one <update> element: its child add/deactivate/delete
    # records drive lov_add_obd/lov_del_obd and OSC setup/teardown.
    # (Various continue/else/try lines are elided from this listing.)
    for rec in update.childNodes:
        if rec.nodeType != rec.ELEMENT_NODE:
            # (continue elided from this listing)
        log("found "+rec.nodeName+" record in update version " +
            str(update.getAttribute('version')))
        lov_uuid = rec.getAttribute('lov_uuidref')
        ost_uuid = rec.getAttribute('ost_uuidref')
        index = rec.getAttribute('index')
        gen = rec.getAttribute('generation')
        # all four attributes are mandatory on every record type
        if not lov_uuid or not ost_uuid or not index or not gen:
            panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
        # (the "if not lov:" branch header is elided from this listing)
        tmplov = db.lookup(lov_uuid)
        # (the "if not tmplov:" guard is elided from this listing)
        panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
        lov_name = tmplov.getName()
        # (the "else:" line is elided from this listing)
        lov_name = lov.osc.name

        # ------------------------------------------------------------- add
        if rec.nodeName == 'add':
            # (the cleanup-mode branch header is elided from this listing)
            lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
            # (continue / try lines elided from this listing)
            osc = magic_get_osc(db, rec, lov)
            # Only ignore connect failures with --force, which
            # isn't implemented here yet.
            osc.prepare(ignore_connect_failure=0)
        except CommandError, e:
            print "Error preparing OSC %s\n" % osc.uuid
            # (raise elided from this listing)
            lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)

        # ------------------------------------------------------ deactivate
        elif rec.nodeName == 'deactivate':
            # (cleanup-mode guard and try lines elided from this listing)
            osc = magic_get_osc(db, rec, lov)
            # (osc.deactivate() call elided from this listing)
        except CommandError, e:
            print "Error deactivating OSC %s\n" % osc.uuid
            # (raise elided from this listing)

        # ---------------------------------------------------------- delete
        elif rec.nodeName == 'delete':
            # (cleanup-mode guard and try lines elided from this listing)
            osc = magic_get_osc(db, rec, lov)
            # (osc cleanup calls elided from this listing)
        except CommandError, e:
            print "Error cleaning up OSC %s\n" % osc.uuid
            # (raise elided from this listing)
            lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
def process_updates(db, log_device, log_name, lov = None):
    # Replay every non-empty <update> record into a versioned config
    # log ("<log_name>-<version>") on log_device.  The "for u in
    # updates:", continue and end_record lines are elided from this
    # listing.
    updates = db.root_node.getElementsByTagName('update')
    if not u.childNodes:
        log("ignoring empty update record (version " +
            str(u.getAttribute('version')) + ")")
    version = u.getAttribute('version')
    real_name = "%s-%s" % (log_name, version)
    lctl.clear_log(log_device, real_name)
    lctl.record(log_device, real_name)
    process_update_record(db, u, lov)
def doWriteconf(services):
    # Write the startup config log for mdsdev/osd services only.
    # (The loop header and n.write_conf() call are elided from this
    # listing.)
    if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
        n = newService(s[1])
def doSetup(services):
    # Instantiate each service, re-sort by corrected level and prepare
    # them in ascending order.  (The nosetup guard, list inits, sort and
    # prepare calls are elided from this listing.)
    n = newService(s[1])
    slist.append((n.level, n))
    nl = n[1].correct_level(n[0])
    nlist.append((nl, n[1]))
def doLoadModules(services):
    # Collect every service's module requirements, then load them all
    # at once.  (The nomod guard and loop header are elided from this
    # listing.)
    # adding all needed modules from all services
    n = newService(s[1])
    n.add_module(mod_manager)
    # loading all registered modules
    mod_manager.load_modules()
def doUnloadModules(services):
    # Mirror of doLoadModules for shutdown: collect modules of services
    # that are safe to clean, then unload them.  (The nomod guard and
    # loop header are elided from this listing.)
    # adding all needed modules from all services
    n = newService(s[1])
    if n.safe_to_clean_modules():
        n.add_module(mod_manager)
    # unloading all registered modules
    mod_manager.cleanup_modules()
def doCleanup(services):
    # Instantiate services, re-sort by corrected level and clean them up
    # in descending order.  (The nosetup guard, list inits, sort/reverse
    # and cleanup calls are elided from this listing.)
    n = newService(s[1])
    slist.append((n.level, n))
    nl = n[1].correct_level(n[0])
    nlist.append((nl, n[1]))
    if n[1].safe_to_clean():
def doHost(lustreDB, hosts):
    """Main per-node driver: find this host's node entry, then run the
    requested mode (write_conf / recover / cleanup / setup) over its
    profiles.  NOTE(review): many loop headers, guards and else lines
    are elided from this listing."""
    global is_router, local_node_name
    # (the "for h in hosts:" lookup loop header is elided)
    node_db = lustreDB.lookup_name(h, 'node')
    # (break / "if not node_db:" lines are elided)
    panic('No host entry found.')

    local_node_name = node_db.get_val('name', 0)
    is_router = node_db.get_val_int('router', 0)
    lustre_upcall = node_db.get_val('lustreUpcall', '')
    portals_upcall = node_db.get_val('portalsUpcall', '')
    timeout = node_db.get_val_int('timeout', 0)
    ptldebug = node_db.get_val('ptldebug', '')
    subsystem = node_db.get_val('subsystem', '')

    find_local_clusters(node_db)
    # (the "if not is_router:" guard is elided)
    find_local_routes(lustreDB)

    # Two step process: (1) load modules, (2) setup lustre
    # if not cleaning, load modules first.
    prof_list = node_db.get_refs('profile')

    if config.write_conf:
        # --write_conf: load, write the config logs, then unload again
        for_each_profile(node_db, prof_list, doLoadModules)
        # (doSetup / doCleanup passes are elided from this listing)
        for_each_profile(node_db, prof_list, doWriteconf)
        for_each_profile(node_db, prof_list, doUnloadModules)
        # (lustreDB.close() is elided from this listing)

    elif config.recover:
        if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
            raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
                                     "--client_uuid <UUID> --conn_uuid <UUID>")
        doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
        # (config.conn_uuid argument line is elided from this listing)

    elif config.cleanup:
        # (force/failover handling is elided from this listing)
        # the command line can override this value
        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            for_each_profile(node_db, prof_list, doCleanup)
            # (return is elided from this listing)
        sys_set_timeout(timeout)
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)
        for_each_profile(node_db, prof_list, doCleanup)
        for_each_profile(node_db, prof_list, doUnloadModules)
        # (lustreDB.close() and the final "else:" setup branch header
        # are elided from this listing)

        # ugly hack, only need to run lctl commands for --dump
        if config.lctl_dump or config.record:
            sys_set_timeout(timeout)
            sys_set_lustre_upcall(lustre_upcall)
            for_each_profile(node_db, prof_list, doSetup)
            # (return is elided from this listing)
        # raise the socket buffer ceilings before loading socknal
        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
        for_each_profile(node_db, prof_list, doLoadModules)
        sys_set_debug_path()
        sys_set_ptldebug(ptldebug)
        sys_set_subsystem(subsystem)
        script = config.gdb_script
        run(lctl.lctl, ' modules >', script)
        # (the "if config.gdb:" guard is elided from this listing)
        log ("The GDB module script is in", script)
        # pause, so user has time to break and
        # (time.sleep(5) is elided from this listing)
        sys_set_timeout(timeout)
        sys_set_lustre_upcall(lustre_upcall)
        sys_set_portals_upcall(portals_upcall)
        for_each_profile(node_db, prof_list, doSetup)
        # (lustreDB.close() is elided from this listing)
def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
    # Fail a client over to the currently-active server for tgt_uuid:
    # disconnect the old nid, then recover the client onto the new one.
    # (Guards, try lines and the final connect call are elided from
    # this listing.)
    tgt = lustreDB.lookup(tgt_uuid)
    # (the "if not tgt:" guard is elided from this listing)
    raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
    new_uuid = get_active_target(tgt)
    # (the "if not new_uuid:" guard is elided from this listing)
    raise Lustre.LconfError("doRecovery: no active target found for: " +
    # (tgt_uuid argument line is elided from this listing)
    net = choose_local_server(get_ost_net(lustreDB, new_uuid))
    # (the "if not net:" guard is elided from this listing)
    raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)

    log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
    # best-effort disconnect of the old connection first
    oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
    # (the "if oldnet:" guard and try are elided from this listing)
    lctl.disconnect(oldnet)
    except CommandError, e:
        log("recover: disconnect", nid_uuid, "failed: ")
    # (the connect try block is elided from this listing)
    except CommandError, e:
        log("recover: connect failed")
    lctl.recover(client_uuid, net.nid_uuid)
def setupModulePath(cmd, portals_dir = PORTALS_DIR):
    # Derive config.lustre / config.portals module search paths from the
    # location of the lconf binary (in development mode) or from the
    # --lustre/--portals command line options.
    base = os.path.dirname(cmd)
    if development_mode():
        if not config.lustre:
            debug('using objdir module paths')
            config.lustre = (os.path.join(base, ".."))
        # normalize the portals dir, using command line arg if set
        # (the "if config.portals:" guard is elided from this listing)
        portals_dir = config.portals
        dir = os.path.join(config.lustre, portals_dir)
        config.portals = dir
        debug('config.portals', config.portals)
    elif config.lustre and config.portals:
        # if --lustre and --portals, normalize portals
        # can ignore PORTALS_DIR here, since it is probably useless here
        config.portals = os.path.join(config.lustre, config.portals)
        debug('config.portals B', config.portals)
def sysctl(path, val):
    # Write val to /proc/sys/<path>.  (The noexec guard, the write/close
    # and the IOError handler are elided from this listing.)
    debug("+ sysctl", path, val)
    fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    # Point the portals debug-dump path at config.debug_path.
    sysctl('portals/debug_path', config.debug_path)
def sys_set_lustre_upcall(upcall):
    # the command overrides the value in the node config
    if config.lustre_upcall:
        upcall = config.lustre_upcall
    # (the "elif config.upcall:" line is elided from this listing)
    upcall = config.upcall
    # (the "if upcall:" guard is elided from this listing)
    lctl.set_lustre_upcall(upcall)
def sys_set_portals_upcall(upcall):
    # the command overrides the value in the node config
    if config.portals_upcall:
        upcall = config.portals_upcall
    # (the "elif config.upcall:" line is elided from this listing)
    upcall = config.upcall
    # (the "if upcall:" guard is elided from this listing)
    sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout via lctl.

    The --timeout command line option overrides the value from the node
    config; missing (None) or non-positive timeouts are ignored.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # fix idiom: compare to None with identity, not equality (PEP 8)
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
def sys_tweak_socknal ():
    # reserve at least 8MB, or we run out of RAM in skb_alloc under read
    if sys_get_branch() == '2.6':
        fp = open('/proc/meminfo')
        lines = fp.readlines()
        # (fp.close() and the per-line split loop are elided from this
        # listing)
        if a[0] == 'MemTotal:':
            # (memtotal assignment is elided from this listing)
            debug("memtotal" + memtotal)
        # small-memory boxes get 1/16 of RAM as min_free_kbytes
        if int(memtotal) < 262144:
            minfree = int(memtotal) / 16
            # (the "else:" fixed-value branch is elided from this
            # listing)
        debug("+ minfree ", minfree)
        sysctl("vm/min_free_kbytes", minfree)
    if config.single_socket:
        sysctl("socknal/typed", 0)
def sys_optimize_elan ():
    # Reduce event-interrupt punt loops on every Quadrics Elan proc
    # knob that exists on this kernel.  (The "for p in procfiles:" line
    # is elided from this listing.)
    procfiles = ["/proc/elan/config/eventint_punt_loops",
                 "/proc/qsnet/elan3/config/eventint_punt_loops",
                 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
    if os.access(p, os.W_OK):
        run ("echo 1 > " + p)
def sys_set_ptldebug(ptldebug):
    # Evaluate a symbolic debug-mask expression (e.g. "trace|malloc")
    # against ptldebug_names and write it to portals/debug.  The
    # command-line guard, try and panic lines are elided from this
    # listing.
    ptldebug = config.ptldebug
    val = eval(ptldebug, ptldebug_names)
    # mask to 32 bits ("L" suffix: Python 2 long literal)
    val = "0x%x" % (val & 0xffffffffL)
    sysctl('portals/debug', val)
    except NameError, e:
def sys_set_subsystem(subsystem):
    # Evaluate a symbolic subsystem mask against subsystem_names and
    # write it to portals/subsystem_debug.  The guard, try and panic
    # lines are elided from this listing.
    if config.subsystem:
        subsystem = config.subsystem
    val = eval(subsystem, subsystem_names)
    # mask to 32 bits ("L" suffix: Python 2 long literal)
    val = "0x%x" % (val & 0xffffffffL)
    sysctl('portals/subsystem_debug', val)
    except NameError, e:
def sys_set_netmem_max(path, max):
    # Raise the kernel socket-buffer limit at `path` to at least `max`.
    # (The read of the current value, the comparison guard and the
    # noexec handling are elided from this listing.)
    debug("setting", path, "to at least", max)
    fp = open(path, 'w')
    fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes when absent."""
    device_table = (('/dev/portals', 'mknod /dev/portals c 10 240'),
                    ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for node, mknod_cmd in device_table:
        if not os.access(node, os.R_OK):
            run(mknod_cmd)
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    syspath = string.split(os.environ['PATH'], ':')
    if new_dir in syspath:
        # (the "return" line is elided from this listing)
    os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
def default_debug_path():
    # Default dump location, relocated under /r when that ramdisk root
    # exists (return lines are elided from this listing).
    path = '/tmp/lustre-log'
    if os.path.isdir('/r'):
def default_gdb_script():
    # Default gdb helper-script path, relocated under /r when that
    # ramdisk root exists (the "else" return is elided from this
    # listing).
    script = '/tmp/ogdb'
    if os.path.isdir('/r'):
        return '/r' + script
# directories every lconf run relies on for external commands
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    # (the add_to_path(dir) call is elided from this listing)
    for dir in DEFAULT_PATH:
# global hack for the --select handling
def init_select(args):
    # args = [service=nodeA,service2=nodeB service3=nodeC]
    # Fill the global tgt_select map from --select arguments.  (The
    # global declaration and both loop headers are elided from this
    # listing.)
    list = string.split(arg, ',')
    srv, node = string.split(entry, '=')
    tgt_select[srv] = node
def get_select(srv):
    # Return the node selected for srv via --select, or fall through
    # (the None-returning branch is elided from this listing).
    if tgt_select.has_key(srv):
        return tgt_select[srv]
# Shorthand aliases for the option-type constants used in the table
# below.
FLAG = Lustre.Options.FLAG
PARAM = Lustre.Options.PARAM
INTPARAM = Lustre.Options.INTPARAM
PARAMLIST = Lustre.Options.PARAMLIST
# Command line option table: (name[,short], help[, type[, default]]).
# The "lconf_options = [" opening line and several entries' type/default
# lines are elided from this listing.  NOTE(review): typos inside the
# help strings ("aproximatly", "This can used") are runtime data and
# deliberately left untouched here.
('verbose,v', "Print system commands as they are run"),
('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
('config', "Cluster config name used for LDAP query", PARAM),
('select', "service=nodeA,service2=nodeB ", PARAMLIST),
('node',   "Load config for <nodename>", PARAM),
('sec', "security flavor <null|krb5i|krb5p> of client", PARAM),
('mds_mds_sec', "security flavor <null|krb5i|krb5p> of inter mds's", PARAM),
('mds_ost_sec', "security flavor <null|krb5i|krb5p> of mds's-ost's", PARAM),
('cleanup,d', "Cleans up config. (Shutdown)"),
('force,f', "Forced unmounting and/or obd detach during cleanup",
('single_socket', "socknal option: only use one socket instead of bundle",
('failover',"""Used to shut down without saving state.
               This will allow this node to "give up" a service to a
               another node for failover purposes. This will not
               be a clean shutdown.""",
('gdb', """Prints message after creating gdb module script
           and sleeps for 5 seconds."""),
('noexec,n', """Prints the commands and steps that will be run for a
                config without executing them. This can used to check if a
                config file is doing what it should be doing"""),
('nomod', "Skip load/unload module step."),
('nosetup', "Skip device setup/cleanup step."),
('reformat', "Reformat all devices (without question)"),
('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
('mountfsoptions', "Additional options for mount fs command line", PARAM),
('clientoptions', "Additional options for Lustre", PARAM),
('dump',  "Dump the kernel debug log to file before portals is unloaded",
('write_conf', "Save all the client config information on mds."),
('record', "Write config information on mds."),
('record_log', "Name of config record log.", PARAM),
('record_device', "MDS device name that will record the config commands",
('root_squash', "MDS squash root to appointed uid",
('no_root_squash', "Don't squash root for appointed nid",
('minlevel', "Minimum level of services to configure/cleanup",
('maxlevel', """Maximum level of services to configure/cleanup
                Levels are aproximatly like:
                        70 - mountpoint, echo_client, osc, mdc, lov""",
('lustre', """Base directory of lustre sources. This parameter will
              cause lconf to load modules from a source tree.""", PARAM),
('portals', """Portals source directory.  If this is a relative path,
               then it is assumed to be relative to lustre. """, PARAM),
('timeout', "Set recovery timeout", INTPARAM),
('upcall',  "Set both portals and lustre upcall script", PARAM),
('lustre_upcall', "Set lustre upcall script", PARAM),
('portals_upcall', "Set portals upcall script", PARAM),
('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
('ptldebug', "Set the portals debug level",  PARAM),
('subsystem', "Set the portals debug subsystem",  PARAM),
('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
# Client recovery options
('recover', "Recover a device"),
('group', "The group of devices to configure or cleanup", PARAM),
('tgt_uuid', "The failed target (required for recovery)", PARAM),
('client_uuid', "The failed client (required for recovery)", PARAM),
('conn_uuid', "The failed connection (required for recovery)", PARAM),
('inactive', """The name of an inactive service, to be ignored during
                mounting (currently OST-only). Can be repeated.""",
# main() body -- its "def main():" header line is elided from this
# listing.  Parses options, loads the XML or LDAP config, seeds the
# PRNG, then drives doHost() for this node.  NOTE(review): many try,
# else and sys.exit lines are elided below.
global lctl, config, toplustreDB, CONFIG_FILE, mod_manager

# in the upcall this is set to SIG_IGN
signal.signal(signal.SIGCHLD, signal.SIG_DFL)

cl = Lustre.Options("lconf", "config.xml", lconf_options)
# (the try around parsing is elided from this listing)
config, args = cl.parse(sys.argv[1:])
except Lustre.OptionError, e:
    # (error print and exit are elided from this listing)

setupModulePath(sys.argv[0])

host = socket.gethostname()

# the PRNG is normally seeded with time(), which is not so good for starting
# time-synchronized clusters
input = open('/dev/urandom', 'r')
# (the "if not input:" guard is elided from this listing)
print 'Unable to open /dev/urandom!'
# (sys.exit and the random.seed(seed) call are elided from this listing)
seed = input.read(32)

init_select(config.select)

# (the "if len(args) > 0:" branch header is elided from this listing)
# allow config to be fetched via HTTP, but only with python2
if sys.version[0] != '1' and args[0].startswith('http://'):
    # (import urllib2 and the surrounding try are elided)
    config_file = urllib2.urlopen(args[0])
except (urllib2.URLError, socket.error), err:
    if hasattr(err, 'args'):
        # (err = err.args[1] is elided from this listing)
    print "Could not access '%s': %s" %(args[0], err)
    # (sys.exit is elided from this listing)
elif not os.access(args[0], os.R_OK):
    print 'File not found or readable:', args[0]
    # (sys.exit and the "else:" open branch are elided)
config_file = open(args[0], 'r')
# (the parse try is elided from this listing)
dom = xml.dom.minidom.parse(config_file)
# (the "except Exception:" line is elided from this listing)
panic("%s does not appear to be a config file." % (args[0]))
sys.exit(1) # make sure to die here, even in debug mode.
# (config_file.close() is elided from this listing)
CONFIG_FILE = args[0]
lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
if not config.config:
    config.config = os.path.basename(args[0])# use full path?
    if config.config[-4:] == '.xml':
        config.config = config.config[:-4]
elif config.ldapurl:
    if not config.config:
        panic("--ldapurl requires --config name")
    dn = "config=%s,fs=lustre" % (config.config)
    lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
elif config.ptldebug or config.subsystem:
    sys_set_ptldebug(None)
    sys_set_subsystem(None)
    # (sys.exit and the final "else:" usage branch are elided)
print 'Missing config file or ldap URL.'
print 'see lconf --help for command summary'
# (sys.exit is elided from this listing)

toplustreDB = lustreDB

ver = lustreDB.get_version()
# (the "if not ver:" guard is elided from this listing)
panic("No version found in config data, please recreate.")
if ver != Lustre.CONFIG_VERSION:
    panic("Config version", ver, "does not match lconf version",
          Lustre.CONFIG_VERSION)

# node list: --node wins, otherwise hostname then localhost fallback
node_list.append(config.node)
node_list.append(host)
node_list.append('localhost')

debug("configuring for host: ", node_list)

# per-host suffixes keep multi-node debug artifacts separate
config.debug_path = config.debug_path + '-' + host
config.gdb_script = config.gdb_script + '-' + host

lctl = LCTLInterface('lctl')

if config.lctl_dump:
    lctl.use_save_file(config.lctl_dump)

# (the "if config.record:" branch header is elided from this listing)
if not (config.record_device and config.record_log):
    panic("When recording, both --record_log and --record_device must be specified.")
lctl.clear_log(config.record_device, config.record_log)
lctl.record(config.record_device, config.record_log)

# init module manager
mod_manager = kmod_manager(config.lustre, config.portals)

doHost(lustreDB, node_list)

if not config.record:
    # (return is elided from this listing)
# (lctl.end_record() presumably precedes this -- elided)
process_updates(lustreDB, config.record_device, config.record_log)
if __name__ == "__main__":
    # (the try/main() call and several handlers are elided from this
    # listing)
    except Lustre.LconfError, e:
        # (error print is elided from this listing)
        # traceback.print_exc(file=sys.stdout)
    except CommandError, e:
        # (e.dump() and sys.exit(e.rc) are elided from this listing)
    # propagate the first cleanup failure as the exit status
    if first_cleanup_error:
        sys.exit(first_cleanup_error)