3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
108 "pinger" : (1 << 12),
109 "filter" : (1 << 13),
114 "ptlrouter" : (1 << 18),
118 "confobd" : (1 << 22),
first_cleanup_error = 0

def cleanup_error(rc):
    """Remember the first non-zero cleanup status; later errors are ignored."""
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
130 # ============================================================
131 # debugging and error funcs
def fixme(msg = "this feature"):
    """Raise a LconfError reporting that *msg* is not implemented."""
    # call-style raise is equivalent to the legacy "raise cls, arg" form
    raise Lustre.LconfError(msg + ' not implemented yet.')
137 msg = string.join(map(str,args))
138 if not config.noexec:
139 raise Lustre.LconfError(msg)
144 msg = string.join(map(str,args))
149 print string.strip(s)
153 msg = string.join(map(str,args))
156 # ack, python's builtin int() does not support '0x123' syntax.
157 # eval can do it, although what a hack!
161 return eval(s, {}, {})
164 except SyntaxError, e:
165 raise ValueError("not a number")
167 raise ValueError("not a number")
169 # ============================================================
170 # locally defined exceptions
171 class CommandError (exceptions.Exception):
172 def __init__(self, cmd_name, cmd_err, rc=None):
173 self.cmd_name = cmd_name
174 self.cmd_err = cmd_err
179 if type(self.cmd_err) == types.StringType:
181 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
183 print "! %s: %s" % (self.cmd_name, self.cmd_err)
184 elif type(self.cmd_err) == types.ListType:
186 print "! %s (error %d):" % (self.cmd_name, self.rc)
188 print "! %s:" % (self.cmd_name)
189 for s in self.cmd_err:
190 print "> %s" %(string.strip(s))
195 # ============================================================
196 # handle daemons, like the acceptor
198 """ Manage starting and stopping a daemon. Assumes daemon manages
199 it's own pid file. """
201 def __init__(self, cmd):
207 log(self.command, "already running.")
209 self.path = find_prog(self.command)
211 panic(self.command, "not found.")
212 ret, out = runcmd(self.path +' '+ self.command_line())
214 raise CommandError(self.path, out, ret)
218 pid = self.read_pidfile()
221 log ("killing process", pid)
224 log("was unable to find pid of " + self.command)
225 #time.sleep(1) # let daemon die
227 log("unable to kill", self.command, e)
229 log("unable to kill", self.command)
232 pid = self.read_pidfile()
238 log("was unable to find pid of " + self.command)
245 def read_pidfile(self):
247 fp = open(self.pidfile(), 'r')
257 def clean_pidfile(self):
258 """ Remove a stale pidfile """
259 log("removing stale pidfile:", self.pidfile())
261 os.unlink(self.pidfile())
263 log(self.pidfile(), e)
265 class AcceptorHandler(DaemonHandler):
266 def __init__(self, port, net_type):
267 DaemonHandler.__init__(self, "acceptor")
272 return "/var/run/%s-%d.pid" % (self.command, self.port)
274 def command_line(self):
275 return string.join(map(str,(self.flags, self.port)))
279 # start the acceptors
281 if config.lctl_dump or config.record:
283 for port in acceptors.keys():
284 daemon = acceptors[port]
285 if not daemon.running():
288 def run_one_acceptor(port):
289 if config.lctl_dump or config.record:
291 if acceptors.has_key(port):
292 daemon = acceptors[port]
293 if not daemon.running():
296 panic("run_one_acceptor: No acceptor defined for port:", port)
298 def stop_acceptor(port):
299 if acceptors.has_key(port):
300 daemon = acceptors[port]
305 # ============================================================
306 # handle lctl interface
309 Manage communication with lctl
312 def __init__(self, cmd):
314 Initialize close by finding the lctl binary.
316 self.lctl = find_prog(cmd)
318 self.record_device = ''
321 debug('! lctl not found')
324 raise CommandError('lctl', "unable to find lctl binary.")
326 def use_save_file(self, file):
327 self.save_file = file
329 def record(self, dev_name, logname):
330 log("Recording log", logname, "on", dev_name)
331 self.record_device = dev_name
332 self.record_log = logname
334 def end_record(self):
335 log("End recording log", self.record_log, "on", self.record_device)
336 self.record_device = None
337 self.record_log = None
339 def set_nonblock(self, fd):
340 fl = fcntl.fcntl(fd, F_GETFL)
341 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
346 the cmds are written to stdin of lctl
347 lctl doesn't return errors when run in script mode, so
349 should modify command line to accept multiple commands, or
350 create complex command line options
354 cmds = '\n dump ' + self.save_file + '\n' + cmds
355 elif self.record_device:
359 %s""" % (self.record_device, self.record_log, cmds)
361 debug("+", cmd_line, cmds)
362 if config.noexec: return (0, [])
364 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
365 child.tochild.write(cmds + "\n")
366 child.tochild.close()
367 # print "LCTL:", cmds
369 # From "Python Cookbook" from O'Reilly
370 outfile = child.fromchild
371 outfd = outfile.fileno()
372 self.set_nonblock(outfd)
373 errfile = child.childerr
374 errfd = errfile.fileno()
375 self.set_nonblock(errfd)
377 outdata = errdata = ''
380 ready = select.select([outfd,errfd],[],[]) # Wait for input
381 if outfd in ready[0]:
382 outchunk = outfile.read()
383 if outchunk == '': outeof = 1
384 outdata = outdata + outchunk
385 if errfd in ready[0]:
386 errchunk = errfile.read()
387 if errchunk == '': erreof = 1
388 errdata = errdata + errchunk
389 if outeof and erreof: break
390 # end of "borrowed" code
393 if os.WIFEXITED(ret):
394 rc = os.WEXITSTATUS(ret)
397 if rc or len(errdata):
398 raise CommandError(self.lctl, errdata, rc)
401 def runcmd(self, *args):
403 run lctl using the command line
405 cmd = string.join(map(str,args))
406 debug("+", self.lctl, cmd)
407 rc, out = run(self.lctl, cmd)
409 raise CommandError(self.lctl, out, rc)
412 def clear_log(self, dev, log):
413 """ clear an existing log """
418 quit """ % (dev, log)
421 def root_squash(self, name, uid, nid):
425 quit""" % (name, uid, nid)
428 def network(self, net, nid):
433 quit """ % (net, nid)
437 def add_interface(self, net, ip, netmask = ""):
438 """ add an interface """
442 quit """ % (net, ip, netmask)
445 # delete an interface
446 def del_interface(self, net, ip):
447 """ delete an interface """
454 # create a new connection
455 def add_uuid(self, net_type, uuid, nid):
456 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
459 def add_peer(self, net_type, nid, hostaddr, port):
460 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
465 nid, hostaddr, port )
467 elif net_type in ('iib',) and not config.lctl_dump:
474 elif net_type in ('vib',) and not config.lctl_dump:
482 def connect(self, srv):
483 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
484 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
486 hostaddr = string.split(srv.hostaddr[0], '/')[0]
487 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
490 def recover(self, dev_name, new_conn):
493 recover %s""" %(dev_name, new_conn)
496 # add a route to a range
497 def add_route(self, net, gw, lo, hi):
505 except CommandError, e:
509 def del_route(self, net, gw, lo, hi):
514 quit """ % (net, gw, lo, hi)
517 # add a route to a host
518 def add_route_host(self, net, uuid, gw, tgt):
519 self.add_uuid(net, uuid, tgt)
527 except CommandError, e:
531 # add a route to a range
532 def del_route_host(self, net, uuid, gw, tgt):
538 quit """ % (net, gw, tgt)
542 def del_peer(self, net_type, nid, hostaddr):
543 if net_type in ('tcp',) and not config.lctl_dump:
547 del_peer %s %s single_share
551 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
555 del_peer %s single_share
560 # disconnect one connection
561 def disconnect(self, srv):
562 self.del_uuid(srv.nid_uuid)
563 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
565 hostaddr = string.split(srv.hostaddr[0], '/')[0]
566 self.del_peer(srv.net_type, srv.nid, hostaddr)
568 def del_uuid(self, uuid):
576 def disconnectAll(self, net):
584 def attach(self, type, name, uuid):
587 quit""" % (type, name, uuid)
590 def setup(self, name, setup = ""):
594 quit""" % (name, setup)
597 def add_conn(self, name, conn_uuid):
601 quit""" % (name, conn_uuid)
605 # create a new device with lctl
606 def newdev(self, type, name, uuid, setup = ""):
607 self.attach(type, name, uuid);
609 self.setup(name, setup)
610 except CommandError, e:
611 self.cleanup(name, uuid, 0)
616 def cleanup(self, name, uuid, force, failover = 0):
617 if failover: force = 1
623 quit""" % (name, ('', 'force')[force],
624 ('', 'failover')[failover])
628 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
629 stripe_sz, stripe_off, pattern, devlist = None):
632 lov_setup %s %d %d %d %s %s
633 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
637 # add an OBD to a LOV
638 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
640 lov_modify_tgts add %s %s %s %s
641 quit""" % (name, obd_uuid, index, gen)
645 def lmv_setup(self, name, uuid, desc_uuid, devlist):
649 quit""" % (name, uuid, desc_uuid, devlist)
652 # delete an OBD from a LOV
653 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
655 lov_modify_tgts del %s %s %s %s
656 quit""" % (name, obd_uuid, index, gen)
660 def deactivate(self, name):
668 def dump(self, dump_file):
671 quit""" % (dump_file)
674 # get list of devices
675 def device_list(self):
676 devices = '/proc/fs/lustre/devices'
678 if os.access(devices, os.R_OK):
680 fp = open(devices, 'r')
688 def lustre_version(self):
689 rc, out = self.runcmd('version')
693 def mount_option(self, profile, osc, mdc):
695 mount_option %s %s %s
696 quit""" % (profile, osc, mdc)
699 # delete mount options
700 def del_mount_option(self, profile):
706 def set_timeout(self, timeout):
712 def set_lustre_upcall(self, upcall):
717 # ============================================================
718 # Various system-level functions
719 # (ideally moved to their own module)
721 # Run a command and return the output and status.
722 # stderr is sent to /dev/null, could use popen3 to
723 # save it if necessary
726 if config.noexec: return (0, [])
727 f = os.popen(cmd + ' 2>&1')
737 cmd = string.join(map(str,args))
740 # Run a command in the background.
741 def run_daemon(*args):
742 cmd = string.join(map(str,args))
744 if config.noexec: return 0
745 f = os.popen(cmd + ' 2>&1')
753 # Determine full path to use for an external command
754 # searches dirname(argv[0]) first, then PATH
756 syspath = string.split(os.environ['PATH'], ':')
757 cmdpath = os.path.dirname(sys.argv[0])
758 syspath.insert(0, cmdpath);
760 syspath.insert(0, os.path.join(config.portals, 'utils/'))
762 prog = os.path.join(d,cmd)
763 if os.access(prog, os.X_OK):
767 # Recursively look for file starting at base dir
768 def do_find_file(base, mod):
769 fullname = os.path.join(base, mod)
770 if os.access(fullname, os.R_OK):
772 for d in os.listdir(base):
773 dir = os.path.join(base,d)
774 if os.path.isdir(dir):
775 module = do_find_file(dir, mod)
779 # is the path a block device?
786 return stat.S_ISBLK(s[stat.ST_MODE])
788 # find the journal device from mkfs options
794 while i < len(x) - 1:
795 if x[i] == '-J' and x[i+1].startswith('device='):
801 # build fs according to type
803 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
809 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
811 # devsize is in 1k, and fs block count is in 4k
812 block_cnt = devsize/4
814 if fstype in ('ext3', 'extN', 'ldiskfs'):
815 # ext3 journal size is in megabytes
816 # but don't set jsize if mkfsoptions indicates a separate journal device
817 if jsize == 0 and jdev(mkfsoptions) == '':
819 if not is_block(dev):
820 ret, out = runcmd("ls -l %s" %dev)
821 devsize = int(string.split(out[0])[4]) / 1024
823 # sfdisk works for symlink, hardlink, and realdev
824 ret, out = runcmd("sfdisk -s %s" %dev)
826 devsize = int(out[0])
828 # sfdisk -s will fail for too large block device,
829 # then, read the size of partition from /proc/partitions
831 # get the realpath of the device
832 # it may be the real device, such as /dev/hda7
833 # or the hardlink created via mknod for a device
834 if 'realpath' in dir(os.path):
835 real_dev = os.path.realpath(dev)
839 while os.path.islink(real_dev) and (link_count < 20):
840 link_count = link_count + 1
841 dev_link = os.readlink(real_dev)
842 if os.path.isabs(dev_link):
845 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
847 panic("Entountered too many symbolic links resolving block device:", dev)
849 # get the major and minor number of the realpath via ls
850 # it seems python(os.stat) does not return
851 # the st_rdev member of the stat structure
852 ret, out = runcmd("ls -l %s" %real_dev)
853 major = string.split(string.split(out[0])[4], ",")[0]
854 minor = string.split(out[0])[5]
856 # get the devsize from /proc/partitions with the major and minor number
857 ret, out = runcmd("cat /proc/partitions")
860 if string.split(line)[0] == major and string.split(line)[1] == minor:
861 devsize = int(string.split(line)[2])
864 if devsize > 1024 * 1024:
865 jsize = ((devsize / 102400) * 4)
868 if jsize: jopt = "-J size=%d" %(jsize,)
869 if isize: iopt = "-I %d" %(isize,)
870 mkfs = 'mkfs.ext2 -j -b 4096 '
871 if not isblock or config.force:
873 if jdev(mkfsoptions) != '':
874 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
876 jmkfs = jmkfs + '-F '
877 jmkfs = jmkfs + jdev(mkfsoptions)
878 (ret, out) = run (jmkfs)
880 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
881 elif fstype == 'reiserfs':
882 # reiserfs journal size is in blocks
883 if jsize: jopt = "--journal_size %d" %(jsize,)
884 mkfs = 'mkreiserfs -ff'
886 panic('unsupported fs type: ', fstype)
888 if config.mkfsoptions != None:
889 mkfs = mkfs + ' ' + config.mkfsoptions
890 if mkfsoptions != None:
891 mkfs = mkfs + ' ' + mkfsoptions
892 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
894 panic("Unable to build fs:", dev, string.join(out))
895 # enable hash tree indexing on fsswe
896 if fstype in ('ext3', 'extN', 'ldiskfs'):
897 htree = 'echo "feature FEATURE_C5" | debugfs -w'
898 (ret, out) = run (htree, dev)
900 panic("Unable to enable htree:", dev)
902 # some systems use /dev/loopN, some /dev/loop/N
906 if not os.access(loop + str(0), os.R_OK):
908 if not os.access(loop + str(0), os.R_OK):
909 panic ("can't access loop devices")
912 # find loop device assigned to the file
913 def find_assigned_loop(file):
915 for n in xrange(0, MAX_LOOP_DEVICES):
917 if os.access(dev, os.R_OK):
918 (stat, out) = run('losetup', dev)
919 if out and stat == 0:
920 m = re.search(r'\((.*)\)', out[0])
921 if m and file == m.group(1):
927 # create file if necessary and assign the first free loop device
928 def init_loop(file, size, fstype, journal_size, inode_size,
929 mkfsoptions, reformat, autoformat, backfstype, backfile):
932 realfstype = backfstype
933 if is_block(backfile):
934 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
935 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
941 dev = find_assigned_loop(realfile)
943 print 'WARNING: file ', realfile, 'already mapped to', dev
946 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
948 panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size))
949 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
951 panic("Unable to create backing store:", realfile)
953 mkfs(realfile, size, realfstype, journal_size, inode_size,
954 mkfsoptions, isblock=0)
957 # find next free loop
958 for n in xrange(0, MAX_LOOP_DEVICES):
960 if os.access(dev, os.R_OK):
961 (stat, out) = run('losetup', dev)
963 print "attach " + realfile + " <-> " + dev
964 run('losetup', dev, realfile)
967 print "out of loop devices"
969 print "out of loop devices"
972 # undo loop assignment
973 def clean_loop(dev, fstype, backfstype, backdev):
978 if not is_block(realfile):
979 dev = find_assigned_loop(realfile)
981 print "detach " + dev + " <-> " + realfile
982 ret, out = run('losetup -d', dev)
984 log('unable to clean loop device:', dev, 'for file:', realfile)
# finalizes the passed device
def clean_dev(dev, fstype, backfstype, backdev):
    """Release loopback resources behind *dev*; raw block devices need no cleanup."""
    if fstype != 'smfs' and is_block(dev):
        return
    clean_loop(dev, fstype, backfstype, backdev)
992 # determine if dev is formatted as a <fstype> filesystem
993 def need_format(fstype, dev):
994 # FIXME don't know how to implement this
997 # initialize a block device if needed
998 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
999 inode_size, mkfsoptions, backfstype, backdev):
1003 if fstype == 'smfs' or not is_block(dev):
1004 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1005 mkfsoptions, reformat, autoformat, backfstype, backdev)
1006 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1007 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1010 # panic("device:", dev,
1011 # "not prepared, and autoformat is not set.\n",
1012 # "Rerun with --reformat option to format ALL filesystems")
1017 """lookup IP address for an interface"""
1018 rc, out = run("/sbin/ifconfig", iface)
1021 addr = string.split(out[1])[1]
1022 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return the default mount options for *fstype* on the given target.

    For ext3/ldiskfs the default is "errors=remount-ro", with ",asyncdel"
    appended for an OST on a 2.4-series kernel.  Other filesystem types
    have no default (None is returned).
    """
    if fstype not in ('ext3', 'ldiskfs'):
        return None
    opts = "errors=remount-ro"
    if target == 'ost' and sys_get_branch() == '2.4':
        opts = "%s,asyncdel" % (opts)
    return opts
1034 def sys_get_elan_position_file():
1035 procfiles = ["/proc/elan/device0/position",
1036 "/proc/qsnet/elan4/device0/position",
1037 "/proc/qsnet/elan3/device0/position"]
1039 if os.access(p, os.R_OK):
1043 def sys_get_local_nid(net_type, wildcard, cluster_id):
1044 """Return the local nid."""
1046 if sys_get_elan_position_file():
1047 local = sys_get_local_address('elan', '*', cluster_id)
1049 local = sys_get_local_address(net_type, wildcard, cluster_id)
1052 def sys_get_local_address(net_type, wildcard, cluster_id):
1053 """Return the local address for the network type."""
1055 if net_type in ('tcp','openib','iib','vib','ra'):
1057 iface, star = string.split(wildcard, ':')
1058 local = if2addr(iface)
1060 panic ("unable to determine ip for:", wildcard)
1062 host = socket.gethostname()
1063 local = socket.gethostbyname(host)
1064 elif net_type == 'elan':
1065 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1066 f = sys_get_elan_position_file()
1068 panic ("unable to determine local Elan ID")
1071 lines = fp.readlines()
1075 if a[0] == 'NodeId':
1079 nid = my_int(cluster_id) + my_int(elan_id)
1080 local = "%d" % (nid)
1081 except ValueError, e:
1085 elif net_type == 'lo':
1086 fixme("automatic local address for loopback")
1087 elif net_type == 'gm':
1088 fixme("automatic local address for GM")
1092 def sys_get_branch():
1093 """Returns kernel release"""
1095 fp = open('/proc/sys/kernel/osrelease')
1096 lines = fp.readlines()
1100 version = string.split(l)
1101 a = string.split(version[0], '.')
1102 return a[0] + '.' + a[1]
1107 # XXX: instead of device_list, ask for $name and see what we get
1108 def is_prepared(name):
1109 """Return true if a device exists for the name"""
1110 if config.lctl_dump:
1112 if (config.noexec or config.record) and config.cleanup:
1115 # expect this format:
1116 # 1 UP ldlm ldlm ldlm_UUID 2
1117 out = lctl.device_list()
1119 if name == string.split(s)[3]:
1121 except CommandError, e:
1125 def net_is_prepared():
1126 """If the any device exists, then assume that all networking
1127 has been configured"""
1128 out = lctl.device_list()
1131 def fs_is_mounted(path):
1132 """Return true if path is a mounted lustre filesystem"""
1134 fp = open('/proc/mounts')
1135 lines = fp.readlines()
1139 if a[1] == path and a[2] == 'lustre_lite':
1145 def kmod_find(src_dir, dev_dir, modname):
1146 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1147 for modext in '.ko', '.o':
1148 module = modbase + modext
1150 if os.access(module, os.R_OK):
1156 def kmod_info(modname):
1157 """Returns reference count for passed module name."""
1159 fp = open('/proc/modules')
1160 lines = fp.readlines()
1163 # please forgive my tired fingers for this one
1164 ret = filter(lambda word, mod = modname: word[0] == mod,
1165 map(lambda line: string.split(line), lines))
1169 except Exception, e:
1173 """Presents kernel module"""
1174 def __init__(self, src_dir, dev_dir, name):
1175 self.src_dir = src_dir
1176 self.dev_dir = dev_dir
1181 log ('loading module:', self.name, 'srcdir',
1182 self.src_dir, 'devdir', self.dev_dir)
1184 module = kmod_find(self.src_dir, self.dev_dir,
1187 panic('module not found:', self.name)
1188 (rc, out) = run('/sbin/insmod', module)
1190 raise CommandError('insmod', out, rc)
1192 (rc, out) = run('/sbin/modprobe', self.name)
1194 raise CommandError('modprobe', out, rc)
1198 log('unloading module:', self.name)
1199 (rc, out) = run('/sbin/rmmod', self.name)
1201 log('unable to unload module:', self.name +
1202 "(" + self.refcount() + ")")
1206 """Returns module info if any."""
1207 return kmod_info(self.name)
1210 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1217 """Returns module refcount."""
1224 """Returns 1 if module is used, otherwise 0 is returned."""
1230 if users and users != '(unused)' and users != '-':
1238 """Returns 1 if module is busy, otherwise 0 is returned."""
1239 if self.loaded() and (self.used() or self.refcount() != '0'):
1245 """Manage kernel modules"""
1246 def __init__(self, lustre_dir, portals_dir):
1247 self.lustre_dir = lustre_dir
1248 self.portals_dir = portals_dir
1249 self.kmodule_list = []
1251 def find_module(self, modname):
1252 """Find module by module name"""
1253 for mod in self.kmodule_list:
1254 if mod.name == modname:
1258 def add_portals_module(self, dev_dir, modname):
1259 """Append a module to list of modules to load."""
1261 mod = self.find_module(modname)
1263 mod = kmod(self.portals_dir, dev_dir, modname)
1264 self.kmodule_list.append(mod)
1266 def add_lustre_module(self, dev_dir, modname):
1267 """Append a module to list of modules to load."""
1269 mod = self.find_module(modname)
1271 mod = kmod(self.lustre_dir, dev_dir, modname)
1272 self.kmodule_list.append(mod)
1274 def load_modules(self):
1275 """Load all the modules in the list in the order they appear."""
1276 for mod in self.kmodule_list:
1277 if mod.loaded() and not config.noexec:
1281 def cleanup_modules(self):
1282 """Unload the modules in the list in reverse order."""
1283 rev = self.kmodule_list
1286 if (not mod.loaded() or mod.busy()) and not config.noexec:
1289 if mod.name == 'portals' and config.dump:
1290 lctl.dump(config.dump)
1293 # ============================================================
1294 # Classes to prepare and cleanup the various objects
1297 """ Base class for the rest of the modules. The default cleanup method is
1298 defined here, as well as some utilitiy funcs.
1300 def __init__(self, module_name, db):
1302 self.module_name = module_name
1303 self.name = self.db.getName()
1304 self.uuid = self.db.getUUID()
1308 def info(self, *args):
1309 msg = string.join(map(str,args))
1310 print self.module_name + ":", self.name, self.uuid, msg
1313 """ default cleanup, used for most modules """
1316 lctl.cleanup(self.name, self.uuid, config.force)
1317 except CommandError, e:
1318 log(self.module_name, "cleanup failed: ", self.name)
1322 def add_module(self, manager):
1323 """Adds all needed modules in the order they appear."""
1326 def safe_to_clean(self):
1329 def safe_to_clean_modules(self):
1330 return self.safe_to_clean()
1332 class Network(Module):
1333 def __init__(self,db):
1334 Module.__init__(self, 'NETWORK', db)
1335 self.net_type = self.db.get_val('nettype')
1336 self.nid = self.db.get_val('nid', '*')
1337 self.cluster_id = self.db.get_val('clusterid', "0")
1338 self.port = self.db.get_val_int('port', 0)
1341 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1343 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1344 self.generic_nid = 1
1345 debug("nid:", self.nid)
1347 self.generic_nid = 0
1349 self.nid_uuid = self.nid_to_uuid(self.nid)
1350 self.hostaddr = self.db.get_hostaddr()
1351 if len(self.hostaddr) == 0:
1352 self.hostaddr.append(self.nid)
1353 if '*' in self.hostaddr[0]:
1354 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1355 if not self.hostaddr[0]:
1356 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1357 debug("hostaddr:", self.hostaddr[0])
1359 def add_module(self, manager):
1360 manager.add_portals_module("libcfs", 'libcfs')
1361 manager.add_portals_module("portals", 'portals')
1362 if node_needs_router():
1363 manager.add_portals_module("router", 'kptlrouter')
1364 if self.net_type == 'tcp':
1365 manager.add_portals_module("knals/socknal", 'ksocknal')
1366 if self.net_type == 'elan':
1367 manager.add_portals_module("knals/qswnal", 'kqswnal')
1368 if self.net_type == 'gm':
1369 manager.add_portals_module("knals/gmnal", 'kgmnal')
1370 if self.net_type == 'openib':
1371 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1372 if self.net_type == 'iib':
1373 manager.add_portals_module("knals/iibnal", 'kiibnal')
1374 if self.net_type == 'vib':
1375 self.add_portals_module("knals/vibnal", 'kvibnal')
1376 if self.net_type == 'lo':
1377 manager.add_portals_module("knals/lonal", 'klonal')
1378 if self.net_type == 'ra':
1379 manager.add_portals_module("knals/ranal", 'kranal')
1381 def nid_to_uuid(self, nid):
1382 return "NID_%s_UUID" %(nid,)
1385 if not config.record and net_is_prepared():
1387 self.info(self.net_type, self.nid, self.port)
1388 if not (config.record and self.generic_nid):
1389 lctl.network(self.net_type, self.nid)
1390 if self.net_type == 'tcp':
1392 for hostaddr in self.db.get_hostaddr():
1393 ip = string.split(hostaddr, '/')[0]
1394 if len(string.split(hostaddr, '/')) == 2:
1395 netmask = string.split(hostaddr, '/')[1]
1398 lctl.add_interface(self.net_type, ip, netmask)
1399 if self.net_type == 'elan':
1401 if self.port and node_is_router():
1402 run_one_acceptor(self.port)
1403 self.connect_peer_gateways()
1405 def connect_peer_gateways(self):
1406 for router in self.db.lookup_class('node'):
1407 if router.get_val_int('router', 0):
1408 for netuuid in router.get_networks():
1409 net = self.db.lookup(netuuid)
1411 if (gw.cluster_id == self.cluster_id and
1412 gw.net_type == self.net_type):
1413 if gw.nid != self.nid:
1416 def disconnect_peer_gateways(self):
1417 for router in self.db.lookup_class('node'):
1418 if router.get_val_int('router', 0):
1419 for netuuid in router.get_networks():
1420 net = self.db.lookup(netuuid)
1422 if (gw.cluster_id == self.cluster_id and
1423 gw.net_type == self.net_type):
1424 if gw.nid != self.nid:
1427 except CommandError, e:
1428 print "disconnect failed: ", self.name
1432 def safe_to_clean(self):
1433 return not net_is_prepared()
1436 self.info(self.net_type, self.nid, self.port)
1438 stop_acceptor(self.port)
1439 if node_is_router():
1440 self.disconnect_peer_gateways()
1441 if self.net_type == 'tcp':
1442 for hostaddr in self.db.get_hostaddr():
1443 ip = string.split(hostaddr, '/')[0]
1444 lctl.del_interface(self.net_type, ip)
1446 def correct_level(self, level, op=None):
1449 class RouteTable(Module):
1450 def __init__(self,db):
1451 Module.__init__(self, 'ROUTES', db)
1453 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1455 # only setup connections for tcp, openib, and iib NALs
1457 if not net_type in ('tcp','openib','iib','vib','ra'):
1460 # connect to target if route is to single node and this node is the gw
1461 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1462 if not local_cluster(net_type, tgt_cluster_id):
1463 panic("target", lo, " not on the local cluster")
1464 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1465 # connect to gateway if this node is not the gw
1466 elif (local_cluster(net_type, gw_cluster_id)
1467 and not local_interface(net_type, gw_cluster_id, gw)):
1468 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1473 panic("no server for nid", lo)
1476 return Network(srvdb)
1479 if not config.record and net_is_prepared():
1482 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1483 lctl.add_route(net_type, gw, lo, hi)
1484 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1488 def safe_to_clean(self):
1489 return not net_is_prepared()
1492 if net_is_prepared():
1493 # the network is still being used, don't clean it up
1495 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1496 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1499 lctl.disconnect(srv)
1500 except CommandError, e:
1501 print "disconnect failed: ", self.name
1506 lctl.del_route(net_type, gw, lo, hi)
1507 except CommandError, e:
1508 print "del_route failed: ", self.name
1512 class Management(Module):
1513 def __init__(self, db):
1514 Module.__init__(self, 'MGMT', db)
1516 def add_module(self, manager):
1517 manager.add_lustre_module('lvfs', 'lvfs')
1518 manager.add_lustre_module('obdclass', 'obdclass')
1519 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1520 manager.add_lustre_module('mgmt', 'mgmt_svc')
1523 if not config.record and is_prepared(self.name):
1526 lctl.newdev("mgmt", self.name, self.uuid)
1528 def safe_to_clean(self):
1532 if is_prepared(self.name):
1533 Module.cleanup(self)
1535 def correct_level(self, level, op=None):
1538 # This is only needed to load the modules; the LDLM device
1539 # is now created automatically.
1541 def __init__(self,db):
1542 Module.__init__(self, 'LDLM', db)
1544 def add_module(self, manager):
1545 manager.add_lustre_module('lvfs', 'lvfs')
1546 manager.add_lustre_module('obdclass', 'obdclass')
1547 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1555 def correct_level(self, level, op=None):
1559 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1560 Module.__init__(self, 'LOV', db)
1561 if name_override != None:
1562 self.name = "lov_%s" % name_override
1563 self.mds_uuid = self.db.get_first_ref('mds')
1564 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1565 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1566 self.pattern = self.db.get_val_int('stripepattern', 0)
1567 self.devlist = self.db.get_lov_tgts('lov_tgt')
1568 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1571 self.desc_uuid = self.uuid
1572 self.uuid = generate_client_uuid(self.name)
1573 self.fs_name = fs_name
1575 self.config_only = 1
1577 self.config_only = None
1578 mds = self.db.lookup(self.mds_uuid)
1579 self.mds_name = mds.getName()
1580 for (obd_uuid, index, gen, active) in self.devlist:
1583 self.obdlist.append(obd_uuid)
1584 obd = self.db.lookup(obd_uuid)
1585 osc = get_osc(obd, self.uuid, fs_name)
1587 self.osclist.append((osc, index, gen, active))
1589 panic('osc not found:', obd_uuid)
1595 if not config.record and is_prepared(self.name):
1597 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1598 self.stripe_off, self.pattern, self.devlist,
1600 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1601 self.stripe_sz, self.stripe_off, self.pattern,
1602 string.join(self.obdlist))
1603 for (osc, index, gen, active) in self.osclist:
1604 target_uuid = osc.target_uuid
1606 # Only ignore connect failures with --force, which
1607 # isn't implemented here yet.
1609 osc.prepare(ignore_connect_failure=0)
1610 except CommandError, e:
1611 print "Error preparing OSC %s\n" % osc.uuid
1613 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1616 for (osc, index, gen, active) in self.osclist:
1617 target_uuid = osc.target_uuid
1619 if is_prepared(self.name):
1620 Module.cleanup(self)
1621 if self.config_only:
1622 panic("Can't clean up config_only LOV ", self.name)
1624 def add_module(self, manager):
1625 if self.config_only:
1626 panic("Can't load modules for config_only LOV ", self.name)
1627 for (osc, index, gen, active) in self.osclist:
1628 osc.add_module(manager)
1630 manager.add_lustre_module('lov', 'lov')
1632 def correct_level(self, level, op=None):
1636 def __init__(self, db, uuid, fs_name, name_override = None):
1637 Module.__init__(self, 'LMV', db)
1638 if name_override != None:
1639 self.name = "lmv_%s" % name_override
1641 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1642 if self.devlist == None:
1643 self.devlist = self.db.get_refs('mds')
1646 self.desc_uuid = self.uuid
1648 self.fs_name = fs_name
1649 for mds_uuid in self.devlist:
1650 mds = self.db.lookup(mds_uuid)
1652 panic("MDS not found!")
1653 mdc = MDC(mds, self.uuid, fs_name)
1655 self.mdclist.append(mdc)
1657 panic('mdc not found:', mds_uuid)
1660 if is_prepared(self.name):
1664 for mdc in self.mdclist:
1666 # Only ignore connect failures with --force, which
1667 # isn't implemented here yet.
1668 mdc.prepare(ignore_connect_failure=0)
1669 except CommandError, e:
1670 print "Error preparing LMV %s\n" % mdc.uuid
1673 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1674 string.join(self.devlist))
1677 for mdc in self.mdclist:
1679 if is_prepared(self.name):
1680 Module.cleanup(self)
1682 def add_module(self, manager):
1683 for mdc in self.mdclist:
1684 mdc.add_module(manager)
1686 manager.add_lustre_module('lmv', 'lmv')
1688 def correct_level(self, level, op=None):
1691 class MDSDEV(Module):
1692 def __init__(self,db):
1693 Module.__init__(self, 'MDSDEV', db)
1694 self.devpath = self.db.get_val('devpath','')
1695 self.backdevpath = self.db.get_val('backdevpath','')
1696 self.size = self.db.get_val_int('devsize', 0)
1697 self.journal_size = self.db.get_val_int('journalsize', 0)
1698 self.fstype = self.db.get_val('fstype', '')
1699 self.backfstype = self.db.get_val('backfstype', '')
1700 self.nspath = self.db.get_val('nspath', '')
1701 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1702 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1703 self.obdtype = self.db.get_val('obdtype', '')
1704 self.root_squash = self.db.get_val('root_squash', '')
1705 self.no_root_squash = self.db.get_val('no_root_squash', '')
1706 # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
1707 target_uuid = self.db.get_first_ref('target')
1708 self.mds = self.db.lookup(target_uuid)
1709 self.name = self.mds.getName()
1710 self.client_uuids = self.mds.get_refs('client')
1715 lmv_uuid = self.db.get_first_ref('lmv')
1716 if lmv_uuid != None:
1717 self.lmv = self.db.lookup(lmv_uuid)
1718 if self.lmv != None:
1719 self.client_uuids = self.lmv.get_refs('client')
1721 # FIXME: if fstype not set, then determine based on kernel version
1722 self.format = self.db.get_val('autoformat', "no")
1723 if self.mds.get_val('failover', 0):
1724 self.failover_mds = 'f'
1726 self.failover_mds = 'n'
1727 active_uuid = get_active_target(self.mds)
1729 panic("No target device found:", target_uuid)
1730 if active_uuid == self.uuid:
1734 if self.active and config.group and config.group != self.mds.get_val('group'):
1737 # default inode inode for case when neither LOV either
1738 # LMV is accessible.
1739 self.inode_size = 256
1741 inode_size = self.db.get_val_int('inodesize', 0)
1742 if not inode_size == 0:
1743 self.inode_size = inode_size
1745 # find the LOV for this MDS
1746 lovconfig_uuid = self.mds.get_first_ref('lovconfig')
1747 if lovconfig_uuid or self.lmv != None:
1748 if self.lmv != None:
1749 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1750 lovconfig = self.lmv.lookup(lovconfig_uuid)
1751 lov_uuid = lovconfig.get_first_ref('lov')
1752 if lov_uuid == None:
1753 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1756 lovconfig = self.mds.lookup(lovconfig_uuid)
1757 lov_uuid = lovconfig.get_first_ref('lov')
1758 if lov_uuid == None:
1759 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1762 if self.lmv != None:
1763 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1764 lovconfig = self.lmv.lookup(lovconfig_uuid)
1765 lov_uuid = lovconfig.get_first_ref('lov')
1767 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1770 # default stripe count controls default inode_size
1771 stripe_count = lov.stripe_cnt
1772 if stripe_count > 77:
1773 self.inode_size = 4096
1774 elif stripe_count > 35:
1775 self.inode_size = 2048
1776 elif stripe_count > 13:
1777 self.inode_size = 1024
1778 elif stripe_count > 3:
1779 self.inode_size = 512
1781 self.inode_size = 256
1783 self.target_dev_uuid = self.uuid
1784 self.uuid = target_uuid
1787 if self.lmv != None:
1788 client_uuid = self.name + "_lmv_UUID"
1789 self.master = LMV(self.lmv, client_uuid,
1790 self.name, self.name)
1792 def add_module(self, manager):
1794 manager.add_lustre_module('mdc', 'mdc')
1795 manager.add_lustre_module('osc', 'osc')
1796 manager.add_lustre_module('ost', 'ost')
1797 manager.add_lustre_module('lov', 'lov')
1798 manager.add_lustre_module('mds', 'mds')
1800 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
1801 manager.add_lustre_module(self.fstype, self.fstype)
1804 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1806 # if fstype is smfs, then we should also take care about backing
1808 if self.fstype == 'smfs':
1809 manager.add_lustre_module(self.backfstype, self.backfstype)
1810 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
1812 for option in string.split(self.mountfsoptions, ','):
1813 if option == 'snap':
1814 if not self.fstype == 'smfs':
1815 panic("mountoptions has 'snap', but fstype is not smfs.")
1816 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1817 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
1820 if self.master != None:
1821 self.master.add_module(manager)
1823 def get_mount_options(self, blkdev):
1824 options = def_mount_options(self.fstype, 'mds')
1826 if config.mountfsoptions:
1828 options = "%s,%s" %(options, config.mountfsoptions)
1830 options = config.mountfsoptions
1831 if self.mountfsoptions:
1832 options = "%s,%s" %(options, self.mountfsoptions)
1834 if self.mountfsoptions:
1836 options = "%s,%s" %(options, self.mountfsoptions)
1838 options = self.mountfsoptions
1840 if self.fstype == 'smfs':
1842 options = "%s,type=%s,dev=%s" %(options,
1843 self.backfstype, blkdev)
1845 options = "type=%s,dev=%s" %(self.backfstype, blkdev)
1849 if not config.record and is_prepared(self.name):
1852 debug(self.uuid, "not active")
1855 # run write_conf automatically, if --reformat used
1860 if self.master != None:
1861 self.master.prepare()
1863 # never reformat here
1864 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1865 self.format, self.journal_size, self.inode_size,
1866 self.mkfsoptions, self.backfstype, self.backdevpath)
1868 if not is_prepared('MDT'):
1869 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
1871 if self.fstype == 'smfs':
1872 realdev = self.fstype
1876 if self.obdtype == None:
1877 self.obdtype = 'dumb'
1879 if self.master == None:
1880 master_name = 'dumb'
1882 master_name = self.master.name
1884 if self.client_uuids == None:
1885 profile_name = 'dumb'
1887 profile_name = self.name
1889 mountfsoptions = self.get_mount_options(blkdev)
1891 self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
1892 self.format, master_name, profile_name, self.obdtype)
1894 lctl.newdev("mds", self.name, self.uuid,
1895 setup = "%s %s %s %s %s %s" %(realdev,
1896 self.fstype, profile_name, mountfsoptions,
1897 master_name, self.obdtype))
1899 if development_mode():
1900 procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
1901 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
1902 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1903 print "MDS Warning: failed to set group-hash upcall"
1905 run("echo ", upcall, " > ", procentry)
1907 except CommandError, e:
1909 panic("MDS is missing the config log. Need to run " +
1910 "lconf --write_conf.")
1914 if config.root_squash == None:
1915 config.root_squash = self.root_squash
1916 if config.no_root_squash == None:
1917 config.no_root_squash = self.no_root_squash
1918 if config.root_squash:
1919 if config.no_root_squash:
1920 nsnid = config.no_root_squash
1923 lctl.root_squash(self.name, config.root_squash, nsnid)
1925 def write_conf(self):
1926 if not self.client_uuids:
1930 if not is_prepared(self.name):
1931 blkdev = block_dev(self.devpath, self.size, self.fstype,
1932 config.reformat, self.format, self.journal_size,
1933 self.inode_size, self.mkfsoptions,
1934 self.backfstype, self.backdevpath)
1936 if self.fstype == 'smfs':
1937 realdev = self.fstype
1941 # Even for writing logs we mount mds with supplied mount options
1942 # because it will not mount smfs (if used) otherwise.
1943 mountfsoptions = self.get_mount_options(blkdev)
1945 if self.obdtype == None:
1946 self.obdtype = 'dumb'
1948 self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
1949 self.format, "dumb", "dumb", self.obdtype)
1951 lctl.newdev("mds", self.name, self.uuid,
1952 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1953 'dumb', mountfsoptions,
1954 'dumb', self.obdtype))
1957 # record logs for all MDS clients
1958 for obd_uuid in self.client_uuids:
1959 log("recording client:", obd_uuid)
1961 client_uuid = generate_client_uuid(self.name)
1962 client = VOSC(self.db.lookup(obd_uuid), client_uuid,
1963 self.name, self.name)
1965 lctl.clear_log(self.name, self.name)
1966 lctl.record(self.name, self.name)
1968 lctl.mount_option(self.name, client.get_name(), "")
1970 process_updates(self.db, self.name, self.name, client)
1973 lctl.clear_log(self.name, self.name + '-clean')
1974 lctl.record(self.name, self.name + '-clean')
1976 lctl.del_mount_option(self.name)
1978 process_updates(self.db, self.name, self.name + '-clean', client)
1982 # record logs for each client
1988 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1990 config_options = CONFIG_FILE
1992 for node_db in self.db.lookup_class('node'):
1993 client_name = node_db.getName()
1994 for prof_uuid in node_db.get_refs('profile'):
1995 prof_db = node_db.lookup(prof_uuid)
1996 # refactor this into a funtion to test "clientness"
1998 for ref_class, ref_uuid in prof_db.get_all_refs():
1999 if ref_class in ('mountpoint','echoclient'):
2000 debug("recording", client_name)
2001 old_noexec = config.noexec
2003 ret, out = run (sys.argv[0], noexec_opt,
2004 " -v --record --nomod",
2005 "--record_log", client_name,
2006 "--record_device", self.name,
2007 "--node", client_name,
2010 for s in out: log("record> ", string.strip(s))
2011 ret, out = run (sys.argv[0], noexec_opt,
2012 "--cleanup -v --record --nomod",
2013 "--record_log", client_name + "-clean",
2014 "--record_device", self.name,
2015 "--node", client_name,
2018 for s in out: log("record> ", string.strip(s))
2019 config.noexec = old_noexec
2022 lctl.cleanup(self.name, self.uuid, 0, 0)
2023 except CommandError, e:
2024 log(self.module_name, "cleanup failed: ", self.name)
2027 Module.cleanup(self)
2029 clean_dev(self.devpath, self.fstype, self.backfstype,
2032 def msd_remaining(self):
2033 out = lctl.device_list()
2035 if string.split(s)[2] in ('mds',):
2038 def safe_to_clean(self):
def safe_to_clean_modules(self):
    """Unloading MDS modules is safe only once no MDS devices remain."""
    still_configured = self.msd_remaining()
    return not still_configured
2046 debug(self.uuid, "not active")
2049 if is_prepared(self.name):
2051 lctl.cleanup(self.name, self.uuid, config.force,
2053 except CommandError, e:
2054 log(self.module_name, "cleanup failed: ", self.name)
2057 Module.cleanup(self)
2059 if self.master != None:
2060 self.master.cleanup()
2061 if not self.msd_remaining() and is_prepared('MDT'):
2063 lctl.cleanup("MDT", "MDT_UUID", config.force,
2065 except CommandError, e:
2066 print "cleanup failed: ", self.name
2070 clean_dev(self.devpath, self.fstype, self.backfstype,
2073 def correct_level(self, level, op=None):
2074 #if self.master != None:
2079 def __init__(self, db):
2080 Module.__init__(self, 'OSD', db)
2081 self.osdtype = self.db.get_val('osdtype')
2082 self.devpath = self.db.get_val('devpath', '')
2083 self.backdevpath = self.db.get_val('backdevpath', '')
2084 self.size = self.db.get_val_int('devsize', 0)
2085 self.journal_size = self.db.get_val_int('journalsize', 0)
2086 self.inode_size = self.db.get_val_int('inodesize', 0)
2087 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2088 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2089 self.fstype = self.db.get_val('fstype', '')
2090 self.backfstype = self.db.get_val('backfstype', '')
2091 self.nspath = self.db.get_val('nspath', '')
2092 target_uuid = self.db.get_first_ref('target')
2093 ost = self.db.lookup(target_uuid)
2094 self.name = ost.getName()
2095 self.format = self.db.get_val('autoformat', 'yes')
2096 if ost.get_val('failover', 0):
2097 self.failover_ost = 'f'
2099 self.failover_ost = 'n'
2101 active_uuid = get_active_target(ost)
2103 panic("No target device found:", target_uuid)
2104 if active_uuid == self.uuid:
2108 if self.active and config.group and config.group != ost.get_val('group'):
2111 self.target_dev_uuid = self.uuid
2112 self.uuid = target_uuid
2114 def add_module(self, manager):
2116 manager.add_lustre_module('ost', 'ost')
2118 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2119 manager.add_lustre_module(self.fstype, self.fstype)
2122 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2124 if self.fstype == 'smfs':
2125 manager.add_lustre_module(self.backfstype, self.backfstype)
2126 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2128 for option in self.mountfsoptions:
2129 if option == 'snap':
2130 if not self.fstype == 'smfs':
2131 panic("mountoptions with snap, but fstype is not smfs\n")
2132 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2133 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2135 manager.add_lustre_module(self.osdtype, self.osdtype)
2137 def get_mount_options(self, blkdev):
2138 options = def_mount_options(self.fstype, 'ost')
2140 if config.mountfsoptions:
2142 options = "%s,%s" %(options, config.mountfsoptions)
2144 options = config.mountfsoptions
2145 if self.mountfsoptions:
2146 options = "%s,%s" %(options, self.mountfsoptions)
2148 if self.mountfsoptions:
2150 options = "%s,%s" %(options, self.mountfsoptions)
2152 options = self.mountfsoptions
2154 if self.fstype == 'smfs':
2156 options = "%s,type=%s,dev=%s" %(options,
2157 self.backfstype, blkdev)
2159 options = "type=%s,dev=%s" %(self.backfstype,
2163 # need to check /proc/mounts and /etc/mtab before
2164 # formatting anything.
2165 # FIXME: check if device is already formatted.
2167 if is_prepared(self.name):
2170 debug(self.uuid, "not active")
2173 if self.osdtype == 'obdecho':
2176 blkdev = block_dev(self.devpath, self.size, self.fstype,
2177 config.reformat, self.format, self.journal_size,
2178 self.inode_size, self.mkfsoptions, self.backfstype,
2181 if self.fstype == 'smfs':
2182 realdev = self.fstype
2186 mountfsoptions = self.get_mount_options(blkdev)
2188 self.info(self.osdtype, realdev, mountfsoptions, self.fstype,
2189 self.size, self.format, self.journal_size, self.inode_size)
2191 lctl.newdev(self.osdtype, self.name, self.uuid,
2192 setup ="%s %s %s %s" %(realdev, self.fstype,
2195 if not is_prepared('OSS'):
2196 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2198 def osd_remaining(self):
2199 out = lctl.device_list()
2201 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2204 def safe_to_clean(self):
def safe_to_clean_modules(self):
    """Unloading OSD modules is safe only once no OSD devices remain."""
    still_configured = self.osd_remaining()
    return not still_configured
2212 debug(self.uuid, "not active")
2214 if is_prepared(self.name):
2217 lctl.cleanup(self.name, self.uuid, config.force,
2219 except CommandError, e:
2220 log(self.module_name, "cleanup failed: ", self.name)
2223 if not self.osd_remaining() and is_prepared('OSS'):
2225 lctl.cleanup("OSS", "OSS_UUID", config.force,
2227 except CommandError, e:
2228 print "cleanup failed: ", self.name
2231 if not self.osdtype == 'obdecho':
2232 clean_dev(self.devpath, self.fstype, self.backfstype,
2235 def correct_level(self, level, op=None):
2238 def mgmt_uuid_for_fs(mtpt_name):
2241 mtpt_db = toplustreDB.lookup_name(mtpt_name)
2242 fs_uuid = mtpt_db.get_first_ref('filesystem')
2243 fs = toplustreDB.lookup(fs_uuid)
2246 return fs.get_first_ref('mgmt')
2248 # Generic client module, used by OSC and MDC
2249 class Client(Module):
2250 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2252 self.target_name = tgtdb.getName()
2253 self.target_uuid = tgtdb.getUUID()
2254 self.module_dir = module_dir
2255 self.module = module
2259 self.tgt_dev_uuid = get_active_target(tgtdb)
2260 if not self.tgt_dev_uuid:
2261 panic("No target device found for target(1):", self.target_name)
2266 self.module = module
2267 self.module_name = string.upper(module)
2269 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2270 self.target_name, fs_name)
2272 self.name = self_name
2274 self.lookup_server(self.tgt_dev_uuid)
2275 mgmt_uuid = mgmt_uuid_for_fs(fs_name)
2277 self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
2280 self.fs_name = fs_name
2281 if not self.module_dir:
2282 self.module_dir = module
def add_module(self, manager):
    """Queue this client's own kernel module for loading."""
    subdir, modname = self.module_dir, self.module
    manager.add_lustre_module(subdir, modname)
2287 def lookup_server(self, srv_uuid):
2288 """ Lookup a server's network information """
2289 self._server_nets = get_ost_net(self.db, srv_uuid)
2290 if len(self._server_nets) == 0:
2291 panic ("Unable to find a server for:", srv_uuid)
def get_servers(self):
    """Return the server network list cached by lookup_server()."""
    servers = self._server_nets
    return servers
2299 def prepare(self, ignore_connect_failure = 0):
2300 self.info(self.target_uuid)
2301 if not config.record and is_prepared(self.name):
2304 srv = choose_local_server(self.get_servers())
2308 routes = find_route(self.get_servers())
2309 if len(routes) == 0:
2310 panic ("no route to", self.target_uuid)
2311 for (srv, r) in routes:
2312 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2313 except CommandError, e:
2314 if not ignore_connect_failure:
2317 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2318 debug("%s inactive" % self.target_uuid)
2319 inactive_p = "inactive"
2321 debug("%s active" % self.target_uuid)
2323 lctl.newdev(self.module, self.name, self.uuid,
2324 setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
2325 inactive_p, self.mgmt_name))
2328 if is_prepared(self.name):
2329 Module.cleanup(self)
2331 srv = choose_local_server(self.get_servers())
2333 lctl.disconnect(srv)
2335 for (srv, r) in find_route(self.get_servers()):
2336 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2337 except CommandError, e:
2338 log(self.module_name, "cleanup failed: ", self.name)
2342 def correct_level(self, level, op=None):
2345 def deactivate(self):
2347 lctl.deactivate(self.name)
2348 except CommandError, e:
2349 log(self.module_name, "deactivate failed: ", self.name)
    def __init__(self, db, uuid, fs_name):
        """Metadata client: a Client driving the 'mdc' module at an MDS target."""
        Client.__init__(self, db, uuid, 'mdc', fs_name)
2357 def permits_inactive(self):
    def __init__(self, db, uuid, fs_name):
        """Object storage client: a Client driving the 'osc' module at an OST target."""
        Client.__init__(self, db, uuid, 'osc', fs_name)
2364 def permits_inactive(self):
def mgmtcli_name_for_uuid(uuid):
    """Derive the management-client device name for management UUID *uuid*."""
    return 'MGMTCLI_%s' % (uuid,)
class ManagementClient(Client):
    """Client for the management service; its device name is derived from
    the management target's UUID (see mgmtcli_name_for_uuid)."""
    def __init__(self, db, uuid):
        Client.__init__(self, db, uuid, 'mgmt_cli', '',
                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
                        module_dir = 'mgmt')
2376 class CMOBD(Module):
2377 def __init__(self, db):
2378 Module.__init__(self, 'CMOBD', db)
2379 self.name = self.db.getName();
2380 self.uuid = generate_client_uuid(self.name)
2381 self.master_uuid = self.db.get_first_ref('masterobd')
2382 self.cache_uuid = self.db.get_first_ref('cacheobd')
2384 master_obd = self.db.lookup(self.master_uuid)
2386 panic('master obd not found:', self.master_uuid)
2388 cache_obd = self.db.lookup(self.cache_uuid)
2390 panic('cache obd not found:', self.cache_uuid)
2395 master_class = master_obd.get_class()
2396 cache_class = cache_obd.get_class()
2398 if master_class == 'ost' or master_class == 'lov':
2399 client_uuid = "%s_lov_master_UUID" % (self.name)
2400 self.master = LOV(master_obd, client_uuid, self.name);
2401 elif master_class == 'mds':
2402 self.master = get_mdc(db, self.name, self.master_uuid)
2403 elif master_class == 'lmv':
2404 client_uuid = "%s_lmv_master_UUID" % (self.name)
2405 self.master = LMV(master_obd, client_uuid, self.name);
2407 panic("unknown master obd class '%s'" %(master_class))
2409 if cache_class == 'ost' or cache_class == 'lov':
2410 client_uuid = "%s_lov_cache_UUID" % (self.name)
2411 self.cache = LOV(cache_obd, client_uuid, self.name);
2412 elif cache_class == 'mds':
2413 self.cache = get_mdc(db, self.name, self.cache_uuid)
2414 elif cache_class == 'lmv':
2415 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2416 self.cache = LMV(cache_obd, client_uuid, self.name);
2418 panic("unknown cache obd class '%s'" %(cache_class))
2421 self.master.prepare()
2422 if not config.record and is_prepared(self.name):
2424 self.info(self.master_uuid, self.cache_uuid)
2425 lctl.newdev("cmobd", self.name, self.uuid,
2426 setup ="%s %s" %(self.master.uuid,
def get_master_name(self):
    """Device name of the master-side client."""
    master = self.master
    return master.name
def get_cache_name(self):
    """Device name of the cache-side client."""
    cache = self.cache
    return cache.name
2442 if is_prepared(self.name):
2443 Module.cleanup(self)
2445 self.master.cleanup()
    def add_module(self, manager):
        """Queue modules for the cache-management OBD and its master client."""
        manager.add_lustre_module('cmobd', 'cmobd')
        self.master.add_module(manager)
2451 def correct_level(self, level, op=None):
2455 def __init__(self, db, uuid, name):
2456 Module.__init__(self, 'COBD', db)
2457 self.name = self.db.getName();
2458 self.uuid = generate_client_uuid(self.name)
2459 self.master_uuid = self.db.get_first_ref('masterobd')
2460 self.cache_uuid = self.db.get_first_ref('cacheobd')
2462 master_obd = self.db.lookup(self.master_uuid)
2464 panic('master obd not found:', self.master_uuid)
2466 cache_obd = self.db.lookup(self.cache_uuid)
2468 panic('cache obd not found:', self.cache_uuid)
2473 master_class = master_obd.get_class()
2474 cache_class = cache_obd.get_class()
2476 if master_class == 'ost' or master_class == 'lov':
2477 client_uuid = "%s_lov_master_UUID" % (self.name)
2478 self.master = LOV(master_obd, client_uuid, name);
2479 elif master_class == 'mds':
2480 self.master = get_mdc(db, name, self.master_uuid)
2481 elif master_class == 'lmv':
2482 client_uuid = "%s_lmv_master_UUID" % (self.name)
2483 self.master = LMV(master_obd, client_uuid, self.name);
2485 panic("unknown master obd class '%s'" %(master_class))
2487 if cache_class == 'ost' or cache_class == 'lov':
2488 client_uuid = "%s_lov_cache_UUID" % (self.name)
2489 self.cache = LOV(cache_obd, client_uuid, name);
2490 elif cache_class == 'mds':
2491 self.cache = get_mdc(db, name, self.cache_uuid)
2492 elif cache_class == 'lmv':
2493 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2494 self.cache = LMV(cache_obd, client_uuid, self.name);
2496 panic("unknown cache obd class '%s'" %(cache_class))
def get_master_name(self):
    """Device name of the master-side client."""
    master = self.master
    return master.name
def get_cache_name(self):
    """Device name of the cache-side client."""
    cache = self.cache
    return cache.name
2511 self.master.prepare()
2512 self.cache.prepare()
2513 if not config.record and is_prepared(self.name):
2515 self.info(self.master_uuid, self.cache_uuid)
2516 lctl.newdev("cobd", self.name, self.uuid,
2517 setup ="%s %s" %(self.master.name,
2521 if is_prepared(self.name):
2522 Module.cleanup(self)
2523 self.master.cleanup()
2524 self.cache.cleanup()
    def add_module(self, manager):
        """Queue modules for the caching OBD and its master client."""
        manager.add_lustre_module('cobd', 'cobd')
        self.master.add_module(manager)
2530 # virtual interface for OSC and LOV
2532 def __init__(self, db, client_uuid, name, name_override = None):
2533 Module.__init__(self, 'VOSC', db)
2534 if db.get_class() == 'lov':
2535 self.osc = LOV(db, client_uuid, name, name_override)
2537 elif db.get_class() == 'cobd':
2538 self.osc = COBD(db, client_uuid, name)
2541 self.osc = OSC(db, client_uuid, name)
2545 return self.osc.get_uuid()
2548 return self.osc.get_name()
def add_module(self, manager):
    """Delegate module registration to the wrapped client (LOV, COBD or OSC)."""
    wrapped = self.osc
    wrapped.add_module(manager)
def correct_level(self, level, op=None):
    """Pass level correction through to the wrapped client."""
    wrapped = self.osc
    return wrapped.correct_level(level, op)
2562 # virtual interface for MDC and LMV
2564 def __init__(self, db, client_uuid, name, name_override = None):
2565 Module.__init__(self, 'VMDC', db)
2566 if db.get_class() == 'lmv':
2567 self.mdc = LMV(db, client_uuid, name, name_override)
2568 elif db.get_class() == 'cobd':
2569 self.mdc = COBD(db, client_uuid, name)
2571 self.mdc = MDC(db, client_uuid, name)
2574 return self.mdc.uuid
2577 return self.mdc.name
def add_module(self, manager):
    """Delegate module registration to the wrapped client (LMV, COBD or MDC)."""
    wrapped = self.mdc
    wrapped.add_module(manager)
def correct_level(self, level, op=None):
    """Pass level correction through to the wrapped client."""
    wrapped = self.mdc
    return wrapped.correct_level(level, op)
2591 class ECHO_CLIENT(Module):
2592 def __init__(self,db):
2593 Module.__init__(self, 'ECHO_CLIENT', db)
2594 self.obd_uuid = self.db.get_first_ref('obd')
2595 obd = self.db.lookup(self.obd_uuid)
2596 self.uuid = generate_client_uuid(self.name)
2597 self.osc = VOSC(obd, self.uuid, self.name)
2600 if not config.record and is_prepared(self.name):
2603 self.osc.prepare() # XXX This is so cheating. -p
2604 self.info(self.obd_uuid)
2606 lctl.newdev("echo_client", self.name, self.uuid,
2607 setup = self.osc.get_name())
2610 if is_prepared(self.name):
2611 Module.cleanup(self)
def add_module(self, manager):
    """Queue modules for the underlying client stack, then obdecho itself."""
    # the echo client sits on top of a VOSC stack, so register that first
    stack = self.osc
    stack.add_module(manager)
    manager.add_lustre_module('obdecho', 'obdecho')
2618 def correct_level(self, level, op=None):
2621 def generate_client_uuid(name):
2622 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2624 int(random.random() * 1048576),
2625 int(random.random() * 1048576))
2626 return client_uuid[:36]
2628 class Mountpoint(Module):
2629 def __init__(self,db):
2630 Module.__init__(self, 'MTPT', db)
2631 self.path = self.db.get_val('path')
2632 self.clientoptions = self.db.get_val('clientoptions', '')
2633 self.fs_uuid = self.db.get_first_ref('filesystem')
2634 fs = self.db.lookup(self.fs_uuid)
2635 self.mds_uuid = fs.get_first_ref('lmv')
2636 if not self.mds_uuid:
2637 self.mds_uuid = fs.get_first_ref('mds')
2638 self.obd_uuid = fs.get_first_ref('obd')
2639 self.mgmt_uuid = fs.get_first_ref('mgmt')
2640 client_uuid = generate_client_uuid(self.name)
2642 ost = self.db.lookup(self.obd_uuid)
2644 panic("no ost: ", self.obd_uuid)
2646 mds = self.db.lookup(self.mds_uuid)
2648 panic("no mds: ", self.mds_uuid)
2650 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2651 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
2654 self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
2660 if not config.record and fs_is_mounted(self.path):
2661 log(self.path, "already mounted.")
2665 self.mgmtcli.prepare()
2668 vmdc_name = self.vmdc.get_name()
2670 self.info(self.path, self.mds_uuid, self.obd_uuid)
2671 if config.record or config.lctl_dump:
2672 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
2675 if config.clientoptions:
2676 if self.clientoptions:
2677 self.clientoptions = self.clientoptions + ',' + \
2678 config.clientoptions
2680 self.clientoptions = config.clientoptions
2681 if self.clientoptions:
2682 self.clientoptions = ',' + self.clientoptions
2683 # Linux kernel will deal with async and not pass it to ll_fill_super,
2684 # so replace it with Lustre async
2685 self.clientoptions = string.replace(self.clientoptions, "async",
2688 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
2689 (self.vosc.get_name(), vmdc_name, self.clientoptions,
2690 config.config, self.path)
2691 run("mkdir", self.path)
2696 panic("mount failed:", self.path, ":", string.join(val))
2699 self.info(self.path, self.mds_uuid,self.obd_uuid)
2701 if config.record or config.lctl_dump:
2702 lctl.del_mount_option(local_node_name)
2704 if fs_is_mounted(self.path):
2706 (rc, out) = run("umount", "-f", self.path)
2708 (rc, out) = run("umount", self.path)
2710 raise CommandError('umount', out, rc)
2712 if fs_is_mounted(self.path):
2713 panic("fs is still mounted:", self.path)
2718 self.mgmtcli.cleanup()
2720 def add_module(self, manager):
2721 manager.add_lustre_module('mdc', 'mdc')
2724 self.mgmtcli.add_module(manager)
2726 self.vosc.add_module(manager)
2727 self.vmdc.add_module(manager)
2729 manager.add_lustre_module('llite', 'llite')
2731 def correct_level(self, level, op=None):
2734 # ============================================================
2735 # misc query functions
2737 def get_ost_net(self, osd_uuid):
2741 osd = self.lookup(osd_uuid)
2742 node_uuid = osd.get_first_ref('node')
2743 node = self.lookup(node_uuid)
2745 panic("unable to find node for osd_uuid:", osd_uuid,
2746 " node_ref:", node_uuid_)
2747 for net_uuid in node.get_networks():
2748 db = node.lookup(net_uuid)
2749 srv_list.append(Network(db))
2753 # the order of iniitailization is based on level.
2754 def getServiceLevel(self):
2755 type = self.get_class()
2757 if type in ('network',):
2759 elif type in ('routetbl',):
2761 elif type in ('ldlm',):
2763 elif type in ('osd', 'cobd'):
2765 elif type in ('mdsdev',):
2767 elif type in ('lmv',):
2769 elif type in ('cmobd',):
2771 elif type in ('mountpoint', 'echoclient'):
2774 panic("Unknown type: ", type)
2776 if ret < config.minlevel or ret > config.maxlevel:
2781 # return list of services in a profile. list is a list of tuples
2782 # [(level, db_object),]
2783 def getServices(self):
2785 for ref_class, ref_uuid in self.get_all_refs():
2786 servdb = self.lookup(ref_uuid)
2788 level = getServiceLevel(servdb)
2790 list.append((level, servdb))
2792 panic('service not found: ' + ref_uuid)
2798 ############################################################
2800 # FIXME: clean this mess up!
2802 # OSC is no longer in the xml, so we have to fake it.
2803 # this is getting ugly and begging for another refactoring
2804 def get_osc(ost_db, uuid, fs_name):
2805 osc = OSC(ost_db, uuid, fs_name)
# NOTE(review): line-sampled fragment kept verbatim; the guard around
# error() and the final `return mdc` are elided.
# Build an MDC service object for the given MDS uuid.
2808 def get_mdc(db, fs_name, mds_uuid):
2809 mds_db = db.lookup(mds_uuid);
2811 error("no mds:", mds_uuid)
2812 mdc = MDC(mds_db, mds_uuid, fs_name)
2815 ############################################################
2816 # routing ("rooting")
2818 # list of (nettype, cluster_id, nid)
# NOTE(review): line-sampled fragment kept verbatim.
# Record this node's networks in the global local_clusters list and
# register a TCP acceptor per listening port.
2821 def find_local_clusters(node_db):
2822 global local_clusters
2823 for netuuid in node_db.get_networks():
2824 net = node_db.lookup(netuuid)
# `srv` is presumably built from `net` on an elided line — confirm.
2826 debug("add_local", netuuid)
2827 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2829 if acceptors.has_key(srv.port):
2830 panic("duplicate port:", srv.port)
2831 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
# This node is a gateway.
def node_is_router():
    """Return true if this node is configured as a portals router.

    NOTE(review): the function body is elided in the sampled excerpt;
    reconstructed from the `is_router` global set in doHost() — confirm
    against the full source.
    """
    return is_router
# If there are any routers found in the config, then this will be true
# and all nodes will load kptlrouter.
def node_needs_router():
    """True when this node must load the kptlrouter module: either it
    is itself a router, or it can only reach some service through one."""
    if needs_router:
        return needs_router
    return is_router
2844 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2845 # Currently, these local routes are only added to kptlrouter route
2846 # table if they are needed to connect to a specific server. This
2847 # should be changed so all available routes are loaded, and the
2848 # ptlrouter can make all the decisions.
# NOTE(review): line-sampled fragment kept verbatim; loop headers and
# the needs_router assignment are partially elided.
2851 def find_local_routes(lustre):
2852 """ Scan the lustre config looking for routers . Build list of
2854 global local_routes, needs_router
2856 list = lustre.lookup_class('node')
# `router` presumably iterates over `list` on an elided line.
2858 if router.get_val_int('router', 0):
2860 for (local_type, local_cluster_id, local_nid) in local_clusters:
2862 for netuuid in router.get_networks():
2863 db = router.lookup(netuuid)
2864 if (local_type == db.get_val('nettype') and
2865 local_cluster_id == db.get_val('clusterid')):
2866 gw = db.get_val('nid')
2869 debug("find_local_routes: gw is", gw)
2870 for route in router.get_local_routes(local_type, gw):
2871 local_routes.append(route)
2872 debug("find_local_routes:", local_routes)
# NOTE(review): line-sampled fragments kept verbatim; the return
# statements of these helpers are elided.
# Pick the first server in srv_list reachable on a local cluster.
2875 def choose_local_server(srv_list):
2876 for srv in srv_list:
2877 if local_cluster(srv.net_type, srv.cluster_id):
# Membership test against the global local_clusters list.
2880 def local_cluster(net_type, cluster_id):
2881 for cluster in local_clusters:
2882 if net_type == cluster[0] and cluster_id == cluster[1]:
# Like local_cluster, but also matches the nid.
2886 def local_interface(net_type, cluster_id, nid):
2887 for cluster in local_clusters:
2888 if (net_type == cluster[0] and cluster_id == cluster[1]
2889 and nid == cluster[2]):
# Collect (srv, route) pairs for servers reachable via local_routes;
# routes are (nettype, gw, tgt_cluster_id, lo, hi) tuples.
2893 def find_route(srv_list):
2895 frm_type = local_clusters[0][0]
2896 for srv in srv_list:
2897 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2898 to_type = srv.net_type
# `to` is presumably srv.nid, assigned on an elided line — confirm.
2900 cluster_id = srv.cluster_id
2901 debug ('looking for route to', to_type, to)
2902 for r in local_routes:
2903 debug("find_route: ", r)
2904 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2905 result.append((srv, r))
# NOTE(review): line-sampled fragments kept verbatim.
# Resolve the currently-active device uuid for a failover target,
# honouring any --select override before the 'active' ref.
2908 def get_active_target(db):
2909 target_uuid = db.getUUID()
2910 target_name = db.getName()
2911 node_name = get_select(target_name)
2913 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2915 tgt_dev_uuid = db.get_first_ref('active')
# Find the Network object whose nid_uuid matches; return elided.
2918 def get_server_by_nid_uuid(db, nid_uuid):
2919 for n in db.lookup_class("network"):
2921 if net.nid_uuid == nid_uuid:
2925 ############################################################
# NOTE(review): the `def newService(db):` header is elided in this
# sampled excerpt; the lines below are its body, kept verbatim.
# Factory: instantiate the service wrapper class matching the config
# record's class (lov, network, routetbl, cobd, cmobd, mdsdev, ...).
2929 type = db.get_class()
2930 debug('Service:', type, db.getName(), db.getUUID())
2935 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2936 elif type == 'network':
2938 elif type == 'routetbl':
2942 elif type == 'cobd':
2943 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2944 elif type == 'cmobd':
2946 elif type == 'mdsdev':
2948 elif type == 'mountpoint':
2950 elif type == 'echoclient':
2955 panic ("unknown service type:", type)
2959 # Prepare the system to run lustre using a particular profile
2960 # in a the configuration.
2961 # * load & the modules
2962 # * setup networking for the current node
2963 # * make sure partitions are in place and prepared
2964 # * initialize devices with lctl
2965 # Levels is important, and needs to be enforced.
# NOTE(review): line-sampled fragment kept verbatim; `operation` is one
# of doSetup/doCleanup/doLoadModules/etc., applied per profile.
2966 def for_each_profile(db, prof_list, operation):
2967 for prof_uuid in prof_list:
2968 prof_db = db.lookup(prof_uuid)
2970 panic("profile:", prof_uuid, "not found.")
2971 services = getServices(prof_db)
# NOTE(review): line-sampled fragment kept verbatim.
# Reconstruct an OSC for an 'add' update record: walk the raw DOM to
# find which filesystem uses the lov and which mountpoint uses that
# filesystem, so the OSC gets the right fs name.
2974 def magic_get_osc(db, rec, lov):
2976 lov_uuid = lov.get_uuid()
2977 lov_name = lov.osc.fs_name
2979 lov_uuid = rec.getAttribute('lov_uuidref')
2980 # FIXME: better way to find the mountpoint?
2981 filesystems = db.root_node.getElementsByTagName('filesystem')
2983 for fs in filesystems:
2984 ref = fs.getElementsByTagName('obd_ref')
2985 if ref[0].getAttribute('uuidref') == lov_uuid:
2986 fsuuid = fs.getAttribute('uuid')
2990 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
2992 mtpts = db.root_node.getElementsByTagName('mountpoint')
# NOTE(review): loop variable here is `fs` again though it iterates
# mountpoints (loop header elided) — confirm against full source.
2995 ref = fs.getElementsByTagName('filesystem_ref')
2996 if ref[0].getAttribute('uuidref') == fsuuid:
2997 lov_name = fs.getAttribute('name')
3001 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3003 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3005 ost_uuid = rec.getAttribute('ost_uuidref')
3006 obd = db.lookup(ost_uuid)
3009 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3011 osc = get_osc(obd, lov_uuid, lov_name)
3013 panic('osc not found:', obd_uuid)
3016 # write logs for update records. sadly, logs of all types -- and updates in
3017 # particular -- are something of an afterthought. lconf needs rewritten with
3018 # these as core concepts. so this is a pretty big hack.
# NOTE(review): line-sampled fragment kept verbatim; several guard and
# try lines are elided.
# Handle one <update> element: dispatch on add/deactivate/delete
# child records and drive lctl lov_{add,del}_obd accordingly.
3019 def process_update_record(db, update, lov):
3020 for rec in update.childNodes:
3021 if rec.nodeType != rec.ELEMENT_NODE:
3024 log("found "+rec.nodeName+" record in update version " +
3025 str(update.getAttribute('version')))
3027 lov_uuid = rec.getAttribute('lov_uuidref')
3028 ost_uuid = rec.getAttribute('ost_uuidref')
3029 index = rec.getAttribute('index')
3030 gen = rec.getAttribute('generation')
3032 if not lov_uuid or not ost_uuid or not index or not gen:
3033 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3036 tmplov = db.lookup(lov_uuid)
3038 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3039 lov_name = tmplov.getName()
3041 lov_name = lov.osc.name
3043 # ------------------------------------------------------------- add
3044 if rec.nodeName == 'add':
3046 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3049 osc = magic_get_osc(db, rec, lov)
3052 # Only ignore connect failures with --force, which
3053 # isn't implemented here yet.
3054 osc.prepare(ignore_connect_failure=0)
3055 except CommandError, e:
3056 print "Error preparing OSC %s\n" % osc.uuid
3059 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3061 # ------------------------------------------------------ deactivate
3062 elif rec.nodeName == 'deactivate':
3066 osc = magic_get_osc(db, rec, lov)
3070 except CommandError, e:
3071 print "Error deactivating OSC %s\n" % osc.uuid
3074 # ---------------------------------------------------------- delete
3075 elif rec.nodeName == 'delete':
3079 osc = magic_get_osc(db, rec, lov)
3085 except CommandError, e:
3086 print "Error cleaning up OSC %s\n" % osc.uuid
3089 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
# NOTE(review): line-sampled fragment kept verbatim; the loop header
# over `updates` and the end-record call are elided.
# Replay every <update> element into a per-version config log named
# "<log_name>-<version>" on the record device.
3091 def process_updates(db, log_device, log_name, lov = None):
3092 updates = db.root_node.getElementsByTagName('update')
3094 if not u.childNodes:
3095 log("ignoring empty update record (version " +
3096 str(u.getAttribute('version')) + ")")
3099 version = u.getAttribute('version')
3100 real_name = "%s-%s" % (log_name, version)
3101 lctl.clear_log(log_device, real_name)
3102 lctl.record(log_device, real_name)
3104 process_update_record(db, u, lov)
# NOTE(review): line-sampled fragments kept verbatim; loop headers,
# config guards, and the calls that act on each service are elided.
# Run write_conf only on mdsdev services.
3108 def doWriteconf(services):
3112 if s[1].get_class() == 'mdsdev':
3113 n = newService(s[1])
# Instantiate services, sort by (corrected) level, then prepare each.
3116 def doSetup(services):
3121 n = newService(s[1])
3123 slist.append((n.level, n))
3126 nl = n[1].correct_level(n[0])
3127 nlist.append((nl, n[1]))
# Collect the kernel modules of every service, then load them.
3132 def doLoadModules(services):
3136 # adding all needed modules from all services
3138 n = newService(s[1])
3139 n.add_module(mod_manager)
3141 # loading all registered modules
3142 mod_manager.load_modules()
# Collect modules of services that are safe to clean, then unload.
3144 def doUnloadModules(services):
3148 # adding all needed modules from all services
3150 n = newService(s[1])
3151 if n.safe_to_clean_modules():
3152 n.add_module(mod_manager)
3154 # unloading all registered modules
3155 mod_manager.cleanup_modules()
# Like doSetup but sorted for teardown; cleanup only safe services.
3157 def doCleanup(services):
3163 n = newService(s[1])
3165 slist.append((n.level, n))
3168 nl = n[1].correct_level(n[0])
3169 nlist.append((nl, n[1]))
3174 if n[1].safe_to_clean():
# NOTE(review): line-sampled fragment kept verbatim; the host loop,
# branch bodies, and several guards are elided.
# Top-level driver for one node: find its config entry, read node
# settings, then dispatch on write_conf / recover / cleanup / setup.
3179 def doHost(lustreDB, hosts):
3180 global is_router, local_node_name
3183 node_db = lustreDB.lookup_name(h, 'node')
3187 panic('No host entry found.')
3189 local_node_name = node_db.get_val('name', 0)
3190 is_router = node_db.get_val_int('router', 0)
3191 lustre_upcall = node_db.get_val('lustreUpcall', '')
3192 portals_upcall = node_db.get_val('portalsUpcall', '')
3193 timeout = node_db.get_val_int('timeout', 0)
3194 ptldebug = node_db.get_val('ptldebug', '')
3195 subsystem = node_db.get_val('subsystem', '')
3197 find_local_clusters(node_db)
3199 find_local_routes(lustreDB)
3201 # Two step process: (1) load modules, (2) setup lustre
3202 # if not cleaning, load modules first.
3203 prof_list = node_db.get_refs('profile')
# --write_conf: load modules, record configs on the mds, unload.
3205 if config.write_conf:
3206 for_each_profile(node_db, prof_list, doLoadModules)
3208 for_each_profile(node_db, prof_list, doWriteconf)
3209 for_each_profile(node_db, prof_list, doUnloadModules)
3212 elif config.recover:
3213 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3214 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3215 "--client_uuid <UUID> --conn_uuid <UUID>")
3216 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3218 elif config.cleanup:
3220 # the command line can override this value
3222 # ugly hack, only need to run lctl commands for --dump
3223 if config.lctl_dump or config.record:
3224 for_each_profile(node_db, prof_list, doCleanup)
3227 sys_set_timeout(timeout)
3228 sys_set_ptldebug(ptldebug)
3229 sys_set_subsystem(subsystem)
3230 sys_set_lustre_upcall(lustre_upcall)
3231 sys_set_portals_upcall(portals_upcall)
3233 for_each_profile(node_db, prof_list, doCleanup)
3234 for_each_profile(node_db, prof_list, doUnloadModules)
# Default path: normal setup.
3238 # ugly hack, only need to run lctl commands for --dump
3239 if config.lctl_dump or config.record:
3240 sys_set_timeout(timeout)
3241 sys_set_lustre_upcall(lustre_upcall)
3242 for_each_profile(node_db, prof_list, doSetup)
3246 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3247 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3249 for_each_profile(node_db, prof_list, doLoadModules)
3251 sys_set_debug_path()
3252 sys_set_ptldebug(ptldebug)
3253 sys_set_subsystem(subsystem)
3254 script = config.gdb_script
3255 run(lctl.lctl, ' modules >', script)
3257 log ("The GDB module script is in", script)
3258 # pause, so user has time to break and
3261 sys_set_timeout(timeout)
3262 sys_set_lustre_upcall(lustre_upcall)
3263 sys_set_portals_upcall(portals_upcall)
3265 for_each_profile(node_db, prof_list, doSetup)
# NOTE(review): line-sampled fragment kept verbatim; several guards
# and try blocks are elided.
# Reconnect a client to the currently-active failover target: look up
# the new target, pick a reachable net, drop the old connection (best
# effort), connect the new one, and trigger recovery.
3268 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3269 tgt = lustreDB.lookup(tgt_uuid)
3271 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3272 new_uuid = get_active_target(tgt)
3274 raise Lustre.LconfError("doRecovery: no active target found for: " +
3276 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3278 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3280 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3282 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# Disconnect of the old server is best-effort: failure is only logged.
3285 lctl.disconnect(oldnet)
3286 except CommandError, e:
3287 log("recover: disconnect", nid_uuid, "failed: ")
3292 except CommandError, e:
3293 log("recover: connect failed")
3296 lctl.recover(client_uuid, net.nid_uuid)
# NOTE(review): line-sampled fragments kept verbatim.
# In development mode, derive --lustre and --portals module paths from
# the location of the lconf binary; otherwise normalize --portals
# relative to --lustre.
3299 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3300 base = os.path.dirname(cmd)
3301 if development_mode():
3302 if not config.lustre:
3303 debug('using objdir module paths')
3304 config.lustre = (os.path.join(base, ".."))
3305 # normalize the portals dir, using command line arg if set
3307 portals_dir = config.portals
3308 dir = os.path.join(config.lustre, portals_dir)
3309 config.portals = dir
3310 debug('config.portals', config.portals)
3311 elif config.lustre and config.portals:
3313 # if --lustre and --portals, normalize portals
3314 # can ignore POTRALS_DIR here, since it is probly useless here
3315 config.portals = os.path.join(config.lustre, config.portals)
3316 debug('config.portals B', config.portals)
# Write `val` to /proc/sys/<path>; guard / close lines are elided.
3318 def sysctl(path, val):
3319 debug("+ sysctl", path, val)
3323 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Tell the kernel where to dump the portals debug log."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
# NOTE(review): line-sampled fragments kept verbatim; the elif/if
# lines between the branches are elided.
# Set the lustre upcall script; --lustre_upcall, then --upcall, then
# the node-config value take precedence in that order.
3333 def sys_set_lustre_upcall(upcall):
3334 # the command overrides the value in the node config
3335 if config.lustre_upcall:
3336 upcall = config.lustre_upcall
3338 upcall = config.upcall
3340 lctl.set_lustre_upcall(upcall)
# Same precedence scheme for the portals upcall, via sysctl.
3342 def sys_set_portals_upcall(upcall):
3343 # the command overrides the value in the node config
3344 if config.portals_upcall:
3345 upcall = config.portals_upcall
3347 upcall = config.upcall
3349 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout.

    A positive --timeout on the command line overrides the value
    coming from the node configuration.
    """
    override = config.timeout
    if override and override > 0:
        timeout = override
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
# NOTE(review): line-sampled fragments kept verbatim; the meminfo
# parsing loop header and the else branch for minfree are elided.
# On 2.6 kernels, raise vm.min_free_kbytes so skb allocation under
# heavy reads does not exhaust reserves; optionally force socknal to
# a single (untyped) socket.
3358 def sys_tweak_socknal ():
3359 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3360 if sys_get_branch() == '2.6':
3361 fp = open('/proc/meminfo')
3362 lines = fp.readlines()
# `a` is presumably each line split into fields — confirm.
3367 if a[0] == 'MemTotal:':
3369 debug("memtotal" + memtotal)
# Below 256MB of RAM, reserve 1/16 of memory.
3370 if int(memtotal) < 262144:
3371 minfree = int(memtotal) / 16
3374 debug("+ minfree ", minfree)
3375 sysctl("vm/min_free_kbytes", minfree)
3376 if config.single_socket:
3377 sysctl("socknal/typed", 0)
# Reduce elan interrupt punt loops wherever the proc knob exists
# (elan, qsnet elan3/elan4); the `for p in procfiles:` line is elided.
3379 def sys_optimize_elan ():
3380 procfiles = ["/proc/elan/config/eventint_punt_loops",
3381 "/proc/qsnet/elan3/config/eventint_punt_loops",
3382 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3384 if os.access(p, os.W_OK):
3385 run ("echo 1 > " + p)
# NOTE(review): line-sampled fragments kept verbatim; the config
# guard and except-branch bodies are elided.
# Evaluate the symbolic ptldebug expression against ptldebug_names
# and write the resulting mask to portals/debug.
3387 def sys_set_ptldebug(ptldebug):
3389 ptldebug = config.ptldebug
3392 val = eval(ptldebug, ptldebug_names)
3393 val = "0x%x" % (val)
3394 sysctl('portals/debug', val)
3395 except NameError, e:
# Same scheme for the subsystem mask via subsystem_names.
3398 def sys_set_subsystem(subsystem):
3399 if config.subsystem:
3400 subsystem = config.subsystem
3403 val = eval(subsystem, subsystem_names)
3404 val = "0x%x" % (val)
3405 sysctl('portals/subsystem_debug', val)
3406 except NameError, e:
# NOTE(review): line-sampled fragment kept verbatim; the read of the
# current value, the comparison, and fp.close() are elided — this
# presumably only raises the limit, never lowers it.
# Ensure a /proc/sys/net/core/*mem_max value is at least `max`.
3409 def sys_set_netmem_max(path, max):
3410 debug("setting", path, "to at least", max)
3418 fp = open(path, 'w')
3419 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes if missing."""
    needed = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for node, mknod_cmd in needed:
        if not os.access(node, os.R_OK):
            run(mknod_cmd)
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
    """Append new_dir to $PATH unless it is already present.

    NOTE(review): the early `return` for the already-present case is
    elided in the sampled excerpt; restored here, otherwise PATH would
    accumulate duplicates.
    """
    syspath = os.environ['PATH'].split(':')
    if new_dir in syspath:
        return
    os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
def default_debug_path():
    """Return the default debug-dump file path, preferring the /r
    prefix (ramdisk root layout) when that directory exists.

    NOTE(review): the return statements are elided in the sampled
    excerpt; reconstructed in parallel with default_gdb_script below.
    """
    path = '/tmp/lustre-log'
    if os.path.isdir('/r'):
        return '/r' + path
    return path
def default_gdb_script():
    """Return the default gdb module-script path, preferring the /r
    prefix (ramdisk root layout) when that directory exists.

    NOTE(review): the fall-through `return script` is elided in the
    sampled excerpt; restored here so the function always returns.
    """
    script = '/tmp/ogdb'
    if os.path.isdir('/r'):
        return '/r' + script
    return script
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
    """Make sure the standard system binary directories are in $PATH.

    NOTE(review): the loop body is elided in the sampled excerpt;
    reconstructed as a call to add_to_path — confirm against full source.
    """
    for dir in DEFAULT_PATH:
        add_to_path(dir)
3458 # global hack for the --select handling
# NOTE(review): line-sampled fragments kept verbatim; the outer loop
# over args and the `global tgt_select` line are elided.
# Parse --select arguments of the form service=node[,service=node...]
# into the global tgt_select mapping.
3460 def init_select(args):
3461 # args = [service=nodeA,service2=nodeB service3=nodeC]
3464 list = string.split(arg, ',')
3466 srv, node = string.split(entry, '=')
3467 tgt_select[srv] = node
# Look up the selected node for a service; the fall-through return
# (presumably None or '') is elided.
3469 def get_select(srv):
3470 if tgt_select.has_key(srv):
3471 return tgt_select[srv]
# Shorthand for the option-kind constants used in the table below.
3475 FLAG = Lustre.Options.FLAG
3476 PARAM = Lustre.Options.PARAM
3477 INTPARAM = Lustre.Options.INTPARAM
3478 PARAMLIST = Lustre.Options.PARAMLIST
# NOTE(review): the `lconf_options = [` opening line is elided in this
# sampled excerpt; the tuples below are (name, help[, kind[, default]])
# entries of that table, kept verbatim.
3480 ('verbose,v', "Print system commands as they are run"),
3481 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3482 ('config', "Cluster config name used for LDAP query", PARAM),
3483 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3484 ('node', "Load config for <nodename>", PARAM),
3485 ('cleanup,d', "Cleans up config. (Shutdown)"),
3486 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3488 ('single_socket', "socknal option: only use one socket instead of bundle",
3490 ('failover',"""Used to shut down without saving state.
3491 This will allow this node to "give up" a service to a
3492 another node for failover purposes. This will not
3493 be a clean shutdown.""",
3495 ('gdb', """Prints message after creating gdb module script
3496 and sleeps for 5 seconds."""),
3497 ('noexec,n', """Prints the commands and steps that will be run for a
3498 config without executing them. This can used to check if a
3499 config file is doing what it should be doing"""),
3500 ('nomod', "Skip load/unload module step."),
3501 ('nosetup', "Skip device setup/cleanup step."),
3502 ('reformat', "Reformat all devices (without question)"),
3503 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3504 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3505 ('clientoptions', "Additional options for Lustre", PARAM),
3506 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3508 ('write_conf', "Save all the client config information on mds."),
3509 ('record', "Write config information on mds."),
3510 ('record_log', "Name of config record log.", PARAM),
3511 ('record_device', "MDS device name that will record the config commands",
3513 ('root_squash', "MDS squash root to appointed uid",
3515 ('no_root_squash', "Don't squash root for appointed nid",
3517 ('minlevel', "Minimum level of services to configure/cleanup",
3519 ('maxlevel', """Maximum level of services to configure/cleanup
3520 Levels are aproximatly like:
3525 70 - mountpoint, echo_client, osc, mdc, lov""",
3527 ('lustre', """Base directory of lustre sources. This parameter will
3528 cause lconf to load modules from a source tree.""", PARAM),
3529 ('portals', """Portals source directory. If this is a relative path,
3530 then it is assumed to be relative to lustre. """, PARAM),
3531 ('timeout', "Set recovery timeout", INTPARAM),
3532 ('upcall', "Set both portals and lustre upcall script", PARAM),
3533 ('lustre_upcall', "Set lustre upcall script", PARAM),
3534 ('portals_upcall', "Set portals upcall script", PARAM),
3535 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3536 ('ptldebug', "Set the portals debug level", PARAM),
3537 ('subsystem', "Set the portals debug subsystem", PARAM),
3538 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3539 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3540 # Client recovery options
3541 ('recover', "Recover a device"),
3542 ('group', "The group of devices to configure or cleanup", PARAM),
3543 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3544 ('client_uuid', "The failed client (required for recovery)", PARAM),
3545 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3547 ('inactive', """The name of an inactive service, to be ignored during
3548 mounting (currently OST-only). Can be repeated.""",
# NOTE(review): the `def main():` header is elided in this sampled
# excerpt; the lines below are its body, kept verbatim. Flow: parse
# options, seed the PRNG from /dev/urandom, load the config (file,
# HTTP, or LDAP), validate its version, build the node list, set up
# lctl (dump/record modes), then drive doHost().
3553 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3555 # in the upcall this is set to SIG_IGN
3556 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3558 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3560 config, args = cl.parse(sys.argv[1:])
3561 except Lustre.OptionError, e:
3565 setupModulePath(sys.argv[0])
3567 host = socket.gethostname()
3569 # the PRNG is normally seeded with time(), which is not so good for starting
3570 # time-synchronized clusters
3571 input = open('/dev/urandom', 'r')
3573 print 'Unable to open /dev/urandom!'
3575 seed = input.read(32)
3581 init_select(config.select)
3584 # allow config to be fetched via HTTP, but only with python2
3585 if sys.version[0] != '1' and args[0].startswith('http://'):
3588 config_file = urllib2.urlopen(args[0])
3589 except (urllib2.URLError, socket.error), err:
3590 if hasattr(err, 'args'):
3592 print "Could not access '%s': %s" %(args[0], err)
3594 elif not os.access(args[0], os.R_OK):
3595 print 'File not found or readable:', args[0]
3599 config_file = open(args[0], 'r')
3601 dom = xml.dom.minidom.parse(config_file)
3603 panic("%s does not appear to be a config file." % (args[0]))
3604 sys.exit(1) # make sure to die here, even in debug mode.
3606 CONFIG_FILE = args[0]
3607 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3608 if not config.config:
3609 config.config = os.path.basename(args[0])# use full path?
3610 if config.config[-4:] == '.xml':
3611 config.config = config.config[:-4]
3612 elif config.ldapurl:
3613 if not config.config:
3614 panic("--ldapurl requires --config name")
3615 dn = "config=%s,fs=lustre" % (config.config)
3616 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3617 elif config.ptldebug or config.subsystem:
3618 sys_set_ptldebug(None)
3619 sys_set_subsystem(None)
3622 print 'Missing config file or ldap URL.'
3623 print 'see lconf --help for command summary'
3626 toplustreDB = lustreDB
3628 ver = lustreDB.get_version()
3630 panic("No version found in config data, please recreate.")
3631 if ver != Lustre.CONFIG_VERSION:
3632 panic("Config version", ver, "does not match lconf version",
3633 Lustre.CONFIG_VERSION)
# Explicit --node wins; otherwise try hostname then 'localhost'.
3637 node_list.append(config.node)
3640 node_list.append(host)
3641 node_list.append('localhost')
3643 debug("configuring for host: ", node_list)
3646 config.debug_path = config.debug_path + '-' + host
3647 config.gdb_script = config.gdb_script + '-' + host
3649 lctl = LCTLInterface('lctl')
3651 if config.lctl_dump:
3652 lctl.use_save_file(config.lctl_dump)
# --record requires both device and log name; set lctl to record mode.
3655 if not (config.record_device and config.record_log):
3656 panic("When recording, both --record_log and --record_device must be specified.")
3657 lctl.clear_log(config.record_device, config.record_log)
3658 lctl.record(config.record_device, config.record_log)
3660 # init module manager
3661 mod_manager = kmod_manager(config.lustre, config.portals)
3663 doHost(lustreDB, node_list)
3665 if not config.record:
3670 process_updates(lustreDB, config.record_device, config.record_log)
# NOTE(review): line-sampled fragment kept verbatim; the `try:` and
# the main() call inside it are elided. Entry point: run main(),
# report lconf/command errors, and exit with the first cleanup error.
3672 if __name__ == "__main__":
3675 except Lustre.LconfError, e:
3677 # traceback.print_exc(file=sys.stdout)
3679 except CommandError, e:
3683 if first_cleanup_error:
3684 sys.exit(first_cleanup_error)