3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
def development_mode():
    """Return 1 when lconf runs from a source tree, else 0.

    Detection: a readable Makefile sitting next to the executable means
    we are inside a build tree, so in-tree paths (../utils) are used
    instead of the installed PYMOD_DIR.
    """
    base = os.path.dirname(sys.argv[0])
    if os.access(base+"/Makefile", os.R_OK):
        return 1
    # installed copy: no Makefile alongside the script
    return 0
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
108 "pinger" : (1 << 12),
109 "filter" : (1 << 13),
114 "ptlrouter" : (1 << 18),
118 "confobd" : (1 << 22),
# Remember the first error hit during cleanup so the overall exit status
# reflects it even when later cleanup steps also fail.
first_cleanup_error = 0

def cleanup_error(rc):
    """Record rc as the cleanup status unless an error was already seen."""
    global first_cleanup_error
    if first_cleanup_error:
        # keep the earliest error; later failures are ignored
        return
    first_cleanup_error = rc
130 # ============================================================
131 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with an LconfError for features that are stubbed out."""
    raise Lustre.LconfError, msg + ' not implemented yet.'
137 msg = string.join(map(str,args))
138 if not config.noexec:
139 raise Lustre.LconfError(msg)
144 msg = string.join(map(str,args))
149 print string.strip(s)
153 msg = string.join(map(str,args))
156 # ack, python's builtin int() does not support '0x123' syntax.
157 # eval can do it, although what a hack!
161 return eval(s, {}, {})
164 except SyntaxError, e:
165 raise ValueError("not a number")
167 raise ValueError("not a number")
169 # ============================================================
170 # locally defined exceptions
171 class CommandError (exceptions.Exception):
172 def __init__(self, cmd_name, cmd_err, rc=None):
173 self.cmd_name = cmd_name
174 self.cmd_err = cmd_err
179 if type(self.cmd_err) == types.StringType:
181 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
183 print "! %s: %s" % (self.cmd_name, self.cmd_err)
184 elif type(self.cmd_err) == types.ListType:
186 print "! %s (error %d):" % (self.cmd_name, self.rc)
188 print "! %s:" % (self.cmd_name)
189 for s in self.cmd_err:
190 print "> %s" %(string.strip(s))
195 # ============================================================
196 # handle daemons, like the acceptor
198 """ Manage starting and stopping a daemon. Assumes daemon manages
199 it's own pid file. """
201 def __init__(self, cmd):
207 log(self.command, "already running.")
209 self.path = find_prog(self.command)
211 panic(self.command, "not found.")
212 ret, out = runcmd(self.path +' '+ self.command_line())
214 raise CommandError(self.path, out, ret)
218 pid = self.read_pidfile()
221 log ("killing process", pid)
224 log("was unable to find pid of " + self.command)
225 #time.sleep(1) # let daemon die
227 log("unable to kill", self.command, e)
229 log("unable to kill", self.command)
232 pid = self.read_pidfile()
238 log("was unable to find pid of " + self.command)
245 def read_pidfile(self):
247 fp = open(self.pidfile(), 'r')
257 def clean_pidfile(self):
258 """ Remove a stale pidfile """
259 log("removing stale pidfile:", self.pidfile())
261 os.unlink(self.pidfile())
263 log(self.pidfile(), e)
265 class AcceptorHandler(DaemonHandler):
266 def __init__(self, port, net_type):
267 DaemonHandler.__init__(self, "acceptor")
272 return "/var/run/%s-%d.pid" % (self.command, self.port)
    def command_line(self):
        """Build the acceptor's argument string: self.flags and self.port,
        space-separated."""
        return string.join(map(str,(self.flags, self.port)))
279 # start the acceptors
281 if config.lctl_dump or config.record:
283 for port in acceptors.keys():
284 daemon = acceptors[port]
285 if not daemon.running():
def run_one_acceptor(port):
    """Start the acceptor daemon registered for port, if it is not
    already running.  No-op when only dumping/recording commands."""
    if config.lctl_dump or config.record:
        return
    if acceptors.has_key(port):
        daemon = acceptors[port]
        if not daemon.running():
            daemon.start()
    else:
        # as shown in review, the panic ran unconditionally; it belongs
        # on the no-acceptor-registered path only
        panic("run_one_acceptor: No acceptor defined for port:", port)
def stop_acceptor(port):
    """Stop the acceptor daemon for port, if one is registered and running."""
    if acceptors.has_key(port):
        daemon = acceptors[port]
        # bug fix: the daemon was looked up but never actually stopped
        if daemon.running():
            daemon.stop()
305 # ============================================================
306 # handle lctl interface
309 Manage communication with lctl
312 def __init__(self, cmd):
314 Initialize close by finding the lctl binary.
316 self.lctl = find_prog(cmd)
318 self.record_device = ''
321 debug('! lctl not found')
324 raise CommandError('lctl', "unable to find lctl binary.")
326 def use_save_file(self, file):
327 self.save_file = file
329 def record(self, dev_name, logname):
330 log("Recording log", logname, "on", dev_name)
331 self.record_device = dev_name
332 self.record_log = logname
334 def end_record(self):
335 log("End recording log", self.record_log, "on", self.record_device)
336 self.record_device = None
337 self.record_log = None
339 def set_nonblock(self, fd):
340 fl = fcntl.fcntl(fd, F_GETFL)
341 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
346 the cmds are written to stdin of lctl
347 lctl doesn't return errors when run in script mode, so
349 should modify command line to accept multiple commands, or
350 create complex command line options
354 cmds = '\n dump ' + self.save_file + '\n' + cmds
355 elif self.record_device:
359 %s""" % (self.record_device, self.record_log, cmds)
361 debug("+", cmd_line, cmds)
362 if config.noexec: return (0, [])
364 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
365 child.tochild.write(cmds + "\n")
366 child.tochild.close()
367 # print "LCTL:", cmds
369 # From "Python Cookbook" from O'Reilly
370 outfile = child.fromchild
371 outfd = outfile.fileno()
372 self.set_nonblock(outfd)
373 errfile = child.childerr
374 errfd = errfile.fileno()
375 self.set_nonblock(errfd)
377 outdata = errdata = ''
380 ready = select.select([outfd,errfd],[],[]) # Wait for input
381 if outfd in ready[0]:
382 outchunk = outfile.read()
383 if outchunk == '': outeof = 1
384 outdata = outdata + outchunk
385 if errfd in ready[0]:
386 errchunk = errfile.read()
387 if errchunk == '': erreof = 1
388 errdata = errdata + errchunk
389 if outeof and erreof: break
390 # end of "borrowed" code
393 if os.WIFEXITED(ret):
394 rc = os.WEXITSTATUS(ret)
397 if rc or len(errdata):
398 raise CommandError(self.lctl, errdata, rc)
401 def runcmd(self, *args):
403 run lctl using the command line
405 cmd = string.join(map(str,args))
406 debug("+", self.lctl, cmd)
407 rc, out = run(self.lctl, cmd)
409 raise CommandError(self.lctl, out, rc)
412 def clear_log(self, dev, log):
413 """ clear an existing log """
418 quit """ % (dev, log)
421 def root_squash(self, name, uid, nid):
425 quit""" % (name, uid, nid)
428 def network(self, net, nid):
433 quit """ % (net, nid)
437 def add_interface(self, net, ip, netmask = ""):
438 """ add an interface """
442 quit """ % (net, ip, netmask)
445 # delete an interface
446 def del_interface(self, net, ip):
447 """ delete an interface """
454 # create a new connection
455 def add_uuid(self, net_type, uuid, nid):
456 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
459 def add_peer(self, net_type, nid, hostaddr, port):
460 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
465 nid, hostaddr, port )
467 elif net_type in ('iib',) and not config.lctl_dump:
474 elif net_type in ('vib',) and not config.lctl_dump:
482 def connect(self, srv):
483 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
484 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
486 hostaddr = string.split(srv.hostaddr[0], '/')[0]
487 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
490 def recover(self, dev_name, new_conn):
493 recover %s""" %(dev_name, new_conn)
496 # add a route to a range
497 def add_route(self, net, gw, lo, hi):
505 except CommandError, e:
509 def del_route(self, net, gw, lo, hi):
514 quit """ % (net, gw, lo, hi)
517 # add a route to a host
518 def add_route_host(self, net, uuid, gw, tgt):
519 self.add_uuid(net, uuid, tgt)
527 except CommandError, e:
531 # add a route to a range
532 def del_route_host(self, net, uuid, gw, tgt):
538 quit """ % (net, gw, tgt)
542 def del_peer(self, net_type, nid, hostaddr):
543 if net_type in ('tcp',) and not config.lctl_dump:
547 del_peer %s %s single_share
551 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
555 del_peer %s single_share
560 # disconnect one connection
561 def disconnect(self, srv):
562 self.del_uuid(srv.nid_uuid)
563 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
565 hostaddr = string.split(srv.hostaddr[0], '/')[0]
566 self.del_peer(srv.net_type, srv.nid, hostaddr)
568 def del_uuid(self, uuid):
576 def disconnectAll(self, net):
584 def attach(self, type, name, uuid):
587 quit""" % (type, name, uuid)
590 def setup(self, name, setup = ""):
594 quit""" % (name, setup)
597 def add_conn(self, name, conn_uuid):
601 quit""" % (name, conn_uuid)
605 # create a new device with lctl
606 def newdev(self, type, name, uuid, setup = ""):
607 self.attach(type, name, uuid);
609 self.setup(name, setup)
610 except CommandError, e:
611 self.cleanup(name, uuid, 0)
616 def cleanup(self, name, uuid, force, failover = 0):
617 if failover: force = 1
623 quit""" % (name, ('', 'force')[force],
624 ('', 'failover')[failover])
628 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
629 stripe_sz, stripe_off, pattern, devlist = None):
632 lov_setup %s %d %d %d %s %s
633 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
637 # add an OBD to a LOV
638 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
640 lov_modify_tgts add %s %s %s %s
641 quit""" % (name, obd_uuid, index, gen)
645 def lmv_setup(self, name, uuid, desc_uuid, devlist):
649 quit""" % (name, uuid, desc_uuid, devlist)
652 # delete an OBD from a LOV
653 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
655 lov_modify_tgts del %s %s %s %s
656 quit""" % (name, obd_uuid, index, gen)
660 def deactivate(self, name):
668 def dump(self, dump_file):
671 quit""" % (dump_file)
674 # get list of devices
675 def device_list(self):
676 devices = '/proc/fs/lustre/devices'
678 if os.access(devices, os.R_OK):
680 fp = open(devices, 'r')
688 def lustre_version(self):
689 rc, out = self.runcmd('version')
693 def mount_option(self, profile, osc, mdc):
695 mount_option %s %s %s
696 quit""" % (profile, osc, mdc)
699 # delete mount options
700 def del_mount_option(self, profile):
706 def set_timeout(self, timeout):
712 def set_lustre_upcall(self, upcall):
717 # ============================================================
718 # Various system-level functions
719 # (ideally moved to their own module)
721 # Run a command and return the output and status.
722 # stderr is sent to /dev/null, could use popen3 to
723 # save it if necessary
726 if config.noexec: return (0, [])
727 f = os.popen(cmd + ' 2>&1')
737 cmd = string.join(map(str,args))
740 # Run a command in the background.
741 def run_daemon(*args):
742 cmd = string.join(map(str,args))
744 if config.noexec: return 0
745 f = os.popen(cmd + ' 2>&1')
753 # Determine full path to use for an external command
754 # searches dirname(argv[0]) first, then PATH
756 syspath = string.split(os.environ['PATH'], ':')
757 cmdpath = os.path.dirname(sys.argv[0])
758 syspath.insert(0, cmdpath);
760 syspath.insert(0, os.path.join(config.portals, 'utils/'))
762 prog = os.path.join(d,cmd)
763 if os.access(prog, os.X_OK):
767 # Recursively look for file starting at base dir
768 def do_find_file(base, mod):
769 fullname = os.path.join(base, mod)
770 if os.access(fullname, os.R_OK):
772 for d in os.listdir(base):
773 dir = os.path.join(base,d)
774 if os.path.isdir(dir):
775 module = do_find_file(dir, mod)
779 # is the path a block device?
786 return stat.S_ISBLK(s[stat.ST_MODE])
788 # find the journal device from mkfs options
794 while i < len(x) - 1:
795 if x[i] == '-J' and x[i+1].startswith('device='):
801 # build fs according to type
803 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
809 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
811 # devsize is in 1k, and fs block count is in 4k
812 block_cnt = devsize/4
814 if fstype in ('ext3', 'extN', 'ldiskfs'):
815 # ext3 journal size is in megabytes
816 # but don't set jsize if mkfsoptions indicates a separate journal device
817 if jsize == 0 and jdev(mkfsoptions) == '':
819 if not is_block(dev):
820 ret, out = runcmd("ls -l %s" %dev)
821 devsize = int(string.split(out[0])[4]) / 1024
823 # sfdisk works for symlink, hardlink, and realdev
824 ret, out = runcmd("sfdisk -s %s" %dev)
826 devsize = int(out[0])
828 # sfdisk -s will fail for too large block device,
829 # then, read the size of partition from /proc/partitions
831 # get the realpath of the device
832 # it may be the real device, such as /dev/hda7
833 # or the hardlink created via mknod for a device
834 if 'realpath' in dir(os.path):
835 real_dev = os.path.realpath(dev)
839 while os.path.islink(real_dev) and (link_count < 20):
840 link_count = link_count + 1
841 dev_link = os.readlink(real_dev)
842 if os.path.isabs(dev_link):
845 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
847 panic("Entountered too many symbolic links resolving block device:", dev)
849 # get the major and minor number of the realpath via ls
850 # it seems python(os.stat) does not return
851 # the st_rdev member of the stat structure
852 ret, out = runcmd("ls -l %s" %real_dev)
853 major = string.split(string.split(out[0])[4], ",")[0]
854 minor = string.split(out[0])[5]
856 # get the devsize from /proc/partitions with the major and minor number
857 ret, out = runcmd("cat /proc/partitions")
860 if string.split(line)[0] == major and string.split(line)[1] == minor:
861 devsize = int(string.split(line)[2])
864 if devsize > 1024 * 1024:
865 jsize = ((devsize / 102400) * 4)
868 if jsize: jopt = "-J size=%d" %(jsize,)
869 if isize: iopt = "-I %d" %(isize,)
870 mkfs = 'mkfs.ext2 -j -b 4096 '
871 if not isblock or config.force:
873 if jdev(mkfsoptions) != '':
874 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
876 jmkfs = jmkfs + '-F '
877 jmkfs = jmkfs + jdev(mkfsoptions)
878 (ret, out) = run (jmkfs)
880 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
881 elif fstype == 'reiserfs':
882 # reiserfs journal size is in blocks
883 if jsize: jopt = "--journal_size %d" %(jsize,)
884 mkfs = 'mkreiserfs -ff'
886 panic('unsupported fs type: ', fstype)
888 if config.mkfsoptions != None:
889 mkfs = mkfs + ' ' + config.mkfsoptions
890 if mkfsoptions != None:
891 mkfs = mkfs + ' ' + mkfsoptions
892 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
894 panic("Unable to build fs:", dev, string.join(out))
895 # enable hash tree indexing on fsswe
896 if fstype in ('ext3', 'extN', 'ldiskfs'):
897 htree = 'echo "feature FEATURE_C5" | debugfs -w'
898 (ret, out) = run (htree, dev)
900 panic("Unable to enable htree:", dev)
902 # some systems use /dev/loopN, some /dev/loop/N
906 if not os.access(loop + str(0), os.R_OK):
908 if not os.access(loop + str(0), os.R_OK):
909 panic ("can't access loop devices")
912 # find loop device assigned to the file
913 def find_assigned_loop(file):
915 for n in xrange(0, MAX_LOOP_DEVICES):
917 if os.access(dev, os.R_OK):
918 (stat, out) = run('losetup', dev)
919 if out and stat == 0:
920 m = re.search(r'\((.*)\)', out[0])
921 if m and file == m.group(1):
925 # find free loop device
926 def find_free_loop(file):
929 # find next free loop
930 for n in xrange(0, MAX_LOOP_DEVICES):
932 if os.access(dev, os.R_OK):
933 (stat, out) = run('losetup', dev)
938 # create file if necessary and assign the first free loop device
939 def init_loop(file, size, fstype, journal_size, inode_size,
940 mkfsoptions, reformat, autoformat, backfstype, backfile):
943 realfstype = backfstype
944 if is_block(backfile):
945 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
946 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
952 dev = find_assigned_loop(realfile)
954 print 'WARNING: file', realfile, 'already mapped to', dev
957 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
958 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
960 panic("Unable to create backing store:", realfile)
961 mkfs(realfile, size, realfstype, journal_size, inode_size,
962 mkfsoptions, isblock=0)
964 dev = find_free_loop(realfile)
966 print "attach " + realfile + " <-> " + dev
967 run('losetup', dev, realfile)
970 print "out of loop devices"
973 # undo loop assignment
974 def clean_loop(dev, fstype, backfstype, backdev):
979 if not is_block(realfile):
980 dev = find_assigned_loop(realfile)
982 print "detach " + dev + " <-> " + realfile
983 ret, out = run('losetup -d', dev)
985 log('unable to clean loop device:', dev, 'for file:', realfile)
# Finalizes a passed device: tears down its loop mapping when the device
# is file-backed (smfs always is; otherwise anything that is not a real
# block device).
def clean_dev(dev, fstype, backfstype, backdev):
    if fstype == 'smfs' or not is_block(dev):
        clean_loop(dev, fstype, backfstype, backdev)
993 # determine if dev is formatted as a <fstype> filesystem
def need_format(fstype, dev):
    """Return true if dev must be formatted as a <fstype> filesystem.

    FIXME: real detection is not implemented; conservatively report that
    no format is needed so existing data is never clobbered.
    """
    # FIXME don't know how to implement this
    return 0
998 # initialize a block device if needed
999 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1000 inode_size, mkfsoptions, backfstype, backdev):
1004 if fstype == 'smfs' or not is_block(dev):
1005 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1006 mkfsoptions, reformat, autoformat, backfstype, backdev)
1007 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1008 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1011 # panic("device:", dev,
1012 # "not prepared, and autoformat is not set.\n",
1013 # "Rerun with --reformat option to format ALL filesystems")
1018 """lookup IP address for an interface"""
1019 rc, out = run("/sbin/ifconfig", iface)
1022 addr = string.split(out[1])[1]
1023 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for the passed fstype and target
    ('mds' or 'ost').

    ext3/ldiskfs always gets errors=remount-ro; OSTs on 2.4 kernels
    additionally get asyncdel.
    NOTE(review): other fstypes appear to fall through with no explicit
    return value here -- confirm against the full source.
    """
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
        return mountfsoptions
1035 def sys_get_elan_position_file():
1036 procfiles = ["/proc/elan/device0/position",
1037 "/proc/qsnet/elan4/device0/position",
1038 "/proc/qsnet/elan3/device0/position"]
1040 if os.access(p, os.R_OK):
1044 def sys_get_local_nid(net_type, wildcard, cluster_id):
1045 """Return the local nid."""
1047 if sys_get_elan_position_file():
1048 local = sys_get_local_address('elan', '*', cluster_id)
1050 local = sys_get_local_address(net_type, wildcard, cluster_id)
1053 def sys_get_local_address(net_type, wildcard, cluster_id):
1054 """Return the local address for the network type."""
1056 if net_type in ('tcp','openib','iib','vib','ra'):
1058 iface, star = string.split(wildcard, ':')
1059 local = if2addr(iface)
1061 panic ("unable to determine ip for:", wildcard)
1063 host = socket.gethostname()
1064 local = socket.gethostbyname(host)
1065 elif net_type == 'elan':
1066 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1067 f = sys_get_elan_position_file()
1069 panic ("unable to determine local Elan ID")
1072 lines = fp.readlines()
1076 if a[0] == 'NodeId':
1080 nid = my_int(cluster_id) + my_int(elan_id)
1081 local = "%d" % (nid)
1082 except ValueError, e:
1086 elif net_type == 'lo':
1087 fixme("automatic local address for loopback")
1088 elif net_type == 'gm':
1089 fixme("automatic local address for GM")
1093 def sys_get_branch():
1094 """Returns kernel release"""
1096 fp = open('/proc/sys/kernel/osrelease')
1097 lines = fp.readlines()
1101 version = string.split(l)
1102 a = string.split(version[0], '.')
1103 return a[0] + '.' + a[1]
1108 # XXX: instead of device_list, ask for $name and see what we get
1109 def is_prepared(name):
1110 """Return true if a device exists for the name"""
1111 if config.lctl_dump:
1113 if (config.noexec or config.record) and config.cleanup:
1116 # expect this format:
1117 # 1 UP ldlm ldlm ldlm_UUID 2
1118 out = lctl.device_list()
1120 if name == string.split(s)[3]:
1122 except CommandError, e:
1126 def net_is_prepared():
1127 """If the any device exists, then assume that all networking
1128 has been configured"""
1129 out = lctl.device_list()
1132 def fs_is_mounted(path):
1133 """Return true if path is a mounted lustre filesystem"""
1135 fp = open('/proc/mounts')
1136 lines = fp.readlines()
1140 if a[1] == path and a[2] == 'lustre_lite':
1146 def kmod_find(src_dir, dev_dir, modname):
1147 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1148 for modext in '.ko', '.o':
1149 module = modbase + modext
1151 if os.access(module, os.R_OK):
1157 def kmod_info(modname):
1158 """Returns reference count for passed module name."""
1160 fp = open('/proc/modules')
1161 lines = fp.readlines()
1164 # please forgive my tired fingers for this one
1165 ret = filter(lambda word, mod = modname: word[0] == mod,
1166 map(lambda line: string.split(line), lines))
1170 except Exception, e:
1174 """Presents kernel module"""
1175 def __init__(self, src_dir, dev_dir, name):
1176 self.src_dir = src_dir
1177 self.dev_dir = dev_dir
1182 log ('loading module:', self.name, 'srcdir',
1183 self.src_dir, 'devdir', self.dev_dir)
1185 module = kmod_find(self.src_dir, self.dev_dir,
1188 panic('module not found:', self.name)
1189 (rc, out) = run('/sbin/insmod', module)
1191 raise CommandError('insmod', out, rc)
1193 (rc, out) = run('/sbin/modprobe', self.name)
1195 raise CommandError('modprobe', out, rc)
1199 log('unloading module:', self.name)
1200 (rc, out) = run('/sbin/rmmod', self.name)
1202 log('unable to unload module:', self.name +
1203 "(" + self.refcount() + ")")
1207 """Returns module info if any."""
1208 return kmod_info(self.name)
1211 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1218 """Returns module refcount."""
1225 """Returns 1 if module is used, otherwise 0 is returned."""
1231 if users and users != '(unused)' and users != '-':
1239 """Returns 1 if module is busy, otherwise 0 is returned."""
1240 if self.loaded() and (self.used() or self.refcount() != '0'):
1246 """Manage kernel modules"""
    def __init__(self, lustre_dir, portals_dir):
        # Roots used to locate module object files for lustre and portals.
        self.lustre_dir = lustre_dir
        self.portals_dir = portals_dir
        # kmod objects accumulated in load order
        self.kmodule_list = []
1252 def find_module(self, modname):
1253 """Find module by module name"""
1254 for mod in self.kmodule_list:
1255 if mod.name == modname:
1259 def add_portals_module(self, dev_dir, modname):
1260 """Append a module to list of modules to load."""
1262 mod = self.find_module(modname)
1264 mod = kmod(self.portals_dir, dev_dir, modname)
1265 self.kmodule_list.append(mod)
1267 def add_lustre_module(self, dev_dir, modname):
1268 """Append a module to list of modules to load."""
1270 mod = self.find_module(modname)
1272 mod = kmod(self.lustre_dir, dev_dir, modname)
1273 self.kmodule_list.append(mod)
1275 def load_modules(self):
1276 """Load all the modules in the list in the order they appear."""
1277 for mod in self.kmodule_list:
1278 if mod.loaded() and not config.noexec:
1282 def cleanup_modules(self):
1283 """Unload the modules in the list in reverse order."""
1284 rev = self.kmodule_list
1287 if (not mod.loaded() or mod.busy()) and not config.noexec:
1290 if mod.name == 'portals' and config.dump:
1291 lctl.dump(config.dump)
1294 # ============================================================
1295 # Classes to prepare and cleanup the various objects
1298 """ Base class for the rest of the modules. The default cleanup method is
1299 defined here, as well as some utilitiy funcs.
1301 def __init__(self, module_name, db):
1303 self.module_name = module_name
1304 self.name = self.db.getName()
1305 self.uuid = self.db.getUUID()
1309 def info(self, *args):
1310 msg = string.join(map(str,args))
1311 print self.module_name + ":", self.name, self.uuid, msg
1314 """ default cleanup, used for most modules """
1317 lctl.cleanup(self.name, self.uuid, config.force)
1318 except CommandError, e:
1319 log(self.module_name, "cleanup failed: ", self.name)
1323 def add_module(self, manager):
1324 """Adds all needed modules in the order they appear."""
1327 def safe_to_clean(self):
1330 def safe_to_clean_modules(self):
1331 return self.safe_to_clean()
1333 class Network(Module):
1334 def __init__(self,db):
1335 Module.__init__(self, 'NETWORK', db)
1336 self.net_type = self.db.get_val('nettype')
1337 self.nid = self.db.get_val('nid', '*')
1338 self.cluster_id = self.db.get_val('clusterid', "0")
1339 self.port = self.db.get_val_int('port', 0)
1342 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1344 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1345 self.generic_nid = 1
1346 debug("nid:", self.nid)
1348 self.generic_nid = 0
1350 self.nid_uuid = self.nid_to_uuid(self.nid)
1351 self.hostaddr = self.db.get_hostaddr()
1352 if len(self.hostaddr) == 0:
1353 self.hostaddr.append(self.nid)
1354 if '*' in self.hostaddr[0]:
1355 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1356 if not self.hostaddr[0]:
1357 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1358 debug("hostaddr:", self.hostaddr[0])
1360 def add_module(self, manager):
1361 manager.add_portals_module("libcfs", 'libcfs')
1362 manager.add_portals_module("portals", 'portals')
1363 if node_needs_router():
1364 manager.add_portals_module("router", 'kptlrouter')
1365 if self.net_type == 'tcp':
1366 manager.add_portals_module("knals/socknal", 'ksocknal')
1367 if self.net_type == 'elan':
1368 manager.add_portals_module("knals/qswnal", 'kqswnal')
1369 if self.net_type == 'gm':
1370 manager.add_portals_module("knals/gmnal", 'kgmnal')
1371 if self.net_type == 'openib':
1372 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1373 if self.net_type == 'iib':
1374 manager.add_portals_module("knals/iibnal", 'kiibnal')
1375 if self.net_type == 'vib':
1376 self.add_portals_module("knals/vibnal", 'kvibnal')
1377 if self.net_type == 'lo':
1378 manager.add_portals_module("knals/lonal", 'klonal')
1379 if self.net_type == 'ra':
1380 manager.add_portals_module("knals/ranal", 'kranal')
1382 def nid_to_uuid(self, nid):
1383 return "NID_%s_UUID" %(nid,)
1386 if not config.record and net_is_prepared():
1388 self.info(self.net_type, self.nid, self.port)
1389 if not (config.record and self.generic_nid):
1390 lctl.network(self.net_type, self.nid)
1391 if self.net_type == 'tcp':
1393 for hostaddr in self.db.get_hostaddr():
1394 ip = string.split(hostaddr, '/')[0]
1395 if len(string.split(hostaddr, '/')) == 2:
1396 netmask = string.split(hostaddr, '/')[1]
1399 lctl.add_interface(self.net_type, ip, netmask)
1400 if self.net_type == 'elan':
1402 if self.port and node_is_router():
1403 run_one_acceptor(self.port)
1404 self.connect_peer_gateways()
1406 def connect_peer_gateways(self):
1407 for router in self.db.lookup_class('node'):
1408 if router.get_val_int('router', 0):
1409 for netuuid in router.get_networks():
1410 net = self.db.lookup(netuuid)
1412 if (gw.cluster_id == self.cluster_id and
1413 gw.net_type == self.net_type):
1414 if gw.nid != self.nid:
1417 def disconnect_peer_gateways(self):
1418 for router in self.db.lookup_class('node'):
1419 if router.get_val_int('router', 0):
1420 for netuuid in router.get_networks():
1421 net = self.db.lookup(netuuid)
1423 if (gw.cluster_id == self.cluster_id and
1424 gw.net_type == self.net_type):
1425 if gw.nid != self.nid:
1428 except CommandError, e:
1429 print "disconnect failed: ", self.name
1433 def safe_to_clean(self):
1434 return not net_is_prepared()
1437 self.info(self.net_type, self.nid, self.port)
1439 stop_acceptor(self.port)
1440 if node_is_router():
1441 self.disconnect_peer_gateways()
1442 if self.net_type == 'tcp':
1443 for hostaddr in self.db.get_hostaddr():
1444 ip = string.split(hostaddr, '/')[0]
1445 lctl.del_interface(self.net_type, ip)
1447 def correct_level(self, level, op=None):
1450 class RouteTable(Module):
1451 def __init__(self,db):
1452 Module.__init__(self, 'ROUTES', db)
1454 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1456 # only setup connections for tcp, openib, and iib NALs
1458 if not net_type in ('tcp','openib','iib','vib','ra'):
1461 # connect to target if route is to single node and this node is the gw
1462 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1463 if not local_cluster(net_type, tgt_cluster_id):
1464 panic("target", lo, " not on the local cluster")
1465 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1466 # connect to gateway if this node is not the gw
1467 elif (local_cluster(net_type, gw_cluster_id)
1468 and not local_interface(net_type, gw_cluster_id, gw)):
1469 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1474 panic("no server for nid", lo)
1477 return Network(srvdb)
1480 if not config.record and net_is_prepared():
1483 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1484 lctl.add_route(net_type, gw, lo, hi)
1485 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1489 def safe_to_clean(self):
1490 return not net_is_prepared()
1493 if net_is_prepared():
1494 # the network is still being used, don't clean it up
1496 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1497 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1500 lctl.disconnect(srv)
1501 except CommandError, e:
1502 print "disconnect failed: ", self.name
1507 lctl.del_route(net_type, gw, lo, hi)
1508 except CommandError, e:
1509 print "del_route failed: ", self.name
1513 class Management(Module):
1514 def __init__(self, db):
1515 Module.__init__(self, 'MGMT', db)
1517 def add_module(self, manager):
1518 manager.add_lustre_module('lvfs', 'lvfs')
1519 manager.add_lustre_module('obdclass', 'obdclass')
1520 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1521 manager.add_lustre_module('mgmt', 'mgmt_svc')
1524 if not config.record and is_prepared(self.name):
1527 lctl.newdev("mgmt", self.name, self.uuid)
1529 def safe_to_clean(self):
1533 if is_prepared(self.name):
1534 Module.cleanup(self)
1536 def correct_level(self, level, op=None):
1539 # This is only needed to load the modules; the LDLM device
1540 # is now created automatically.
1542 def __init__(self,db):
1543 Module.__init__(self, 'LDLM', db)
1545 def add_module(self, manager):
1546 manager.add_lustre_module('lvfs', 'lvfs')
1547 manager.add_lustre_module('obdclass', 'obdclass')
1548 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1556 def correct_level(self, level, op=None):
# [elided in excerpt: orig. line 1559 -- the 'class LOV(Module):' header]
    def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
        """Logical Object Volume client striping over the OSCs in devlist."""
        Module.__init__(self, 'LOV', db)
        if name_override != None:
            self.name = "lov_%s" % name_override
        self.mds_uuid = self.db.get_first_ref('mds')
        self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
        self.stripe_off = self.db.get_val_int('stripeoffset', 0)
        self.pattern = self.db.get_val_int('stripepattern', 0)
        self.devlist = self.db.get_lov_tgts('lov_tgt')
        # Default stripe count covers every target.
        self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
        # [elided in excerpt: orig. lines 1570-1571]
        self.desc_uuid = self.uuid
        self.uuid = generate_client_uuid(self.name)
        self.fs_name = fs_name
        # [elided in excerpt: orig. line 1575 -- presumably 'if config_only:']
        self.config_only = 1
        # [elided in excerpt: orig. line 1577]
        self.config_only = None
        mds = self.db.lookup(self.mds_uuid)
        self.mds_name = mds.getName()
        for (obd_uuid, index, gen, active) in self.devlist:
            # [elided in excerpt: orig. lines 1582-1583]
            self.obdlist.append(obd_uuid)
            obd = self.db.lookup(obd_uuid)
            osc = get_osc(obd, self.uuid, fs_name)
            # [elided in excerpt: orig. line 1587 -- presumably 'if osc:']
            self.osclist.append((osc, index, gen, active))
            # [elided in excerpt: orig. line 1589 -- presumably 'else:']
            panic('osc not found:', obd_uuid)

    # [elided in excerpt: orig. lines 1591-1595 -- presumably the
    #  'def prepare(self):' header; the lines below are its body]
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. line 1597 -- early return]
        self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
                  self.stripe_off, self.pattern, self.devlist,
        # [elided in excerpt: orig. line 1600 -- remaining info() argument(s)]
        lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
                       self.stripe_sz, self.stripe_off, self.pattern,
                       string.join(self.obdlist))
        for (osc, index, gen, active) in self.osclist:
            target_uuid = osc.target_uuid
            # [elided in excerpt: orig. line 1606 -- presumably 'try:']
                # Only ignore connect failures with --force, which
                # isn't implemented here yet.
                osc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing OSC %s\n" % osc.uuid
                # [elided in excerpt: orig. line 1613 -- presumably 'raise e']
            lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)

    # [elided in excerpt: orig. lines 1615-1616 -- presumably the
    #  'def cleanup(self):' header; the lines below are its body]
        for (osc, index, gen, active) in self.osclist:
            target_uuid = osc.target_uuid
            # [elided in excerpt: orig. line 1619 -- presumably osc.cleanup()]
        if is_prepared(self.name):
            Module.cleanup(self)
        if self.config_only:
            panic("Can't clean up config_only LOV ", self.name)

    def add_module(self, manager):
        if self.config_only:
            panic("Can't load modules for config_only LOV ", self.name)
        for (osc, index, gen, active) in self.osclist:
            osc.add_module(manager)
            # [elided in excerpt: orig. line 1630]
        manager.add_lustre_module('lov', 'lov')

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 1634-1635 -- the return]
# [elided in excerpt: orig. line 1636 -- the 'class LMV(Module):' header]
    def __init__(self, db, uuid, fs_name, name_override = None):
        """Logical Metadata Volume client fanning out over the MDCs in devlist."""
        Module.__init__(self, 'LMV', db)
        if name_override != None:
            self.name = "lmv_%s" % name_override
        # [elided in excerpt: orig. line 1641]
        self.devlist = self.db.get_lmv_tgts('lmv_tgt')
        if self.devlist == None:
            # Fall back to plain mds references when no lmv_tgt list exists.
            self.devlist = self.db.get_refs('mds')
        # [elided in excerpt: orig. lines 1645-1646]
        self.desc_uuid = self.uuid
        # [elided in excerpt: orig. line 1648]
        self.fs_name = fs_name
        for mds_uuid in self.devlist:
            mds = self.db.lookup(mds_uuid)
            # [elided in excerpt: orig. line 1652 -- presumably 'if not mds:']
            panic("MDS not found!")
            mdc = MDC(mds, self.uuid, fs_name)
            # [elided in excerpt: orig. line 1655 -- presumably 'if mdc:']
            self.mdclist.append(mdc)
            # [elided in excerpt: orig. line 1657 -- presumably 'else:']
            panic('mdc not found:', mds_uuid)

    # [elided in excerpt: orig. lines 1659-1660 -- presumably the
    #  'def prepare(self):' header; the lines below are its body]
        if is_prepared(self.name):
            # [elided in excerpt: orig. lines 1662-1664 -- early return]
        for mdc in self.mdclist:
            # [elided in excerpt: orig. line 1666 -- presumably 'try:']
                # Only ignore connect failures with --force, which
                # isn't implemented here yet.
                mdc.prepare(ignore_connect_failure=0)
            except CommandError, e:
                print "Error preparing LMV %s\n" % mdc.uuid
                # [elided in excerpt: orig. lines 1672-1673 -- presumably 'raise e']
        lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
                       string.join(self.devlist))

    # [elided in excerpt: orig. lines 1676-1677 -- presumably the
    #  'def cleanup(self):' header; the lines below are its body]
        for mdc in self.mdclist:
            # [elided in excerpt: orig. line 1679 -- presumably mdc.cleanup()]
        if is_prepared(self.name):
            Module.cleanup(self)

    def add_module(self, manager):
        for mdc in self.mdclist:
            mdc.add_module(manager)
            # [elided in excerpt: orig. line 1686]
        manager.add_lustre_module('lmv', 'lmv')

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 1690-1691 -- the return]
class MDSDEV(Module):
    """MDS device: formats/mounts the MDS backing store, creates the 'mds'
    OBD, and records client configuration llogs via write_conf()."""

    def __init__(self,db):
        Module.__init__(self, 'MDSDEV', db)
        self.devpath = self.db.get_val('devpath','')
        self.backdevpath = self.db.get_val('backdevpath','')
        self.size = self.db.get_val_int('devsize', 0)
        self.journal_size = self.db.get_val_int('journalsize', 0)
        self.fstype = self.db.get_val('fstype', '')
        self.backfstype = self.db.get_val('backfstype', '')
        self.nspath = self.db.get_val('nspath', '')
        self.mkfsoptions = self.db.get_val('mkfsoptions', '')
        self.mountfsoptions = self.db.get_val('mountfsoptions', '')
        self.obdtype = self.db.get_val('obdtype', '')
        self.root_squash = self.db.get_val('root_squash', '')
        self.no_root_squash = self.db.get_val('no_root_squash', '')
        # overwrite the original MDSDEV name and uuid with the MDS name and uuid
        target_uuid = self.db.get_first_ref('target')
        self.mds = self.db.lookup(target_uuid)
        self.name = self.mds.getName()
        self.client_uuids = self.mds.get_refs('client')
        # [elided in excerpt: orig. lines 1712-1715]
        lmv_uuid = self.db.get_first_ref('lmv')
        if lmv_uuid != None:
            self.lmv = self.db.lookup(lmv_uuid)
            if self.lmv != None:
                # With clustered metadata the clients hang off the LMV.
                self.client_uuids = self.lmv.get_refs('client')

        # FIXME: if fstype not set, then determine based on kernel version
        self.format = self.db.get_val('autoformat', "no")
        if self.mds.get_val('failover', 0):
            self.failover_mds = 'f'
        # [elided in excerpt: orig. line 1726 -- presumably 'else:']
            self.failover_mds = 'n'
        active_uuid = get_active_target(self.mds)
        # [elided in excerpt: orig. line 1729 -- presumably 'if not active_uuid:']
            panic("No target device found:", target_uuid)
        if active_uuid == self.uuid:
            # [elided in excerpt: orig. lines 1732-1734 -- active flag set/clear]
        if self.active and config.group and config.group != self.mds.get_val('group'):
            # [elided in excerpt: orig. lines 1736-1737 -- presumably self.active = 0]

        # Default inode size for the case when neither LOV nor LMV
        # is accessible.
        self.inode_size = 256
        # [elided in excerpt: orig. line 1741]
        inode_size = self.db.get_val_int('inodesize', 0)
        if not inode_size == 0:
            self.inode_size = inode_size
        # [elided in excerpt: orig. line 1745 -- presumably 'else:', making the
        #  stripe-count-based derivation below conditional on inodesize unset]
        # find the LOV for this MDS
        lovconfig_uuid = self.mds.get_first_ref('lovconfig')
        if lovconfig_uuid or self.lmv != None:
            if self.lmv != None:
                lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
                lovconfig = self.lmv.lookup(lovconfig_uuid)
                lov_uuid = lovconfig.get_first_ref('lov')
                if lov_uuid == None:
                    panic(self.mds.getName() + ": No LOV found for lovconfig ",
                    # [elided in excerpt: orig. lines 1755-1756 -- closing
                    #  argument and presumably 'else:']
                lovconfig = self.mds.lookup(lovconfig_uuid)
                lov_uuid = lovconfig.get_first_ref('lov')
                if lov_uuid == None:
                    panic(self.mds.getName() + ": No LOV found for lovconfig ",
                    # [elided in excerpt: orig. lines 1761-1762]
            if self.lmv != None:
                lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
                lovconfig = self.lmv.lookup(lovconfig_uuid)
                lov_uuid = lovconfig.get_first_ref('lov')
            # [elided in excerpt: orig. line 1767]
            lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
            # [elided in excerpt: orig. lines 1769-1770 -- remaining LOV() args]

            # default stripe count controls default inode_size
            stripe_count = lov.stripe_cnt
            if stripe_count > 77:
                self.inode_size = 4096
            elif stripe_count > 35:
                self.inode_size = 2048
            elif stripe_count > 13:
                self.inode_size = 1024
            elif stripe_count > 3:
                self.inode_size = 512
            # [elided in excerpt: orig. line 1781 -- presumably 'else:']
                self.inode_size = 256
        # [elided in excerpt: orig. line 1783]
        self.target_dev_uuid = self.uuid
        self.uuid = target_uuid
        # [elided in excerpt: orig. lines 1786-1787]
        if self.lmv != None:
            client_uuid = self.name + "_lmv_UUID"
            self.master = LMV(self.lmv, client_uuid,
                              self.name, self.name)

    def add_module(self, manager):
        # [elided in excerpt: orig. line 1794]
        manager.add_lustre_module('mdc', 'mdc')
        manager.add_lustre_module('osc', 'osc')
        manager.add_lustre_module('ost', 'ost')
        manager.add_lustre_module('lov', 'lov')
        manager.add_lustre_module('mds', 'mds')
        # [elided in excerpt: orig. line 1800]
        if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
            manager.add_lustre_module(self.fstype, self.fstype)
        # [elided in excerpt: orig. lines 1803-1804 -- presumably 'if self.fstype:']
            manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))

        # if fstype is smfs, then we should also take care about backing
        # [elided in excerpt: orig. line 1808 -- rest of this comment]
        if self.fstype == 'smfs':
            manager.add_lustre_module(self.backfstype, self.backfstype)
            manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
        # [elided in excerpt: orig. line 1812]
        for option in string.split(self.mountfsoptions, ','):
            if option == 'snap':
                if not self.fstype == 'smfs':
                    panic("mountoptions has 'snap', but fstype is not smfs.")
                manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
                manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
        # [elided in excerpt: orig. lines 1819-1820]
        if self.master != None:
            self.master.add_module(manager)

    def get_mount_options(self, blkdev):
        """Assemble the mount option string for the MDS backing filesystem."""
        options = def_mount_options(self.fstype, 'mds')
        # [elided in excerpt: orig. line 1826]
        if config.mountfsoptions:
            # [elided in excerpt: orig. line 1828 -- presumably 'if options:']
            options = "%s,%s" %(options, config.mountfsoptions)
            # [elided in excerpt: orig. line 1830 -- presumably 'else:']
            options = config.mountfsoptions
            if self.mountfsoptions:
                options = "%s,%s" %(options, self.mountfsoptions)
        # [elided in excerpt: orig. line 1834 -- presumably 'else:']
            if self.mountfsoptions:
                # [elided in excerpt: orig. line 1836 -- presumably 'if options:']
                options = "%s,%s" %(options, self.mountfsoptions)
                # [elided in excerpt: orig. line 1838 -- presumably 'else:']
                options = self.mountfsoptions
        # [elided in excerpt: orig. line 1840]
        if self.fstype == 'smfs':
            # [elided in excerpt: orig. line 1842 -- presumably 'if options:']
            options = "%s,type=%s,dev=%s" %(options,
                                            self.backfstype, blkdev)
            # [elided in excerpt: orig. line 1845 -- presumably 'else:']
            options = "type=%s,dev=%s" %(self.backfstype, blkdev)
    # [elided in excerpt: orig. lines 1847-1849 -- 'return options' and the
    #  'def prepare(self):' header; the lines below are prepare()'s body]
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. lines 1851-1852 -- return/active check]
            debug(self.uuid, "not active")
            # [elided in excerpt: orig. lines 1854-1855]

        # run write_conf automatically, if --reformat used
        # [elided in excerpt: orig. lines 1857-1860]
        if self.master != None:
            self.master.prepare()
        # [elided in excerpt: orig. line 1863]
        # never reformat here
        blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
                           self.format, self.journal_size, self.inode_size,
                           self.mkfsoptions, self.backfstype, self.backdevpath)
        # [elided in excerpt: orig. line 1868]
        if not is_prepared('MDT'):
            lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
        # [elided in excerpt: orig. line 1871]
        if self.fstype == 'smfs':
            realdev = self.fstype
        # [elided in excerpt: orig. lines 1874-1876 -- presumably 'else: realdev = blkdev']

        if self.obdtype == None:
            self.obdtype = 'dumb'
        # [elided in excerpt: orig. line 1879]
        if self.master == None:
            master_name = 'dumb'
        # [elided in excerpt: orig. line 1882 -- presumably 'else:']
            master_name = self.master.name
        # [elided in excerpt: orig. line 1884]
        if self.client_uuids == None:
            profile_name = 'dumb'
        # [elided in excerpt: orig. line 1887 -- presumably 'else:']
            profile_name = self.name
        # [elided in excerpt: orig. line 1889]
        mountfsoptions = self.get_mount_options(blkdev)
        # [elided in excerpt: orig. line 1891]
        self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
                  self.format, master_name, profile_name, self.obdtype)
        # [elided in excerpt: orig. line 1894 -- presumably 'try:', matching
        #  the 'except CommandError' below]
        lctl.newdev("mds", self.name, self.uuid,
                    setup = "%s %s %s %s %s %s" %(realdev,
                        self.fstype, profile_name, mountfsoptions,
                        master_name, self.obdtype))
        # [elided in excerpt: orig. line 1899]
        if development_mode():
            procentry = "/proc/fs/lustre/mds/grp_hash_upcall"
            upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/l_getgroups")
            if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
                print "MDS Warning: failed to set group-hash upcall"
            # [elided in excerpt: orig. line 1905 -- presumably 'else:']
                run("echo ", upcall, " > ", procentry)
        # [elided in excerpt: orig. line 1907]
        except CommandError, e:
            # [elided in excerpt: orig. line 1909 -- presumably 'if e.rc == 2:']
            panic("MDS is missing the config log. Need to run " +
                  "lconf --write_conf.")
            # [elided in excerpt: orig. lines 1912-1914 -- presumably 'else: raise e']
        if config.root_squash == None:
            config.root_squash = self.root_squash
        if config.no_root_squash == None:
            config.no_root_squash = self.no_root_squash
        if config.root_squash:
            if config.no_root_squash:
                nsnid = config.no_root_squash
            # [elided in excerpt: orig. lines 1922-1923 -- presumably 'else:' branch]
            lctl.root_squash(self.name, config.root_squash, nsnid)

    def write_conf(self):
        """Mount the MDS once and record per-client configuration llogs."""
        if not self.client_uuids:
            # [elided in excerpt: orig. lines 1928-1930 -- early return]
        if not is_prepared(self.name):
            blkdev = block_dev(self.devpath, self.size, self.fstype,
                               config.reformat, self.format, self.journal_size,
                               self.inode_size, self.mkfsoptions,
                               self.backfstype, self.backdevpath)
            # [elided in excerpt: orig. line 1936]
            if self.fstype == 'smfs':
                realdev = self.fstype
            # [elided in excerpt: orig. lines 1939-1941 -- presumably 'else: realdev = blkdev']

            # Even for writing logs we mount mds with supplied mount options
            # because it will not mount smfs (if used) otherwise.
            mountfsoptions = self.get_mount_options(blkdev)
            # [elided in excerpt: orig. line 1945]
            if self.obdtype == None:
                self.obdtype = 'dumb'
            # [elided in excerpt: orig. line 1948]
            self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
                      self.format, "dumb", "dumb", self.obdtype)
            # [elided in excerpt: orig. line 1951]
            lctl.newdev("mds", self.name, self.uuid,
                        setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
                                                     'dumb', mountfsoptions,
                                                     'dumb', self.obdtype))
        # [elided in excerpt: orig. lines 1956-1957]
        # record logs for all MDS clients
        for obd_uuid in self.client_uuids:
            log("recording client:", obd_uuid)
            # [elided in excerpt: orig. line 1961]
            client_uuid = generate_client_uuid(self.name)
            client = VOSC(self.db.lookup(obd_uuid), client_uuid,
                          self.name, self.name)
            # Mount log: commands replayed when a client mounts.
            lctl.clear_log(self.name, self.name)
            lctl.record(self.name, self.name)
            # [elided in excerpt: orig. line 1968 -- presumably client.prepare()]
            lctl.mount_option(self.name, client.get_name(), "")
            # [elided in excerpt: orig. line 1970 -- presumably lctl.end_record()]
            process_updates(self.db, self.name, self.name, client)
            # [elided in excerpt: orig. lines 1972-1973]
            # Matching '-clean' log, replayed at unconfigure time.
            lctl.clear_log(self.name, self.name + '-clean')
            lctl.record(self.name, self.name + '-clean')
            # [elided in excerpt: orig. line 1976 -- presumably client.cleanup()]
            lctl.del_mount_option(self.name)
            # [elided in excerpt: orig. line 1978 -- presumably lctl.end_record()]
            process_updates(self.db, self.name, self.name + '-clean', client)
        # [elided in excerpt: orig. lines 1980-1982]
        # record logs for each client
        # [elided in excerpt: orig. lines 1984-1988 -- noexec handling and
        #  presumably 'if config.ldapurl:']
            config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
        # [elided in excerpt: orig. line 1990 -- presumably 'else:']
            config_options = CONFIG_FILE
        # [elided in excerpt: orig. line 1992]
        for node_db in self.db.lookup_class('node'):
            client_name = node_db.getName()
            for prof_uuid in node_db.get_refs('profile'):
                prof_db = node_db.lookup(prof_uuid)
                # refactor this into a function to test "clientness"
                # [elided in excerpt: orig. line 1998]
                for ref_class, ref_uuid in prof_db.get_all_refs():
                    if ref_class in ('mountpoint','echoclient'):
                        debug("recording", client_name)
                        old_noexec = config.noexec
                        # [elided in excerpt: orig. line 2003]
                        # Re-invoke lconf itself to record this client's logs.
                        ret, out = run (sys.argv[0], noexec_opt,
                                        " -v --record --nomod",
                                        "--record_log", client_name,
                                        "--record_device", self.name,
                                        "--node", client_name,
                        # [elided in excerpt: orig. lines 2009-2010 -- remaining run() args]
                        for s in out: log("record> ", string.strip(s))
                        ret, out = run (sys.argv[0], noexec_opt,
                                        "--cleanup -v --record --nomod",
                                        "--record_log", client_name + "-clean",
                                        "--record_device", self.name,
                                        "--node", client_name,
                        # [elided in excerpt: orig. lines 2017-2018 -- remaining run() args]
                        for s in out: log("record> ", string.strip(s))
                        config.noexec = old_noexec
        # [elided in excerpt: orig. lines 2021-2022 -- presumably 'try:']
        lctl.cleanup(self.name, self.uuid, 0, 0)
        except CommandError, e:
            log(self.module_name, "cleanup failed: ", self.name)
        # [elided in excerpt: orig. lines 2026-2027]
        Module.cleanup(self)
        # [elided in excerpt: orig. line 2029]
        clean_dev(self.devpath, self.fstype, self.backfstype,
        # [elided in excerpt: orig. lines 2031-2032 -- remaining clean_dev() args]

    def msd_remaining(self):
        # Report whether any 'mds' devices are still configured.
        out = lctl.device_list()
        # [elided in excerpt: orig. line 2035 -- presumably 'for s in out:']
        if string.split(s)[2] in ('mds',):
            # [elided in excerpt: orig. lines 2037-2038 -- presumably 'return 1']

    def safe_to_clean(self):
        # [elided in excerpt: orig. lines 2040-2041 -- the return]
    def safe_to_clean_modules(self):
        return not self.msd_remaining()
    # [elided in excerpt: orig. lines 2044-2046 -- the 'def cleanup(self):'
    #  header and active check; the lines below are cleanup()'s body]
        debug(self.uuid, "not active")
        # [elided in excerpt: orig. lines 2048-2049]
        if is_prepared(self.name):
            # [elided in excerpt: orig. line 2051 -- presumably 'try:']
            lctl.cleanup(self.name, self.uuid, config.force,
            # [elided in excerpt: orig. line 2053 -- remaining cleanup() arg]
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
            # [elided in excerpt: orig. lines 2056-2057]
            Module.cleanup(self)
        # [elided in excerpt: orig. line 2059]
        if self.master != None:
            self.master.cleanup()
        if not self.msd_remaining() and is_prepared('MDT'):
            # [elided in excerpt: orig. line 2063 -- presumably 'try:']
            lctl.cleanup("MDT", "MDT_UUID", config.force,
            # [elided in excerpt: orig. line 2065 -- remaining cleanup() arg]
            except CommandError, e:
                print "cleanup failed: ", self.name
            # [elided in excerpt: orig. lines 2068-2070]
        clean_dev(self.devpath, self.fstype, self.backfstype,
        # [elided in excerpt: orig. lines 2072-2073 -- remaining clean_dev() args]

    def correct_level(self, level, op=None):
        #if self.master != None:
        # [elided in excerpt: orig. lines 2076-2079 -- rest of method]
# [elided in excerpt: orig. line 2079 -- presumably 'class OSD(Module):' header]
    def __init__(self, db):
        """OST device: formats/mounts the OST backing store and creates the
        osdtype (obdfilter or obdecho) device."""
        Module.__init__(self, 'OSD', db)
        self.osdtype = self.db.get_val('osdtype')
        self.devpath = self.db.get_val('devpath', '')
        self.backdevpath = self.db.get_val('backdevpath', '')
        self.size = self.db.get_val_int('devsize', 0)
        self.journal_size = self.db.get_val_int('journalsize', 0)
        self.inode_size = self.db.get_val_int('inodesize', 0)
        self.mkfsoptions = self.db.get_val('mkfsoptions', '')
        self.mountfsoptions = self.db.get_val('mountfsoptions', '')
        self.fstype = self.db.get_val('fstype', '')
        self.backfstype = self.db.get_val('backfstype', '')
        self.nspath = self.db.get_val('nspath', '')
        target_uuid = self.db.get_first_ref('target')
        ost = self.db.lookup(target_uuid)
        self.name = ost.getName()
        self.format = self.db.get_val('autoformat', 'yes')
        if ost.get_val('failover', 0):
            self.failover_ost = 'f'
        # [elided in excerpt: orig. line 2099 -- presumably 'else:']
            self.failover_ost = 'n'
        # [elided in excerpt: orig. line 2101]
        active_uuid = get_active_target(ost)
        # [elided in excerpt: orig. line 2103 -- presumably 'if not active_uuid:']
            panic("No target device found:", target_uuid)
        if active_uuid == self.uuid:
            # [elided in excerpt: orig. lines 2106-2108 -- active flag set/clear]
        if self.active and config.group and config.group != ost.get_val('group'):
            # [elided in excerpt: orig. lines 2110-2111 -- presumably self.active = 0]
        self.target_dev_uuid = self.uuid
        self.uuid = target_uuid

    def add_module(self, manager):
        # [elided in excerpt: orig. line 2116]
        manager.add_lustre_module('ost', 'ost')
        # [elided in excerpt: orig. line 2118]
        if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
            manager.add_lustre_module(self.fstype, self.fstype)
        # [elided in excerpt: orig. lines 2121-2122 -- presumably 'if self.fstype:']
            manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
        # [elided in excerpt: orig. line 2124]
        if self.fstype == 'smfs':
            manager.add_lustre_module(self.backfstype, self.backfstype)
            manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
        # [elided in excerpt: orig. line 2128]
        # NOTE(review): this iterates the option *string* character by
        # character, so no element can ever equal 'snap'.  MDSDEV.add_module
        # uses string.split(self.mountfsoptions, ',') for the same check --
        # looks like a latent bug here; confirm against upstream lconf.
        for option in self.mountfsoptions:
            if option == 'snap':
                if not self.fstype == 'smfs':
                    panic("mountoptions with snap, but fstype is not smfs\n")
                manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
                manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
        # [elided in excerpt: orig. line 2135]
        manager.add_lustre_module(self.osdtype, self.osdtype)

    def get_mount_options(self, blkdev):
        """Assemble the mount option string for the OST backing filesystem."""
        options = def_mount_options(self.fstype, 'ost')
        # [elided in excerpt: orig. line 2140]
        if config.mountfsoptions:
            # [elided in excerpt: orig. line 2142 -- presumably 'if options:']
            options = "%s,%s" %(options, config.mountfsoptions)
            # [elided in excerpt: orig. line 2144 -- presumably 'else:']
            options = config.mountfsoptions
            if self.mountfsoptions:
                options = "%s,%s" %(options, self.mountfsoptions)
        # [elided in excerpt: orig. line 2148 -- presumably 'else:']
            if self.mountfsoptions:
                # [elided in excerpt: orig. line 2150 -- presumably 'if options:']
                options = "%s,%s" %(options, self.mountfsoptions)
                # [elided in excerpt: orig. line 2152 -- presumably 'else:']
                options = self.mountfsoptions
        # [elided in excerpt: orig. line 2154]
        if self.fstype == 'smfs':
            # [elided in excerpt: orig. line 2156 -- presumably 'if options:']
            options = "%s,type=%s,dev=%s" %(options,
                                            self.backfstype, blkdev)
            # [elided in excerpt: orig. line 2159 -- presumably 'else:']
            options = "type=%s,dev=%s" %(self.backfstype,
    # [elided in excerpt: orig. lines 2161-2163 -- closing argument, the
    #  return, and the 'def prepare(self):' header; the lines below are
    #  prepare()'s body]
        # need to check /proc/mounts and /etc/mtab before
        # formatting anything.
        # FIXME: check if device is already formatted.
        if is_prepared(self.name):
            # [elided in excerpt: orig. lines 2169-2170 -- return/active check]
            debug(self.uuid, "not active")
            # [elided in excerpt: orig. lines 2172-2173]
        if self.osdtype == 'obdecho':
            # [elided in excerpt: orig. lines 2175-2176 -- echo branch / 'else:']
            blkdev = block_dev(self.devpath, self.size, self.fstype,
                               config.reformat, self.format, self.journal_size,
                               self.inode_size, self.mkfsoptions, self.backfstype,
            # [elided in excerpt: orig. lines 2180-2181 -- remaining block_dev() args]
        if self.fstype == 'smfs':
            realdev = self.fstype
        # [elided in excerpt: orig. lines 2184-2186 -- presumably 'else: realdev = blkdev']

        mountfsoptions = self.get_mount_options(blkdev)
        # [elided in excerpt: orig. line 2188]
        self.info(self.osdtype, realdev, mountfsoptions, self.fstype,
                  self.size, self.format, self.journal_size, self.inode_size)
        # [elided in excerpt: orig. line 2191]
        lctl.newdev(self.osdtype, self.name, self.uuid,
                    setup ="%s %s %s %s" %(realdev, self.fstype,
        # [elided in excerpt: orig. lines 2194-2195 -- remaining setup args]
        if not is_prepared('OSS'):
            lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")

    def osd_remaining(self):
        # Report whether any obdfilter/obdecho devices are still configured.
        out = lctl.device_list()
        # [elided in excerpt: orig. line 2201 -- presumably 'for s in out:']
        if string.split(s)[2] in ('obdfilter', 'obdecho'):
            # [elided in excerpt: orig. lines 2203-2204 -- presumably 'return 1']

    def safe_to_clean(self):
        # [elided in excerpt: orig. lines 2206-2207 -- the return]
    def safe_to_clean_modules(self):
        return not self.osd_remaining()
    # [elided in excerpt: orig. lines 2210-2212 -- the 'def cleanup(self):'
    #  header and active check; the lines below are cleanup()'s body]
        debug(self.uuid, "not active")
        # [elided in excerpt: orig. line 2214]
        if is_prepared(self.name):
            # [elided in excerpt: orig. lines 2216-2217 -- presumably 'try:']
            lctl.cleanup(self.name, self.uuid, config.force,
            # [elided in excerpt: orig. line 2219 -- remaining cleanup() arg]
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
            # [elided in excerpt: orig. lines 2222-2223]
        if not self.osd_remaining() and is_prepared('OSS'):
            # [elided in excerpt: orig. line 2225 -- presumably 'try:']
            lctl.cleanup("OSS", "OSS_UUID", config.force,
            # [elided in excerpt: orig. line 2227 -- remaining cleanup() arg]
            except CommandError, e:
                print "cleanup failed: ", self.name
            # [elided in excerpt: orig. lines 2230-2231]
        if not self.osdtype == 'obdecho':
            clean_dev(self.devpath, self.fstype, self.backfstype,
            # [elided in excerpt: orig. lines 2234-2235 -- remaining clean_dev() args]

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 2237-2238 -- the return]
def mgmt_uuid_for_fs(mtpt_name):
    """Resolve mountpoint name -> filesystem -> its mgmt service uuid."""
    # [elided in excerpt: orig. lines 2240-2241 -- presumably a guard/early return]
    mtpt_db = toplustreDB.lookup_name(mtpt_name)
    fs_uuid = mtpt_db.get_first_ref('filesystem')
    fs = toplustreDB.lookup(fs_uuid)
    # [elided in excerpt: orig. lines 2245-2246 -- presumably 'if not fs: return None']
    return fs.get_first_ref('mgmt')
# Generic client module, used by OSC and MDC
class Client(Module):
    def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
    # [elided in excerpt: orig. line 2252 -- remaining parameter(s),
    #  presumably 'module_dir=None):']
        self.target_name = tgtdb.getName()
        self.target_uuid = tgtdb.getUUID()
        self.module_dir = module_dir
        self.module = module
        # [elided in excerpt: orig. lines 2257-2259]
        self.tgt_dev_uuid = get_active_target(tgtdb)
        if not self.tgt_dev_uuid:
            panic("No target device found for target(1):", self.target_name)
        # [elided in excerpt: orig. lines 2263-2266]
        self.module = module
        self.module_name = string.upper(module)
        # [elided in excerpt: orig. line 2269 -- presumably 'if not self_name:']
        # Default device name encodes module, host, target and filesystem.
        self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
                                     self.target_name, fs_name)
        # [elided in excerpt: orig. line 2272 -- presumably 'else:']
        self.name = self_name
        # [elided in excerpt: orig. line 2274]
        self.lookup_server(self.tgt_dev_uuid)
        mgmt_uuid = mgmt_uuid_for_fs(fs_name)
        # [elided in excerpt: orig. line 2277 -- presumably 'if mgmt_uuid:']
        self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
        # [elided in excerpt: orig. lines 2279-2280 -- presumably 'else:' branch]
        self.fs_name = fs_name
        if not self.module_dir:
            self.module_dir = module

    def add_module(self, manager):
        manager.add_lustre_module(self.module_dir, self.module)

    def lookup_server(self, srv_uuid):
        """ Lookup a server's network information """
        self._server_nets = get_ost_net(self.db, srv_uuid)
        if len(self._server_nets) == 0:
            panic ("Unable to find a server for:", srv_uuid)
    # [elided in excerpt: orig. lines 2293-2296]
    def get_servers(self):
        return self._server_nets

    def prepare(self, ignore_connect_failure = 0):
        self.info(self.target_uuid)
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. lines 2303-2304 -- early return and
            #  presumably 'try:' matching the 'except' below]
        srv = choose_local_server(self.get_servers())
        # [elided in excerpt: orig. lines 2306-2308 -- presumably local
        #  connect branch and 'else:' for the routed case]
        routes = find_route(self.get_servers())
        if len(routes) == 0:
            panic ("no route to", self.target_uuid)
        for (srv, r) in routes:
            lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
        except CommandError, e:
            if not ignore_connect_failure:
                # [elided in excerpt: orig. lines 2316-2317 -- presumably 'raise e']
        if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
            debug("%s inactive" % self.target_uuid)
            inactive_p = "inactive"
        # [elided in excerpt: orig. line 2321 -- presumably 'else:']
        debug("%s active" % self.target_uuid)
        # [elided in excerpt: orig. line 2323 -- presumably 'inactive_p = ""']
        lctl.newdev(self.module, self.name, self.uuid,
                    setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
                                            inactive_p, self.mgmt_name))
    # [elided in excerpt: orig. lines 2327-2328 -- the 'def cleanup(self):'
    #  header; the lines below are its body]
        if is_prepared(self.name):
            Module.cleanup(self)
            # [elided in excerpt: orig. line 2331 -- presumably 'try:']
            srv = choose_local_server(self.get_servers())
            # [elided in excerpt: orig. line 2333 -- presumably 'if srv:']
            lctl.disconnect(srv)
            # [elided in excerpt: orig. line 2335 -- presumably 'else:']
            for (srv, r) in find_route(self.get_servers()):
                lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
            except CommandError, e:
                log(self.module_name, "cleanup failed: ", self.name)
    # [elided in excerpt: orig. lines 2340-2342]
    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 2344-2345 -- the return]
    def deactivate(self):
        # [elided in excerpt: orig. line 2347 -- presumably 'try:']
        lctl.deactivate(self.name)
        except CommandError, e:
            log(self.module_name, "deactivate failed: ", self.name)
# [elided in excerpt: orig. line 2354 -- presumably 'class MDC(Client):' header]
    def __init__(self, db, uuid, fs_name):
        # A metadata client is simply a Client speaking the 'mdc' module.
        Client.__init__(self, db, uuid, 'mdc', fs_name)

    def permits_inactive(self):
        # [elided in excerpt: orig. line 2359 -- the return value]
# [elided in excerpt: orig. line 2361 -- presumably 'class OSC(Client):' header]
    def __init__(self, db, uuid, fs_name):
        # An object storage client is simply a Client speaking the 'osc' module.
        Client.__init__(self, db, uuid, 'osc', fs_name)

    def permits_inactive(self):
        # [elided in excerpt: orig. lines 2366-2367 -- the return value]
def mgmtcli_name_for_uuid(uuid):
    """Return the canonical management-client device name for *uuid*."""
    # Same result as the old '%s'-formatting: a fixed prefix plus the uuid.
    return 'MGMTCLI_' + str(uuid)
class ManagementClient(Client):
    """Client-side device for connecting to a management service."""

    def __init__(self, db, uuid):
        # The device name derives from the mgmt service's own uuid, so every
        # client of one service shares a name; fs_name is passed as "".
        name = mgmtcli_name_for_uuid(db.getUUID())
        Client.__init__(self, db, uuid, 'mgmt_cli', '',
                        self_name = name, module_dir = 'mgmt')
class CMOBD(Module):
    """Cache-Master OBD: pairs a master device with a cache device."""

    def __init__(self, db):
        Module.__init__(self, 'CMOBD', db)
        self.name = self.db.getName();
        self.uuid = generate_client_uuid(self.name)
        self.master_uuid = self.db.get_first_ref('masterobd')
        self.cache_uuid = self.db.get_first_ref('cacheobd')
        # [elided in excerpt: orig. line 2384]
        master_obd = self.db.lookup(self.master_uuid)
        # [elided in excerpt: orig. line 2386 -- presumably 'if not master_obd:']
        panic('master obd not found:', self.master_uuid)
        # [elided in excerpt: orig. line 2388]
        cache_obd = self.db.lookup(self.cache_uuid)
        # [elided in excerpt: orig. line 2390 -- presumably 'if not cache_obd:']
        panic('cache obd not found:', self.cache_uuid)
        # [elided in excerpt: orig. lines 2392-2395]
        master_class = master_obd.get_class()
        cache_class = cache_obd.get_class()
        # [elided in excerpt: orig. line 2398]
        # Instantiate the right client wrapper for each side.
        if master_class == 'ost' or master_class == 'lov':
            client_uuid = "%s_lov_master_UUID" % (self.name)
            self.master = LOV(master_obd, client_uuid, self.name);
        elif master_class == 'mds':
            self.master = get_mdc(db, self.name, self.master_uuid)
        elif master_class == 'lmv':
            client_uuid = "%s_lmv_master_UUID" % (self.name)
            self.master = LMV(master_obd, client_uuid, self.name);
        # [elided in excerpt: orig. line 2407 -- presumably 'else:']
        panic("unknown master obd class '%s'" %(master_class))
        # [elided in excerpt: orig. line 2409]
        if cache_class == 'ost' or cache_class == 'lov':
            client_uuid = "%s_lov_cache_UUID" % (self.name)
            self.cache = LOV(cache_obd, client_uuid, self.name);
        elif cache_class == 'mds':
            self.cache = get_mdc(db, self.name, self.cache_uuid)
        elif cache_class == 'lmv':
            client_uuid = "%s_lmv_cache_UUID" % (self.name)
            self.cache = LMV(cache_obd, client_uuid, self.name);
        # [elided in excerpt: orig. line 2418 -- presumably 'else:']
        panic("unknown cache obd class '%s'" %(cache_class))
    # [elided in excerpt: orig. lines 2420-2421 -- the 'def prepare(self):'
    #  header; the lines below are its body]
        self.master.prepare()
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. line 2424 -- early return]
        self.info(self.master_uuid, self.cache_uuid)
        # Note: setup passes device *uuids* (COBD passes names).
        lctl.newdev("cmobd", self.name, self.uuid,
                    setup ="%s %s" %(self.master.uuid,
        # [elided in excerpt: orig. lines 2428-2435 -- remaining setup
        #  argument and intervening method(s)]

    def get_master_name(self):
        return self.master.name

    def get_cache_name(self):
        return self.cache.name
    # [elided in excerpt: orig. lines 2441-2442 -- the 'def cleanup(self):'
    #  header; the lines below are its body]
        if is_prepared(self.name):
            Module.cleanup(self)
        # [elided in excerpt: orig. line 2445]
        self.master.cleanup()

    def add_module(self, manager):
        manager.add_lustre_module('cmobd', 'cmobd')
        self.master.add_module(manager)

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 2453-2454 -- the return]
# [elided in excerpt: orig. line 2455 -- presumably 'class COBD(Module):' header]
    def __init__(self, db, uuid, name):
        """Caching OBD: pairs a master device with a cache device."""
        Module.__init__(self, 'COBD', db)
        self.name = self.db.getName();
        self.uuid = generate_client_uuid(self.name)
        self.master_uuid = self.db.get_first_ref('masterobd')
        self.cache_uuid = self.db.get_first_ref('cacheobd')
        # [elided in excerpt: orig. line 2462]
        master_obd = self.db.lookup(self.master_uuid)
        # [elided in excerpt: orig. line 2464 -- presumably 'if not master_obd:']
        panic('master obd not found:', self.master_uuid)
        # [elided in excerpt: orig. line 2466]
        cache_obd = self.db.lookup(self.cache_uuid)
        # [elided in excerpt: orig. line 2468 -- presumably 'if not cache_obd:']
        panic('cache obd not found:', self.cache_uuid)
        # [elided in excerpt: orig. lines 2470-2473]
        master_class = master_obd.get_class()
        cache_class = cache_obd.get_class()
        # [elided in excerpt: orig. line 2476]
        # NOTE(review): the LOV/MDC branches use the 'name' parameter while
        # the LMV branches use self.name; CMOBD.__init__ uses self.name
        # throughout.  Confirm this asymmetry is intentional.
        if master_class == 'ost' or master_class == 'lov':
            client_uuid = "%s_lov_master_UUID" % (self.name)
            self.master = LOV(master_obd, client_uuid, name);
        elif master_class == 'mds':
            self.master = get_mdc(db, name, self.master_uuid)
        elif master_class == 'lmv':
            client_uuid = "%s_lmv_master_UUID" % (self.name)
            self.master = LMV(master_obd, client_uuid, self.name);
        # [elided in excerpt: orig. line 2485 -- presumably 'else:']
        panic("unknown master obd class '%s'" %(master_class))
        # [elided in excerpt: orig. line 2487]
        if cache_class == 'ost' or cache_class == 'lov':
            client_uuid = "%s_lov_cache_UUID" % (self.name)
            self.cache = LOV(cache_obd, client_uuid, name);
        elif cache_class == 'mds':
            self.cache = get_mdc(db, name, self.cache_uuid)
        elif cache_class == 'lmv':
            client_uuid = "%s_lmv_cache_UUID" % (self.name)
            self.cache = LMV(cache_obd, client_uuid, self.name);
        # [elided in excerpt: orig. line 2496 -- presumably 'else:']
        panic("unknown cache obd class '%s'" %(cache_class))
    # [elided in excerpt: orig. lines 2498-2504]
    def get_master_name(self):
        return self.master.name

    def get_cache_name(self):
        return self.cache.name
    # [elided in excerpt: orig. lines 2510-2511 -- the 'def prepare(self):'
    #  header; the lines below are its body]
        self.master.prepare()
        self.cache.prepare()
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. line 2515 -- early return]
        self.info(self.master_uuid, self.cache_uuid)
        # Note: setup passes device *names* here (CMOBD passes uuids).
        lctl.newdev("cobd", self.name, self.uuid,
                    setup ="%s %s" %(self.master.name,
    # [elided in excerpt: orig. lines 2519-2521 -- remaining setup argument
    #  and the 'def cleanup(self):' header; the lines below are its body]
        if is_prepared(self.name):
            Module.cleanup(self)
        self.master.cleanup()
        self.cache.cleanup()

    def add_module(self, manager):
        manager.add_lustre_module('cobd', 'cobd')
        self.master.add_module(manager)
# virtual interface for OSC and LOV
# [elided in excerpt: orig. line 2532 -- presumably 'class VOSC(Module):' header]
    def __init__(self, db, client_uuid, name, name_override = None):
        """Wrap whichever object client (LOV, COBD or plain OSC) db describes."""
        Module.__init__(self, 'VOSC', db)
        if db.get_class() == 'lov':
            self.osc = LOV(db, client_uuid, name, name_override)
            # [elided in excerpt: orig. line 2537]
        elif db.get_class() == 'cobd':
            self.osc = COBD(db, client_uuid, name)
        # [elided in excerpt: orig. lines 2540-2541 -- presumably 'else:']
        self.osc = OSC(db, client_uuid, name)
    # [elided in excerpt: orig. lines 2543-2545 -- presumably 'def get_uuid(self):']
        return self.osc.get_uuid()
    # [elided in excerpt: orig. lines 2547-2548 -- presumably 'def get_name(self):']
        return self.osc.get_name()
    # [elided in excerpt: orig. lines 2550-2556 -- presumably prepare()/cleanup() delegates]
    def add_module(self, manager):
        self.osc.add_module(manager)

    def correct_level(self, level, op=None):
        return self.osc.correct_level(level, op)
# virtual interface for MDC and LMV
# [elided in excerpt: orig. line 2564 -- presumably 'class VMDC(Module):' header]
    def __init__(self, db, client_uuid, name, name_override = None):
        """Wrap whichever metadata client (LMV, COBD or plain MDC) db describes."""
        Module.__init__(self, 'VMDC', db)
        if db.get_class() == 'lmv':
            self.mdc = LMV(db, client_uuid, name, name_override)
        elif db.get_class() == 'cobd':
            self.mdc = COBD(db, client_uuid, name)
        # [elided in excerpt: orig. line 2571 -- presumably 'else:']
        self.mdc = MDC(db, client_uuid, name)
    # [elided in excerpt: orig. lines 2573-2574 -- presumably 'def get_uuid(self):']
        return self.mdc.uuid
    # [elided in excerpt: orig. lines 2576-2577 -- presumably 'def get_name(self):']
        return self.mdc.name
    # [elided in excerpt: orig. lines 2579-2585 -- presumably prepare()/cleanup() delegates]
    def add_module(self, manager):
        self.mdc.add_module(manager)

    def correct_level(self, level, op=None):
        return self.mdc.correct_level(level, op)
class ECHO_CLIENT(Module):
    """echo_client device layered over a VOSC, used for obdecho testing."""

    def __init__(self,db):
        Module.__init__(self, 'ECHO_CLIENT', db)
        self.obd_uuid = self.db.get_first_ref('obd')
        obd = self.db.lookup(self.obd_uuid)
        self.uuid = generate_client_uuid(self.name)
        self.osc = VOSC(obd, self.uuid, self.name)
    # [elided in excerpt: orig. lines 2599-2600 -- the 'def prepare(self):'
    #  header; the lines below are its body]
        if not config.record and is_prepared(self.name):
            # [elided in excerpt: orig. lines 2602-2603 -- early return]
        self.osc.prepare() # XXX This is so cheating. -p
        self.info(self.obd_uuid)
        # [elided in excerpt: orig. line 2606]
        lctl.newdev("echo_client", self.name, self.uuid,
                    setup = self.osc.get_name())
    # [elided in excerpt: orig. lines 2609-2610 -- the 'def cleanup(self):'
    #  header; the lines below are its body]
        if is_prepared(self.name):
            Module.cleanup(self)
        # [elided in excerpt: orig. lines 2613-2614 -- presumably self.osc.cleanup()]
    def add_module(self, manager):
        self.osc.add_module(manager)
        manager.add_lustre_module('obdecho', 'obdecho')

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 2620-2621 -- the return]
def generate_client_uuid(name):
    """Generate a pseudo-random client uuid embedding up to 19 chars of *name*.

    Uses random.random(), so NOT cryptographically strong -- it is only
    used here to build unique-enough device names.
    """
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           # [elided in excerpt: orig. line
                                           #  2624 -- the second format
                                           #  argument, presumably 'name']
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    # Truncate to at most 36 characters.
    return client_uuid[:36]
class Mountpoint(Module):
    """Client mountpoint: builds VOSC + VMDC clients and mounts lustre_lite."""

    def __init__(self,db):
        Module.__init__(self, 'MTPT', db)
        self.path = self.db.get_val('path')
        self.clientoptions = self.db.get_val('clientoptions', '')
        self.fs_uuid = self.db.get_first_ref('filesystem')
        fs = self.db.lookup(self.fs_uuid)
        # Prefer a clustered-metadata (lmv) target; fall back to plain mds.
        self.mds_uuid = fs.get_first_ref('lmv')
        if not self.mds_uuid:
            self.mds_uuid = fs.get_first_ref('mds')
        self.obd_uuid = fs.get_first_ref('obd')
        self.mgmt_uuid = fs.get_first_ref('mgmt')
        client_uuid = generate_client_uuid(self.name)
        # [elided in excerpt: orig. line 2642]
        ost = self.db.lookup(self.obd_uuid)
        # [elided in excerpt: orig. line 2644 -- presumably 'if not ost:']
        panic("no ost: ", self.obd_uuid)
        # [elided in excerpt: orig. line 2646]
        mds = self.db.lookup(self.mds_uuid)
        # [elided in excerpt: orig. line 2648 -- presumably 'if not mds:']
        panic("no mds: ", self.mds_uuid)
        # [elided in excerpt: orig. line 2650]
        self.vosc = VOSC(ost, client_uuid, self.name, self.name)
        self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
        # [elided in excerpt: orig. lines 2653-2654 -- presumably 'if self.mgmt_uuid:']
        self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
    # [elided in excerpt: orig. lines 2656-2660 -- remaining args, the else
    #  branch, and the 'def prepare(self):' header; the lines below are its body]
        if not config.record and fs_is_mounted(self.path):
            log(self.path, "already mounted.")
            # [elided in excerpt: orig. lines 2663-2665 -- return / mgmtcli check]
        self.mgmtcli.prepare()
        # [elided in excerpt: orig. lines 2667-2668 -- presumably vosc/vmdc prepare]
        vmdc_name = self.vmdc.get_name()
        # [elided in excerpt: orig. line 2670]
        self.info(self.path, self.mds_uuid, self.obd_uuid)
        if config.record or config.lctl_dump:
            lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
            # [elided in excerpt: orig. lines 2674-2675 -- presumably 'return']
        if config.clientoptions:
            if self.clientoptions:
                self.clientoptions = self.clientoptions + ',' + \
                                     config.clientoptions
            # [elided in excerpt: orig. line 2680 -- presumably 'else:']
                self.clientoptions = config.clientoptions
        if self.clientoptions:
            # Prefix with ',' so it can be appended to the -o string below.
            self.clientoptions = ',' + self.clientoptions
            # Linux kernel will deal with async and not pass it to ll_fill_super,
            # so replace it with Lustre async
            self.clientoptions = string.replace(self.clientoptions, "async",
        # [elided in excerpt: orig. lines 2687-2688 -- remaining replace() arg]
        cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \
              (self.vosc.get_name(), vmdc_name, self.clientoptions,
               config.config, self.path)
        run("mkdir", self.path)
        # [elided in excerpt: orig. lines 2693-2696 -- run(cmd) and rc check]
        panic("mount failed:", self.path, ":", string.join(val))
    # [elided in excerpt: orig. lines 2698-2699 -- the 'def cleanup(self):'
    #  header; the lines below are its body]
        self.info(self.path, self.mds_uuid,self.obd_uuid)
        # [elided in excerpt: orig. line 2701]
        if config.record or config.lctl_dump:
            lctl.del_mount_option(local_node_name)
        # [elided in excerpt: orig. line 2704 -- presumably 'else:']
        if fs_is_mounted(self.path):
            # [elided in excerpt: orig. line 2706 -- presumably 'if config.force:']
            (rc, out) = run("umount", "-f", self.path)
            # [elided in excerpt: orig. line 2708 -- presumably 'else:']
            (rc, out) = run("umount", self.path)
            # [elided in excerpt: orig. line 2710 -- presumably 'if rc:']
            raise CommandError('umount', out, rc)
        # [elided in excerpt: orig. line 2712]
        if fs_is_mounted(self.path):
            panic("fs is still mounted:", self.path)
        # [elided in excerpt: orig. lines 2715-2718 -- vmdc/vosc cleanup and
        #  presumably 'if self.mgmtcli:']
        self.mgmtcli.cleanup()

    def add_module(self, manager):
        manager.add_lustre_module('mdc', 'mdc')
        # [elided in excerpt: orig. lines 2723-2724 -- presumably 'if self.mgmtcli:']
        self.mgmtcli.add_module(manager)
        # [elided in excerpt: orig. line 2726]
        self.vosc.add_module(manager)
        self.vmdc.add_module(manager)
        # [elided in excerpt: orig. line 2729]
        manager.add_lustre_module('llite', 'llite')

    def correct_level(self, level, op=None):
        # [elided in excerpt: orig. lines 2733-2734 -- the return]
2735 # ============================================================
2736 # misc query functions
def get_ost_net(self, osd_uuid):
    """Return the list of Network objects for the node hosting *osd_uuid*."""
    # [elided in excerpt: orig. lines 2739-2741 -- presumably srv_list
    #  initialisation and a guard on osd_uuid]
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    # [elided in excerpt: orig. line 2745 -- presumably 'if not node:']
    panic("unable to find node for osd_uuid:", osd_uuid,
          # NOTE(review): 'node_uuid_' (trailing underscore) is not defined
          # anywhere visible -- looks like a typo for 'node_uuid' that would
          # raise NameError if this panic path were ever taken.  Confirm
          # against upstream lconf.
          " node_ref:", node_uuid_)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    # [elided in excerpt: orig. lines 2751-2753 -- presumably 'return srv_list']
# the order of initialization is based on level.
def getServiceLevel(self):
    """Map a config object's class to its numeric startup level."""
    type = self.get_class()
    # [elided in excerpt: orig. line 2757 -- presumably 'ret = 0']
    if type in ('network',):
        # [elided in excerpt: orig. line 2759 -- the level assignment]
    elif type in ('routetbl',):
        # [elided in excerpt: orig. line 2761 -- the level assignment]
    elif type in ('ldlm',):
        # [elided in excerpt: orig. line 2763 -- the level assignment]
    elif type in ('osd', 'cobd'):
        # [elided in excerpt: orig. line 2765 -- the level assignment]
    elif type in ('mdsdev',):
        # [elided in excerpt: orig. line 2767 -- the level assignment]
    elif type in ('lmv',):
        # [elided in excerpt: orig. line 2769 -- the level assignment]
    elif type in ('cmobd',):
        # [elided in excerpt: orig. line 2771 -- the level assignment]
    elif type in ('mountpoint', 'echoclient'):
        # [elided in excerpt: orig. lines 2773-2774 -- the level assignment
        #  and presumably 'else:']
    panic("Unknown type: ", type)
    # [elided in excerpt: orig. line 2776]
    # Levels outside the user-requested window are filtered out.
    if ret < config.minlevel or ret > config.maxlevel:
        # [elided in excerpt: orig. lines 2778-2781 -- zeroing and 'return ret']
# return list of services in a profile. list is a list of tuples
# [(level, db_object),]
def getServices(self):
    """Collect (level, service_db) tuples for every service in this profile."""
    # [elided in excerpt: orig. line 2785 -- presumably 'list = []']
    for ref_class, ref_uuid in self.get_all_refs():
        servdb = self.lookup(ref_uuid)
        # [elided in excerpt: orig. line 2788 -- presumably 'if servdb:']
        level = getServiceLevel(servdb)
        # [elided in excerpt: orig. line 2790 -- presumably a level check]
        list.append((level, servdb))
        # [elided in excerpt: orig. line 2792 -- presumably 'else:']
        panic('service not found: ' + ref_uuid)
    # [elided in excerpt: orig. lines 2794-2798 -- presumably sort and return]
2799 ############################################################
2801 # FIXME: clean this mess up!
2803 # OSC is no longer in the xml, so we have to fake it.
2804 # this is getting ugly and begging for another refactoring
def get_osc(ost_db, uuid, fs_name):
    """Build an OSC client object for the given OST database entry."""
    osc = OSC(ost_db, uuid, fs_name)
    # [elided in excerpt: orig. lines 2807-2808 -- presumably 'return osc']
def get_mdc(db, fs_name, mds_uuid):
    """Build an MDC client object for the MDS identified by *mds_uuid*."""
    mds_db = db.lookup(mds_uuid);
    # [elided in excerpt: orig. line 2811 -- presumably 'if not mds_db:']
    error("no mds:", mds_uuid)
    mdc = MDC(mds_db, mds_uuid, fs_name)
    # [elided in excerpt: orig. lines 2814-2815 -- presumably 'return mdc']
2816 ############################################################
2817 # routing ("rooting")
2819 # list of (nettype, cluster_id, nid)
2822 def find_local_clusters(node_db):
# Record (net_type, cluster_id, nid) for every network on this node, and
# create an AcceptorHandler for each distinct TCP port.
2823 global local_clusters
2824 for netuuid in node_db.get_networks():
2825 net = node_db.lookup(netuuid)
# NOTE(review): "srv" is presumably a Network built from "net" on a line
# missing from this excerpt -- confirm against the full source.
2827 debug("add_local", netuuid)
2828 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
2830 if acceptors.has_key(srv.port):
2831 panic("duplicate port:", srv.port)
2832 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2834 # This node is a gateway.
2836 def node_is_router():
2839 # If there are any routers found in the config, then this will be true
2840 # and all nodes will load kptlrouter.
def node_needs_router():
    """True when this node must load kptlrouter.

    Holds when any node in the config is a router (needs_router) or
    when this node is itself one (is_router).
    """
    required = needs_router or is_router
    return required
2845 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2846 # Currently, these local routes are only added to kptlrouter route
2847 # table if they are needed to connect to a specific server. This
2848 # should be changed so all available routes are loaded, and the
2849 # ptlrouter can make all the decisions.
2852 def find_local_routes(lustre):
2853 """ Scan the lustre config looking for routers . Build list of
# NOTE(review): the docstring's closing quotes, the "for router in list:"
# loop header, and several other lines are missing from this excerpt.
2855 global local_routes, needs_router
2857 list = lustre.lookup_class('node')
2859 if router.get_val_int('router', 0):
# For each of our local clusters, find the router interface (gateway
# nid) sitting on the same nettype/cluster as us.
2861 for (local_type, local_cluster_id, local_nid) in local_clusters:
2863 for netuuid in router.get_networks():
2864 db = router.lookup(netuuid)
2865 if (local_type == db.get_val('nettype') and
2866 local_cluster_id == db.get_val('clusterid')):
2867 gw = db.get_val('nid')
2870 debug("find_local_routes: gw is", gw)
2871 for route in router.get_local_routes(local_type, gw):
2872 local_routes.append(route)
2873 debug("find_local_routes:", local_routes)
2876 def choose_local_server(srv_list):
# Return the first server reachable on a directly-attached cluster
# (the "return srv" line is elided in this excerpt).
2877 for srv in srv_list:
2878 if local_cluster(srv.net_type, srv.cluster_id):
2881 def local_cluster(net_type, cluster_id):
# True when (net_type, cluster_id) matches one of this node's clusters
# (the return statements are elided in this excerpt).
2882 for cluster in local_clusters:
2883 if net_type == cluster[0] and cluster_id == cluster[1]:
2887 def local_interface(net_type, cluster_id, nid):
# True when the exact (net_type, cluster_id, nid) triple belongs to
# this node (return statements elided in this excerpt).
2888 for cluster in local_clusters:
2889 if (net_type == cluster[0] and cluster_id == cluster[1]
2890 and nid == cluster[2]):
2894 def find_route(srv_list):
# For each server, collect routes r whose target nid range [r[3], r[4]]
# covers the server's nid within the matching cluster; returns the
# accumulated (srv, route) pairs (result init / return elided here).
2896 frm_type = local_clusters[0][0]
2897 for srv in srv_list:
2898 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2899 to_type = srv.net_type
# NOTE(review): "to" is presumably srv.nid, assigned on a missing line.
2901 cluster_id = srv.cluster_id
2902 debug ('looking for route to', to_type, to)
2903 for r in local_routes:
2904 debug("find_route: ", r)
2905 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2906 result.append((srv, r))
2909 def get_active_target(db):
# Resolve the currently-active target device uuid, honouring a --select
# override for this target; otherwise fall back to the 'active' ref
# (the if/else and return are partially elided in this excerpt).
2910 target_uuid = db.getUUID()
2911 target_name = db.getName()
2912 node_name = get_select(target_name)
2914 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2916 tgt_dev_uuid = db.get_first_ref('active')
2919 def get_server_by_nid_uuid(db, nid_uuid):
# Find the Network whose nid uuid matches.  NOTE(review): "net" is
# presumably Network(n), built on a line missing from this excerpt.
2920 for n in db.lookup_class("network"):
2922 if net.nid_uuid == nid_uuid:
2926 ############################################################
# Body of newService(db): dispatch on the service class name and build
# the matching service object.  NOTE(review): the "def" line, the first
# branch ("if type == 'lov':"?), and several constructor lines are not
# visible in this excerpt.
2930 type = db.get_class()
2931 debug('Service:', type, db.getName(), db.getUUID())
2936 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2937 elif type == 'network':
2939 elif type == 'routetbl':
2943 elif type == 'cobd':
2944 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2945 elif type == 'cmobd':
2947 elif type == 'mdsdev':
2949 elif type == 'mountpoint':
2951 elif type == 'echoclient':
2956 panic ("unknown service type:", type)
2960 # Prepare the system to run lustre using a particular profile
2961 # in the configuration.
2962 # * load the modules
2963 # * setup networking for the current node
2964 # * make sure partitions are in place and prepared
2965 # * initialize devices with lctl
2966 # Levels is important, and needs to be enforced.
2967 def for_each_profile(db, prof_list, operation):
# Look up each profile, collect its services, and apply "operation"
# (e.g. doSetup / doCleanup) to them; the operation(services) call is
# on a line missing from this excerpt.
2968 for prof_uuid in prof_list:
2969 prof_db = db.lookup(prof_uuid)
2971 panic("profile:", prof_uuid, "not found.")
2972 services = getServices(prof_db)
2975 def magic_get_osc(db, rec, lov):
# Resolve the OSC for an 'add' update record.  If a LOV object is
# handed in, take uuid/fs name from it; otherwise chase
# lov -> filesystem -> mountpoint references through the raw DOM to
# recover the fs name (several guard/loop lines elided in this excerpt).
2977 lov_uuid = lov.get_uuid()
2978 lov_name = lov.osc.fs_name
2980 lov_uuid = rec.getAttribute('lov_uuidref')
2981 # FIXME: better way to find the mountpoint?
2982 filesystems = db.root_node.getElementsByTagName('filesystem')
2984 for fs in filesystems:
2985 ref = fs.getElementsByTagName('obd_ref')
2986 if ref[0].getAttribute('uuidref') == lov_uuid:
2987 fsuuid = fs.getAttribute('uuid')
2991 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
2993 mtpts = db.root_node.getElementsByTagName('mountpoint')
# NOTE(review): the loop below apparently reuses the name "fs" for
# mountpoint elements -- confusing, but the loop header is elided here.
2996 ref = fs.getElementsByTagName('filesystem_ref')
2997 if ref[0].getAttribute('uuidref') == fsuuid:
2998 lov_name = fs.getAttribute('name')
3002 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3004 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3006 ost_uuid = rec.getAttribute('ost_uuidref')
3007 obd = db.lookup(ost_uuid)
3010 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3012 osc = get_osc(obd, lov_uuid, lov_name)
# BUG(review): "obd_uuid" is never defined in this function; the panic
# argument should almost certainly be "ost_uuid".
3014 panic('osc not found:', obd_uuid)
3017 # write logs for update records. sadly, logs of all types -- and updates in
3018 # particular -- are something of an afterthought. lconf needs rewritten with
3019 # these as core concepts. so this is a pretty big hack.
# Apply one <update> element's child records (add / deactivate / delete)
# to the LOV via lctl.  NOTE(review): many guard and try: lines are
# missing from this excerpt; treat control flow below as partial.
3020 def process_update_record(db, update, lov):
3021 for rec in update.childNodes:
# Skip text/comment DOM nodes between records.
3022 if rec.nodeType != rec.ELEMENT_NODE:
3025 log("found "+rec.nodeName+" record in update version " +
3026 str(update.getAttribute('version')))
3028 lov_uuid = rec.getAttribute('lov_uuidref')
3029 ost_uuid = rec.getAttribute('ost_uuidref')
3030 index = rec.getAttribute('index')
3031 gen = rec.getAttribute('generation')
3033 if not lov_uuid or not ost_uuid or not index or not gen:
3034 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3037 tmplov = db.lookup(lov_uuid)
3039 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3040 lov_name = tmplov.getName()
3042 lov_name = lov.osc.name
3044 # ------------------------------------------------------------- add
3045 if rec.nodeName == 'add':
# When recording, the obd is first deleted then re-added to the lov.
3047 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3050 osc = magic_get_osc(db, rec, lov)
3053 # Only ignore connect failures with --force, which
3054 # isn't implemented here yet.
3055 osc.prepare(ignore_connect_failure=0)
3056 except CommandError, e:
3057 print "Error preparing OSC %s\n" % osc.uuid
3060 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3062 # ------------------------------------------------------ deactivate
3063 elif rec.nodeName == 'deactivate':
3067 osc = magic_get_osc(db, rec, lov)
3071 except CommandError, e:
3072 print "Error deactivating OSC %s\n" % osc.uuid
3075 # ---------------------------------------------------------- delete
3076 elif rec.nodeName == 'delete':
3080 osc = magic_get_osc(db, rec, lov)
3086 except CommandError, e:
3087 print "Error cleaning up OSC %s\n" % osc.uuid
3090 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3092 def process_updates(db, log_device, log_name, lov = None):
# Replay every <update> element into its own config log named
# "<log_name>-<version>" on log_device.  NOTE(review): the per-update
# loop header and the closing end_record call are missing from this
# excerpt.
3093 updates = db.root_node.getElementsByTagName('update')
3095 if not u.childNodes:
3096 log("ignoring empty update record (version " +
3097 str(u.getAttribute('version')) + ")")
3100 version = u.getAttribute('version')
3101 real_name = "%s-%s" % (log_name, version)
3102 lctl.clear_log(log_device, real_name)
3103 lctl.record(log_device, real_name)
3105 process_update_record(db, u, lov)
3109 def doWriteconf(services):
# --write_conf pass: only mdsdev services get their config rewritten
# (the loop header and the write_conf call are elided in this excerpt).
3113 if s[1].get_class() == 'mdsdev':
3114 n = newService(s[1])
3117 def doSetup(services):
# Instantiate each service, re-sort by corrected level, then prepare()
# them in ascending order (list inits, loop headers, sort and the
# prepare() calls are elided in this excerpt).
3122 n = newService(s[1])
3124 slist.append((n.level, n))
3127 nl = n[1].correct_level(n[0])
3128 nlist.append((nl, n[1]))
3133 def doLoadModules(services):
# Register the modules needed by every service, then load them all in
# one pass through the global module manager.
3137 # adding all needed modules from all services
3139 n = newService(s[1])
3140 n.add_module(mod_manager)
3142 # loading all registered modules
3143 mod_manager.load_modules()
3145 def doUnloadModules(services):
# Mirror of doLoadModules: collect modules only from services that
# report safe_to_clean_modules(), then unload them all at once.
3149 # adding all needed modules from all services
3151 n = newService(s[1])
3152 if n.safe_to_clean_modules():
3153 n.add_module(mod_manager)
3155 # unloading all registered modules
3156 mod_manager.cleanup_modules()
3158 def doCleanup(services):
# Tear services down in descending level order; only services reporting
# safe_to_clean() are cleaned (list inits, loop headers, sort/reverse
# and the cleanup() call are elided in this excerpt).
3164 n = newService(s[1])
3166 slist.append((n.level, n))
3169 nl = n[1].correct_level(n[0])
3170 nlist.append((nl, n[1]))
3175 if n[1].safe_to_clean():
# Top-level per-host driver: find this host's node entry, gather its
# tunables, then run the write_conf / recover / cleanup / setup flow.
# NOTE(review): this excerpt is missing many lines (loop headers, else
# branches, sleeps); indentation below does not reflect real nesting.
3180 def doHost(lustreDB, hosts):
3181 global is_router, local_node_name
3184 node_db = lustreDB.lookup_name(h, 'node')
3188 panic('No host entry found.')
3190 local_node_name = node_db.get_val('name', 0)
3191 is_router = node_db.get_val_int('router', 0)
3192 lustre_upcall = node_db.get_val('lustreUpcall', '')
3193 portals_upcall = node_db.get_val('portalsUpcall', '')
3194 timeout = node_db.get_val_int('timeout', 0)
3195 ptldebug = node_db.get_val('ptldebug', '')
3196 subsystem = node_db.get_val('subsystem', '')
3198 find_local_clusters(node_db)
3200 find_local_routes(lustreDB)
3202 # Two step process: (1) load modules, (2) setup lustre
3203 # if not cleaning, load modules first.
3204 prof_list = node_db.get_refs('profile')
3206 if config.write_conf:
3207 for_each_profile(node_db, prof_list, doLoadModules)
3209 for_each_profile(node_db, prof_list, doWriteconf)
3210 for_each_profile(node_db, prof_list, doUnloadModules)
3213 elif config.recover:
3214 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3215 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3216 "--client_uuid <UUID> --conn_uuid <UUID>")
3217 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3219 elif config.cleanup:
3221 # the command line can override this value
3223 # ugly hack, only need to run lctl commands for --dump
3224 if config.lctl_dump or config.record:
3225 for_each_profile(node_db, prof_list, doCleanup)
# Full cleanup: push tunables first so upcalls/timeouts are sane
# while services shut down, then cleanup and unload.
3228 sys_set_timeout(timeout)
3229 sys_set_ptldebug(ptldebug)
3230 sys_set_subsystem(subsystem)
3231 sys_set_lustre_upcall(lustre_upcall)
3232 sys_set_portals_upcall(portals_upcall)
3234 for_each_profile(node_db, prof_list, doCleanup)
3235 for_each_profile(node_db, prof_list, doUnloadModules)
3239 # ugly hack, only need to run lctl commands for --dump
3240 if config.lctl_dump or config.record:
3241 sys_set_timeout(timeout)
3242 sys_set_lustre_upcall(lustre_upcall)
3243 for_each_profile(node_db, prof_list, doSetup)
# Normal setup path: raise socket buffer ceilings before loading
# modules, then configure debug and (optionally) a gdb script.
3247 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3248 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3250 for_each_profile(node_db, prof_list, doLoadModules)
3252 sys_set_debug_path()
3253 sys_set_ptldebug(ptldebug)
3254 sys_set_subsystem(subsystem)
3255 script = config.gdb_script
3256 run(lctl.lctl, ' modules >', script)
3258 log ("The GDB module script is in", script)
3259 # pause, so user has time to break and
3262 sys_set_timeout(timeout)
3263 sys_set_lustre_upcall(lustre_upcall)
3264 sys_set_portals_upcall(portals_upcall)
3266 for_each_profile(node_db, prof_list, doSetup)
# Reconnect a failed client to the currently-active server for a target:
# look up the target, resolve its active device, pick a locally
# reachable network, disconnect the old server, reconnect, and kick
# client recovery.  NOTE(review): several guard/try lines are missing
# from this excerpt.
3269 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3270 tgt = lustreDB.lookup(tgt_uuid)
3272 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3273 new_uuid = get_active_target(tgt)
3275 raise Lustre.LconfError("doRecovery: no active target found for: " +
3277 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3279 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3281 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3283 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# Best-effort: the old connection may already be gone.
3286 lctl.disconnect(oldnet)
3287 except CommandError, e:
3288 log("recover: disconnect", nid_uuid, "failed: ")
3293 except CommandError, e:
3294 log("recover: connect failed")
3297 lctl.recover(client_uuid, net.nid_uuid)
# Derive config.lustre / config.portals module search paths from the
# lconf binary's location (development mode) or from --lustre/--portals.
3300 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3301 base = os.path.dirname(cmd)
3302 if development_mode():
3303 if not config.lustre:
3304 debug('using objdir module paths')
3305 config.lustre = (os.path.join(base, ".."))
3306 # normalize the portals dir, using command line arg if set
# NOTE(review): the "if config.portals:" guard appears to be on a line
# missing from this excerpt.
3308 portals_dir = config.portals
3309 dir = os.path.join(config.lustre, portals_dir)
3310 config.portals = dir
3311 debug('config.portals', config.portals)
3312 elif config.lustre and config.portals:
3314 # if --lustre and --portals, normalize portals
3315 # can ignore PORTALS_DIR here, since it is probably useless here
3316 config.portals = os.path.join(config.lustre, config.portals)
3317 debug('config.portals B', config.portals)
3319 def sysctl(path, val):
# Write "val" to /proc/sys/<path> (the --noexec guard and the
# write/close calls are missing from this excerpt).
3320 debug("+ sysctl", path, val)
3324 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals debug-dump path at config.debug_path."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
3334 def sys_set_lustre_upcall(upcall):
3335 # the command overrides the value in the node config
3336 if config.lustre_upcall:
3337 upcall = config.lustre_upcall
# NOTE(review): an "elif config.upcall:" line appears to be missing
# between these two lines in this excerpt.
3339 upcall = config.upcall
3341 lctl.set_lustre_upcall(upcall)
3343 def sys_set_portals_upcall(upcall):
3344 # the command overrides the value in the node config
3345 if config.portals_upcall:
3346 upcall = config.portals_upcall
# NOTE(review): an "elif config.upcall:" line appears to be missing
# between these two lines in this excerpt.
3348 upcall = config.upcall
3350 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Apply the lustre recovery timeout.

    A positive --timeout on the command line overrides the node-config
    value; a missing or non-positive timeout leaves the kernel default.
    """
    cli_timeout = config.timeout
    if cli_timeout and cli_timeout > 0:
        timeout = cli_timeout
    if timeout != None and timeout > 0:
        lctl.set_timeout(timeout)
3359 def sys_tweak_socknal ():
3360 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3361 if sys_get_branch() == '2.6':
3362 fp = open('/proc/meminfo')
3363 lines = fp.readlines()
# NOTE(review): fp.close() and the per-line split are on lines missing
# from this excerpt; "a" is presumably the split /proc/meminfo line.
3368 if a[0] == 'MemTotal:':
3370 debug("memtotal" + memtotal)
# On boxes under 256MB (262144 kB), cap vm.min_free_kbytes at 1/16 RAM.
3371 if int(memtotal) < 262144:
3372 minfree = int(memtotal) / 16
3375 debug("+ minfree ", minfree)
3376 sysctl("vm/min_free_kbytes", minfree)
3377 if config.single_socket:
3378 sysctl("socknal/typed", 0)
3380 def sys_optimize_elan ():
# Tell the Quadrics Elan driver to punt event interrupts on whichever
# elan/qsnet proc file exists (the "for p in procfiles:" loop header is
# missing from this excerpt).
3381 procfiles = ["/proc/elan/config/eventint_punt_loops",
3382 "/proc/qsnet/elan3/config/eventint_punt_loops",
3383 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3385 if os.access(p, os.W_OK):
3386 run ("echo 1 > " + p)
3388 def sys_set_ptldebug(ptldebug):
# Evaluate a symbolic debug mask (e.g. "trace|inode") against
# ptldebug_names and write the hex value to portals/debug
# (the command-line override guard and except body are elided here).
3390 ptldebug = config.ptldebug
# NOTE(review): eval() on an option string -- tolerable in a local admin
# tool, but never expose this to untrusted input.
3393 val = eval(ptldebug, ptldebug_names)
3394 val = "0x%x" % (val)
3395 sysctl('portals/debug', val)
3396 except NameError, e:
3399 def sys_set_subsystem(subsystem):
# Same pattern as sys_set_ptldebug, but for the subsystem debug mask:
# evaluate the symbolic expression against subsystem_names and write the
# hex value (except body elided in this excerpt).
3400 if config.subsystem:
3401 subsystem = config.subsystem
3404 val = eval(subsystem, subsystem_names)
3405 val = "0x%x" % (val)
3406 sysctl('portals/subsystem_debug', val)
3407 except NameError, e:
# Raise a net.core rmem/wmem ceiling to at least "max".  NOTE(review):
# the read of the current value, the comparison, and fp.close() are on
# lines missing from this excerpt; "max" shadows the builtin.
3410 def sys_set_netmem_max(path, max):
3411 debug("setting", path, "to at least", max)
3419 fp = open(path, 'w')
3420 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes if absent."""
    wanted = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for devnode, mknod_cmd in wanted:
        if not os.access(devnode, os.R_OK):
            run(mknod_cmd)
3431 # Add dir to the global PATH, if not already there.
3432 def add_to_path(new_dir):
# Append new_dir to $PATH unless it is already present (the early
# "return" after the membership test is elided in this excerpt).
3433 syspath = string.split(os.environ['PATH'], ':')
3434 if new_dir in syspath:
3436 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
3438 def default_debug_path():
# Default location for kernel debug dumps; prefers an /r chroot-style
# prefix when present (the return statements are elided here).
3439 path = '/tmp/lustre-log'
3440 if os.path.isdir('/r'):
3445 def default_gdb_script():
# Default gdb module-script location; prefixed with /r when that root
# exists (the fallthrough return is elided in this excerpt).
3446 script = '/tmp/ogdb'
3447 if os.path.isdir('/r'):
3448 return '/r' + script
# Minimal set of directories every invocation needs on $PATH.
3453 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3454 # ensure basic elements are in the system path
3455 def sanitise_path():
# Adds each DEFAULT_PATH entry via add_to_path (call elided here);
# "dir" shadows the builtin.
3456 for dir in DEFAULT_PATH:
3459 # global hack for the --select handling
3461 def init_select(args):
3462 # args = [service=nodeA,service2=nodeB service3=nodeC]
# Populate the global tgt_select map from --select arguments (the outer
# per-argument loop header is missing from this excerpt).
3465 list = string.split(arg, ',')
3467 srv, node = string.split(entry, '=')
3468 tgt_select[srv] = node
3470 def get_select(srv):
# Return the node chosen via --select for this service; the fallthrough
# (presumably "return None") is missing from this excerpt.
3471 if tgt_select.has_key(srv):
3472 return tgt_select[srv]
# Command-line option table for Lustre.Options: each tuple is
# (name[,short-flag], help-text[, type[, default]]).  NOTE(review): the
# opening "lconf_options = [" line and several entries/type arguments are
# missing from this excerpt.
3476 FLAG = Lustre.Options.FLAG
3477 PARAM = Lustre.Options.PARAM
3478 INTPARAM = Lustre.Options.INTPARAM
3479 PARAMLIST = Lustre.Options.PARAMLIST
3481 ('verbose,v', "Print system commands as they are run"),
3482 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3483 ('config', "Cluster config name used for LDAP query", PARAM),
3484 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3485 ('node', "Load config for <nodename>", PARAM),
3486 ('cleanup,d', "Cleans up config. (Shutdown)"),
3487 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3489 ('single_socket', "socknal option: only use one socket instead of bundle",
3491 ('failover',"""Used to shut down without saving state.
3492 This will allow this node to "give up" a service to a
3493 another node for failover purposes. This will not
3494 be a clean shutdown.""",
3496 ('gdb', """Prints message after creating gdb module script
3497 and sleeps for 5 seconds."""),
3498 ('noexec,n', """Prints the commands and steps that will be run for a
3499 config without executing them. This can used to check if a
3500 config file is doing what it should be doing"""),
3501 ('nomod', "Skip load/unload module step."),
3502 ('nosetup', "Skip device setup/cleanup step."),
3503 ('reformat', "Reformat all devices (without question)"),
3504 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3505 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3506 ('clientoptions', "Additional options for Lustre", PARAM),
3507 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3509 ('write_conf', "Save all the client config information on mds."),
3510 ('record', "Write config information on mds."),
3511 ('record_log', "Name of config record log.", PARAM),
3512 ('record_device', "MDS device name that will record the config commands",
3514 ('root_squash', "MDS squash root to appointed uid",
3516 ('no_root_squash', "Don't squash root for appointed nid",
3518 ('minlevel', "Minimum level of services to configure/cleanup",
3520 ('maxlevel', """Maximum level of services to configure/cleanup
3521 Levels are aproximatly like:
3526 70 - mountpoint, echo_client, osc, mdc, lov""",
3528 ('lustre', """Base directory of lustre sources. This parameter will
3529 cause lconf to load modules from a source tree.""", PARAM),
3530 ('portals', """Portals source directory. If this is a relative path,
3531 then it is assumed to be relative to lustre. """, PARAM),
3532 ('timeout', "Set recovery timeout", INTPARAM),
3533 ('upcall', "Set both portals and lustre upcall script", PARAM),
3534 ('lustre_upcall', "Set lustre upcall script", PARAM),
3535 ('portals_upcall', "Set portals upcall script", PARAM),
3536 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3537 ('ptldebug', "Set the portals debug level", PARAM),
3538 ('subsystem', "Set the portals debug subsystem", PARAM),
3539 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3540 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3541 # Client recovery options
3542 ('recover', "Recover a device"),
3543 ('group', "The group of devices to configure or cleanup", PARAM),
3544 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3545 ('client_uuid', "The failed client (required for recovery)", PARAM),
3546 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3548 ('inactive', """The name of an inactive service, to be ignored during
3549 mounting (currently OST-only). Can be repeated.""",
# Body of main(): parse options, load the config (file, URL, or LDAP),
# validate the version, pick the node list, and drive doHost().
# NOTE(review): the "def main():" line and many guard/else lines are
# missing from this excerpt; indentation does not reflect real nesting.
3554 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3556 # in the upcall this is set to SIG_IGN
3557 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3559 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3561 config, args = cl.parse(sys.argv[1:])
3562 except Lustre.OptionError, e:
3566 setupModulePath(sys.argv[0])
3568 host = socket.gethostname()
3570 # the PRNG is normally seeded with time(), which is not so good for starting
3571 # time-synchronized clusters
3572 input = open('/dev/urandom', 'r')
3574 print 'Unable to open /dev/urandom!'
3576 seed = input.read(32)
3582 init_select(config.select)
3585 # allow config to be fetched via HTTP, but only with python2
3586 if sys.version[0] != '1' and args[0].startswith('http://'):
3589 config_file = urllib2.urlopen(args[0])
3590 except (urllib2.URLError, socket.error), err:
3591 if hasattr(err, 'args'):
3593 print "Could not access '%s': %s" %(args[0], err)
3595 elif not os.access(args[0], os.R_OK):
3596 print 'File not found or readable:', args[0]
3600 config_file = open(args[0], 'r')
3602 dom = xml.dom.minidom.parse(config_file)
3604 panic("%s does not appear to be a config file." % (args[0]))
3605 sys.exit(1) # make sure to die here, even in debug mode.
3607 CONFIG_FILE = args[0]
3608 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3609 if not config.config:
3610 config.config = os.path.basename(args[0])# use full path?
3611 if config.config[-4:] == '.xml':
3612 config.config = config.config[:-4]
3613 elif config.ldapurl:
3614 if not config.config:
3615 panic("--ldapurl requires --config name")
3616 dn = "config=%s,fs=lustre" % (config.config)
3617 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3618 elif config.ptldebug or config.subsystem:
# With only debug flags and no config source, just poke /proc and exit.
3619 sys_set_ptldebug(None)
3620 sys_set_subsystem(None)
3623 print 'Missing config file or ldap URL.'
3624 print 'see lconf --help for command summary'
3627 toplustreDB = lustreDB
3629 ver = lustreDB.get_version()
3631 panic("No version found in config data, please recreate.")
3632 if ver != Lustre.CONFIG_VERSION:
3633 panic("Config version", ver, "does not match lconf version",
3634 Lustre.CONFIG_VERSION)
3638 node_list.append(config.node)
3641 node_list.append(host)
3642 node_list.append('localhost')
3644 debug("configuring for host: ", node_list)
# Per-host suffix keeps debug/gdb files apart on shared filesystems.
3647 config.debug_path = config.debug_path + '-' + host
3648 config.gdb_script = config.gdb_script + '-' + host
3650 lctl = LCTLInterface('lctl')
3652 if config.lctl_dump:
3653 lctl.use_save_file(config.lctl_dump)
3656 if not (config.record_device and config.record_log):
3657 panic("When recording, both --record_log and --record_device must be specified.")
3658 lctl.clear_log(config.record_device, config.record_log)
3659 lctl.record(config.record_device, config.record_log)
3661 # init module manager
3662 mod_manager = kmod_manager(config.lustre, config.portals)
3664 doHost(lustreDB, node_list)
3666 if not config.record:
3671 process_updates(lustreDB, config.record_device, config.record_log)
3673 if __name__ == "__main__":
# Entry point: run main(), translating config and command errors into
# exit codes.  NOTE(review): the main() call, the panic/print handling,
# and several except bodies are missing from this excerpt.
3676 except Lustre.LconfError, e:
3678 # traceback.print_exc(file=sys.stdout)
3680 except CommandError, e:
# Propagate the first cleanup failure as the process exit status.
3684 if first_cleanup_error:
3685 sys.exit(first_cleanup_error)