3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
108 "pinger" : (1 << 12),
109 "filter" : (1 << 13),
114 "ptlrouter" : (1 << 18),
118 "confobd" : (1 << 22),
# Exit code of the FIRST cleanup step that failed; stays 0 while no
# cleanup error has been seen.
first_cleanup_error = 0

def cleanup_error(rc):
    """Record *rc* as the overall cleanup status, keeping only the first failure."""
    global first_cleanup_error
    # Later failures are deliberately ignored so that the earliest one
    # is what gets reported at exit.
    if first_cleanup_error == 0:
        first_cleanup_error = rc
130 # ============================================================
131 # debugging and error funcs
def fixme(msg = "this feature"):
    """Abort with a LconfError noting that *msg* is not implemented yet."""
    # Use the call-style raise (valid in both Python 2 and 3) instead of
    # the old `raise Class, arg` statement form, matching the raise style
    # used elsewhere in this file (e.g. the panic helper).
    raise Lustre.LconfError(msg + ' not implemented yet.')
137 msg = string.join(map(str,args))
138 if not config.noexec:
139 raise Lustre.LconfError(msg)
144 msg = string.join(map(str,args))
149 print string.strip(s)
153 msg = string.join(map(str,args))
156 # ack, python's builtin int() does not support '0x123' syntax.
157 # eval can do it, although what a hack!
161 return eval(s, {}, {})
164 except SyntaxError, e:
165 raise ValueError("not a number")
167 raise ValueError("not a number")
169 # ============================================================
170 # locally defined exceptions
171 class CommandError (exceptions.Exception):
172 def __init__(self, cmd_name, cmd_err, rc=None):
173 self.cmd_name = cmd_name
174 self.cmd_err = cmd_err
179 if type(self.cmd_err) == types.StringType:
181 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
183 print "! %s: %s" % (self.cmd_name, self.cmd_err)
184 elif type(self.cmd_err) == types.ListType:
186 print "! %s (error %d):" % (self.cmd_name, self.rc)
188 print "! %s:" % (self.cmd_name)
189 for s in self.cmd_err:
190 print "> %s" %(string.strip(s))
195 # ============================================================
196 # handle daemons, like the acceptor
198 """ Manage starting and stopping a daemon. Assumes daemon manages
199 its own pid file. """
201 def __init__(self, cmd):
207 log(self.command, "already running.")
209 self.path = find_prog(self.command)
211 panic(self.command, "not found.")
212 ret, out = runcmd(self.path +' '+ self.command_line())
214 raise CommandError(self.path, out, ret)
218 pid = self.read_pidfile()
221 log ("killing process", pid)
224 log("was unable to find pid of " + self.command)
225 #time.sleep(1) # let daemon die
227 log("unable to kill", self.command, e)
229 log("unable to kill", self.command)
232 pid = self.read_pidfile()
238 log("was unable to find pid of " + self.command)
245 def read_pidfile(self):
247 fp = open(self.pidfile(), 'r')
257 def clean_pidfile(self):
258 """ Remove a stale pidfile """
259 log("removing stale pidfile:", self.pidfile())
261 os.unlink(self.pidfile())
263 log(self.pidfile(), e)
265 class AcceptorHandler(DaemonHandler):
266 def __init__(self, port, net_type):
267 DaemonHandler.__init__(self, "acceptor")
272 return "/var/run/%s-%d.pid" % (self.command, self.port)
def command_line(self):
    """Build the acceptor's argument string: "<flags> <port>"."""
    parts = [str(self.flags), str(self.port)]
    return ' '.join(parts)
279 # start the acceptors
281 if config.lctl_dump or config.record:
283 for port in acceptors.keys():
284 daemon = acceptors[port]
285 if not daemon.running():
288 def run_one_acceptor(port):
289 if config.lctl_dump or config.record:
291 if acceptors.has_key(port):
292 daemon = acceptors[port]
293 if not daemon.running():
296 panic("run_one_acceptor: No acceptor defined for port:", port)
298 def stop_acceptor(port):
299 if acceptors.has_key(port):
300 daemon = acceptors[port]
305 # ============================================================
306 # handle lctl interface
309 Manage communication with lctl
312 def __init__(self, cmd):
314 Initialize close by finding the lctl binary.
316 self.lctl = find_prog(cmd)
318 self.record_device = ''
321 debug('! lctl not found')
324 raise CommandError('lctl', "unable to find lctl binary.")
def use_save_file(self, file):
    """Remember the dump-file path applied to subsequent lctl command batches."""
    self.save_file = file
def record(self, dev_name, logname):
    """Start recording config log *logname* on device *dev_name*."""
    log("Recording log", logname, "on", dev_name)
    self.record_log = logname
    self.record_device = dev_name
def end_record(self):
    """Finish the current recording session and clear the record target."""
    log("End recording log", self.record_log, "on", self.record_device)
    self.record_log = None
    self.record_device = None
def set_nonblock(self, fd):
    """Put file descriptor *fd* into non-blocking mode."""
    current = fcntl.fcntl(fd, F_GETFL)
    fcntl.fcntl(fd, F_SETFL, current | os.O_NDELAY)
346 the cmds are written to stdin of lctl
347 lctl doesn't return errors when run in script mode, so
349 should modify command line to accept multiple commands, or
350 create complex command line options
354 cmds = '\n dump ' + self.save_file + '\n' + cmds
355 elif self.record_device:
359 %s""" % (self.record_device, self.record_log, cmds)
361 debug("+", cmd_line, cmds)
362 if config.noexec: return (0, [])
364 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
365 child.tochild.write(cmds + "\n")
366 child.tochild.close()
367 # print "LCTL:", cmds
369 # From "Python Cookbook" from O'Reilly
370 outfile = child.fromchild
371 outfd = outfile.fileno()
372 self.set_nonblock(outfd)
373 errfile = child.childerr
374 errfd = errfile.fileno()
375 self.set_nonblock(errfd)
377 outdata = errdata = ''
380 ready = select.select([outfd,errfd],[],[]) # Wait for input
381 if outfd in ready[0]:
382 outchunk = outfile.read()
383 if outchunk == '': outeof = 1
384 outdata = outdata + outchunk
385 if errfd in ready[0]:
386 errchunk = errfile.read()
387 if errchunk == '': erreof = 1
388 errdata = errdata + errchunk
389 if outeof and erreof: break
390 # end of "borrowed" code
393 if os.WIFEXITED(ret):
394 rc = os.WEXITSTATUS(ret)
397 if rc or len(errdata):
398 raise CommandError(self.lctl, errdata, rc)
401 def runcmd(self, *args):
403 run lctl using the command line
405 cmd = string.join(map(str,args))
406 debug("+", self.lctl, cmd)
407 rc, out = run(self.lctl, cmd)
409 raise CommandError(self.lctl, out, rc)
412 def clear_log(self, dev, log):
413 """ clear an existing log """
418 quit """ % (dev, log)
421 def root_squash(self, name, uid, nid):
425 quit""" % (name, uid, nid)
428 def network(self, net, nid):
433 quit """ % (net, nid)
437 def add_interface(self, net, ip, netmask = ""):
438 """ add an interface """
442 quit """ % (net, ip, netmask)
445 # delete an interface
446 def del_interface(self, net, ip):
447 """ delete an interface """
454 # create a new connection
455 def add_uuid(self, net_type, uuid, nid):
456 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
459 def add_peer(self, net_type, nid, hostaddr, port):
460 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
465 nid, hostaddr, port )
467 elif net_type in ('iib',) and not config.lctl_dump:
474 elif net_type in ('vib',) and not config.lctl_dump:
482 def connect(self, srv):
483 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
484 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
486 hostaddr = string.split(srv.hostaddr[0], '/')[0]
487 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
490 def recover(self, dev_name, new_conn):
493 recover %s""" %(dev_name, new_conn)
496 # add a route to a range
497 def add_route(self, net, gw, lo, hi):
505 except CommandError, e:
509 def del_route(self, net, gw, lo, hi):
514 quit """ % (net, gw, lo, hi)
517 # add a route to a host
518 def add_route_host(self, net, uuid, gw, tgt):
519 self.add_uuid(net, uuid, tgt)
527 except CommandError, e:
531 # add a route to a range
532 def del_route_host(self, net, uuid, gw, tgt):
538 quit """ % (net, gw, tgt)
542 def del_peer(self, net_type, nid, hostaddr):
543 if net_type in ('tcp',) and not config.lctl_dump:
547 del_peer %s %s single_share
551 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
555 del_peer %s single_share
560 # disconnect one connection
561 def disconnect(self, srv):
562 self.del_uuid(srv.nid_uuid)
563 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
565 hostaddr = string.split(srv.hostaddr[0], '/')[0]
566 self.del_peer(srv.net_type, srv.nid, hostaddr)
568 def del_uuid(self, uuid):
576 def disconnectAll(self, net):
584 def attach(self, type, name, uuid):
587 quit""" % (type, name, uuid)
590 def set_security(self, name, key, value):
594 quit""" % (name, key, value)
597 def setup(self, name, setup = ""):
601 quit""" % (name, setup)
604 def add_conn(self, name, conn_uuid):
608 quit""" % (name, conn_uuid)
612 # create a new device with lctl
613 def newdev(self, type, name, uuid, setup = ""):
614 self.attach(type, name, uuid);
616 self.setup(name, setup)
617 except CommandError, e:
618 self.cleanup(name, uuid, 0)
623 def cleanup(self, name, uuid, force, failover = 0):
624 if failover: force = 1
630 quit""" % (name, ('', 'force')[force],
631 ('', 'failover')[failover])
635 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
636 stripe_sz, stripe_off, pattern, devlist = None):
639 lov_setup %s %d %d %d %s %s
640 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
644 # add an OBD to a LOV
645 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
647 lov_modify_tgts add %s %s %s %s
648 quit""" % (name, obd_uuid, index, gen)
652 def lmv_setup(self, name, uuid, desc_uuid, devlist):
656 quit""" % (name, uuid, desc_uuid, devlist)
659 # delete an OBD from a LOV
660 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
662 lov_modify_tgts del %s %s %s %s
663 quit""" % (name, obd_uuid, index, gen)
667 def deactivate(self, name):
675 def dump(self, dump_file):
678 quit""" % (dump_file)
681 # get list of devices
682 def device_list(self):
683 devices = '/proc/fs/lustre/devices'
685 if os.access(devices, os.R_OK):
687 fp = open(devices, 'r')
695 def lustre_version(self):
696 rc, out = self.runcmd('version')
700 def mount_option(self, profile, osc, mdc):
702 mount_option %s %s %s
703 quit""" % (profile, osc, mdc)
706 # delete mount options
707 def del_mount_option(self, profile):
713 def set_timeout(self, timeout):
719 def set_lustre_upcall(self, upcall):
724 # ============================================================
725 # Various system-level functions
726 # (ideally moved to their own module)
728 # Run a command and return the output and status.
729 # stderr is sent to /dev/null, could use popen3 to
730 # save it if necessary
733 if config.noexec: return (0, [])
734 f = os.popen(cmd + ' 2>&1')
744 cmd = string.join(map(str,args))
747 # Run a command in the background.
748 def run_daemon(*args):
749 cmd = string.join(map(str,args))
751 if config.noexec: return 0
752 f = os.popen(cmd + ' 2>&1')
760 # Determine full path to use for an external command
761 # searches dirname(argv[0]) first, then PATH
763 syspath = string.split(os.environ['PATH'], ':')
764 cmdpath = os.path.dirname(sys.argv[0])
765 syspath.insert(0, cmdpath);
767 syspath.insert(0, os.path.join(config.portals, 'utils/'))
769 prog = os.path.join(d,cmd)
770 if os.access(prog, os.X_OK):
774 # Recursively look for file starting at base dir
775 def do_find_file(base, mod):
776 fullname = os.path.join(base, mod)
777 if os.access(fullname, os.R_OK):
779 for d in os.listdir(base):
780 dir = os.path.join(base,d)
781 if os.path.isdir(dir):
782 module = do_find_file(dir, mod)
786 # is the path a block device?
793 return stat.S_ISBLK(s[stat.ST_MODE])
795 # find the journal device from mkfs options
801 while i < len(x) - 1:
802 if x[i] == '-J' and x[i+1].startswith('device='):
808 # build fs according to type
810 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
816 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
818 # devsize is in 1k, and fs block count is in 4k
819 block_cnt = devsize/4
821 if fstype in ('ext3', 'extN', 'ldiskfs'):
822 # ext3 journal size is in megabytes
823 # but don't set jsize if mkfsoptions indicates a separate journal device
824 if jsize == 0 and jdev(mkfsoptions) == '':
826 if not is_block(dev):
827 ret, out = runcmd("ls -l %s" %dev)
828 devsize = int(string.split(out[0])[4]) / 1024
830 # sfdisk works for symlink, hardlink, and realdev
831 ret, out = runcmd("sfdisk -s %s" %dev)
833 devsize = int(out[0])
835 # sfdisk -s will fail for too large block device,
836 # then, read the size of partition from /proc/partitions
838 # get the realpath of the device
839 # it may be the real device, such as /dev/hda7
840 # or the hardlink created via mknod for a device
841 if 'realpath' in dir(os.path):
842 real_dev = os.path.realpath(dev)
846 while os.path.islink(real_dev) and (link_count < 20):
847 link_count = link_count + 1
848 dev_link = os.readlink(real_dev)
849 if os.path.isabs(dev_link):
852 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
854 panic("Entountered too many symbolic links resolving block device:", dev)
856 # get the major and minor number of the realpath via ls
857 # it seems python(os.stat) does not return
858 # the st_rdev member of the stat structure
859 ret, out = runcmd("ls -l %s" %real_dev)
860 major = string.split(string.split(out[0])[4], ",")[0]
861 minor = string.split(out[0])[5]
863 # get the devsize from /proc/partitions with the major and minor number
864 ret, out = runcmd("cat /proc/partitions")
867 if string.split(line)[0] == major and string.split(line)[1] == minor:
868 devsize = int(string.split(line)[2])
871 if devsize > 1024 * 1024:
872 jsize = ((devsize / 102400) * 4)
875 if jsize: jopt = "-J size=%d" %(jsize,)
876 if isize: iopt = "-I %d" %(isize,)
877 mkfs = 'mkfs.ext2 -j -b 4096 '
878 if not isblock or config.force:
880 if jdev(mkfsoptions) != '':
881 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
883 jmkfs = jmkfs + '-F '
884 jmkfs = jmkfs + jdev(mkfsoptions)
885 (ret, out) = run (jmkfs)
887 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
888 elif fstype == 'reiserfs':
889 # reiserfs journal size is in blocks
890 if jsize: jopt = "--journal_size %d" %(jsize,)
891 mkfs = 'mkreiserfs -ff'
893 panic('unsupported fs type: ', fstype)
895 if config.mkfsoptions != None:
896 mkfs = mkfs + ' ' + config.mkfsoptions
897 if mkfsoptions != None:
898 mkfs = mkfs + ' ' + mkfsoptions
899 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
901 panic("Unable to build fs:", dev, string.join(out))
902 # enable hash tree indexing on the filesystem
903 if fstype in ('ext3', 'extN', 'ldiskfs'):
904 htree = 'echo "feature FEATURE_C5" | debugfs -w'
905 (ret, out) = run (htree, dev)
907 panic("Unable to enable htree:", dev)
909 # some systems use /dev/loopN, some /dev/loop/N
913 if not os.access(loop + str(0), os.R_OK):
915 if not os.access(loop + str(0), os.R_OK):
916 panic ("can't access loop devices")
919 # find loop device assigned to the file
920 def find_assigned_loop(file):
922 for n in xrange(0, MAX_LOOP_DEVICES):
924 if os.access(dev, os.R_OK):
925 (stat, out) = run('losetup', dev)
926 if out and stat == 0:
927 m = re.search(r'\((.*)\)', out[0])
928 if m and file == m.group(1):
932 # find free loop device
933 def find_free_loop(file):
936 # find next free loop
937 for n in xrange(0, MAX_LOOP_DEVICES):
939 if os.access(dev, os.R_OK):
940 (stat, out) = run('losetup', dev)
945 # create file if necessary and assign the first free loop device
946 def init_loop(file, size, fstype, journal_size, inode_size,
947 mkfsoptions, reformat, autoformat, backfstype, backfile):
950 realfstype = backfstype
951 if is_block(backfile):
952 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
953 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
959 dev = find_assigned_loop(realfile)
961 print 'WARNING: file', realfile, 'already mapped to', dev
964 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
965 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
967 panic("Unable to create backing store:", realfile)
968 mkfs(realfile, size, realfstype, journal_size, inode_size,
969 mkfsoptions, isblock=0)
971 dev = find_free_loop(realfile)
973 print "attach " + realfile + " <-> " + dev
974 run('losetup', dev, realfile)
977 print "out of loop devices"
980 # undo loop assignment
981 def clean_loop(dev, fstype, backfstype, backdev):
986 if not is_block(realfile):
987 dev = find_assigned_loop(realfile)
989 print "detach " + dev + " <-> " + realfile
990 ret, out = run('losetup -d', dev)
992 log('unable to clean loop device:', dev, 'for file:', realfile)
995 # finalizes the passed device
def clean_dev(dev, fstype, backfstype, backdev):
    """Tear down the loop device behind *dev*, if one is in use."""
    # smfs always lives on a loop file; for other fstypes, a path that is
    # not a block device implies a loop mapping that must be released.
    uses_loop = (fstype == 'smfs') or (not is_block(dev))
    if uses_loop:
        clean_loop(dev, fstype, backfstype, backdev)
1000 # determine if dev is formatted as a <fstype> filesystem
1001 def need_format(fstype, dev):
1002 # FIXME don't know how to implement this
1005 # initialize a block device if needed
1006 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1007 inode_size, mkfsoptions, backfstype, backdev):
1011 if fstype == 'smfs' or not is_block(dev):
1012 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1013 mkfsoptions, reformat, autoformat, backfstype, backdev)
1014 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1015 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1018 # panic("device:", dev,
1019 # "not prepared, and autoformat is not set.\n",
1020 # "Rerun with --reformat option to format ALL filesystems")
1025 """lookup IP address for an interface"""
1026 rc, out = run("/sbin/ifconfig", iface)
1029 addr = string.split(out[1])[1]
1030 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return default mount options for *fstype* on a given target (mds/ost)."""
    if fstype not in ('ext3', 'ldiskfs'):
        # No defaults for other filesystem types.
        return None
    options = "errors=remount-ro"
    # 2.4-series kernels want asyncdel on OST filesystems.
    if target == 'ost' and sys_get_branch() == '2.4':
        options = options + ",asyncdel"
    return options
1042 def sys_get_elan_position_file():
1043 procfiles = ["/proc/elan/device0/position",
1044 "/proc/qsnet/elan4/device0/position",
1045 "/proc/qsnet/elan3/device0/position"]
1047 if os.access(p, os.R_OK):
1051 def sys_get_local_nid(net_type, wildcard, cluster_id):
1052 """Return the local nid."""
1054 if sys_get_elan_position_file():
1055 local = sys_get_local_address('elan', '*', cluster_id)
1057 local = sys_get_local_address(net_type, wildcard, cluster_id)
1060 def sys_get_local_address(net_type, wildcard, cluster_id):
1061 """Return the local address for the network type."""
1063 if net_type in ('tcp','openib','iib','vib','ra'):
1065 iface, star = string.split(wildcard, ':')
1066 local = if2addr(iface)
1068 panic ("unable to determine ip for:", wildcard)
1070 host = socket.gethostname()
1071 local = socket.gethostbyname(host)
1072 elif net_type == 'elan':
1073 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1074 f = sys_get_elan_position_file()
1076 panic ("unable to determine local Elan ID")
1079 lines = fp.readlines()
1083 if a[0] == 'NodeId':
1087 nid = my_int(cluster_id) + my_int(elan_id)
1088 local = "%d" % (nid)
1089 except ValueError, e:
1093 elif net_type == 'lo':
1094 fixme("automatic local address for loopback")
1095 elif net_type == 'gm':
1096 fixme("automatic local address for GM")
1100 def sys_get_branch():
1101 """Returns kernel release"""
1103 fp = open('/proc/sys/kernel/osrelease')
1104 lines = fp.readlines()
1108 version = string.split(l)
1109 a = string.split(version[0], '.')
1110 return a[0] + '.' + a[1]
1115 # XXX: instead of device_list, ask for $name and see what we get
1116 def is_prepared(name):
1117 """Return true if a device exists for the name"""
1118 if config.lctl_dump:
1120 if (config.noexec or config.record) and config.cleanup:
1123 # expect this format:
1124 # 1 UP ldlm ldlm ldlm_UUID 2
1125 out = lctl.device_list()
1127 if name == string.split(s)[3]:
1129 except CommandError, e:
1133 def net_is_prepared():
1134 """If the any device exists, then assume that all networking
1135 has been configured"""
1136 out = lctl.device_list()
1139 def fs_is_mounted(path):
1140 """Return true if path is a mounted lustre filesystem"""
1142 fp = open('/proc/mounts')
1143 lines = fp.readlines()
1147 if a[1] == path and a[2] == 'lustre_lite':
1153 def kmod_find(src_dir, dev_dir, modname):
1154 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1155 for modext in '.ko', '.o':
1156 module = modbase + modext
1158 if os.access(module, os.R_OK):
1164 def kmod_info(modname):
1165 """Returns reference count for passed module name."""
1167 fp = open('/proc/modules')
1168 lines = fp.readlines()
1171 # please forgive my tired fingers for this one
1172 ret = filter(lambda word, mod = modname: word[0] == mod,
1173 map(lambda line: string.split(line), lines))
1177 except Exception, e:
1181 """Presents kernel module"""
1182 def __init__(self, src_dir, dev_dir, name):
1183 self.src_dir = src_dir
1184 self.dev_dir = dev_dir
1187 # FIXME we ignore a failure to load the gss module, because we
1188 # might not need it at all.
1191 log ('loading module:', self.name, 'srcdir',
1192 self.src_dir, 'devdir', self.dev_dir)
1194 module = kmod_find(self.src_dir, self.dev_dir,
1196 if not module and self.name != 'ptlrpcs_gss':
1197 panic('module not found:', self.name)
1198 (rc, out) = run('/sbin/insmod', module)
1200 if self.name == 'ptlrpcs_gss':
1201 print "Warning: not support gss security!"
1203 raise CommandError('insmod', out, rc)
1205 (rc, out) = run('/sbin/modprobe', self.name)
1207 if self.name == 'ptlrpcs_gss':
1208 print "Warning: not support gss security!"
1210 raise CommandError('modprobe', out, rc)
1214 log('unloading module:', self.name)
1215 (rc, out) = run('/sbin/rmmod', self.name)
1217 log('unable to unload module:', self.name +
1218 "(" + self.refcount() + ")")
1222 """Returns module info if any."""
1223 return kmod_info(self.name)
1226 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1233 """Returns module refcount."""
1240 """Returns 1 if module is used, otherwise 0 is returned."""
1246 if users and users != '(unused)' and users != '-':
1254 """Returns 1 if module is busy, otherwise 0 is returned."""
1255 if self.loaded() and (self.used() or self.refcount() != '0'):
1261 """Manage kernel modules"""
def __init__(self, lustre_dir, portals_dir):
    """Track the lustre/portals source dirs and the ordered module list."""
    self.kmodule_list = []          # kmod objects, in load order
    self.lustre_dir = lustre_dir
    self.portals_dir = portals_dir
1267 def find_module(self, modname):
1268 """Find module by module name"""
1269 for mod in self.kmodule_list:
1270 if mod.name == modname:
1274 def add_portals_module(self, dev_dir, modname):
1275 """Append a module to list of modules to load."""
1277 mod = self.find_module(modname)
1279 mod = kmod(self.portals_dir, dev_dir, modname)
1280 self.kmodule_list.append(mod)
1282 def add_lustre_module(self, dev_dir, modname):
1283 """Append a module to list of modules to load."""
1285 mod = self.find_module(modname)
1287 mod = kmod(self.lustre_dir, dev_dir, modname)
1288 self.kmodule_list.append(mod)
1290 def load_modules(self):
1291 """Load all the modules in the list in the order they appear."""
1292 for mod in self.kmodule_list:
1293 if mod.loaded() and not config.noexec:
1297 def cleanup_modules(self):
1298 """Unload the modules in the list in reverse order."""
1299 rev = self.kmodule_list
1302 if (not mod.loaded() or mod.busy()) and not config.noexec:
1305 if mod.name == 'portals' and config.dump:
1306 lctl.dump(config.dump)
1309 # ============================================================
1310 # Classes to prepare and cleanup the various objects
1313 """ Base class for the rest of the modules. The default cleanup method is
1314 defined here, as well as some utility funcs.
1316 def __init__(self, module_name, db):
1318 self.module_name = module_name
1319 self.name = self.db.getName()
1320 self.uuid = self.db.getUUID()
1324 def info(self, *args):
1325 msg = string.join(map(str,args))
1326 print self.module_name + ":", self.name, self.uuid, msg
1329 """ default cleanup, used for most modules """
1332 lctl.cleanup(self.name, self.uuid, config.force)
1333 except CommandError, e:
1334 log(self.module_name, "cleanup failed: ", self.name)
1338 def add_module(self, manager):
1339 """Adds all needed modules in the order they appear."""
1342 def safe_to_clean(self):
1345 def safe_to_clean_modules(self):
1346 return self.safe_to_clean()
1348 class Network(Module):
1349 def __init__(self,db):
1350 Module.__init__(self, 'NETWORK', db)
1351 self.net_type = self.db.get_val('nettype')
1352 self.nid = self.db.get_val('nid', '*')
1353 self.cluster_id = self.db.get_val('clusterid', "0")
1354 self.port = self.db.get_val_int('port', 0)
1357 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1359 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1360 self.generic_nid = 1
1361 debug("nid:", self.nid)
1363 self.generic_nid = 0
1365 self.nid_uuid = self.nid_to_uuid(self.nid)
1366 self.hostaddr = self.db.get_hostaddr()
1367 if len(self.hostaddr) == 0:
1368 self.hostaddr.append(self.nid)
1369 if '*' in self.hostaddr[0]:
1370 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1371 if not self.hostaddr[0]:
1372 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1373 debug("hostaddr:", self.hostaddr[0])
def add_module(self, manager):
    """Register the portals core and per-network-type NAL kernel modules."""
    manager.add_portals_module("libcfs", 'libcfs')
    manager.add_portals_module("portals", 'portals')
    if node_needs_router():
        manager.add_portals_module("router", 'kptlrouter')
    # One NAL module per network type.
    if self.net_type == 'tcp':
        manager.add_portals_module("knals/socknal", 'ksocknal')
    if self.net_type == 'elan':
        manager.add_portals_module("knals/qswnal", 'kqswnal')
    if self.net_type == 'gm':
        manager.add_portals_module("knals/gmnal", 'kgmnal')
    if self.net_type == 'openib':
        manager.add_portals_module("knals/openibnal", 'kopenibnal')
    if self.net_type == 'iib':
        manager.add_portals_module("knals/iibnal", 'kiibnal')
    if self.net_type == 'vib':
        # BUG FIX: was self.add_portals_module — Network has no such
        # method (it would raise AttributeError for vib networks);
        # route through the manager like every other NAL branch.
        manager.add_portals_module("knals/vibnal", 'kvibnal')
    if self.net_type == 'lo':
        manager.add_portals_module("knals/lonal", 'klonal')
    if self.net_type == 'ra':
        manager.add_portals_module("knals/ranal", 'kranal')
def nid_to_uuid(self, nid):
    """Derive the UUID string under which a peer NID is registered."""
    return "NID_" + str(nid) + "_UUID"
1401 if not config.record and net_is_prepared():
1403 self.info(self.net_type, self.nid, self.port)
1404 if not (config.record and self.generic_nid):
1405 lctl.network(self.net_type, self.nid)
1406 if self.net_type == 'tcp':
1408 for hostaddr in self.db.get_hostaddr():
1409 ip = string.split(hostaddr, '/')[0]
1410 if len(string.split(hostaddr, '/')) == 2:
1411 netmask = string.split(hostaddr, '/')[1]
1414 lctl.add_interface(self.net_type, ip, netmask)
1415 if self.net_type == 'elan':
1417 if self.port and node_is_router():
1418 run_one_acceptor(self.port)
1419 self.connect_peer_gateways()
1421 def connect_peer_gateways(self):
1422 for router in self.db.lookup_class('node'):
1423 if router.get_val_int('router', 0):
1424 for netuuid in router.get_networks():
1425 net = self.db.lookup(netuuid)
1427 if (gw.cluster_id == self.cluster_id and
1428 gw.net_type == self.net_type):
1429 if gw.nid != self.nid:
1432 def disconnect_peer_gateways(self):
1433 for router in self.db.lookup_class('node'):
1434 if router.get_val_int('router', 0):
1435 for netuuid in router.get_networks():
1436 net = self.db.lookup(netuuid)
1438 if (gw.cluster_id == self.cluster_id and
1439 gw.net_type == self.net_type):
1440 if gw.nid != self.nid:
1443 except CommandError, e:
1444 print "disconnect failed: ", self.name
def safe_to_clean(self):
    """The network may be cleaned only while no lustre devices are configured."""
    return not net_is_prepared()
1452 self.info(self.net_type, self.nid, self.port)
1454 stop_acceptor(self.port)
1455 if node_is_router():
1456 self.disconnect_peer_gateways()
1457 if self.net_type == 'tcp':
1458 for hostaddr in self.db.get_hostaddr():
1459 ip = string.split(hostaddr, '/')[0]
1460 lctl.del_interface(self.net_type, ip)
1462 def correct_level(self, level, op=None):
1465 class RouteTable(Module):
1466 def __init__(self,db):
1467 Module.__init__(self, 'ROUTES', db)
1469 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1471 # only setup connections for tcp, openib, and iib NALs
1473 if not net_type in ('tcp','openib','iib','vib','ra'):
1476 # connect to target if route is to single node and this node is the gw
1477 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1478 if not local_cluster(net_type, tgt_cluster_id):
1479 panic("target", lo, " not on the local cluster")
1480 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1481 # connect to gateway if this node is not the gw
1482 elif (local_cluster(net_type, gw_cluster_id)
1483 and not local_interface(net_type, gw_cluster_id, gw)):
1484 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1489 panic("no server for nid", lo)
1492 return Network(srvdb)
1495 if not config.record and net_is_prepared():
1498 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1499 lctl.add_route(net_type, gw, lo, hi)
1500 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1504 def safe_to_clean(self):
1505 return not net_is_prepared()
1508 if net_is_prepared():
1509 # the network is still being used, don't clean it up
1511 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1512 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1515 lctl.disconnect(srv)
1516 except CommandError, e:
1517 print "disconnect failed: ", self.name
1522 lctl.del_route(net_type, gw, lo, hi)
1523 except CommandError, e:
1524 print "del_route failed: ", self.name
1528 class Management(Module):
1529 def __init__(self, db):
1530 Module.__init__(self, 'MGMT', db)
def add_module(self, manager):
    """Queue the kernel modules required by the mgmt service, in load order."""
    for subdir, modname in (('lvfs', 'lvfs'),
                            ('obdclass', 'obdclass'),
                            ('ptlrpc', 'ptlrpc'),
                            ('mgmt', 'mgmt_svc')):
        manager.add_lustre_module(subdir, modname)
1539 if not config.record and is_prepared(self.name):
1542 lctl.newdev("mgmt", self.name, self.uuid)
1544 def safe_to_clean(self):
1548 if is_prepared(self.name):
1549 Module.cleanup(self)
1551 def correct_level(self, level, op=None):
1554 # This is only needed to load the modules; the LDLM device
1555 # is now created automatically.
    def __init__(self,db):
        # Lock-manager pseudo-device; per the comment above, the LDLM
        # device is created automatically, so this Module exists only
        # to get the right kernel modules loaded.
        Module.__init__(self, 'LDLM', db)
1560 def add_module(self, manager):
1561 manager.add_lustre_module('lvfs', 'lvfs')
1562 manager.add_lustre_module('obdclass', 'obdclass')
1563 manager.add_lustre_module('sec', 'ptlrpcs')
1564 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1565 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1573 def correct_level(self, level, op=None):
1577 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1578 Module.__init__(self, 'LOV', db)
1579 if name_override != None:
1580 self.name = "lov_%s" % name_override
1581 self.mds_uuid = self.db.get_first_ref('mds')
1582 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1583 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1584 self.pattern = self.db.get_val_int('stripepattern', 0)
1585 self.devlist = self.db.get_lov_tgts('lov_tgt')
1586 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1589 self.desc_uuid = self.uuid
1590 self.uuid = generate_client_uuid(self.name)
1591 self.fs_name = fs_name
1593 self.config_only = 1
1595 self.config_only = None
1596 mds = self.db.lookup(self.mds_uuid)
1597 self.mds_name = mds.getName()
1598 for (obd_uuid, index, gen, active) in self.devlist:
1601 self.obdlist.append(obd_uuid)
1602 obd = self.db.lookup(obd_uuid)
1603 osc = get_osc(obd, self.uuid, fs_name)
1605 self.osclist.append((osc, index, gen, active))
1607 panic('osc not found:', obd_uuid)
1613 if not config.record and is_prepared(self.name):
1615 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1616 self.stripe_off, self.pattern, self.devlist,
1618 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1619 self.stripe_sz, self.stripe_off, self.pattern,
1620 string.join(self.obdlist))
1621 for (osc, index, gen, active) in self.osclist:
1622 target_uuid = osc.target_uuid
1624 # Only ignore connect failures with --force, which
1625 # isn't implemented here yet.
1627 osc.prepare(ignore_connect_failure=0)
1628 except CommandError, e:
1629 print "Error preparing OSC %s\n" % osc.uuid
1631 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1634 for (osc, index, gen, active) in self.osclist:
1635 target_uuid = osc.target_uuid
1637 if is_prepared(self.name):
1638 Module.cleanup(self)
1639 if self.config_only:
1640 panic("Can't clean up config_only LOV ", self.name)
    def add_module(self, manager):
        # A config-only LOV exists just to record configuration, so
        # loading modules for it is a configuration error.
        if self.config_only:
            panic("Can't load modules for config_only LOV ", self.name)
        # Each underlying OSC queues its own module requirements
        # before the lov module itself.
        for (osc, index, gen, active) in self.osclist:
            osc.add_module(manager)
        manager.add_lustre_module('lov', 'lov')
1650 def correct_level(self, level, op=None):
1654 def __init__(self, db, uuid, fs_name, name_override = None):
1655 Module.__init__(self, 'LMV', db)
1656 if name_override != None:
1657 self.name = "lmv_%s" % name_override
1659 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1660 if self.devlist == None:
1661 self.devlist = self.db.get_refs('mds')
1664 self.desc_uuid = self.uuid
1666 self.fs_name = fs_name
1667 for mds_uuid in self.devlist:
1668 mds = self.db.lookup(mds_uuid)
1670 panic("MDS not found!")
1671 mdc = MDC(mds, self.uuid, fs_name)
1673 self.mdclist.append(mdc)
1675 panic('mdc not found:', mds_uuid)
1678 if is_prepared(self.name):
1682 for mdc in self.mdclist:
1684 # Only ignore connect failures with --force, which
1685 # isn't implemented here yet.
1686 mdc.prepare(ignore_connect_failure=0)
1687 except CommandError, e:
1688 print "Error preparing LMV %s\n" % mdc.uuid
1691 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1692 string.join(self.devlist))
1695 for mdc in self.mdclist:
1697 if is_prepared(self.name):
1698 Module.cleanup(self)
    def add_module(self, manager):
        # Queue modules for every underlying MDC first, then the lmv
        # module itself.
        for mdc in self.mdclist:
            mdc.add_module(manager)
        manager.add_lustre_module('lmv', 'lmv')
1706 def correct_level(self, level, op=None):
1709 class MDSDEV(Module):
1710 def __init__(self,db):
1711 Module.__init__(self, 'MDSDEV', db)
1712 self.devpath = self.db.get_val('devpath','')
1713 self.backdevpath = self.db.get_val('backdevpath','')
1714 self.size = self.db.get_val_int('devsize', 0)
1715 self.journal_size = self.db.get_val_int('journalsize', 0)
1716 self.fstype = self.db.get_val('fstype', '')
1717 self.backfstype = self.db.get_val('backfstype', '')
1718 self.nspath = self.db.get_val('nspath', '')
1719 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1720 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1721 self.obdtype = self.db.get_val('obdtype', '')
1722 self.root_squash = self.db.get_val('root_squash', '')
1723 self.no_root_squash = self.db.get_val('no_root_squash', '')
1724 # overwrite the original MDSDEV name and uuid with the MDS name and uuid
1725 target_uuid = self.db.get_first_ref('target')
1726 self.mds = self.db.lookup(target_uuid)
1727 self.name = self.mds.getName()
1728 self.client_uuids = self.mds.get_refs('client')
1733 lmv_uuid = self.db.get_first_ref('lmv')
1734 if lmv_uuid != None:
1735 self.lmv = self.db.lookup(lmv_uuid)
1736 if self.lmv != None:
1737 self.client_uuids = self.lmv.get_refs('client')
1739 # FIXME: if fstype not set, then determine based on kernel version
1740 self.format = self.db.get_val('autoformat', "no")
1741 if self.mds.get_val('failover', 0):
1742 self.failover_mds = 'f'
1744 self.failover_mds = 'n'
1745 active_uuid = get_active_target(self.mds)
1747 panic("No target device found:", target_uuid)
1748 if active_uuid == self.uuid:
1752 if self.active and config.group and config.group != self.mds.get_val('group'):
1755 # default inode size for the case when neither LOV nor
1756 # LMV is accessible.
1757 self.inode_size = 256
1759 inode_size = self.db.get_val_int('inodesize', 0)
1760 if not inode_size == 0:
1761 self.inode_size = inode_size
1763 # find the LOV for this MDS
1764 lovconfig_uuid = self.mds.get_first_ref('lovconfig')
1765 if lovconfig_uuid or self.lmv != None:
1766 if self.lmv != None:
1767 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1768 lovconfig = self.lmv.lookup(lovconfig_uuid)
1769 lov_uuid = lovconfig.get_first_ref('lov')
1770 if lov_uuid == None:
1771 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1774 lovconfig = self.mds.lookup(lovconfig_uuid)
1775 lov_uuid = lovconfig.get_first_ref('lov')
1776 if lov_uuid == None:
1777 panic(self.mds.getName() + ": No LOV found for lovconfig ",
1780 if self.lmv != None:
1781 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1782 lovconfig = self.lmv.lookup(lovconfig_uuid)
1783 lov_uuid = lovconfig.get_first_ref('lov')
1785 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1788 # default stripe count controls default inode_size
1789 stripe_count = lov.stripe_cnt
1790 if stripe_count > 77:
1791 self.inode_size = 4096
1792 elif stripe_count > 35:
1793 self.inode_size = 2048
1794 elif stripe_count > 13:
1795 self.inode_size = 1024
1796 elif stripe_count > 3:
1797 self.inode_size = 512
1799 self.inode_size = 256
1801 self.target_dev_uuid = self.uuid
1802 self.uuid = target_uuid
1805 if self.lmv != None:
1806 client_uuid = self.name + "_lmv_UUID"
1807 self.master = LMV(self.lmv, client_uuid,
1808 self.name, self.name)
1810 def add_module(self, manager):
1812 manager.add_lustre_module('mdc', 'mdc')
1813 manager.add_lustre_module('osc', 'osc')
1814 manager.add_lustre_module('ost', 'ost')
1815 manager.add_lustre_module('lov', 'lov')
1816 manager.add_lustre_module('mds', 'mds')
1818 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
1819 manager.add_lustre_module(self.fstype, self.fstype)
1822 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
1824 # if fstype is smfs, then we should also take care about backing
1826 if self.fstype == 'smfs':
1827 manager.add_lustre_module(self.backfstype, self.backfstype)
1828 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
1830 for option in string.split(self.mountfsoptions, ','):
1831 if option == 'snap':
1832 if not self.fstype == 'smfs':
1833 panic("mountoptions has 'snap', but fstype is not smfs.")
1834 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
1835 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
1838 if self.master != None:
1839 self.master.add_module(manager)
1841 def get_mount_options(self, blkdev):
1842 options = def_mount_options(self.fstype, 'mds')
1844 if config.mountfsoptions:
1846 options = "%s,%s" %(options, config.mountfsoptions)
1848 options = config.mountfsoptions
1849 if self.mountfsoptions:
1850 options = "%s,%s" %(options, self.mountfsoptions)
1852 if self.mountfsoptions:
1854 options = "%s,%s" %(options, self.mountfsoptions)
1856 options = self.mountfsoptions
1858 if self.fstype == 'smfs':
1860 options = "%s,type=%s,dev=%s" %(options,
1861 self.backfstype, blkdev)
1863 options = "type=%s,dev=%s" %(self.backfstype, blkdev)
1867 if not config.record and is_prepared(self.name):
1870 debug(self.uuid, "not active")
1873 # run write_conf automatically, if --reformat used
1878 if self.master != None:
1879 self.master.prepare()
1881 # never reformat here
1882 blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
1883 self.format, self.journal_size, self.inode_size,
1884 self.mkfsoptions, self.backfstype, self.backdevpath)
1886 if not is_prepared('MDT'):
1887 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
1889 if self.fstype == 'smfs':
1890 realdev = self.fstype
1894 if self.obdtype == None:
1895 self.obdtype = 'dumb'
1897 if self.master == None:
1898 master_name = 'dumb'
1900 master_name = self.master.name
1902 if self.client_uuids == None:
1903 profile_name = 'dumb'
1905 profile_name = self.name
1907 mountfsoptions = self.get_mount_options(blkdev)
1909 self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
1910 self.format, master_name, profile_name, self.obdtype)
1912 lctl.attach("mds", self.name, self.uuid)
1913 if config.mds_mds_sec:
1914 lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec)
1915 if config.mds_ost_sec:
1916 lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec)
1918 lctl.setup(self.name, setup = "%s %s %s %s %s %s" %(realdev,
1919 self.fstype, profile_name, mountfsoptions,
1920 master_name, self.obdtype))
1922 if development_mode():
1923 procentry = "/proc/fs/lustre/mds/lsd_upcall"
1924 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
1925 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
1926 print "MDS Warning: failed to set lsd cache upcall"
1928 run("echo ", upcall, " > ", procentry)
1930 except CommandError, e:
1932 panic("MDS is missing the config log. Need to run " +
1933 "lconf --write_conf.")
1937 if config.root_squash == None:
1938 config.root_squash = self.root_squash
1939 if config.no_root_squash == None:
1940 config.no_root_squash = self.no_root_squash
1941 if config.root_squash:
1942 if config.no_root_squash:
1943 nsnid = config.no_root_squash
1946 lctl.root_squash(self.name, config.root_squash, nsnid)
1948 def write_conf(self):
1949 if not self.client_uuids:
1953 if not is_prepared(self.name):
1954 blkdev = block_dev(self.devpath, self.size, self.fstype,
1955 config.reformat, self.format, self.journal_size,
1956 self.inode_size, self.mkfsoptions,
1957 self.backfstype, self.backdevpath)
1959 if self.fstype == 'smfs':
1960 realdev = self.fstype
1964 # Even for writing logs we mount mds with supplied mount options
1965 # because it will not mount smfs (if used) otherwise.
1966 mountfsoptions = self.get_mount_options(blkdev)
1968 if self.obdtype == None:
1969 self.obdtype = 'dumb'
1971 self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
1972 self.format, "dumb", "dumb", self.obdtype)
1974 lctl.newdev("mds", self.name, self.uuid,
1975 setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
1976 'dumb', mountfsoptions,
1977 'dumb', self.obdtype))
1980 # record logs for all MDS clients
1981 for obd_uuid in self.client_uuids:
1982 log("recording client:", obd_uuid)
1984 client_uuid = generate_client_uuid(self.name)
1985 client = VOSC(self.db.lookup(obd_uuid), client_uuid,
1986 self.name, self.name)
1988 lctl.clear_log(self.name, self.name)
1989 lctl.record(self.name, self.name)
1991 lctl.mount_option(self.name, client.get_name(), "")
1993 process_updates(self.db, self.name, self.name, client)
1996 lctl.clear_log(self.name, self.name + '-clean')
1997 lctl.record(self.name, self.name + '-clean')
1999 lctl.del_mount_option(self.name)
2001 process_updates(self.db, self.name, self.name + '-clean', client)
2005 # record logs for each client
2011 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
2013 config_options = CONFIG_FILE
2015 for node_db in self.db.lookup_class('node'):
2016 client_name = node_db.getName()
2017 for prof_uuid in node_db.get_refs('profile'):
2018 prof_db = node_db.lookup(prof_uuid)
2019 # refactor this into a function to test "clientness"
2021 for ref_class, ref_uuid in prof_db.get_all_refs():
2022 if ref_class in ('mountpoint','echoclient'):
2023 debug("recording", client_name)
2024 old_noexec = config.noexec
2026 ret, out = run (sys.argv[0], noexec_opt,
2027 " -v --record --nomod",
2028 "--record_log", client_name,
2029 "--record_device", self.name,
2030 "--node", client_name,
2033 for s in out: log("record> ", string.strip(s))
2034 ret, out = run (sys.argv[0], noexec_opt,
2035 "--cleanup -v --record --nomod",
2036 "--record_log", client_name + "-clean",
2037 "--record_device", self.name,
2038 "--node", client_name,
2041 for s in out: log("record> ", string.strip(s))
2042 config.noexec = old_noexec
2045 lctl.cleanup(self.name, self.uuid, 0, 0)
2046 except CommandError, e:
2047 log(self.module_name, "cleanup failed: ", self.name)
2050 Module.cleanup(self)
2052 clean_dev(self.devpath, self.fstype, self.backfstype,
2055 def msd_remaining(self):
2056 out = lctl.device_list()
2058 if string.split(s)[2] in ('mds',):
2061 def safe_to_clean(self):
    def safe_to_clean_modules(self):
        # Unloading modules is only safe once no MDS devices remain.
        return not self.msd_remaining()
2069 debug(self.uuid, "not active")
2072 if is_prepared(self.name):
2074 lctl.cleanup(self.name, self.uuid, config.force,
2076 except CommandError, e:
2077 log(self.module_name, "cleanup failed: ", self.name)
2080 Module.cleanup(self)
2082 if self.master != None:
2083 self.master.cleanup()
2084 if not self.msd_remaining() and is_prepared('MDT'):
2086 lctl.cleanup("MDT", "MDT_UUID", config.force,
2088 except CommandError, e:
2089 print "cleanup failed: ", self.name
2093 clean_dev(self.devpath, self.fstype, self.backfstype,
2096 def correct_level(self, level, op=None):
2097 #if self.master != None:
2102 def __init__(self, db):
2103 Module.__init__(self, 'OSD', db)
2104 self.osdtype = self.db.get_val('osdtype')
2105 self.devpath = self.db.get_val('devpath', '')
2106 self.backdevpath = self.db.get_val('backdevpath', '')
2107 self.size = self.db.get_val_int('devsize', 0)
2108 self.journal_size = self.db.get_val_int('journalsize', 0)
2109 self.inode_size = self.db.get_val_int('inodesize', 0)
2110 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2111 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2112 self.fstype = self.db.get_val('fstype', '')
2113 self.backfstype = self.db.get_val('backfstype', '')
2114 self.nspath = self.db.get_val('nspath', '')
2115 target_uuid = self.db.get_first_ref('target')
2116 ost = self.db.lookup(target_uuid)
2117 self.name = ost.getName()
2118 self.format = self.db.get_val('autoformat', 'yes')
2119 if ost.get_val('failover', 0):
2120 self.failover_ost = 'f'
2122 self.failover_ost = 'n'
2124 active_uuid = get_active_target(ost)
2126 panic("No target device found:", target_uuid)
2127 if active_uuid == self.uuid:
2131 if self.active and config.group and config.group != ost.get_val('group'):
2134 self.target_dev_uuid = self.uuid
2135 self.uuid = target_uuid
2137 def add_module(self, manager):
2139 manager.add_lustre_module('ost', 'ost')
2141 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2142 manager.add_lustre_module(self.fstype, self.fstype)
2145 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2147 if self.fstype == 'smfs':
2148 manager.add_lustre_module(self.backfstype, self.backfstype)
2149 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2151 for option in self.mountfsoptions:
2152 if option == 'snap':
2153 if not self.fstype == 'smfs':
2154 panic("mountoptions with snap, but fstype is not smfs\n")
2155 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2156 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2158 manager.add_lustre_module(self.osdtype, self.osdtype)
2160 def get_mount_options(self, blkdev):
2161 options = def_mount_options(self.fstype, 'ost')
2163 if config.mountfsoptions:
2165 options = "%s,%s" %(options, config.mountfsoptions)
2167 options = config.mountfsoptions
2168 if self.mountfsoptions:
2169 options = "%s,%s" %(options, self.mountfsoptions)
2171 if self.mountfsoptions:
2173 options = "%s,%s" %(options, self.mountfsoptions)
2175 options = self.mountfsoptions
2177 if self.fstype == 'smfs':
2179 options = "%s,type=%s,dev=%s" %(options,
2180 self.backfstype, blkdev)
2182 options = "type=%s,dev=%s" %(self.backfstype,
2186 # need to check /proc/mounts and /etc/mtab before
2187 # formatting anything.
2188 # FIXME: check if device is already formatted.
2190 if is_prepared(self.name):
2193 debug(self.uuid, "not active")
2196 if self.osdtype == 'obdecho':
2199 blkdev = block_dev(self.devpath, self.size, self.fstype,
2200 config.reformat, self.format, self.journal_size,
2201 self.inode_size, self.mkfsoptions, self.backfstype,
2204 if self.fstype == 'smfs':
2205 realdev = self.fstype
2209 mountfsoptions = self.get_mount_options(blkdev)
2211 self.info(self.osdtype, realdev, mountfsoptions, self.fstype,
2212 self.size, self.format, self.journal_size, self.inode_size)
2214 lctl.newdev(self.osdtype, self.name, self.uuid,
2215 setup ="%s %s %s %s" %(realdev, self.fstype,
2218 if not is_prepared('OSS'):
2219 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
2221 def osd_remaining(self):
2222 out = lctl.device_list()
2224 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2227 def safe_to_clean(self):
    def safe_to_clean_modules(self):
        # Unloading modules is only safe once no obdfilter/obdecho
        # devices remain.
        return not self.osd_remaining()
2235 debug(self.uuid, "not active")
2237 if is_prepared(self.name):
2240 lctl.cleanup(self.name, self.uuid, config.force,
2242 except CommandError, e:
2243 log(self.module_name, "cleanup failed: ", self.name)
2246 if not self.osd_remaining() and is_prepared('OSS'):
2248 lctl.cleanup("OSS", "OSS_UUID", config.force,
2250 except CommandError, e:
2251 print "cleanup failed: ", self.name
2254 if not self.osdtype == 'obdecho':
2255 clean_dev(self.devpath, self.fstype, self.backfstype,
2258 def correct_level(self, level, op=None):
2261 def mgmt_uuid_for_fs(mtpt_name):
2264 mtpt_db = toplustreDB.lookup_name(mtpt_name)
2265 fs_uuid = mtpt_db.get_first_ref('filesystem')
2266 fs = toplustreDB.lookup(fs_uuid)
2269 return fs.get_first_ref('mgmt')
2271 # Generic client module, used by OSC and MDC
2272 class Client(Module):
2273 def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
2275 self.target_name = tgtdb.getName()
2276 self.target_uuid = tgtdb.getUUID()
2277 self.module_dir = module_dir
2278 self.module = module
2282 self.tgt_dev_uuid = get_active_target(tgtdb)
2283 if not self.tgt_dev_uuid:
2284 panic("No target device found for target(1):", self.target_name)
2289 self.module = module
2290 self.module_name = string.upper(module)
2292 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2293 self.target_name, fs_name)
2295 self.name = self_name
2297 self.lookup_server(self.tgt_dev_uuid)
2298 mgmt_uuid = mgmt_uuid_for_fs(fs_name)
2300 self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
2303 self.fs_name = fs_name
2304 if not self.module_dir:
2305 self.module_dir = module
    def add_module(self, manager):
        # module_dir defaults to the module name when no override was
        # given (see __init__).
        manager.add_lustre_module(self.module_dir, self.module)
    def lookup_server(self, srv_uuid):
        """ Lookup a server's network information """
        # Resolve the target device UUID to the Network objects it can
        # be reached through; an empty result is fatal.
        self._server_nets = get_ost_net(self.db, srv_uuid)
        if len(self._server_nets) == 0:
            panic ("Unable to find a server for:", srv_uuid)
    def get_servers(self):
        # Networks resolved earlier by lookup_server().
        return self._server_nets
2322 def prepare(self, ignore_connect_failure = 0):
2323 self.info(self.target_uuid)
2324 if not config.record and is_prepared(self.name):
2327 srv = choose_local_server(self.get_servers())
2331 routes = find_route(self.get_servers())
2332 if len(routes) == 0:
2333 panic ("no route to", self.target_uuid)
2334 for (srv, r) in routes:
2335 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2336 except CommandError, e:
2337 if not ignore_connect_failure:
2340 if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
2341 debug("%s inactive" % self.target_uuid)
2342 inactive_p = "inactive"
2344 debug("%s active" % self.target_uuid)
2346 lctl.newdev(self.module, self.name, self.uuid,
2347 setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
2348 inactive_p, self.mgmt_name))
2351 if is_prepared(self.name):
2352 Module.cleanup(self)
2354 srv = choose_local_server(self.get_servers())
2356 lctl.disconnect(srv)
2358 for (srv, r) in find_route(self.get_servers()):
2359 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2360 except CommandError, e:
2361 log(self.module_name, "cleanup failed: ", self.name)
2365 def correct_level(self, level, op=None):
2368 def deactivate(self):
2370 lctl.deactivate(self.name)
2371 except CommandError, e:
2372 log(self.module_name, "deactivate failed: ", self.name)
    def __init__(self, db, uuid, fs_name):
        # Metadata client: a Client using the 'mdc' module.
        Client.__init__(self, db, uuid, 'mdc', fs_name)
2380 def permits_inactive(self):
    def __init__(self, db, uuid, fs_name):
        # Object storage client: a Client using the 'osc' module.
        Client.__init__(self, db, uuid, 'osc', fs_name)
2387 def permits_inactive(self):
def mgmtcli_name_for_uuid(uuid):
    """Derive the management-client device name for a management UUID."""
    name = 'MGMTCLI_%s' % uuid
    return name
class ManagementClient(Client):
    # Client for the management service.  Its device name is derived
    # from the management target's UUID so it can be found again via
    # mgmtcli_name_for_uuid().
    def __init__(self, db, uuid):
        Client.__init__(self, db, uuid, 'mgmt_cli', '',
                        self_name = mgmtcli_name_for_uuid(db.getUUID()),
                        module_dir = 'mgmt')
2399 class CMOBD(Module):
2400 def __init__(self, db):
2401 Module.__init__(self, 'CMOBD', db)
2402 self.name = self.db.getName();
2403 self.uuid = generate_client_uuid(self.name)
2404 self.master_uuid = self.db.get_first_ref('masterobd')
2405 self.cache_uuid = self.db.get_first_ref('cacheobd')
2407 master_obd = self.db.lookup(self.master_uuid)
2409 panic('master obd not found:', self.master_uuid)
2411 cache_obd = self.db.lookup(self.cache_uuid)
2413 panic('cache obd not found:', self.cache_uuid)
2418 master_class = master_obd.get_class()
2419 cache_class = cache_obd.get_class()
2421 if master_class == 'ost' or master_class == 'lov':
2422 client_uuid = "%s_lov_master_UUID" % (self.name)
2423 self.master = LOV(master_obd, client_uuid, self.name);
2424 elif master_class == 'mds':
2425 self.master = get_mdc(db, self.name, self.master_uuid)
2426 elif master_class == 'lmv':
2427 client_uuid = "%s_lmv_master_UUID" % (self.name)
2428 self.master = LMV(master_obd, client_uuid, self.name);
2430 panic("unknown master obd class '%s'" %(master_class))
2432 if cache_class == 'ost' or cache_class == 'lov':
2433 client_uuid = "%s_lov_cache_UUID" % (self.name)
2434 self.cache = LOV(cache_obd, client_uuid, self.name);
2435 elif cache_class == 'mds':
2436 self.cache = get_mdc(db, self.name, self.cache_uuid)
2437 elif cache_class == 'lmv':
2438 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2439 self.cache = LMV(cache_obd, client_uuid, self.name);
2441 panic("unknown cache obd class '%s'" %(cache_class))
2444 self.master.prepare()
2445 if not config.record and is_prepared(self.name):
2447 self.info(self.master_uuid, self.cache_uuid)
2448 lctl.newdev("cmobd", self.name, self.uuid,
2449 setup ="%s %s" %(self.master.uuid,
    def get_master_name(self):
        # Device name of the stack built from the 'masterobd' reference.
        return self.master.name
    def get_cache_name(self):
        # Device name of the stack built from the 'cacheobd' reference.
        return self.cache.name
2465 if is_prepared(self.name):
2466 Module.cleanup(self)
2468 self.master.cleanup()
    def add_module(self, manager):
        # The cmobd module itself plus whatever the master stack needs.
        manager.add_lustre_module('cmobd', 'cmobd')
        self.master.add_module(manager)
2474 def correct_level(self, level, op=None):
2478 def __init__(self, db, uuid, name):
2479 Module.__init__(self, 'COBD', db)
2480 self.name = self.db.getName();
2481 self.uuid = generate_client_uuid(self.name)
2482 self.master_uuid = self.db.get_first_ref('masterobd')
2483 self.cache_uuid = self.db.get_first_ref('cacheobd')
2485 master_obd = self.db.lookup(self.master_uuid)
2487 panic('master obd not found:', self.master_uuid)
2489 cache_obd = self.db.lookup(self.cache_uuid)
2491 panic('cache obd not found:', self.cache_uuid)
2496 master_class = master_obd.get_class()
2497 cache_class = cache_obd.get_class()
2499 if master_class == 'ost' or master_class == 'lov':
2500 client_uuid = "%s_lov_master_UUID" % (self.name)
2501 self.master = LOV(master_obd, client_uuid, name);
2502 elif master_class == 'mds':
2503 self.master = get_mdc(db, name, self.master_uuid)
2504 elif master_class == 'lmv':
2505 client_uuid = "%s_lmv_master_UUID" % (self.name)
2506 self.master = LMV(master_obd, client_uuid, self.name);
2508 panic("unknown master obd class '%s'" %(master_class))
2510 if cache_class == 'ost' or cache_class == 'lov':
2511 client_uuid = "%s_lov_cache_UUID" % (self.name)
2512 self.cache = LOV(cache_obd, client_uuid, name);
2513 elif cache_class == 'mds':
2514 self.cache = get_mdc(db, name, self.cache_uuid)
2515 elif cache_class == 'lmv':
2516 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2517 self.cache = LMV(cache_obd, client_uuid, self.name);
2519 panic("unknown cache obd class '%s'" %(cache_class))
    def get_master_name(self):
        # Device name of the stack built from the 'masterobd' reference.
        return self.master.name
    def get_cache_name(self):
        # Device name of the stack built from the 'cacheobd' reference.
        return self.cache.name
2534 self.master.prepare()
2535 self.cache.prepare()
2536 if not config.record and is_prepared(self.name):
2538 self.info(self.master_uuid, self.cache_uuid)
2539 lctl.newdev("cobd", self.name, self.uuid,
2540 setup ="%s %s" %(self.master.name,
2544 if is_prepared(self.name):
2545 Module.cleanup(self)
2546 self.master.cleanup()
2547 self.cache.cleanup()
    def add_module(self, manager):
        # The cobd module itself plus the master stack's modules.
        # NOTE(review): the cache stack's modules are not queued here,
        # unlike prepare()/cleanup() which touch both — confirm this
        # is intentional.
        manager.add_lustre_module('cobd', 'cobd')
        self.master.add_module(manager)
2553 # virtual interface for OSC and LOV
2555 def __init__(self, db, client_uuid, name, name_override = None):
2556 Module.__init__(self, 'VOSC', db)
2557 if db.get_class() == 'lov':
2558 self.osc = LOV(db, client_uuid, name, name_override)
2560 elif db.get_class() == 'cobd':
2561 self.osc = COBD(db, client_uuid, name)
2564 self.osc = OSC(db, client_uuid, name)
2568 return self.osc.get_uuid()
2571 return self.osc.get_name()
2579 def add_module(self, manager):
2580 self.osc.add_module(manager)
2582 def correct_level(self, level, op=None):
2583 return self.osc.correct_level(level, op)
2585 # virtual interface for MDC and LMV
2587 def __init__(self, db, client_uuid, name, name_override = None):
2588 Module.__init__(self, 'VMDC', db)
2589 if db.get_class() == 'lmv':
2590 self.mdc = LMV(db, client_uuid, name, name_override)
2591 elif db.get_class() == 'cobd':
2592 self.mdc = COBD(db, client_uuid, name)
2594 self.mdc = MDC(db, client_uuid, name)
2597 return self.mdc.uuid
2600 return self.mdc.name
2608 def add_module(self, manager):
2609 self.mdc.add_module(manager)
2611 def correct_level(self, level, op=None):
2612 return self.mdc.correct_level(level, op)
2614 class ECHO_CLIENT(Module):
2615 def __init__(self,db):
2616 Module.__init__(self, 'ECHO_CLIENT', db)
2617 self.obd_uuid = self.db.get_first_ref('obd')
2618 obd = self.db.lookup(self.obd_uuid)
2619 self.uuid = generate_client_uuid(self.name)
2620 self.osc = VOSC(obd, self.uuid, self.name)
2623 if not config.record and is_prepared(self.name):
2626 self.osc.prepare() # XXX This is so cheating. -p
2627 self.info(self.obd_uuid)
2629 lctl.newdev("echo_client", self.name, self.uuid,
2630 setup = self.osc.get_name())
2633 if is_prepared(self.name):
2634 Module.cleanup(self)
    def add_module(self, manager):
        # The client stack's modules first, then obdecho itself.
        self.osc.add_module(manager)
        manager.add_lustre_module('obdecho', 'obdecho')
2641 def correct_level(self, level, op=None):
2644 def generate_client_uuid(name):
2645 client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
2647 int(random.random() * 1048576),
2648 int(random.random() * 1048576))
2649 return client_uuid[:36]
2651 class Mountpoint(Module):
2652 def __init__(self,db):
2653 Module.__init__(self, 'MTPT', db)
2654 self.path = self.db.get_val('path')
2655 self.clientoptions = self.db.get_val('clientoptions', '')
2656 self.fs_uuid = self.db.get_first_ref('filesystem')
2657 fs = self.db.lookup(self.fs_uuid)
2658 self.mds_uuid = fs.get_first_ref('lmv')
2659 if not self.mds_uuid:
2660 self.mds_uuid = fs.get_first_ref('mds')
2661 self.obd_uuid = fs.get_first_ref('obd')
2662 self.mgmt_uuid = fs.get_first_ref('mgmt')
2663 client_uuid = generate_client_uuid(self.name)
2665 ost = self.db.lookup(self.obd_uuid)
2667 panic("no ost: ", self.obd_uuid)
2669 mds = self.db.lookup(self.mds_uuid)
2671 panic("no mds: ", self.mds_uuid)
2673 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2674 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
2677 self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
2683 if not config.record and fs_is_mounted(self.path):
2684 log(self.path, "already mounted.")
2688 self.mgmtcli.prepare()
2691 vmdc_name = self.vmdc.get_name()
2693 self.info(self.path, self.mds_uuid, self.obd_uuid)
2694 if config.record or config.lctl_dump:
2695 lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
2698 if config.clientoptions:
2699 if self.clientoptions:
2700 self.clientoptions = self.clientoptions + ',' + \
2701 config.clientoptions
2703 self.clientoptions = config.clientoptions
2704 if self.clientoptions:
2705 self.clientoptions = ',' + self.clientoptions
2706 # Linux kernel will deal with async and not pass it to ll_fill_super,
2707 # so replace it with Lustre async
2708 self.clientoptions = string.replace(self.clientoptions, "async",
2713 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,sec=%s%s %s %s" % \
2714 (self.vosc.get_name(), vmdc_name, config.sec, self.clientoptions,
2715 config.config, self.path)
2716 run("mkdir", self.path)
2721 panic("mount failed:", self.path, ":", string.join(val))
2724 self.info(self.path, self.mds_uuid,self.obd_uuid)
2726 if config.record or config.lctl_dump:
2727 lctl.del_mount_option(local_node_name)
2729 if fs_is_mounted(self.path):
2731 (rc, out) = run("umount", "-f", self.path)
2733 (rc, out) = run("umount", self.path)
2735 raise CommandError('umount', out, rc)
2737 if fs_is_mounted(self.path):
2738 panic("fs is still mounted:", self.path)
2743 self.mgmtcli.cleanup()
2745 def add_module(self, manager):
2746 manager.add_lustre_module('mdc', 'mdc')
2749 self.mgmtcli.add_module(manager)
2751 self.vosc.add_module(manager)
2752 self.vmdc.add_module(manager)
2754 manager.add_lustre_module('llite', 'llite')
2756 def correct_level(self, level, op=None):
2759 # ============================================================
2760 # misc query functions
2762 def get_ost_net(self, osd_uuid):
2766 osd = self.lookup(osd_uuid)
2767 node_uuid = osd.get_first_ref('node')
2768 node = self.lookup(node_uuid)
2770 panic("unable to find node for osd_uuid:", osd_uuid,
2771 " node_ref:", node_uuid_)
2772 for net_uuid in node.get_networks():
2773 db = node.lookup(net_uuid)
2774 srv_list.append(Network(db))
2778 # the order of initialization is based on level.
2779 def getServiceLevel(self):
2780 type = self.get_class()
2782 if type in ('network',):
2784 elif type in ('routetbl',):
2786 elif type in ('ldlm',):
2788 elif type in ('osd', 'cobd'):
2790 elif type in ('mdsdev',):
2792 elif type in ('lmv',):
2794 elif type in ('cmobd',):
2796 elif type in ('mountpoint', 'echoclient'):
2799 panic("Unknown type: ", type)
2801 if ret < config.minlevel or ret > config.maxlevel:
2806 # return list of services in a profile. list is a list of tuples
2807 # [(level, db_object),]
2808 def getServices(self):
2810 for ref_class, ref_uuid in self.get_all_refs():
2811 servdb = self.lookup(ref_uuid)
2813 level = getServiceLevel(servdb)
# Presumably only services with a non-zero level are appended
# (guard elided in this view) -- confirm against full source.
2815 list.append((level, servdb))
2817 panic('service not found: ' + ref_uuid)
2823 ############################################################
2825 # FIXME: clean this mess up!
2827 # OSC is no longer in the xml, so we have to fake it.
2828 # this is getting ugly and begging for another refactoring
# Build a fake OSC wrapper for an OST; OSCs are no longer in the xml
# (see the FIXME above), so one is synthesized from the OST db entry.
2829 def get_osc(ost_db, uuid, fs_name):
2830 osc = OSC(ost_db, uuid, fs_name)
# Build an MDC client object for the given MDS uuid; reports an error
# when the MDS cannot be found in the config db.
2833 def get_mdc(db, fs_name, mds_uuid):
2834 mds_db = db.lookup(mds_uuid);
2836 error("no mds:", mds_uuid)
2837 mdc = MDC(mds_db, mds_uuid, fs_name)
2840 ############################################################
2841 # routing ("rooting")
2843 # list of (nettype, cluster_id, nid)
# Populate the global local_clusters list of (net_type, cluster_id, nid)
# tuples for every network on this node, and register one
# AcceptorHandler per distinct listening port (duplicates are fatal).
2846 def find_local_clusters(node_db):
2847 global local_clusters
2848 for netuuid in node_db.get_networks():
2849 net = node_db.lookup(netuuid)
2851 debug("add_local", netuuid)
2852 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
# Presumably only networks that actually listen have a port set
# (guard elided in this view).
2854 if acceptors.has_key(srv.port):
2855 panic("duplicate port:", srv.port)
2856 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2858 # This node is a gateway.
# Returns the global is_router flag set by doHost(); body is among the
# elided lines in this view.
2860 def node_is_router():
# If any router appears anywhere in the configuration then every node
# loads kptlrouter; a node also needs it when it is itself a router.
def node_needs_router():
    """Return true when kptlrouter must be loaded on this node."""
    if needs_router:
        return needs_router
    return is_router
2869 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2870 # Currently, these local routes are only added to kptlrouter route
2871 # table if they are needed to connect to a specific server. This
2872 # should be changed so all available routes are loaded, and the
2873 # ptlrouter can make all the decisions.
# Scan every router node in the config and fill the global local_routes
# table with routes reachable from this node's local clusters; sets
# needs_router when any router exists.  (Docstring terminator and
# several lines are elided in this view.)
2876 def find_local_routes(lustre):
2877 """ Scan the lustre config looking for routers . Build list of
2879 global local_routes, needs_router
2881 list = lustre.lookup_class('node')
2883 if router.get_val_int('router', 0):
2885 for (local_type, local_cluster_id, local_nid) in local_clusters:
2887 for netuuid in router.get_networks():
2888 db = router.lookup(netuuid)
# A router network on one of our local clusters is a usable gateway.
2889 if (local_type == db.get_val('nettype') and
2890 local_cluster_id == db.get_val('clusterid')):
2891 gw = db.get_val('nid')
2894 debug("find_local_routes: gw is", gw)
2895 for route in router.get_local_routes(local_type, gw):
2896 local_routes.append(route)
2897 debug("find_local_routes:", local_routes)
# Pick the first server in srv_list that lives on one of this node's
# local clusters; the matching return statement is elided in this view.
2900 def choose_local_server(srv_list):
2901 for srv in srv_list:
2902 if local_cluster(srv.net_type, srv.cluster_id):
# True when (net_type, cluster_id) matches one of this node's entries
# in local_clusters; return statements are elided in this view.
2905 def local_cluster(net_type, cluster_id):
2906 for cluster in local_clusters:
2907 if net_type == cluster[0] and cluster_id == cluster[1]:
# True when this exact nid on (net_type, cluster_id) is one of this
# node's local interfaces; return statements are elided in this view.
2911 def local_interface(net_type, cluster_id, nid):
2912 for cluster in local_clusters:
2913 if (net_type == cluster[0] and cluster_id == cluster[1]
2914 and nid == cluster[2]):
# For each server in srv_list, collect (srv, route) pairs for every
# known local route whose nid range and target cluster cover the server.
2918 def find_route(srv_list):
2920 frm_type = local_clusters[0][0]
2921 for srv in srv_list:
2922 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
2923 to_type = srv.net_type
# 'to' is presumably assigned srv.nid on an elided line -- confirm.
2925 cluster_id = srv.cluster_id
2926 debug ('looking for route to', to_type, to)
2927 for r in local_routes:
2928 debug("find_route: ", r)
# Route tuple layout per the header comment: (nettype, gw, tgt_cluster_id, lo, hi).
2929 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
2930 result.append((srv, r))
# Resolve which device uuid to use for a target, honouring any
# --select node override; otherwise falls back to the target's
# 'active' reference.
2933 def get_active_target(db):
2934 target_uuid = db.getUUID()
2935 target_name = db.getName()
2936 node_name = get_select(target_name)
2938 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
2940 tgt_dev_uuid = db.get_first_ref('active')
# Return the Network object whose nid_uuid matches; construction of
# 'net' and the return statements are elided in this view.
2943 def get_server_by_nid_uuid(db, nid_uuid):
2944 for n in db.lookup_class("network"):
2946 if net.nid_uuid == nid_uuid:
2950 ############################################################
# Fragment of newService(db): maps a config class name to the service
# wrapper object that manages it.  The 'def' line and several branch
# bodies are among the elided lines in this view.
2954 type = db.get_class()
2955 debug('Service:', type, db.getName(), db.getUUID())
2960 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2961 elif type == 'network':
2963 elif type == 'routetbl':
2967 elif type == 'cobd':
2968 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
2969 elif type == 'cmobd':
2971 elif type == 'mdsdev':
2973 elif type == 'mountpoint':
2975 elif type == 'echoclient':
2980 panic ("unknown service type:", type)
2984 # Prepare the system to run lustre using a particular profile
2985 # in the configuration.
2986 # * load & the modules
2987 # * setup networking for the current node
2988 # * make sure partitions are in place and prepared
2989 # * initialize devices with lctl
2990 # Levels are important, and need to be enforced.
2991 def for_each_profile(db, prof_list, operation):
# Look up each profile uuid and apply 'operation' (doSetup, doCleanup,
# doLoadModules, ...) to the profile's service list.
2992 for prof_uuid in prof_list:
2993 prof_db = db.lookup(prof_uuid)
2995 panic("profile:", prof_uuid, "not found.")
2996 services = getServices(prof_db)
# Recover an OSC object for an update record.  If an in-memory LOV is
# supplied its uuid and fs name are used directly; otherwise the lov
# uuid from the record is chased through filesystem -> mountpoint
# elements to learn the fs name.  Panics on any dangling reference.
2999 def magic_get_osc(db, rec, lov):
3001 lov_uuid = lov.get_uuid()
3002 lov_name = lov.osc.fs_name
3004 lov_uuid = rec.getAttribute('lov_uuidref')
3005 # FIXME: better way to find the mountpoint?
3006 filesystems = db.root_node.getElementsByTagName('filesystem')
3008 for fs in filesystems:
3009 ref = fs.getElementsByTagName('obd_ref')
3010 if ref[0].getAttribute('uuidref') == lov_uuid:
3011 fsuuid = fs.getAttribute('uuid')
3015 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
3017 mtpts = db.root_node.getElementsByTagName('mountpoint')
# NOTE(review): the loop below appears to reuse 'fs' as its loop
# variable while iterating mountpoints -- confusing but harmless;
# confirm against the full source.
3020 ref = fs.getElementsByTagName('filesystem_ref')
3021 if ref[0].getAttribute('uuidref') == fsuuid:
3022 lov_name = fs.getAttribute('name')
3026 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3028 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3030 ost_uuid = rec.getAttribute('ost_uuidref')
3031 obd = db.lookup(ost_uuid)
3034 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3036 osc = get_osc(obd, lov_uuid, lov_name)
# NOTE(review): 'obd_uuid' below is not defined in this function --
# probably should be 'ost_uuid'; confirm against the full source.
3038 panic('osc not found:', obd_uuid)
3041 # write logs for update records. sadly, logs of all types -- and updates in
3042 # particular -- are something of an afterthought. lconf needs rewritten with
3043 # these as core concepts. so this is a pretty big hack.
3044 def process_update_record(db, update, lov):
# Replay one <update> element: each child element is an 'add',
# 'deactivate' or 'delete' of an OST within a LOV, driven through lctl.
3045 for rec in update.childNodes:
# Skip text/comment nodes between the record elements.
3046 if rec.nodeType != rec.ELEMENT_NODE:
3049 log("found "+rec.nodeName+" record in update version " +
3050 str(update.getAttribute('version')))
3052 lov_uuid = rec.getAttribute('lov_uuidref')
3053 ost_uuid = rec.getAttribute('ost_uuidref')
3054 index = rec.getAttribute('index')
3055 gen = rec.getAttribute('generation')
3057 if not lov_uuid or not ost_uuid or not index or not gen:
3058 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
# With no in-memory LOV supplied, resolve the name via the config db.
3061 tmplov = db.lookup(lov_uuid)
3063 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3064 lov_name = tmplov.getName()
3066 lov_name = lov.osc.name
3068 # ------------------------------------------------------------- add
3069 if rec.nodeName == 'add':
3071 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3074 osc = magic_get_osc(db, rec, lov)
3077 # Only ignore connect failures with --force, which
3078 # isn't implemented here yet.
3079 osc.prepare(ignore_connect_failure=0)
3080 except CommandError, e:
3081 print "Error preparing OSC %s\n" % osc.uuid
3084 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3086 # ------------------------------------------------------ deactivate
3087 elif rec.nodeName == 'deactivate':
3091 osc = magic_get_osc(db, rec, lov)
3095 except CommandError, e:
3096 print "Error deactivating OSC %s\n" % osc.uuid
3099 # ---------------------------------------------------------- delete
3100 elif rec.nodeName == 'delete':
3104 osc = magic_get_osc(db, rec, lov)
3110 except CommandError, e:
3111 print "Error cleaning up OSC %s\n" % osc.uuid
3114 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
# Record each non-empty <update> element into its own versioned config
# log named "<log_name>-<version>" on log_device via lctl record mode.
3116 def process_updates(db, log_device, log_name, lov = None):
3117 updates = db.root_node.getElementsByTagName('update')
3119 if not u.childNodes:
3120 log("ignoring empty update record (version " +
3121 str(u.getAttribute('version')) + ")")
3124 version = u.getAttribute('version')
3125 real_name = "%s-%s" % (log_name, version)
3126 lctl.clear_log(log_device, real_name)
3127 lctl.record(log_device, real_name)
3129 process_update_record(db, u, lov)
# Run the write_conf step, but only for mdsdev services; the actual
# write_conf call on 'n' is among the elided lines in this view.
3133 def doWriteconf(services):
3137 if s[1].get_class() == 'mdsdev':
3138 n = newService(s[1])
# Instantiate each service, re-sort by its (possibly corrected) level,
# then prepare them in ascending order (sort/prepare lines elided).
3141 def doSetup(services):
3146 n = newService(s[1])
3148 slist.append((n.level, n))
3151 nl = n[1].correct_level(n[0])
3152 nlist.append((nl, n[1]))
# Collect the kernel modules required by every service, then load them
# all in one pass through the global module manager.
3157 def doLoadModules(services):
3161 # adding all needed modules from all services
3163 n = newService(s[1])
3164 n.add_module(mod_manager)
3166 # loading all registered modules
3167 mod_manager.load_modules()
# Mirror of doLoadModules: collect modules only from services that are
# safe to clean, then unload them via the module manager.
3169 def doUnloadModules(services):
3173 # adding all needed modules from all services
3175 n = newService(s[1])
3176 if n.safe_to_clean_modules():
3177 n.add_module(mod_manager)
3179 # unloading all registered modules
3180 mod_manager.cleanup_modules()
# Instantiate services, sort by corrected level, and clean them up
# (descending-order sort and the cleanup call are elided); only
# services reporting safe_to_clean() are touched.
3182 def doCleanup(services):
3188 n = newService(s[1])
3190 slist.append((n.level, n))
3193 nl = n[1].correct_level(n[0])
3194 nlist.append((nl, n[1]))
3199 if n[1].safe_to_clean():
# Find this host's node entry in the config and drive the requested
# action -- write_conf, recover, cleanup, or (default) setup -- over
# the node's profiles.
3204 def doHost(lustreDB, hosts):
3205 global is_router, local_node_name
# Try each candidate hostname until a node entry matches.
3208 node_db = lustreDB.lookup_name(h, 'node')
3212 panic('No host entry found.')
# Pull per-node tunables out of the config; command-line options may
# override these later in the sys_set_* helpers.
3214 local_node_name = node_db.get_val('name', 0)
3215 is_router = node_db.get_val_int('router', 0)
3216 lustre_upcall = node_db.get_val('lustreUpcall', '')
3217 portals_upcall = node_db.get_val('portalsUpcall', '')
3218 timeout = node_db.get_val_int('timeout', 0)
3219 ptldebug = node_db.get_val('ptldebug', '')
3220 subsystem = node_db.get_val('subsystem', '')
3222 find_local_clusters(node_db)
3224 find_local_routes(lustreDB)
3226 # Two step process: (1) load modules, (2) setup lustre
3227 # if not cleaning, load modules first.
3228 prof_list = node_db.get_refs('profile')
3230 if config.write_conf:
3231 for_each_profile(node_db, prof_list, doLoadModules)
3233 for_each_profile(node_db, prof_list, doWriteconf)
3234 for_each_profile(node_db, prof_list, doUnloadModules)
3237 elif config.recover:
3238 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3239 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3240 "--client_uuid <UUID> --conn_uuid <UUID>")
3241 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3243 elif config.cleanup:
3245 # the command line can override this value
3247 # ugly hack, only need to run lctl commands for --dump
3248 if config.lctl_dump or config.record:
3249 for_each_profile(node_db, prof_list, doCleanup)
3252 sys_set_timeout(timeout)
3253 sys_set_ptldebug(ptldebug)
3254 sys_set_subsystem(subsystem)
3255 sys_set_lustre_upcall(lustre_upcall)
3256 sys_set_portals_upcall(portals_upcall)
3258 for_each_profile(node_db, prof_list, doCleanup)
3259 for_each_profile(node_db, prof_list, doUnloadModules)
3263 # ugly hack, only need to run lctl commands for --dump
3264 if config.lctl_dump or config.record:
3265 sys_set_timeout(timeout)
3266 sys_set_lustre_upcall(lustre_upcall)
3267 for_each_profile(node_db, prof_list, doSetup)
# Raise kernel socket-buffer ceilings before loading modules.
3271 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3272 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3274 for_each_profile(node_db, prof_list, doLoadModules)
3276 sys_set_debug_path()
3277 sys_set_ptldebug(ptldebug)
3278 sys_set_subsystem(subsystem)
3279 script = config.gdb_script
3280 run(lctl.lctl, ' modules >', script)
3282 log ("The GDB module script is in", script)
3283 # pause, so user has time to break and
3286 sys_set_timeout(timeout)
3287 sys_set_lustre_upcall(lustre_upcall)
3288 sys_set_portals_upcall(portals_upcall)
3290 for_each_profile(node_db, prof_list, doSetup)
# Fail a client connection over to a target's currently-active server:
# disconnect the old nid (best effort), then ask lctl to recover the
# client onto the newly chosen connection.
3293 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3294 tgt = lustreDB.lookup(tgt_uuid)
3296 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3297 new_uuid = get_active_target(tgt)
3299 raise Lustre.LconfError("doRecovery: no active target found for: " +
3301 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3303 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3305 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3307 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# Disconnect failures are logged but not fatal -- the old server may
# already be dead.
3310 lctl.disconnect(oldnet)
3311 except CommandError, e:
3312 log("recover: disconnect", nid_uuid, "failed: ")
3317 except CommandError, e:
3318 log("recover: connect failed")
3321 lctl.recover(client_uuid, net.nid_uuid)
# Derive config.lustre / config.portals module search paths from the
# lconf binary location (development mode) or from --lustre/--portals.
3324 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3325 base = os.path.dirname(cmd)
3326 if development_mode():
3327 if not config.lustre:
3328 debug('using objdir module paths')
3329 config.lustre = (os.path.join(base, ".."))
3330 # normalize the portals dir, using command line arg if set
3332 portals_dir = config.portals
3333 dir = os.path.join(config.lustre, portals_dir)
3334 config.portals = dir
3335 debug('config.portals', config.portals)
3336 elif config.lustre and config.portals:
3338 # if --lustre and --portals, normalize portals
3339 # can ignore PORTALS_DIR here, since it is probably useless here
3340 config.portals = os.path.join(config.lustre, config.portals)
3341 debug('config.portals B', config.portals)
# Write val into /proc/sys/<path>.  The --noexec guard and the
# write/close of fp are among the elided lines in this view.
3343 def sysctl(path, val):
3344 debug("+ sysctl", path, val)
3348 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals debug dump path at the --debug_path location."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
# Install the lustre upcall script via lctl; command-line options win
# over the node-config value passed in.
3358 def sys_set_lustre_upcall(upcall):
3359 # the command overrides the value in the node config
3360 if config.lustre_upcall:
3361 upcall = config.lustre_upcall
# --upcall presumably sets both upcalls (elif branch elided) -- confirm.
3363 upcall = config.upcall
3365 lctl.set_lustre_upcall(upcall)
# Install the portals upcall script via sysctl; command-line options
# win over the node-config value passed in.
3367 def sys_set_portals_upcall(upcall):
3368 # the command overrides the value in the node config
3369 if config.portals_upcall:
3370 upcall = config.portals_upcall
# --upcall presumably sets both upcalls (elif branch elided) -- confirm.
3372 upcall = config.upcall
3374 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout through lctl.

    The node-config value is passed in; a positive --timeout on the
    command line overrides it.  Nothing is done when the resulting
    timeout is missing or non-positive.
    """
    # the command line overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # idiom fix: compare against None with 'is not' rather than '!='
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
3383 def sys_tweak_socknal ():
3384 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3385 if sys_get_branch() == '2.6':
3386 fp = open('/proc/meminfo')
3387 lines = fp.readlines()
# Parse MemTotal out of /proc/meminfo; on boxes under 256MB reserve
# 1/16th of RAM via vm/min_free_kbytes (the larger-memory branch is
# among the elided lines in this view).
3392 if a[0] == 'MemTotal:':
3394 debug("memtotal" + memtotal)
3395 if int(memtotal) < 262144:
3396 minfree = int(memtotal) / 16
3399 debug("+ minfree ", minfree)
3400 sysctl("vm/min_free_kbytes", minfree)
3401 if config.single_socket:
3402 sysctl("socknal/typed", 0)
# Enable event-interrupt punting on whichever Quadrics Elan proc files
# exist and are writable on this system.
3404 def sys_optimize_elan ():
3405 procfiles = ["/proc/elan/config/eventint_punt_loops",
3406 "/proc/qsnet/elan3/config/eventint_punt_loops",
3407 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3409 if os.access(p, os.W_OK):
3410 run ("echo 1 > " + p)
# Set the portals debug bitmask.  --ptldebug overrides the node value;
# the expression is evaluated against the ptldebug_names table to
# build the mask, and unknown names fall through to NameError.
3412 def sys_set_ptldebug(ptldebug):
3414 ptldebug = config.ptldebug
3417 val = eval(ptldebug, ptldebug_names)
3418 val = "0x%x" % (val)
3419 sysctl('portals/debug', val)
3420 except NameError, e:
# Same pattern as sys_set_ptldebug, for the subsystem debug mask
# (evaluated against the subsystem_names table).
3423 def sys_set_subsystem(subsystem):
3424 if config.subsystem:
3425 subsystem = config.subsystem
3428 val = eval(subsystem, subsystem_names)
3429 val = "0x%x" % (val)
3430 sysctl('portals/subsystem_debug', val)
3431 except NameError, e:
# Raise a /proc/sys/net limit to at least 'max'; the read of the
# current value (to avoid lowering it) is among the elided lines.
3434 def sys_set_netmem_max(path, max):
3435 debug("setting", path, "to at least", max)
3443 fp = open(path, 'w')
3444 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd character device nodes when missing."""
    for devnode, mknod_cmd in (('/dev/portals', 'mknod /dev/portals c 10 240'),
                               ('/dev/obd', 'mknod /dev/obd c 10 241')):
        if not os.access(devnode, os.R_OK):
            run(mknod_cmd)
3455 # Add dir to the global PATH, if not already there.
3456 def add_to_path(new_dir):
3457 syspath = string.split(os.environ['PATH'], ':')
# Early return when already present is among the elided lines.
3458 if new_dir in syspath:
3460 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
# Default dump location for debug logs; prefers /r/tmp/lustre-log when
# a /r root exists (return statements elided in this view).
3462 def default_debug_path():
3463 path = '/tmp/lustre-log'
3464 if os.path.isdir('/r'):
# Default location for the generated gdb module script; prefers
# /r/tmp/ogdb when a /r root exists (plain return elided in this view).
3469 def default_gdb_script():
3470 script = '/tmp/ogdb'
3471 if os.path.isdir('/r'):
3472 return '/r' + script
3477 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3478 # ensure basic elements are in the system path
3479 def sanitise_path():
# Loop body (presumably add_to_path(dir)) is elided in this view.
3480 for dir in DEFAULT_PATH:
3483 # global hack for the --select handling
# Parse the --select arguments into the global tgt_select mapping of
# service name -> node name.
3485 def init_select(args):
3486 # args = [service=nodeA,service2=nodeB service3=nodeC]
3489 list = string.split(arg, ',')
3491 srv, node = string.split(entry, '=')
3492 tgt_select[srv] = node
# Return the --select node override for a service, if any was given
# (the no-match return is among the elided lines).
3494 def get_select(srv):
3495 if tgt_select.has_key(srv):
3496 return tgt_select[srv]
3500 FLAG = Lustre.Options.FLAG
3501 PARAM = Lustre.Options.PARAM
3502 INTPARAM = Lustre.Options.INTPARAM
3503 PARAMLIST = Lustre.Options.PARAMLIST
# Command-line option table consumed by Lustre.Options; each entry is
# (name[,short], help[, kind[, default]]).  The surrounding list
# assignment and several continuation lines are elided in this view.
# NOTE(review): "aproximatly" in the --maxlevel help string is a typo
# in user-visible output; left untouched here because it is runtime
# text, not a comment.
3505 ('verbose,v', "Print system commands as they are run"),
3506 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3507 ('config', "Cluster config name used for LDAP query", PARAM),
3508 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3509 ('node', "Load config for <nodename>", PARAM),
3510 ('sec', "security flavor <null|krb5i|krb5p> of client", PARAM),
3511 ('mds_mds_sec', "security flavor <null|krb5i|krb5p> of inter mds's", PARAM),
3512 ('mds_ost_sec', "security flavor <null|krb5i|krb5p> of mds's-ost's", PARAM),
3513 ('cleanup,d', "Cleans up config. (Shutdown)"),
3514 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3516 ('single_socket', "socknal option: only use one socket instead of bundle",
3518 ('failover',"""Used to shut down without saving state.
3519 This will allow this node to "give up" a service to a
3520 another node for failover purposes. This will not
3521 be a clean shutdown.""",
3523 ('gdb', """Prints message after creating gdb module script
3524 and sleeps for 5 seconds."""),
3525 ('noexec,n', """Prints the commands and steps that will be run for a
3526 config without executing them. This can used to check if a
3527 config file is doing what it should be doing"""),
3528 ('nomod', "Skip load/unload module step."),
3529 ('nosetup', "Skip device setup/cleanup step."),
3530 ('reformat', "Reformat all devices (without question)"),
3531 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3532 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3533 ('clientoptions', "Additional options for Lustre", PARAM),
3534 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3536 ('write_conf', "Save all the client config information on mds."),
3537 ('record', "Write config information on mds."),
3538 ('record_log', "Name of config record log.", PARAM),
3539 ('record_device', "MDS device name that will record the config commands",
3541 ('root_squash', "MDS squash root to appointed uid",
3543 ('no_root_squash', "Don't squash root for appointed nid",
3545 ('minlevel', "Minimum level of services to configure/cleanup",
3547 ('maxlevel', """Maximum level of services to configure/cleanup
3548 Levels are aproximatly like:
3553 70 - mountpoint, echo_client, osc, mdc, lov""",
3555 ('lustre', """Base directory of lustre sources. This parameter will
3556 cause lconf to load modules from a source tree.""", PARAM),
3557 ('portals', """Portals source directory. If this is a relative path,
3558 then it is assumed to be relative to lustre. """, PARAM),
3559 ('timeout', "Set recovery timeout", INTPARAM),
3560 ('upcall', "Set both portals and lustre upcall script", PARAM),
3561 ('lustre_upcall', "Set lustre upcall script", PARAM),
3562 ('portals_upcall', "Set portals upcall script", PARAM),
3563 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3564 ('ptldebug', "Set the portals debug level", PARAM),
3565 ('subsystem', "Set the portals debug subsystem", PARAM),
3566 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3567 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3568 # Client recovery options
3569 ('recover', "Recover a device"),
3570 ('group', "The group of devices to configure or cleanup", PARAM),
3571 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3572 ('client_uuid', "The failed client (required for recovery)", PARAM),
3573 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3575 ('inactive', """The name of an inactive service, to be ignored during
3576 mounting (currently OST-only). Can be repeated.""",
# Fragment of main(): option parsing, PRNG seeding from /dev/urandom,
# config acquisition (file / HTTP / LDAP), config-version check,
# node-name selection, lctl setup, and dispatch to doHost().  The
# 'def main():' line and many statements are elided in this view.
3581 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3583 # in the upcall this is set to SIG_IGN
3584 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3586 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3588 config, args = cl.parse(sys.argv[1:])
3589 except Lustre.OptionError, e:
3593 setupModulePath(sys.argv[0])
3595 host = socket.gethostname()
3597 # the PRNG is normally seeded with time(), which is not so good for starting
3598 # time-synchronized clusters
3599 input = open('/dev/urandom', 'r')
3601 print 'Unable to open /dev/urandom!'
3603 seed = input.read(32)
3609 init_select(config.select)
3612 # allow config to be fetched via HTTP, but only with python2
3613 if sys.version[0] != '1' and args[0].startswith('http://'):
3616 config_file = urllib2.urlopen(args[0])
3617 except (urllib2.URLError, socket.error), err:
3618 if hasattr(err, 'args'):
3620 print "Could not access '%s': %s" %(args[0], err)
3622 elif not os.access(args[0], os.R_OK):
3623 print 'File not found or readable:', args[0]
3627 config_file = open(args[0], 'r')
3629 dom = xml.dom.minidom.parse(config_file)
3631 panic("%s does not appear to be a config file." % (args[0]))
3632 sys.exit(1) # make sure to die here, even in debug mode.
3634 CONFIG_FILE = args[0]
3635 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
3636 if not config.config:
3637 config.config = os.path.basename(args[0])# use full path?
3638 if config.config[-4:] == '.xml':
3639 config.config = config.config[:-4]
3640 elif config.ldapurl:
3641 if not config.config:
3642 panic("--ldapurl requires --config name")
3643 dn = "config=%s,fs=lustre" % (config.config)
3644 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3645 elif config.ptldebug or config.subsystem:
3646 sys_set_ptldebug(None)
3647 sys_set_subsystem(None)
3650 print 'Missing config file or ldap URL.'
3651 print 'see lconf --help for command summary'
3654 toplustreDB = lustreDB
# Refuse to run against a config produced by a different lconf version.
3656 ver = lustreDB.get_version()
3658 panic("No version found in config data, please recreate.")
3659 if ver != Lustre.CONFIG_VERSION:
3660 panic("Config version", ver, "does not match lconf version",
3661 Lustre.CONFIG_VERSION)
3665 node_list.append(config.node)
3668 node_list.append(host)
3669 node_list.append('localhost')
3671 debug("configuring for host: ", node_list)
# Per-host suffixes keep dump/script files separate across nodes.
3674 config.debug_path = config.debug_path + '-' + host
3675 config.gdb_script = config.gdb_script + '-' + host
3677 lctl = LCTLInterface('lctl')
3679 if config.lctl_dump:
3680 lctl.use_save_file(config.lctl_dump)
3683 if not (config.record_device and config.record_log):
3684 panic("When recording, both --record_log and --record_device must be specified.")
3685 lctl.clear_log(config.record_device, config.record_log)
3686 lctl.record(config.record_device, config.record_log)
3688 # init module manager
3689 mod_manager = kmod_manager(config.lustre, config.portals)
3691 doHost(lustreDB, node_list)
3693 if not config.record:
3698 process_updates(lustreDB, config.record_device, config.record_log)
# Top-level driver: run main() and translate known exception types into
# exit codes; intermediate handler lines are elided in this view.
3700 if __name__ == "__main__":
3703 except Lustre.LconfError, e:
3705 # traceback.print_exc(file=sys.stdout)
3707 except CommandError, e:
3711 if first_cleanup_error:
3712 sys.exit(first_cleanup_error)