3 # Copyright (C) 2002-2003 Cluster File Systems, Inc.
4 # Authors: Robert Read <rread@clusterfs.com>
5 # Mike Shaver <shaver@clusterfs.com>
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # lconf - lustre configuration tool
23 # lconf is the main driver script for starting and stopping
24 # lustre filesystem services.
26 # Based in part on the XML obdctl modifications done by Brian Behlendorf
28 import sys, getopt, types
29 import string, os, stat, popen2, socket, time, random, fcntl, select
30 import re, exceptions, signal, traceback
31 import xml.dom.minidom
33 if sys.version[0] == '1':
34 from FCNTL import F_GETFL, F_SETFL
36 from fcntl import F_GETFL, F_SETFL
38 PYMOD_DIR = "/usr/lib/lustre/python"
40 def development_mode():
41 base = os.path.dirname(sys.argv[0])
42 if os.access(base+"/Makefile", os.R_OK):
46 if development_mode():
47 sys.path.append('../utils')
49 sys.path.append(PYMOD_DIR)
55 DEFAULT_TCPBUF = 8388608
58 # Maximum number of devices to search for.
59 # (the /dev/loop* nodes need to be created beforehand)
60 MAX_LOOP_DEVICES = 256
61 PORTALS_DIR = '../portals'
63 # Needed to call lconf --record
66 # Please keep these in sync with the values in portals/kp30.h
78 "warning" : (1 << 10),
82 "portals" : (1 << 14),
84 "dlmtrace" : (1 << 16),
88 "rpctrace" : (1 << 20),
89 "vfstrace" : (1 << 21),
96 "undefined" : (1 << 0),
106 "portals" : (1 << 10),
108 "pinger" : (1 << 12),
109 "filter" : (1 << 13),
114 "ptlrouter" : (1 << 18),
118 "confobd" : (1 << 22),
# Holds the exit status of the FIRST cleanup step that failed; later
# failures are ignored so the root cause is what gets reported.
first_cleanup_error = 0
def cleanup_error(rc):
    """Record *rc* as the overall cleanup status unless one is already set."""
    global first_cleanup_error
    if first_cleanup_error == 0:
        first_cleanup_error = rc
130 # ============================================================
131 # debugging and error funcs
133 def fixme(msg = "this feature"):
134 raise Lustre.LconfError, msg + ' not implemented yet.'
137 msg = string.join(map(str,args))
138 if not config.noexec:
139 raise Lustre.LconfError(msg)
144 msg = string.join(map(str,args))
149 print string.strip(s)
153 msg = string.join(map(str,args))
156 # ack, python's builtin int() does not support '0x123' syntax.
157 # eval can do it, although what a hack!
161 return eval(s, {}, {})
164 except SyntaxError, e:
165 raise ValueError("not a number")
167 raise ValueError("not a number")
169 # ============================================================
170 # locally defined exceptions
171 class CommandError (exceptions.Exception):
172 def __init__(self, cmd_name, cmd_err, rc=None):
173 self.cmd_name = cmd_name
174 self.cmd_err = cmd_err
179 if type(self.cmd_err) == types.StringType:
181 print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err)
183 print "! %s: %s" % (self.cmd_name, self.cmd_err)
184 elif type(self.cmd_err) == types.ListType:
186 print "! %s (error %d):" % (self.cmd_name, self.rc)
188 print "! %s:" % (self.cmd_name)
189 for s in self.cmd_err:
190 print "> %s" %(string.strip(s))
195 # ============================================================
196 # handle daemons, like the acceptor
198 """ Manage starting and stopping a daemon. Assumes daemon manages
199 it's own pid file. """
201 def __init__(self, cmd):
207 log(self.command, "already running.")
209 self.path = find_prog(self.command)
211 panic(self.command, "not found.")
212 ret, out = runcmd(self.path +' '+ self.command_line())
214 raise CommandError(self.path, out, ret)
218 pid = self.read_pidfile()
221 log ("killing process", pid)
224 log("was unable to find pid of " + self.command)
225 #time.sleep(1) # let daemon die
227 log("unable to kill", self.command, e)
229 log("unable to kill", self.command)
232 pid = self.read_pidfile()
238 log("was unable to find pid of " + self.command)
245 def read_pidfile(self):
247 fp = open(self.pidfile(), 'r')
257 def clean_pidfile(self):
258 """ Remove a stale pidfile """
259 log("removing stale pidfile:", self.pidfile())
261 os.unlink(self.pidfile())
263 log(self.pidfile(), e)
265 class AcceptorHandler(DaemonHandler):
266 def __init__(self, port, net_type):
267 DaemonHandler.__init__(self, "acceptor")
272 return "/var/run/%s-%d.pid" % (self.command, self.port)
274 def command_line(self):
275 return string.join(map(str,(self.flags, self.port)))
279 # start the acceptors
281 if config.lctl_dump or config.record:
283 for port in acceptors.keys():
284 daemon = acceptors[port]
285 if not daemon.running():
288 def run_one_acceptor(port):
289 if config.lctl_dump or config.record:
291 if acceptors.has_key(port):
292 daemon = acceptors[port]
293 if not daemon.running():
296 panic("run_one_acceptor: No acceptor defined for port:", port)
298 def stop_acceptor(port):
299 if acceptors.has_key(port):
300 daemon = acceptors[port]
305 # ============================================================
306 # handle lctl interface
309 Manage communication with lctl
312 def __init__(self, cmd):
314 Initialize close by finding the lctl binary.
316 self.lctl = find_prog(cmd)
318 self.record_device = ''
321 debug('! lctl not found')
324 raise CommandError('lctl', "unable to find lctl binary.")
    def use_save_file(self, file):
        """Dump subsequent lctl command batches into *file* instead of
        executing them (a 'dump <file>' directive is prepended to each batch)."""
        self.save_file = file
329 def record(self, dev_name, logname):
330 log("Recording log", logname, "on", dev_name)
331 self.record_device = dev_name
332 self.record_log = logname
334 def end_record(self):
335 log("End recording log", self.record_log, "on", self.record_device)
336 self.record_device = None
337 self.record_log = None
339 def set_nonblock(self, fd):
340 fl = fcntl.fcntl(fd, F_GETFL)
341 fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY)
346 the cmds are written to stdin of lctl
347 lctl doesn't return errors when run in script mode, so
349 should modify command line to accept multiple commands, or
350 create complex command line options
354 cmds = '\n dump ' + self.save_file + '\n' + cmds
355 elif self.record_device:
359 %s""" % (self.record_device, self.record_log, cmds)
361 debug("+", cmd_line, cmds)
362 if config.noexec: return (0, [])
364 child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command
365 child.tochild.write(cmds + "\n")
366 child.tochild.close()
367 # print "LCTL:", cmds
369 # From "Python Cookbook" from O'Reilly
370 outfile = child.fromchild
371 outfd = outfile.fileno()
372 self.set_nonblock(outfd)
373 errfile = child.childerr
374 errfd = errfile.fileno()
375 self.set_nonblock(errfd)
377 outdata = errdata = ''
380 ready = select.select([outfd,errfd],[],[]) # Wait for input
381 if outfd in ready[0]:
382 outchunk = outfile.read()
383 if outchunk == '': outeof = 1
384 outdata = outdata + outchunk
385 if errfd in ready[0]:
386 errchunk = errfile.read()
387 if errchunk == '': erreof = 1
388 errdata = errdata + errchunk
389 if outeof and erreof: break
390 # end of "borrowed" code
393 if os.WIFEXITED(ret):
394 rc = os.WEXITSTATUS(ret)
397 if rc or len(errdata):
398 raise CommandError(self.lctl, errdata, rc)
401 def runcmd(self, *args):
403 run lctl using the command line
405 cmd = string.join(map(str,args))
406 debug("+", self.lctl, cmd)
407 rc, out = run(self.lctl, cmd)
409 raise CommandError(self.lctl, out, rc)
412 def clear_log(self, dev, log):
413 """ clear an existing log """
418 quit """ % (dev, log)
421 def root_squash(self, name, uid, nid):
425 quit""" % (name, uid, nid)
428 def network(self, net, nid):
433 quit """ % (net, nid)
437 def add_interface(self, net, ip, netmask = ""):
438 """ add an interface """
442 quit """ % (net, ip, netmask)
445 # delete an interface
446 def del_interface(self, net, ip):
447 """ delete an interface """
454 # create a new connection
455 def add_uuid(self, net_type, uuid, nid):
456 cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type)
459 def add_peer(self, net_type, nid, hostaddr, port):
460 if net_type in ('tcp','openib','ra') and not config.lctl_dump:
465 nid, hostaddr, port )
467 elif net_type in ('iib',) and not config.lctl_dump:
474 elif net_type in ('vib',) and not config.lctl_dump:
482 def connect(self, srv):
483 self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid)
484 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
486 hostaddr = string.split(srv.hostaddr[0], '/')[0]
487 self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port)
490 def recover(self, dev_name, new_conn):
493 recover %s""" %(dev_name, new_conn)
496 # add a route to a range
497 def add_route(self, net, gw, lo, hi):
505 except CommandError, e:
509 def del_route(self, net, gw, lo, hi):
514 quit """ % (net, gw, lo, hi)
517 # add a route to a host
518 def add_route_host(self, net, uuid, gw, tgt):
519 self.add_uuid(net, uuid, tgt)
527 except CommandError, e:
531 # add a route to a range
532 def del_route_host(self, net, uuid, gw, tgt):
538 quit """ % (net, gw, tgt)
542 def del_peer(self, net_type, nid, hostaddr):
543 if net_type in ('tcp',) and not config.lctl_dump:
547 del_peer %s %s single_share
551 elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump:
555 del_peer %s single_share
560 # disconnect one connection
561 def disconnect(self, srv):
562 self.del_uuid(srv.nid_uuid)
563 if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump:
565 hostaddr = string.split(srv.hostaddr[0], '/')[0]
566 self.del_peer(srv.net_type, srv.nid, hostaddr)
568 def del_uuid(self, uuid):
576 def disconnectAll(self, net):
584 def attach(self, type, name, uuid):
587 quit""" % (type, name, uuid)
590 def detach(self, name):
597 def set_security(self, name, key, value):
601 quit""" % (name, key, value)
604 def setup(self, name, setup = ""):
608 quit""" % (name, setup)
611 def add_conn(self, name, conn_uuid):
615 quit""" % (name, conn_uuid)
618 def start(self, name, conf_name):
622 quit""" % (name, conf_name)
625 # create a new device with lctl
626 def newdev(self, type, name, uuid, setup = ""):
627 self.attach(type, name, uuid);
629 self.setup(name, setup)
630 except CommandError, e:
631 self.cleanup(name, uuid, 0)
635 def cleanup(self, name, uuid, force, failover = 0):
636 if failover: force = 1
642 quit""" % (name, ('', 'force')[force],
643 ('', 'failover')[failover])
647 def lov_setup(self, name, uuid, desc_uuid, stripe_cnt,
648 stripe_sz, stripe_off, pattern, devlist = None):
651 lov_setup %s %d %d %d %s %s
652 quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off,
656 # add an OBD to a LOV
657 def lov_add_obd(self, name, uuid, obd_uuid, index, gen):
659 lov_modify_tgts add %s %s %s %s
660 quit""" % (name, obd_uuid, index, gen)
664 def lmv_setup(self, name, uuid, desc_uuid, devlist):
668 quit""" % (name, uuid, desc_uuid, devlist)
671 # delete an OBD from a LOV
672 def lov_del_obd(self, name, uuid, obd_uuid, index, gen):
674 lov_modify_tgts del %s %s %s %s
675 quit""" % (name, obd_uuid, index, gen)
679 def deactivate(self, name):
687 def dump(self, dump_file):
690 quit""" % (dump_file)
693 # get list of devices
694 def device_list(self):
695 devices = '/proc/fs/lustre/devices'
697 if os.access(devices, os.R_OK):
699 fp = open(devices, 'r')
707 def lustre_version(self):
708 rc, out = self.runcmd('version')
712 def mount_option(self, profile, osc, mdc):
714 mount_option %s %s %s
715 quit""" % (profile, osc, mdc)
718 # delete mount options
719 def del_mount_option(self, profile):
725 def set_timeout(self, timeout):
731 def set_lustre_upcall(self, upcall):
736 # ============================================================
737 # Various system-level functions
738 # (ideally moved to their own module)
740 # Run a command and return the output and status.
741 # stderr is sent to /dev/null, could use popen3 to
742 # save it if necessary
745 if config.noexec: return (0, [])
746 f = os.popen(cmd + ' 2>&1')
756 cmd = string.join(map(str,args))
759 # Run a command in the background.
760 def run_daemon(*args):
761 cmd = string.join(map(str,args))
763 if config.noexec: return 0
764 f = os.popen(cmd + ' 2>&1')
772 # Determine full path to use for an external command
773 # searches dirname(argv[0]) first, then PATH
775 syspath = string.split(os.environ['PATH'], ':')
776 cmdpath = os.path.dirname(sys.argv[0])
777 syspath.insert(0, cmdpath);
779 syspath.insert(0, os.path.join(config.portals, 'utils/'))
781 prog = os.path.join(d,cmd)
782 if os.access(prog, os.X_OK):
786 # Recursively look for file starting at base dir
787 def do_find_file(base, mod):
788 fullname = os.path.join(base, mod)
789 if os.access(fullname, os.R_OK):
791 for d in os.listdir(base):
792 dir = os.path.join(base,d)
793 if os.path.isdir(dir):
794 module = do_find_file(dir, mod)
798 # is the path a block device?
805 return stat.S_ISBLK(s[stat.ST_MODE])
807 # find the journal device from mkfs options
813 while i < len(x) - 1:
814 if x[i] == '-J' and x[i+1].startswith('device='):
820 # build fs according to type
822 def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1):
828 panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"%
830 # devsize is in 1k, and fs block count is in 4k
831 block_cnt = devsize/4
833 if fstype in ('ext3', 'extN', 'ldiskfs'):
834 # ext3 journal size is in megabytes
835 # but don't set jsize if mkfsoptions indicates a separate journal device
836 if jsize == 0 and jdev(mkfsoptions) == '':
838 if not is_block(dev):
839 ret, out = runcmd("ls -l %s" %dev)
840 devsize = int(string.split(out[0])[4]) / 1024
842 # sfdisk works for symlink, hardlink, and realdev
843 ret, out = runcmd("sfdisk -s %s" %dev)
845 devsize = int(out[0])
847 # sfdisk -s will fail for too large block device,
848 # then, read the size of partition from /proc/partitions
850 # get the realpath of the device
851 # it may be the real device, such as /dev/hda7
852 # or the hardlink created via mknod for a device
853 if 'realpath' in dir(os.path):
854 real_dev = os.path.realpath(dev)
858 while os.path.islink(real_dev) and (link_count < 20):
859 link_count = link_count + 1
860 dev_link = os.readlink(real_dev)
861 if os.path.isabs(dev_link):
864 real_dev = os.path.join(os.path.dirname(real_dev), dev_link)
866 panic("Entountered too many symbolic links resolving block device:", dev)
868 # get the major and minor number of the realpath via ls
869 # it seems python(os.stat) does not return
870 # the st_rdev member of the stat structure
871 ret, out = runcmd("ls -l %s" %real_dev)
872 major = string.split(string.split(out[0])[4], ",")[0]
873 minor = string.split(out[0])[5]
875 # get the devsize from /proc/partitions with the major and minor number
876 ret, out = runcmd("cat /proc/partitions")
879 if string.split(line)[0] == major and string.split(line)[1] == minor:
880 devsize = int(string.split(line)[2])
883 if devsize > 1024 * 1024:
884 jsize = ((devsize / 102400) * 4)
887 if jsize: jopt = "-J size=%d" %(jsize,)
888 if isize: iopt = "-I %d" %(isize,)
889 mkfs = 'mkfs.ext2 -j -b 4096 '
890 if not isblock or config.force:
892 if jdev(mkfsoptions) != '':
893 jmkfs = 'mkfs.ext2 -b 4096 -O journal_dev '
895 jmkfs = jmkfs + '-F '
896 jmkfs = jmkfs + jdev(mkfsoptions)
897 (ret, out) = run (jmkfs)
899 panic("Unable format journal device:", jdev(mkfsoptions), string.join(out))
900 elif fstype == 'reiserfs':
901 # reiserfs journal size is in blocks
902 if jsize: jopt = "--journal_size %d" %(jsize,)
903 mkfs = 'mkreiserfs -ff'
905 panic('unsupported fs type: ', fstype)
907 if config.mkfsoptions != None:
908 mkfs = mkfs + ' ' + config.mkfsoptions
909 if mkfsoptions != None:
910 mkfs = mkfs + ' ' + mkfsoptions
911 (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt)
913 panic("Unable to build fs:", dev, string.join(out))
914 # enable hash tree indexing on fsswe
915 if fstype in ('ext3', 'extN', 'ldiskfs'):
916 htree = 'echo "feature FEATURE_C5" | debugfs -w'
917 (ret, out) = run (htree, dev)
919 panic("Unable to enable htree:", dev)
921 # some systems use /dev/loopN, some /dev/loop/N
925 if not os.access(loop + str(0), os.R_OK):
927 if not os.access(loop + str(0), os.R_OK):
928 panic ("can't access loop devices")
931 # find loop device assigned to the file
932 def find_assigned_loop(file):
934 for n in xrange(0, MAX_LOOP_DEVICES):
936 if os.access(dev, os.R_OK):
937 (stat, out) = run('losetup', dev)
938 if out and stat == 0:
939 m = re.search(r'\((.*)\)', out[0])
940 if m and file == m.group(1):
944 # find free loop device
945 def find_free_loop(file):
948 # find next free loop
949 for n in xrange(0, MAX_LOOP_DEVICES):
951 if os.access(dev, os.R_OK):
952 (stat, out) = run('losetup', dev)
957 # create file if necessary and assign the first free loop device
958 def init_loop(file, size, fstype, journal_size, inode_size,
959 mkfsoptions, reformat, autoformat, backfstype, backfile):
962 realfstype = backfstype
963 if is_block(backfile):
964 if reformat or (need_format(realfstype, backfile) and autoformat == 'yes'):
965 mkfs(realfile, size, realfstype, journal_size, inode_size, mkfsoptions, isblock=0)
971 dev = find_assigned_loop(realfile)
973 print 'WARNING: file', realfile, 'already mapped to', dev
976 if reformat or not os.access(realfile, os.R_OK | os.W_OK):
977 (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile))
979 panic("Unable to create backing store:", realfile)
980 mkfs(realfile, size, realfstype, journal_size, inode_size,
981 mkfsoptions, isblock=0)
983 dev = find_free_loop(realfile)
985 print "attach " + realfile + " <-> " + dev
986 run('losetup', dev, realfile)
989 print "out of loop devices"
992 # undo loop assignment
993 def clean_loop(dev, fstype, backfstype, backdev):
998 if not is_block(realfile):
999 dev = find_assigned_loop(realfile)
1001 print "detach " + dev + " <-> " + realfile
1002 ret, out = run('losetup -d', dev)
1004 log('unable to clean loop device:', dev, 'for file:', realfile)
# finalizes the passed device (releases its backing loop device, if any)
def clean_dev(dev, fstype, backfstype, backdev):
    """Tear down loop-device backing for *dev*; real (non-smfs) block
    devices need no cleanup, so this is a no-op for them."""
    if fstype == 'smfs' or not is_block(dev):
        clean_loop(dev, fstype, backfstype, backdev)
1012 # determine if dev is formatted as a <fstype> filesystem
1013 def need_format(fstype, dev):
1014 # FIXME don't know how to implement this
1017 # initialize a block device if needed
1018 def block_dev(dev, size, fstype, reformat, autoformat, journal_size,
1019 inode_size, mkfsoptions, backfstype, backdev):
1023 if fstype == 'smfs' or not is_block(dev):
1024 dev = init_loop(dev, size, fstype, journal_size, inode_size,
1025 mkfsoptions, reformat, autoformat, backfstype, backdev)
1026 elif reformat or (need_format(fstype, dev) and autoformat == 'yes'):
1027 mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions,
1030 # panic("device:", dev,
1031 # "not prepared, and autoformat is not set.\n",
1032 # "Rerun with --reformat option to format ALL filesystems")
1037 """lookup IP address for an interface"""
1038 rc, out = run("/sbin/ifconfig", iface)
1041 addr = string.split(out[1])[1]
1042 ip = string.split(addr, ':')[1]
def def_mount_options(fstype, target):
    """Return the default mount options for the passed fstype and
    target type ('mds' or 'ost').

    Returns an empty string for filesystem types that need no defaults.
    (Previously mountfsoptions was only assigned inside the ext3/ldiskfs
    branch, so any other fstype raised UnboundLocalError at the return.)
    """
    mountfsoptions = ""
    if fstype == 'ext3' or fstype == 'ldiskfs':
        mountfsoptions = "errors=remount-ro"
        # asyncdel only exists (and is only needed) on 2.4 kernels
        if target == 'ost' and sys_get_branch() == '2.4':
            mountfsoptions = "%s,asyncdel" % (mountfsoptions)
    return mountfsoptions
1054 def sys_get_elan_position_file():
1055 procfiles = ["/proc/elan/device0/position",
1056 "/proc/qsnet/elan4/device0/position",
1057 "/proc/qsnet/elan3/device0/position"]
1059 if os.access(p, os.R_OK):
1063 def sys_get_local_nid(net_type, wildcard, cluster_id):
1064 """Return the local nid."""
1066 if sys_get_elan_position_file():
1067 local = sys_get_local_address('elan', '*', cluster_id)
1069 local = sys_get_local_address(net_type, wildcard, cluster_id)
1072 def sys_get_local_address(net_type, wildcard, cluster_id):
1073 """Return the local address for the network type."""
1075 if net_type in ('tcp','openib','iib','vib','ra'):
1077 iface, star = string.split(wildcard, ':')
1078 local = if2addr(iface)
1080 panic ("unable to determine ip for:", wildcard)
1082 host = socket.gethostname()
1083 local = socket.gethostbyname(host)
1084 elif net_type == 'elan':
1085 # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()'
1086 f = sys_get_elan_position_file()
1088 panic ("unable to determine local Elan ID")
1091 lines = fp.readlines()
1095 if a[0] == 'NodeId':
1099 nid = my_int(cluster_id) + my_int(elan_id)
1100 local = "%d" % (nid)
1101 except ValueError, e:
1105 elif net_type == 'lo':
1106 fixme("automatic local address for loopback")
1107 elif net_type == 'gm':
1108 fixme("automatic local address for GM")
1112 def sys_get_branch():
1113 """Returns kernel release"""
1115 fp = open('/proc/sys/kernel/osrelease')
1116 lines = fp.readlines()
1120 version = string.split(l)
1121 a = string.split(version[0], '.')
1122 return a[0] + '.' + a[1]
1127 # XXX: instead of device_list, ask for $name and see what we get
1128 def is_prepared(name):
1129 """Return true if a device exists for the name"""
1130 if config.lctl_dump:
1132 if (config.noexec or config.record) and config.cleanup:
1135 # expect this format:
1136 # 1 UP ldlm ldlm ldlm_UUID 2
1137 out = lctl.device_list()
1139 if name == string.split(s)[3]:
1141 except CommandError, e:
1145 def net_is_prepared():
1146 """If the any device exists, then assume that all networking
1147 has been configured"""
1148 out = lctl.device_list()
1151 def fs_is_mounted(path):
1152 """Return true if path is a mounted lustre filesystem"""
1154 fp = open('/proc/mounts')
1155 lines = fp.readlines()
1159 if a[1] == path and a[2] == 'lustre_lite':
1165 def kmod_find(src_dir, dev_dir, modname):
1166 modbase = src_dir +'/'+ dev_dir +'/'+ modname
1167 for modext in '.ko', '.o':
1168 module = modbase + modext
1170 if os.access(module, os.R_OK):
1176 def kmod_info(modname):
1177 """Returns reference count for passed module name."""
1179 fp = open('/proc/modules')
1180 lines = fp.readlines()
1183 # please forgive my tired fingers for this one
1184 ret = filter(lambda word, mod = modname: word[0] == mod,
1185 map(lambda line: string.split(line), lines))
1189 except Exception, e:
1193 """Presents kernel module"""
1194 def __init__(self, src_dir, dev_dir, name):
1195 self.src_dir = src_dir
1196 self.dev_dir = dev_dir
1199 # FIXME we ignore the failure of loading gss module, because we might
1200 # don't need it at all.
1203 log ('loading module:', self.name, 'srcdir',
1204 self.src_dir, 'devdir', self.dev_dir)
1206 module = kmod_find(self.src_dir, self.dev_dir,
1208 if not module and self.name != 'ptlrpcs_gss':
1209 panic('module not found:', self.name)
1210 (rc, out) = run('/sbin/insmod', module)
1212 if self.name == 'ptlrpcs_gss':
1213 print "Warning: not support gss security!"
1215 raise CommandError('insmod', out, rc)
1217 (rc, out) = run('/sbin/modprobe', self.name)
1219 if self.name == 'ptlrpcs_gss':
1220 print "Warning: not support gss security!"
1222 raise CommandError('modprobe', out, rc)
1226 log('unloading module:', self.name)
1227 (rc, out) = run('/sbin/rmmod', self.name)
1229 log('unable to unload module:', self.name +
1230 "(" + self.refcount() + ")")
1234 """Returns module info if any."""
1235 return kmod_info(self.name)
1238 """Returns 1 if module is loaded. Otherwise 0 is returned."""
1245 """Returns module refcount."""
1252 """Returns 1 if module is used, otherwise 0 is returned."""
1258 if users and users != '(unused)' and users != '-':
1266 """Returns 1 if module is busy, otherwise 0 is returned."""
1267 if self.loaded() and (self.used() or self.refcount() != '0'):
1273 """Manage kernel modules"""
    def __init__(self, lustre_dir, portals_dir):
        # Roots of the lustre and portals source trees; used by
        # add_lustre_module()/add_portals_module() to locate .ko/.o files.
        self.lustre_dir = lustre_dir
        self.portals_dir = portals_dir
        # kmod objects, kept in the order they must be loaded.
        self.kmodule_list = []
1279 def find_module(self, modname):
1280 """Find module by module name"""
1281 for mod in self.kmodule_list:
1282 if mod.name == modname:
1286 def add_portals_module(self, dev_dir, modname):
1287 """Append a module to list of modules to load."""
1289 mod = self.find_module(modname)
1291 mod = kmod(self.portals_dir, dev_dir, modname)
1292 self.kmodule_list.append(mod)
1294 def add_lustre_module(self, dev_dir, modname):
1295 """Append a module to list of modules to load."""
1297 mod = self.find_module(modname)
1299 mod = kmod(self.lustre_dir, dev_dir, modname)
1300 self.kmodule_list.append(mod)
1302 def load_modules(self):
1303 """Load all the modules in the list in the order they appear."""
1304 for mod in self.kmodule_list:
1305 if mod.loaded() and not config.noexec:
1309 def cleanup_modules(self):
1310 """Unload the modules in the list in reverse order."""
1311 rev = self.kmodule_list
1314 if (not mod.loaded() or mod.busy()) and not config.noexec:
1317 if mod.name == 'portals' and config.dump:
1318 lctl.dump(config.dump)
1321 # ============================================================
1322 # Classes to prepare and cleanup the various objects
1325 """ Base class for the rest of the modules. The default cleanup method is
1326 defined here, as well as some utilitiy funcs.
1328 def __init__(self, module_name, db):
1330 self.module_name = module_name
1331 self.name = self.db.getName()
1332 self.uuid = self.db.getUUID()
    def info(self, *args):
        """Print a status line identifying this module:
        '<MODULE_NAME>: <name> <uuid> <args...>'."""
        msg = string.join(map(str,args))
        print self.module_name + ":", self.name, self.uuid, msg
1341 """ default cleanup, used for most modules """
1344 lctl.cleanup(self.name, self.uuid, config.force)
1345 except CommandError, e:
1346 log(self.module_name, "cleanup failed: ", self.name)
1350 def add_module(self, manager):
1351 """Adds all needed modules in the order they appear."""
1354 def safe_to_clean(self):
    def safe_to_clean_modules(self):
        # By default, modules may be unloaded exactly when the device
        # itself is safe to clean; subclasses may override.
        return self.safe_to_clean()
1360 class Network(Module):
1361 def __init__(self,db):
1362 Module.__init__(self, 'NETWORK', db)
1363 self.net_type = self.db.get_val('nettype')
1364 self.nid = self.db.get_val('nid', '*')
1365 self.cluster_id = self.db.get_val('clusterid', "0")
1366 self.port = self.db.get_val_int('port', 0)
1369 self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id)
1371 panic("unable to set nid for", self.net_type, self.nid, cluster_id)
1372 self.generic_nid = 1
1373 debug("nid:", self.nid)
1375 self.generic_nid = 0
1377 self.nid_uuid = self.nid_to_uuid(self.nid)
1378 self.hostaddr = self.db.get_hostaddr()
1379 if len(self.hostaddr) == 0:
1380 self.hostaddr.append(self.nid)
1381 if '*' in self.hostaddr[0]:
1382 self.hostaddr[0] = sys_get_local_address(self.net_type, self.hostaddr[0], self.cluster_id)
1383 if not self.hostaddr[0]:
1384 panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id)
1385 debug("hostaddr:", self.hostaddr[0])
1387 def add_module(self, manager):
1388 manager.add_portals_module("libcfs", 'libcfs')
1389 manager.add_portals_module("portals", 'portals')
1391 if node_needs_router():
1392 manager.add_portals_module("router", 'kptlrouter')
1393 if self.net_type == 'tcp':
1394 manager.add_portals_module("knals/socknal", 'ksocknal')
1395 if self.net_type == 'elan':
1396 manager.add_portals_module("knals/qswnal", 'kqswnal')
1397 if self.net_type == 'gm':
1398 manager.add_portals_module("knals/gmnal", 'kgmnal')
1399 if self.net_type == 'openib':
1400 manager.add_portals_module("knals/openibnal", 'kopenibnal')
1401 if self.net_type == 'iib':
1402 manager.add_portals_module("knals/iibnal", 'kiibnal')
1403 if self.net_type == 'vib':
1404 self.add_portals_module("knals/vibnal", 'kvibnal')
1405 if self.net_type == 'lo':
1406 manager.add_portals_module("knals/lonal", 'klonal')
1407 if self.net_type == 'ra':
1408 manager.add_portals_module("knals/ranal", 'kranal')
1410 def nid_to_uuid(self, nid):
1411 return "NID_%s_UUID" %(nid,)
1414 if not config.record and net_is_prepared():
1416 self.info(self.net_type, self.nid, self.port)
1417 if not (config.record and self.generic_nid):
1418 lctl.network(self.net_type, self.nid)
1419 if self.net_type == 'tcp':
1421 for hostaddr in self.db.get_hostaddr():
1422 ip = string.split(hostaddr, '/')[0]
1423 if len(string.split(hostaddr, '/')) == 2:
1424 netmask = string.split(hostaddr, '/')[1]
1427 lctl.add_interface(self.net_type, ip, netmask)
1428 if self.net_type == 'elan':
1430 if self.port and node_is_router():
1431 run_one_acceptor(self.port)
1432 self.connect_peer_gateways()
1434 def connect_peer_gateways(self):
1435 for router in self.db.lookup_class('node'):
1436 if router.get_val_int('router', 0):
1437 for netuuid in router.get_networks():
1438 net = self.db.lookup(netuuid)
1440 if (gw.cluster_id == self.cluster_id and
1441 gw.net_type == self.net_type):
1442 if gw.nid != self.nid:
1445 def disconnect_peer_gateways(self):
1446 for router in self.db.lookup_class('node'):
1447 if router.get_val_int('router', 0):
1448 for netuuid in router.get_networks():
1449 net = self.db.lookup(netuuid)
1451 if (gw.cluster_id == self.cluster_id and
1452 gw.net_type == self.net_type):
1453 if gw.nid != self.nid:
1456 except CommandError, e:
1457 print "disconnect failed: ", self.name
1461 def safe_to_clean(self):
1462 return not net_is_prepared()
1465 self.info(self.net_type, self.nid, self.port)
1467 stop_acceptor(self.port)
1468 if node_is_router():
1469 self.disconnect_peer_gateways()
1470 if self.net_type == 'tcp':
1471 for hostaddr in self.db.get_hostaddr():
1472 ip = string.split(hostaddr, '/')[0]
1473 lctl.del_interface(self.net_type, ip)
1475 def correct_level(self, level, op=None):
1478 class RouteTable(Module):
1479 def __init__(self,db):
1480 Module.__init__(self, 'ROUTES', db)
1482 def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id,
1484 # only setup connections for tcp, openib, and iib NALs
1486 if not net_type in ('tcp','openib','iib','vib','ra'):
1489 # connect to target if route is to single node and this node is the gw
1490 if lo == hi and local_interface(net_type, gw_cluster_id, gw):
1491 if not local_cluster(net_type, tgt_cluster_id):
1492 panic("target", lo, " not on the local cluster")
1493 srvdb = self.db.nid2server(lo, net_type, gw_cluster_id)
1494 # connect to gateway if this node is not the gw
1495 elif (local_cluster(net_type, gw_cluster_id)
1496 and not local_interface(net_type, gw_cluster_id, gw)):
1497 srvdb = self.db.nid2server(gw, net_type, gw_cluster_id)
1502 panic("no server for nid", lo)
1505 return Network(srvdb)
1508 if not config.record and net_is_prepared():
1511 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1512 lctl.add_route(net_type, gw, lo, hi)
1513 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1517 def safe_to_clean(self):
1518 return not net_is_prepared()
1521 if net_is_prepared():
1522 # the network is still being used, don't clean it up
1524 for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl():
1525 srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)
1528 lctl.disconnect(srv)
1529 except CommandError, e:
1530 print "disconnect failed: ", self.name
1535 lctl.del_route(net_type, gw, lo, hi)
1536 except CommandError, e:
1537 print "del_route failed: ", self.name
1541 class Management(Module):
1542 def __init__(self, db):
1543 Module.__init__(self, 'MGMT', db)
1545 def add_module(self, manager):
1546 manager.add_lustre_module('lvfs', 'lvfs')
1547 manager.add_lustre_module('obdclass', 'obdclass')
1548 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1549 manager.add_lustre_module('mgmt', 'mgmt_svc')
1552 if not config.record and is_prepared(self.name):
1555 lctl.newdev("mgmt", self.name, self.uuid)
1557 def safe_to_clean(self):
1561 if is_prepared(self.name):
1562 Module.cleanup(self)
1564 def correct_level(self, level, op=None):
1567 # This is only needed to load the modules; the LDLM device
1568 # is now created automatically.
1570 def __init__(self,db):
1571 Module.__init__(self, 'LDLM', db)
1573 def add_module(self, manager):
1574 manager.add_lustre_module('lvfs', 'lvfs')
1575 manager.add_lustre_module('obdclass', 'obdclass')
1576 manager.add_lustre_module('sec', 'ptlrpcs')
1577 manager.add_lustre_module('ptlrpc', 'ptlrpc')
1578 manager.add_lustre_module('sec/gss', 'ptlrpcs_gss')
1586 def correct_level(self, level, op=None):
1590 def __init__(self, db, uuid, fs_name, name_override = None, config_only = None):
1591 Module.__init__(self, 'LOV', db)
1592 if name_override != None:
1593 self.name = "lov_%s" % name_override
1594 self.mds_uuid = self.db.get_first_ref('mds')
1595 self.stripe_sz = self.db.get_val_int('stripesize', 1048576)
1596 self.stripe_off = self.db.get_val_int('stripeoffset', 0)
1597 self.pattern = self.db.get_val_int('stripepattern', 0)
1598 self.devlist = self.db.get_lov_tgts('lov_tgt')
1599 self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
1602 self.desc_uuid = self.uuid
1603 self.uuid = generate_client_uuid(self.name)
1604 self.fs_name = fs_name
1606 self.config_only = 1
1608 self.config_only = None
1609 mds = self.db.lookup(self.mds_uuid)
1610 self.mds_name = mds.getName()
1611 for (obd_uuid, index, gen, active) in self.devlist:
1614 self.obdlist.append(obd_uuid)
1615 obd = self.db.lookup(obd_uuid)
1616 osc = get_osc(obd, self.uuid, fs_name)
1618 self.osclist.append((osc, index, gen, active))
1620 panic('osc not found:', obd_uuid)
1626 if not config.record and is_prepared(self.name):
1628 self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
1629 self.stripe_off, self.pattern, self.devlist,
1631 lctl.lov_setup(self.name, self.uuid, self.desc_uuid, self.stripe_cnt,
1632 self.stripe_sz, self.stripe_off, self.pattern,
1633 string.join(self.obdlist))
1634 for (osc, index, gen, active) in self.osclist:
1635 target_uuid = osc.target_uuid
1637 # Only ignore connect failures with --force, which
1638 # isn't implemented here yet.
1640 osc.prepare(ignore_connect_failure=0)
1641 except CommandError, e:
1642 print "Error preparing OSC %s\n" % osc.uuid
1644 lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
1647 for (osc, index, gen, active) in self.osclist:
1648 target_uuid = osc.target_uuid
1650 if is_prepared(self.name):
1651 Module.cleanup(self)
1652 if self.config_only:
1653 panic("Can't clean up config_only LOV ", self.name)
1655 def add_module(self, manager):
1656 if self.config_only:
1657 panic("Can't load modules for config_only LOV ", self.name)
1658 for (osc, index, gen, active) in self.osclist:
1659 osc.add_module(manager)
1661 manager.add_lustre_module('lov', 'lov')
1663 def correct_level(self, level, op=None):
1667 def __init__(self, db, uuid, fs_name, name_override = None):
1668 Module.__init__(self, 'LMV', db)
1669 if name_override != None:
1670 self.name = "lmv_%s" % name_override
1672 self.devlist = self.db.get_lmv_tgts('lmv_tgt')
1673 if self.devlist == None:
1674 self.devlist = self.db.get_refs('mds')
1677 self.desc_uuid = self.uuid
1679 self.fs_name = fs_name
1680 for mds_uuid in self.devlist:
1681 mds = self.db.lookup(mds_uuid)
1683 panic("MDS not found!")
1684 mdc = MDC(mds, self.uuid, fs_name)
1686 self.mdclist.append(mdc)
1688 panic('mdc not found:', mds_uuid)
1691 if is_prepared(self.name):
1695 for mdc in self.mdclist:
1697 # Only ignore connect failures with --force, which
1698 # isn't implemented here yet.
1699 mdc.prepare(ignore_connect_failure=0)
1700 except CommandError, e:
1701 print "Error preparing LMV %s\n" % mdc.uuid
1704 lctl.lmv_setup(self.name, self.uuid, self.desc_uuid,
1705 string.join(self.devlist))
1708 for mdc in self.mdclist:
1710 if is_prepared(self.name):
1711 Module.cleanup(self)
1713 def add_module(self, manager):
1714 for mdc in self.mdclist:
1715 mdc.add_module(manager)
1717 manager.add_lustre_module('lmv', 'lmv')
1719 def correct_level(self, level, op=None):
1722 class CONFDEV(Module):
1723 def __init__(self, db, name, target_uuid, uuid):
1724 Module.__init__(self, 'CONFDEV', db)
1725 self.devpath = self.db.get_val('devpath','')
1726 self.backdevpath = self.db.get_val('backdevpath','')
1727 self.size = self.db.get_val_int('devsize', 0)
1728 self.journal_size = self.db.get_val_int('journalsize', 0)
1729 self.fstype = self.db.get_val('fstype', '')
1730 self.backfstype = self.db.get_val('backfstype', '')
1731 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
1732 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
1733 self.target = self.db.lookup(target_uuid)
1734 self.name = "conf_%s" % self.target.getName()
1735 self.client_uuids = self.target.get_refs('client')
1736 self.obdtype = self.db.get_val('obdtype', '')
1738 if self.obdtype == None:
1739 self.obdtype = 'dumb'
1741 self.conf_name = name
1742 self.conf_uuid = uuid
1743 self.realdev = self.devpath
1748 lmv_uuid = self.db.get_first_ref('lmv')
1749 if lmv_uuid != None:
1750 self.lmv = self.db.lookup(lmv_uuid)
1751 if self.lmv != None:
1752 self.client_uuids = self.lmv.get_refs('client')
1754 if self.target.get_class() == 'mds':
1755 if self.target.get_val('failover', 0):
1756 self.failover_mds = 'f'
1758 self.failover_mds = 'n'
1759 self.format = self.db.get_val('autoformat', "no")
1761 self.format = self.db.get_val('autoformat', "yes")
1762 self.osdtype = self.db.get_val('osdtype')
1763 ost = self.db.lookup(target_uuid)
1764 if ost.get_val('failover', 0):
1765 self.failover_ost = 'f'
1767 self.failover_ost = 'n'
1769 self.inode_size = self.get_inode_size()
1771 if self.lmv != None:
1772 client_uuid = self.name + "_lmv_UUID"
1773 self.master = LMV(self.lmv, client_uuid,
1774 self.conf_name, self.conf_name)
1776 def get_inode_size(self):
1777 inode_size = self.db.get_val_int('inodesize', 0)
1778 if inode_size == 0 and self.target.get_class() == 'mds':
1780 # default inode size for case when neither LOV either
1781 # LMV is accessible.
1782 self.inode_size = 256
1784 # find the LOV for this MDS
1785 lovconfig_uuid = self.target.get_first_ref('lovconfig')
1786 if lovconfig_uuid or self.lmv != None:
1787 if self.lmv != None:
1788 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1789 lovconfig = self.lmv.lookup(lovconfig_uuid)
1790 lov_uuid = lovconfig.get_first_ref('lov')
1791 if lov_uuid == None:
1792 panic(self.target.getName() + ": No LOV found for lovconfig ",
1795 lovconfig = self.target.lookup(lovconfig_uuid)
1796 lov_uuid = lovconfig.get_first_ref('lov')
1797 if lov_uuid == None:
1798 panic(self.target.getName() + ": No LOV found for lovconfig ",
1800 if self.lmv != None:
1801 lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
1802 lovconfig = self.lmv.lookup(lovconfig_uuid)
1803 lov_uuid = lovconfig.get_first_ref('lov')
1805 lov = LOV(self.db.lookup(lov_uuid), lov_uuid, self.name,
1808 # default stripe count controls default inode_size
1809 if lov.stripe_cnt > 0:
1810 stripe_count = lov.stripe_cnt
1812 stripe_count = len(lov.devlist)
1813 if stripe_count > 77:
1815 elif stripe_count > 35:
1817 elif stripe_count > 13:
1819 elif stripe_count > 3:
1826 def get_mount_options(self, blkdev):
1827 options = def_mount_options(self.fstype,
1828 self.target.get_class())
1830 if config.mountfsoptions:
1832 options = "%s,%s" %(options, config.mountfsoptions)
1834 options = config.mountfsoptions
1835 if self.mountfsoptions:
1836 options = "%s,%s" %(options, self.mountfsoptions)
1838 if self.mountfsoptions:
1840 options = "%s,%s" %(options, self.mountfsoptions)
1842 options = self.mountfsoptions
1844 if self.fstype == 'smfs':
1846 options = "%s,type=%s,dev=%s" %(options, self.backfstype,
1849 options = "type=%s,dev=%s" %(self.backfstype,
1852 if self.target.get_class() == 'mds':
1854 options = "%s,iopen_nopriv" %(options)
1856 options = "iopen_nopriv"
1861 if is_prepared(self.name):
1864 blkdev = block_dev(self.devpath, self.size, self.fstype,
1865 config.reformat, self.format, self.journal_size,
1866 self.inode_size, self.mkfsoptions, self.backfstype,
1869 if self.fstype == 'smfs':
1870 realdev = self.fstype
1874 mountfsoptions = self.get_mount_options(blkdev)
1876 self.info(self.target.get_class(), realdev, mountfsoptions,
1877 self.fstype, self.size, self.format)
1879 lctl.newdev("confobd", self.name, self.uuid,
1880 setup ="%s %s %s" %(realdev, self.fstype,
1883 self.mountfsoptions = mountfsoptions
1884 self.realdev = realdev
1886 def add_module(self, manager):
1887 manager.add_lustre_module('obdclass', 'confobd')
1889 def write_conf(self):
1890 if self.target.get_class() == 'ost':
1892 lctl.clear_log(self.name, self.target.getName() + '-conf')
1893 lctl.record(self.name, self.target.getName() + '-conf')
1894 lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
1895 setup ="%s %s %s %s" %(self.realdev, self.fstype,
1897 self.mountfsoptions))
1899 lctl.clear_log(self.name, 'OSS-conf')
1900 lctl.record(self.name, 'OSS-conf')
1901 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
1906 if self.target.get_class() == 'mds':
1907 if self.master != None:
1908 master_name = self.master.name
1910 master_name = 'dumb'
1913 lctl.clear_log(self.name, self.target.getName() + '-conf')
1914 lctl.record(self.name, self.target.getName() + '-conf')
1915 lctl.newdev("mds", self.conf_name, self.conf_uuid,
1916 setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
1917 self.conf_name, self.mountfsoptions,
1918 master_name, self.obdtype))
1922 if not self.client_uuids:
1925 for uuid in self.client_uuids:
1926 log("recording client:", uuid)
1927 client_uuid = generate_client_uuid(self.name)
1928 client = VOSC(self.db.lookup(uuid), client_uuid,
1929 self.target.getName(), self.name)
1931 lctl.clear_log(self.name, self.target.getName())
1932 lctl.record(self.name, self.target.getName())
1934 lctl.mount_option(self.target.getName(), client.get_name(), "")
1938 lctl.clear_log(self.name, self.target.getName() + '-clean')
1939 lctl.record(self.name, self.target.getName() + '-clean')
1941 lctl.del_mount_option(self.target.getName())
1949 # record logs for each client
1951 config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
1953 config_options = CONFIG_FILE
1955 for node_db in self.db.lookup_class('node'):
1956 client_name = node_db.getName()
1957 for prof_uuid in node_db.get_refs('profile'):
1958 prof_db = node_db.lookup(prof_uuid)
1959 # refactor this into a funtion to test "clientness"
1961 for ref_class, ref_uuid in prof_db.get_all_refs():
1962 if ref_class in ('mountpoint','echoclient'):
1963 debug("recording", client_name)
1964 old_noexec = config.noexec
1966 noexec_opt = ('', '-n')
1967 ret, out = run (sys.argv[0],
1968 noexec_opt[old_noexec == 1],
1969 " -v --record --nomod",
1970 "--record_log", client_name,
1971 "--record_device", self.name,
1972 "--node", client_name,
1975 for s in out: log("record> ", string.strip(s))
1976 ret, out = run (sys.argv[0],
1977 noexec_opt[old_noexec == 1],
1978 "--cleanup -v --record --nomod",
1979 "--record_log", client_name + "-clean",
1980 "--record_device", self.name,
1981 "--node", client_name,
1984 for s in out: log("record> ", string.strip(s))
1985 config.noexec = old_noexec
1989 lctl.start(self.name, self.conf_name)
1990 except CommandError, e:
1992 if self.target.get_class() == 'ost':
1993 if not is_prepared('OSS'):
1995 lctl.start(self.name, 'OSS')
1996 except CommandError, e:
2000 if is_prepared(self.name):
2002 lctl.cleanup(self.name, self.uuid, 0, 0)
2003 clean_dev(self.devpath, self.fstype,
2004 self.backfstype, self.backdevpath)
2005 except CommandError, e:
2006 log(self.module_name, "cleanup failed: ", self.name)
2009 Module.cleanup(self)
2011 class MDSDEV(Module):
2012 def __init__(self,db):
2013 Module.__init__(self, 'MDSDEV', db)
2014 self.devpath = self.db.get_val('devpath','')
2015 self.backdevpath = self.db.get_val('backdevpath','')
2016 self.size = self.db.get_val_int('devsize', 0)
2017 self.journal_size = self.db.get_val_int('journalsize', 0)
2018 self.fstype = self.db.get_val('fstype', '')
2019 self.backfstype = self.db.get_val('backfstype', '')
2020 self.nspath = self.db.get_val('nspath', '')
2021 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2022 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2023 self.obdtype = self.db.get_val('obdtype', '')
2024 self.root_squash = self.db.get_val('root_squash', '')
2025 self.no_root_squash = self.db.get_val('no_root_squash', '')
2027 target_uuid = self.db.get_first_ref('target')
2028 self.target = self.db.lookup(target_uuid)
2029 self.name = self.target.getName()
2033 lmv_uuid = self.db.get_first_ref('lmv')
2034 if lmv_uuid != None:
2035 self.lmv = self.db.lookup(lmv_uuid)
2037 active_uuid = get_active_target(self.target)
2039 panic("No target device found:", target_uuid)
2040 if active_uuid == self.uuid:
2042 group = self.target.get_val('group')
2043 if config.group and config.group != group:
2048 self.uuid = target_uuid
2051 if self.lmv != None:
2052 client_uuid = self.name + "_lmv_UUID"
2053 self.master = LMV(self.lmv, client_uuid,
2054 self.name, self.name)
2056 self.confobd = CONFDEV(self.db, self.name,
2057 target_uuid, self.uuid)
2059 def add_module(self, manager):
2061 manager.add_lustre_module('mdc', 'mdc')
2062 manager.add_lustre_module('osc', 'osc')
2063 manager.add_lustre_module('ost', 'ost')
2064 manager.add_lustre_module('lov', 'lov')
2065 manager.add_lustre_module('mds', 'mds')
2067 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2068 manager.add_lustre_module(self.fstype, self.fstype)
2071 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
2073 # if fstype is smfs, then we should also take care about backing
2075 if self.fstype == 'smfs':
2076 manager.add_lustre_module(self.backfstype, self.backfstype)
2077 manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
2079 for option in string.split(self.mountfsoptions, ','):
2080 if option == 'snap':
2081 if not self.fstype == 'smfs':
2082 panic("mountoptions has 'snap', but fstype is not smfs.")
2083 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2084 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2087 if self.master != None:
2088 self.master.add_module(manager)
2090 # add CONFOBD modules
2091 if self.confobd != None:
2092 self.confobd.add_module(manager)
2094 def write_conf(self):
2095 if is_prepared(self.name):
2098 debug(self.uuid, "not active")
2101 self.confobd.prepare()
2102 self.confobd.write_conf()
2103 self.confobd.cleanup()
2106 if is_prepared(self.name):
2109 debug(self.uuid, "not active")
2113 self.confobd.prepare()
2115 self.confobd.write_conf()
2118 if self.master != None:
2119 self.master.prepare()
2121 lctl.attach("mds", self.name, self.uuid)
2122 if config.mds_mds_sec:
2123 lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec)
2124 if config.mds_ost_sec:
2125 lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec)
2126 lctl.detach(self.name)
2128 if not config.record:
2129 self.confobd.start()
2131 if not is_prepared('MDT'):
2132 lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
2134 if development_mode():
2135 procentry = "/proc/fs/lustre/mds/lsd_upcall"
2136 upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
2137 if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
2138 print "MDS Warning: failed to set lsd cache upcall"
2140 run("echo ", upcall, " > ", procentry)
2142 if config.root_squash == None:
2143 config.root_squash = self.root_squash
2144 if config.no_root_squash == None:
2145 config.no_root_squash = self.no_root_squash
2146 if config.root_squash:
2147 if config.no_root_squash:
2148 nsnid = config.no_root_squash
2151 lctl.root_squash(self.name, config.root_squash, nsnid)
2153 def msd_remaining(self):
2154 out = lctl.device_list()
2156 if string.split(s)[2] in ('mds',):
2159 def safe_to_clean(self):
2162 def safe_to_clean_modules(self):
2163 return not self.msd_remaining()
2167 debug(self.uuid, "not active")
2170 if is_prepared(self.name):
2172 lctl.cleanup(self.name, self.uuid, config.force,
2174 except CommandError, e:
2175 log(self.module_name, "cleanup failed: ", self.name)
2178 Module.cleanup(self)
2180 if self.master != None:
2181 self.master.cleanup()
2182 if not self.msd_remaining() and is_prepared('MDT'):
2184 lctl.cleanup("MDT", "MDT_UUID", config.force,
2186 except CommandError, e:
2187 print "cleanup failed: ", self.name
2192 self.confobd.cleanup()
2194 def correct_level(self, level, op=None):
2195 #if self.master != None:
2200 def __init__(self, db):
2201 Module.__init__(self, 'OSD', db)
2202 self.osdtype = self.db.get_val('osdtype')
2203 self.devpath = self.db.get_val('devpath', '')
2204 self.backdevpath = self.db.get_val('backdevpath', '')
2205 self.size = self.db.get_val_int('devsize', 0)
2206 self.journal_size = self.db.get_val_int('journalsize', 0)
2207 self.inode_size = self.db.get_val_int('inodesize', 0)
2208 self.mkfsoptions = self.db.get_val('mkfsoptions', '')
2209 self.mountfsoptions = self.db.get_val('mountfsoptions', '')
2210 self.fstype = self.db.get_val('fstype', '')
2211 self.backfstype = self.db.get_val('backfstype', '')
2212 self.nspath = self.db.get_val('nspath', '')
2213 target_uuid = self.db.get_first_ref('target')
2214 ost = self.db.lookup(target_uuid)
2215 self.name = ost.getName()
2216 self.format = self.db.get_val('autoformat', 'yes')
2217 if ost.get_val('failover', 0):
2218 self.failover_ost = 'f'
2220 self.failover_ost = 'n'
2222 active_uuid = get_active_target(ost)
2224 panic("No target device found:", target_uuid)
2225 if active_uuid == self.uuid:
2227 group = ost.get_val('group')
2228 if config.group and config.group != group:
2233 self.uuid = target_uuid
2234 self.confobd = CONFDEV(self.db, self.name,
2235 target_uuid, self.uuid)
2237 def add_module(self, manager):
2240 manager.add_lustre_module('ost', 'ost')
2242 if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
2243 manager.add_lustre_module(self.fstype, self.fstype)
2246 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
2248 if self.fstype == 'smfs':
2249 manager.add_lustre_module(self.backfstype, self.backfstype)
2250 manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
2252 for option in self.mountfsoptions:
2253 if option == 'snap':
2254 if not self.fstype == 'smfs':
2255 panic("mountoptions with snap, but fstype is not smfs\n")
2256 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
2257 manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
2259 manager.add_lustre_module(self.osdtype, self.osdtype)
2261 # add CONFOBD modules
2262 if self.confobd != None:
2263 self.confobd.add_module(manager)
2266 if is_prepared(self.name):
2269 debug(self.uuid, "not active")
2273 if self.osdtype == 'obdecho':
2274 self.info(self.osdtype)
2275 lctl.newdev("obdecho", self.name, self.uuid)
2276 if not is_prepared('OSS'):
2277 lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
2279 self.confobd.prepare()
2281 self.confobd.write_conf()
2282 if not config.record:
2283 self.confobd.start()
2285 def write_conf(self):
2286 if is_prepared(self.name):
2289 debug(self.uuid, "not active")
2293 if self.osdtype != 'obdecho':
2294 self.confobd.prepare()
2295 self.confobd.write_conf()
2296 if not config.write_conf:
2297 self.confobd.start()
2298 self.confobd.cleanup()
2300 def osd_remaining(self):
2301 out = lctl.device_list()
2303 if string.split(s)[2] in ('obdfilter', 'obdecho'):
2306 def safe_to_clean(self):
2309 def safe_to_clean_modules(self):
2310 return not self.osd_remaining()
2314 debug(self.uuid, "not active")
2317 if is_prepared(self.name):
2320 lctl.cleanup(self.name, self.uuid, config.force,
2322 except CommandError, e:
2323 log(self.module_name, "cleanup failed: ", self.name)
2326 if not self.osd_remaining() and is_prepared('OSS'):
2328 lctl.cleanup("OSS", "OSS_UUID", config.force,
2330 except CommandError, e:
2331 print "cleanup failed: ", self.name
2335 if self.osdtype != 'obdecho':
2337 self.confobd.cleanup()
2339 def correct_level(self, level, op=None):
2342 # Generic client module, used by OSC and MDC
2343 class Client(Module):
2344 def __init__(self, tgtdb, uuid, module, fs_name,
2345 self_name=None, module_dir=None):
2346 self.target_name = tgtdb.getName()
2347 self.target_uuid = tgtdb.getUUID()
2348 self.module_dir = module_dir
2349 self.backup_targets = []
2350 self.module = module
2353 self.tgt_dev_uuid = get_active_target(tgtdb)
2354 if not self.tgt_dev_uuid:
2355 panic("No target device found for target(1):", self.target_name)
2360 self.module = module
2361 self.module_name = string.upper(module)
2363 self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
2364 self.target_name, fs_name)
2366 self.name = self_name
2368 self.lookup_server(self.tgt_dev_uuid)
2369 self.lookup_backup_targets()
2370 self.fs_name = fs_name
2371 if not self.module_dir:
2372 self.module_dir = module
2374 def add_module(self, manager):
2375 manager.add_lustre_module(self.module_dir, self.module)
2377 def lookup_server(self, srv_uuid):
2378 """ Lookup a server's network information """
2379 self._server_nets = get_ost_net(self.db, srv_uuid)
2380 if len(self._server_nets) == 0:
2381 panic ("Unable to find a server for:", srv_uuid)
2386 def get_servers(self):
2387 return self._server_nets
2389 def lookup_backup_targets(self):
2390 """ Lookup alternative network information """
2391 prof_list = toplustreDB.get_refs('profile')
2392 for prof_uuid in prof_list:
2393 prof_db = toplustreDB.lookup(prof_uuid)
2395 panic("profile:", prof_uuid, "not found.")
2396 for ref_class, ref_uuid in prof_db.get_all_refs():
2397 if ref_class in ('osd', 'mdsdev'):
2398 devdb = toplustreDB.lookup(ref_uuid)
2399 uuid = devdb.get_first_ref('target')
2400 if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
2401 self.backup_targets.append(ref_uuid)
2403 def prepare(self, ignore_connect_failure = 0):
2404 self.info(self.target_uuid)
2405 if not config.record and is_prepared(self.name):
2408 srv = choose_local_server(self.get_servers())
2412 routes = find_route(self.get_servers())
2413 if len(routes) == 0:
2414 panic ("no route to", self.target_uuid)
2415 for (srv, r) in routes:
2416 lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
2417 except CommandError, e:
2418 if not ignore_connect_failure:
2422 if self.target_uuid in config.inactive and self.permits_inactive():
2423 debug("%s inactive" % self.target_uuid)
2424 inactive_p = "inactive"
2426 debug("%s active" % self.target_uuid)
2428 lctl.newdev(self.module, self.name, self.uuid,
2429 setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
2431 for tgt_dev_uuid in self.backup_targets:
2432 this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
2433 if len(this_nets) == 0:
2434 panic ("Unable to find a server for:", tgt_dev_uuid)
2435 srv = choose_local_server(this_nets)
2439 routes = find_route(this_nets);
2440 if len(routes) == 0:
2441 panic("no route to", tgt_dev_uuid)
2442 for (srv, r) in routes:
2443 lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2445 lctl.add_conn(self.name, srv.nid_uuid);
2448 if is_prepared(self.name):
2449 Module.cleanup(self)
2451 srv = choose_local_server(self.get_servers())
2453 lctl.disconnect(srv)
2455 for (srv, r) in find_route(self.get_servers()):
2456 lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
2457 except CommandError, e:
2458 log(self.module_name, "cleanup failed: ", self.name)
2462 for tgt_dev_uuid in self.backup_targets:
2463 this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
2464 srv = choose_local_server(this_net)
2466 lctl.disconnect(srv)
2468 for (srv, r) in find_route(this_net):
2469 lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3])
2471 def correct_level(self, level, op=None):
2474 def deactivate(self):
2476 lctl.deactivate(self.name)
2477 except CommandError, e:
2478 log(self.module_name, "deactivate failed: ", self.name)
2483 def __init__(self, db, uuid, fs_name):
2484 Client.__init__(self, db, uuid, 'mdc', fs_name)
2486 def permits_inactive(self):
2490 def __init__(self, db, uuid, fs_name):
2491 Client.__init__(self, db, uuid, 'osc', fs_name)
2493 def permits_inactive(self):
2496 class CMOBD(Module):
2497 def __init__(self, db):
2498 Module.__init__(self, 'CMOBD', db)
2499 self.name = self.db.getName();
2500 self.uuid = generate_client_uuid(self.name)
2501 self.master_uuid = self.db.get_first_ref('masterobd')
2502 self.cache_uuid = self.db.get_first_ref('cacheobd')
2504 master_obd = self.db.lookup(self.master_uuid)
2506 panic('master obd not found:', self.master_uuid)
2508 cache_obd = self.db.lookup(self.cache_uuid)
2510 panic('cache obd not found:', self.cache_uuid)
2515 master_class = master_obd.get_class()
2516 cache_class = cache_obd.get_class()
2518 if master_class == 'ost' or master_class == 'lov':
2519 client_uuid = "%s_lov_master_UUID" % (self.name)
2520 self.master = LOV(master_obd, client_uuid, self.name);
2521 elif master_class == 'mds':
2522 self.master = get_mdc(db, self.name, self.master_uuid)
2523 elif master_class == 'lmv':
2524 client_uuid = "%s_lmv_master_UUID" % (self.name)
2525 self.master = LMV(master_obd, client_uuid, self.name);
2527 panic("unknown master obd class '%s'" %(master_class))
2529 if cache_class == 'ost' or cache_class == 'lov':
2530 client_uuid = "%s_lov_cache_UUID" % (self.name)
2531 self.cache = LOV(cache_obd, client_uuid, self.name);
2532 elif cache_class == 'mds':
2533 self.cache = get_mdc(db, self.name, self.cache_uuid)
2534 elif cache_class == 'lmv':
2535 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2536 self.cache = LMV(cache_obd, client_uuid, self.name);
2538 panic("unknown cache obd class '%s'" %(cache_class))
2541 self.master.prepare()
2542 if not config.record and is_prepared(self.name):
2544 self.info(self.master_uuid, self.cache_uuid)
2545 lctl.newdev("cmobd", self.name, self.uuid,
2546 setup ="%s %s" %(self.master.uuid,
2555 def get_master_name(self):
2556 return self.master.name
2558 def get_cache_name(self):
2559 return self.cache.name
2562 if is_prepared(self.name):
2563 Module.cleanup(self)
2565 self.master.cleanup()
2567 def add_module(self, manager):
2568 manager.add_lustre_module('cmobd', 'cmobd')
2569 self.master.add_module(manager)
2571 def correct_level(self, level, op=None):
2575 def __init__(self, db, uuid, name):
2576 Module.__init__(self, 'COBD', db)
2577 self.name = self.db.getName();
2578 self.uuid = generate_client_uuid(self.name)
2579 self.master_uuid = self.db.get_first_ref('masterobd')
2580 self.cache_uuid = self.db.get_first_ref('cacheobd')
2582 master_obd = self.db.lookup(self.master_uuid)
2584 panic('master obd not found:', self.master_uuid)
2586 cache_obd = self.db.lookup(self.cache_uuid)
2588 panic('cache obd not found:', self.cache_uuid)
2593 master_class = master_obd.get_class()
2594 cache_class = cache_obd.get_class()
2596 if master_class == 'ost' or master_class == 'lov':
2597 client_uuid = "%s_lov_master_UUID" % (self.name)
2598 self.master = LOV(master_obd, client_uuid, name);
2599 elif master_class == 'mds':
2600 self.master = get_mdc(db, name, self.master_uuid)
2601 elif master_class == 'lmv':
2602 client_uuid = "%s_lmv_master_UUID" % (self.name)
2603 self.master = LMV(master_obd, client_uuid, self.name);
2605 panic("unknown master obd class '%s'" %(master_class))
2607 if cache_class == 'ost' or cache_class == 'lov':
2608 client_uuid = "%s_lov_cache_UUID" % (self.name)
2609 self.cache = LOV(cache_obd, client_uuid, name);
2610 elif cache_class == 'mds':
2611 self.cache = get_mdc(db, name, self.cache_uuid)
2612 elif cache_class == 'lmv':
2613 client_uuid = "%s_lmv_cache_UUID" % (self.name)
2614 self.cache = LMV(cache_obd, client_uuid, self.name);
2616 panic("unknown cache obd class '%s'" %(cache_class))
2624 def get_master_name(self):
2625 return self.master.name
2627 def get_cache_name(self):
2628 return self.cache.name
2631 self.master.prepare()
2632 self.cache.prepare()
2633 if not config.record and is_prepared(self.name):
2635 self.info(self.master_uuid, self.cache_uuid)
2636 lctl.newdev("cobd", self.name, self.uuid,
2637 setup ="%s %s" %(self.master.name,
2641 if is_prepared(self.name):
2642 Module.cleanup(self)
2643 self.master.cleanup()
2644 self.cache.cleanup()
2646 def add_module(self, manager):
2647 manager.add_lustre_module('cobd', 'cobd')
2648 self.master.add_module(manager)
2650 # virtual interface for OSC and LOV
2652 def __init__(self, db, client_uuid, name, name_override = None):
2653 Module.__init__(self, 'VOSC', db)
2654 if db.get_class() == 'lov':
2655 self.osc = LOV(db, client_uuid, name, name_override)
2657 elif db.get_class() == 'cobd':
2658 self.osc = COBD(db, client_uuid, name)
2661 self.osc = OSC(db, client_uuid, name)
2665 return self.osc.get_uuid()
2668 return self.osc.get_name()
2676 def add_module(self, manager):
2677 self.osc.add_module(manager)
2679 def correct_level(self, level, op=None):
2680 return self.osc.correct_level(level, op)
2682 # virtual interface for MDC and LMV
2684 def __init__(self, db, client_uuid, name, name_override = None):
2685 Module.__init__(self, 'VMDC', db)
2686 if db.get_class() == 'lmv':
2687 self.mdc = LMV(db, client_uuid, name, name_override)
2688 elif db.get_class() == 'cobd':
2689 self.mdc = COBD(db, client_uuid, name)
2691 self.mdc = MDC(db, client_uuid, name)
2694 return self.mdc.uuid
2697 return self.mdc.name
2705 def add_module(self, manager):
2706 self.mdc.add_module(manager)
2708 def correct_level(self, level, op=None):
2709 return self.mdc.correct_level(level, op)
2711 class ECHO_CLIENT(Module):
2712 def __init__(self,db):
2713 Module.__init__(self, 'ECHO_CLIENT', db)
2714 self.obd_uuid = self.db.get_first_ref('obd')
2715 obd = self.db.lookup(self.obd_uuid)
2716 self.uuid = generate_client_uuid(self.name)
2717 self.osc = VOSC(obd, self.uuid, self.name)
2720 if not config.record and is_prepared(self.name):
2723 self.osc.prepare() # XXX This is so cheating. -p
2724 self.info(self.obd_uuid)
2726 lctl.newdev("echo_client", self.name, self.uuid,
2727 setup = self.osc.get_name())
2730 if is_prepared(self.name):
2731 Module.cleanup(self)
2734 def add_module(self, manager):
2735 self.osc.add_module(manager)
2736 manager.add_lustre_module('obdecho', 'obdecho')
2738 def correct_level(self, level, op=None):
def generate_client_uuid(name):
    """Generate a pseudo-unique client UUID string for device *name*.

    The result has the form ``RRRRR_NAME_RRRRRSSSSS`` where the R/S runs
    are 5-hex-digit random values and NAME is *name* truncated to 19
    characters by the ``%.19s`` specifier.  The final slice enforces the
    36-character UUID field limit.  Uses ``random`` (not cryptographic) —
    adequate for configuration-time device naming only.
    """
    # The excerpt had lost the 'name' argument line; the format string
    # has four specifiers, so four values are required or '%' raises.
    client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
                                           name,
                                           int(random.random() * 1048576),
                                           int(random.random() * 1048576))
    return client_uuid[:36]
2748 class Mountpoint(Module):
# Client-side filesystem mountpoint: resolves the filesystem's MDS (or
# LMV) and OST/LOV references and drives mount/umount of lustre_lite.
# NOTE(review): the embedded line numbering has gaps — several statements
# (else branches, try/except lines, returns) are missing from this view.
2749 def __init__(self,db):
2750 Module.__init__(self, 'MTPT', db)
2751 self.path = self.db.get_val('path')
2752 self.clientoptions = self.db.get_val('clientoptions', '')
2753 self.fs_uuid = self.db.get_first_ref('filesystem')
2754 fs = self.db.lookup(self.fs_uuid)
# prefer an LMV reference over a plain MDS reference when present
2755 self.mds_uuid = fs.get_first_ref('lmv')
2756 if not self.mds_uuid:
2757 self.mds_uuid = fs.get_first_ref('mds')
2758 self.obd_uuid = fs.get_first_ref('obd')
2759 client_uuid = generate_client_uuid(self.name)
2761 ost = self.db.lookup(self.obd_uuid)
2763 panic("no ost: ", self.obd_uuid)
2765 mds = self.db.lookup(self.mds_uuid)
2767 panic("no mds: ", self.mds_uuid)
# VOSC/VMDC wrap either a plain OSC/MDC or a LOV/LMV as appropriate
2769 self.vosc = VOSC(ost, client_uuid, self.name, self.name)
2770 self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
# presumably start of setup(): skip when already mounted (not recording)
2773 if not config.record and fs_is_mounted(self.path):
2774 log(self.path, "already mounted.")
2781 self.info(self.path, self.mds_uuid, self.obd_uuid)
2782 if config.record or config.lctl_dump:
2783 lctl.mount_option(local_node_name, self.vosc.get_name(),
2784 self.vmdc.get_name())
# merge command-line client options with the config-file ones
2787 if config.clientoptions:
2788 if self.clientoptions:
2789 self.clientoptions = self.clientoptions + ',' + config.clientoptions
2791 self.clientoptions = config.clientoptions
2792 if self.clientoptions:
2793 self.clientoptions = ',' + self.clientoptions
2794 # Linux kernel will deal with async and not pass it to ll_fill_super,
2795 # so replace it with Lustre async
2796 self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
2800 cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,sec=%s%s %s %s" % \
2801 (self.vosc.get_name(), self.vmdc.get_name(), config.sec,
2802 self.clientoptions, config.config, self.path)
2803 run("mkdir", self.path)
2808 panic("mount failed:", self.path, ":", string.join(val))
# presumably start of cleanup(): forced umount first, then plain umount
2811 self.info(self.path, self.mds_uuid,self.obd_uuid)
2813 if config.record or config.lctl_dump:
2814 lctl.del_mount_option(local_node_name)
2816 if fs_is_mounted(self.path):
2818 (rc, out) = run("umount", "-f", self.path)
2820 (rc, out) = run("umount", self.path)
2822 raise CommandError('umount', out, rc)
2824 if fs_is_mounted(self.path):
2825 panic("fs is still mounted:", self.path)
2830 def add_module(self, manager):
2831 self.vosc.add_module(manager)
2832 self.vmdc.add_module(manager)
2833 manager.add_lustre_module('llite', 'llite')
# correct_level(): body not visible in this chunk (line 2836+ missing).
2835 def correct_level(self, level, op=None):
2838 # ============================================================
2839 # misc query functions
def get_ost_net(self, osd_uuid):
    """Return the list of Network objects for the node hosting osd_uuid.

    Returns an empty list when osd_uuid is unset; panics when the osd's
    node reference cannot be resolved.
    """
    srv_list = []
    if not osd_uuid:
        return srv_list
    osd = self.lookup(osd_uuid)
    node_uuid = osd.get_first_ref('node')
    node = self.lookup(node_uuid)
    if not node:
        # BUG FIX: the original referenced the undefined name 'node_uuid_'
        # here, which would raise NameError instead of printing the
        # intended panic message with the unresolved node reference.
        panic("unable to find node for osd_uuid:", osd_uuid,
              " node_ref:", node_uuid)
    for net_uuid in node.get_networks():
        db = node.lookup(net_uuid)
        srv_list.append(Network(db))
    return srv_list
2856 # the order of initialization is based on level.
# Map a service class name to its numeric startup level; services are
# configured in ascending level order and cleaned up in reverse.
# NOTE(review): the 'ret = NN' assignment lines between the branches are
# missing from this listing; only the branch conditions survive.
2857 def getServiceLevel(self):
2858 type = self.get_class()
2860 if type in ('network',):
2862 elif type in ('routetbl',):
2864 elif type in ('ldlm',):
2866 elif type in ('osd', 'cobd'):
2868 elif type in ('mdsdev',):
2870 elif type in ('lmv',):
2872 elif type in ('cmobd',):
2874 elif type in ('mountpoint', 'echoclient'):
2877 panic("Unknown type: ", type)
# levels outside the configured [minlevel, maxlevel] window are dropped
2879 if ret < config.minlevel or ret > config.maxlevel:
2884 # return list of services in a profile. list is a list of tuples
2885 # [(level, db_object),]
2886 def getServices(self):
2888 for ref_class, ref_uuid in self.get_all_refs():
2889 servdb = self.lookup(ref_uuid)
2891 level = getServiceLevel(servdb)
2893 list.append((level, servdb))
2895 panic('service not found: ' + ref_uuid)
2901 ############################################################
2903 # FIXME: clean this mess up!
2905 # OSC is no longer in the xml, so we have to fake it.
2906 # this is getting ugly and begging for another refactoring
# Fabricate an OSC object for an OST device, since OSCs no longer appear
# in the XML config.
2907 def get_osc(ost_db, uuid, fs_name):
2908 osc = OSC(ost_db, uuid, fs_name)
# Look up the MDS for mds_uuid and wrap it in an MDC; errors when absent.
2911 def get_mdc(db, fs_name, mds_uuid):
2912 mds_db = db.lookup(mds_uuid);
2914 error("no mds:", mds_uuid)
2915 mdc = MDC(mds_db, mds_uuid, fs_name)
2918 ############################################################
2919 # routing ("rooting")
2921 # list of (nettype, cluster_id, nid)
# Record every network this node sits on in the global local_clusters
# list, and register an acceptor handler per distinct server port.
2924 def find_local_clusters(node_db):
2925 global local_clusters
2926 for netuuid in node_db.get_networks():
2927 net = node_db.lookup(netuuid)
2929 debug("add_local", netuuid)
2930 local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
# NOTE(review): 'srv' is bound on a line missing from this listing
# (presumably srv = Network(net) near original line 2928 — confirm).
2932 if not acceptors.has_key(srv.port):
2933 acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
2935 # This node is a gateway.
# Report whether this node itself is configured as a router; the return
# is on a line missing from this listing.
2937 def node_is_router():
def node_needs_router():
    """True when kptlrouter is needed on this node.

    That is the case either when the config declares any routers
    (needs_router) or when this node itself is a router (is_router).
    """
    if needs_router:
        return needs_router
    return is_router
2946 # list of (nettype, gw, tgt_cluster_id, lo, hi)
2947 # Currently, these local routes are only added to kptlrouter route
2948 # table if they are needed to connect to a specific server. This
2949 # should be changed so all available routes are loaded, and the
2950 # ptlrouter can make all the decisions.
# Populate the global local_routes with routes reachable through gateway
# nodes that share a net type and cluster with one of our local clusters.
# NOTE(review): several lines are missing here, including the loop that
# binds 'router' (presumably 'for router in list:') and the line setting
# needs_router.
2953 def find_local_routes(lustre):
2954 """ Scan the lustre config looking for routers . Build list of
2956 global local_routes, needs_router
2958 list = lustre.lookup_class('node')
2960 if router.get_val_int('router', 0):
2962 for (local_type, local_cluster_id, local_nid) in local_clusters:
2964 for netuuid in router.get_networks():
2965 db = router.lookup(netuuid)
# a gateway is usable only when it shares net type and cluster with us
2966 if (local_type == db.get_val('nettype') and
2967 local_cluster_id == db.get_val('clusterid')):
2968 gw = db.get_val('nid')
2971 debug("find_local_routes: gw is", gw)
2972 for route in router.get_local_routes(local_type, gw):
2973 local_routes.append(route)
2974 debug("find_local_routes:", local_routes)
# Pick a server from srv_list that lives on one of our local clusters.
2977 def choose_local_server(srv_list):
2978 for srv in srv_list:
2979 if local_cluster(srv.net_type, srv.cluster_id):
# True when (net_type, cluster_id) matches one of our local clusters.
2982 def local_cluster(net_type, cluster_id):
2983 for cluster in local_clusters:
2984 if net_type == cluster[0] and cluster_id == cluster[1]:
# True when the exact (net_type, cluster_id, nid) triple is local.
2988 def local_interface(net_type, cluster_id, nid):
2989 for cluster in local_clusters:
2990 if (net_type == cluster[0] and cluster_id == cluster[1]
2991 and nid == cluster[2]):
# For each server, collect routes whose target cluster matches and whose
# [lo, hi] nid range covers the server; builds (srv, route) pairs.
2995 def find_route(srv_list):
2997 frm_type = local_clusters[0][0]
2998 for srv in srv_list:
2999 debug("find_route: srv:", srv.nid, "type: ", srv.net_type)
3000 to_type = srv.net_type
3002 cluster_id = srv.cluster_id
3003 debug ('looking for route to', to_type, to)
3004 for r in local_routes:
3005 debug("find_route: ", r)
# r = (nettype, gw, tgt_cluster_id, lo, hi) per the comment above
3006 if (r[3] <= to and to <= r[4]) and cluster_id == r[2]:
3007 result.append((srv, r))
# Resolve a target to its currently-active device UUID, honouring any
# --select override registered for this target's name.
3010 def get_active_target(db):
3011 target_uuid = db.getUUID()
3012 target_name = db.getName()
3013 node_name = get_select(target_name)
3015 tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid)
3017 tgt_dev_uuid = db.get_first_ref('active')
# Find the Network object whose nid_uuid matches; the binding of 'net'
# and the returns are on lines missing from this listing.
3020 def get_server_by_nid_uuid(db, nid_uuid):
3021 for n in db.lookup_class("network"):
3023 if net.nid_uuid == nid_uuid:
3027 ############################################################
# Factory mapping a config-db service class name to its wrapper object.
# NOTE(review): the 'def newService(db):' line (original 3030) and most
# of the 'n = ...' constructor lines are missing from this listing.
3031 type = db.get_class()
3032 debug('Service:', type, db.getName(), db.getUUID())
3037 n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3038 elif type == 'network':
3040 elif type == 'routetbl':
3044 elif type == 'cobd':
3045 n = COBD(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
3046 elif type == 'cmobd':
3048 elif type == 'mdsdev':
3050 elif type == 'mountpoint':
3052 elif type == 'echoclient':
3057 panic ("unknown service type:", type)
3061 # Prepare the system to run lustre using a particular profile
3062 # in the configuration.
3063 # * load the modules
3064 # * setup networking for the current node
3065 # * make sure partitions are in place and prepared
3066 # * initialize devices with lctl
3067 # Levels is important, and needs to be enforced.
# Apply 'operation' (doSetup, doCleanup, ...) to the services of every
# profile in prof_list; the call itself is on a missing line.
3068 def for_each_profile(db, prof_list, operation):
3069 for prof_uuid in prof_list:
3070 prof_db = db.lookup(prof_uuid)
3072 panic("profile:", prof_uuid, "not found.")
3073 services = getServices(prof_db)
# Recover the OSC for an update record. When a lov object is passed, its
# uuid and fs name are used directly; otherwise the lov uuid is taken
# from the record and the name is dug out of the XML by chasing the
# filesystem and mountpoint references.
3076 def magic_get_osc(db, rec, lov):
3078 lov_uuid = lov.get_uuid()
3079 lov_name = lov.osc.fs_name
3081 lov_uuid = rec.getAttribute('lov_uuidref')
3082 # FIXME: better way to find the mountpoint?
3083 filesystems = db.root_node.getElementsByTagName('filesystem')
3085 for fs in filesystems:
3086 ref = fs.getElementsByTagName('obd_ref')
3087 if ref[0].getAttribute('uuidref') == lov_uuid:
3088 fsuuid = fs.getAttribute('uuid')
3092 panic("malformed xml: lov uuid '" + lov_uuid + "' referenced in 'add' record is not used by any filesystems.")
3094 mtpts = db.root_node.getElementsByTagName('mountpoint')
# NOTE(review): the loop over 'mtpts' is on a missing line; 'fs' appears
# to be reused as its loop variable below.
3097 ref = fs.getElementsByTagName('filesystem_ref')
3098 if ref[0].getAttribute('uuidref') == fsuuid:
3099 lov_name = fs.getAttribute('name')
3103 panic("malformed xml: 'add' record references lov uuid '" + lov_uuid + "', which references filesystem uuid '" + fsuuid + "', which does not reference a mountpoint.")
3105 print "lov_uuid: " + lov_uuid + "; lov_name: " + lov_name
3107 ost_uuid = rec.getAttribute('ost_uuidref')
3108 obd = db.lookup(ost_uuid)
3111 panic("malformed xml: 'add' record references ost uuid '" + ost_uuid + "' which cannot be found.")
3113 osc = get_osc(obd, lov_uuid, lov_name)
# NOTE(review): 'obd_uuid' is not defined in this function — this panic
# would raise NameError; 'ost_uuid' looks intended. Confirm upstream.
3115 panic('osc not found:', obd_uuid)
3118 # write logs for update records. sadly, logs of all types -- and updates in
3119 # particular -- are something of an afterthought. lconf needs rewritten with
3120 # these as core concepts. so this is a pretty big hack.
# Replay one <update> element's child records ('add' / 'deactivate' /
# 'delete') against the lov via lctl.
3121 def process_update_record(db, update, lov):
3122 for rec in update.childNodes:
3123 if rec.nodeType != rec.ELEMENT_NODE:
3126 log("found "+rec.nodeName+" record in update version " +
3127 str(update.getAttribute('version')))
3129 lov_uuid = rec.getAttribute('lov_uuidref')
3130 ost_uuid = rec.getAttribute('ost_uuidref')
3131 index = rec.getAttribute('index')
3132 gen = rec.getAttribute('generation')
3134 if not lov_uuid or not ost_uuid or not index or not gen:
3135 panic("malformed xml: 'update' record requires lov_uuid, ost_uuid, index, and generation.")
3138 tmplov = db.lookup(lov_uuid)
3140 panic("malformed xml: 'delete' record contains lov UUID '" + lov_uuid + "', which cannot be located.")
3141 lov_name = tmplov.getName()
3143 lov_name = lov.osc.name
3145 # ------------------------------------------------------------- add
3146 if rec.nodeName == 'add':
# NOTE(review): the branch condition guarding this del (original line
# 3147) is missing from this listing.
3148 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3151 osc = magic_get_osc(db, rec, lov)
3154 # Only ignore connect failures with --force, which
3155 # isn't implemented here yet.
3156 osc.prepare(ignore_connect_failure=0)
3157 except CommandError, e:
3158 print "Error preparing OSC %s\n" % osc.uuid
3161 lctl.lov_add_obd(lov_name, lov_uuid, ost_uuid, index, gen)
3163 # ------------------------------------------------------ deactivate
3164 elif rec.nodeName == 'deactivate':
3168 osc = magic_get_osc(db, rec, lov)
3172 except CommandError, e:
3173 print "Error deactivating OSC %s\n" % osc.uuid
3176 # ---------------------------------------------------------- delete
3177 elif rec.nodeName == 'delete':
3181 osc = magic_get_osc(db, rec, lov)
3187 except CommandError, e:
3188 print "Error cleaning up OSC %s\n" % osc.uuid
3191 lctl.lov_del_obd(lov_name, lov_uuid, ost_uuid, index, gen)
# Replay every non-empty <update> element into its own config log named
# "<log_name>-<version>" on log_device.
3193 def process_updates(db, log_device, log_name, lov = None):
3194 updates = db.root_node.getElementsByTagName('update')
# NOTE(review): the 'for u in updates:' line is missing from this listing.
3196 if not u.childNodes:
3197 log("ignoring empty update record (version " +
3198 str(u.getAttribute('version')) + ")")
3201 version = u.getAttribute('version')
3202 real_name = "%s-%s" % (log_name, version)
3203 lctl.clear_log(log_device, real_name)
3204 lctl.record(log_device, real_name)
3206 process_update_record(db, u, lov)
# Write config logs for mdsdev and osd services (used by --write_conf).
3210 def doWriteconf(services):
3214 if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
3215 n = newService(s[1])
# Instantiate each service, re-sort by corrected level, then set up.
3219 def doSetup(services):
3224 n = newService(s[1])
3226 slist.append((n.level, n))
3229 nl = n[1].correct_level(n[0])
3230 nlist.append((nl, n[1]))
# Collect the kernel modules required by every service, then load them.
3235 def doLoadModules(services):
3239 # adding all needed modules from all services
3241 n = newService(s[1])
3242 n.add_module(mod_manager)
3244 # loading all registered modules
3245 mod_manager.load_modules()
# Collect modules only from services that are safe to clean, then unload.
3247 def doUnloadModules(services):
3251 # adding all needed modules from all services
3253 n = newService(s[1])
3254 if n.safe_to_clean_modules():
3255 n.add_module(mod_manager)
3257 # unloading all registered modules
3258 mod_manager.cleanup_modules()
# Instantiate services, re-sort by corrected level, and clean up those
# reporting safe_to_clean().
3260 def doCleanup(services):
3266 n = newService(s[1])
3268 slist.append((n.level, n))
3271 nl = n[1].correct_level(n[0])
3272 nlist.append((nl, n[1]))
3277 if n[1].safe_to_clean():
# Configure (or clean up) the first host in 'hosts' that matches a node
# entry in the config; drives the write_conf / recover / cleanup / setup
# flows. NOTE(review): the embedded numbering has gaps — loop headers,
# else branches and several statements are missing from this listing.
3282 def doHost(lustreDB, hosts):
3283 global is_router, local_node_name
3286 node_db = lustreDB.lookup_name(h, 'node')
3290 panic('No host entry found.')
# cache per-node settings consumed by the sys_set_* helpers below
3292 local_node_name = node_db.get_val('name', 0)
3293 is_router = node_db.get_val_int('router', 0)
3294 lustre_upcall = node_db.get_val('lustreUpcall', '')
3295 portals_upcall = node_db.get_val('portalsUpcall', '')
3296 timeout = node_db.get_val_int('timeout', 0)
3297 ptldebug = node_db.get_val('ptldebug', '')
3298 subsystem = node_db.get_val('subsystem', '')
3300 find_local_clusters(node_db)
3302 find_local_routes(lustreDB)
3304 # Two step process: (1) load modules, (2) setup lustre
3305 # if not cleaning, load modules first.
3306 prof_list = node_db.get_refs('profile')
3308 if config.write_conf:
3309 for_each_profile(node_db, prof_list, doLoadModules)
3311 for_each_profile(node_db, prof_list, doWriteconf)
3312 for_each_profile(node_db, prof_list, doUnloadModules)
3315 elif config.recover:
3316 if not (config.tgt_uuid and config.client_uuid and config.conn_uuid):
3317 raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " +
3318 "--client_uuid <UUID> --conn_uuid <UUID>")
3319 doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid,
3321 elif config.cleanup:
3323 # the command line can override this value
3325 # ugly hack, only need to run lctl commands for --dump
3326 if config.lctl_dump or config.record:
3327 for_each_profile(node_db, prof_list, doCleanup)
3330 sys_set_timeout(timeout)
3331 sys_set_ptldebug(ptldebug)
3332 sys_set_subsystem(subsystem)
3333 sys_set_lustre_upcall(lustre_upcall)
3334 sys_set_portals_upcall(portals_upcall)
3336 for_each_profile(node_db, prof_list, doCleanup)
3337 for_each_profile(node_db, prof_list, doUnloadModules)
# default path: full setup of the node
3341 # ugly hack, only need to run lctl commands for --dump
3342 if config.lctl_dump or config.record:
3343 sys_set_timeout(timeout)
3344 sys_set_lustre_upcall(lustre_upcall)
3345 for_each_profile(node_db, prof_list, doSetup)
3349 sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
3350 sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
3352 for_each_profile(node_db, prof_list, doLoadModules)
3354 sys_set_debug_path()
3355 sys_set_ptldebug(ptldebug)
3356 sys_set_subsystem(subsystem)
3357 script = config.gdb_script
3358 run(lctl.lctl, ' modules >', script)
3360 log ("The GDB module script is in", script)
3361 # pause, so user has time to break and
3364 sys_set_timeout(timeout)
3365 sys_set_lustre_upcall(lustre_upcall)
3366 sys_set_portals_upcall(portals_upcall)
3368 for_each_profile(node_db, prof_list, doSetup)
# Redirect a client from a failed connection to the currently-active
# target: disconnect the old nid (best-effort), then recover the client
# onto the new server's nid.
3371 def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid):
3372 tgt = lustreDB.lookup(tgt_uuid)
3374 raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.")
3375 new_uuid = get_active_target(tgt)
3377 raise Lustre.LconfError("doRecovery: no active target found for: " +
3379 net = choose_local_server(get_ost_net(lustreDB, new_uuid))
3381 raise Lustre.LconfError("Unable to find a connection to:" + new_uuid)
3383 log("Reconnecting", tgt_uuid, " to ", net.nid_uuid);
3385 oldnet = get_server_by_nid_uuid(lustreDB, nid_uuid)
# disconnect failures are logged but deliberately not fatal
3388 lctl.disconnect(oldnet)
3389 except CommandError, e:
3390 log("recover: disconnect", nid_uuid, "failed: ")
3395 except CommandError, e:
3396 log("recover: connect failed")
3399 lctl.recover(client_uuid, net.nid_uuid)
# Derive config.lustre / config.portals module search paths, either from
# the command's location (development mode) or from the command line.
3402 def setupModulePath(cmd, portals_dir = PORTALS_DIR):
3403 base = os.path.dirname(cmd)
3404 if development_mode():
3405 if not config.lustre:
3406 debug('using objdir module paths')
3407 config.lustre = (os.path.join(base, ".."))
3408 # normalize the portals dir, using command line arg if set
3410 portals_dir = config.portals
3411 dir = os.path.join(config.lustre, portals_dir)
3412 config.portals = dir
3413 debug('config.portals', config.portals)
3414 elif config.lustre and config.portals:
3416 # if --lustre and --portals, normalize portals
3417 # can ignore PORTALS_DIR here, since it is probably useless here
3418 config.portals = os.path.join(config.lustre, config.portals)
3419 debug('config.portals B', config.portals)
# Write 'val' to /proc/sys/<path>. NOTE(review): lines 3423-3425 are
# missing — presumably the config.noexec guard and try; confirm.
3421 def sysctl(path, val):
3422 debug("+ sysctl", path, val)
3426 fp = open(os.path.join('/proc/sys', path), 'w')
def sys_set_debug_path():
    """Point the portals debug dump at the configured debug path."""
    dump_path = config.debug_path
    sysctl('portals/debug_path', dump_path)
# Select the lustre upcall script — --lustre_upcall beats --upcall beats
# the node-config value — and push it via lctl. NOTE(review): the elif
# lines between the branches are missing from this listing.
3436 def sys_set_lustre_upcall(upcall):
3437 # the command overrides the value in the node config
3438 if config.lustre_upcall:
3439 upcall = config.lustre_upcall
3441 upcall = config.upcall
3443 lctl.set_lustre_upcall(upcall)
# Same precedence for the portals upcall, written via sysctl instead.
3445 def sys_set_portals_upcall(upcall):
3446 # the command overrides the value in the node config
3447 if config.portals_upcall:
3448 upcall = config.portals_upcall
3450 upcall = config.upcall
3452 sysctl('portals/upcall', upcall)
def sys_set_timeout(timeout):
    """Set the lustre recovery timeout via lctl.

    The --timeout command line option overrides the node-config value;
    a None or non-positive timeout is silently ignored.
    """
    # the command overrides the value in the node config
    if config.timeout and config.timeout > 0:
        timeout = config.timeout
    # idiom fix: compare against None with 'is not' rather than '!='
    if timeout is not None and timeout > 0:
        lctl.set_timeout(timeout)
# On 2.6 kernels reserve RAM for socket buffers via vm/min_free_kbytes,
# and optionally restrict socknal to a single (untyped) socket.
3461 def sys_tweak_socknal ():
3462 # reserve at least 8MB, or we run out of RAM in skb_alloc under read
3463 if sys_get_branch() == '2.6':
3464 fp = open('/proc/meminfo')
3465 lines = fp.readlines()
# NOTE(review): the loop over 'lines' binding 'a' and 'memtotal' is on
# lines missing from this listing.
3470 if a[0] == 'MemTotal:':
3472 debug("memtotal" + memtotal)
# below 262144 kB of RAM, reserve one sixteenth instead of the default
3473 if int(memtotal) < 262144:
3474 minfree = int(memtotal) / 16
3477 debug("+ minfree ", minfree)
3478 sysctl("vm/min_free_kbytes", minfree)
3479 if config.single_socket:
3480 sysctl("socknal/typed", 0)
# Reduce elan interrupt punt loops on whichever qsnet procfile exists
# and is writable.
3482 def sys_optimize_elan ():
3483 procfiles = ["/proc/elan/config/eventint_punt_loops",
3484 "/proc/qsnet/elan3/config/eventint_punt_loops",
3485 "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
3487 if os.access(p, os.W_OK):
3488 run ("echo 1 > " + p)
# Evaluate the symbolic ptldebug expression against ptldebug_names and
# write the resulting hex mask to portals/debug.
3490 def sys_set_ptldebug(ptldebug):
3492 ptldebug = config.ptldebug
3495 val = eval(ptldebug, ptldebug_names)
3496 val = "0x%x" % (val)
3497 sysctl('portals/debug', val)
3498 except NameError, e:
# Same evaluation for the subsystem mask (portals/subsystem_debug).
3501 def sys_set_subsystem(subsystem):
3502 if config.subsystem:
3503 subsystem = config.subsystem
3506 val = eval(subsystem, subsystem_names)
3507 val = "0x%x" % (val)
3508 sysctl('portals/subsystem_debug', val)
3509 except NameError, e:
# Raise a net core buffer limit file to at least 'max'. NOTE(review):
# the read/compare lines (3514-3520) are missing from this listing.
3512 def sys_set_netmem_max(path, max):
3513 debug("setting", path, "to at least", max)
3521 fp = open(path, 'w')
3522 fp.write('%d\n' %(max))
def sys_make_devices():
    """Create the portals and obd control device nodes when missing."""
    wanted = (('/dev/portals', 'mknod /dev/portals c 10 240'),
              ('/dev/obd', 'mknod /dev/obd c 10 241'))
    for node, mknod_cmd in wanted:
        if not os.access(node, os.R_OK):
            run(mknod_cmd)
3531 # Add dir to the global PATH, if not already there.
3532 def add_to_path(new_dir):
3533 syspath = string.split(os.environ['PATH'], ':')
3534 if new_dir in syspath:
# NOTE(review): line 3535 (presumably 'return') is missing here; as
# listed, the append below would run unconditionally.
3536 os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir
# Default debug dump location; prefers the /r chroot when it exists.
# The return lines are missing from this listing.
3538 def default_debug_path():
3539 path = '/tmp/lustre-log'
3540 if os.path.isdir('/r'):
# Default gdb script location, likewise /r-aware.
3545 def default_gdb_script():
3546 script = '/tmp/ogdb'
3547 if os.path.isdir('/r'):
3548 return '/r' + script
3552 DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
3553 # ensure basic elements are in the system path
3554 def sanitise_path():
3555 for dir in DEFAULT_PATH:
3558 # global hack for the --select handling
# Parse --select args of the form service=node[,service2=node2,...] into
# the global tgt_select map.
3560 def init_select(args):
3561 # args = [service=nodeA,service2=nodeB service3=nodeC]
3564 list = string.split(arg, ',')
3566 srv, node = string.split(entry, '=')
3567 tgt_select[srv] = node
# Return the node selected for 'srv' via --select, if any was given.
3569 def get_select(srv):
3570 if tgt_select.has_key(srv):
3571 return tgt_select[srv]
# Shorthand aliases for the option-kind constants used in the table below.
3575 FLAG = Lustre.Options.FLAG
3576 PARAM = Lustre.Options.PARAM
3577 INTPARAM = Lustre.Options.INTPARAM
3578 PARAMLIST = Lustre.Options.PARAMLIST
# Command line option table consumed by Lustre.Options in main().
# NOTE(review): the 'lconf_options = [' opener and several trailing
# FLAG/PARAM entries are on lines missing from this listing.
3580 ('verbose,v', "Print system commands as they are run"),
3581 ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM),
3582 ('config', "Cluster config name used for LDAP query", PARAM),
3583 ('select', "service=nodeA,service2=nodeB ", PARAMLIST),
3584 ('node', "Load config for <nodename>", PARAM),
3585 ('sec', "security flavor <null|krb5i|krb5p> of client", PARAM),
3586 ('mds_mds_sec', "security flavor <null|krb5i|krb5p> of inter mds's", PARAM),
3587 ('mds_ost_sec', "security flavor <null|krb5i|krb5p> of mds's-ost's", PARAM),
3588 ('cleanup,d', "Cleans up config. (Shutdown)"),
3589 ('force,f', "Forced unmounting and/or obd detach during cleanup",
3591 ('single_socket', "socknal option: only use one socket instead of bundle",
3593 ('failover',"""Used to shut down without saving state.
3594 This will allow this node to "give up" a service to a
3595 another node for failover purposes. This will not
3596 be a clean shutdown.""",
3598 ('gdb', """Prints message after creating gdb module script
3599 and sleeps for 5 seconds."""),
3600 ('noexec,n', """Prints the commands and steps that will be run for a
3601 config without executing them. This can used to check if a
3602 config file is doing what it should be doing"""),
3603 ('nomod', "Skip load/unload module step."),
3604 ('nosetup', "Skip device setup/cleanup step."),
3605 ('reformat', "Reformat all devices (without question)"),
3606 ('mkfsoptions', "Additional options for the mk*fs command line", PARAM),
3607 ('mountfsoptions', "Additional options for mount fs command line", PARAM),
3608 ('clientoptions', "Additional options for Lustre", PARAM),
3609 ('dump', "Dump the kernel debug log to file before portals is unloaded",
3611 ('write_conf', "Save all the client config information on mds."),
3612 ('record', "Write config information on mds."),
3613 ('record_log', "Name of config record log.", PARAM),
3614 ('record_device', "MDS device name that will record the config commands",
3616 ('root_squash', "MDS squash root to appointed uid",
3618 ('no_root_squash', "Don't squash root for appointed nid",
3620 ('minlevel', "Minimum level of services to configure/cleanup",
3622 ('maxlevel', """Maximum level of services to configure/cleanup
3623 Levels are aproximatly like:
3628 70 - mountpoint, echo_client, osc, mdc, lov""",
3630 ('lustre', """Base directory of lustre sources. This parameter will
3631 cause lconf to load modules from a source tree.""", PARAM),
3632 ('portals', """Portals source directory. If this is a relative path,
3633 then it is assumed to be relative to lustre. """, PARAM),
3634 ('timeout', "Set recovery timeout", INTPARAM),
3635 ('upcall', "Set both portals and lustre upcall script", PARAM),
3636 ('lustre_upcall', "Set lustre upcall script", PARAM),
3637 ('portals_upcall', "Set portals upcall script", PARAM),
3638 ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM),
3639 ('ptldebug', "Set the portals debug level", PARAM),
3640 ('subsystem', "Set the portals debug subsystem", PARAM),
3641 ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()),
3642 ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()),
3643 # Client recovery options
3644 ('recover', "Recover a device"),
3645 ('group', "The group of devices to configure or cleanup", PARAM),
3646 ('tgt_uuid', "The failed target (required for recovery)", PARAM),
3647 ('client_uuid', "The failed client (required for recovery)", PARAM),
3648 ('conn_uuid', "The failed connection (required for recovery)", PARAM),
3650 ('inactive', """The name of an inactive service, to be ignored during
3651 mounting (currently OST-only). Can be repeated.""",
# Body of main(): parse options, seed the PRNG, load the XML or LDAP
# config, and drive doHost() for this node. NOTE(review): the
# 'def main():' line itself and many statements (try openers, else
# branches, sys.exit calls) are on lines missing from this listing.
3656 global lctl, config, toplustreDB, CONFIG_FILE, mod_manager
3658 # in the upcall this is set to SIG_IGN
3659 signal.signal(signal.SIGCHLD, signal.SIG_DFL)
3661 cl = Lustre.Options("lconf", "config.xml", lconf_options)
3663 config, args = cl.parse(sys.argv[1:])
3664 except Lustre.OptionError, e:
3668 setupModulePath(sys.argv[0])
3670 host = socket.gethostname()
3672 # the PRNG is normally seeded with time(), which is not so good for starting
3673 # time-synchronized clusters
3674 input = open('/dev/urandom', 'r')
3676 print 'Unable to open /dev/urandom!'
3678 seed = input.read(32)
3684 init_select(config.select)
3687 # allow config to be fetched via HTTP, but only with python2
3688 if sys.version[0] != '1' and args[0].startswith('http://'):
3691 config_file = urllib2.urlopen(args[0])
3692 except (urllib2.URLError, socket.error), err:
3693 if hasattr(err, 'args'):
3695 print "Could not access '%s': %s" %(args[0], err)
3697 elif not os.access(args[0], os.R_OK):
3698 print 'File not found or readable:', args[0]
3702 config_file = open(args[0], 'r')
3704 dom = xml.dom.minidom.parse(config_file)
3706 panic("%s does not appear to be a config file." % (args[0]))
3707 sys.exit(1) # make sure to die here, even in debug mode.
3709 CONFIG_FILE = args[0]
3710 lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
# derive the cluster config name from the file name when not given
3711 if not config.config:
3712 config.config = os.path.basename(args[0])# use full path?
3713 if config.config[-4:] == '.xml':
3714 config.config = config.config[:-4]
3715 elif config.ldapurl:
3716 if not config.config:
3717 panic("--ldapurl requires --config name")
3718 dn = "config=%s,fs=lustre" % (config.config)
3719 lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
3720 elif config.ptldebug or config.subsystem:
3721 sys_set_ptldebug(None)
3722 sys_set_subsystem(None)
3725 print 'Missing config file or ldap URL.'
3726 print 'see lconf --help for command summary'
3729 toplustreDB = lustreDB
# refuse configs written by a different lconf version
3731 ver = lustreDB.get_version()
3733 panic("No version found in config data, please recreate.")
3734 if ver != Lustre.CONFIG_VERSION:
3735 panic("Config version", ver, "does not match lconf version",
3736 Lustre.CONFIG_VERSION)
3740 node_list.append(config.node)
3743 node_list.append(host)
3744 node_list.append('localhost')
3746 debug("configuring for host: ", node_list)
# make debug/gdb output file names unique per host
3749 config.debug_path = config.debug_path + '-' + host
3750 config.gdb_script = config.gdb_script + '-' + host
3752 lctl = LCTLInterface('lctl')
3754 if config.lctl_dump:
3755 lctl.use_save_file(config.lctl_dump)
# --record requires both a device and a log name
3758 if not (config.record_device and config.record_log):
3759 panic("When recording, both --record_log and --record_device must be specified.")
3760 lctl.clear_log(config.record_device, config.record_log)
3761 lctl.record(config.record_device, config.record_log)
3763 # init module manager
3764 mod_manager = kmod_manager(config.lustre, config.portals)
3766 doHost(lustreDB, node_list)
3768 if not config.record:
3773 process_updates(lustreDB, config.record_device, config.record_log)
# Script entry point: run main() and translate known exceptions into a
# non-zero exit status. NOTE(review): the main() call and most of the
# exception-handler bodies are on lines missing from this listing.
3775 if __name__ == "__main__":
3778 except Lustre.LconfError, e:
3780 # traceback.print_exc(file=sys.stdout)
3782 except CommandError, e:
# propagate the first cleanup error as the process exit status
3786 if first_cleanup_error:
3787 sys.exit(first_cleanup_error)